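Did some stuff (src/slave.c: slave start-up now handshakes with the master over stdin/stdout and opens one connection per shell; shell spawning moved into Slave_shell; added Slave_cleanup as an atexit handler)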
[matches/swarm.git] / src / slave.c
index 3327759..622bc7b 100644 (file)
@@ -1,7 +1,7 @@
-#define _XOPEN_SOURCE
+#define _XOPEN_SOURCE 700
 #define _GNU_SOURCE
 
-//#define _SIMPLE_SLAVE
+
 
 #include "slave.h"
 #include <assert.h>
@@ -12,6 +12,7 @@
 #include <errno.h>
 #include <pty.h>
 #include <fcntl.h>
+#include <string.h>
 
 #include <pthread.h>
 #include <syslog.h>
 
 Slave * slave;
 
+char name[BUFSIZ];
+
+void Slave_shell(int i, char * shell);
+void Slave_cleanup();
 
-int running;
 
+/**
+ * Entry point for a slave: performs the start-up handshake with the master, starts one
+ * shell per CPU and then supervises the shells in Slave_loop.
+ * Does not return; Slave_loop never leaves its loop, and the final exit() is a backstop.
+ *
+ * Handshake, as implemented below:
+ *   1. read the swarm name (one line) from stdin
+ *   2. write nCPU (one line) to stdout
+ *   3. for each shell: read a port number (one line) from stdin and connect to that port
+ *
+ * If o->master_addr is "-", stdin/stdout are used as they are and the per-shell connections
+ * go to "localhost". Otherwise the process forks into the background and stdin, stdout and
+ * stderr are replaced by a connection to o->master_addr:o->port.
+ *
+ * NOTE(review): error() is assumed not to return (Slave_loop would index past the end of
+ * the slave table if it did) -- TODO confirm.
+ *
+ * @param o - program options; o->master_addr is overwritten with "localhost" in the "-" case
+ */
 void Slave_main(Options * o)
 {
+       //fprintf(stderr, "%d\n", o->nCPU);
+
+
        
-       if (fork() != 0)
-               exit(EXIT_SUCCESS);
+       // make all three standard streams unbuffered
+       setbuf(stdin, NULL); setbuf(stdout, NULL); setbuf(stderr, NULL);
 
+       // from here on, anything written to stderr goes wherever stdout goes
+       dup2(fileno(stdout), fileno(stderr));
 
-       o->verbosity = 100;
-       freopen(SLAVE_LOGFILE, "w", stderr);
-       setbuf(stderr, NULL);
-       slave = (Slave*)(calloc(o->nCPU, sizeof(Slave)));
+       slave = (Slave*)(calloc(o->nCPU, sizeof(Slave)));
+       if (slave == NULL && o->nCPU > 0)
+               error("Slave_main", "calloc : %s", strerror(errno));
 
-       int net_fd = -1;
-       if (o->encrypt)
-               net_fd = Network_client("localhost", o->port,100);
+
+       if (strcmp(o->master_addr, "-") != 0)
+       {
+               // detach: the parent exits straight away, the child carries on as the slave
+               pid_t child = fork();
+               if (child == -1)
+                       error("Slave_main", "fork : %s", strerror(errno));
+               if (child != 0)
+                       exit(EXIT_SUCCESS);
+
+               //log_print(2, "Slave_main", "Using unsecured networking; connect to %s:%d", o->master_addr, o->port);
+               //log_print(2, "Slave_main", "Connecting to %s:%d", o->master_addr, o->port);
+               int net_fd = Network_client(o->master_addr, o->port, 100);
+               dup2(net_fd, fileno(stdin));
+               dup2(net_fd, fileno(stdout));
+               dup2(net_fd, fileno(stderr));
+
+       }
        else
-               net_fd = Network_client(o->master_addr, o->port,100);
+       {
+               o->master_addr = "localhost";
+               //log_print(2, "Slave_main", "Using port forwarding; connect to %s", o->master_addr);
+       }
 
-       FILE * f = fdopen(net_fd, "w"); setbuf(f, NULL);
-       fprintf(f, "%d\n", o->nCPU);
+       // Registered only after the fork above, so the parent that exits there never runs the
+       // cleanup: its slave table holds no pids yet, and a zero pid makes kill() address the
+       // whole process group.
+       atexit(Slave_cleanup);
+
+       char buffer[BUFSIZ];
+
+       if (fgets(name, sizeof(name), stdin) == NULL)
+               error("Slave_main", "Didn't get swarm name from master");
+       // strip the newline if there is one; safe for an empty or unterminated line
+       name[strcspn(name, "\n")] = '\0';
+       //log_print(LOGINFO, "Slave_main", "Started remote swarm \"%s\"", name);
 
-       log_print(2, "Slave_main", "Waiting on bell from master");
-       char c;
-       if (read(net_fd, &c, sizeof(char)) == 0 || c != '\a')
-               error("Slave_main", "Didn't get bell from master");
-       
 
+       fprintf(stdout, "%d\n", o->nCPU);
+       //log_print(2, "Slave_main", "Wrote nCPU %d", o->nCPU);
        
 
-       log_print(2, "Slave_main", "Got bell from master");
-       running = o->nCPU;
+       int port = 0;
        for (int i = 0; i < o->nCPU; ++i)
        {
-               int new_fd = net_fd;
-               if (i != o->nCPU-1)
-               {
+               //log_print(2, "Slave_main", "Waiting for port number...");
+               if (fgets(buffer, sizeof(buffer), stdin) == NULL)
+                       error("Slave_main", "Didn't get port number for slave %d", i);
+
+               buffer[strcspn(buffer, "\n")] = '\0';
+               // refuse to carry on with a stale or zero port if the line is not a number
+               if (sscanf(buffer, "%d", &port) != 1)
+                       error("Slave_main", "Bad port number \"%s\" for slave %d", buffer, i);
+               //log_print(2, "Slave_main", "Port number %d", port);
+               slave[i].in = Network_client(o->master_addr, port,20);
+               //log_print(2, "Slave_main", "Connected to %s:%d\n", o->master_addr, port);
+               // one bidirectional connection serves as both the shell's input and its output
+               slave[i].out = slave[i].in;
+
+               Slave_shell(i, o->shell);
+       }
+
 
-                       
-                       if (read(net_fd, &c, sizeof(char)) == 0 || c != '\a')
-                               error("Slave_main", "Didn't get bell from master authorising connection of slave %d", i);
-                       sleep(1);
+       Slave_loop(o);
 
-                       log_print(3, "Slave_main", "Connecting slave %d to port %d at time %d", i, o->port+i+1, time(NULL));
-                       if (o->encrypt)
-                               new_fd = Network_client("localhost", o->port+i+1, 100);
-                       else
-                               new_fd = Network_client(o->master_addr, o->port+i+1, 100);
+       exit(EXIT_SUCCESS);
+}
 
-                       
-                       
-               }
+/**
+ * Start (or restart) the shell for slave i.
+ * Forks; the child attaches slave[i].in / slave[i].out to its stdin / stdout and execs the
+ * shell. The parent then writes a `name="<swarm>:<i>"` line to slave[i].in.
+ *
+ * @param i - index into the global slave table; slave[i].in and slave[i].out must already be set
+ * @param shell - program to exec; looked up via PATH by execlp
+ */
+void Slave_shell(int i, char * shell)
+{
+       slave[i].pid = fork();
+       if (slave[i].pid == -1)
+       {
+               error("Slave_shell", "fork : %s", strerror(errno));
+               return; // only reached if error() returns
+       }

-               slave[i].in = new_fd; slave[i].out = new_fd;

-               slave[i].pid = fork();
-               if (slave[i].pid == 0)
-               {
-                       dup2(slave[i].in, fileno(stdin));
-                       dup2(slave[i].out, fileno(stdout));
-                       execlp(o->shell, o->shell, NULL);
-               }
+
+       if (slave[i].pid == 0)
+       {
+               dup2(slave[i].in, fileno(stdin));
+               dup2(slave[i].out, fileno(stdout));
+               //dup2(error_socket[1], fileno(stderr));
+
+               // the argument list of a variadic exec must end in a null *pointer*
+               execlp(shell, shell, (char *)NULL);
+
+               // Only reached if execlp failed. The child must not fall through into the
+               // parent's code path; _exit also skips the parent's atexit handlers.
+               _exit(EXIT_FAILURE);
        }
-       
-       Slave_loop(o);

-       free(slave);
-       exit(EXIT_SUCCESS);
+       // if the input is a network socket, this message gets sent to the master
+       // which will then echo it back to the socket and hence the shell
+       // (dprintf writes straight to the descriptor, so no FILE is allocated per restart)
+       dprintf(slave[i].in, "name=\"%s:%d\"\n", name, i);
 }
 
+/**
+ * Supervise the shells: wait for any child to exit, report the exit and restart that shell.
+ * After each child exit, stdin is polled for up to 100ms; a line reading exactly "exit"
+ * ends the process.
+ * Never returns; the only ways out are the exit() below and whatever error() does.
+ *
+ * NOTE(review): if waitpid keeps failing (e.g. no children at all) this loop spins without
+ * blocking -- confirm whether that case needs handling.
+ *
+ * @param o - program options; o->nCPU is the number of slave table entries, o->shell the
+ *            program passed to Slave_shell on restart
+ */
 void Slave_loop(Options * o)
 {
-       
+       fd_set readSet;
+       struct timeval tv;
+
        int p = -1; int s = 0;
-       
-       while (running > 0)
+       char buffer[BUFSIZ];
+       while (true)
        {
+               FD_ZERO(&readSet);
+               FD_SET(fileno(stdin), &readSet);
                p = waitpid(-1, &s, 0);
                if (p == -1)
                {
-                       log_print(0, "Slave_loop", "waitpid : %s", strerror(errno));
+                       //log_print(0, "Slave_loop", "waitpid : %s", strerror(errno));
                        continue;
                }
-               if (s != SHELL_EXIT_CODE)
-               {
-                       // there was an error
 
-                       int i = 0;
-                       for (i = 0; i < o->nCPU; ++i)
-                       {
-                               if (slave[i].pid == p) break;
-                       }
-                       if (i >= o->nCPU)
-                               error("Slave_loop", "No child matches pid %d", p);
+               //log_print(3, "Slave_loop", "Detected child %d exiting...", p);
 
-                       log_print(0, "Slave_loop", "Child [%d] exits with status %d; restarting", p, s);
-                       slave[i].pid = fork();
-                       if (slave[i].pid == 0)
+               // check for an exit command from the master
+               // select may overwrite the timeout, so it is set afresh for every call
+               tv.tv_sec = 0;
+               tv.tv_usec = 100000;
+               int ready = select(fileno(stdin) + 1, &readSet, NULL, NULL, &tv);
+
+               // readSet is only meaningful when select reports at least one ready descriptor
+               if (ready > 0 && FD_ISSET(fileno(stdin), &readSet))
+               {
+                       // on EOF or read error fgets leaves buffer unset, so do not compare it
+                       if (fgets(buffer, sizeof(buffer), stdin) != NULL && strcmp(buffer, "exit\n") == 0)
                        {
-                               dup2(slave[i].in, fileno(stdin));
-                               dup2(slave[i].out, fileno(stdout));
-                               execlp(o->shell, o->shell, NULL);
+                               log_print(2, "Slave_loop", "Received notification of exit.\n");
+                               exit(EXIT_SUCCESS);
                        }
-
-                       char buffer[] = "\f\a\a\a";
-                       if (write(slave[i].in, buffer, strlen(buffer)) <= 0)
-                               log_print(0, "Slave_loop", "Slave %d input closed", i);
                }
-               else
-                       --running;
+
+               // find which slave the exited child belonged to
+               int i = 0;
+               for (i = 0; i < o->nCPU; ++i)
+               {
+                       if (slave[i].pid == p) break;
+               }
+               if (i >= o->nCPU)
+                       error("Slave_loop", "No child matches pid %d", p);
+
+               sigchld_respond(s, name, i);
+
+
+               // cancel any tasks at the master for this slave
+
+               write(slave[i].out, SHELL_OUTPUT_FINISHED, SHELL_OUTPUT_FINISHED_LENGTH);
+
+               Slave_shell(i, o->shell);
+
+
        }
 }
+
+/**
+ * Exit handler (registered with atexit in Slave_main): terminate every shell recorded in
+ * the slave table, then free the table.
+ * Sends SIGTERM to each shell, sleeps one second, then sends SIGKILL to each.
+ */
+void Slave_cleanup()
+{
+       // the table may not exist if the allocation in Slave_main failed
+       if (slave == NULL)
+               return;
+
+       for (int i = 0; i < options.nCPU; ++i)
+       {
+               // The table comes from calloc, so a slot whose shell was never started has
+               // pid 0, and a failed fork() leaves -1. kill() treats 0 as "my whole process
+               // group" and -1 as "every process I may signal", so only real pids are signalled.
+               if (slave[i].pid > 0)
+                       kill(slave[i].pid, SIGTERM);
+       }
+       // give the shells a moment to act on SIGTERM before forcing the issue
+       sleep(1);
+       for (int i = 0; i < options.nCPU; ++i)
+       {
+               if (slave[i].pid > 0)
+                       kill(slave[i].pid, SIGKILL);
+       }
+       free(slave);
+       slave = NULL;
+}
+
+

UCC git Repository :: git.ucc.asn.au