X-Git-Url: https://git.ucc.asn.au/?a=blobdiff_plain;f=src%2Fslave.c;h=622bc7b443cf48bb98fe026a6fead4e57a23f1fa;hb=HEAD;hp=3327759265843dc6651b46a2a54e69a72dfa9c98;hpb=063fe056d81e8afd218f6a40ee62aa3463df3e9a;p=matches%2Fswarm.git diff --git a/src/slave.c b/src/slave.c index 3327759..622bc7b 100644 --- a/src/slave.c +++ b/src/slave.c @@ -1,7 +1,7 @@ -#define _XOPEN_SOURCE +#define _XOPEN_SOURCE 700 #define _GNU_SOURCE -//#define _SIMPLE_SLAVE + #include "slave.h" #include @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -26,117 +27,167 @@ Slave * slave; +char name[BUFSIZ]; + +void Slave_shell(int i, char * shell); +void Slave_cleanup(); -int running; void Slave_main(Options * o) { + //fprintf(stderr, "%d\n", o->nCPU); + + - if (fork() != 0) - exit(EXIT_SUCCESS); + setbuf(stdin, NULL); setbuf(stdout, NULL); setbuf(stderr, NULL); + dup2(fileno(stdout), fileno(stderr)); // yes, this works, apparently - o->verbosity = 100; - freopen(SLAVE_LOGFILE, "w", stderr); - setbuf(stderr, NULL); - slave = (Slave*)(calloc(o->nCPU, sizeof(Slave))); + slave = (Slave*)(calloc(o->nCPU, sizeof(Slave))); + atexit(Slave_cleanup); - int net_fd = -1; - if (o->encrypt) - net_fd = Network_client("localhost", o->port,100); + + if (strcmp(o->master_addr, "-") != 0) + { + if (fork() != 0) + exit(EXIT_SUCCESS); + + //log_print(2, "Slave_main", "Using unsecured networking; connect to %s:%d", o->master_addr, o->port); + //log_print(2, "Slave_main", "Connecting to %s:%d", o->master_addr, o->port); + int net_fd = Network_client(o->master_addr, o->port, 100); + dup2(net_fd, fileno(stdin)); + dup2(net_fd, fileno(stdout)); + dup2(net_fd, fileno(stderr)); + + } else - net_fd = Network_client(o->master_addr, o->port,100); + { + o->master_addr = "localhost"; + //log_print(2, "Slave_main", "Using port forwarding; connect to %s", o->master_addr); + } - FILE * f = fdopen(net_fd, "w"); setbuf(f, NULL); - fprintf(f, "%d\n", o->nCPU); + char buffer[BUFSIZ]; + + fgets(name, sizeof(name), stdin); + name[strlen(name)-1] = '\0'; + //log_print(LOGINFO, "Slave_main", "Started remote swarm \"%s\"", name); - log_print(2, "Slave_main", "Waiting on bell from master"); - char c; - if (read(net_fd, &c, sizeof(char)) == 0 || c != '\a') - error("Slave_main", "Didn't get bell from master"); - + fprintf(stdout, "%d\n", o->nCPU); + //log_print(2, "Slave_main", "Wrote nCPU %d", o->nCPU); - log_print(2, "Slave_main", "Got bell from master"); - running = o->nCPU; + int port = 0; for (int i = 0; i < o->nCPU; ++i) { - int new_fd = net_fd; - if (i != o->nCPU-1) - { + //log_print(2, "Slave_main", "Waiting for port number..."); + fgets(buffer, sizeof(buffer), stdin); + + buffer[strlen(buffer)-1] = '\0'; + sscanf(buffer, "%d", &port); + //log_print(2, "Slave_main", "Port number %d", port); + slave[i].in = Network_client(o->master_addr, port,20); + //log_print(2, "Slave_main", "Connected to %s:%d\n", o->master_addr, port); + slave[i].out = slave[i].in; + + Slave_shell(i, o->shell); + } + - - if (read(net_fd, &c, sizeof(char)) == 0 || c != '\a') - error("Slave_main", "Didn't get bell from master authorising connection of slave %d", i); - sleep(1); + Slave_loop(o); - log_print(3, "Slave_main", "Connecting slave %d to port %d at time %d", i, o->port+i+1, time(NULL)); - if (o->encrypt) - new_fd = Network_client("localhost", o->port+i+1, 100); - else - new_fd = Network_client(o->master_addr, o->port+i+1, 100); + exit(EXIT_SUCCESS); +} - - - } +void Slave_shell(int i, char * shell) +{ + slave[i].pid = fork(); - slave[i].in = new_fd; slave[i].out = new_fd; - slave[i].pid = fork(); - if (slave[i].pid == 0) - { - dup2(slave[i].in, fileno(stdin)); - dup2(slave[i].out, fileno(stdout)); - execlp(o->shell, o->shell, NULL); - } + + if (slave[i].pid == 0) + { + dup2(slave[i].in, fileno(stdin)); + dup2(slave[i].out, fileno(stdout)); + //dup2(error_socket[1], fileno(stderr)); + + execlp(shell, shell, NULL); } - - Slave_loop(o); - free(slave); - exit(EXIT_SUCCESS); + // if the input is a network socket, this message gets sent to the master + // which will then echo it back to the socket and hence the shell + FILE * f = fdopen(slave[i].in, "w"); setbuf(f, NULL); + fprintf(f, "name=\"%s:%d\"\n", name,i); } void Slave_loop(Options * o) { - + fd_set readSet; + struct timeval tv; + tv.tv_sec = 0; + tv.tv_usec = 100000; + int p = -1; int s = 0; - - while (running > 0) + char buffer[BUFSIZ]; + while (true) { + FD_ZERO(&readSet); + FD_SET(fileno(stdin), &readSet); p = waitpid(-1, &s, 0); if (p == -1) { - log_print(0, "Slave_loop", "waitpid : %s", strerror(errno)); + //log_print(0, "Slave_loop", "waitpid : %s", strerror(errno)); continue; } - if (s != SHELL_EXIT_CODE) - { - // there was an error - int i = 0; - for (i = 0; i < o->nCPU; ++i) - { - if (slave[i].pid == p) break; - } - if (i >= o->nCPU) - error("Slave_loop", "No child matches pid %d", p); + //log_print(3, "Slave_loop", "Detected child %d exiting...", p); - log_print(0, "Slave_loop", "Child [%d] exits with status %d; restarting", p, s); - slave[i].pid = fork(); - if (slave[i].pid == 0) + // check for an exit command from the master + select(fileno(stdin) + 1, &readSet, NULL, NULL, &tv); + + if (FD_ISSET(fileno(stdin), &readSet)) + { + fgets(buffer, sizeof(buffer), stdin); + if (strcmp(buffer, "exit\n") == 0) { - dup2(slave[i].in, fileno(stdin)); - dup2(slave[i].out, fileno(stdout)); - execlp(o->shell, o->shell, NULL); + log_print(2, "Slave_loop", "Received notification of exit.\n"); + exit(EXIT_SUCCESS); } - - char buffer[] = "\f\a\a\a"; - if (write(slave[i].in, buffer, strlen(buffer)) <= 0) - log_print(0, "Slave_loop", "Slave %d input closed", i); } - else - --running; + + int i = 0; + for (i = 0; i < o->nCPU; ++i) + { + if (slave[i].pid == p) break; + } + if (i >= o->nCPU) + error("Slave_loop", "No child matches pid %d", p); + + sigchld_respond(s, name, i); + + + // cancel any tasks at the master for this slave + + write(slave[i].out, SHELL_OUTPUT_FINISHED, SHELL_OUTPUT_FINISHED_LENGTH); + + Slave_shell(i, o->shell); + + } } + +void Slave_cleanup() +{ + for (int i = 0; i < options.nCPU; ++i) + { + kill(slave[i].pid, SIGTERM); + } + sleep(1); + for (int i = 0; i < options.nCPU; ++i) + { + kill(slave[i].pid, SIGKILL); + } + free(slave); +} + +