-#define _XOPEN_SOURCE
+#define _XOPEN_SOURCE 700
#define _GNU_SOURCE
-//#define _SIMPLE_SLAVE
+
#include "slave.h"
#include <assert.h>
#include <errno.h>
#include <pty.h>
#include <fcntl.h>
+#include <string.h>
#include <pthread.h>
#include <syslog.h>
Slave * slave;
+char name[BUFSIZ]; // swarm name: one line read from stdin by Slave_main; sent to each shell as name="<name>:<index>" by Slave_shell
+
+void Slave_shell(int i, char * shell); // forks slave i's shell with stdin/stdout on slave[i].in / slave[i].out
+void Slave_cleanup(); // registered with atexit() in Slave_main: signals every slave pid, then frees the slave array
-int running;
+/**
+ * Entry point of the slave side; does not return (ends in exit()).
+ *
+ * Unless o->master_addr is "-", the process forks, the parent exits, and the
+ * child connects to o->master_addr:o->port and uses that socket as
+ * stdin/stdout/stderr. With "-" the inherited stdin/stdout are used and
+ * o->master_addr is replaced by "localhost".
+ *
+ * Handshake over stdin/stdout, as implemented below:
+ *   1. read one line: the swarm name (stored in the global `name`)
+ *   2. write o->nCPU followed by a newline
+ *   3. for each of the o->nCPU slaves, read one line holding a port number,
+ *      connect to o->master_addr on that port and start a shell on it
+ * Afterwards control passes to Slave_loop().
+ *
+ * @param o  run-time options; nCPU, master_addr, port and shell are read,
+ *           master_addr may be overwritten.
+ */
void Slave_main(Options * o)
{
+ //fprintf(stderr, "%d\n", o->nCPU);
+
+
- if (fork() != 0)
- exit(EXIT_SUCCESS);
+ setbuf(stdin, NULL); setbuf(stdout, NULL); setbuf(stderr, NULL);
+ dup2(fileno(stdout), fileno(stderr)); // yes, this works, apparently
- o->verbosity = 100;
- freopen(SLAVE_LOGFILE, "w", stderr);
- setbuf(stderr, NULL);
- slave = (Slave*)(calloc(o->nCPU, sizeof(Slave)));
+ slave = (Slave*)(calloc(o->nCPU, sizeof(Slave)));
+ if (slave == NULL && o->nCPU > 0)
+ {
+ error("Slave_main", "calloc : %s", strerror(errno));
+ exit(EXIT_FAILURE); // in case error() returns
+ }
- int net_fd = -1;
- if (o->encrypt)
- net_fd = Network_client("localhost", o->port,100);
+
+ if (strcmp(o->master_addr, "-") != 0)
+ {
+ pid_t daemon_pid = fork();
+ if (daemon_pid < 0)
+ {
+ error("Slave_main", "fork : %s", strerror(errno));
+ exit(EXIT_FAILURE); // in case error() returns
+ }
+ if (daemon_pid != 0)
+ exit(EXIT_SUCCESS);
+
+ //log_print(2, "Slave_main", "Using unsecured networking; connect to %s:%d", o->master_addr, o->port);
+ //log_print(2, "Slave_main", "Connecting to %s:%d", o->master_addr, o->port);
+ // NOTE(review): the result of Network_client is used unchecked; confirm it cannot return an invalid descriptor
+ int net_fd = Network_client(o->master_addr, o->port, 100);
+ dup2(net_fd, fileno(stdin));
+ dup2(net_fd, fileno(stdout));
+ dup2(net_fd, fileno(stderr));
+
+ }
else
- net_fd = Network_client(o->master_addr, o->port,100);
+ {
+ o->master_addr = "localhost";
+ //log_print(2, "Slave_main", "Using port forwarding; connect to %s", o->master_addr);
+ }
+
+ // Registered only after the fork above, so that the exiting parent does not
+ // run Slave_cleanup (which sends signals and sleeps) on its way out.
+ atexit(Slave_cleanup);
- FILE * f = fdopen(net_fd, "w"); setbuf(f, NULL);
- fprintf(f, "%d\n", o->nCPU);
+ char buffer[BUFSIZ];
+
+ if (fgets(name, sizeof(name), stdin) == NULL)
+ {
+ error("Slave_main", "Couldn't read swarm name from master");
+ exit(EXIT_FAILURE); // in case error() returns
+ }
+ name[strcspn(name, "\n")] = '\0'; // strips the newline only if there is one; safe on an empty string
+ //log_print(LOGINFO, "Slave_main", "Started remote swarm \"%s\"", name);
- log_print(2, "Slave_main", "Waiting on bell from master");
- char c;
- if (read(net_fd, &c, sizeof(char)) == 0 || c != '\a')
- error("Slave_main", "Didn't get bell from master");
-
+ fprintf(stdout, "%d\n", o->nCPU);
+ //log_print(2, "Slave_main", "Wrote nCPU %d", o->nCPU);
- log_print(2, "Slave_main", "Got bell from master");
- running = o->nCPU;
+ int port = 0;
for (int i = 0; i < o->nCPU; ++i)
{
- int new_fd = net_fd;
- if (i != o->nCPU-1)
- {
+ //log_print(2, "Slave_main", "Waiting for port number...");
+ // a missing or malformed line must not silently reuse the previous port
+ if (fgets(buffer, sizeof(buffer), stdin) == NULL || sscanf(buffer, "%d", &port) != 1)
+ {
+ error("Slave_main", "Couldn't read port number for slave %d", i);
+ exit(EXIT_FAILURE); // in case error() returns
+ }
+ //log_print(2, "Slave_main", "Port number %d", port);
+ slave[i].in = Network_client(o->master_addr, port,20);
+ //log_print(2, "Slave_main", "Connected to %s:%d\n", o->master_addr, port);
+ slave[i].out = slave[i].in;
+
+ Slave_shell(i, o->shell);
+ }
+
-
- if (read(net_fd, &c, sizeof(char)) == 0 || c != '\a')
- error("Slave_main", "Didn't get bell from master authorising connection of slave %d", i);
- sleep(1);
+ Slave_loop(o);
- log_print(3, "Slave_main", "Connecting slave %d to port %d at time %d", i, o->port+i+1, time(NULL));
- if (o->encrypt)
- new_fd = Network_client("localhost", o->port+i+1, 100);
- else
- new_fd = Network_client(o->master_addr, o->port+i+1, 100);
+ exit(EXIT_SUCCESS);
+}
-
-
- }
+/**
+ * Start (or restart) the shell process of slave i.
+ *
+ * Forks; the child attaches slave[i].in / slave[i].out to its stdin / stdout
+ * and executes `shell`. The parent stores the child's pid in slave[i].pid and
+ * writes the line name="<name>:<i>" to slave[i].in.
+ *
+ * @param i      index into the global slave array
+ * @param shell  program to execute (looked up via PATH by execlp)
+ */
+void Slave_shell(int i, char * shell)
+{
+ slave[i].pid = fork();
- slave[i].in = new_fd; slave[i].out = new_fd;
- slave[i].pid = fork();
- if (slave[i].pid == 0)
- {
- dup2(slave[i].in, fileno(stdin));
- dup2(slave[i].out, fileno(stdout));
- execlp(o->shell, o->shell, NULL);
- }
+
+ if (slave[i].pid < 0)
+ {
+ // no child exists, so there is nothing to announce the name to
+ error("Slave_shell", "fork : %s", strerror(errno));
+ return;
+ }
+
+ if (slave[i].pid == 0)
+ {
+ dup2(slave[i].in, fileno(stdin));
+ dup2(slave[i].out, fileno(stdout));
+ //dup2(error_socket[1], fileno(stderr));
+
+ execlp(shell, shell, (char *)NULL);
+ // Only reached when exec failed. Leave with _exit(): returning would let the
+ // child continue running the parent's code, and exit() would run the atexit
+ // handlers inherited from the parent.
+ _exit(EXIT_FAILURE);
}
-
- Slave_loop(o);
- free(slave);
- exit(EXIT_SUCCESS);
+ // if the input is a network socket, this message gets sent to the master
+ // which will then echo it back to the socket and hence the shell
+ // dprintf writes straight to the descriptor; no FILE is allocated per (re)start
+ dprintf(slave[i].in, "name=\"%s:%d\"\n", name, i);
}
+/**
+ * Supervise the slave shells; loops forever and only leaves through exit().
+ *
+ * Each iteration blocks until some child terminates, then:
+ *   - waits up to 0.1 s for input on stdin and exits the process if the line
+ *     read is exactly "exit\n"
+ *   - finds the slave whose pid matches the terminated child
+ *   - calls sigchld_respond() with the wait status, the swarm name and the index
+ *   - writes SHELL_OUTPUT_FINISHED to the slave's output descriptor
+ *   - starts a fresh shell for that slave
+ *
+ * @param o  run-time options; nCPU and shell are read
+ */
void Slave_loop(Options * o)
{
-
+ fd_set readSet;
+ struct timeval tv;
+
int p = -1; int s = 0;
-
- while (running > 0)
+ char buffer[BUFSIZ];
+ while (true)
{
p = waitpid(-1, &s, 0);
if (p == -1)
{
- log_print(0, "Slave_loop", "waitpid : %s", strerror(errno));
+ //log_print(0, "Slave_loop", "waitpid : %s", strerror(errno));
+ // NOTE(review): if no child exists at all (ECHILD) this retries without pause - confirm that cannot happen
continue;
}
- if (s != SHELL_EXIT_CODE)
- {
- // there was an error
- int i = 0;
- for (i = 0; i < o->nCPU; ++i)
- {
- if (slave[i].pid == p) break;
- }
- if (i >= o->nCPU)
- error("Slave_loop", "No child matches pid %d", p);
+ //log_print(3, "Slave_loop", "Detected child %d exiting...", p);
- log_print(0, "Slave_loop", "Child [%d] exits with status %d; restarting", p, s);
- slave[i].pid = fork();
- if (slave[i].pid == 0)
+ // check for an exit command from the master
+ // select() may modify both the set and the timeout, so both are rebuilt for every call
+ FD_ZERO(&readSet);
+ FD_SET(fileno(stdin), &readSet);
+ tv.tv_sec = 0;
+ tv.tv_usec = 100000;
+
+ // the set is only meaningful when select() reports at least one ready descriptor
+ if (select(fileno(stdin) + 1, &readSet, NULL, NULL, &tv) > 0 && FD_ISSET(fileno(stdin), &readSet))
+ {
+ // fgets() returns NULL on end-of-file or error; buffer must not be inspected then
+ if (fgets(buffer, sizeof(buffer), stdin) != NULL && strcmp(buffer, "exit\n") == 0)
{
- dup2(slave[i].in, fileno(stdin));
- dup2(slave[i].out, fileno(stdout));
- execlp(o->shell, o->shell, NULL);
+ log_print(2, "Slave_loop", "Received notification of exit.\n");
+ exit(EXIT_SUCCESS);
}
-
- char buffer[] = "\f\a\a\a";
- if (write(slave[i].in, buffer, strlen(buffer)) <= 0)
- log_print(0, "Slave_loop", "Slave %d input closed", i);
}
- else
- --running;
+
+ int i = 0;
+ for (i = 0; i < o->nCPU; ++i)
+ {
+ if (slave[i].pid == p) break;
+ }
+ if (i >= o->nCPU)
+ {
+ error("Slave_loop", "No child matches pid %d", p);
+ continue; // in case error() returns: never index slave[] out of range
+ }
+
+ sigchld_respond(s, name, i);
+
+
+ // cancel any tasks at the master for this slave
+
+ write(slave[i].out, SHELL_OUTPUT_FINISHED, SHELL_OUTPUT_FINISHED_LENGTH);
+
+ Slave_shell(i, o->shell);
+
+
}
}
+
+/**
+ * Exit handler: stop the slave shells and release the slave array.
+ *
+ * Sends SIGTERM to every started slave, waits one second, then sends SIGKILL
+ * to the same pids. The number of slaves is taken from the global
+ * options.nCPU.
+ */
+void Slave_cleanup()
+{
+ if (slave == NULL)
+ return;
+
+ // Only strictly positive pids are signalled. Entries are zero until a shell
+ // has been started (the array comes from calloc) and hold -1 after a failed
+ // fork; kill() treats pid 0 as "my whole process group" and pid -1 as
+ // "every process I am allowed to signal".
+ for (int i = 0; i < options.nCPU; ++i)
+ {
+ if (slave[i].pid > 0)
+ kill(slave[i].pid, SIGTERM);
+ }
+ sleep(1);
+ for (int i = 0; i < options.nCPU; ++i)
+ {
+ if (slave[i].pid > 0)
+ kill(slave[i].pid, SIGKILL);
+ }
+ free(slave);
+ slave = NULL;
+}
+
+