Things seem to work...
[matches/swarm.git] / src / slave.c
1 #define _XOPEN_SOURCE 700
2 #define _GNU_SOURCE
3
4
5
6 #include "slave.h"
7 #include <assert.h>
8
9 #include "network.h"
10 #include "daemon.h"
11 #include "log.h"
12 #include <errno.h>
13 #include <pty.h>
14 #include <fcntl.h>
15 #include <string.h>
16
17 #include <pthread.h>
18 #include <syslog.h>
19 #include <signal.h>
20 #include <unistd.h>
21 #include <sys/types.h>
22 #include <sys/wait.h>
23 #include <sys/socket.h>
24 #include <netinet/tcp.h>
25
26
27
// Table of per-CPU slave entries; allocated in Slave_main with o->nCPU
// elements and released in Slave_cleanup.
Slave * slave;

// Name of this slave, read as one line from stdin in Slave_main and used
// as the prefix of the "name:index" label of each shell.
char name[BUFSIZ];

// Forks the shell for slave entry i and announces its name on the entry's socket.
void Slave_shell(int i, char * shell);
// Registered with atexit() in Slave_main; signals the child shells and frees the table.
void Slave_cleanup();
34
35
36 void Slave_main(Options * o)
37 {
38
39         
40         setbuf(stdin, NULL); setbuf(stdout, NULL); setbuf(stderr, NULL);
41
42         dup2(fileno(stdout), fileno(stderr)); // yes, this works, apparently
43
44         slave = (Slave*)(calloc(o->nCPU, sizeof(slave)));       
45         atexit(Slave_cleanup);
46
47
48         if (strcmp(o->master_addr, "-") != 0)
49         {
50                 if (fork() != 0)
51                         exit(EXIT_SUCCESS);
52
53                 //log_print(2, "Slave_main", "Using unsecured networking; connect to %s:%d", o->master_addr, o->port);
54                 //log_print(2, "Slave_main", "Connecting to %s:%d", o->master_addr, o->port);
55                 int net_fd = Network_client(o->master_addr, o->port, 100);
56                 dup2(net_fd, fileno(stdin));
57                 dup2(net_fd, fileno(stdout));
58                 dup2(net_fd, fileno(stderr));
59                 
60         }
61         else
62         {
63                 o->master_addr = "localhost";
64                 //log_print(2, "Slave_main", "Using port forwarding; connect to %s", o->master_addr);
65         }
66
67         char buffer[BUFSIZ];
68
69         fgets(name, sizeof(name), stdin);
70         name[strlen(name)-1] = '\0';
71         //log_print(2, "Slave_main", "Got name %s", name);
72
73         fprintf(stdout, "%d\n", o->nCPU);
74         //log_print(2, "Slave_main", "Wrote nCPU %d", o->nCPU);
75         
76
77         int port = 0;
78         for (int i = 0; i < o->nCPU; ++i)
79         {
80                 //log_print(2, "Slave_main", "Waiting for port number...");
81                 fgets(buffer, sizeof(buffer), stdin);
82                 
83                 buffer[strlen(buffer)-1] = '\0';
84                 sscanf(buffer, "%d", &port);    
85                 //log_print(2, "Slave_main", "Port number %d", port);
86                 slave[i].in = Network_client(o->master_addr, port,20);
87                 //log_print(2, "Slave_main", "Connected to %s:%d\n", o->master_addr, port);
88                 slave[i].out = slave[i].in;
89
90                 Slave_shell(i, o->shell);
91         }
92         
93
94         Slave_loop(o);
95
96         exit(EXIT_SUCCESS);
97 }
98
99 void Slave_shell(int i, char * shell)
100 {
101         slave[i].pid = fork();
102
103
104
105         if (slave[i].pid == 0)
106         {
107                 dup2(slave[i].in, fileno(stdin));
108                 dup2(slave[i].out, fileno(stdout));
109                 //dup2(error_socket[1], fileno(stderr));
110
111                 execlp(shell, shell, NULL);
112         }
113
114         // if the input is a network socket, this message gets sent to the master
115         // which will then echo it back to the socket and hence the shell
116         FILE * f = fdopen(slave[i].in, "w"); setbuf(f, NULL);
117         fprintf(f, "name=\"%s:%d\"\n", name,i);
118 }
119
120 void Slave_loop(Options * o)
121 {
122         fd_set readSet;
123         struct timeval tv;
124         tv.tv_sec = 0;
125         tv.tv_usec = 100000;
126
127         int p = -1; int s = 0;
128         char buffer[BUFSIZ];
129         while (true)
130         {
131                 FD_ZERO(&readSet);
132                 FD_SET(fileno(stdin), &readSet);
133                 p = waitpid(-1, &s, 0);
134                 if (p == -1)
135                 {
136                         //log_print(0, "Slave_loop", "waitpid : %s", strerror(errno));
137                         continue;
138                 }
139
140                 //log_print(3, "Slave_loop", "Detected child %d exiting...", p);
141
142                 // check for an exit command from the master
143                 select(fileno(stdin) + 1, &readSet, NULL, NULL, &tv);
144
145                 if (FD_ISSET(fileno(stdin), &readSet))
146                 {
147                         fgets(buffer, sizeof(buffer), stdin);
148                         if (strcmp(buffer, "exit\n") == 0)
149                         {
150                                 log_print(2, "Slave_loop", "Received notification of exit.\n");
151                                 exit(EXIT_SUCCESS);
152                         }
153                 }
154                 
155                 int i = 0;
156                 for (i = 0; i < o->nCPU; ++i)
157                 {
158                         if (slave[i].pid == p) break;
159                 }
160                 if (i >= o->nCPU)
161                         error("Slave_loop", "No child matches pid %d", p);
162
163
164                 
165                 fprintf(stderr,"Unexpected exit of slave %s:%d", name, i);
166                 if (WIFSIGNALED(s))
167                 {
168                         int sig = WTERMSIG(s);
169                         fprintf(stderr," due to %s", strsignal(sig));
170                         if (sig == SIGKILL)
171                         {
172                                 fprintf(stderr," - %s committing suicide\n", name);
173                                 kill(getpid(), sig);
174                         }
175                 }
176                 else
177                 {               
178                         fprintf(stderr," return code %d.", s);
179                 }
180                 
181
182                 // cancel any tasks at the master for this slave
183                 static int len = -1;
184                 if (len < 0)
185                         len = strlen(o->end);
186                 write(slave[i].out, o->end, len);
187
188                 Slave_shell(i, o->shell);
189
190                 
191         }
192 }
193
194 void Slave_cleanup()
195 {
196         for (int i = 0; i < options.nCPU; ++i)
197         {
198                 kill(slave[i].pid, SIGTERM);
199         }
200         sleep(1);
201         for (int i = 0; i < options.nCPU; ++i)
202         {
203                 kill(slave[i].pid, SIGKILL);
204         }
205         free(slave);
206 }
207
208

UCC git Repository :: git.ucc.asn.au