Did some stuff
[matches/swarm.git] / src / slave.c
1 #define _XOPEN_SOURCE 700
2 #define _GNU_SOURCE
3
4
5
6 #include "slave.h"
7 #include <assert.h>
8
9 #include "network.h"
10 #include "daemon.h"
11 #include "log.h"
12 #include <errno.h>
13 #include <pty.h>
14 #include <fcntl.h>
15 #include <string.h>
16
17 #include <pthread.h>
18 #include <syslog.h>
19 #include <signal.h>
20 #include <unistd.h>
21 #include <sys/types.h>
22 #include <sys/wait.h>
23 #include <sys/socket.h>
24 #include <netinet/tcp.h>
25
26
27
28 Slave * slave;
29
30 char name[BUFSIZ];
31
32 void Slave_shell(int i, char * shell);
33 void Slave_cleanup();
34
35
36 void Slave_main(Options * o)
37 {
38         //fprintf(stderr, "%d\n", o->nCPU);
39
40
41         
42         setbuf(stdin, NULL); setbuf(stdout, NULL); setbuf(stderr, NULL);
43
44         dup2(fileno(stdout), fileno(stderr)); // yes, this works, apparently
45
46         slave = (Slave*)(calloc(o->nCPU, sizeof(Slave)));       
47         atexit(Slave_cleanup);
48
49
50         if (strcmp(o->master_addr, "-") != 0)
51         {
52                 if (fork() != 0)
53                         exit(EXIT_SUCCESS);
54
55                 //log_print(2, "Slave_main", "Using unsecured networking; connect to %s:%d", o->master_addr, o->port);
56                 //log_print(2, "Slave_main", "Connecting to %s:%d", o->master_addr, o->port);
57                 int net_fd = Network_client(o->master_addr, o->port, 100);
58                 dup2(net_fd, fileno(stdin));
59                 dup2(net_fd, fileno(stdout));
60                 dup2(net_fd, fileno(stderr));
61                 
62         }
63         else
64         {
65                 o->master_addr = "localhost";
66                 //log_print(2, "Slave_main", "Using port forwarding; connect to %s", o->master_addr);
67         }
68
69         char buffer[BUFSIZ];
70
71         fgets(name, sizeof(name), stdin);
72         name[strlen(name)-1] = '\0';
73         //log_print(LOGINFO, "Slave_main", "Started remote swarm \"%s\"", name);
74
75
76         fprintf(stdout, "%d\n", o->nCPU);
77         //log_print(2, "Slave_main", "Wrote nCPU %d", o->nCPU);
78         
79
80         int port = 0;
81         for (int i = 0; i < o->nCPU; ++i)
82         {
83                 //log_print(2, "Slave_main", "Waiting for port number...");
84                 fgets(buffer, sizeof(buffer), stdin);
85                 
86                 buffer[strlen(buffer)-1] = '\0';
87                 sscanf(buffer, "%d", &port);    
88                 //log_print(2, "Slave_main", "Port number %d", port);
89                 slave[i].in = Network_client(o->master_addr, port,20);
90                 //log_print(2, "Slave_main", "Connected to %s:%d\n", o->master_addr, port);
91                 slave[i].out = slave[i].in;
92
93                 Slave_shell(i, o->shell);
94         }
95         
96
97         Slave_loop(o);
98
99         exit(EXIT_SUCCESS);
100 }
101
102 void Slave_shell(int i, char * shell)
103 {
104         slave[i].pid = fork();
105
106
107
108         if (slave[i].pid == 0)
109         {
110                 dup2(slave[i].in, fileno(stdin));
111                 dup2(slave[i].out, fileno(stdout));
112                 //dup2(error_socket[1], fileno(stderr));
113
114                 execlp(shell, shell, NULL);
115         }
116
117         // if the input is a network socket, this message gets sent to the master
118         // which will then echo it back to the socket and hence the shell
119         FILE * f = fdopen(slave[i].in, "w"); setbuf(f, NULL);
120         fprintf(f, "name=\"%s:%d\"\n", name,i);
121 }
122
123 void Slave_loop(Options * o)
124 {
125         fd_set readSet;
126         struct timeval tv;
127         tv.tv_sec = 0;
128         tv.tv_usec = 100000;
129
130         int p = -1; int s = 0;
131         char buffer[BUFSIZ];
132         while (true)
133         {
134                 FD_ZERO(&readSet);
135                 FD_SET(fileno(stdin), &readSet);
136                 p = waitpid(-1, &s, 0);
137                 if (p == -1)
138                 {
139                         //log_print(0, "Slave_loop", "waitpid : %s", strerror(errno));
140                         continue;
141                 }
142
143                 //log_print(3, "Slave_loop", "Detected child %d exiting...", p);
144
145                 // check for an exit command from the master
146                 select(fileno(stdin) + 1, &readSet, NULL, NULL, &tv);
147
148                 if (FD_ISSET(fileno(stdin), &readSet))
149                 {
150                         fgets(buffer, sizeof(buffer), stdin);
151                         if (strcmp(buffer, "exit\n") == 0)
152                         {
153                                 log_print(2, "Slave_loop", "Received notification of exit.\n");
154                                 exit(EXIT_SUCCESS);
155                         }
156                 }
157                 
158                 int i = 0;
159                 for (i = 0; i < o->nCPU; ++i)
160                 {
161                         if (slave[i].pid == p) break;
162                 }
163                 if (i >= o->nCPU)
164                         error("Slave_loop", "No child matches pid %d", p);
165
166                 sigchld_respond(s, name, i);
167                 
168
169                 // cancel any tasks at the master for this slave
170                 
171                 write(slave[i].out, SHELL_OUTPUT_FINISHED, SHELL_OUTPUT_FINISHED_LENGTH);
172
173                 Slave_shell(i, o->shell);
174
175                 
176         }
177 }
178
179 void Slave_cleanup()
180 {
181         for (int i = 0; i < options.nCPU; ++i)
182         {
183                 kill(slave[i].pid, SIGTERM);
184         }
185         sleep(1);
186         for (int i = 0; i < options.nCPU; ++i)
187         {
188                 kill(slave[i].pid, SIGKILL);
189         }
190         free(slave);
191 }
192
193

UCC git Repository :: git.ucc.asn.au