add to, and prioritize the TODO a little.
[trinity.git] / child.c
blob9d0a60f9ad52c4da4a1c43a80c8113ff5d36ab79
/*
 * Each process that gets forked runs this code.
 */
#include <fcntl.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sched.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/prctl.h>

#include "arch.h"
#include "child.h"
#include "list.h"
#include "log.h"
#include "maps.h"
#include "params.h"	// for 'debug'
#include "pids.h"
#include "random.h"
#include "shm.h"
#include "signals.h"
#include "syscall.h"
#include "tables.h"
#include "trinity.h"	// ARRAY_SIZE
#include "utils.h"	// zmalloc
31 static void disable_coredumps(void)
33 struct rlimit limit = { .rlim_cur = 0, .rlim_max = 0 };
35 if (debug == TRUE) {
36 (void)signal(SIGABRT, SIG_DFL);
37 (void)signal(SIGSEGV, SIG_DFL);
38 return;
41 if (setrlimit(RLIMIT_CORE, &limit) != 0)
42 perror( "setrlimit(RLIMIT_CORE)" );
44 prctl(PR_SET_DUMPABLE, FALSE);
47 static void enable_coredumps(void)
49 struct rlimit limit = {
50 .rlim_cur = RLIM_INFINITY,
51 .rlim_max = RLIM_INFINITY
54 if (debug == TRUE)
55 return;
57 prctl(PR_SET_DUMPABLE, TRUE);
59 (void) setrlimit(RLIMIT_CORE, &limit);
61 static void set_make_it_fail(void)
63 int fd;
64 const char *buf = "1";
66 /* If we failed last time, don't bother trying in future. */
67 if (shm->do_make_it_fail == TRUE)
68 return;
70 fd = open("/proc/self/make-it-fail", O_WRONLY);
71 if (fd == -1)
72 return;
74 if (write(fd, buf, 1) == -1) {
75 if (errno != EPERM)
76 outputerr("writing to /proc/self/make-it-fail failed! (%s)\n", strerror(errno));
77 else
78 shm->do_make_it_fail = TRUE;
80 close(fd);
/*
 * We call this occasionally to set some FPU state, in the hopes that we
 * might tickle some weird FPU/scheduler related bugs.
 *
 * The empty asm statements take 'x' as a read/write memory operand, so the
 * compiler cannot fold the addition away: the value really has to be loaded,
 * incremented and stored.
 *
 * __asm__/__volatile__ are used rather than the bare 'asm' keyword because
 * 'asm' is not recognised when compiling in a strict ISO mode (-std=c99,
 * -std=c11); the double-underscore spellings work in every mode.
 */
static void use_fpu(void)
{
	double x = 0;

	__asm__ __volatile__("" : "+m" (x));
	x += 1;
	__asm__ __volatile__("" : "+m" (x));
}
/* Child number of the current process; assigned from 'childno' in init_child(). */
int this_child = 0;
97 static void setup_page_maps(void)
99 unsigned long *page;
100 unsigned int i;
102 page = (void *) page_maps;
104 for (i = 0; i < page_size / sizeof(unsigned long); i++) {
105 struct map *map;
107 map = get_map();
108 page[i] = (unsigned long) map->ptr;
/*
 * Write 'adj' in decimal to /proc/self/oom_score_adj.
 *
 * Purely best effort: if the file cannot be opened nothing happens, and the
 * results of the write and close are not checked.
 */
static void oom_score_adj(int adj)
{
	FILE *fp = fopen("/proc/self/oom_score_adj", "w");

	if (fp != NULL) {
		fprintf(fp, "%d", adj);
		fclose(fp);
	}
}
124 void init_child(int childno)
126 cpu_set_t set;
127 pid_t pid = getpid();
128 char childname[17];
130 this_child = childno;
132 set_seed(childno);
134 shm->kill_count[childno] = 0;
136 shm->num_mappings[childno] = 0;
137 shm->mappings[childno] = zmalloc(sizeof(struct map));
138 INIT_LIST_HEAD(&shm->mappings[childno]->list);
140 setup_page_maps();
142 if (sched_getaffinity(pid, sizeof(set), &set) == 0) {
143 CPU_ZERO(&set);
144 CPU_SET(childno, &set);
145 sched_setaffinity(pid, sizeof(set), &set);
148 shm->child_syscall_count[childno] = 0;
150 memset(childname, 0, sizeof(childname));
151 sprintf(childname, "trinity-c%d", childno);
152 prctl(PR_SET_NAME, (unsigned long) &childname);
154 oom_score_adj(500);
156 /* Wait for parent to set our pidslot */
157 while (shm->pids[childno] != getpid()) {
158 int ret = 0;
160 /* Make sure parent is actually alive to wait for us. */
161 ret = pid_alive(shm->mainpid);
162 if (ret != 0) {
163 shm->exit_reason = EXIT_SHM_CORRUPTION;
164 outputerr(BUGTXT "parent (%d) went away!\n", shm->mainpid);
165 sleep(20000);
169 /* Wait for all the children to start up. */
170 while (shm->ready == FALSE)
171 sleep(1);
173 set_make_it_fail();
175 if (rand() % 100 < 50)
176 use_fpu();
178 mask_signals_child();
180 disable_coredumps();
183 static void check_parent_pid(void)
185 pid_t pid;
186 unsigned int i;
187 static unsigned int parent_check_time = 10;
189 parent_check_time--;
190 if (parent_check_time != 0)
191 return;
193 parent_check_time = 10;
195 if (getppid() == shm->mainpid)
196 return;
198 pid = getpid();
200 //FIXME: Add locking so only one child does this output.
201 output(0, BUGTXT "CHILD (pid:%d) GOT REPARENTED! "
202 "parent pid:%d. Watchdog pid:%d\n",
203 pid, shm->mainpid, watchdog_pid);
204 output(0, BUGTXT "Last syscalls:\n");
206 for_each_pidslot(i) {
207 // Skip over 'boring' entries.
208 if ((shm->pids[i] == EMPTY_PIDSLOT) &&
209 (shm->previous[i].nr == 0) &&
210 (shm->child_syscall_count[i] == 0))
211 continue;
213 output(0, "[%d] pid:%d call:%s callno:%d\n",
214 i, shm->pids[i],
215 print_syscall_name(shm->previous[i].nr, shm->previous[i].do32bit),
216 shm->child_syscall_count[i]);
218 shm->exit_reason = EXIT_REPARENT_PROBLEM;
219 exit(EXIT_FAILURE);
220 //TODO: Emergency logging.
/* Describes one kind of work that child_process() can hand to a child. */
struct child_funcs {
	const char *name;		/* label printed when this op is chosen */
	bool (*func)(int childno);	/* runs the op; a FAIL result makes child_process() return */
	unsigned char likelyhood;	/* compared against rand() % 100 when deciding whether to run */
};
/* The operations child_process() picks from, one at random per loop iteration. */
static const struct child_funcs child_ops[] = {
	{ .name = "rand_syscalls", .func = child_random_syscalls, .likelyhood = 100 },
};
234 // FIXME: when we have different child ops, we're going to need to redo the progress detector.
235 static unsigned int handle_sigreturn(int childno)
237 static unsigned int count = 0;
238 static unsigned int last = -1;
240 output(2, "<timed out>\n"); /* Flush out the previous syscall output. */
242 /* Check if we're making any progress at all. */
243 if (shm->child_syscall_count[childno] == last) {
244 count++;
245 //output(1, "no progress for %d tries.\n", count);
246 } else {
247 count = 0;
248 last = shm->child_syscall_count[childno];
250 if (count == 3) {
251 output(1, "no progress for 3 tries, exiting child.\n");
252 return 0;
255 if (shm->kill_count[childno] > 0) {
256 output(1, "[%d] Missed a kill signal, exiting\n", getpid());
257 return 0;
260 if (sigwas != SIGALRM)
261 output(1, "[%d] Back from signal handler! (sig was %s)\n", getpid(), strsignal(sigwas));
263 return 1;
266 void child_process(int childno)
268 int ret;
269 const char *lastop = NULL;
271 ret = sigsetjmp(ret_jump, 1);
272 if (ret != 0) {
273 if (handle_sigreturn(childno) == 0)
274 return; // Exit the child, things are getting too weird.
277 while (shm->exit_reason == STILL_RUNNING) {
278 unsigned int i;
280 check_parent_pid();
282 while (shm->regenerating == TRUE)
283 sleep(1);
285 /* If the parent reseeded, we should reflect the latest seed too. */
286 if (shm->seed != shm->seeds[childno])
287 set_seed(childno);
289 /* Choose operations for this iteration. */
290 i = rand() % ARRAY_SIZE(child_ops);
292 if (rand() % 100 <= child_ops[i].likelyhood) {
293 if (lastop != child_ops[i].name) {
294 output(0, "Chose %s.\n", child_ops[i].name);
295 lastop = child_ops[i].name;
298 ret = child_ops[i].func(childno);
299 if (ret == FAIL)
300 return;
304 enable_coredumps();