move_pages: just use calloc
[trinity.git] / child.c
blob199e3988be784dea179bf46678f29896abea225a
1 /*
2 * Each process that gets forked runs this code.
3 */
5 #include <fcntl.h>
6 #include <errno.h>
7 #include <signal.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <unistd.h>
11 #include <sched.h>
12 #include <sys/time.h>
13 #include <sys/resource.h>
14 #include <sys/prctl.h>
16 #include "arch.h"
17 #include "child.h"
18 #include "list.h"
19 #include "log.h"
20 #include "maps.h"
21 #include "params.h" // for 'debug'
22 #include "pids.h"
23 #include "random.h"
24 #include "shm.h"
25 #include "signals.h"
26 #include "syscall.h"
27 #include "tables.h"
28 #include "trinity.h" // ARRAY_SIZE
29 #include "utils.h" // zmalloc
31 static void disable_coredumps(void)
33 struct rlimit limit = { .rlim_cur = 0, .rlim_max = 0 };
35 if (debug == TRUE) {
36 (void)signal(SIGABRT, SIG_DFL);
37 (void)signal(SIGSEGV, SIG_DFL);
38 return;
41 if (setrlimit(RLIMIT_CORE, &limit) != 0)
42 perror( "setrlimit(RLIMIT_CORE)" );
44 prctl(PR_SET_DUMPABLE, FALSE);
47 static void enable_coredumps(void)
49 struct rlimit limit = {
50 .rlim_cur = RLIM_INFINITY,
51 .rlim_max = RLIM_INFINITY
54 if (debug == TRUE)
55 return;
57 prctl(PR_SET_DUMPABLE, TRUE);
59 (void) setrlimit(RLIMIT_CORE, &limit);
62 static void set_make_it_fail(void)
64 int fd;
65 const char *buf = "1";
67 /* If we failed last time, it's probably because we don't
68 * have fault-injection enabled, so don't bother trying in future.
70 if (shm->dont_make_it_fail == TRUE)
71 return;
73 fd = open("/proc/self/make-it-fail", O_WRONLY);
74 if (fd == -1) {
75 shm->dont_make_it_fail = TRUE;
76 return;
79 if (write(fd, buf, 1) == -1) {
80 if (errno != EPERM)
81 outputerr("writing to /proc/self/make-it-fail failed! (%s)\n", strerror(errno));
82 shm->dont_make_it_fail = TRUE;
85 close(fd);
/*
 * We call this occasionally to set some FPU state, in the hopes that we
 * might tickle some weird FPU/scheduler related bugs
 */
static void use_fpu(void)
{
	double val = 0;

	/* The empty asm statements take 'val' as a read/write memory
	 * operand; presumably this keeps the compiler from discarding the
	 * floating point addition between them.
	 */
	__asm__ __volatile__("" : "+m" (val));
	val += 1;
	__asm__ __volatile__("" : "+m" (val));
}
100 int this_child = 0;
102 static void setup_page_maps(void)
104 unsigned long *page;
105 unsigned int i;
107 page = (void *) page_maps;
109 for (i = 0; i < page_size / sizeof(unsigned long); i++) {
110 struct map *map;
112 map = get_map();
113 page[i] = (unsigned long) map->ptr;
/*
 * Write 'adj' in decimal to /proc/self/oom_score_adj.
 *
 * Best effort: if the file can't be opened for writing the function
 * returns silently, and the results of fprintf()/fclose() are not checked.
 */
static void oom_score_adj(int adj)
{
	FILE *oom_file = fopen("/proc/self/oom_score_adj", "w");

	if (oom_file == NULL)
		return;

	fprintf(oom_file, "%d", adj);
	fclose(oom_file);
}
129 void init_child(int childno)
131 cpu_set_t set;
132 pid_t pid = getpid();
133 char childname[17];
135 this_child = childno;
137 set_seed(childno);
139 shm->kill_count[childno] = 0;
141 shm->num_mappings[childno] = 0;
142 shm->mappings[childno] = zmalloc(sizeof(struct map));
143 INIT_LIST_HEAD(&shm->mappings[childno]->list);
145 setup_page_maps();
147 if (sched_getaffinity(pid, sizeof(set), &set) == 0) {
148 CPU_ZERO(&set);
149 CPU_SET(childno, &set);
150 sched_setaffinity(pid, sizeof(set), &set);
153 shm->child_op_count[childno] = 0;
155 memset(childname, 0, sizeof(childname));
156 sprintf(childname, "trinity-c%d", childno);
157 prctl(PR_SET_NAME, (unsigned long) &childname);
159 oom_score_adj(500);
161 /* Wait for parent to set our pidslot */
162 while (shm->pids[childno] != getpid()) {
163 int ret = 0;
165 /* Make sure parent is actually alive to wait for us. */
166 ret = pid_alive(shm->mainpid);
167 if (ret != 0) {
168 shm->exit_reason = EXIT_SHM_CORRUPTION;
169 outputerr(BUGTXT "parent (%d) went away!\n", shm->mainpid);
170 sleep(20000);
174 /* Wait for all the children to start up. */
175 while (shm->ready == FALSE)
176 sleep(1);
178 set_make_it_fail();
180 if (rand() % 100 < 50)
181 use_fpu();
183 mask_signals_child();
185 disable_coredumps();
188 static void check_parent_pid(void)
190 pid_t pid;
191 unsigned int i;
192 static unsigned int parent_check_time = 10;
194 parent_check_time--;
195 if (parent_check_time != 0)
196 return;
198 parent_check_time = 10;
200 if (getppid() == shm->mainpid)
201 return;
203 pid = getpid();
205 //FIXME: Add locking so only one child does this output.
206 output(0, BUGTXT "CHILD (pid:%d) GOT REPARENTED! "
207 "parent pid:%d. Watchdog pid:%d\n",
208 pid, shm->mainpid, watchdog_pid);
209 output(0, BUGTXT "Last syscalls:\n");
211 for_each_pidslot(i) {
212 // Skip over 'boring' entries.
213 if ((shm->pids[i] == EMPTY_PIDSLOT) &&
214 (shm->previous[i].nr == 0) &&
215 (shm->child_op_count[i] == 0))
216 continue;
218 output(0, "[%d] pid:%d call:%s callno:%d\n",
219 i, shm->pids[i],
220 print_syscall_name(shm->previous[i].nr, shm->previous[i].do32bit),
221 shm->child_op_count[i]);
223 shm->exit_reason = EXIT_REPARENT_PROBLEM;
224 exit(EXIT_FAILURE);
225 //TODO: Emergency logging.
228 struct child_funcs {
229 const char *name;
230 bool (*func)(int childno);
231 unsigned char likelyhood;
234 static const struct child_funcs child_ops[] = {
235 { .name = "rand_syscalls", .func = child_random_syscalls, .likelyhood = 100 },
239 // FIXME: when we have different child ops, we're going to need to redo the progress detector.
240 static unsigned int handle_sigreturn(int childno)
242 static unsigned int count = 0;
243 static unsigned int last = -1;
245 output(2, "<timed out>\n"); /* Flush out the previous syscall output. */
247 /* Check if we're making any progress at all. */
248 if (shm->child_op_count[childno] == last) {
249 count++;
250 //output(1, "no progress for %d tries.\n", count);
251 } else {
252 count = 0;
253 last = shm->child_op_count[childno];
255 if (count == 3) {
256 output(1, "no progress for 3 tries, exiting child.\n");
257 return 0;
260 if (shm->kill_count[childno] > 0) {
261 output(1, "[%d] Missed a kill signal, exiting\n", getpid());
262 return 0;
265 if (sigwas != SIGALRM)
266 output(1, "[%d] Back from signal handler! (sig was %s)\n", getpid(), strsignal(sigwas));
268 return 1;
271 void child_process(int childno)
273 int ret;
275 ret = sigsetjmp(ret_jump, 1);
276 if (ret != 0) {
277 if (handle_sigreturn(childno) == 0)
278 return; // Exit the child, things are getting too weird.
281 while (shm->exit_reason == STILL_RUNNING) {
282 const char *lastop = NULL;
283 unsigned int i;
285 check_parent_pid();
287 while (shm->regenerating == TRUE)
288 sleep(1);
290 /* If the parent reseeded, we should reflect the latest seed too. */
291 if (shm->seed != shm->seeds[childno])
292 set_seed(childno);
294 /* Choose operations for this iteration. */
295 i = rand() % ARRAY_SIZE(child_ops);
297 if (rand() % 100 <= child_ops[i].likelyhood) {
298 if (lastop != child_ops[i].name) {
299 output(0, "Chose %s.\n", child_ops[i].name);
300 lastop = child_ops[i].name;
303 ret = child_ops[i].func(childno);
304 if (ret == FAIL)
305 return;
309 enable_coredumps();