[sanitizer] fix epoch handling in deadlock detector (before the fix, we could have...
[blocksruntime.git] / lib / msandr / msandr.cc
blob5159ddbddced54a3b0a6fec9d6591c842bec0004
1 //===-- msandr.cc ---------------------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file is a part of MemorySanitizer.
12 // DynamoRio client for MemorySanitizer.
14 // MemorySanitizer requires that all program code is instrumented. Any memory
15 // store that can turn an uninitialized value into an initialized value must be
16 // observed by the tool, otherwise we risk reporting a false UMR.
18 // This also includes any libraries that the program depends on.
20 // In the case when rebuilding all program dependencies with MemorySanitizer is
21 // problematic, an experimental MSanDR tool (the code you are currently looking
22 // at) can be used. It is a DynamoRio-based tool that uses dynamic
23 // instrumentation to
24 // * Unpoison all memory stores.
25 // * Unpoison TLS slots used by MemorySanitizer to pass function arguments and
26 // return value shadow on anything that looks like a function call or a return
27 // from a function.
29 // This tool does not detect the use of uninitialized values in uninstrumented
30 // libraries. It merely gets rid of false positives by marking all data that
31 // passes through uninstrumented code as fully initialized.
32 //===----------------------------------------------------------------------===//
34 #include <dr_api.h>
35 #include <drutil.h>
36 #include <drmgr.h>
37 #include <drsyscall.h>
39 #include <sys/mman.h>
40 #include <sys/syscall.h> /* for SYS_mmap */
42 #include <string.h>
44 // XXX: it seems setting macro in CMakeLists.txt does not work,
45 // so manually set it here now.
47 // Building msandr client for running in DynamoRIO hybrid mode,
48 // which allows some module running natively.
49 // TODO: turn it on by default when hybrid is stable enough
50 // #define MSANDR_NATIVE_EXEC
52 #ifndef MSANDR_NATIVE_EXEC
53 #include <algorithm>
54 #include <set>
55 #include <string>
56 #include <vector>
57 #endif
// Bitmask helpers: TESTALL is true when every bit of `mask` is set in `var`,
// TESTANY when at least one bit of `mask` is set in `var`.
59 #define TESTALL(mask, var) (((mask) & (var)) == (mask))
60 #define TESTANY(mask, var) (((mask) & (var)) != 0)
// CHECK_IMPL evaluates `condition`; when it is false it prints the stringified
// condition together with `file`:`line` through dr_printf() and then calls
// dr_abort().
62 #define CHECK_IMPL(condition, file, line) \
63 do { \
64 if (!(condition)) { \
65 dr_printf("Check failed: `%s`\nat %s:%d\n", #condition, file, line); \
66 dr_abort(); \
67 } \
68 } while (0) // TODO: stacktrace
// CHECK(condition) is CHECK_IMPL at the call site's file and line. Nothing
// visible here compiles the condition out, and several call sites rely on the
// condition's side effects.
70 #define CHECK(condition) CHECK_IMPL(condition, __FILE__, __LINE__)
72 #define VERBOSITY 0
74 // Building msandr client for standalone test that does not need to
75 // run with msan build executables. Disable by default.
76 // #define MSANDR_STANDALONE_TEST
78 #define NUM_TLS_RETVAL 1
79 #define NUM_TLS_PARAM 6
81 #ifdef MSANDR_STANDALONE_TEST
82 // For testing purposes, we map app to shadow memory at [0x100000, 0x200000).
83 // Normally, the app starts at 0x400000:
84 // 00400000-004e0000 r-xp 00000000 fc:00 524343 /bin/bash
85 // so there should be no problem.
86 # define SHADOW_MEMORY_BASE ((void *)0x100000)
87 # define SHADOW_MEMORY_SIZE (0x100000)
88 # define SHADOW_MEMORY_MASK (SHADOW_MEMORY_SIZE - 4 /* to avoid overflow */)
89 #else
90 // shadow memory range [0x200000000000, 0x400000000000)
91 // assuming no app memory below 0x200000000000
92 # define SHADOW_MEMORY_MASK 0x3fffffffffffULL
93 #endif /* MSANDR_STANDALONE_TEST */
// Signature of the indirect-call wrapper handed to the MSan runtime: takes a
// pointer and returns a pointer.
95 typedef void *(*WrapperFn)(void *);
// Defined outside this file; dr_init() calls both at the end of
// initialization.
96 extern "C" void __msan_set_indirect_call_wrapper(WrapperFn wrapper);
97 extern "C" void __msan_dr_is_initialized();
99 namespace {
// TLS offsets of the MSan return-value and parameter shadow slots. dr_init()
// fills them from __msan_get_retval_tls_offset() and
// __msan_get_param_tls_offset(); InstrumentReturn() and
// InstrumentIndirectBranch() use them as store displacements.
101 int msan_retval_tls_offset;
102 int msan_param_tls_offset;
104 #ifndef MSANDR_NATIVE_EXEC
// Bookkeeping record for one loaded module; instances are stored in
// g_module_list.
105 class ModuleData {
106 public:
107 ModuleData();
108 ModuleData(const module_data_t *info);
109 // Yes, we want default copy, assign, and dtor semantics.
111 public:
// Address bounds of the module. LookupModuleByPC() treats a pc with
// start_ <= pc < end_ as belonging to the module.
112 app_pc start_;
113 app_pc end_;
114 // Full path to the module.
115 std::string path_;
116 module_handle_t handle_;
// Whether basic blocks from this module are instrumented; set by
// event_module_load() and read by ShouldInstrumentPc().
117 bool should_instrument_;
// Set by event_basic_block() the first time a block from this module is
// reported (only when VERBOSITY > 0).
118 bool executed_;
121 // A vector of loaded modules sorted by module bounds. We lookup the current PC
122 // in here from the bb event. This is better than an rb tree because the lookup
123 // is faster and the bb event occurs far more than the module load event.
// event_module_load() inserts entries in start_ order and event_module_unload()
// erases them; LookupModuleByPC() binary-searches the vector by start_.
124 std::vector<ModuleData> g_module_list;
126 ModuleData::ModuleData()
127 : start_(NULL), end_(NULL), path_(""), handle_(NULL),
128 should_instrument_(false), executed_(false) {
131 ModuleData::ModuleData(const module_data_t *info)
132 : start_(info->start), end_(info->end), path_(info->full_path),
133 handle_(info->handle),
134 // We'll check the black/white lists later and adjust this.
135 should_instrument_(true), executed_(false) {
137 #endif /* !MSANDR_NATIVE_EXEC */
// Pointers to MSan runtime entry points. InitializeMSanCallbacks() resolves
// them by name through LookupCallback().
139 int(*__msan_get_retval_tls_offset)();
140 int(*__msan_get_param_tls_offset)();
141 void (*__msan_unpoison)(void *base, size_t size);
142 bool (*__msan_is_in_loader)();
144 #ifdef MSANDR_STANDALONE_TEST
// Stand-ins for the MSan runtime used by the standalone test build.
// dr_init() stores the raw TLS offsets it allocates in these two variables.
uint mock_msan_retval_tls_offset;
uint mock_msan_param_tls_offset;

// Reports the mock return-value TLS offset as an int.
static int mock_msan_get_retval_tls_offset() {
  return static_cast<int>(mock_msan_retval_tls_offset);
}

// Reports the mock parameter TLS offset as an int.
static int mock_msan_get_param_tls_offset() {
  return static_cast<int>(mock_msan_param_tls_offset);
}

// Unpoisoning is a no-op in the standalone test.
static void mock_msan_unpoison(void *base, size_t size) {
  (void)base;
  (void)size;
}

// The standalone test never reports being inside the loader.
static bool mock_msan_is_in_loader() {
  return false;
}
162 #endif /* MSANDR_STANDALONE_TEST */
164 static generic_func_t LookupCallback(module_data_t *app, const char *name) {
165 #ifdef MSANDR_STANDALONE_TEST
166 if (strcmp("__msan_get_retval_tls_offset", name) == 0) {
167 return (generic_func_t)mock_msan_get_retval_tls_offset;
168 } else if (strcmp("__msan_get_param_tls_offset", name) == 0) {
169 return (generic_func_t)mock_msan_get_param_tls_offset;
170 } else if (strcmp("__msan_unpoison", name) == 0) {
171 return (generic_func_t)mock_msan_unpoison;
172 } else if (strcmp("__msan_is_in_loader", name) == 0) {
173 return (generic_func_t)mock_msan_is_in_loader;
175 CHECK(false);
176 return NULL;
177 #else /* !MSANDR_STANDALONE_TEST */
178 generic_func_t callback = dr_get_proc_address(app->handle, name);
179 if (callback == NULL) {
180 dr_printf("Couldn't find `%s` in %s\n", name, app->full_path);
181 CHECK(callback);
183 return callback;
184 #endif /* !MSANDR_STANDALONE_TEST */
187 void InitializeMSanCallbacks() {
188 module_data_t *app = dr_lookup_module_by_name(dr_get_application_name());
189 if (!app) {
190 dr_printf("%s - oops, dr_lookup_module_by_name failed!\n",
191 dr_get_application_name());
192 CHECK(app);
195 __msan_get_retval_tls_offset = (int (*)())
196 LookupCallback(app, "__msan_get_retval_tls_offset");
197 __msan_get_param_tls_offset = (int (*)())
198 LookupCallback(app, "__msan_get_param_tls_offset");
199 __msan_unpoison = (void(*)(void *, size_t))
200 LookupCallback(app, "__msan_unpoison");
201 __msan_is_in_loader = (bool (*)())
202 LookupCallback(app, "__msan_is_in_loader");
204 dr_free_module_data(app);
207 // FIXME: Handle absolute addresses and PC-relative addresses.
208 // FIXME: Handle TLS accesses via FS or GS. DR assumes all other segments have
209 // a zero base anyway.
210 bool OperandIsInteresting(opnd_t opnd) {
211 return (opnd_is_base_disp(opnd) && opnd_get_segment(opnd) != DR_SEG_FS &&
212 opnd_get_segment(opnd) != DR_SEG_GS);
215 bool WantToInstrument(instr_t *instr) {
216 // TODO: skip push instructions?
217 switch (instr_get_opcode(instr)) {
218 // FIXME: support the instructions excluded below:
219 case OP_rep_cmps:
220 // f3 a6 rep cmps %ds:(%rsi) %es:(%rdi) %rsi %rdi %rcx -> %rsi %rdi %rcx
221 return false;
224 // Labels appear due to drutil_expand_rep_string()
225 if (instr_is_label(instr))
226 return false;
228 CHECK(instr_ok_to_mangle(instr) == true);
230 if (instr_writes_memory(instr)) {
231 for (int d = 0; d < instr_num_dsts(instr); d++) {
232 opnd_t op = instr_get_dst(instr, d);
233 if (OperandIsInteresting(op))
234 return true;
238 return false;
// Both macros insert a meta instruction into `bb` before `at`; `bb` must be in
// scope at the use site. PRE pastes `what` onto INSTR_CREATE_ to build the
// instruction, PREF takes an already-created instruction. Each expansion
// already ends with a semicolon.
241 #define PRE(at, what) instrlist_meta_preinsert(bb, at, INSTR_CREATE_##what);
242 #define PREF(at, what) instrlist_meta_preinsert(bb, at, what);
// Inserts, before `instr`, meta instructions that zero the shadow location of
// the memory operand `op`:
//   1. spill the arithmetic flags when `instr` does not overwrite all six of
//      them or reads any of them;
//   2. spill R1 and R2, place the operand's address in R1, and AND it with
//      SHADOW_MEMORY_MASK (the standalone test build also adds
//      SHADOW_MEMORY_BASE);
//   3. compare the target with zero and store zero only when it is non-zero;
//   4. restore R1, R2 and, if spilled, the flags.
// `is_write` is not read anywhere in this function.
244 void InstrumentMops(void *drcontext, instrlist_t *bb, instr_t *instr, opnd_t op,
245 bool is_write) {
246 bool need_to_restore_eflags = false;
247 uint flags = instr_get_arith_flags(instr);
248 // TODO: do something smarter with flags and spills in general?
249 // For example, spill them only once for a sequence of instrumented
250 // instructions that don't change/read flags.
252 if (!TESTALL(EFLAGS_WRITE_6, flags) || TESTANY(EFLAGS_READ_6, flags)) {
253 if (VERBOSITY > 1)
254 dr_printf("Spilling eflags...\n");
255 need_to_restore_eflags = true;
256 // TODO: Maybe sometimes don't need to 'seto'.
257 // TODO: Maybe sometimes don't want to spill XAX here?
258 // TODO: No need to spill XAX here if XAX is not used in the BB.
// XAX is parked in SPILL_SLOT_1 while the flags pass through it into
// SPILL_SLOT_3, then XAX is restored; SPILL_SLOT_1 is reused below for R1.
259 dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
260 dr_save_arith_flags_to_xax(drcontext, bb, instr);
261 dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3);
262 dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
265 #if 0
266 dr_printf("==DRMSAN== DEBUG: %d %d %d %d %d %d\n",
267 opnd_is_memory_reference(op), opnd_is_base_disp(op),
268 opnd_is_base_disp(op) ? opnd_get_index(op) : -1,
269 opnd_is_far_memory_reference(op), opnd_is_reg_pointer_sized(op),
270 opnd_is_base_disp(op) ? opnd_get_disp(op) : -1);
271 #endif
273 reg_id_t R1;
274 bool address_in_R1 = false;
275 if (opnd_is_base_disp(op) && opnd_get_index(op) == DR_REG_NULL &&
276 opnd_get_disp(op) == 0) {
277 // If this is a simple access with no offset or index, we can just use the
278 // base for R1.
279 address_in_R1 = true;
280 R1 = opnd_get_base(op);
281 } else {
282 // Otherwise, we need to compute the addr into R1.
283 // TODO: reuse some spare register? e.g. r15 on x64
284 // TODO: might be used as a non-mem-ref register?
285 R1 = DR_REG_XAX;
287 CHECK(reg_is_pointer_sized(R1)); // otherwise R2 may be wrong.
289 // Pick R2 from R8 to R15.
290 // It's OK if the instr uses R2 elsewhere, since we'll restore it before instr.
291 reg_id_t R2;
292 for (R2 = DR_REG_R8; R2 <= DR_REG_R15; R2++) {
293 if (!opnd_uses_reg(op, R2))
294 break;
296 CHECK((R2 <= DR_REG_R15) && R1 != R2);
298 // Save the current values of R1 and R2.
299 dr_save_reg(drcontext, bb, instr, R1, SPILL_SLOT_1);
300 // TODO: Something smarter than spilling a "fixed" register R2?
301 dr_save_reg(drcontext, bb, instr, R2, SPILL_SLOT_2);
// The address computation runs inside CHECK; it is always executed.
303 if (!address_in_R1)
304 CHECK(drutil_insert_get_mem_addr(drcontext, bb, instr, op, R1, R2));
305 PRE(instr, mov_imm(drcontext, opnd_create_reg(R2),
306 OPND_CREATE_INT64(SHADOW_MEMORY_MASK)));
307 PRE(instr, and(drcontext, opnd_create_reg(R1), opnd_create_reg(R2)));
308 #ifdef MSANDR_STANDALONE_TEST
309 PRE(instr, add(drcontext, opnd_create_reg(R1),
310 OPND_CREATE_INT32(SHADOW_MEMORY_BASE)));
311 #endif
312 // There is no mov_st of a 64-bit immediate, so...
313 opnd_size_t op_size = opnd_get_size(op);
314 CHECK(op_size != OPSZ_NA);
315 uint access_size = opnd_size_in_bytes(op_size);
316 if (access_size <= 4 || op_size == OPSZ_PTR /* x64 support sign extension */) {
317 instr_t *label = INSTR_CREATE_label(drcontext);
318 opnd_t immed;
319 if (op_size == OPSZ_PTR || op_size == OPSZ_4)
320 immed = OPND_CREATE_INT32(0);
321 else
322 immed = opnd_create_immed_int((ptr_int_t) 0, op_size);
323 // we check if target is 0 before write to reduce unnecessary memory stores.
324 PRE(instr, cmp(drcontext,
325 opnd_create_base_disp(R1, DR_REG_NULL, 0, 0, op_size),
326 immed));
327 PRE(instr, jcc(drcontext, OP_je, opnd_create_instr(label)));
328 PRE(instr, mov_st(drcontext,
329 opnd_create_base_disp(R1, DR_REG_NULL, 0, 0, op_size),
330 immed));
331 PREF(instr, label);
332 } else {
// NOTE(review): wider accesses are cleared in 4-byte steps, so when
// access_size is not a multiple of 4 the last store extends past the access.
333 // FIXME: tail?
334 for (uint ofs = 0; ofs < access_size; ofs += 4) {
335 instr_t *label = INSTR_CREATE_label(drcontext);
336 opnd_t immed = OPND_CREATE_INT32(0);
337 PRE(instr, cmp(drcontext, OPND_CREATE_MEM32(R1, ofs), immed));
338 PRE(instr, jcc(drcontext, OP_je, opnd_create_instr(label)));
339 PRE(instr, mov_st(drcontext, OPND_CREATE_MEM32(R1, ofs), immed));
340 PREF(instr, label)
344 // Restore the registers and flags.
345 dr_restore_reg(drcontext, bb, instr, R1, SPILL_SLOT_1);
346 dr_restore_reg(drcontext, bb, instr, R2, SPILL_SLOT_2);
348 // TODO: move aflags save/restore to per instr instead of per opnd
349 if (need_to_restore_eflags) {
350 if (VERBOSITY > 1)
351 dr_printf("Restoring eflags\n");
352 // TODO: Check if it's reverse to the dr_restore_reg above and optimize.
353 dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
354 dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3);
355 dr_restore_arith_flags_from_xax(drcontext, bb, instr);
356 dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
359 // The original instruction is left untouched. The above instrumentation is just
360 // a prefix.
363 void InstrumentReturn(void *drcontext, instrlist_t *bb, instr_t *instr) {
364 #ifdef MSANDR_STANDALONE_TEST
365 PRE(instr,
366 mov_st(drcontext,
367 opnd_create_far_base_disp(DR_SEG_GS /* DR's TLS */,
368 DR_REG_NULL, DR_REG_NULL,
369 0, msan_retval_tls_offset,
370 OPSZ_PTR),
371 OPND_CREATE_INT32(0)));
372 #else /* !MSANDR_STANDALONE_TEST */
373 # ifdef MSANDR_NATIVE_EXEC
374 /* For optimized native exec, -mangle_app_seg and -private_loader are turned off,
375 * so we can reference msan_retval_tls_offset directly.
377 PRE(instr,
378 mov_st(drcontext,
379 opnd_create_far_base_disp(DR_SEG_FS, DR_REG_NULL, DR_REG_NULL, 0,
380 msan_retval_tls_offset, OPSZ_PTR),
381 OPND_CREATE_INT32(0)));
382 # else /* !MSANDR_NATIVE_EXEC */
383 /* XXX: the code below only works if -mangle_app_seg and -private_loader,
384 * which is turned off for optimized native exec
386 dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
388 // Clobbers nothing except xax.
389 bool res =
390 dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX);
391 CHECK(res);
393 // TODO: unpoison more bytes?
394 PRE(instr,
395 mov_st(drcontext, OPND_CREATE_MEM64(DR_REG_XAX, msan_retval_tls_offset),
396 OPND_CREATE_INT32(0)));
398 dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
399 # endif /* !MSANDR_NATIVE_EXEC */
400 // The original instruction is left untouched. The above instrumentation is just
401 // a prefix.
402 #endif /* !MSANDR_STANDALONE_TEST */
405 void InstrumentIndirectBranch(void *drcontext, instrlist_t *bb,
406 instr_t *instr) {
407 #ifdef MSANDR_STANDALONE_TEST
408 for (int i = 0; i < NUM_TLS_PARAM; ++i) {
409 PRE(instr,
410 mov_st(drcontext,
411 opnd_create_far_base_disp(DR_SEG_GS /* DR's TLS */,
412 DR_REG_NULL, DR_REG_NULL,
414 msan_param_tls_offset +
415 i * sizeof(void *),
416 OPSZ_PTR),
417 OPND_CREATE_INT32(0)));
419 #else /* !MSANDR_STANDALONE_TEST */
420 # ifdef MSANDR_NATIVE_EXEC
421 for (int i = 0; i < NUM_TLS_PARAM; ++i) {
422 PRE(instr,
423 mov_st(drcontext,
424 opnd_create_far_base_disp(DR_SEG_FS, DR_REG_NULL, DR_REG_NULL, 0,
425 msan_param_tls_offset + i*sizeof(void*),
426 OPSZ_PTR),
427 OPND_CREATE_INT32(0)));
429 # else /* !MSANDR_NATIVE_EXEC */
430 /* XXX: the code below only works if -mangle_app_seg and -private_loader,
431 * which is turned off for optimized native exec
433 dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
435 // Clobbers nothing except xax.
436 bool res =
437 dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX);
438 CHECK(res);
440 // TODO: unpoison more bytes?
441 for (int i = 0; i < NUM_TLS_PARAM; ++i) {
442 PRE(instr,
443 mov_st(drcontext, OPND_CREATE_MEMPTR(DR_REG_XAX, msan_param_tls_offset +
444 i * sizeof(void *)),
445 OPND_CREATE_INT32(0)));
448 dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
449 # endif /* !MSANDR_NATIVE_EXEC */
450 // The original instruction is left untouched. The above instrumentation is just
451 // a prefix.
452 #endif /* !MSANDR_STANDALONE_TEST */
455 #ifndef MSANDR_NATIVE_EXEC
456 // For use with binary search. Modules shouldn't overlap, so we shouldn't have
457 // to look at end_. If that can happen, we won't support such an application.
458 bool ModuleDataCompareStart(const ModuleData &left, const ModuleData &right) {
459 return left.start_ < right.start_;
462 // Look up the module containing PC. Should be relatively fast, as its called
463 // for each bb instrumentation.
464 ModuleData *LookupModuleByPC(app_pc pc) {
465 ModuleData fake_mod_data;
466 fake_mod_data.start_ = pc;
467 std::vector<ModuleData>::iterator it =
468 lower_bound(g_module_list.begin(), g_module_list.end(), fake_mod_data,
469 ModuleDataCompareStart);
470 // if (it == g_module_list.end())
471 // return NULL;
472 if (it == g_module_list.end() || pc < it->start_)
473 --it;
474 CHECK(it->start_ <= pc);
475 if (pc >= it->end_) {
476 // We're past the end of this module. We shouldn't be in the next module,
477 // or lower_bound lied to us.
478 ++it;
479 CHECK(it == g_module_list.end() || pc < it->start_);
480 return NULL;
483 // OK, we found the module.
484 return &*it;
// Policy for code that belongs to no known module: always instrument it.
bool ShouldInstrumentNonModuleCode() {
  return true;
}
489 bool ShouldInstrumentModule(ModuleData *mod_data) {
490 // TODO(rnk): Flags for blacklist would get wired in here.
491 generic_func_t p =
492 dr_get_proc_address(mod_data->handle_, "__msan_track_origins");
493 return !p;
496 bool ShouldInstrumentPc(app_pc pc, ModuleData **pmod_data) {
497 ModuleData *mod_data = LookupModuleByPC(pc);
498 if (pmod_data)
499 *pmod_data = mod_data;
500 if (mod_data != NULL) {
501 // This module is on a blacklist.
502 if (!mod_data->should_instrument_) {
503 return false;
505 } else if (!ShouldInstrumentNonModuleCode()) {
506 return false;
508 return true;
510 #endif /* !MSANDR_NATIVE_EXEC */
512 // TODO(rnk): Make sure we instrument after __msan_init.
513 dr_emit_flags_t
514 event_basic_block_app2app(void *drcontext, void *tag, instrlist_t *bb,
515 bool for_trace, bool translating) {
516 #ifndef MSANDR_NATIVE_EXEC
517 app_pc pc = dr_fragment_app_pc(tag);
518 if (ShouldInstrumentPc(pc, NULL))
519 CHECK(drutil_expand_rep_string(drcontext, bb));
520 #else /* MSANDR_NATIVE_EXEC */
521 CHECK(drutil_expand_rep_string(drcontext, bb));
522 #endif /* MSANDR_NATIVE_EXEC */
523 return DR_EMIT_PERSISTABLE;
// Basic-block instrumentation event. Outside native-exec builds, blocks for
// which ShouldInstrumentPc() is false are returned unchanged. Otherwise every
// instruction in `bb` is visited:
//   * ret / far ret                 -> InstrumentReturn()
//   * indirect call / indirect jump -> InstrumentIndirectBranch()
//   * anything WantToInstrument() accepts that writes memory
//                                   -> InstrumentMops() on the first
//                                      interesting destination operand
// Always returns DR_EMIT_PERSISTABLE. `for_trace` is not read here.
526 dr_emit_flags_t event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
527 bool for_trace, bool translating) {
528 app_pc pc = dr_fragment_app_pc(tag);
529 #ifndef MSANDR_NATIVE_EXEC
530 ModuleData *mod_data;
532 if (!ShouldInstrumentPc(pc, &mod_data))
533 return DR_EMIT_PERSISTABLE;
535 if (VERBOSITY > 1)
536 dr_printf("============================================================\n");
537 if (VERBOSITY > 0) {
538 std::string mod_path = (mod_data ? mod_data->path_ : "<no module, JITed?>");
539 if (mod_data && !mod_data->executed_) {
540 mod_data->executed_ = true; // Nevermind this race.
541 dr_printf("Executing from new module: %s\n", mod_path.c_str());
543 dr_printf("BB to be instrumented: %p [from %s]; translating = %s\n", pc,
544 mod_path.c_str(), translating ? "true" : "false");
545 if (mod_data) {
546 // Match standard sanitizer trace format for free symbols.
547 // #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
548 dr_printf(" #0 %p (%s+%p)\n", pc, mod_data->path_.c_str(),
549 pc - mod_data->start_);
552 #endif /* !MSANDR_NATIVE_EXEC */
554 if (VERBOSITY > 1) {
555 instrlist_disassemble(drcontext, pc, bb, STDOUT);
556 instr_t *instr;
557 for (instr = instrlist_first(bb); instr; instr = instr_get_next(instr)) {
558 dr_printf("opcode: %d\n", instr_get_opcode(instr));
// Main pass: each instruction is matched against the cases in order and the
// first matching case handles it.
562 for (instr_t *i = instrlist_first(bb); i != NULL; i = instr_get_next(i)) {
563 int opcode = instr_get_opcode(i);
564 if (opcode == OP_ret || opcode == OP_ret_far) {
565 InstrumentReturn(drcontext, bb, i);
566 continue;
569 // These instructions hopefully cover all cases where control is transferred
570 // to a function in a different module (we only care about calls into
571 // compiler-instrumented modules).
572 // * call_ind is used for normal indirect calls.
573 // * jmp_ind is used for indirect tail calls, and calls through PLT (PLT
574 // stub includes a jump to an address from GOT).
575 if (opcode == OP_call_ind || opcode == OP_call_far_ind ||
576 opcode == OP_jmp_ind || opcode == OP_jmp_far_ind) {
577 InstrumentIndirectBranch(drcontext, bb, i);
578 continue;
581 if (!WantToInstrument(i))
582 continue;
584 if (VERBOSITY > 1) {
585 app_pc orig_pc = dr_fragment_app_pc(tag);
586 uint flags = instr_get_arith_flags(i);
587 dr_printf("+%d -> to be instrumented! [opcode=%d, flags = 0x%08X]\n",
588 instr_get_app_pc(i) - orig_pc, instr_get_opcode(i), flags);
591 if (instr_writes_memory(i)) {
592 // Instrument memory writes
593 // bool instrumented_anything = false;
594 for (int d = 0; d < instr_num_dsts(i); d++) {
595 opnd_t op = instr_get_dst(i, d);
596 if (!OperandIsInteresting(op))
597 continue;
599 // CHECK(!instrumented_anything);
600 // instrumented_anything = true;
601 InstrumentMops(drcontext, bb, i, op, true);
602 break; // only instrumenting the first dst
607 // TODO: optimize away redundant restore-spill pairs?
609 if (VERBOSITY > 1) {
610 pc = dr_fragment_app_pc(tag);
611 dr_printf("\nFinished instrumenting dynamorio_basic_block(PC=" PFX ")\n", pc);
612 instrlist_disassemble(drcontext, pc, bb, STDOUT);
614 return DR_EMIT_PERSISTABLE;
617 #ifndef MSANDR_NATIVE_EXEC
618 void event_module_load(void *drcontext, const module_data_t *info,
619 bool loaded) {
620 // Insert the module into the list while maintaining the ordering.
621 ModuleData mod_data(info);
622 std::vector<ModuleData>::iterator it =
623 upper_bound(g_module_list.begin(), g_module_list.end(), mod_data,
624 ModuleDataCompareStart);
625 it = g_module_list.insert(it, mod_data);
626 // Check if we should instrument this module.
627 it->should_instrument_ = ShouldInstrumentModule(&*it);
628 dr_module_set_should_instrument(info->handle, it->should_instrument_);
630 if (VERBOSITY > 0)
631 dr_printf("==DRMSAN== Loaded module: %s [%p...%p], instrumentation is %s\n",
632 info->full_path, info->start, info->end,
633 it->should_instrument_ ? "on" : "off");
636 void event_module_unload(void *drcontext, const module_data_t *info) {
637 if (VERBOSITY > 0)
638 dr_printf("==DRMSAN== Unloaded module: %s [%p...%p]\n", info->full_path,
639 info->start, info->end);
641 // Remove the module from the list.
642 ModuleData mod_data(info);
643 std::vector<ModuleData>::iterator it =
644 lower_bound(g_module_list.begin(), g_module_list.end(), mod_data,
645 ModuleDataCompareStart);
646 // It's a bug if we didn't actually find the module.
647 CHECK(it != g_module_list.end() && it->start_ == mod_data.start_ &&
648 it->end_ == mod_data.end_ && it->path_ == mod_data.path_);
649 g_module_list.erase(it);
651 #endif /* !MSANDR_NATIVE_EXEC */
653 void event_exit() {
654 // Clean up so DR doesn't tell us we're leaking memory.
655 drsys_exit();
656 drutil_exit();
657 drmgr_exit();
659 #ifdef MSANDR_STANDALONE_TEST
660 /* free tls */
661 bool res;
662 res = dr_raw_tls_cfree(msan_retval_tls_offset, NUM_TLS_RETVAL);
663 CHECK(res);
664 res = dr_raw_tls_cfree(msan_param_tls_offset, NUM_TLS_PARAM);
665 CHECK(res);
666 /* we do not bother to free the shadow memory */
667 #endif /* !MSANDR_STANDALONE_TEST */
668 if (VERBOSITY > 0)
669 dr_printf("==DRMSAN== DONE\n");
// Syscall filter: every system call is intercepted, regardless of `sysnum`.
// FIXME: only intercept syscalls with memory effects.
bool event_filter_syscall(void *drcontext, int sysnum) {
  const bool intercept_everything = true;
  return intercept_everything;
}
677 bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) {
678 CHECK(arg->valid);
680 if (arg->pre)
681 return true;
682 if (!TESTANY(DRSYS_PARAM_OUT, arg->mode))
683 return true;
685 size_t sz = arg->size;
687 if (sz > 0xFFFFFFFF) {
688 drmf_status_t res;
689 drsys_syscall_t *syscall = (drsys_syscall_t *)user_data;
690 const char *name;
691 res = drsys_syscall_name(syscall, &name);
692 CHECK(res == DRMF_SUCCESS);
694 dr_printf("SANITY: syscall '%s' arg %d writes %llu bytes memory?!"
695 " Clipping to %llu.\n",
696 name, arg->ordinal, (unsigned long long) sz,
697 (unsigned long long)(sz & 0xFFFFFFFF));
700 if (VERBOSITY > 0) {
701 drmf_status_t res;
702 drsys_syscall_t *syscall = (drsys_syscall_t *)user_data;
703 const char *name;
704 res = drsys_syscall_name(syscall, &name);
705 CHECK(res == DRMF_SUCCESS);
706 dr_printf("drsyscall: syscall '%s' arg %d wrote range [%p, %p)\n",
707 name, arg->ordinal, arg->start_addr,
708 (char *)arg->start_addr + sz);
711 // We don't switch to the app context because __msan_unpoison() doesn't need
712 // TLS segments.
713 __msan_unpoison(arg->start_addr, sz);
715 return true; /* keep going */
718 bool event_pre_syscall(void *drcontext, int sysnum) {
719 drsys_syscall_t *syscall;
720 drsys_sysnum_t sysnum_full;
721 bool known;
722 drsys_param_type_t ret_type;
723 drmf_status_t res;
724 const char *name;
726 res = drsys_cur_syscall(drcontext, &syscall);
727 CHECK(res == DRMF_SUCCESS);
729 res = drsys_syscall_number(syscall, &sysnum_full);
730 CHECK(res == DRMF_SUCCESS);
731 CHECK(sysnum == sysnum_full.number);
733 res = drsys_syscall_is_known(syscall, &known);
734 CHECK(res == DRMF_SUCCESS);
736 res = drsys_syscall_name(syscall, &name);
737 CHECK(res == DRMF_SUCCESS);
739 res = drsys_syscall_return_type(syscall, &ret_type);
740 CHECK(res == DRMF_SUCCESS);
741 CHECK(ret_type != DRSYS_TYPE_INVALID);
742 CHECK(!known || ret_type != DRSYS_TYPE_UNKNOWN);
744 res = drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, NULL);
745 CHECK(res == DRMF_SUCCESS);
747 return true;
750 static bool IsInLoader(void *drcontext) {
751 // TODO: This segment swap is inefficient. DR should just let us query the
752 // app segment base, which it has. Alternatively, if we disable
753 // -mangle_app_seg, then we won't need the swap.
754 bool need_swap = !dr_using_app_state(drcontext);
755 if (need_swap)
756 dr_switch_to_app_state(drcontext);
757 bool is_in_loader = __msan_is_in_loader();
758 if (need_swap)
759 dr_switch_to_dr_state(drcontext);
760 return is_in_loader;
763 void event_post_syscall(void *drcontext, int sysnum) {
764 drsys_syscall_t *syscall;
765 drsys_sysnum_t sysnum_full;
766 bool success = false;
767 drmf_status_t res;
769 res = drsys_cur_syscall(drcontext, &syscall);
770 CHECK(res == DRMF_SUCCESS);
772 res = drsys_syscall_number(syscall, &sysnum_full);
773 CHECK(res == DRMF_SUCCESS);
774 CHECK(sysnum == sysnum_full.number);
776 res = drsys_syscall_succeeded(syscall, dr_syscall_get_result(drcontext),
777 &success);
778 CHECK(res == DRMF_SUCCESS);
780 if (success) {
781 res =
782 drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, (void *)syscall);
783 CHECK(res == DRMF_SUCCESS);
786 // Our normal mmap interceptor can't intercept calls from the loader itself.
787 // This means we don't clear the shadow for calls to dlopen. For now, we
788 // solve this by intercepting mmap from ld.so here, but ideally we'd have a
789 // solution that doesn't rely on msandr.
791 // Be careful not to intercept maps done by the msan rtl. Otherwise we end up
792 // unpoisoning vast regions of memory and OOMing.
793 // TODO: __msan_unpoison() could "flush" large regions of memory like tsan
794 // does instead of doing a large memset. However, we need the memory to be
795 // zeroed, where as tsan does not, so plain madvise is not enough.
796 if (success && (sysnum == SYS_mmap IF_NOT_X64(|| sysnum == SYS_mmap2))) {
797 if (IsInLoader(drcontext)) {
798 app_pc base = (app_pc)dr_syscall_get_result(drcontext);
799 ptr_uint_t size;
800 drmf_status_t res = drsys_pre_syscall_arg(drcontext, 1, &size);
801 CHECK(res == DRMF_SUCCESS);
802 if (VERBOSITY > 0)
803 dr_printf("unpoisoning for dlopen: [%p-%p]\n", base, base + size);
804 // We don't switch to the app context because __msan_unpoison() doesn't
805 // need TLS segments.
806 __msan_unpoison(base, size);
811 } // namespace
// Client entry point. In order, it:
//   * initializes drmgr and drutil;
//   * outside native-exec builds, returns early for a fixed list of
//     application names;
//   * initializes drsyscall and registers the syscall filter, pre-syscall and
//     post-syscall events;
//   * in the standalone test build, allocates raw TLS slots and maps the
//     shadow memory region;
//   * resolves the MSan callbacks and reads the two TLS offsets while in app
//     state;
//   * registers the exit, basic-block and (outside native-exec builds)
//     module load/unload events;
//   * calls __msan_dr_is_initialized() and installs dr_app_handle_mbr_target
//     as the indirect-call wrapper.
// NOTE(review): the early return happens after drmgr_init()/drutil_init() and
// before the exit event is registered, so event_exit() is not registered for
// those applications - confirm this is intended.
813 DR_EXPORT void dr_init(client_id_t id) {
814 drmf_status_t res;
816 drmgr_init();
817 drutil_init();
819 #ifndef MSANDR_NATIVE_EXEC
820 // We should use drconfig to ignore these applications.
821 std::string app_name = dr_get_application_name();
822 // This blacklist will still run these apps through DR's code cache. On the
823 // other hand, we are able to follow children of these apps.
824 // FIXME: Once DR has detach, we could just detach here. Alternatively,
825 // if DR had a fork or exec hook to let us decide there, that would be nice.
826 // FIXME: make the blacklist cmd-adjustable.
827 if (app_name == "python" || app_name == "python2.7" || app_name == "bash" ||
828 app_name == "sh" || app_name == "true" || app_name == "exit" ||
829 app_name == "yes" || app_name == "echo")
830 return;
831 #endif /* !MSANDR_NATIVE_EXEC */
833 drsys_options_t ops;
834 memset(&ops, 0, sizeof(ops));
835 ops.struct_size = sizeof(ops);
836 ops.analyze_unknown_syscalls = false;
838 res = drsys_init(id, &ops);
839 CHECK(res == DRMF_SUCCESS);
841 dr_register_filter_syscall_event(event_filter_syscall);
842 drmgr_register_pre_syscall_event(event_pre_syscall);
843 drmgr_register_post_syscall_event(event_post_syscall);
844 res = drsys_filter_all_syscalls();
845 CHECK(res == DRMF_SUCCESS);
847 #ifdef MSANDR_STANDALONE_TEST
848 reg_id_t reg_seg;
849 /* alloc tls */
850 if (!dr_raw_tls_calloc(&reg_seg, &mock_msan_retval_tls_offset, NUM_TLS_RETVAL, 0))
851 CHECK(false);
852 CHECK(reg_seg == DR_SEG_GS /* x64 only! */);
853 if (!dr_raw_tls_calloc(&reg_seg, &mock_msan_param_tls_offset, NUM_TLS_PARAM, 0))
854 CHECK(false);
855 CHECK(reg_seg == DR_SEG_GS /* x64 only! */);
856 /* alloc shadow memory */
857 if (mmap(SHADOW_MEMORY_BASE, SHADOW_MEMORY_SIZE, PROT_READ|PROT_WRITE,
858 MAP_PRIVATE | MAP_ANON, -1, 0) != SHADOW_MEMORY_BASE) {
859 CHECK(false);
861 #endif /* MSANDR_STANDALONE_TEST */
862 InitializeMSanCallbacks();
864 // FIXME: the shadow is initialized earlier when DR calls one of our wrapper
865 // functions. This may change one day.
866 // TODO: make this more robust.
868 void *drcontext = dr_get_current_drcontext();
// The two offset getters are called while switched to app state.
870 dr_switch_to_app_state(drcontext);
871 msan_retval_tls_offset = __msan_get_retval_tls_offset();
872 msan_param_tls_offset = __msan_get_param_tls_offset();
873 dr_switch_to_dr_state(drcontext);
874 if (VERBOSITY > 0) {
875 dr_printf("__msan_retval_tls offset: %d\n", msan_retval_tls_offset);
876 dr_printf("__msan_param_tls offset: %d\n", msan_param_tls_offset);
879 // Standard DR events.
880 dr_register_exit_event(event_exit);
882 drmgr_priority_t priority = {
883 sizeof(priority), /* size of struct */
884 "msandr", /* name of our operation */
885 NULL, /* optional name of operation we should precede */
886 NULL, /* optional name of operation we should follow */
888 }; /* numeric priority */
890 drmgr_register_bb_app2app_event(event_basic_block_app2app, &priority);
891 drmgr_register_bb_instru2instru_event(event_basic_block, &priority);
892 #ifndef MSANDR_NATIVE_EXEC
893 drmgr_register_module_load_event(event_module_load);
894 drmgr_register_module_unload_event(event_module_unload);
895 #endif /* !MSANDR_NATIVE_EXEC */
896 __msan_dr_is_initialized();
897 __msan_set_indirect_call_wrapper(dr_app_handle_mbr_target);
898 if (VERBOSITY > 0)
899 dr_printf("==MSANDR== Starting!\n");