1 //===-- msandr.cc ---------------------------------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file is a part of MemorySanitizer.
12 // DynamoRio client for MemorySanitizer.
14 // MemorySanitizer requires that all program code is instrumented. Any memory
15 // store that can turn an uninitialized value into an initialized value must be
16 // observed by the tool, otherwise we risk reporting a false UMR.
18 // This also includes any libraries that the program depends on.
20 // In the case when rebuilding all program dependencies with MemorySanitizer is
21 // problematic, an experimental MSanDR tool (the code you are currently looking
22 // at) can be used. It is a DynamoRio-based tool that uses dynamic instrumentation to:
24 // * Unpoison all memory stores.
25 // * Unpoison TLS slots used by MemorySanitizer to pass function arguments and
26 // return value shadow on anything that looks like a function call or a return.
29 // This tool does not detect the use of uninitialized values in uninstrumented
30 // libraries. It merely gets rid of false positives by marking all data that
31 // passes through uninstrumented code as fully initialized.
32 //===----------------------------------------------------------------------===//
37 #include <drsyscall.h>
40 #include <sys/syscall.h> /* for SYS_mmap */
44 // XXX: it seems that setting the macro in CMakeLists.txt does not work,
45 // so set it manually here for now.
47 // Building msandr client for running in DynamoRIO hybrid mode,
48 // which allows some modules to run natively.
49 // TODO: turn it on by default when hybrid is stable enough
50 // #define MSANDR_NATIVE_EXEC
52 #ifndef MSANDR_NATIVE_EXEC
// TESTALL: true iff every bit set in `mask` is also set in `var`.
59 #define TESTALL(mask, var) (((mask) & (var)) == (mask))
// TESTANY: true iff at least one bit set in `mask` is also set in `var`.
60 #define TESTANY(mask, var) (((mask) & (var)) != 0)
62 #define CHECK_IMPL(condition, file, line) \
65 dr_printf("Check failed: `%s`\nat %s:%d\n", #condition, file, line); \
68 } while (0) // TODO: stacktrace
// Checks `condition` through CHECK_IMPL, supplying the file and line of the
// call site.
70 #define CHECK(condition) CHECK_IMPL(condition, __FILE__, __LINE__)
74 // Building msandr client for standalone test that does not need to
75 // run with msan build executables. Disable by default.
76 // #define MSANDR_STANDALONE_TEST
// Number of TLS slots used for the return-value shadow and for the parameter
// shadow. In the standalone test these are the slot counts passed to
// dr_raw_tls_calloc()/dr_raw_tls_cfree(); NUM_TLS_PARAM is also the number of
// parameter slots zeroed before an indirect branch.
78 #define NUM_TLS_RETVAL 1
79 #define NUM_TLS_PARAM 6
81 #ifdef MSANDR_STANDALONE_TEST
82 // For testing purposes, we map app to shadow memory at [0x100000, 0x200000).
83 // Normally, the app starts at 0x400000:
84 // 00400000-004e0000 r-xp 00000000 fc:00 524343 /bin/bash
85 // so there should be no problem.
86 # define SHADOW_MEMORY_BASE ((void *)0x100000)
87 # define SHADOW_MEMORY_SIZE (0x100000)
88 # define SHADOW_MEMORY_MASK (SHADOW_MEMORY_SIZE - 4 /* to avoid overflow */)
90 // shadow memory range [0x200000000000, 0x400000000000)
91 // assuming no app memory below 0x200000000000
92 # define SHADOW_MEMORY_MASK 0x3fffffffffffULL
93 #endif /* MSANDR_STANDALONE_TEST */
95 typedef void *(*WrapperFn
)(void *);
96 extern "C" void __msan_set_indirect_call_wrapper(WrapperFn wrapper
);
97 extern "C" void __msan_dr_is_initialized();
// TLS offsets of MSan's return-value and parameter shadow. dr_init() fills
// them in from __msan_get_retval_tls_offset() and
// __msan_get_param_tls_offset().
101 int msan_retval_tls_offset
;
102 int msan_param_tls_offset
;
104 #ifndef MSANDR_NATIVE_EXEC
108 ModuleData(const module_data_t
*info
);
109 // Yes, we want default copy, assign, and dtor semantics.
114 // Full path to the module.
116 module_handle_t handle_
;
117 bool should_instrument_
;
121 // A vector of loaded modules sorted by module bounds. We lookup the current PC
122 // in here from the bb event. This is better than an rb tree because the lookup
123 // is faster and the bb event occurs far more often than the module load event.
124 std::vector
<ModuleData
> g_module_list
;
126 ModuleData::ModuleData()
127 : start_(NULL
), end_(NULL
), path_(""), handle_(NULL
),
128 should_instrument_(false), executed_(false) {
131 ModuleData::ModuleData(const module_data_t
*info
)
132 : start_(info
->start
), end_(info
->end
), path_(info
->full_path
),
133 handle_(info
->handle
),
134 // We'll check the black/white lists later and adjust this.
135 should_instrument_(true), executed_(false) {
137 #endif /* !MSANDR_NATIVE_EXEC */
// Pointers to MSan runtime entry points. InitializeMSanCallbacks() resolves
// each one by name through LookupCallback() and stores the result here.
139 int(*__msan_get_retval_tls_offset
)();
140 int(*__msan_get_param_tls_offset
)();
141 void (*__msan_unpoison
)(void *base
, size_t size
);
142 bool (*__msan_is_in_loader
)();
144 #ifdef MSANDR_STANDALONE_TEST
145 uint mock_msan_retval_tls_offset
;
146 uint mock_msan_param_tls_offset
;
147 static int mock_msan_get_retval_tls_offset() {
148 return (int)mock_msan_retval_tls_offset
;
151 static int mock_msan_get_param_tls_offset() {
152 return (int)mock_msan_param_tls_offset
;
155 static void mock_msan_unpoison(void *base
, size_t size
) {
159 static bool mock_msan_is_in_loader() {
162 #endif /* MSANDR_STANDALONE_TEST */
164 static generic_func_t
LookupCallback(module_data_t
*app
, const char *name
) {
165 #ifdef MSANDR_STANDALONE_TEST
166 if (strcmp("__msan_get_retval_tls_offset", name
) == 0) {
167 return (generic_func_t
)mock_msan_get_retval_tls_offset
;
168 } else if (strcmp("__msan_get_param_tls_offset", name
) == 0) {
169 return (generic_func_t
)mock_msan_get_param_tls_offset
;
170 } else if (strcmp("__msan_unpoison", name
) == 0) {
171 return (generic_func_t
)mock_msan_unpoison
;
172 } else if (strcmp("__msan_is_in_loader", name
) == 0) {
173 return (generic_func_t
)mock_msan_is_in_loader
;
177 #else /* !MSANDR_STANDALONE_TEST */
178 generic_func_t callback
= dr_get_proc_address(app
->handle
, name
);
179 if (callback
== NULL
) {
180 dr_printf("Couldn't find `%s` in %s\n", name
, app
->full_path
);
184 #endif /* !MSANDR_STANDALONE_TEST */
187 void InitializeMSanCallbacks() {
188 module_data_t
*app
= dr_lookup_module_by_name(dr_get_application_name());
190 dr_printf("%s - oops, dr_lookup_module_by_name failed!\n",
191 dr_get_application_name());
195 __msan_get_retval_tls_offset
= (int (*)())
196 LookupCallback(app
, "__msan_get_retval_tls_offset");
197 __msan_get_param_tls_offset
= (int (*)())
198 LookupCallback(app
, "__msan_get_param_tls_offset");
199 __msan_unpoison
= (void(*)(void *, size_t))
200 LookupCallback(app
, "__msan_unpoison");
201 __msan_is_in_loader
= (bool (*)())
202 LookupCallback(app
, "__msan_is_in_loader");
204 dr_free_module_data(app
);
207 // FIXME: Handle absolute addresses and PC-relative addresses.
208 // FIXME: Handle TLS accesses via FS or GS. DR assumes all other segments have
209 // a zero base anyway.
210 bool OperandIsInteresting(opnd_t opnd
) {
211 return (opnd_is_base_disp(opnd
) && opnd_get_segment(opnd
) != DR_SEG_FS
&&
212 opnd_get_segment(opnd
) != DR_SEG_GS
);
215 bool WantToInstrument(instr_t
*instr
) {
216 // TODO: skip push instructions?
217 switch (instr_get_opcode(instr
)) {
218 // FIXME: support the instructions excluded below:
220 // f3 a6 rep cmps %ds:(%rsi) %es:(%rdi) %rsi %rdi %rcx -> %rsi %rdi %rcx
224 // Labels appear due to drutil_expand_rep_string()
225 if (instr_is_label(instr
))
228 CHECK(instr_ok_to_mangle(instr
) == true);
230 if (instr_writes_memory(instr
)) {
231 for (int d
= 0; d
< instr_num_dsts(instr
); d
++) {
232 opnd_t op
= instr_get_dst(instr
, d
);
233 if (OperandIsInteresting(op
))
// Shorthands for instrlist_meta_preinsert() that insert an instruction before
// `at`. Both expect an instruction list named `bb` in the enclosing scope, and
// both expansions already end in a semicolon.
// PRE builds the instruction by pasting `what` onto the INSTR_CREATE_ prefix.
241 #define PRE(at, what) instrlist_meta_preinsert(bb, at, INSTR_CREATE_##what);
// PREF passes `what` through unchanged.
242 #define PREF(at, what) instrlist_meta_preinsert(bb, at, what);
244 void InstrumentMops(void *drcontext
, instrlist_t
*bb
, instr_t
*instr
, opnd_t op
,
246 bool need_to_restore_eflags
= false;
247 uint flags
= instr_get_arith_flags(instr
);
248 // TODO: do something smarter with flags and spills in general?
249 // For example, spill them only once for a sequence of instrumented
250 // instructions that don't change/read flags.
252 if (!TESTALL(EFLAGS_WRITE_6
, flags
) || TESTANY(EFLAGS_READ_6
, flags
)) {
254 dr_printf("Spilling eflags...\n");
255 need_to_restore_eflags
= true;
256 // TODO: Maybe sometimes don't need to 'seto'.
257 // TODO: Maybe sometimes don't want to spill XAX here?
258 // TODO: No need to spill XAX here if XAX is not used in the BB.
259 dr_save_reg(drcontext
, bb
, instr
, DR_REG_XAX
, SPILL_SLOT_1
);
260 dr_save_arith_flags_to_xax(drcontext
, bb
, instr
);
261 dr_save_reg(drcontext
, bb
, instr
, DR_REG_XAX
, SPILL_SLOT_3
);
262 dr_restore_reg(drcontext
, bb
, instr
, DR_REG_XAX
, SPILL_SLOT_1
);
266 dr_printf("==DRMSAN== DEBUG: %d %d %d %d %d %d\n",
267 opnd_is_memory_reference(op
), opnd_is_base_disp(op
),
268 opnd_is_base_disp(op
) ? opnd_get_index(op
) : -1,
269 opnd_is_far_memory_reference(op
), opnd_is_reg_pointer_sized(op
),
270 opnd_is_base_disp(op
) ? opnd_get_disp(op
) : -1);
274 bool address_in_R1
= false;
275 if (opnd_is_base_disp(op
) && opnd_get_index(op
) == DR_REG_NULL
&&
276 opnd_get_disp(op
) == 0) {
277 // If this is a simple access with no offset or index, we can just use the
279 address_in_R1
= true;
280 R1
= opnd_get_base(op
);
282 // Otherwise, we need to compute the addr into R1.
283 // TODO: reuse some spare register? e.g. r15 on x64
284 // TODO: might be used as a non-mem-ref register?
287 CHECK(reg_is_pointer_sized(R1
)); // otherwise R2 may be wrong.
289 // Pick R2 from R8 to R15.
290 // It's OK if the instr uses R2 elsewhere, since we'll restore it before instr.
292 for (R2
= DR_REG_R8
; R2
<= DR_REG_R15
; R2
++) {
293 if (!opnd_uses_reg(op
, R2
))
296 CHECK((R2
<= DR_REG_R15
) && R1
!= R2
);
298 // Save the current values of R1 and R2.
299 dr_save_reg(drcontext
, bb
, instr
, R1
, SPILL_SLOT_1
);
300 // TODO: Something smarter than spilling a "fixed" register R2?
301 dr_save_reg(drcontext
, bb
, instr
, R2
, SPILL_SLOT_2
);
304 CHECK(drutil_insert_get_mem_addr(drcontext
, bb
, instr
, op
, R1
, R2
));
305 PRE(instr
, mov_imm(drcontext
, opnd_create_reg(R2
),
306 OPND_CREATE_INT64(SHADOW_MEMORY_MASK
)));
307 PRE(instr
, and(drcontext
, opnd_create_reg(R1
), opnd_create_reg(R2
)));
308 #ifdef MSANDR_STANDALONE_TEST
309 PRE(instr
, add(drcontext
, opnd_create_reg(R1
),
310 OPND_CREATE_INT32(SHADOW_MEMORY_BASE
)));
312 // There is no mov_st of a 64-bit immediate, so...
313 opnd_size_t op_size
= opnd_get_size(op
);
314 CHECK(op_size
!= OPSZ_NA
);
315 uint access_size
= opnd_size_in_bytes(op_size
);
316 if (access_size
<= 4 || op_size
== OPSZ_PTR
/* x64 support sign extension */) {
317 instr_t
*label
= INSTR_CREATE_label(drcontext
);
319 if (op_size
== OPSZ_PTR
|| op_size
== OPSZ_4
)
320 immed
= OPND_CREATE_INT32(0);
322 immed
= opnd_create_immed_int((ptr_int_t
) 0, op_size
);
323 // we check if target is 0 before write to reduce unnecessary memory stores.
324 PRE(instr
, cmp(drcontext
,
325 opnd_create_base_disp(R1
, DR_REG_NULL
, 0, 0, op_size
),
327 PRE(instr
, jcc(drcontext
, OP_je
, opnd_create_instr(label
)));
328 PRE(instr
, mov_st(drcontext
,
329 opnd_create_base_disp(R1
, DR_REG_NULL
, 0, 0, op_size
),
334 for (uint ofs
= 0; ofs
< access_size
; ofs
+= 4) {
335 instr_t
*label
= INSTR_CREATE_label(drcontext
);
336 opnd_t immed
= OPND_CREATE_INT32(0);
337 PRE(instr
, cmp(drcontext
, OPND_CREATE_MEM32(R1
, ofs
), immed
));
338 PRE(instr
, jcc(drcontext
, OP_je
, opnd_create_instr(label
)));
339 PRE(instr
, mov_st(drcontext
, OPND_CREATE_MEM32(R1
, ofs
), immed
));
344 // Restore the registers and flags.
345 dr_restore_reg(drcontext
, bb
, instr
, R1
, SPILL_SLOT_1
);
346 dr_restore_reg(drcontext
, bb
, instr
, R2
, SPILL_SLOT_2
);
348 // TODO: move aflags save/restore to per instr instead of per opnd
349 if (need_to_restore_eflags
) {
351 dr_printf("Restoring eflags\n");
352 // TODO: Check if it's reverse to the dr_restore_reg above and optimize.
353 dr_save_reg(drcontext
, bb
, instr
, DR_REG_XAX
, SPILL_SLOT_1
);
354 dr_restore_reg(drcontext
, bb
, instr
, DR_REG_XAX
, SPILL_SLOT_3
);
355 dr_restore_arith_flags_from_xax(drcontext
, bb
, instr
);
356 dr_restore_reg(drcontext
, bb
, instr
, DR_REG_XAX
, SPILL_SLOT_1
);
359 // The original instruction is left untouched. The above instrumentation is just
363 void InstrumentReturn(void *drcontext
, instrlist_t
*bb
, instr_t
*instr
) {
364 #ifdef MSANDR_STANDALONE_TEST
367 opnd_create_far_base_disp(DR_SEG_GS
/* DR's TLS */,
368 DR_REG_NULL
, DR_REG_NULL
,
369 0, msan_retval_tls_offset
,
371 OPND_CREATE_INT32(0)));
372 #else /* !MSANDR_STANDALONE_TEST */
373 # ifdef MSANDR_NATIVE_EXEC
374 /* For optimized native exec, -mangle_app_seg and -private_loader are turned off,
375 * so we can reference msan_retval_tls_offset directly.
379 opnd_create_far_base_disp(DR_SEG_FS
, DR_REG_NULL
, DR_REG_NULL
, 0,
380 msan_retval_tls_offset
, OPSZ_PTR
),
381 OPND_CREATE_INT32(0)));
382 # else /* !MSANDR_NATIVE_EXEC */
383 /* XXX: the code below only works with -mangle_app_seg and -private_loader,
384 * which are turned off for optimized native exec
386 dr_save_reg(drcontext
, bb
, instr
, DR_REG_XAX
, SPILL_SLOT_1
);
388 // Clobbers nothing except xax.
390 dr_insert_get_seg_base(drcontext
, bb
, instr
, DR_SEG_FS
, DR_REG_XAX
);
393 // TODO: unpoison more bytes?
395 mov_st(drcontext
, OPND_CREATE_MEM64(DR_REG_XAX
, msan_retval_tls_offset
),
396 OPND_CREATE_INT32(0)));
398 dr_restore_reg(drcontext
, bb
, instr
, DR_REG_XAX
, SPILL_SLOT_1
);
399 # endif /* !MSANDR_NATIVE_EXEC */
400 // The original instruction is left untouched. The above instrumentation is just
402 #endif /* !MSANDR_STANDALONE_TEST */
405 void InstrumentIndirectBranch(void *drcontext
, instrlist_t
*bb
,
407 #ifdef MSANDR_STANDALONE_TEST
408 for (int i
= 0; i
< NUM_TLS_PARAM
; ++i
) {
411 opnd_create_far_base_disp(DR_SEG_GS
/* DR's TLS */,
412 DR_REG_NULL
, DR_REG_NULL
,
414 msan_param_tls_offset
+
417 OPND_CREATE_INT32(0)));
419 #else /* !MSANDR_STANDALONE_TEST */
420 # ifdef MSANDR_NATIVE_EXEC
421 for (int i
= 0; i
< NUM_TLS_PARAM
; ++i
) {
424 opnd_create_far_base_disp(DR_SEG_FS
, DR_REG_NULL
, DR_REG_NULL
, 0,
425 msan_param_tls_offset
+ i
*sizeof(void*),
427 OPND_CREATE_INT32(0)));
429 # else /* !MSANDR_NATIVE_EXEC */
430 /* XXX: the code below only works with -mangle_app_seg and -private_loader,
431 * which are turned off for optimized native exec
433 dr_save_reg(drcontext
, bb
, instr
, DR_REG_XAX
, SPILL_SLOT_1
);
435 // Clobbers nothing except xax.
437 dr_insert_get_seg_base(drcontext
, bb
, instr
, DR_SEG_FS
, DR_REG_XAX
);
440 // TODO: unpoison more bytes?
441 for (int i
= 0; i
< NUM_TLS_PARAM
; ++i
) {
443 mov_st(drcontext
, OPND_CREATE_MEMPTR(DR_REG_XAX
, msan_param_tls_offset
+
445 OPND_CREATE_INT32(0)));
448 dr_restore_reg(drcontext
, bb
, instr
, DR_REG_XAX
, SPILL_SLOT_1
);
449 # endif /* !MSANDR_NATIVE_EXEC */
450 // The original instruction is left untouched. The above instrumentation is just
452 #endif /* !MSANDR_STANDALONE_TEST */
455 #ifndef MSANDR_NATIVE_EXEC
456 // For use with binary search. Modules shouldn't overlap, so we shouldn't have
457 // to look at end_. If that can happen, we won't support such an application.
458 bool ModuleDataCompareStart(const ModuleData
&left
, const ModuleData
&right
) {
459 return left
.start_
< right
.start_
;
462 // Look up the module containing PC. Should be relatively fast, as it's called
463 // for each bb instrumentation.
464 ModuleData
*LookupModuleByPC(app_pc pc
) {
465 ModuleData fake_mod_data
;
466 fake_mod_data
.start_
= pc
;
467 std::vector
<ModuleData
>::iterator it
=
468 lower_bound(g_module_list
.begin(), g_module_list
.end(), fake_mod_data
,
469 ModuleDataCompareStart
);
470 // if (it == g_module_list.end())
472 if (it
== g_module_list
.end() || pc
< it
->start_
)
474 CHECK(it
->start_
<= pc
);
475 if (pc
>= it
->end_
) {
476 // We're past the end of this module. We shouldn't be in the next module,
477 // or lower_bound lied to us.
479 CHECK(it
== g_module_list
.end() || pc
< it
->start_
);
483 // OK, we found the module.
// Policy for code whose PC is not found in any loaded module:
// ShouldInstrumentPc() consults this when LookupModuleByPC() returns NULL.
// Currently such code is always instrumented.
487 bool ShouldInstrumentNonModuleCode() { return true; }
489 bool ShouldInstrumentModule(ModuleData
*mod_data
) {
490 // TODO(rnk): Flags for blacklist would get wired in here.
492 dr_get_proc_address(mod_data
->handle_
, "__msan_track_origins");
496 bool ShouldInstrumentPc(app_pc pc
, ModuleData
**pmod_data
) {
497 ModuleData
*mod_data
= LookupModuleByPC(pc
);
499 *pmod_data
= mod_data
;
500 if (mod_data
!= NULL
) {
501 // This module is on a blacklist.
502 if (!mod_data
->should_instrument_
) {
505 } else if (!ShouldInstrumentNonModuleCode()) {
510 #endif /* !MSANDR_NATIVE_EXEC */
512 // TODO(rnk): Make sure we instrument after __msan_init.
514 event_basic_block_app2app(void *drcontext
, void *tag
, instrlist_t
*bb
,
515 bool for_trace
, bool translating
) {
516 #ifndef MSANDR_NATIVE_EXEC
517 app_pc pc
= dr_fragment_app_pc(tag
);
518 if (ShouldInstrumentPc(pc
, NULL
))
519 CHECK(drutil_expand_rep_string(drcontext
, bb
));
520 #else /* MSANDR_NATIVE_EXEC */
521 CHECK(drutil_expand_rep_string(drcontext
, bb
));
522 #endif /* MSANDR_NATIVE_EXEC */
523 return DR_EMIT_PERSISTABLE
;
526 dr_emit_flags_t
event_basic_block(void *drcontext
, void *tag
, instrlist_t
*bb
,
527 bool for_trace
, bool translating
) {
528 app_pc pc
= dr_fragment_app_pc(tag
);
529 #ifndef MSANDR_NATIVE_EXEC
530 ModuleData
*mod_data
;
532 if (!ShouldInstrumentPc(pc
, &mod_data
))
533 return DR_EMIT_PERSISTABLE
;
536 dr_printf("============================================================\n");
538 std::string mod_path
= (mod_data
? mod_data
->path_
: "<no module, JITed?>");
539 if (mod_data
&& !mod_data
->executed_
) {
540 mod_data
->executed_
= true; // Nevermind this race.
541 dr_printf("Executing from new module: %s\n", mod_path
.c_str());
543 dr_printf("BB to be instrumented: %p [from %s]; translating = %s\n", pc
,
544 mod_path
.c_str(), translating
? "true" : "false");
546 // Match standard sanitizer trace format for free symbols.
547 // #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
548 dr_printf(" #0 %p (%s+%p)\n", pc
, mod_data
->path_
.c_str(),
549 pc
- mod_data
->start_
);
552 #endif /* !MSANDR_NATIVE_EXEC */
555 instrlist_disassemble(drcontext
, pc
, bb
, STDOUT
);
557 for (instr
= instrlist_first(bb
); instr
; instr
= instr_get_next(instr
)) {
558 dr_printf("opcode: %d\n", instr_get_opcode(instr
));
562 for (instr_t
*i
= instrlist_first(bb
); i
!= NULL
; i
= instr_get_next(i
)) {
563 int opcode
= instr_get_opcode(i
);
564 if (opcode
== OP_ret
|| opcode
== OP_ret_far
) {
565 InstrumentReturn(drcontext
, bb
, i
);
569 // These instructions hopefully cover all cases where control is transferred
570 // to a function in a different module (we only care about calls into
571 // compiler-instrumented modules).
572 // * call_ind is used for normal indirect calls.
573 // * jmp_ind is used for indirect tail calls, and calls through PLT (PLT
574 // stub includes a jump to an address from GOT).
575 if (opcode
== OP_call_ind
|| opcode
== OP_call_far_ind
||
576 opcode
== OP_jmp_ind
|| opcode
== OP_jmp_far_ind
) {
577 InstrumentIndirectBranch(drcontext
, bb
, i
);
581 if (!WantToInstrument(i
))
585 app_pc orig_pc
= dr_fragment_app_pc(tag
);
586 uint flags
= instr_get_arith_flags(i
);
587 dr_printf("+%d -> to be instrumented! [opcode=%d, flags = 0x%08X]\n",
588 instr_get_app_pc(i
) - orig_pc
, instr_get_opcode(i
), flags
);
591 if (instr_writes_memory(i
)) {
592 // Instrument memory writes
593 // bool instrumented_anything = false;
594 for (int d
= 0; d
< instr_num_dsts(i
); d
++) {
595 opnd_t op
= instr_get_dst(i
, d
);
596 if (!OperandIsInteresting(op
))
599 // CHECK(!instrumented_anything);
600 // instrumented_anything = true;
601 InstrumentMops(drcontext
, bb
, i
, op
, true);
602 break; // only instrumenting the first dst
607 // TODO: optimize away redundant restore-spill pairs?
610 pc
= dr_fragment_app_pc(tag
);
611 dr_printf("\nFinished instrumenting dynamorio_basic_block(PC=" PFX
")\n", pc
);
612 instrlist_disassemble(drcontext
, pc
, bb
, STDOUT
);
614 return DR_EMIT_PERSISTABLE
;
617 #ifndef MSANDR_NATIVE_EXEC
618 void event_module_load(void *drcontext
, const module_data_t
*info
,
620 // Insert the module into the list while maintaining the ordering.
621 ModuleData
mod_data(info
);
622 std::vector
<ModuleData
>::iterator it
=
623 upper_bound(g_module_list
.begin(), g_module_list
.end(), mod_data
,
624 ModuleDataCompareStart
);
625 it
= g_module_list
.insert(it
, mod_data
);
626 // Check if we should instrument this module.
627 it
->should_instrument_
= ShouldInstrumentModule(&*it
);
628 dr_module_set_should_instrument(info
->handle
, it
->should_instrument_
);
631 dr_printf("==DRMSAN== Loaded module: %s [%p...%p], instrumentation is %s\n",
632 info
->full_path
, info
->start
, info
->end
,
633 it
->should_instrument_
? "on" : "off");
636 void event_module_unload(void *drcontext
, const module_data_t
*info
) {
638 dr_printf("==DRMSAN== Unloaded module: %s [%p...%p]\n", info
->full_path
,
639 info
->start
, info
->end
);
641 // Remove the module from the list.
642 ModuleData
mod_data(info
);
643 std::vector
<ModuleData
>::iterator it
=
644 lower_bound(g_module_list
.begin(), g_module_list
.end(), mod_data
,
645 ModuleDataCompareStart
);
646 // It's a bug if we didn't actually find the module.
647 CHECK(it
!= g_module_list
.end() && it
->start_
== mod_data
.start_
&&
648 it
->end_
== mod_data
.end_
&& it
->path_
== mod_data
.path_
);
649 g_module_list
.erase(it
);
651 #endif /* !MSANDR_NATIVE_EXEC */
654 // Clean up so DR doesn't tell us we're leaking memory.
659 #ifdef MSANDR_STANDALONE_TEST
662 res
= dr_raw_tls_cfree(msan_retval_tls_offset
, NUM_TLS_RETVAL
);
664 res
= dr_raw_tls_cfree(msan_param_tls_offset
, NUM_TLS_PARAM
);
666 /* we do not bother to free the shadow memory */
667 #endif /* MSANDR_STANDALONE_TEST */
669 dr_printf("==DRMSAN== DONE\n");
672 bool event_filter_syscall(void *drcontext
, int sysnum
) {
673 // FIXME: only intercept syscalls with memory effects.
674 return true; /* intercept everything */
677 bool drsys_iter_memarg_cb(drsys_arg_t
*arg
, void *user_data
) {
682 if (!TESTANY(DRSYS_PARAM_OUT
, arg
->mode
))
685 size_t sz
= arg
->size
;
687 if (sz
> 0xFFFFFFFF) {
689 drsys_syscall_t
*syscall
= (drsys_syscall_t
*)user_data
;
691 res
= drsys_syscall_name(syscall
, &name
);
692 CHECK(res
== DRMF_SUCCESS
);
694 dr_printf("SANITY: syscall '%s' arg %d writes %llu bytes memory?!"
695 " Clipping to %llu.\n",
696 name
, arg
->ordinal
, (unsigned long long) sz
,
697 (unsigned long long)(sz
& 0xFFFFFFFF));
702 drsys_syscall_t
*syscall
= (drsys_syscall_t
*)user_data
;
704 res
= drsys_syscall_name(syscall
, &name
);
705 CHECK(res
== DRMF_SUCCESS
);
706 dr_printf("drsyscall: syscall '%s' arg %d wrote range [%p, %p)\n",
707 name
, arg
->ordinal
, arg
->start_addr
,
708 (char *)arg
->start_addr
+ sz
);
711 // We don't switch to the app context because __msan_unpoison() doesn't need TLS segments.
713 __msan_unpoison(arg
->start_addr
, sz
);
715 return true; /* keep going */
718 bool event_pre_syscall(void *drcontext
, int sysnum
) {
719 drsys_syscall_t
*syscall
;
720 drsys_sysnum_t sysnum_full
;
722 drsys_param_type_t ret_type
;
726 res
= drsys_cur_syscall(drcontext
, &syscall
);
727 CHECK(res
== DRMF_SUCCESS
);
729 res
= drsys_syscall_number(syscall
, &sysnum_full
);
730 CHECK(res
== DRMF_SUCCESS
);
731 CHECK(sysnum
== sysnum_full
.number
);
733 res
= drsys_syscall_is_known(syscall
, &known
);
734 CHECK(res
== DRMF_SUCCESS
);
736 res
= drsys_syscall_name(syscall
, &name
);
737 CHECK(res
== DRMF_SUCCESS
);
739 res
= drsys_syscall_return_type(syscall
, &ret_type
);
740 CHECK(res
== DRMF_SUCCESS
);
741 CHECK(ret_type
!= DRSYS_TYPE_INVALID
);
742 CHECK(!known
|| ret_type
!= DRSYS_TYPE_UNKNOWN
);
744 res
= drsys_iterate_memargs(drcontext
, drsys_iter_memarg_cb
, NULL
);
745 CHECK(res
== DRMF_SUCCESS
);
750 static bool IsInLoader(void *drcontext
) {
751 // TODO: This segment swap is inefficient. DR should just let us query the
752 // app segment base, which it has. Alternatively, if we disable
753 // -mangle_app_seg, then we won't need the swap.
754 bool need_swap
= !dr_using_app_state(drcontext
);
756 dr_switch_to_app_state(drcontext
);
757 bool is_in_loader
= __msan_is_in_loader();
759 dr_switch_to_dr_state(drcontext
);
763 void event_post_syscall(void *drcontext
, int sysnum
) {
764 drsys_syscall_t
*syscall
;
765 drsys_sysnum_t sysnum_full
;
766 bool success
= false;
769 res
= drsys_cur_syscall(drcontext
, &syscall
);
770 CHECK(res
== DRMF_SUCCESS
);
772 res
= drsys_syscall_number(syscall
, &sysnum_full
);
773 CHECK(res
== DRMF_SUCCESS
);
774 CHECK(sysnum
== sysnum_full
.number
);
776 res
= drsys_syscall_succeeded(syscall
, dr_syscall_get_result(drcontext
),
778 CHECK(res
== DRMF_SUCCESS
);
782 drsys_iterate_memargs(drcontext
, drsys_iter_memarg_cb
, (void *)syscall
);
783 CHECK(res
== DRMF_SUCCESS
);
786 // Our normal mmap interceptor can't intercept calls from the loader itself.
787 // This means we don't clear the shadow for calls to dlopen. For now, we
788 // solve this by intercepting mmap from ld.so here, but ideally we'd have a
789 // solution that doesn't rely on msandr.
791 // Be careful not to intercept maps done by the msan rtl. Otherwise we end up
792 // unpoisoning vast regions of memory and OOMing.
793 // TODO: __msan_unpoison() could "flush" large regions of memory like tsan
794 // does instead of doing a large memset. However, we need the memory to be
795 // zeroed, whereas tsan does not, so plain madvise is not enough.
796 if (success
&& (sysnum
== SYS_mmap
IF_NOT_X64(|| sysnum
== SYS_mmap2
))) {
797 if (IsInLoader(drcontext
)) {
798 app_pc base
= (app_pc
)dr_syscall_get_result(drcontext
);
800 drmf_status_t res
= drsys_pre_syscall_arg(drcontext
, 1, &size
);
801 CHECK(res
== DRMF_SUCCESS
);
803 dr_printf("unpoisoning for dlopen: [%p-%p]\n", base
, base
+ size
);
804 // We don't switch to the app context because __msan_unpoison() doesn't
805 // need TLS segments.
806 __msan_unpoison(base
, size
);
813 DR_EXPORT
void dr_init(client_id_t id
) {
819 #ifndef MSANDR_NATIVE_EXEC
820 // We should use drconfig to ignore these applications.
821 std::string app_name
= dr_get_application_name();
822 // This blacklist will still run these apps through DR's code cache. On the
823 // other hand, we are able to follow children of these apps.
824 // FIXME: Once DR has detach, we could just detach here. Alternatively,
825 // if DR had a fork or exec hook to let us decide there, that would be nice.
826 // FIXME: make the blacklist cmd-adjustable.
827 if (app_name
== "python" || app_name
== "python2.7" || app_name
== "bash" ||
828 app_name
== "sh" || app_name
== "true" || app_name
== "exit" ||
829 app_name
== "yes" || app_name
== "echo")
831 #endif /* !MSANDR_NATIVE_EXEC */
834 memset(&ops
, 0, sizeof(ops
));
835 ops
.struct_size
= sizeof(ops
);
836 ops
.analyze_unknown_syscalls
= false;
838 res
= drsys_init(id
, &ops
);
839 CHECK(res
== DRMF_SUCCESS
);
841 dr_register_filter_syscall_event(event_filter_syscall
);
842 drmgr_register_pre_syscall_event(event_pre_syscall
);
843 drmgr_register_post_syscall_event(event_post_syscall
);
844 res
= drsys_filter_all_syscalls();
845 CHECK(res
== DRMF_SUCCESS
);
847 #ifdef MSANDR_STANDALONE_TEST
850 if (!dr_raw_tls_calloc(®_seg
, &mock_msan_retval_tls_offset
, NUM_TLS_RETVAL
, 0))
852 CHECK(reg_seg
== DR_SEG_GS
/* x64 only! */);
853 if (!dr_raw_tls_calloc(®_seg
, &mock_msan_param_tls_offset
, NUM_TLS_PARAM
, 0))
855 CHECK(reg_seg
== DR_SEG_GS
/* x64 only! */);
856 /* alloc shadow memory */
857 if (mmap(SHADOW_MEMORY_BASE
, SHADOW_MEMORY_SIZE
, PROT_READ
|PROT_WRITE
,
858 MAP_PRIVATE
| MAP_ANON
, -1, 0) != SHADOW_MEMORY_BASE
) {
861 #endif /* MSANDR_STANDALONE_TEST */
862 InitializeMSanCallbacks();
864 // FIXME: the shadow is initialized earlier when DR calls one of our wrapper
865 // functions. This may change one day.
866 // TODO: make this more robust.
868 void *drcontext
= dr_get_current_drcontext();
870 dr_switch_to_app_state(drcontext
);
871 msan_retval_tls_offset
= __msan_get_retval_tls_offset();
872 msan_param_tls_offset
= __msan_get_param_tls_offset();
873 dr_switch_to_dr_state(drcontext
);
875 dr_printf("__msan_retval_tls offset: %d\n", msan_retval_tls_offset
);
876 dr_printf("__msan_param_tls offset: %d\n", msan_param_tls_offset
);
879 // Standard DR events.
880 dr_register_exit_event(event_exit
);
882 drmgr_priority_t priority
= {
883 sizeof(priority
), /* size of struct */
884 "msandr", /* name of our operation */
885 NULL
, /* optional name of operation we should precede */
886 NULL
, /* optional name of operation we should follow */
888 }; /* numeric priority */
890 drmgr_register_bb_app2app_event(event_basic_block_app2app
, &priority
);
891 drmgr_register_bb_instru2instru_event(event_basic_block
, &priority
);
892 #ifndef MSANDR_NATIVE_EXEC
893 drmgr_register_module_load_event(event_module_load
);
894 drmgr_register_module_unload_event(event_module_unload
);
895 #endif /* !MSANDR_NATIVE_EXEC */
896 __msan_dr_is_initialized();
897 __msan_set_indirect_call_wrapper(dr_app_handle_mbr_target
);
899 dr_printf("==MSANDR== Starting!\n");