Fixes for running with undefined-behavior sanitizer
[sbcl.git] / src / runtime / x86-64-arch.c
blob75437bea22e31a0e3141f7449ed7d900ed74a65b
/*
 * This software is part of the SBCL system. See the README file for
 * more information.
 *
 * This software is derived from the CMU CL system, which was
 * written at Carnegie Mellon University and released into the
 * public domain. The software is in the public domain and is
 * provided with absolutely no warranty. See the COPYING and CREDITS
 * files for more information.
 */
12 #include <stdio.h>
14 #include "sbcl.h"
15 #include "runtime.h"
16 #include "globals.h"
17 #include "validate.h"
18 #include "os.h"
19 #include "sbcl.h"
20 #include "arch.h"
21 #include "lispregs.h"
22 #include "signal.h"
23 #include "alloc.h"
24 #include "interrupt.h"
25 #include "interr.h"
26 #include "breakpoint.h"
27 #include "thread.h"
28 #include "pseudo-atomic.h"
29 #include "unaligned.h"
31 #include "genesis/static-symbols.h"
32 #include "genesis/symbol.h"
/* Breakpoint encoding, chosen by build feature: either the UD2
 * instruction (BREAKPOINT_WIDTH 2) or a single opcode byte, INTO or
 * INT3 (BREAKPOINT_WIDTH 1). */
#if defined(LISP_FEATURE_UD2_BREAKPOINTS)
# define UD2_INST 0x0b0f /* UD2 */
# define BREAKPOINT_WIDTH 2
#elif defined(LISP_FEATURE_INT4_BREAKPOINTS)
# define BREAKPOINT_INST 0xce /* INTO */
# define BREAKPOINT_WIDTH 1
#else
# define BREAKPOINT_INST 0xcc /* INT3 */
# define BREAKPOINT_WIDTH 1
#endif
/* ECX value returned by CPUID function 1; stored by arch_init(). */
unsigned int cpuid_fn1_ecx;

/* Becomes 1 when arch_init() finds the OSXSAVE and AVX feature bits set
 * and XGETBV reports both the XMM and YMM state bits; otherwise stays 0. */
unsigned int avx_supported = 0;
/* Execute the CPUID instruction for leaf `info` and sub-leaf `subinfo`,
 * storing the resulting EAX, EBX, ECX and EDX values through the four
 * output pointers (all of which must be non-NULL). */
static void cpuid(unsigned info, unsigned subinfo,
                  unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx)
{
#ifdef _MSC_VER
    int regs[4];
    /* __cpuidex, unlike __cpuid, forwards the sub-leaf in ECX, so
     * `subinfo` is honoured on this path as well. */
    __cpuidex(regs, (int)info, (int)subinfo);
    *eax = regs[0];
    *ebx = regs[1];
    *ecx = regs[2];
    *edx = regs[3];
#else
    __asm__("cpuid;"                                     /* assembly code */
            :"=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) /* outputs */
            :"a" (info), "c" (subinfo)     /* inputs: info in eax,
                                              subinfo in ecx */
            /* clobbers: none */
            );
#endif
}
/* Execute XGETBV with ECX = 0 and hand back the EAX and EDX results
 * through the two output pointers. */
static void xgetbv(unsigned *eax, unsigned *edx)
{
    unsigned lo, hi;

    __asm__("xgetbv;"
            : "=a" (lo), "=d" (hi)
            : "c" (0));
    *eax = lo;
    *edx = hi;
}
77 void arch_init(void)
79 unsigned int eax, ebx, ecx, edx;
81 cpuid(0, 0, &eax, &ebx, &ecx, &edx);
82 if (eax >= 1) { // see if we can execute basic id function 1
83 unsigned avx_mask = 0x18000000; // OXSAVE and AVX
84 cpuid(1, 0, &eax, &ebx, &ecx, &edx);
85 cpuid_fn1_ecx = ecx;
86 if ((ecx & avx_mask) == avx_mask) {
87 xgetbv(&eax, &edx);
88 if ((eax & 0x06) == 0x06) // YMM and XMM
89 avx_supported = 1;
#define FILL_VECTOR_T "FILL-VECTOR/T"

// Patch one opcode byte of FILL-VECTOR/T to enable the faster vector
// fill when CPUID function 7 reports Enhanced Repeat Movs/Stos.
// Using fixed offsets and bytes is no worse than what we do elsewhere.
void tune_asm_routines_for_microarch(void)
{
    // I don't know if this works on Windows
#ifndef _MSC_VER
    unsigned int a, b, c, d;

    cpuid(0, 0, &a, &b, &c, &d);
    if (a < 7)
        return;

    cpuid(7, 0, &a, &b, &c, &d);
    if (b & (1<<9)) // Enhanced Repeat Movs/Stos
        asm_routine_poke(FILL_VECTOR_T, 0x12, 0x7C); // Change JMP to JL
#endif
}
112 /* Undo code patches so that the core file applies to the most generic
113 microarchitecture on startup. As it happens, FILL-VECTOR/T is fine
114 either way, but in general this might not be true for code using
115 instructions that don't exist on some cpu family members */
116 void untune_asm_routines_for_microarch(void)
118 asm_routine_poke(FILL_VECTOR_T, 0x12, 0xEB); // Change JL to JMP
121 #ifndef _WIN64
122 os_vm_address_t
123 arch_get_bad_addr(int sig, siginfo_t *code, os_context_t *context)
125 return (os_vm_address_t)code->si_addr;
127 #endif
/*
 * hacking signal contexts
 *
 * (This depends both on architecture, which determines what we might
 * want to get to, and on OS, which determines how we get to it.)
 */
137 os_context_register_t *
138 context_eflags_addr(os_context_t *context)
140 #if defined __linux__ || defined __sun
141 /* KLUDGE: As of kernel 2.2.14 on Red Hat 6.2, there's code in the
142 * <sys/ucontext.h> file to define symbolic names for offsets into
143 * gregs[], but it's conditional on __USE_GNU and not defined, so
144 * we need to do this nasty absolute index magic number thing
145 * instead. */
146 return (os_context_register_t*)&context->uc_mcontext.gregs[17];
147 #elif defined LISP_FEATURE_FREEBSD || defined(__DragonFly__)
148 return &context->uc_mcontext.mc_rflags;
149 #elif defined LISP_FEATURE_DARWIN
150 return CONTEXT_ADDR_FROM_STEM(rflags);
151 #elif defined __OpenBSD__
152 return &context->sc_rflags;
153 #elif defined __NetBSD__
154 return CONTEXT_ADDR_FROM_STEM(RFLAGS);
155 #elif defined _WIN64
156 return (os_context_register_t*)&context->win32_context->EFlags;
157 #else
158 #error unsupported OS
159 #endif
162 void arch_skip_instruction(os_context_t *context)
164 /* Assuming we get here via an INT3 xxx instruction, the PC now
165 * points to the interrupt code (a Lisp value) so we just move
166 * past it. Skip the code; after that, if the code is an
167 * error-trap or cerror-trap then skip the data bytes that follow. */
169 long code;
171 /* Get and skip the Lisp interrupt code. */
172 code = *(char*)(*os_context_pc_addr(context))++;
173 switch (code)
175 case trap_Error:
176 case trap_Cerror:
177 skip_internal_error(context);
179 break;
181 case trap_Breakpoint: /* not tested */
182 case trap_FunEndBreakpoint: /* not tested */
183 break;
185 #ifdef LISP_FEATURE_SB_SAFEPOINT
186 case trap_GlobalSafepoint:
187 case trap_CspSafepoint:
188 #endif
189 case trap_PendingInterrupt:
190 case trap_Halt:
191 case trap_SingleStepAround:
192 case trap_SingleStepBefore:
193 case trap_InvalidArgCount:
194 /* only needed to skip the Code */
195 break;
197 default:
198 fprintf(stderr,"[arch_skip_inst invalid code %ld\n]\n",code);
199 break;
202 FSHOW((stderr,
203 "/[arch_skip_inst resuming at %x]\n",
204 *os_context_pc_addr(context)));
207 unsigned char *
208 arch_internal_error_arguments(os_context_t *context)
210 return 1 + (unsigned char *)(*os_context_pc_addr(context));
213 boolean
214 arch_pseudo_atomic_atomic(os_context_t *context)
216 return get_pseudo_atomic_atomic(arch_os_get_current_thread());
219 void
220 arch_set_pseudo_atomic_interrupted(os_context_t *context)
222 struct thread *thread = arch_os_get_current_thread();
223 set_pseudo_atomic_interrupted(thread);
226 void
227 arch_clear_pseudo_atomic_interrupted(os_context_t *context)
229 struct thread *thread = arch_os_get_current_thread();
230 clear_pseudo_atomic_interrupted(thread);
/*
 * This stuff seems to get called for TRACE and debug activity.
 */
237 unsigned int
238 arch_install_breakpoint(void *pc)
240 unsigned int result = *(unsigned int*)pc;
242 #ifndef LISP_FEATURE_UD2_BREAKPOINTS
243 *(char*)pc = BREAKPOINT_INST; /* x86 INT3 */
244 *((char*)pc+1) = trap_Breakpoint; /* Lisp trap code */
245 #else
246 *(char*)pc = UD2_INST & 0xff;
247 *((char*)pc+1) = UD2_INST >> 8;
248 *((char*)pc+2) = trap_Breakpoint;
249 #endif
251 return result;
/* Write the low-order bytes of `orig_inst` back to `pc`, least
 * significant byte first: two bytes, plus a third when
 * BREAKPOINT_WIDTH is greater than 1. */
void
arch_remove_breakpoint(void *pc, unsigned int orig_inst)
{
    unsigned char *bytes = pc;

    bytes[0] = (unsigned char)(orig_inst & 0xff);
    bytes[1] = (unsigned char)((orig_inst >> 8) & 0xff);
#if BREAKPOINT_WIDTH > 1
    bytes[2] = (unsigned char)((orig_inst >> 16) & 0xff);
#endif
}
/* Non-NULL only while a single step is in progress, in which case it
 * holds the PC location of the original instruction. */
unsigned int *single_stepping = NULL;

#ifdef CANNOT_GET_TO_SINGLE_STEP_FLAG
/* The three words in front of the stepped instruction, saved while the
 * single-step helper instructions occupy their place. */
unsigned int single_step_save1;
unsigned int single_step_save2;
unsigned int single_step_save3;
#endif
273 void
274 arch_do_displaced_inst(os_context_t *context, unsigned int orig_inst)
276 unsigned int *pc = (unsigned int*)(*os_context_pc_addr(context));
278 /* Put the original instruction back. */
279 arch_remove_breakpoint(pc, orig_inst);
281 #ifdef CANNOT_GET_TO_SINGLE_STEP_FLAG
282 /* Install helper instructions for the single step:
283 * pushf; or [esp],0x100; popf. */
284 single_step_save1 = *(pc-3);
285 single_step_save2 = *(pc-2);
286 single_step_save3 = *(pc-1);
287 *(pc-3) = 0x9c909090;
288 *(pc-2) = 0x00240c81;
289 *(pc-1) = 0x9d000001;
290 #else
291 *context_eflags_addr(context) |= 0x100;
292 #endif
294 single_stepping = pc;
296 #ifdef CANNOT_GET_TO_SINGLE_STEP_FLAG
297 *os_context_pc_addr(context) = (os_context_register_t)((char *)pc - 9);
298 #endif
301 void
302 arch_handle_breakpoint(os_context_t *context)
304 *os_context_pc_addr(context) -= BREAKPOINT_WIDTH;
305 handle_breakpoint(context);
308 void
309 arch_handle_fun_end_breakpoint(os_context_t *context)
311 *os_context_pc_addr(context) -= BREAKPOINT_WIDTH;
312 *os_context_pc_addr(context) =
313 (uword_t)handle_fun_end_breakpoint(context);
316 void
317 arch_handle_single_step_trap(os_context_t *context, int trap)
319 arch_skip_instruction(context);
320 /* On x86-64 the fdefn / function is always in RAX, so we pass
321 * 0 as the register_offset. */
322 handle_single_step_trap(context, trap, 0);
326 void
327 restore_breakpoint_from_single_step(os_context_t * context)
329 #ifdef CANNOT_GET_TO_SINGLE_STEP_FLAG
330 /* Un-install single step helper instructions. */
331 *(single_stepping-3) = single_step_save1;
332 *(single_stepping-2) = single_step_save2;
333 *(single_stepping-1) = single_step_save3;
334 #else
335 *context_eflags_addr(context) &= ~0x100;
336 #endif
337 /* Re-install the breakpoint if possible. */
338 if (((char *)*os_context_pc_addr(context) >
339 (char *)single_stepping) &&
340 ((char *)*os_context_pc_addr(context) <=
341 (char *)single_stepping + BREAKPOINT_WIDTH)) {
342 fprintf(stderr, "warning: couldn't reinstall breakpoint\n");
343 } else {
344 arch_install_breakpoint(single_stepping);
347 single_stepping = NULL;
348 return;
351 void
352 sigtrap_handler(int signal, siginfo_t *info, os_context_t *context)
354 unsigned int trap;
356 if (single_stepping) {
357 restore_breakpoint_from_single_step(context);
358 return;
361 /* This is just for info in case the monitor wants to print an
362 * approximation. */
363 access_control_stack_pointer(arch_os_get_current_thread()) =
364 (lispobj *)*os_context_sp_addr(context);
366 /* On entry %eip points just after the INT3 byte and aims at the
367 * 'kind' value (eg trap_Cerror). For error-trap and Cerror-trap a
368 * number of bytes will follow, the first is the length of the byte
369 * arguments to follow. */
370 trap = *(unsigned char *)(*os_context_pc_addr(context));
372 handle_trap(context, trap);
375 void
376 sigill_handler(int signal, siginfo_t *siginfo, os_context_t *context) {
377 /* Triggering SIGTRAP using int3 is unreliable on OS X/x86, so
378 * we need to use illegal instructions for traps.
380 #if defined(LISP_FEATURE_UD2_BREAKPOINTS) && !defined(LISP_FEATURE_MACH_EXCEPTION_HANDLER)
381 if (*((unsigned short *)*os_context_pc_addr(context)) == UD2_INST) {
382 *os_context_pc_addr(context) += 2;
383 return sigtrap_handler(signal, siginfo, context);
385 #elif defined(LISP_FEATURE_INT4_BREAKPOINTS) && !defined(LISP_FEATURE_MACH_EXCEPTION_HANDLER)
386 if (*((unsigned char *)*os_context_pc_addr(context)) == BREAKPOINT_INST) {
387 *os_context_pc_addr(context) += BREAKPOINT_WIDTH;
388 return sigtrap_handler(signal, siginfo, context);
390 #endif
392 fake_foreign_function_call(context);
393 lose("Unhandled SIGILL at %p.", *os_context_pc_addr(context));
396 #ifdef X86_64_SIGFPE_FIXUP
/* Exception flag bits in the low six bits of MXCSR. */
#define MXCSR_IE (0x01) /* Invalid Operation */
#define MXCSR_DE (0x02) /* Denormal */
#define MXCSR_ZE (0x04) /* Divide-by-Zero */
#define MXCSR_OE (0x08) /* Overflow */
#define MXCSR_UE (0x10) /* Underflow */
#define MXCSR_PE (0x20) /* Precision */

/* Translate an MXCSR value into an FPE_* si_code: the code of the
 * highest-priority exception flag that is set and not masked, or 0
 * when there is none. */
static inline int
mxcsr_to_code(unsigned int mxcsr)
{
    /* This order is defined at "Intel 64 and IA-32 Architectures
     * Software Developer's Manual" Volume 1: "Basic Architecture",
     * 4.9.2 "Floating-Point Exception Priority". */
    static const struct {
        unsigned int flag;
        int code;
    } by_priority[] = {
        { MXCSR_IE, FPE_FLTINV },
        { MXCSR_ZE, FPE_FLTDIV },
        { MXCSR_DE, FPE_FLTUND },
        { MXCSR_OE, FPE_FLTOVF },
        { MXCSR_UE, FPE_FLTUND },
        { MXCSR_PE, FPE_FLTRES },
    };
    /* Extract unmasked exception bits: the mask bits sit seven
     * positions above their flag bits. */
    unsigned int unmasked = mxcsr & ~(mxcsr >> 7) & 0x3F;
    unsigned int i;

    for (i = 0; i < sizeof by_priority / sizeof by_priority[0]; i++) {
        if (unmasked & by_priority[i].flag)
            return by_priority[i].code;
    }

    return 0;
}
429 static void
430 sigfpe_handler(int signal, siginfo_t *siginfo, os_context_t *context)
432 unsigned int *mxcsr = arch_os_context_mxcsr_addr(context);
434 #ifndef LISP_FEATURE_DARWIN
435 /* Darwin doesn't handle accrued bits right. */
436 if (siginfo->si_code == 0)
437 #endif
438 { /* XMM exception */
439 siginfo->si_code = mxcsr_to_code(*mxcsr);
441 /* Clear sticky exception flag. */
442 *mxcsr &= ~0x3F;
445 interrupt_handle_now(signal, siginfo, context);
447 #endif
449 void
450 arch_install_interrupt_handlers()
452 SHOW("entering arch_install_interrupt_handlers()");
454 /* Note: The old CMU CL code here used sigtrap_handler() to handle
455 * SIGILL as well as SIGTRAP. I couldn't see any reason to do
456 * things that way. So, I changed to separate handlers when
457 * debugging a problem on OpenBSD, where SBCL wasn't catching
458 * SIGILL properly, but was instead letting the process be
459 * terminated with an "Illegal instruction" output. If this change
460 * turns out to break something (maybe breakpoint handling on some
461 * OS I haven't tested on?) and we have to go back to the old CMU
462 * CL way, I hope there will at least be a comment to explain
463 * why.. -- WHN 2001-06-07 */
464 #if !defined(LISP_FEATURE_MACH_EXCEPTION_HANDLER) && !defined(LISP_FEATURE_WIN32)
465 undoably_install_low_level_interrupt_handler(SIGILL , sigill_handler);
466 undoably_install_low_level_interrupt_handler(SIGTRAP, sigtrap_handler);
467 #endif
469 #if defined(X86_64_SIGFPE_FIXUP) && !defined(LISP_FEATURE_WIN32)
470 undoably_install_low_level_interrupt_handler(SIGFPE, sigfpe_handler);
471 #endif
473 SHOW("returning from arch_install_interrupt_handlers()");
476 #ifdef LISP_FEATURE_LINKAGE_TABLE
/* FIXME: It might be cleaner to generate these from the lisp side of
 * things.
 */
481 void
482 arch_write_linkage_table_jmp(char *reloc_addr, void *target_addr)
484 reloc_addr[0] = 0xFF; /* Opcode for near jump to absolute reg/mem64. */
485 reloc_addr[1] = 0x25; /* ModRM #b00 100 101, i.e. RIP-relative. */
486 UNALIGNED_STORE32((reloc_addr+2), 0); /* 32-bit displacement field = 0 */
487 UNALIGNED_STORE64((reloc_addr+6), (uword_t)target_addr);
488 /* write a nop for good measure. */
489 reloc_addr[14] = 0x90;
492 void
493 arch_write_linkage_table_ref(void *reloc_addr, void *target_addr)
495 *(uword_t *)reloc_addr = (uword_t)target_addr;
498 #endif
/* These set up and check *both* the sse2 and x87 FPUs. While lisp code
   only uses the sse2 FPU, other code (such as libc) may use the x87 FPU.
 */
/* Return the current floating-point modes: the SSE2 control+status
 * word (MXCSR) with the x87 exception flags ORed into its low six bits
 * and with the six exception-mask bits (bits 7..12) inverted. */
unsigned int
arch_get_fp_modes()
{
    /* FNSTSW stores exactly 16 bits, so it gets a 16-bit destination;
     * no indeterminate upper bytes are ever read. */
    unsigned short x87_status;
    unsigned int mxcsr;

    /* volatile: these read live status registers, so the compiler must
     * not merge or hoist the reads. */
    __asm__ __volatile__ ("fnstsw %0" : "=m" (x87_status));
    __asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));

    /* flip exception mask bits */
    return ((x87_status & 0x3Fu) | mxcsr) ^ (0x3F << 7);
}
/* Memory image used with the FNSTENV / FLDENV instructions in
 * arch_set_fp_modes(). Only the control word and the status word are
 * accessed by name; the remaining fields are carried along untouched. */
struct fpenv
{
    unsigned short cw;          /* x87 control word */
    unsigned short unused1;
    unsigned short sw;          /* x87 status word */
    unsigned short unused2;
    unsigned int other_regs[5];
};
529 void
530 arch_set_fp_modes(unsigned int mxcsr)
532 struct fpenv f_env;
533 unsigned int temp;
535 /* turn trap enable bits into exception mask */
536 mxcsr ^= 0x3F << 7;
538 /* set x87 modes */
539 asm ("fnstenv %0" : "=m" (f_env));
540 /* set control word: always long double precision
541 * get traps and rounding from mxcsr word */
542 f_env.cw = 0x300 | ((mxcsr >> 7) & 0x3F) | (((mxcsr >> 13) & 0x3) << 10);
543 /* set status word: only override exception flags, from mxcsr */
544 f_env.sw &= ~0x3F;
545 f_env.sw |= (mxcsr & 0x3F);
547 asm ("fldenv %0" : : "m" (f_env));
549 /* now, simply, load up the mxcsr register */
550 temp = mxcsr;
551 asm ("ldmxcsr %0" : : "m" (temp));
#ifdef LISP_FEATURE_IMMOBILE_CODE
/// Return the Lisp object that fdefn's raw_addr slot jumps to.
/// This will either be:
/// (1) a simple-fun,
/// (2) a funcallable-instance with an embedded trampoline that makes
///     it resemble a simple-fun in terms of call convention, or
/// (3) a code-component with no simple-fun within it, that makes
///     closures and other funcallable-instances look like simple-funs.
/// Returns 0 when raw_addr is 0 or when the target is an assembly
/// routine; calls lose() when the slot cannot be decoded.
lispobj fdefn_callee_lispobj(struct fdefn* fdefn) {
    extern unsigned asm_routines_end;
    if (((lispobj)fdefn->raw_addr & 0xFE) == 0xE8) { // looks good
        int32_t offs = UNALIGNED_LOAD32((char*)&fdefn->raw_addr + 1);
        // Target = address just past the 5-byte instruction + rel32.
        // Done in unsigned 32-bit arithmetic so that wraparound is well
        // defined; signed int addition here could overflow.
        unsigned int raw_fun =
            (unsigned int)(uword_t)&fdefn->raw_addr
            + 5u                    // 5 = length of "JMP rel32"
            + (unsigned int)offs;
        switch (((unsigned char*)&fdefn->raw_addr)[5]) {
        case 0x00: // no closure/fin trampoline
            // If the target is an assembly routine, there is no simple-fun
            // that corresponds to the entry point. The code is kept live
            // by *ASSEMBLER-OBJECTS*. Otherwise, return the simple-fun.
            return raw_fun < asm_routines_end ? 0 : raw_fun - FUN_RAW_ADDR_OFFSET;
        case 0x48: // embedded funcallable instance trampoline
            return (raw_fun - (4<<WORD_SHIFT)) | FUN_POINTER_LOWTAG;
        case 0x90: // general closure/fin trampoline
            return (raw_fun - offsetof(struct code, constants)) | OTHER_POINTER_LOWTAG;
        default:   // unrecognized byte: fall through to lose() below
            break;
        }
    } else if (fdefn->raw_addr == 0)
        return 0;
    lose("Can't decode fdefn raw addr @ %p: %p\n", fdefn, fdefn->raw_addr);
}
#endif