%other-pointer-widetag derive-type: derive for simple-array.
[sbcl.git] / src / runtime / x86-64-arch.c
blobbf8379845630a3393cd8808dadd4448f055eb6ec
1 /*
2 * This software is part of the SBCL system. See the README file for
3 * more information.
5 * This software is derived from the CMU CL system, which was
6 * written at Carnegie Mellon University and released into the
7 * public domain. The software is in the public domain and is
8 * provided with absolutely no warranty. See the COPYING and CREDITS
9 * files for more information.
12 #define _GNU_SOURCE /* for REG_RAX etc. from sys/ucontext */
14 #include <stdio.h>
16 #include "genesis/sbcl.h"
17 #include "runtime.h"
18 #include "globals.h"
19 #include "validate.h"
20 #include "os.h"
21 #include "arch.h"
22 #include "lispregs.h"
23 #include <signal.h>
24 #include "interrupt.h"
25 #include "interr.h"
26 #include "breakpoint.h"
27 #include "thread.h"
28 #include "pseudo-atomic.h"
29 #include "align.h"
30 #include "search.h"
31 #include "var-io.h"
33 #include "genesis/fdefn.h"
34 #include "genesis/static-symbols.h"
35 #include "genesis/symbol.h"
36 #include "core.h"
37 #include "gc.h"
39 #define INT3_INST 0xCC
40 #define INTO_INST 0xCE
41 #define UD2_INST 0x0b0f
42 #define BREAKPOINT_WIDTH 1
44 int avx_supported = 0, avx2_supported = 0;
46 static void cpuid(unsigned info, unsigned subinfo,
47 unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx)
49 #ifdef _MSC_VER
50 int regs[4];
51 __cpuid(regs, info);
52 *eax = regs[0];
53 *ebx = regs[1];
54 *ecx = regs[2];
55 *edx = regs[3];
56 #else
57 __asm__("cpuid;" /* assembly code */
58 :"=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) /* outputs */
59 :"a" (info), "c" (subinfo) /* input: info into eax,
60 subinfo to ecx */
61 /* clobbers: none */
63 #endif
66 static void xgetbv(unsigned *eax, unsigned *edx)
68 __asm__("xgetbv;"
69 :"=a" (*eax), "=d" (*edx)
70 : "c" (0));
73 #define VECTOR_FILL_T "VECTOR-FILL/T"
75 // Poke in a byte that changes an opcode to enable faster vector fill.
76 // Using fixed offsets and bytes is no worse than what we do elsewhere.
77 void tune_asm_routines_for_microarch(void)
79 unsigned int eax, ebx, ecx, edx;
80 unsigned int cpuid_fn1_ecx = 0;
82 cpuid(0, 0, &eax, &ebx, &ecx, &edx);
83 if (eax >= 1) { // see if we can execute basic id function 1
84 unsigned avx_mask = 0x18000000; // OXSAVE and AVX
85 cpuid(1, 0, &eax, &ebx, &ecx, &edx);
86 cpuid_fn1_ecx = ecx;
87 if ((ecx & avx_mask) == avx_mask) {
88 xgetbv(&eax, &edx);
89 if ((eax & 0x06) == 0x06) { // YMM and XMM
90 avx_supported = 1;
91 cpuid(7, 0, &eax, &ebx, &ecx, &edx);
92 if (ebx & 0x20) {
93 avx2_supported = 1;
98 int our_cpu_feature_bits = 0;
99 // avx2_supported gets copied into bit 1 of *CPU-FEATURE-BITS*
100 if (avx2_supported) our_cpu_feature_bits |= 1;
101 // POPCNT = ECX bit 23, which gets copied into bit 2 in *CPU-FEATURE-BITS*
102 if (cpuid_fn1_ecx & (1<<23)) our_cpu_feature_bits |= 2;
103 SetSymbolValue(CPU_FEATURE_BITS, make_fixnum(our_cpu_feature_bits), 0);
105 // I don't know if this works on Windows
106 #ifndef _MSC_VER
107 cpuid(0, 0, &eax, &ebx, &ecx, &edx);
108 if (eax >= 7) {
109 cpuid(7, 0, &eax, &ebx, &ecx, &edx);
110 if (ebx & (1<<9)) // Enhanced Repeat Movs/Stos
111 asm_routine_poke(VECTOR_FILL_T, 0x12, 0x7C); // Change JMP to JL
113 #endif
116 /* Undo code patches so that the core file applies to the most generic
117 microarchitecture on startup. As it happens, FILL-VECTOR/T is fine
118 either way, but in general this might not be true for code using
119 instructions that don't exist on some cpu family members */
120 void untune_asm_routines_for_microarch(void)
122 asm_routine_poke(VECTOR_FILL_T, 0x12, 0xEB); // Change JL to JMP
123 SetSymbolValue(CPU_FEATURE_BITS, 0, 0);
126 #ifndef _WIN64
127 os_vm_address_t
128 arch_get_bad_addr(int __attribute__((unused)) sig,
129 siginfo_t *code,
130 os_context_t __attribute__((unused)) *context)
132 return (os_vm_address_t)code->si_addr;
134 #endif
138 * hacking signal contexts
140 * (This depends both on architecture, which determines what we might
141 * want to get to, and on OS, which determines how we get to it.)
144 // I don't have an easy way to test changes for these OSes, so just use this
145 // context visitor based on the old slightly-less-efficient way of doing it.
146 #if defined LISP_FEATURE_SUNOS || defined LISP_FEATURE_HAIKU
147 void visit_context_registers(void (*proc)(os_context_register_t, void*),
148 os_context_t *context, void* arg)
150 proc(os_context_pc(context), arg);
151 proc(*os_context_register_addr(context, reg_RAX), arg);
152 proc(*os_context_register_addr(context, reg_RCX), arg);
153 proc(*os_context_register_addr(context, reg_RDX), arg);
154 proc(*os_context_register_addr(context, reg_RBX), arg);
155 proc(*os_context_register_addr(context, reg_RSI), arg);
156 proc(*os_context_register_addr(context, reg_RDI), arg);
157 proc(*os_context_register_addr(context, reg_R8 ), arg);
158 proc(*os_context_register_addr(context, reg_R9 ), arg);
159 proc(*os_context_register_addr(context, reg_R10), arg);
160 proc(*os_context_register_addr(context, reg_R11), arg);
161 proc(*os_context_register_addr(context, reg_R12), arg);
162 proc(*os_context_register_addr(context, reg_R13), arg);
163 proc(*os_context_register_addr(context, reg_R14), arg);
164 proc(*os_context_register_addr(context, reg_R15), arg);
166 #endif
168 os_context_register_t *
169 os_context_flags_addr(os_context_t *context)
171 #if defined __linux__
172 /* KLUDGE: As of kernel 2.2.14 on Red Hat 6.2, there's code in the
173 * <sys/ucontext.h> file to define symbolic names for offsets into
174 * gregs[], but it's conditional on __USE_GNU and not defined, so
175 * we need to do this nasty absolute index magic number thing
176 * instead. */
177 return (os_context_register_t*)&context->uc_mcontext.gregs[17];
178 #elif defined LISP_FEATURE_SUNOS
179 return &context->uc_mcontext.gregs[REG_RFL];
180 #elif defined LISP_FEATURE_FREEBSD || defined(__DragonFly__)
181 return &context->uc_mcontext.mc_rflags;
182 #elif defined __HAIKU__
183 return &context->uc_mcontext.rflags;
184 #elif defined LISP_FEATURE_DARWIN
185 return CONTEXT_ADDR_FROM_STEM(rflags);
186 #elif defined __OpenBSD__
187 return &context->sc_rflags;
188 #elif defined __NetBSD__
189 return CONTEXT_ADDR_FROM_STEM(RFLAGS);
190 #elif defined _WIN64
191 return (os_context_register_t*)&context->win32_context->EFlags;
192 #else
193 #error unsupported OS
194 #endif
197 void arch_skip_instruction(os_context_t *context)
199 /* Assuming we get here via an INT3 xxx instruction, the PC now
200 * points to the interrupt code (a Lisp value) so we just move
201 * past it. Skip the code; after that, if the code is an
202 * error-trap or cerror-trap then skip the data bytes that follow. */
204 long code;
206 /* Get and skip the Lisp interrupt code. */
207 code = *(char*)(OS_CONTEXT_PC(context)++);
208 switch (code)
210 case trap_Error:
211 case trap_Cerror:
212 skip_internal_error(context);
213 break;
214 case trap_UninitializedLoad:
215 // Skip 1 byte. We can't encode that the internal_error_nargs is 1
216 // because it is not an SC+OFFSET that follows the trap code.
217 OS_CONTEXT_PC(context) += 1;
218 break;
219 case trap_Breakpoint: /* not tested */
220 case trap_FunEndBreakpoint: /* not tested */
221 break;
223 #ifdef LISP_FEATURE_SB_SAFEPOINT
224 case trap_GlobalSafepoint:
225 case trap_CspSafepoint:
226 #endif
227 case trap_PendingInterrupt:
228 case trap_Halt:
229 case trap_SingleStepAround:
230 case trap_SingleStepBefore:
231 case trap_InvalidArgCount:
232 /* only needed to skip the Code */
233 break;
235 default:
236 fprintf(stderr,"[arch_skip_inst invalid code %ld\n]\n",code);
237 break;
242 unsigned char *
243 arch_internal_error_arguments(os_context_t *context)
245 return 1 + (unsigned char *)OS_CONTEXT_PC(context);
248 bool arch_pseudo_atomic_atomic(struct thread *thread) {
249 return get_pseudo_atomic_atomic(thread);
252 void arch_set_pseudo_atomic_interrupted(struct thread *thread) {
253 set_pseudo_atomic_interrupted(thread);
256 void arch_clear_pseudo_atomic_interrupted(struct thread *thread) {
257 clear_pseudo_atomic_interrupted(thread);
261 * This stuff seems to get called for TRACE and debug activity.
264 unsigned int
265 arch_install_breakpoint(void *pc)
267 unsigned int result = UNALIGNED_LOAD32(pc);
268 #ifdef LISP_FEATURE_INT4_BREAKPOINTS
269 *(char*)pc = INTO_INST;
270 #else
271 *(char*)pc = INT3_INST;
272 #endif
273 *((char*)pc+1) = trap_Breakpoint; /* Lisp trap code */
274 return result;
277 void
278 arch_remove_breakpoint(void *pc, unsigned int orig_inst)
280 *((char *)pc) = orig_inst & 0xff;
281 *((char *)pc + 1) = (orig_inst & 0xff00) >> 8;
284 /* When single stepping, single_stepping holds the original instruction
285 * PC location. */
286 unsigned int *single_stepping = NULL;
287 #ifdef CANNOT_GET_TO_SINGLE_STEP_FLAG
288 unsigned int single_step_save1;
289 unsigned int single_step_save2;
290 unsigned int single_step_save3;
291 #endif
293 void
294 arch_do_displaced_inst(os_context_t *context, unsigned int orig_inst)
296 unsigned int *pc = (unsigned int*)OS_CONTEXT_PC(context);
298 /* Put the original instruction back. */
299 arch_remove_breakpoint(pc, orig_inst);
301 #ifdef CANNOT_GET_TO_SINGLE_STEP_FLAG
302 /* Install helper instructions for the single step:
303 * pushf; or [esp],0x100; popf. */
304 single_step_save1 = *(pc-3);
305 single_step_save2 = *(pc-2);
306 single_step_save3 = *(pc-1);
307 *(pc-3) = 0x9c909090;
308 *(pc-2) = 0x00240c81;
309 *(pc-1) = 0x9d000001;
310 #else
311 *os_context_flags_addr(context) |= 0x100;
312 #endif
314 single_stepping = pc;
316 #ifdef CANNOT_GET_TO_SINGLE_STEP_FLAG
317 OS_CONTEXT_PC(context) = (os_context_register_t)((char *)pc - 9);
318 #endif
321 void
322 arch_handle_breakpoint(os_context_t *context)
324 OS_CONTEXT_PC(context) -= BREAKPOINT_WIDTH;
325 handle_breakpoint(context);
328 void
329 arch_handle_fun_end_breakpoint(os_context_t *context)
331 OS_CONTEXT_PC(context) -= BREAKPOINT_WIDTH;
332 OS_CONTEXT_PC(context) = (uword_t)handle_fun_end_breakpoint(context);
335 void
336 arch_handle_single_step_trap(os_context_t *context, int trap)
338 arch_skip_instruction(context);
339 /* On x86-64 the fdefn / function is always in RAX, so we pass
340 * 0 as the register_offset. */
341 handle_single_step_trap(context, trap, 0);
345 void
346 restore_breakpoint_from_single_step(os_context_t * context)
348 #ifdef CANNOT_GET_TO_SINGLE_STEP_FLAG
349 /* Un-install single step helper instructions. */
350 *(single_stepping-3) = single_step_save1;
351 *(single_stepping-2) = single_step_save2;
352 *(single_stepping-1) = single_step_save3;
353 #else
354 *os_context_flags_addr(context) &= ~0x100;
355 #endif
356 /* Re-install the breakpoint if possible. */
357 if (((char *)OS_CONTEXT_PC(context) > (char *)single_stepping) &&
358 ((char *)OS_CONTEXT_PC(context) <= (char *)single_stepping + BREAKPOINT_WIDTH)) {
359 fprintf(stderr, "warning: couldn't reinstall breakpoint\n");
360 } else {
361 arch_install_breakpoint(single_stepping);
364 single_stepping = NULL;
365 return;
368 void
369 sigtrap_handler(int __attribute__((unused)) signal,
370 siginfo_t __attribute__((unused)) *info,
371 os_context_t *context)
373 #ifdef LISP_FEATURE_INT1_BREAKPOINTS
374 // ICEBP instruction = handle-pending-interrupt following pseudo-atomic
375 if (((unsigned char*)OS_CONTEXT_PC(context))[-1] == 0xF1)
376 return interrupt_handle_pending(context);
377 #endif
379 unsigned int trap;
381 if (single_stepping) {
382 restore_breakpoint_from_single_step(context);
383 return;
386 /* This is just for info in case the monitor wants to print an
387 * approximation. */
388 access_control_stack_pointer(get_sb_vm_thread()) =
389 (lispobj *)*os_context_sp_addr(context);
391 /* On entry %rip points just after the INT3 byte and aims at the
392 * 'kind' value (eg trap_Cerror). For error-trap and Cerror-trap a
393 * number of bytes will follow, the first is the length of the byte
394 * arguments to follow. */
395 trap = *(unsigned char *)OS_CONTEXT_PC(context);
396 #ifdef LISP_FEATURE_IMMOBILE_SPACE
397 if (trap == trap_UndefinedFunction) {
398 // The interrupted PC pins this fdefn. Sigtrap is delivered on the ordinary stack,
399 // not the alternate stack.
400 // (FIXME: an interior pointer to an fdefn _should_ pin it, but doesn't)
401 lispobj* fdefn = (lispobj*)(OS_CONTEXT_PC(context) & ~LOWTAG_MASK);
402 if (fdefn && widetag_of(fdefn) == FDEFN_WIDETAG) {
403 // Return to undefined-tramp
404 OS_CONTEXT_PC(context) = (uword_t)((struct fdefn*)fdefn)->raw_addr;
405 // with RAX containing the FDEFN
406 *os_context_register_addr(context,reg_RAX) =
407 make_lispobj(fdefn, OTHER_POINTER_LOWTAG);
408 return;
411 #endif
412 handle_trap(context, trap);
415 void
416 sigill_handler(int __attribute__((unused)) signal,
417 siginfo_t __attribute__((unused)) *siginfo,
418 os_context_t *context) {
419 unsigned char* pc = (void*)OS_CONTEXT_PC(context);
420 if (UNALIGNED_LOAD16(pc) == UD2_INST) {
421 OS_CONTEXT_PC(context) += 2;
422 return sigtrap_handler(signal, siginfo, context);
424 // Interrupt if overflow (INTO) raises SIGILL in 64-bit mode
425 if (*(unsigned char *)pc == INTO_INST) {
426 OS_CONTEXT_PC(context) += 1;
427 return sigtrap_handler(signal, siginfo, context);
430 fake_foreign_function_call(context);
431 #ifdef LISP_FEATURE_LINUX
432 extern void sb_dump_mcontext(char*,void*);
433 sb_dump_mcontext("SIGILL received", context);
434 #endif
435 lose("Unhandled SIGILL at %p.", pc);
438 #ifdef X86_64_SIGFPE_FIXUP
439 #define MXCSR_IE (0x01) /* Invalid Operation */
440 #define MXCSR_DE (0x02) /* Denormal */
441 #define MXCSR_ZE (0x04) /* Devide-by-Zero */
442 #define MXCSR_OE (0x08) /* Overflow */
443 #define MXCSR_UE (0x10) /* Underflow */
444 #define MXCSR_PE (0x20) /* Precision */
446 static inline int
447 mxcsr_to_code(unsigned int mxcsr)
449 /* Extract unmasked exception bits. */
450 mxcsr &= ~(mxcsr >> 7) & 0x3F;
452 /* This order is defined at "Intel 64 and IA-32 Architectures
453 * Software Developerfs Manual" Volume 1: "Basic Architecture",
454 * 4.9.2 "Floating-Point Exception Priority". */
455 if (mxcsr & MXCSR_IE)
456 return FPE_FLTINV;
457 else if (mxcsr & MXCSR_ZE)
458 return FPE_FLTDIV;
459 else if (mxcsr & MXCSR_DE)
460 return FPE_FLTUND;
461 else if (mxcsr & MXCSR_OE)
462 return FPE_FLTOVF;
463 else if (mxcsr & MXCSR_UE)
464 return FPE_FLTUND;
465 else if (mxcsr & MXCSR_PE)
466 return FPE_FLTRES;
468 return 0;
471 static void
472 sigfpe_handler(int signal, siginfo_t *siginfo, os_context_t *context)
474 unsigned int *mxcsr = arch_os_context_mxcsr_addr(context);
476 #ifndef LISP_FEATURE_DARWIN
477 /* Darwin doesn't handle accrued bits right. */
478 if (siginfo->si_code == 0)
479 #endif
480 { /* XMM exception */
481 siginfo->si_code = mxcsr_to_code(*mxcsr);
483 /* Clear sticky exception flag. */
484 *mxcsr &= ~0x3F;
487 interrupt_handle_now(signal, siginfo, context);
489 #endif
491 void
492 arch_install_interrupt_handlers()
494 /* Note: The old CMU CL code here used sigtrap_handler() to handle
495 * SIGILL as well as SIGTRAP. I couldn't see any reason to do
496 * things that way. So, I changed to separate handlers when
497 * debugging a problem on OpenBSD, where SBCL wasn't catching
498 * SIGILL properly, but was instead letting the process be
499 * terminated with an "Illegal instruction" output. If this change
500 * turns out to break something (maybe breakpoint handling on some
501 * OS I haven't tested on?) and we have to go back to the old CMU
502 * CL way, I hope there will at least be a comment to explain
503 * why.. -- WHN 2001-06-07 */
504 #ifndef LISP_FEATURE_WIN32
505 ll_install_handler(SIGILL , sigill_handler);
506 ll_install_handler(SIGTRAP, sigtrap_handler);
507 #endif
509 #if defined(X86_64_SIGFPE_FIXUP) && !defined(LISP_FEATURE_WIN32)
510 ll_install_handler(SIGFPE, sigfpe_handler);
511 #endif
514 void
515 arch_write_linkage_table_entry(int index, void *target_addr, int datap)
517 char *reloc_addr = (char*)ALIEN_LINKAGE_TABLE_SPACE_START + index * ALIEN_LINKAGE_TABLE_ENTRY_SIZE;
518 if (datap) {
519 *(uword_t *)reloc_addr = (uword_t)target_addr;
520 return;
522 reloc_addr[0] = 0xFF; /* Opcode for near jump to absolute reg/mem64. */
523 reloc_addr[1] = 0x25; /* ModRM #b00 100 101, i.e. RIP-relative. */
524 UNALIGNED_STORE32((reloc_addr+2), 2); /* 32-bit displacement field = 2 */
525 reloc_addr[6] = 0x66; reloc_addr[7] = 0x90; /* 2-byte NOP */
526 *(void**)(reloc_addr+8) = target_addr;
529 /* These setup and check *both* the sse2 and x87 FPUs. While lisp code
530 only uses the sse2 FPU, other code (such as libc) may use the x87 FPU.
533 unsigned int
534 arch_get_fp_modes()
536 unsigned int temp;
537 unsigned int result;
538 /* return the x87 exception flags ored in with the sse2
539 * control+status flags */
540 asm ("fnstsw %0" : "=m" (temp));
541 result = temp;
542 result &= 0x3F;
543 asm ("stmxcsr %0" : "=m" (temp));
544 result |= temp;
545 /* flip exception mask bits */
546 return result ^ (0x3F << 7);
549 struct fpenv
551 unsigned short cw;
552 unsigned short unused1;
553 unsigned short sw;
554 unsigned short unused2;
555 unsigned int other_regs[5];
558 void
559 arch_set_fp_modes(unsigned int mxcsr)
561 struct fpenv f_env;
562 unsigned int temp;
564 /* turn trap enable bits into exception mask */
565 mxcsr ^= 0x3F << 7;
567 /* set x87 modes */
568 asm ("fnstenv %0" : "=m" (f_env));
569 /* set control word: always long double precision
570 * get traps and rounding from mxcsr word */
571 f_env.cw = 0x300 | ((mxcsr >> 7) & 0x3F) | (((mxcsr >> 13) & 0x3) << 10);
572 /* set status word: only override exception flags, from mxcsr */
573 f_env.sw &= ~0x3F;
574 f_env.sw |= (mxcsr & 0x3F);
576 asm ("fldenv %0" : : "m" (f_env));
578 /* now, simply, load up the mxcsr register */
579 temp = mxcsr;
580 asm ("ldmxcsr %0" : : "m" (temp));
583 /* Return the tagged pointer for which 'entrypoint' is the starting address.
584 * This result will have SIMPLE_FUN_WIDETAG or FUNCALLABLE_INSTANCE_WIDETAG.
585 * If the thing has been forwarded, we do NOT return the newspace copy.
587 lispobj entrypoint_taggedptr(uword_t entrypoint) {
588 if (!entrypoint || points_to_asm_code_p(entrypoint)) return 0;
589 lispobj* phdr = (lispobj*)(entrypoint - 2*N_WORD_BYTES);
590 if (forwarding_pointer_p(phdr)) {
591 gc_assert(lowtag_of(forwarding_pointer_value(phdr)) == FUN_POINTER_LOWTAG);
592 // We can't assert on the widetag if forwarded, because defragmentation
593 // puts the new logical object at some totally different physical address.
594 // This function doesn't know if defrag is occurring.
595 } else {
596 __attribute__((unused)) unsigned char widetag = widetag_of(phdr);
597 gc_assert(widetag == SIMPLE_FUN_WIDETAG || widetag == FUNCALLABLE_INSTANCE_WIDETAG);
599 return make_lispobj(phdr, FUN_POINTER_LOWTAG);
602 /* Return the lisp object that fdefn's raw_addr slot jumps to.
603 * In the event that the referenced object was forwarded, this returns the un-forwarded
604 * object (the forwarded value is used to assert some invariants though).
605 * If the fdefn jumps to the UNDEFINED-FDEFN routine, then return 0.
607 * Some legacy baggage is evident: in the first implementation of immobile fdefns,
608 * an fdefn used a 'jmp rel32' (relative to itself), and so you could decode the
609 * jump target only given the address of the fdefn. That is no longer true; fdefns use
610 * absolute jumps. Therefore it is possible to call entrypoint_taggedptr() with any
611 * raw_addr, whether or not you know the fdefn whence the raw_addr was obtained. */
612 lispobj decode_fdefn_rawfun(struct fdefn* fdefn) {
613 return entrypoint_taggedptr((uword_t)fdefn->raw_addr);
616 #ifdef LISP_FEATURE_SB_THREAD
617 #include "genesis/vector.h"
618 #define LOCK_PREFIX 0xF0
619 #undef SHOW_PC_RECORDING
621 extern unsigned int max_alloc_point_counters;
623 #ifdef LISP_FEATURE_WIN32
624 extern CRITICAL_SECTION alloc_profiler_lock;
625 #else
626 extern pthread_mutex_t alloc_profiler_lock;
627 #endif
630 static unsigned int claim_index(int qty) // qty is 1 or 2
632 static bool warning_issued;
633 unsigned int index = fixnum_value(SYMBOL(N_PROFILE_SITES)->value);
634 SYMBOL(N_PROFILE_SITES)->value += make_fixnum(qty);
635 if (fixnum_value(SYMBOL(N_PROFILE_SITES)->value) <= max_alloc_point_counters)
636 return index;
637 if (!warning_issued) {
638 fprintf(stderr, "allocation profile buffer overflowed\n");
639 warning_issued = 1;
641 return 0; // use the overflow bin(s)
644 static bool NO_SANITIZE_MEMORY
645 instrumentp(uword_t* sp, uword_t** pc, uword_t* old_word)
647 int __attribute__((unused)) ret = mutex_acquire(&alloc_profiler_lock);
648 gc_assert(ret);
649 uword_t next_pc = *sp;
650 // The instrumentation site was 8-byte aligned
651 uword_t return_pc = ALIGN_DOWN(next_pc, 8);
652 #if 0
653 if (!(return_pc >= instrument_from && return_pc < instrument_to))
654 return 0;
655 #endif
656 uword_t word = *(uword_t*)return_pc;
657 unsigned char opcode = word & 0xFF;
658 // Adjust the return PC to where the call instruction was,
659 // not the instruction after it.
660 *sp = return_pc;
661 // If > 1 thread called this routine at the same time,
662 // one of them would already have patched the call site.
663 if (opcode == LOCK_PREFIX)
664 return 0;
665 *pc = (uword_t*)return_pc;
666 *old_word = word;
667 return 1;
670 // logical index 'index' in the metadata array stores the code component
671 // and pc-offset relative to the component base address
672 static void record_pc(char* pc, unsigned int index, bool sizedp)
674 lispobj *code = component_ptr_from_pc(pc);
675 if (!code) {
676 fprintf(stderr, "can't identify code @ %p\n", pc);
677 } else {
678 #ifdef SHOW_PC_RECORDING
679 fprintf(stderr, "%#lx (%#lx) -> %d%s\n",
680 (uword_t)pc, make_lispobj(code, OTHER_POINTER_LOWTAG),
681 index, sizedp?" (sized)":"");
682 #endif
684 #if 0
685 if (!code) {
686 int ret = mutex_acquire(&free_pages_lock);
687 gc_assert(ret);
688 ensure_region_closed(code_region);
689 int ret = mutex_release(&free_pages_lock);
690 gc_assert(ret);
691 code = component_ptr_from_pc(pc);
693 #endif
694 struct vector* v = VECTOR(alloc_profile_data);
695 index <<= 1;
696 if (sizedp) {
697 v->data[index] = v->data[index+1] = NIL;
698 index += 2;
700 // Wasn't the point of code serial# that you don't store
701 // code blob pointers into the various profiling buffers? (FIXME?)
702 if (code) {
703 vector_notice_pointer_store(&v->data[index]);
704 v->data[index] = make_lispobj(code, OTHER_POINTER_LOWTAG);
705 // do not need to take notice of a fixnum store
706 v->data[index+1] = make_fixnum((lispobj)pc - (lispobj)code);
707 } else {
708 gc_assert(!((uword_t)pc & LOWTAG_MASK));
709 v->data[index] = make_fixnum(-1); // no code component found
710 v->data[index+1] = (lispobj)pc;
714 void allocation_tracker_counted(uword_t* sp)
716 uword_t *pc, word_at_pc;
717 if (instrumentp(sp, &pc, &word_at_pc)) {
718 unsigned int index = claim_index(1);
719 if (index == 0)
720 index = 2; // reserved overflow counter for fixed-size alloc
721 uword_t disp = index * 8;
722 int base_reg = -1;
723 if ((word_at_pc & 0xff) == 0xE8) {
724 // following is a 1-byte NOP and a dummy "TEST imm8" where the imm8
725 // encodes a register number.
726 base_reg = word_at_pc >> 56;
727 } else {
728 lose("Unexpected instruction format @ %p", pc);
730 // rewrite call into: LOCK INC QWORD PTR, [Rbase+n] ; opcode = 0xFF / 0
731 uword_t new_inst = 0xF0 | ((0x48|(base_reg>>3)) << 8) // w + possibly 'b'
732 | (0xFF << 16) | ((0x80L+(base_reg&7)) << 24) | (disp << 32);
733 // Ensure atomicity of the write. A plain store would probably do,
734 // but since this is self-modifying code, the most stringent memory
735 // order is prudent.
736 if (!__sync_bool_compare_and_swap(pc, word_at_pc, new_inst))
737 lose("alloc profiler failed to rewrite instruction @ %p", pc);
738 if (index != 2)
739 record_pc((char*)pc, index, 0);
741 int __attribute__((unused)) ret = mutex_release(&alloc_profiler_lock);
742 gc_assert(ret);
745 void allocation_tracker_sized(uword_t* sp)
747 uword_t *pc, word_at_pc;
748 if (instrumentp(sp, &pc, &word_at_pc)) {
749 int index = claim_index(2);
750 uword_t word_after_pc = pc[1];
751 int pair = word_at_pc >> 56;
752 int base_reg = pair & 0xf;
753 int size_reg = pair >> 4;
754 // rewrite call into:
755 // LOCK INC QWORD PTR, [Rbase+n] ; opcode = 0xFF / 0
756 uword_t disp = index * 8;
757 uword_t new_inst1 = 0xF0 | ((0x48 | (base_reg>>3)) << 8) // w + b
758 | (0xFF << 16) | ((0x80L+(base_reg&7)) << 24) | (disp << 32);
759 // LOCK ADD [Rbase+n+8], Rsize ; opcode = 0x01
760 int prefix = 0x48 | ((size_reg >> 3) << 2) | (base_reg >> 3); // w + r + b
761 int modrm = 0x80 | ((size_reg & 7) << 3) | (base_reg & 7);
762 disp = (1 + index) * 8;
763 uword_t new_inst2 =
764 0xF0 | (prefix << 8) | (0x01 << 16) | ((long)modrm << 24) | (disp << 32);
765 // Overwrite the second instruction first, because as soon as the CALL
766 // opcode is changed, fallthrough to the next instruction occurs.
767 if (!__sync_bool_compare_and_swap(pc+1, word_after_pc, new_inst2) ||
768 !__sync_bool_compare_and_swap(pc, word_at_pc, new_inst1))
769 lose("alloc profiler failed to rewrite instructions @ %p", pc);
770 if (index != 0) // can't record a PC for the overflow counts
771 record_pc((char*)pc, index, 1);
773 int __attribute__((unused)) ret = mutex_release(&alloc_profiler_lock);
774 gc_assert(ret);
776 #endif
778 __attribute__((sysv_abi)) lispobj call_into_lisp(lispobj fun, lispobj *args, int nargs) {
779 extern lispobj call_into_lisp_(lispobj, lispobj *, int, struct thread *)
780 __attribute__((sysv_abi));
781 return call_into_lisp_(fun, args, nargs, get_sb_vm_thread());
784 lispobj call_into_lisp_first_time(lispobj fun, lispobj *args, int nargs) {
785 extern lispobj call_into_lisp_first_time_(lispobj, lispobj *, int, struct thread *)
786 __attribute__((sysv_abi));
787 return call_into_lisp_first_time_(fun, args, nargs, get_sb_vm_thread());
790 #include "x86-arch-shared.inc"