2 * This software is part of the SBCL system. See the README file for
5 * This software is derived from the CMU CL system, which was
6 * written at Carnegie Mellon University and released into the
7 * public domain. The software is in the public domain and is
8 * provided with absolutely no warranty. See the COPYING and CREDITS
9 * files for more information.
12 #define _GNU_SOURCE /* for REG_RAX etc. from sys/ucontext */
16 #include "genesis/sbcl.h"
24 #include "interrupt.h"
26 #include "breakpoint.h"
28 #include "pseudo-atomic.h"
33 #include "genesis/fdefn.h"
34 #include "genesis/static-symbols.h"
35 #include "genesis/symbol.h"
39 #define INT3_INST 0xCC
40 #define INTO_INST 0xCE
41 #define UD2_INST 0x0b0f
42 #define BREAKPOINT_WIDTH 1
/* CPU feature flags probed at startup by tune_asm_routines_for_microarch.
 * NOTE(review): this file is a corrupted extraction -- logical lines are
 * fractured and the original file's line numbers are embedded in the text;
 * restore the pristine SBCL source before making code changes. */
44 int avx_supported
= 0, avx2_supported
= 0;
/* Execute the CPUID instruction for leaf 'info', subleaf 'subinfo';
 * results come back through *eax..*edx.
 * NOTE(review): interior lines of this function are missing from this
 * extraction (original line numbering jumps from 47 to 57). */
46 static void cpuid(unsigned info
, unsigned subinfo
,
47 unsigned *eax
, unsigned *ebx
, unsigned *ecx
, unsigned *edx
)
57 __asm__("cpuid;" /* assembly code */
58 :"=a" (*eax
), "=b" (*ebx
), "=c" (*ecx
), "=d" (*edx
) /* outputs */
59 :"a" (info
), "c" (subinfo
) /* input: info into eax,
/* Read the XCR0 extended control register into *eax/*edx.
 * NOTE(review): the asm statement's opcode line is missing from this
 * extraction; only the output constraints survive. */
66 static void xgetbv(unsigned *eax
, unsigned *edx
)
69 :"=a" (*eax
), "=d" (*edx
)
73 #define VECTOR_FILL_T "VECTOR-FILL/T"
75 // Poke in a byte that changes an opcode to enable faster vector fill.
76 // Using fixed offsets and bytes is no worse than what we do elsewhere.
/* Probe CPUID for AVX/AVX2/POPCNT support, publish the result into the
 * Lisp symbol *CPU-FEATURE-BITS*, and patch the VECTOR-FILL/T assembly
 * routine (JMP -> JL at offset 0x12) when Enhanced REP MOVSB/STOSB is
 * advertised (CPUID.7:EBX bit 9).
 * NOTE(review): several interior lines (braces, assignments to
 * avx_supported/avx2_supported, #ifdef branches) are missing here. */
77 void tune_asm_routines_for_microarch(void)
79 unsigned int eax
, ebx
, ecx
, edx
;
80 unsigned int cpuid_fn1_ecx
= 0;
82 cpuid(0, 0, &eax
, &ebx
, &ecx
, &edx
);
83 if (eax
>= 1) { // see if we can execute basic id function 1
84 unsigned avx_mask
= 0x18000000; // OXSAVE and AVX
85 cpuid(1, 0, &eax
, &ebx
, &ecx
, &edx
);
87 if ((ecx
& avx_mask
) == avx_mask
) {
89 if ((eax
& 0x06) == 0x06) { // YMM and XMM
91 cpuid(7, 0, &eax
, &ebx
, &ecx
, &edx
);
98 int our_cpu_feature_bits
= 0;
99 // avx2_supported gets copied into bit 1 of *CPU-FEATURE-BITS*
100 if (avx2_supported
) our_cpu_feature_bits
|= 1;
101 // POPCNT = ECX bit 23, which gets copied into bit 2 in *CPU-FEATURE-BITS*
102 if (cpuid_fn1_ecx
& (1<<23)) our_cpu_feature_bits
|= 2;
103 SetSymbolValue(CPU_FEATURE_BITS
, make_fixnum(our_cpu_feature_bits
), 0);
105 // I don't know if this works on Windows
107 cpuid(0, 0, &eax
, &ebx
, &ecx
, &edx
);
109 cpuid(7, 0, &eax
, &ebx
, &ecx
, &edx
);
110 if (ebx
& (1<<9)) // Enhanced Repeat Movs/Stos
111 asm_routine_poke(VECTOR_FILL_T
, 0x12, 0x7C); // Change JMP to JL
116 /* Undo code patches so that the core file applies to the most generic
117 microarchitecture on startup. As it happens, FILL-VECTOR/T is fine
118 either way, but in general this might not be true for code using
119 instructions that don't exist on some cpu family members */
/* Revert the microarch-specific patch (JL back to JMP) and clear
 * *CPU-FEATURE-BITS* so a saved core is generic on restart. */
120 void untune_asm_routines_for_microarch(void)
122 asm_routine_poke(VECTOR_FILL_T
, 0x12, 0xEB); // Change JL to JMP
123 SetSymbolValue(CPU_FEATURE_BITS
, 0, 0);
/* Return the faulting address for a memory fault, taken from the
 * siginfo's si_addr field.
 * NOTE(review): the return-type line and the 'siginfo_t *code' parameter
 * line are missing from this extraction; 'code' is referenced below. */
128 arch_get_bad_addr(int __attribute__((unused
)) sig
,
130 os_context_t
__attribute__((unused
)) *context
)
132 return (os_vm_address_t
)code
->si_addr
;
138 * hacking signal contexts
140 * (This depends both on architecture, which determines what we might
141 * want to get to, and on OS, which determines how we get to it.)
144 // I don't have an easy way to test changes for these OSes, so just use this
145 // context visitor based on the old slightly-less-efficient way of doing it.
146 #if defined LISP_FEATURE_SUNOS || defined LISP_FEATURE_HAIKU
/* Apply 'proc' to the context's PC and to each general-purpose register
 * listed below (RAX..RDX, RBX, RSI, RDI, R8..R15), passing 'arg' through.
 * Used on SunOS/Haiku where the more efficient visitor is unavailable.
 * NOTE(review): RSP/RBP are not visited in the visible lines -- confirm
 * against the pristine source whether that is intentional. */
147 void visit_context_registers(void (*proc
)(os_context_register_t
, void*),
148 os_context_t
*context
, void* arg
)
150 proc(os_context_pc(context
), arg
);
151 proc(*os_context_register_addr(context
, reg_RAX
), arg
);
152 proc(*os_context_register_addr(context
, reg_RCX
), arg
);
153 proc(*os_context_register_addr(context
, reg_RDX
), arg
);
154 proc(*os_context_register_addr(context
, reg_RBX
), arg
);
155 proc(*os_context_register_addr(context
, reg_RSI
), arg
);
156 proc(*os_context_register_addr(context
, reg_RDI
), arg
);
157 proc(*os_context_register_addr(context
, reg_R8
), arg
);
158 proc(*os_context_register_addr(context
, reg_R9
), arg
);
159 proc(*os_context_register_addr(context
, reg_R10
), arg
);
160 proc(*os_context_register_addr(context
, reg_R11
), arg
);
161 proc(*os_context_register_addr(context
, reg_R12
), arg
);
162 proc(*os_context_register_addr(context
, reg_R13
), arg
);
163 proc(*os_context_register_addr(context
, reg_R14
), arg
);
164 proc(*os_context_register_addr(context
, reg_R15
), arg
);
/* Return the address of the RFLAGS/EFLAGS slot within an OS signal
 * context; one #if branch per supported OS/libc layout. */
168 os_context_register_t
*
169 os_context_flags_addr(os_context_t
*context
)
171 #if defined __linux__
172 /* KLUDGE: As of kernel 2.2.14 on Red Hat 6.2, there's code in the
173 * <sys/ucontext.h> file to define symbolic names for offsets into
174 * gregs[], but it's conditional on __USE_GNU and not defined, so
175 * we need to do this nasty absolute index magic number thing
177 return (os_context_register_t
*)&context
->uc_mcontext
.gregs
[17];
178 #elif defined LISP_FEATURE_SUNOS
179 return &context
->uc_mcontext
.gregs
[REG_RFL
];
180 #elif defined LISP_FEATURE_FREEBSD || defined(__DragonFly__)
181 return &context
->uc_mcontext
.mc_rflags
;
182 #elif defined __HAIKU__
183 return &context
->uc_mcontext
.rflags
;
184 #elif defined LISP_FEATURE_DARWIN
185 return CONTEXT_ADDR_FROM_STEM(rflags
);
186 #elif defined __OpenBSD__
187 return &context
->sc_rflags
;
188 #elif defined __NetBSD__
189 return CONTEXT_ADDR_FROM_STEM(RFLAGS
);
191 return (os_context_register_t
*)&context
->win32_context
->EFlags
;
193 #error unsupported OS
/* Advance the context PC past a trap instruction and its operand bytes,
 * dispatching on the trap code that follows the INT3.
 * NOTE(review): the switch statement header, several case bodies and
 * break statements are missing from this extraction. */
197 void arch_skip_instruction(os_context_t
*context
)
199 /* Assuming we get here via an INT3 xxx instruction, the PC now
200 * points to the interrupt code (a Lisp value) so we just move
201 * past it. Skip the code; after that, if the code is an
202 * error-trap or cerror-trap then skip the data bytes that follow. */
206 /* Get and skip the Lisp interrupt code. */
207 code
= *(char*)(OS_CONTEXT_PC(context
)++);
212 skip_internal_error(context
);
214 case trap_UninitializedLoad
:
215 // Skip 1 byte. We can't encode that the internal_error_nargs is 1
216 // because it is not an SC+OFFSET that follows the trap code.
217 OS_CONTEXT_PC(context
) += 1;
219 case trap_Breakpoint
: /* not tested */
220 case trap_FunEndBreakpoint
: /* not tested */
223 #ifdef LISP_FEATURE_SB_SAFEPOINT
224 case trap_GlobalSafepoint
:
225 case trap_CspSafepoint
:
227 case trap_PendingInterrupt
:
229 case trap_SingleStepAround
:
230 case trap_SingleStepBefore
:
231 case trap_InvalidArgCount
:
232 /* only needed to skip the Code */
236 fprintf(stderr
,"[arch_skip_inst invalid code %ld\n]\n",code
);
/* Return a pointer to the internal-error argument bytes: one byte past
 * the trap-code byte at the interrupted PC. (Return type line missing
 * from this extraction.) */
243 arch_internal_error_arguments(os_context_t
*context
)
245 return 1 + (unsigned char *)OS_CONTEXT_PC(context
);
/* True iff 'thread' is currently inside a pseudo-atomic section. */
248 bool arch_pseudo_atomic_atomic(struct thread
*thread
) {
249 return get_pseudo_atomic_atomic(thread
);
/* Flag that an interrupt arrived during a pseudo-atomic section. */
252 void arch_set_pseudo_atomic_interrupted(struct thread
*thread
) {
253 set_pseudo_atomic_interrupted(thread
);
/* Clear the pseudo-atomic-interrupted flag for 'thread'. */
256 void arch_clear_pseudo_atomic_interrupted(struct thread
*thread
) {
257 clear_pseudo_atomic_interrupted(thread
);
261 * This stuff seems to get called for TRACE and debug activity.
/* Write a breakpoint (INT3, or INTO when INT4 breakpoints are enabled)
 * plus the trap_Breakpoint code byte at 'pc', returning the original
 * 32-bit word so arch_remove_breakpoint can restore it.
 * (Return-type line and #else/#endif lines missing from extraction.) */
265 arch_install_breakpoint(void *pc
)
267 unsigned int result
= UNALIGNED_LOAD32(pc
);
268 #ifdef LISP_FEATURE_INT4_BREAKPOINTS
269 *(char*)pc
= INTO_INST
;
271 *(char*)pc
= INT3_INST
;
273 *((char*)pc
+1) = trap_Breakpoint
; /* Lisp trap code */
/* Restore the two instruction bytes that arch_install_breakpoint
 * overwrote (low 16 bits of 'orig_inst') at 'pc'. */
278 arch_remove_breakpoint(void *pc
, unsigned int orig_inst
)
280 *((char *)pc
) = orig_inst
& 0xff;
281 *((char *)pc
+ 1) = (orig_inst
& 0xff00) >> 8;
/* Global single-step state: 'single_stepping' is non-NULL while a
 * displaced instruction is being stepped over; the save slots hold the
 * three words clobbered by the pushf/or/popf helper sequence on targets
 * where the trap flag cannot be set directly in the context. */
284 /* When single stepping, single_stepping holds the original instruction
286 unsigned int *single_stepping
= NULL
;
287 #ifdef CANNOT_GET_TO_SINGLE_STEP_FLAG
288 unsigned int single_step_save1
;
289 unsigned int single_step_save2
;
290 unsigned int single_step_save3
;
/* Single-step over the original instruction at the breakpoint: restore
 * it, arm single-stepping (either via helper instructions patched in
 * before the PC, or by setting the trap flag, bit 0x100 of RFLAGS),
 * and record the stepping site in 'single_stepping'. */
294 arch_do_displaced_inst(os_context_t
*context
, unsigned int orig_inst
)
296 unsigned int *pc
= (unsigned int*)OS_CONTEXT_PC(context
);
298 /* Put the original instruction back. */
299 arch_remove_breakpoint(pc
, orig_inst
);
301 #ifdef CANNOT_GET_TO_SINGLE_STEP_FLAG
302 /* Install helper instructions for the single step:
303 * pushf; or [esp],0x100; popf. */
304 single_step_save1
= *(pc
-3);
305 single_step_save2
= *(pc
-2);
306 single_step_save3
= *(pc
-1);
307 *(pc
-3) = 0x9c909090;
308 *(pc
-2) = 0x00240c81;
309 *(pc
-1) = 0x9d000001;
311 *os_context_flags_addr(context
) |= 0x100;
314 single_stepping
= pc
;
316 #ifdef CANNOT_GET_TO_SINGLE_STEP_FLAG
317 OS_CONTEXT_PC(context
) = (os_context_register_t
)((char *)pc
- 9);
/* Back the PC up over the INT3 byte and dispatch to the generic
 * breakpoint handler. */
322 arch_handle_breakpoint(os_context_t
*context
)
324 OS_CONTEXT_PC(context
) -= BREAKPOINT_WIDTH
;
325 handle_breakpoint(context
);
/* Back the PC up over the INT3 byte, then resume at the address the
 * function-end-breakpoint handler computes. */
329 arch_handle_fun_end_breakpoint(os_context_t
*context
)
331 OS_CONTEXT_PC(context
) -= BREAKPOINT_WIDTH
;
332 OS_CONTEXT_PC(context
) = (uword_t
)handle_fun_end_breakpoint(context
);
/* Skip the trap instruction, then hand off to the generic single-step
 * handler with register_offset 0 (RAX). */
336 arch_handle_single_step_trap(os_context_t
*context
, int trap
)
338 arch_skip_instruction(context
);
339 /* On x86-64 the fdefn / function is always in RAX, so we pass
340 * 0 as the register_offset. */
341 handle_single_step_trap(context
, trap
, 0);
/* After stepping over the displaced instruction: undo the stepping
 * arrangement (helper instructions or trap flag), re-install the
 * breakpoint unless the PC still sits inside it, then clear the
 * single-stepping state. */
346 restore_breakpoint_from_single_step(os_context_t
* context
)
348 #ifdef CANNOT_GET_TO_SINGLE_STEP_FLAG
349 /* Un-install single step helper instructions. */
350 *(single_stepping
-3) = single_step_save1
;
351 *(single_stepping
-2) = single_step_save2
;
352 *(single_stepping
-1) = single_step_save3
;
354 *os_context_flags_addr(context
) &= ~0x100;
356 /* Re-install the breakpoint if possible. */
357 if (((char *)OS_CONTEXT_PC(context
) > (char *)single_stepping
) &&
358 ((char *)OS_CONTEXT_PC(context
) <= (char *)single_stepping
+ BREAKPOINT_WIDTH
)) {
359 fprintf(stderr
, "warning: couldn't reinstall breakpoint\n");
361 arch_install_breakpoint(single_stepping
);
364 single_stepping
= NULL
;
/* SIGTRAP handler: handles ICEBP pending-interrupt traps, completion of
 * single-stepping, and the INT3 trap-code protocol (reading the trap
 * kind byte at the PC and dispatching via handle_trap). With immobile
 * space, an undefined-function trap redirects control to the fdefn's
 * undefined-tramp with the fdefn in RAX.
 * NOTE(review): declaration of 'trap' and some braces are missing from
 * this extraction. */
369 sigtrap_handler(int __attribute__((unused
)) signal
,
370 siginfo_t
__attribute__((unused
)) *info
,
371 os_context_t
*context
)
373 #ifdef LISP_FEATURE_INT1_BREAKPOINTS
374 // ICEBP instruction = handle-pending-interrupt following pseudo-atomic
375 if (((unsigned char*)OS_CONTEXT_PC(context
))[-1] == 0xF1)
376 return interrupt_handle_pending(context
);
381 if (single_stepping
) {
382 restore_breakpoint_from_single_step(context
);
386 /* This is just for info in case the monitor wants to print an
388 access_control_stack_pointer(get_sb_vm_thread()) =
389 (lispobj
*)*os_context_sp_addr(context
);
391 /* On entry %rip points just after the INT3 byte and aims at the
392 * 'kind' value (eg trap_Cerror). For error-trap and Cerror-trap a
393 * number of bytes will follow, the first is the length of the byte
394 * arguments to follow. */
395 trap
= *(unsigned char *)OS_CONTEXT_PC(context
);
396 #ifdef LISP_FEATURE_IMMOBILE_SPACE
397 if (trap
== trap_UndefinedFunction
) {
398 // The interrupted PC pins this fdefn. Sigtrap is delivered on the ordinary stack,
399 // not the alternate stack.
400 // (FIXME: an interior pointer to an fdefn _should_ pin it, but doesn't)
401 lispobj
* fdefn
= (lispobj
*)(OS_CONTEXT_PC(context
) & ~LOWTAG_MASK
);
402 if (fdefn
&& widetag_of(fdefn
) == FDEFN_WIDETAG
) {
403 // Return to undefined-tramp
404 OS_CONTEXT_PC(context
) = (uword_t
)((struct fdefn
*)fdefn
)->raw_addr
;
405 // with RAX containing the FDEFN
406 *os_context_register_addr(context
,reg_RAX
) =
407 make_lispobj(fdefn
, OTHER_POINTER_LOWTAG
);
412 handle_trap(context
, trap
);
/* SIGILL handler: UD2 (2 bytes) and INTO (1 byte) are used as trap
 * encodings on x86-64, so advance the PC past them and delegate to
 * sigtrap_handler; anything else is a fatal unhandled SIGILL. */
416 sigill_handler(int __attribute__((unused
)) signal
,
417 siginfo_t
__attribute__((unused
)) *siginfo
,
418 os_context_t
*context
) {
419 unsigned char* pc
= (void*)OS_CONTEXT_PC(context
);
420 if (UNALIGNED_LOAD16(pc
) == UD2_INST
) {
421 OS_CONTEXT_PC(context
) += 2;
422 return sigtrap_handler(signal
, siginfo
, context
);
424 // Interrupt if overflow (INTO) raises SIGILL in 64-bit mode
425 if (*(unsigned char *)pc
== INTO_INST
) {
426 OS_CONTEXT_PC(context
) += 1;
427 return sigtrap_handler(signal
, siginfo
, context
);
430 fake_foreign_function_call(context
);
431 #ifdef LISP_FEATURE_LINUX
432 extern void sb_dump_mcontext(char*,void*);
433 sb_dump_mcontext("SIGILL received", context
);
435 lose("Unhandled SIGILL at %p.", pc
);
438 #ifdef X86_64_SIGFPE_FIXUP
439 #define MXCSR_IE (0x01) /* Invalid Operation */
440 #define MXCSR_DE (0x02) /* Denormal */
441 #define MXCSR_ZE (0x04) /* Devide-by-Zero */
442 #define MXCSR_OE (0x08) /* Overflow */
443 #define MXCSR_UE (0x10) /* Underflow */
444 #define MXCSR_PE (0x20) /* Precision */
/* Map the MXCSR exception bits to an si_code value, considering only
 * exceptions that are unmasked (mask bits sit 7 positions above the
 * flag bits), in the architecturally defined priority order.
 * (Return-type line and the FPE_* return statements are missing from
 * this extraction.) */
447 mxcsr_to_code(unsigned int mxcsr
)
449 /* Extract unmasked exception bits. */
450 mxcsr
&= ~(mxcsr
>> 7) & 0x3F;
452 /* This order is defined at "Intel 64 and IA-32 Architectures
453 * Software Developer's Manual" Volume 1: "Basic Architecture",
454 * 4.9.2 "Floating-Point Exception Priority". */
455 if (mxcsr
& MXCSR_IE
)
457 else if (mxcsr
& MXCSR_ZE
)
459 else if (mxcsr
& MXCSR_DE
)
461 else if (mxcsr
& MXCSR_OE
)
463 else if (mxcsr
& MXCSR_UE
)
465 else if (mxcsr
& MXCSR_PE
)
/* SIGFPE handler: for XMM exceptions (si_code == 0) synthesize a proper
 * si_code from MXCSR and clear the sticky flag, then dispatch to the
 * normal interrupt machinery. */
472 sigfpe_handler(int signal
, siginfo_t
*siginfo
, os_context_t
*context
)
474 unsigned int *mxcsr
= arch_os_context_mxcsr_addr(context
);
476 #ifndef LISP_FEATURE_DARWIN
477 /* Darwin doesn't handle accrued bits right. */
478 if (siginfo
->si_code
== 0)
480 { /* XMM exception */
481 siginfo
->si_code
= mxcsr_to_code(*mxcsr
);
483 /* Clear sticky exception flag. */
487 interrupt_handle_now(signal
, siginfo
, context
);
/* Install the architecture's signal handlers: SIGILL, SIGTRAP, and
 * (when the SIGFPE si_code fixup is needed) SIGFPE. */
492 arch_install_interrupt_handlers()
494 /* Note: The old CMU CL code here used sigtrap_handler() to handle
495 * SIGILL as well as SIGTRAP. I couldn't see any reason to do
496 * things that way. So, I changed to separate handlers when
497 * debugging a problem on OpenBSD, where SBCL wasn't catching
498 * SIGILL properly, but was instead letting the process be
499 * terminated with an "Illegal instruction" output. If this change
500 * turns out to break something (maybe breakpoint handling on some
501 * OS I haven't tested on?) and we have to go back to the old CMU
502 * CL way, I hope there will at least be a comment to explain
503 * why.. -- WHN 2001-06-07 */
504 #ifndef LISP_FEATURE_WIN32
505 ll_install_handler(SIGILL
, sigill_handler
);
506 ll_install_handler(SIGTRAP
, sigtrap_handler
);
509 #if defined(X86_64_SIGFPE_FIXUP) && !defined(LISP_FEATURE_WIN32)
510 ll_install_handler(SIGFPE
, sigfpe_handler
);
/* Write alien-linkage-table entry 'index': a raw data pointer when
 * 'datap', otherwise a 16-byte code stub -- an absolute RIP-relative
 * indirect JMP through the pointer stored 8 bytes after the opcode,
 * padded with a 2-byte NOP. */
515 arch_write_linkage_table_entry(int index
, void *target_addr
, int datap
)
517 char *reloc_addr
= (char*)ALIEN_LINKAGE_TABLE_SPACE_START
+ index
* ALIEN_LINKAGE_TABLE_ENTRY_SIZE
;
519 *(uword_t
*)reloc_addr
= (uword_t
)target_addr
;
522 reloc_addr
[0] = 0xFF; /* Opcode for near jump to absolute reg/mem64. */
523 reloc_addr
[1] = 0x25; /* ModRM #b00 100 101, i.e. RIP-relative. */
524 UNALIGNED_STORE32((reloc_addr
+2), 2); /* 32-bit displacement field = 2 */
525 reloc_addr
[6] = 0x66; reloc_addr
[7] = 0x90; /* 2-byte NOP */
526 *(void**)(reloc_addr
+8) = target_addr
;
529 /* These setup and check *both* the sse2 and x87 FPUs. While lisp code
530 only uses the sse2 FPU, other code (such as libc) may use the x87 FPU.
538 /* return the x87 exception flags ored in with the sse2
539 * control+status flags */
540 asm ("fnstsw %0" : "=m" (temp
));
543 asm ("stmxcsr %0" : "=m" (temp
));
545 /* flip exception mask bits */
546 return result
^ (0x3F << 7);
552 unsigned short unused1
;
554 unsigned short unused2
;
555 unsigned int other_regs
[5];
/* Set both the x87 and SSE2 floating-point modes from an mxcsr-format
 * word: control word gets long-double precision plus the mxcsr trap
 * masks and rounding mode; status word takes the mxcsr exception flags;
 * then the real MXCSR is loaded.
 * (Declarations of 'f_env'/'temp' and some lines are missing from this
 * extraction.) */
559 arch_set_fp_modes(unsigned int mxcsr
)
564 /* turn trap enable bits into exception mask */
568 asm ("fnstenv %0" : "=m" (f_env
));
569 /* set control word: always long double precision
570 * get traps and rounding from mxcsr word */
571 f_env
.cw
= 0x300 | ((mxcsr
>> 7) & 0x3F) | (((mxcsr
>> 13) & 0x3) << 10);
572 /* set status word: only override exception flags, from mxcsr */
574 f_env
.sw
|= (mxcsr
& 0x3F);
576 asm ("fldenv %0" : : "m" (f_env
));
578 /* now, simply, load up the mxcsr register */
580 asm ("ldmxcsr %0" : : "m" (temp
));
583 /* Return the tagged pointer for which 'entrypoint' is the starting address.
584 * This result will have SIMPLE_FUN_WIDETAG or FUNCALLABLE_INSTANCE_WIDETAG.
585 * If the thing has been forwarded, we do NOT return the newspace copy.
/* Given a code entry address, return the tagged function object whose
 * header sits two words before it; 0 for null/asm-routine addresses.
 * If the header was forwarded, assert the forwarded value is a function
 * but still return the oldspace (un-forwarded) pointer. */
587 lispobj
entrypoint_taggedptr(uword_t entrypoint
) {
588 if (!entrypoint
|| points_to_asm_code_p(entrypoint
)) return 0;
589 lispobj
* phdr
= (lispobj
*)(entrypoint
- 2*N_WORD_BYTES
);
590 if (forwarding_pointer_p(phdr
)) {
591 gc_assert(lowtag_of(forwarding_pointer_value(phdr
)) == FUN_POINTER_LOWTAG
);
592 // We can't assert on the widetag if forwarded, because defragmentation
593 // puts the new logical object at some totally different physical address.
594 // This function doesn't know if defrag is occurring.
596 __attribute__((unused
)) unsigned char widetag
= widetag_of(phdr
);
597 gc_assert(widetag
== SIMPLE_FUN_WIDETAG
|| widetag
== FUNCALLABLE_INSTANCE_WIDETAG
);
599 return make_lispobj(phdr
, FUN_POINTER_LOWTAG
);
602 /* Return the lisp object that fdefn's raw_addr slot jumps to.
603 * In the event that the referenced object was forwarded, this returns the un-forwarded
604 * object (the forwarded value is used to assert some invariants though).
605 * If the fdefn jumps to the UNDEFINED-FDEFN routine, then return 0.
607 * Some legacy baggage is evident: in the first implementation of immobile fdefns,
608 * an fdefn used a 'jmp rel32' (relative to itself), and so you could decode the
609 * jump target only given the address of the fdefn. That is no longer true; fdefns use
610 * absolute jumps. Therefore it is possible to call entrypoint_taggedptr() with any
611 * raw_addr, whether or not you know the fdefn whence the raw_addr was obtained. */
/* Return the tagged function object the fdefn's raw_addr jumps to,
 * or 0 for the undefined-function routine. */
612 lispobj
decode_fdefn_rawfun(struct fdefn
* fdefn
) {
613 return entrypoint_taggedptr((uword_t
)fdefn
->raw_addr
);
616 #ifdef LISP_FEATURE_SB_THREAD
617 #include "genesis/vector.h"
618 #define LOCK_PREFIX 0xF0
619 #undef SHOW_PC_RECORDING
621 extern unsigned int max_alloc_point_counters
;
623 #ifdef LISP_FEATURE_WIN32
624 extern CRITICAL_SECTION alloc_profiler_lock
;
626 extern pthread_mutex_t alloc_profiler_lock
;
/* Reserve 'qty' (1 or 2) consecutive slots in the allocation-profile
 * counter buffer by bumping N-PROFILE-SITES; on overflow warn once and
 * return 0 so callers fall back to the shared overflow bin(s).
 * (Return statement for the success path is missing from this
 * extraction.) */
630 static unsigned int claim_index(int qty
) // qty is 1 or 2
632 static bool warning_issued
;
633 unsigned int index
= fixnum_value(SYMBOL(N_PROFILE_SITES
)->value
);
634 SYMBOL(N_PROFILE_SITES
)->value
+= make_fixnum(qty
);
635 if (fixnum_value(SYMBOL(N_PROFILE_SITES
)->value
) <= max_alloc_point_counters
)
637 if (!warning_issued
) {
638 fprintf(stderr
, "allocation profile buffer overflowed\n");
641 return 0; // use the overflow bin(s)
/* Decide whether the return address on the stack ('*sp') points at an
 * allocation instrumentation site that still needs patching. Takes the
 * profiler lock, aligns the return PC down to the 8-byte site start,
 * checks it lies in [instrument_from, instrument_to), and rejects sites
 * already rewritten (first byte is the LOCK prefix). On success stores
 * the site address through 'pc'.
 * NOTE(review): 'old_word' is presumably filled with the original word
 * in lines missing from this extraction -- confirm against pristine
 * source. */
644 static bool NO_SANITIZE_MEMORY
645 instrumentp(uword_t
* sp
, uword_t
** pc
, uword_t
* old_word
)
647 int __attribute__((unused
)) ret
= mutex_acquire(&alloc_profiler_lock
);
649 uword_t next_pc
= *sp
;
650 // The instrumentation site was 8-byte aligned
651 uword_t return_pc
= ALIGN_DOWN(next_pc
, 8);
653 if (!(return_pc
>= instrument_from
&& return_pc
< instrument_to
))
656 uword_t word
= *(uword_t
*)return_pc
;
657 unsigned char opcode
= word
& 0xFF;
658 // Adjust the return PC to where the call instruction was,
659 // not the instruction after it.
661 // If > 1 thread called this routine at the same time,
662 // one of them would already have patched the call site.
663 if (opcode
== LOCK_PREFIX
)
665 *pc
= (uword_t
*)return_pc
;
/* Record profiling metadata for counter slot 'index': store the code
 * component (as a tagged pointer, with a GC write-barrier notice) and
 * the pc-offset within it; if no component can be identified even after
 * closing the code region, store -1 and the raw PC instead. */
670 // logical index 'index' in the metadata array stores the code component
671 // and pc-offset relative to the component base address
672 static void record_pc(char* pc
, unsigned int index
, bool sizedp
)
674 lispobj
*code
= component_ptr_from_pc(pc
);
676 fprintf(stderr
, "can't identify code @ %p\n", pc
);
678 #ifdef SHOW_PC_RECORDING
679 fprintf(stderr
, "%#lx (%#lx) -> %d%s\n",
680 (uword_t
)pc
, make_lispobj(code
, OTHER_POINTER_LOWTAG
),
681 index
, sizedp
?" (sized)":"");
686 int ret
= mutex_acquire(&free_pages_lock
);
688 ensure_region_closed(code_region
);
689 int ret
= mutex_release(&free_pages_lock
);
691 code
= component_ptr_from_pc(pc
);
694 struct vector
* v
= VECTOR(alloc_profile_data
);
697 v
->data
[index
] = v
->data
[index
+1] = NIL
;
700 // Wasn't the point of code serial# that you don't store
701 // code blob pointers into the various profiling buffers? (FIXME?)
703 vector_notice_pointer_store(&v
->data
[index
]);
704 v
->data
[index
] = make_lispobj(code
, OTHER_POINTER_LOWTAG
);
705 // do not need to take notice of a fixnum store
706 v
->data
[index
+1] = make_fixnum((lispobj
)pc
- (lispobj
)code
);
708 gc_assert(!((uword_t
)pc
& LOWTAG_MASK
));
709 v
->data
[index
] = make_fixnum(-1); // no code component found
710 v
->data
[index
+1] = (lispobj
)pc
;
/* Patch a fixed-size allocation site: replace the CALL to this helper
 * with "LOCK INC QWORD PTR [Rbase+disp]" targeting counter slot
 * 'index' (slot 2 is the shared overflow counter). The base register
 * number is recovered from the dummy TEST imm8 byte that follows the
 * CALL. The write is done with a CAS because the code is concurrently
 * executable. */
714 void allocation_tracker_counted(uword_t
* sp
)
716 uword_t
*pc
, word_at_pc
;
717 if (instrumentp(sp
, &pc
, &word_at_pc
)) {
718 unsigned int index
= claim_index(1);
720 index
= 2; // reserved overflow counter for fixed-size alloc
721 uword_t disp
= index
* 8;
723 if ((word_at_pc
& 0xff) == 0xE8) {
724 // following is a 1-byte NOP and a dummy "TEST imm8" where the imm8
725 // encodes a register number.
726 base_reg
= word_at_pc
>> 56;
728 lose("Unexpected instruction format @ %p", pc
);
730 // rewrite call into: LOCK INC QWORD PTR, [Rbase+n] ; opcode = 0xFF / 0
731 uword_t new_inst
= 0xF0 | ((0x48|(base_reg
>>3)) << 8) // w + possibly 'b'
732 | (0xFF << 16) | ((0x80L
+(base_reg
&7)) << 24) | (disp
<< 32);
733 // Ensure atomicity of the write. A plain store would probably do,
734 // but since this is self-modifying code, the most stringent memory
736 if (!__sync_bool_compare_and_swap(pc
, word_at_pc
, new_inst
))
737 lose("alloc profiler failed to rewrite instruction @ %p", pc
);
739 record_pc((char*)pc
, index
, 0);
741 int __attribute__((unused
)) ret
= mutex_release(&alloc_profiler_lock
);
/* Patch a variable-size allocation site: rewrite the two instruction
 * words at the CALL into "LOCK INC [Rbase+disp]" (count) followed by
 * "LOCK ADD [Rbase+disp+8], Rsize" (total bytes). Base and size
 * register numbers are packed in the top byte of the first word. The
 * second word is CAS'd first so fallthrough after the opcode change
 * never executes a half-patched sequence. */
745 void allocation_tracker_sized(uword_t
* sp
)
747 uword_t
*pc
, word_at_pc
;
748 if (instrumentp(sp
, &pc
, &word_at_pc
)) {
749 int index
= claim_index(2);
750 uword_t word_after_pc
= pc
[1];
751 int pair
= word_at_pc
>> 56;
752 int base_reg
= pair
& 0xf;
753 int size_reg
= pair
>> 4;
754 // rewrite call into:
755 // LOCK INC QWORD PTR, [Rbase+n] ; opcode = 0xFF / 0
756 uword_t disp
= index
* 8;
757 uword_t new_inst1
= 0xF0 | ((0x48 | (base_reg
>>3)) << 8) // w + b
758 | (0xFF << 16) | ((0x80L
+(base_reg
&7)) << 24) | (disp
<< 32);
759 // LOCK ADD [Rbase+n+8], Rsize ; opcode = 0x01
760 int prefix
= 0x48 | ((size_reg
>> 3) << 2) | (base_reg
>> 3); // w + r + b
761 int modrm
= 0x80 | ((size_reg
& 7) << 3) | (base_reg
& 7);
762 disp
= (1 + index
) * 8;
764 0xF0 | (prefix
<< 8) | (0x01 << 16) | ((long)modrm
<< 24) | (disp
<< 32);
765 // Overwrite the second instruction first, because as soon as the CALL
766 // opcode is changed, fallthrough to the next instruction occurs.
767 if (!__sync_bool_compare_and_swap(pc
+1, word_after_pc
, new_inst2
) ||
768 !__sync_bool_compare_and_swap(pc
, word_at_pc
, new_inst1
))
769 lose("alloc profiler failed to rewrite instructions @ %p", pc
);
770 if (index
!= 0) // can't record a PC for the overflow counts
771 record_pc((char*)pc
, index
, 1);
773 int __attribute__((unused
)) ret
= mutex_release(&alloc_profiler_lock
);
/* C-to-Lisp call entry: forwards to the assembly routine
 * call_into_lisp_, supplying the current VM thread. sysv_abi keeps the
 * calling convention fixed regardless of the host's default ABI. */
778 __attribute__((sysv_abi
)) lispobj
call_into_lisp(lispobj fun
, lispobj
*args
, int nargs
) {
779 extern lispobj
call_into_lisp_(lispobj
, lispobj
*, int, struct thread
*)
780 __attribute__((sysv_abi
));
781 return call_into_lisp_(fun
, args
, nargs
, get_sb_vm_thread());
/* First C-to-Lisp call for a thread: forwards to the assembly routine
 * call_into_lisp_first_time_, supplying the current VM thread. */
784 lispobj
call_into_lisp_first_time(lispobj fun
, lispobj
*args
, int nargs
) {
785 extern lispobj
call_into_lisp_first_time_(lispobj
, lispobj
*, int, struct thread
*)
786 __attribute__((sysv_abi
));
787 return call_into_lisp_first_time_(fun
, args
, nargs
, get_sb_vm_thread());
790 #include "x86-arch-shared.inc"