Optimize MAPCAR on large lists.
[sbcl.git] / src / runtime / ppc-arch.c
blob67bfe65037629629aaf949579c4dde9fdac92a23
1 /*
2 * This software is part of the SBCL system. See the README file for
3 * more information.
5 * This software is derived from the CMU CL system, which was
6 * written at Carnegie Mellon University and released into the
7 * public domain. The software is in the public domain and is
8 * provided with absolutely no warranty. See the COPYING and CREDITS
9 * files for more information.
12 #include <stdio.h>
14 #include "sbcl.h"
15 #include "arch.h"
16 #include "globals.h"
17 #include "validate.h"
18 #include "os.h"
19 #include "interrupt.h"
20 #include "lispregs.h"
21 #include "signal.h"
22 #include "interrupt.h"
23 #include "interr.h"
24 #include "breakpoint.h"
25 #include "alloc.h"
27 #if defined(LISP_FEATURE_GENCGC)
28 #include "gencgc-alloc-region.h"
29 #endif
31 #ifdef LISP_FEATURE_SB_THREAD
32 #include "pseudo-atomic.h"
33 #endif
35 /* The header files may not define PT_DAR/PT_DSISR. This definition
36 is correct for all versions of ppc linux >= 2.0.30
38 As of DR2.1u4, MkLinux doesn't pass these registers to signal
39 handlers correctly; a patch is necessary in order to (partially)
40 correct this.
42 Even with the patch, the DSISR may not have its 'write' bit set
43 correctly (it tends not to be set if the fault was caused by
44 something other than a protection violation.)
46 Caveat callers. */
48 #if defined (LISP_FEATURE_DARWIN) || defined(LISP_FEATURE_LINUX)
49 #ifndef PT_DAR
50 #define PT_DAR 41
51 #endif
53 #ifndef PT_DSISR
54 #define PT_DSISR 42
55 #endif
56 #endif
58 /* Magic encoding for the instruction used for traps. */
59 #define TRAP_INSTRUCTION(trap) ((3<<26) | (6 << 21) | (trap))
61 void arch_init() {
64 os_vm_address_t
65 arch_get_bad_addr(int sig, siginfo_t *code, os_context_t *context)
67 os_vm_address_t addr;
69 #if defined(LISP_FEATURE_NETBSD) || defined(LISP_FEATURE_OPENBSD)
70 addr = (os_vm_address_t) (code->si_addr);
71 #else
72 addr = (os_vm_address_t) (*os_context_register_addr(context,PT_DAR));
73 #endif
74 return addr;
78 void
79 arch_skip_instruction(os_context_t *context)
81 char** pcptr;
82 pcptr = (char**) os_context_pc_addr(context);
83 *pcptr += 4;
86 unsigned char *
87 arch_internal_error_arguments(os_context_t *context)
89 return (unsigned char *)(*os_context_pc_addr(context)+4);
93 boolean
94 arch_pseudo_atomic_atomic(os_context_t *context)
96 #ifdef LISP_FEATURE_SB_THREAD
97 struct thread *thread = arch_os_get_current_thread();
99 if (foreign_function_call_active_p(thread)) {
100 return get_pseudo_atomic_atomic(thread);
101 } else return
102 #else
103 /* FIXME: this foreign_function_call_active test is dubious at
104 * best. If a foreign call is made in a pseudo atomic section
105 * (?) or more likely a pseudo atomic section is in a foreign
106 * call then an interrupt is executed immediately. Maybe it
107 * has to do with C code not maintaining pseudo atomic
108 * properly. MG - 2005-08-10
110 * The foreign_function_call_active used to live at each call-site
111 * to arch_pseudo_atomic_atomic, but this seems clearer.
112 * --NS 2007-05-15 */
113 return (!foreign_function_call_active_p(arch_os_get_current_thread())) &&
114 #endif
115 ((*os_context_register_addr(context,reg_ALLOC)) & flag_PseudoAtomic);
118 void
119 arch_set_pseudo_atomic_interrupted(os_context_t *context)
121 #ifdef LISP_FEATURE_SB_THREAD
122 struct thread *thread = arch_os_get_current_thread();
124 if (foreign_function_call_active_p(thread)) {
125 set_pseudo_atomic_interrupted(thread);
126 } else
127 #endif
128 *os_context_register_addr(context,reg_ALLOC)
129 |= flag_PseudoAtomicInterrupted;
132 void
133 arch_clear_pseudo_atomic_interrupted(os_context_t *context)
135 #ifdef LISP_FEATURE_SB_THREAD
136 struct thread *thread = arch_os_get_current_thread();
138 if (foreign_function_call_active_p(thread)) {
139 clear_pseudo_atomic_interrupted(thread);
140 } else
141 #endif
142 *os_context_register_addr(context,reg_ALLOC)
143 &= ~flag_PseudoAtomicInterrupted;
146 unsigned int
147 arch_install_breakpoint(void *pc)
149 unsigned int *ptr = (unsigned int *)pc;
150 unsigned int result = *ptr;
151 *ptr = TRAP_INSTRUCTION(trap_Breakpoint);
152 os_flush_icache((os_vm_address_t) pc, sizeof(unsigned int));
153 return result;
156 void
157 arch_remove_breakpoint(void *pc, unsigned int orig_inst)
159 *(unsigned int *)pc = orig_inst;
160 os_flush_icache((os_vm_address_t) pc, sizeof(unsigned int));
164 * Perform the instruction that we overwrote with a breakpoint. As we
165 * don't have a single-step facility, this means we have to:
166 * - put the instruction back
167 * - put a second breakpoint at the following instruction,
168 * set after_breakpoint and continue execution.
170 * When the second breakpoint is hit (very shortly thereafter, we hope)
171 * sigtrap_handler gets called again, but follows the AfterBreakpoint
172 * arm, which
173 * - puts a bpt back in the first breakpoint place (running across a
174 * breakpoint shouldn't cause it to be uninstalled)
175 * - replaces the second bpt with the instruction it was meant to be
176 * - carries on
178 * Clear?
180 static unsigned int *skipped_break_addr, displaced_after_inst;
181 static sigset_t orig_sigmask;
183 static boolean
184 should_branch(os_context_t *context, unsigned int orig_inst)
186 /* orig_inst is a conditional branch instruction. We need to
187 * know if the branch will be taken if executed in context. */
188 int ctr = *os_context_ctr_addr(context);
189 int cr = *os_context_cr_addr(context);
190 int bo_field = (orig_inst >> 21) & 0x1f;
191 int bi_field = (orig_inst >> 16) & 0x1f;
192 int ctr_ok;
194 if (!(bo_field & 4)) ctr--; /* Decrement CTR if necessary. */
196 ctr_ok = (bo_field & 4) || ((ctr == 0) == ((bo_field & 2) == 2));
197 return ctr_ok && ((bo_field & 0x10) ||
198 !(((cr >> (31-bi_field)) ^ (bo_field >> 3)) & 1));
201 void
202 arch_do_displaced_inst(os_context_t *context, unsigned int orig_inst)
204 /* not sure how we ensure that we get the breakpoint reinstalled
205 * after doing this -dan */
206 unsigned int *pc = (unsigned int *)(*os_context_pc_addr(context));
207 unsigned int *next_pc;
208 int op = orig_inst >> 26;
209 int sub_op = (orig_inst & 0x7fe) >> 1; /* XL-form sub-opcode */
211 orig_sigmask = *os_context_sigmask_addr(context);
212 sigaddset_blockable(os_context_sigmask_addr(context));
214 *pc = orig_inst;
215 os_flush_icache((os_vm_address_t) pc, sizeof(unsigned int));
216 skipped_break_addr = pc;
218 /* Figure out where we will end up after running the displaced
219 * instruction by defaulting to the next instruction in the stream
220 * and then checking for branch instructions. FIXME: This will
221 * probably screw up if it attempts to step a trap instruction. */
222 next_pc = pc + 1;
224 if (op == 18) {
225 /* Branch I-form */
226 unsigned int displacement = orig_inst & 0x03fffffc;
227 /* Sign extend */
228 if (displacement & 0x02000000) {
229 displacement |= 0xc0000000;
231 if (orig_inst & 2) { /* Absolute Address */
232 next_pc = (unsigned int *)displacement;
233 } else {
234 next_pc = (unsigned int *)(((unsigned int)pc) + displacement);
236 } else if ((op == 16)
237 && should_branch(context, orig_inst)) {
238 /* Branch Conditional B-form */
239 unsigned int displacement = orig_inst & 0x0000fffc;
240 /* Sign extend */
241 if (displacement & 0x00008000) {
242 displacement |= 0xffff0000;
244 if (orig_inst & 2) { /* Absolute Address */
245 next_pc = (unsigned int *)displacement;
246 } else {
247 next_pc = (unsigned int *)(((unsigned int)pc) + displacement);
249 } else if ((op == 19) && (sub_op == 16)
250 && should_branch(context, orig_inst)) {
251 /* Branch Conditional to Link Register XL-form */
252 next_pc = (unsigned int *)
253 ((*os_context_lr_addr(context)) & ~3);
254 } else if ((op == 19) && (sub_op == 528)
255 && should_branch(context, orig_inst)) {
256 /* Branch Conditional to Count Register XL-form */
257 next_pc = (unsigned int *)
258 ((*os_context_ctr_addr(context)) & ~3);
261 /* Set the "after" breakpoint. */
262 displaced_after_inst = *next_pc;
263 *next_pc = TRAP_INSTRUCTION(trap_AfterBreakpoint);
264 os_flush_icache((os_vm_address_t)next_pc, sizeof(unsigned int));
267 #ifdef LISP_FEATURE_GENCGC
269 * Return non-zero if the current instruction is an allocation trap
271 static int
272 allocation_trap_p(os_context_t * context)
274 int result;
275 unsigned int *pc;
276 unsigned inst;
277 unsigned opcode;
278 unsigned src;
279 unsigned dst;
281 result = 0;
284 * First, the instruction has to be a TWLGE temp, NL3, which has the
285 * format.
286 * | 6| 5| 5 | 5 | 10|1| width
287 * |31|5 |dst|src| 4|0| field
289 pc = (unsigned int *) (*os_context_pc_addr(context));
290 inst = *pc;
292 #if 0
293 fprintf(stderr, "allocation_trap_p at %p: inst = 0x%08x\n", pc, inst);
294 #endif
296 opcode = inst >> 26;
297 src = (inst >> 11) & 0x1f;
298 dst = (inst >> 16) & 0x1f;
299 if ((opcode == 31) && (src == reg_NL3) && (5 == ((inst >> 21) & 0x1f))
300 && (4 == ((inst >> 1) & 0x3ff))) {
302 * We got the instruction. Now, look back to make sure it was
303 * proceeded by what we expected. The previous instruction
304 * should be an ADD or ADDI instruction.
306 unsigned int add_inst;
308 add_inst = pc[-1];
309 #if 0
310 fprintf(stderr, " add inst at %p: inst = 0x%08x\n",
311 pc - 1, add_inst);
312 #endif
313 opcode = add_inst >> 26;
314 if ((opcode == 31) && (266 == ((add_inst >> 1) & 0x1ff))) {
315 return 1;
316 } else if ((opcode == 14)) {
317 return 1;
318 } else {
319 fprintf(stderr,
320 "Whoa! Got allocation trap but could not find ADD or ADDI instruction: 0x%08x in the proper place\n",
321 add_inst);
324 return 0;
327 extern struct alloc_region boxed_region;
329 void
330 handle_allocation_trap(os_context_t * context)
332 unsigned int *pc;
333 unsigned int inst;
334 unsigned int target, target_ptr, end_addr;
335 unsigned int opcode;
336 int size;
337 boolean were_in_lisp;
338 char *memory;
340 target = 0;
341 size = 0;
343 #if 0
344 fprintf(stderr, "In handle_allocation_trap\n");
345 #endif
347 /* I don't think it's possible for us NOT to be in lisp when we get
348 * here. Remove this later? */
349 were_in_lisp = !foreign_function_call_active_p(arch_os_get_current_thread());
351 if (were_in_lisp) {
352 fake_foreign_function_call(context);
353 } else {
354 fprintf(stderr, "**** Whoa! allocation trap and we weren't in lisp!\n");
358 * Look at current instruction: TWNE temp, NL3. We're here because
359 * temp > NL3 and temp is the end of the allocation, and NL3 is
360 * current-region-end-addr.
362 * We need to adjust temp and alloc-tn.
365 pc = (unsigned int *) (*os_context_pc_addr(context));
366 inst = pc[0];
367 end_addr = (inst >> 11) & 0x1f;
368 target = (inst >> 16) & 0x1f;
370 target_ptr = *os_context_register_addr(context, target);
372 #if 0
373 fprintf(stderr, "handle_allocation_trap at %p:\n", pc);
374 fprintf(stderr, "boxed_region.free_pointer: %p\n", boxed_region.free_pointer);
375 fprintf(stderr, "boxed_region.end_addr: %p\n", boxed_region.end_addr);
376 fprintf(stderr, "target reg: %d, end_addr reg: %d\n", target, end_addr);
377 fprintf(stderr, "target: %x\n", *os_context_register_addr(context, target));
378 fprintf(stderr, "end_addr: %x\n", *os_context_register_addr(context, end_addr));
379 #endif
381 #if 0
382 fprintf(stderr, "handle_allocation_trap at %p:\n", pc);
383 fprintf(stderr, " trap inst = 0x%08x\n", inst);
384 fprintf(stderr, " target reg = %s\n", lisp_register_names[target]);
385 #endif
388 * Go back and look at the add/addi instruction. The second src arg
389 * is the size of the allocation. Get it and call alloc to allocate
390 * new space.
392 inst = pc[-1];
393 opcode = inst >> 26;
394 #if 0
395 fprintf(stderr, " add inst = 0x%08x, opcode = %d\n", inst, opcode);
396 #endif
397 if (opcode == 14) {
399 * ADDI temp-tn, alloc-tn, size
401 * Extract the size
403 size = (inst & 0xffff);
404 } else if (opcode == 31) {
406 * ADD temp-tn, alloc-tn, size-tn
408 * Extract the size
410 int reg;
412 reg = (inst >> 11) & 0x1f;
413 #if 0
414 fprintf(stderr, " add, reg = %s\n", lisp_register_names[reg]);
415 #endif
416 size = *os_context_register_addr(context, reg);
420 #if 0
421 fprintf(stderr, "Alloc %d to %s\n", size, lisp_register_names[target]);
422 #endif
424 #if INLINE_ALLOC_DEBUG
425 if ((((unsigned long)boxed_region.end_addr + size) / PAGE_SIZE) ==
426 (((unsigned long)boxed_region.end_addr) / PAGE_SIZE)) {
427 fprintf(stderr,"*** possibly bogus trap allocation of %d bytes at %p\n",
428 size, target_ptr);
429 fprintf(stderr, " dynamic_space_free_pointer: %p, boxed_region.end_addr %p\n",
430 dynamic_space_free_pointer, boxed_region.end_addr);
432 #endif
434 #if 0
435 fprintf(stderr, "Ready to alloc\n");
436 fprintf(stderr, "free_pointer = 0x%08x\n",
437 dynamic_space_free_pointer);
438 #endif
441 * alloc-tn was incremented by size. Need to decrement it by size
442 * to restore its original value. This is not true on GENCGC
443 * anymore. d_s_f_p and reg_alloc get out of sync, but the p_a
444 * bits stay intact and we set it to the proper value when it
445 * needs to be. Keep this comment here for the moment in case
446 * somebody tries to figure out what happened here.
448 /* dynamic_space_free_pointer =
449 (lispobj *) ((long) dynamic_space_free_pointer - size);
451 #if 0
452 fprintf(stderr, "free_pointer = 0x%08x new\n",
453 dynamic_space_free_pointer);
454 #endif
457 struct interrupt_data *data =
458 arch_os_get_current_thread()->interrupt_data;
459 data->allocation_trap_context = context;
460 memory = (char *) alloc(size);
461 data->allocation_trap_context = 0;
464 #if 0
465 fprintf(stderr, "alloc returned %p\n", memory);
466 fprintf(stderr, "free_pointer = 0x%08x\n",
467 dynamic_space_free_pointer);
468 #endif
471 * The allocation macro wants the result to point to the end of the
472 * object!
474 memory += size;
476 #if 0
477 fprintf(stderr, "object end at %p\n", memory);
478 #endif
480 *os_context_register_addr(context, target) = (unsigned long) memory;
481 #ifndef LISP_FEATURE_SB_THREAD
482 /* This is handled by the fake_foreign_function_call machinery on
483 * threaded targets. */
484 *os_context_register_addr(context, reg_ALLOC) =
485 (unsigned long) dynamic_space_free_pointer
486 | (*os_context_register_addr(context, reg_ALLOC)
487 & LOWTAG_MASK);
488 #endif
490 if (were_in_lisp) {
491 undo_fake_foreign_function_call(context);
494 /* Skip the allocation trap and the write of the updated free
495 * pointer back to the allocation region. This is two
496 * instructions when threading is enabled and four instructions
497 * otherwise. */
498 #ifdef LISP_FEATURE_SB_THREAD
499 (*os_context_pc_addr(context)) = pc + 2;
500 #else
501 (*os_context_pc_addr(context)) = pc + 4;
502 #endif
505 #endif
507 void
508 arch_handle_breakpoint(os_context_t *context)
510 handle_breakpoint(context);
513 void
514 arch_handle_fun_end_breakpoint(os_context_t *context)
516 *os_context_pc_addr(context)
517 =(int)handle_fun_end_breakpoint(context);
520 void
521 arch_handle_after_breakpoint(os_context_t *context)
523 *skipped_break_addr = TRAP_INSTRUCTION(trap_Breakpoint);
524 os_flush_icache((os_vm_address_t) skipped_break_addr,
525 sizeof(unsigned int));
526 skipped_break_addr = NULL;
527 *(unsigned int *)*os_context_pc_addr(context)
528 = displaced_after_inst;
529 *os_context_sigmask_addr(context)= orig_sigmask;
530 os_flush_icache((os_vm_address_t) *os_context_pc_addr(context),
531 sizeof(unsigned int));
534 void
535 arch_handle_single_step_trap(os_context_t *context, int trap)
537 unsigned int code = *((u32 *)(*os_context_pc_addr(context)));
538 int register_offset = code >> 5 & 0x1f;
539 handle_single_step_trap(context, trap, register_offset);
540 arch_skip_instruction(context);
543 static void
544 sigtrap_handler(int signal, siginfo_t *siginfo, os_context_t *context)
546 unsigned int code;
548 code=*((u32 *)(*os_context_pc_addr(context)));
549 if (code == ((3 << 26) | (0x18 << 21) | (reg_NL3 << 16))||
550 /* trap instruction from do_pending_interrupt */
551 code == 0x7fe00008) {
552 arch_clear_pseudo_atomic_interrupted(context);
553 arch_skip_instruction(context);
554 /* interrupt or GC was requested in PA; now we're done with the
555 PA section we may as well get around to it */
556 interrupt_handle_pending(context);
557 return;
560 #ifdef LISP_FEATURE_GENCGC
561 /* Is this an allocation trap? */
562 if (allocation_trap_p(context)) {
563 handle_allocation_trap(context);
564 return;
566 #endif
568 if ((code >> 16) == ((3 << 10) | (6 << 5))) {
569 /* twllei reg_ZERO,N will always trap if reg_ZERO = 0 */
570 int trap = code & 0x1f;
571 handle_trap(context,trap);
572 return;
574 /* twi :ne ... or twi ... nargs */
575 if (((code >> 26) == 3) && (((code >> 21) & 31) == 24
576 #ifdef LISP_FEATURE_PRECISE_ARG_COUNT_ERROR
577 || ((code >> 16) & 31) == reg_NARGS
578 #endif
579 )) {
580 interrupt_internal_error(context, 0);
581 return;
584 interrupt_handle_now(signal, (siginfo_t *)code, context);
588 void arch_install_interrupt_handlers()
590 undoably_install_low_level_interrupt_handler(SIGILL, sigtrap_handler);
591 undoably_install_low_level_interrupt_handler(SIGTRAP, sigtrap_handler);
594 void
595 ppc_flush_icache(os_vm_address_t address, os_vm_size_t length)
597 os_vm_address_t end = (os_vm_address_t) ((int)(address+length+(32-1)) &~(32-1));
598 extern void ppc_flush_cache_line(os_vm_address_t);
600 while (address < end) {
601 ppc_flush_cache_line(address);
602 address += 32;
606 #ifdef LISP_FEATURE_LINKAGE_TABLE
608 /* Linkage tables for PowerPC
610 * Linkage entry size is 16, because we need at least 4 instructions to
611 * implement a jump.
615 * Define the registers to use in the linkage jump table. Can be the
616 * same. Some care must be exercised when choosing these. It has to be
617 * a register that is not otherwise being used. reg_NFP is a good
618 * choice. call_into_c trashes reg_NFP without preserving it, so we can
619 * trash it in the linkage jump table.
621 #define LINKAGE_TEMP_REG reg_NFP
622 #define LINKAGE_ADDR_REG reg_NFP
625 * Insert the necessary jump instructions at the given address.
627 void
628 arch_write_linkage_table_jmp(char *reloc_addr, void *target_addr)
631 * Make JMP to function entry.
633 * The instruction sequence is:
635 * addis 13, 0, (hi part of addr)
636 * ori 13, 13, (low part of addr)
637 * mtctr 13
638 * bctr
641 int* inst_ptr;
642 unsigned long hi; /* Top 16 bits of address */
643 unsigned long lo; /* Low 16 bits of address */
644 unsigned int inst;
646 inst_ptr = (int*) reloc_addr;
649 * Split the target address into hi and lo parts for the sethi
650 * instruction. hi is the top 22 bits. lo is the low 10 bits.
652 hi = (unsigned long) target_addr;
653 lo = hi & 0xffff;
654 hi >>= 16;
657 * addis 13, 0, (hi part)
660 inst = (15 << 26) | (LINKAGE_TEMP_REG << 21) | (0 << 16) | hi;
661 *inst_ptr++ = inst;
664 * ori 13, 13, (lo part)
667 inst = (24 << 26) | (LINKAGE_TEMP_REG << 21) | (LINKAGE_TEMP_REG << 16) | lo;
668 *inst_ptr++ = inst;
671 * mtctr 13
674 inst = (31 << 26) | (LINKAGE_TEMP_REG << 21) | (9 << 16) | (467 << 1);
675 *inst_ptr++ = inst;
678 * bctr
681 inst = (19 << 26) | (20 << 21) | (528 << 1);
682 *inst_ptr++ = inst;
684 os_flush_icache((os_vm_address_t) reloc_addr, (char*) inst_ptr - reloc_addr);
687 void
688 arch_write_linkage_table_ref(void * reloc_addr, void *target_addr)
690 *(unsigned long *)reloc_addr = (unsigned long)target_addr;
693 #endif