src/runtime/x86-assem.S

   1 /*
   2  * very-low-level utilities for runtime support
   3  */
   4
   5 /*
   6  * This software is part of the SBCL system. See the README file for
   7  * more information.
   8  *
   9  * This software is derived from the CMU CL system, which was
  10  * written at Carnegie Mellon University and released into the
  11  * public domain. The software is in the public domain and is
  12  * provided with absolutely no warranty. See the COPYING and CREDITS
  13  * files for more information.
  14  */
  15 \f
  16 #define LANGUAGE_ASSEMBLY
  17 #include "sbcl.h"
  18 #include "validate.h"
  19 #include "genesis/closure.h"
  20 #include "genesis/funcallable-instance.h"
  21 #include "genesis/fdefn.h"
  22 #include "genesis/static-symbols.h"
  23 #include "genesis/symbol.h"
  24 #include "genesis/thread.h"
  25
  26 /* Minimize conditionalization for different OS naming schemes.
  27  *
  28  * (As of sbcl-0.8.10, this seems no longer to be much of an issue,
  29  * since everyone has converged on ELF. If this generality really
  30  * turns out not to matter, perhaps it's just clutter we could get
  31  * rid of? -- WHN 2004-04-18)
  32  *
  33  * (Except Win32, which is unlikely ever to be ELF, sorry. -- AB 2005-12-08)
  34  */
  35 #if defined __linux__  || defined LISP_FEATURE_FREEBSD || defined __NetBSD__ || defined __OpenBSD__ || \
  36         defined __sun || defined __DragonFly__
  37 #define GNAME(var) var
  38 #else
  39 #define GNAME(var) _##var
  40 #endif
  41
  42 /* Get the right type of alignment. Linux, FreeBSD and NetBSD (but not OpenBSD)
  43  * want alignment in bytes.
  44  *
  45  * (As in the GNAME() definitions above, as of sbcl-0.8.10, this seems
  46  * no longer to be much of an issue, since everyone has converged on
  47  * the same value. If this generality really turns out not to
  48  * matter any more, perhaps it's just clutter we could get
  49  * rid of? -- WHN 2004-04-18)
  50  */
  51 #if defined(__linux__) || defined(LISP_FEATURE_FREEBSD) || defined(__NetBSD__) || defined(__OpenBSD__) || \
  52         defined(__sun) || defined(LISP_FEATURE_WIN32) || defined(__DragonFly__)
  53 #define align_4byte     4
  54 #define align_8byte     8
  55 #define align_16byte    16
  56 #define align_page      4096
  57 #else
  58 #define align_4byte     2
  59 #define align_8byte     3
  60 #define align_16byte    4
  61 #define align_page      12
  62 #endif
  63
  64 /*
  65  * The assembler used for win32 doesn't like .type or .size directives,
  66  * so we want to conditionally kill them out. So let's wrap them in macros
  67  * that are defined to be no-ops on win32. Hopefully this still works on
  68  * other platforms.
  69  */
  70 #if !defined(LISP_FEATURE_WIN32) && !defined(LISP_FEATURE_DARWIN)
  71 #define TYPE(name) .type name,@function
  72 #define SIZE(name) .size name,.-name
  73 #else
  74 #define TYPE(name)
  75 #define SIZE(name)
  76 #endif
  77
  78 /* Helper macros for access to thread-locals slots for both OS types:
  79  * ------------------------------------------------------------------------
  80  *
  81  *                          Windows TEB block
  82  * ==================        __________
  83  * | Win32 %FS base | ---->  |        | 0
  84  * ==================        |        | 1
  85  *                           z        z
  86  *     TLS slots start here> |XXXXXXXX| e10 = TEB_STATIC_TLS_SLOTS_OFFSET
  87  *                           |XXXXXXXX| e11
  88  *                           z   ...  z
  89  *                           |XXXXXXXX| e4e
  90  *     TLS ends here>     ,- |XXXXXXXX| e4f = TEB_STATIC_TLS_SLOTS_OFFSET+63
  91  *                       /   z        z
  92  *                       |   ----------                    "os_address" ----.
  93  *                       |                                                   |
  94  *                       |   big blob of SBCL-specific thread-local data     |
  95  *                       |     |----------------------------------------| <--'
  96  *                       |     |   CONTROL, BINDING, ALIEN STACK        |
  97  *                       |     z                                        z
  98  * ==================    |     |----------------------------------------|
  99  * | Linux %FS base | -->|     |   FFI stack pointer                    |
 100  * ==================    |     |    (extra page for mprotect)           |
 101  *                        \    |----------------------------------------|
 102  *   (union p_t_d) ----->  \-> | struct thread {   | dynamic_values[0]  |
 103  *   .                         |   ...             |               [1]  |
 104  *   .                         z   ...             z               ...  z
 105  *   [tls data begins]         | }                 |               ...  | <-
 106  *   [declared end of p_t_d]   |----------------------------------------| . |
 107  *   .                         |                                   ...  | . |
 108  *   .                         |                           [TLS_SIZE-1] | <-|
 109  *   [tls data actually ends]  |----------------------------------------|   |
 110  *   .                         | ALTSTACK                               |   |
 111  *   .                         |----------------------------------------|   |
 112  *   .                         | struct nonpointer_thread_data { }      |   |
 113  *   .                         ------------------------------------------   |
 114  *   [blob actually ends]                                                   |
 115  *                                                                         /
 116  *                                                                        /
 117  *                                                                       /
 118  *          ______________________                                      /
 119  *          | struct symbol {    |                                     /
 120  *          z   ...              z                                    /
 121  *          |   fixnum tls_index;  // fixnum value relative to union /
 122  *          | }                  |           (< TLS_SIZE = 4096)
 123  *          ---------------------|
 124  */
 125 #ifdef LISP_FEATURE_WIN32
 126 # define TEB_STATIC_TLS_SLOTS_OFFSET 0xE10
 127 # define TEB_SBCL_THREAD_BASE_OFFSET (TEB_STATIC_TLS_SLOTS_OFFSET+(63*4))
 128 # define SBCL_THREAD_BASE_EA %fs:TEB_SBCL_THREAD_BASE_OFFSET
 129 # define MAYBE_FS(addr) addr
 130 # define LoadTlSymbolValueAddress(symbol,reg) ;         \
 131         movl    SBCL_THREAD_BASE_EA, reg ;              \
 132         addl    (symbol+SYMBOL_TLS_INDEX_OFFSET), reg ;
 133 # define LoadCurrentThreadSlot(offset,reg);     \
 134         movl    SBCL_THREAD_BASE_EA, reg ;      \
 135         movl    offset(reg), reg ;
 136 #elif defined(LISP_FEATURE_LINUX) || defined(LISP_FEATURE_SUNOS) || defined(LISP_FEATURE_FREEBSD) || \
 137         defined(LISP_FEATURE_DRAGONFLY)
 138   /* see comment in arch_os_thread_init */
 139 # define SBCL_THREAD_BASE_EA %fs:THREAD_SELFPTR_OFFSET
 140 # define MAYBE_FS(addr) addr
 141 #else
 142   /* perhaps there's an OS out there that actually supports %fs without
 143    * jumping through hoops, so just in case, here a default definition: */
 144 # define SBCL_THREAD_BASE_EA $0
 145 # define MAYBE_FS(addr) %fs:addr
 146 #endif
 147
 148 /* gas can't parse 4096LU; redefine */
 149 #if BACKEND_PAGE_BYTES == 4096
 150 # undef BACKEND_PAGE_BYTES
 151 # define BACKEND_PAGE_BYTES 4096
 152 #elif BACKEND_PAGE_BYTES == 32768
 153 # undef BACKEND_PAGE_BYTES
 154 # define BACKEND_PAGE_BYTES 32768
 155 #else
 156 # error BACKEND_PAGE_BYTES mismatch
 157 #endif
 158
 159 /* OAOOM because we don't have the C headers here */
 160 #define THREAD_CSP_PAGE_SIZE BACKEND_PAGE_BYTES
 161
 162 /* the CSP page sits right before the thread */
 163 #define THREAD_SAVED_CSP_OFFSET (-THREAD_CSP_PAGE_SIZE)
 164
 165 /*
 166  * x86/darwin (as of MacOS X 10.4.5) doesn't reliably file signal
 167  * handlers (SIGTRAP or Mach exception handlers) for 0xCC, wo we have
 168  * to use ud2 instead. ud2 is an undefined opcode, #x0b0f, or
 169  * 0F 0B in low-endian notation, that causes SIGILL to fire. We check
 170  * for this instruction in the SIGILL handler and if we see it, we
 171  * advance the EIP by two bytes to skip over ud2 instruction and
 172  * call sigtrap_handler. */
 173 #if defined(LISP_FEATURE_UD2_BREAKPOINTS)
 174 #define TRAP ud2
 175 #else
 176 #define TRAP int3
 177 #endif
 178
 179         .text
 180         .globl  GNAME(all_threads)
 181 \f
 182 /*
 183  * A call to call_into_c preserves esi, edi, and ebp.
 184  * (The C function will preserve ebx, esi, edi, and ebp across its
 185  * function call, but we trash ebx ourselves by using it to save the
 186  * return Lisp address.)
 187  *
 188  * Return values are in eax and maybe edx for quads, or st(0) for
 189  * floats.
 190  *
 191  * This should work for Lisp calls C calls Lisp calls C..
 192  *
 193  * FIXME & OAOOM: This duplicates call-out in src/compiler/x86/c-call.lisp,
 194  * so if you tweak this, change that too!
 195  */
 196 /*
 197  * Note on sections specific to LISP_FEATURE_SB_SAFEPOINT:
 198  *
 199  * The code below is essential to safepoint-based garbage collection,
 200  * and several details need to be considered for correct implementation.
 201  *
 202  * The stack spilling approach:
 203  *   On SB-SAFEPOINT platforms, the CALL-OUT vop is defined to spill all
 204  *   live Lisp TNs to the stack to provide information for conservative
 205  *   GC cooperatively (avoiding the need to retrieve register values
 206  *   from POSIX signal contexts or Windows GetThreadContext()).
 207  *
 208  * Finding the SP at all:
 209  *   The main remaining value needed by GC is the stack pointer (SP) at
 210  *   the moment of entering the foreign function.  For this purpose, a
 211  *   thread-local field for the SP is used.  Two stores to that field
 212  *   are done for each C call, one to save the SP before calling out and
 213  *   and one to undo that store afterwards.
 214  *
 215  * Stores as synchronization points:
 216  *   These two stores delimit the C call: While the SP is set, our
 217  *   thread is known not to run Lisp code: During GC, memory protection
 218  *   ensures that no thread proceeds across stores.
 219  *
 220  * The return PC issue:
 221  *   (Note that CALL-OUT has, in principle, two versions: Inline
 222  *   assembly in the VOP -or- alternatively the out-of-line version you
 223  *   are currently reading.  In reality, safepoint builds currently
 224  *   lack the inline code entirely.)
 225  *
 226  *   Both versions need to take special care with the return PC:
 227  *   - In the inline version of the code (if it existed), the two stores
 228  *     would be done directly in the CALL-OUT vop.  In that theoretical
 229  *     implementation, there is a time interval between return of the
 230  *     actual C call and a second SP store during which the return
 231  *     address might not be on the stack anymore.
 232  *   - In this out-of-line version, the stores are done during
 233  *     call_into_c's frame, but an equivalent problem arises: In order
 234  *     to present the stack of arguments as our foreign function expects
 235  *     them, call_into_c has to pop the Lisp return address into a
 236  *     register first; this register has to be preserved by GENCGC
 237  *     separately: our return address is not in the stack anymore.
 238  *   In both case, stack scanning alone is not sufficient to pin
 239  *   the return address, and we communicate it to GC explicitly
 240  *   in addition to the SP.
 241  *
 242  * Note on look-alike accessor macros with vastly different behaviour:
 243  *   THREAD_PC_AROUND_FOREIGN_CALL_OFFSET is an "ordinary" field of the
 244  *   struct thread, whereas THREAD_SAVED_CSP_OFFSET is a synchronization
 245  *   point on a potentially write-protected page.
 246 */
 247
 248         .text
 249         .align  align_16byte,0x90
 250         .globl GNAME(call_into_c)
 251         TYPE(GNAME(call_into_c))
 252 GNAME(call_into_c):
 253 /* Save the return Lisp address in ebx. */
 254         popl    %ebx
 255
 256 /* Setup the NPX for C */
 257         /* The VOP says regarding CLD: "Clear out DF: Darwin, Windows,
 258          * and Solaris at least require this, and it should not hurt
 259          * others either." call_into_c didn't have it, but better safe than
 260          * sorry. */
 261         cld
 262         fstp    %st(0)
 263         fstp    %st(0)
 264         fstp    %st(0)
 265         fstp    %st(0)
 266         fstp    %st(0)
 267         fstp    %st(0)
 268         fstp    %st(0)
 269         fstp    %st(0)
 270
 271 #ifdef LISP_FEATURE_SB_SAFEPOINT
 272         /* enter safe region: store SP and return PC */
 273         movl    SBCL_THREAD_BASE_EA,%edi
 274         movl    %esp,MAYBE_FS(THREAD_SAVED_CSP_OFFSET(%edi))
 275         movl    %ebx,MAYBE_FS(THREAD_PC_AROUND_FOREIGN_CALL_OFFSET(%edi))
 276 #endif
 277
 278         /* foreign call, preserving ESI, EDI, and EBX */
 279         call    *%eax             # normal callout using Lisp stack
 280         /* return values now in eax/edx OR st(0) */
 281
 282 #ifdef LISP_FEATURE_SB_SAFEPOINT
 283         /* leave region: clear the SP!  (Also unpin the return PC.) */
 284         xorl    %ecx,%ecx
 285         movl    %ecx,MAYBE_FS(THREAD_SAVED_CSP_OFFSET(%edi))
 286         movl    %ecx,MAYBE_FS(THREAD_PC_AROUND_FOREIGN_CALL_OFFSET(%edi))
 287 #endif
 288
 289         movl    %eax,%ecx         # remember integer return value
 290
 291 /* Check for a return FP value. */
 292         fxam
 293         fnstsw  %ax
 294         andl    $0x4500,%eax
 295         cmpl    $0x4100,%eax
 296         jne     Lfp_rtn_value
 297
 298 /* The return value is in eax, or eax,edx? */
 299 /* Set up the NPX stack for Lisp. */
 300         fldz                    # Ensure no regs are empty.
 301         fldz
 302         fldz
 303         fldz
 304         fldz
 305         fldz
 306         fldz
 307         fldz
 308
 309 /* Restore the return value. */
 310         movl    %ecx,%eax       # maybe return value
 311
 312 /* Return. */
 313         jmp     *%ebx
 314
 315 Lfp_rtn_value:
 316 /* The return result is in st(0). */
 317 /* Set up the NPX stack for Lisp, placing the result in st(0). */
 318         fldz                    # Ensure no regs are empty.
 319         fldz
 320         fldz
 321         fldz
 322         fldz
 323         fldz
 324         fldz
 325         fxch    %st(7)          # Move the result back to st(0).
 326
 327 /* We don't need to restore eax, because the result is in st(0). */
 328
 329 /* Return. FIXME: It would be nice to restructure this to use RET. */
 330         jmp     *%ebx
 331
 332         SIZE(GNAME(call_into_c))
 333
 334 \f
 335         .text
 336         .globl GNAME(call_into_lisp_first_time)
 337         TYPE(GNAME(call_into_lisp_first_time))
 338
 339 /* We don't worry too much about saving registers
 340  * here, because we never expect to return from the initial call to lisp
 341  * anyway */
 342
 343         .align  align_16byte,0x90
 344 GNAME(call_into_lisp_first_time):
 345         pushl   %ebp            # Save old frame pointer.
 346         movl    %esp,%ebp       # Establish new frame.
 347 #ifndef LISP_FEATURE_WIN32
 348         movl    GNAME(all_threads),%eax
 349         /* pthread machinery takes care of this for other threads */
 350         movl    THREAD_CONTROL_STACK_END_OFFSET(%eax) ,%esp
 351 #else
 352 /* Win32 -really- doesn't like you switching stacks out from under it. */
 353         movl    GNAME(all_threads),%eax
 354 #endif
 355         jmp     Lstack
 356 \f
 357         .text
 358         .globl GNAME(call_into_lisp)
 359         TYPE(GNAME(call_into_lisp))
 360
 361 /* The C conventions require that ebx, esi, edi, and ebp be preserved
 362  * across function calls. */
 363
 364         .align  align_16byte,0x90
 365 GNAME(call_into_lisp):
 366         pushl   %ebp            # Save old frame pointer.
 367         movl    %esp,%ebp       # Establish new frame.
 368
 369 Lstack:
 370 /* Save the NPX state */
 371         fwait                   # Catch any pending NPX exceptions.
 372         subl    $108,%esp       # Make room for the NPX state.
 373         fnsave  (%esp)          # save and reset NPX
 374
 375         movl    (%esp),%eax     # Load NPX control word.
 376         andl    $0xfffff2ff,%eax        # Set rounding mode to nearest.
 377         orl     $0x00000200,%eax        # Set precision to 64 bits.  (53-bit mantissa)
 378         pushl   %eax
 379         fldcw   (%esp)          # Recover modes.
 380         popl    %eax
 381
 382         fldz                    # Ensure no FP regs are empty.
 383         fldz
 384         fldz
 385         fldz
 386         fldz
 387         fldz
 388         fldz
 389         fldz
 390
 391 /* Save C regs: ebx esi edi. */
 392         pushl   %ebx
 393         pushl   %esi
 394         pushl   %edi
 395
 396 /* Clear descriptor regs. */
 397         xorl    %eax,%eax       # lexenv
 398         xorl    %ebx,%ebx       # available
 399         xorl    %ecx,%ecx       # arg count
 400         xorl    %edx,%edx       # first arg
 401         xorl    %edi,%edi       # second arg
 402         xorl    %esi,%esi       # third arg
 403
 404 /* no longer in function call */
 405         movl    %esp,%ebx       # remember current stack
 406         pushl   %ebx            # Save entry stack on (maybe) new stack.
 407
 408         /* Establish Lisp args. */
 409         movl     8(%ebp),%eax   # lexenv?
 410         movl    12(%ebp),%ebx   # address of arg vec
 411         movl    16(%ebp),%ecx   # num args
 412         shll    $2,%ecx         # Make num args into fixnum.
 413         cmpl    $0,%ecx
 414         je      Ldone
 415         movl    (%ebx),%edx     # arg0
 416         cmpl    $4,%ecx
 417         je      Ldone
 418         movl    4(%ebx),%edi    # arg1
 419         cmpl    $8,%ecx
 420         je      Ldone
 421         movl    8(%ebx),%esi    # arg2
 422 Ldone:
 423         /* Registers eax, ecx, edx, edi, and esi are now live. */
 424
 425 #ifdef LISP_FEATURE_WIN32
 426         /* Establish an SEH frame. */
 427 #ifdef LISP_FEATURE_SB_THREAD
 428         /* Save binding stack pointer */
 429         subl $4, %esp
 430         pushl %eax
 431         movl SBCL_THREAD_BASE_EA, %eax
 432         movl THREAD_BINDING_STACK_POINTER_OFFSET(%eax), %eax
 433         movl %eax, 4(%esp)
 434         popl %eax
 435 #else
 436         pushl   BINDING_STACK_POINTER + SYMBOL_VALUE_OFFSET
 437 #endif
 438         pushl   $GNAME(exception_handler_wrapper)
 439         pushl   %fs:0
 440         movl    %esp, %fs:0
 441 #endif
 442
 443         /* Alloc new frame. */
 444         push    %ebp            # Dummy for return address
 445         push    %ebp            # fp in save location S1
 446         mov     %esp,%ebp       # The current sp marks start of new frame.
 447         sub     $4,%esp         # Ensure 3 slots are allocated, two above.
 448
 449         call    *CLOSURE_FUN_OFFSET(%eax)
 450
 451         /* If the function returned multiple values, it will return to
 452            this point.  Lose them */
 453         jnc     LsingleValue
 454         mov     %ebx, %esp
 455 LsingleValue:
 456         /* A singled value function returns here */
 457
 458 #ifdef LISP_FEATURE_WIN32
 459         /* Remove our SEH frame. */
 460         mov     %fs:0,%esp
 461         popl    %fs:0
 462         add     $8, %esp
 463 #endif
 464
 465 /* Restore the stack, in case there was a stack change. */
 466         popl    %esp            # c-sp
 467
 468 /* Restore C regs: ebx esi edi. */
 469         popl    %edi
 470         popl    %esi
 471         popl    %ebx
 472
 473 /* Restore the NPX state. */
 474         frstor  (%esp)
 475         addl    $108, %esp
 476
 477         popl    %ebp            # c-sp
 478         movl    %edx,%eax       # c-val
 479         ret
 480         SIZE(GNAME(call_into_lisp))
 481 \f
 482 /* support for saving and restoring the NPX state from C */
 483         .text
 484         .globl  GNAME(fpu_save)
 485         TYPE(GNAME(fpu_save))
 486         .align  2,0x90
 487 GNAME(fpu_save):
 488         movl    4(%esp),%eax
 489         fnsave  (%eax)          # Save the NPX state. (resets NPX)
 490         ret
 491         SIZE(GNAME(fpu_save))
 492
 493         .globl  GNAME(fpu_restore)
 494         TYPE(GNAME(fpu_restore))
 495         .align  2,0x90
 496 GNAME(fpu_restore):
 497         movl    4(%esp),%eax
 498         frstor  (%eax)          # Restore the NPX state.
 499         ret
 500         SIZE(GNAME(fpu_restore))
 501 \f
 502 /*
 503  * the undefined-function trampoline
 504  */
 505         .text
 506         .align  align_16byte,0x90
 507         .globl GNAME(undefined_tramp)
 508         TYPE(GNAME(undefined_tramp))
 509         .byte   0, 0, 0, SIMPLE_FUN_HEADER_WIDETAG
 510 GNAME(undefined_tramp):
 511         pop     4(%ebp)         # Save return PC for backtrace.
 512         TRAP
 513         .byte   trap_Error
 514         .byte   2
 515         .byte   UNDEFINED_FUN_ERROR
 516         .byte   sc_DescriptorReg # eax in the Descriptor-reg SC
 517         ret
 518         SIZE(GNAME(undefined_tramp))
 519
 520 /* KLUDGE: FIND-ESCAPED-FRAME (SYS:SRC;CODE;DEBUG-INT.LISP) needs
 521  * to know the name of the function immediately following the
 522  * undefined-function trampoline. */
 523
 524 /*
 525  * the closure trampoline
 526  */
 527         .text
 528         .align  align_16byte,0x90
 529         .globl GNAME(closure_tramp)
 530         TYPE(GNAME(closure_tramp))
 531         .byte   0, 0, 0, SIMPLE_FUN_HEADER_WIDETAG
 532 GNAME(closure_tramp):
 533         movl    FDEFN_FUN_OFFSET(%eax),%eax
 534         /* FIXME: The '*' after "jmp" in the next line is from PVE's
 535          * patch posted to the CMU CL mailing list Oct 6, 1999. It looks
 536          * reasonable, and it certainly seems as though if CMU CL needs it,
 537          * SBCL needs it too, but I haven't actually verified that it's
 538          * right. It would be good to find a way to force the flow of
 539          * control through here to test it. */
 540         jmp     *CLOSURE_FUN_OFFSET(%eax)
 541         SIZE(GNAME(closure_tramp))
 542
 543         .text
 544         .align  align_16byte,0x90
 545         .globl GNAME(funcallable_instance_tramp)
 546         TYPE(GNAME(funcallable_instance_tramp))
 547 GNAME(funcallable_instance_tramp):
 548         movl    FUNCALLABLE_INSTANCE_FUNCTION_OFFSET(%eax),%eax
 549         /* KLUDGE: on this platform, whatever kind of function is in %rax
 550          * now, the first word of it contains the address to jump to. */
 551         jmp     *CLOSURE_FUN_OFFSET(%eax)
 552         SIZE(GNAME(funcallable_instance_tramp))
 553
 554 /*
 555  * fun-end breakpoint magic
 556  */
 557
 558 /*
 559  * For an explanation of the magic involved in function-end
 560  * breakpoints, see the implementation in ppc-assem.S.
 561  */
 562
 563         .text
 564         .globl  GNAME(fun_end_breakpoint_guts)
 565         .align  align_16byte
 566 GNAME(fun_end_breakpoint_guts):
 567         /* Multiple Value return */
 568         jc      multiple_value_return
 569         /* Single value return: The eventual return will now use the
 570            multiple values return convention but with a return values
 571            count of one. */
 572         movl    %esp,%ebx       # Setup ebx - the ofp.
 573         subl    $4,%esp         # Allocate one stack slot for the return value
 574         movl    $4,%ecx         # Setup ecx for one return value.
 575         movl    $(NIL),%edi     # default second value
 576         movl    $(NIL),%esi     # default third value
 577
 578 multiple_value_return:
 579
 580         .globl GNAME(fun_end_breakpoint_trap)
 581 GNAME(fun_end_breakpoint_trap):
 582         TRAP
 583         .byte   trap_FunEndBreakpoint
 584         hlt                     # We should never return here.
 585
 586         .globl GNAME(fun_end_breakpoint_end)
 587 GNAME(fun_end_breakpoint_end):
 588
 589 \f
 590         .globl  GNAME(do_pending_interrupt)
 591         TYPE(GNAME(do_pending_interrupt))
 592         .align  align_16byte,0x90
 593 GNAME(do_pending_interrupt):
 594         TRAP
 595         .byte   trap_PendingInterrupt
 596         ret
 597         SIZE(GNAME(do_pending_interrupt))
 598 \f
 599 /* Allocate bytes and return the start of the allocated space
 600  * in the specified destination register.
 601  *
 602  * In the general case the size will be in the destination register.
 603  *
 604  * All registers must be preserved except the destination.
 605  * The C conventions will preserve ebx, esi, edi, and ebp.
 606  * So only eax, ecx, and edx need special care here.
 607  *
 608  * ALLOC factors out the logic of calling alloc(): stack alignment, etc.
 609  *
 610  * DEFINE_ALLOC_TO_FOO defines an alloction routine.
 611  */
 612
 613 #ifdef LISP_FEATURE_DARWIN
 614 #define ALLOC(size)                                             \
 615         pushl   %ebp;              /* Save EBP               */ \
 616         movl    %esp,%ebp;         /* Save ESP to EBP        */ \
 617         pushl   $0;                /* Reserve space for arg  */ \
 618         andl    $0xfffffff0,%esp;  /* Align stack to 16bytes */ \
 619         movl    size, (%esp);      /* Argument to alloc      */ \
 620         call    GNAME(alloc);                                   \
 621         movl    %ebp,%esp;         /* Restore ESP from EBP   */ \
 622         popl    %ebp;              /* Restore EBP            */
 623 #else
 624 #define ALLOC(size)                                             \
 625         pushl   size;              /* Argument to alloc      */ \
 626         call    GNAME(alloc);                                   \
 627         addl    $4,%esp;           /* Pop argument           */
 628 #endif
 629
 630 #define DEFINE_ALLOC_TO_EAX(name,size)                          \
 631         .globl  GNAME(name);                                    \
 632         TYPE(GNAME(name));                                      \
 633         .align  align_16byte,0x90;                              \
 634 GNAME(name):                                                    \
 635         pushl   %ecx;              /* Save ECX and EDX       */ \
 636         pushl   %edx;                                           \
 637         ALLOC(size)                                             \
 638         popl    %edx;              /* Restore ECX and EDX    */ \
 639         popl    %ecx;                                           \
 640         ret;                                                    \
 641         SIZE(GNAME(name))
 642
 643 #define DEFINE_ALLOC_TO_ECX(name,size)                          \
 644         .globl  GNAME(name);                                    \
 645         TYPE(GNAME(name));                                      \
 646         .align  align_16byte,0x90;                              \
 647 GNAME(name):                                                    \
 648         pushl   %eax;              /* Save EAX and EDX       */ \
 649         pushl   %edx;                                           \
 650         ALLOC(size)                                             \
 651         movl    %eax,%ecx;         /* Result to destination  */ \
 652         popl    %edx;                                           \
 653         popl    %eax;                                           \
 654         ret;                                                    \
 655         SIZE(GNAME(name))
 656
 657 #define DEFINE_ALLOC_TO_EDX(name,size)                          \
 658         .globl  GNAME(name);                                    \
 659         TYPE(GNAME(name));                                      \
 660         .align  align_16byte,0x90;                              \
 661 GNAME(name):                                                    \
 662         pushl   %eax;               /* Save EAX and ECX      */ \
 663         pushl   %ecx;                                           \
 664         ALLOC(size)                                             \
 665         movl    %eax,%edx;          /* Restore EAX and ECX   */ \
 666         popl    %ecx;                                           \
 667         popl    %eax;                                           \
 668         ret;                                                    \
 669         SIZE(GNAME(name))
 670
 671 #define DEFINE_ALLOC_TO_REG(name,reg,size)                      \
 672         .globl  GNAME(name);                                    \
 673         TYPE(GNAME(name));                                      \
 674         .align  align_16byte,0x90;                              \
 675 GNAME(name):                                                    \
 676         pushl   %eax;              /* Save EAX, ECX, and EDX */ \
 677         pushl   %ecx;                                           \
 678         pushl   %edx;                                           \
 679         ALLOC(size)                                             \
 680         movl    %eax,reg;          /* Restore them           */ \
 681         popl    %edx;                                           \
 682         popl    %ecx;                                           \
 683         popl    %eax;                                           \
 684         ret;                                                    \
 685         SIZE(GNAME(name))
 686
 687 DEFINE_ALLOC_TO_EAX(alloc_to_eax,%eax)
 688 DEFINE_ALLOC_TO_EAX(alloc_8_to_eax,$8)
 689 DEFINE_ALLOC_TO_EAX(alloc_16_to_eax,$16)
 690
 691 DEFINE_ALLOC_TO_ECX(alloc_to_ecx,%ecx)
 692 DEFINE_ALLOC_TO_ECX(alloc_8_to_ecx,$8)
 693 DEFINE_ALLOC_TO_ECX(alloc_16_to_ecx,$16)
 694
 695 DEFINE_ALLOC_TO_EDX(alloc_to_edx,%edx)
 696 DEFINE_ALLOC_TO_EDX(alloc_8_to_edx,$8)
 697 DEFINE_ALLOC_TO_EDX(alloc_16_to_edx,$16)
 698
 699 DEFINE_ALLOC_TO_REG(alloc_to_ebx,%ebx,%ebx)
 700 DEFINE_ALLOC_TO_REG(alloc_8_to_ebx,%ebx,$8)
 701 DEFINE_ALLOC_TO_REG(alloc_16_to_ebx,%ebx,$16)
 702
 703 DEFINE_ALLOC_TO_REG(alloc_to_esi,%esi,%esi)
 704 DEFINE_ALLOC_TO_REG(alloc_8_to_esi,%esi,$8)
 705 DEFINE_ALLOC_TO_REG(alloc_16_to_esi,%esi,$16)
 706
 707 DEFINE_ALLOC_TO_REG(alloc_to_edi,%edi,%edi)
 708 DEFINE_ALLOC_TO_REG(alloc_8_to_edi,%edi,$8)
 709 DEFINE_ALLOC_TO_REG(alloc_16_to_edi,%edi,$16)
 710
 711 /* Called from lisp when an inline allocation overflows.
 712  * Every register except the result needs to be preserved.
 713  * We depend on C to preserve ebx, esi, edi, and ebp.
 714  * But where necessary must save eax, ecx, edx. */
 715
 716 #ifdef LISP_FEATURE_SB_THREAD
 717 #define START_REGION %fs:THREAD_ALLOC_REGION_OFFSET
 718 #else
 719 #define START_REGION GNAME(boxed_region)
 720 #endif
 721
 722 #if defined(LISP_FEATURE_SB_THREAD) && defined(LISP_FEATURE_WIN32)
 723 #define ALLOC_OVERFLOW(size,scratch)                            \
 724         movl SBCL_THREAD_BASE_EA, scratch;                      \
 725         /* Calculate the size for the allocation. */            \
 726         subl THREAD_ALLOC_REGION_OFFSET(scratch),size;          \
 727         ALLOC(size)
 728 #else
 729 #define ALLOC_OVERFLOW(size,scratch)                    \
 730           /* Calculate the size for the allocation. */  \
 731           subl    START_REGION,size;                    \
 732           ALLOC(size)
 733 #endif
 734
 735 /* This routine handles an overflow with eax=crfp+size. So the
 736    size=eax-crfp. */
 737         .align  align_16byte
 738         .globl  GNAME(alloc_overflow_eax)
 739         TYPE(GNAME(alloc_overflow_eax))
 740 GNAME(alloc_overflow_eax):
 741         pushl   %ecx            # Save ecx
 742         pushl   %edx            # Save edx
 743         ALLOC_OVERFLOW(%eax,%edx)
 744         popl    %edx    # Restore edx.
 745         popl    %ecx    # Restore ecx.
 746         ret
 747         SIZE(GNAME(alloc_overflow_eax))
 748
 749         .align  align_16byte
 750         .globl  GNAME(alloc_overflow_ecx)
 751         TYPE(GNAME(alloc_overflow_ecx))
 752 GNAME(alloc_overflow_ecx):
 753         pushl   %eax            # Save eax
 754         pushl   %edx            # Save edx
 755         ALLOC_OVERFLOW(%ecx,%edx)
 756         movl    %eax,%ecx       # setup the destination.
 757         popl    %edx    # Restore edx.
 758         popl    %eax    # Restore eax.
 759         ret
 760         SIZE(GNAME(alloc_overflow_ecx))
 761
 762         .align  align_16byte
 763         .globl  GNAME(alloc_overflow_edx)
 764         TYPE(GNAME(alloc_overflow_edx))
 765 GNAME(alloc_overflow_edx):
 766         pushl   %eax            # Save eax
 767         pushl   %ecx            # Save ecx
 768         ALLOC_OVERFLOW(%edx,%ecx)
 769         movl    %eax,%edx       # setup the destination.
 770         popl    %ecx    # Restore ecx.
 771         popl    %eax    # Restore eax.
 772         ret
 773         SIZE(GNAME(alloc_overflow_edx))
 774
 775 /* This routine handles an overflow with ebx=crfp+size. So the
 776    size=ebx-crfp. */
 777         .align  align_16byte
 778         .globl  GNAME(alloc_overflow_ebx)
 779         TYPE(GNAME(alloc_overflow_ebx))
 780 GNAME(alloc_overflow_ebx):
 781         pushl   %eax            # Save eax
 782         pushl   %ecx            # Save ecx
 783         pushl   %edx            # Save edx
 784         ALLOC_OVERFLOW(%ebx,%edx)
 785         movl    %eax,%ebx       # setup the destination.
 786         popl    %edx    # Restore edx.
 787         popl    %ecx    # Restore ecx.
 788         popl    %eax    # Restore eax.
 789         ret
 790         SIZE(GNAME(alloc_overflow_ebx))
 791
 792 /* This routine handles an overflow with esi=crfp+size. So the
 793    size=esi-crfp. */
 794         .align  align_16byte
 795         .globl  GNAME(alloc_overflow_esi)
 796         TYPE(GNAME(alloc_overflow_esi))
 797 GNAME(alloc_overflow_esi):
 798         pushl   %eax            # Save eax
 799         pushl   %ecx            # Save ecx
 800         pushl   %edx            # Save edx
 801         ALLOC_OVERFLOW(%esi,%edx)
 802         movl    %eax,%esi       # setup the destination.
 803         popl    %edx    # Restore edx.
 804         popl    %ecx    # Restore ecx.
 805         popl    %eax    # Restore eax.
 806         ret
 807         SIZE(GNAME(alloc_overflow_esi))
 808
 809         .align  align_16byte
 810         .globl  GNAME(alloc_overflow_edi)
 811         TYPE(GNAME(alloc_overflow_edi))
 812 GNAME(alloc_overflow_edi):
 813         pushl   %eax            # Save eax
 814         pushl   %ecx            # Save ecx
 815         pushl   %edx            # Save edx
 816         ALLOC_OVERFLOW(%edi,%edx)
 817         movl    %eax,%edi       # setup the destination.
 818         popl    %edx    # Restore edx.
 819         popl    %ecx    # Restore ecx.
 820         popl    %eax    # Restore eax.
 821         ret
 822         SIZE(GNAME(alloc_overflow_edi))
 823
 824
 825 #ifdef LISP_FEATURE_WIN32
 826         /* The guts of the exception-handling system doesn't use
 827          * frame pointers, which manages to throw off backtraces
 828          * rather badly.  So here we grab the (known-good) EBP
 829          * and EIP from the exception context and use it to fake
 830          * up a stack frame which will skip over the system SEH
 831          * code. */
 832         .align  align_16byte
 833         .globl  GNAME(exception_handler_wrapper)
 834         TYPE(GNAME(exception_handler_wrapper))
 835 GNAME(exception_handler_wrapper):
 836         /* Context layout is: */
 837         /* 7 dwords before FSA. (0x1c) */
 838         /* 8 dwords and 0x50 bytes in the FSA. (0x70/0x8c) */
 839         /* 4 dwords segregs. (0x10/0x9c) */
 840         /* 6 dwords non-stack GPRs. (0x18/0xb4) */
 841         /* EBP (at 0xb4) */
 842         /* EIP (at 0xb8) */
 843 #define CONTEXT_EBP_OFFSET 0xb4
 844 #define CONTEXT_EIP_OFFSET 0xb8
 845         /* some other stuff we don't care about. */
 846         pushl   %ebp
 847         movl    0x10(%esp), %ebp        /* context */
 848         pushl   CONTEXT_EIP_OFFSET(%ebp)
 849         pushl   CONTEXT_EBP_OFFSET(%ebp)
 850         movl    %esp, %ebp
 851         pushl   0x1c(%esp)
 852         pushl   0x1c(%esp)
 853         pushl   0x1c(%esp)
 854         pushl   0x1c(%esp)
 855         call    GNAME(handle_exception)
 856         lea     8(%ebp), %esp
 857         popl    %ebp
 858         ret
 859         SIZE(GNAME(exception_handler_wrapper))
 860 #endif
 861
 862 #ifdef LISP_FEATURE_DARWIN
 863         .align align_16byte
 864         .globl GNAME(call_into_lisp_tramp)
 865         TYPE(GNAME(call_into_lisp_tramp))
 866 GNAME(call_into_lisp_tramp):
 867         /* 1. build the stack frame from the block that's pointed to by ECX
 868            2. free the block
 869            3. set ECX to 0
 870            4. call the function via call_into_lisp
 871         */
 872         pushl   0(%ecx)          /* return address */
 873
 874         pushl   %ebp
 875         movl    %esp, %ebp
 876
 877         pushl   32(%ecx)         /* eflags */
 878         pushl   28(%ecx)         /* EAX */
 879         pushl   20(%ecx)         /* ECX */
 880         pushl   16(%ecx)         /* EDX */
 881         pushl   24(%ecx)         /* EBX */
 882         pushl   $0                /* popal is going to ignore esp */
 883         pushl   %ebp              /* is this right?? */
 884         pushl   12(%ecx)         /* ESI */
 885         pushl   8(%ecx)          /* EDI */
 886         pushl   $0                /* args for call_into_lisp */
 887         pushl   $0
 888         pushl   4(%ecx)          /* function to call */
 889
 890         /* free our save block */
 891         pushl   %ecx              /* reserve sufficient space on stack for args */
 892         pushl   %ecx
 893         andl    $0xfffffff0, %esp  /* align stack */
 894         movl    $0x40, 4(%esp)
 895         movl    %ecx, (%esp)
 896         call    GNAME(os_invalidate)
 897
 898         /* call call_into_lisp */
 899         leal    -48(%ebp), %esp
 900         call    GNAME(call_into_lisp)
 901
 902         /* Clean up our mess */
 903         leal    -36(%ebp), %esp
 904         popal
 905         popfl
 906         leave
 907         ret
 908
 909         SIZE(call_into_lisp_tramp)
 910 #endif
 911
 912         .align  align_16byte,0x90
 913         .globl  GNAME(post_signal_tramp)
 914         TYPE(GNAME(post_signal_tramp))
 915 GNAME(post_signal_tramp):
 916         /* this is notionally the second half of a function whose first half
 917          * doesn't exist.  This is where call_into_lisp returns when called
 918          * using return_to_lisp_function */
 919         addl $12,%esp   /* clear call_into_lisp args from stack */
 920         popal           /* restore registers */
 921         popfl
 922 #ifdef LISP_FEATURE_DARWIN
 923         /* skip two padding words */
 924         addl $8,%esp
 925 #endif
 926         leave
 927         ret
 928         SIZE(GNAME(post_signal_tramp))
 929
 930
 931         /* fast_bzero implementations and code to detect which implementation
 932          * to use.
 933          */
 934 \f
 935         .globl GNAME(fast_bzero_pointer)
 936         .data
 937         .align  align_16byte
 938 GNAME(fast_bzero_pointer):
 939         /* Variable containing a pointer to the bzero function to use.
 940          * Initially points to a basic function.  Change this variable
 941          * to fast_bzero_detect if OS supports SSE.  */
 942         .long GNAME(fast_bzero_base)
 943 \f
 944         .text
 945         .align  align_16byte,0x90
 946         .globl GNAME(fast_bzero)
 947         TYPE(GNAME(fast_bzero))
 948 GNAME(fast_bzero):
 949         /* Indirect function call */
 950         jmp *GNAME(fast_bzero_pointer)
 951         SIZE(GNAME(fast_bzero))
 952
 953 \f
 954         .text
 955         .align  align_16byte,0x90
 956         .globl GNAME(fast_bzero_detect)
 957         TYPE(GNAME(fast_bzero_detect))
 958 GNAME(fast_bzero_detect):
 959         /* Decide whether to use SSE, MMX or REP version */
 960         push %eax /* CPUID uses EAX-EDX */
 961         push %ebx
 962         push %ecx
 963         push %edx
 964         mov $1, %eax
 965         cpuid
 966         test $0x04000000, %edx    /* SSE2 needed for MOVNTDQ */
 967         jnz Lsse2
 968         /* Originally there was another case here for using the
 969          * MOVNTQ instruction for processors that supported MMX but
 970          * not SSE2. This turned out to be a loss especially on
 971          * Athlons (where this instruction is apparently microcoded
 972          * somewhat slowly). So for simplicity revert to REP STOSL
 973          * for all non-SSE2 processors.
 974          */
 975 Lbase:
 976         movl $(GNAME(fast_bzero_base)), GNAME(fast_bzero_pointer)
 977         jmp Lrestore
 978 Lsse2:
 979         movl $(GNAME(fast_bzero_sse)), GNAME(fast_bzero_pointer)
 980         jmp Lrestore
 981
 982 Lrestore:
 983         pop %edx
 984         pop %ecx
 985         pop %ebx
 986         pop %eax
 987         jmp *GNAME(fast_bzero_pointer)
 988
 989         SIZE(GNAME(fast_bzero_detect))
 990
 991 \f
 992         .text
 993         .align  align_16byte,0x90
 994         .globl GNAME(fast_bzero_sse)
 995         TYPE(GNAME(fast_bzero_sse))
 996
 997 GNAME(fast_bzero_sse):
 998         /* A fast routine for zero-filling blocks of memory that are
 999          * guaranteed to start and end at a 4096-byte aligned address.
1000          */
1001         push %esi                 /* Save temporary registers */
1002         push %edi
1003         mov 16(%esp), %esi        /* Parameter: amount of bytes to fill */
1004         mov 12(%esp), %edi        /* Parameter: start address */
1005         shr $6, %esi              /* Amount of 64-byte blocks to copy */
1006         jz Lend_sse               /* If none, stop */
1007         movups %xmm7, -16(%esp)   /* Save XMM register */
1008         xorps  %xmm7, %xmm7       /* Zero the XMM register */
1009         jmp Lloop_sse
1010         .align align_16byte
1011 Lloop_sse:
1012
1013         /* Copy the 16 zeroes from xmm7 to memory, 4 times. MOVNTDQ is the
1014          * non-caching double-quadword moving variant, i.e. the memory areas
1015          * we're touching are not fetched into the L1 cache, since we're just
1016          * going to overwrite the memory soon anyway.
1017          */
1018         movntdq %xmm7, 0(%edi)
1019         movntdq %xmm7, 16(%edi)
1020         movntdq %xmm7, 32(%edi)
1021         movntdq %xmm7, 48(%edi)
1022
1023         add $64, %edi /* Advance pointer */
1024         dec %esi      /* Decrement 64-byte block count */
1025         jnz Lloop_sse
1026         movups -16(%esp), %xmm7 /* Restore the XMM register */
1027         sfence        /* Ensure that weakly ordered writes are flushed. */
1028 Lend_sse:
1029         mov 12(%esp), %esi      /* Parameter: start address */
1030         prefetcht0 0(%esi)      /* Prefetch the start of the block into cache,
1031                                  * since it's likely to be used immediately. */
1032         pop %edi      /* Restore temp registers */
1033         pop %esi
1034         ret
1035         SIZE(GNAME(fast_bzero_sse))
1036
1037 \f
1038         .text
1039         .align  align_16byte,0x90
1040         .globl GNAME(fast_bzero_base)
1041         TYPE(GNAME(fast_bzero_base))
1042
1043 GNAME(fast_bzero_base):
1044         /* A fast routine for zero-filling blocks of memory that are
1045          * guaranteed to start and end at a 4096-byte aligned address.
1046          */
1047         push %eax                 /* Save temporary registers */
1048         push %ecx
1049         push %edi
1050         mov 20(%esp), %ecx        /* Parameter: amount of bytes to fill */
1051         mov 16(%esp), %edi        /* Parameter: start address */
1052         xor %eax, %eax            /* Zero EAX */
1053         shr $2, %ecx              /* Amount of 4-byte blocks to copy */
1054         jz  Lend_base
1055
1056         rep
1057         stosl                     /* Store EAX to *EDI, ECX times, incrementing
1058                                    * EDI by 4 after each store */
1059
1060 Lend_base:
1061         pop %edi                  /* Restore temp registers */
1062         pop %ecx
1063         pop %eax
1064         ret
1065         SIZE(GNAME(fast_bzero_base))
1066
1067 \f
1068 /* When LISP_FEATURE_C_STACK_IS_CONTROL_STACK, we cannot safely scrub
1069  * the control stack from C, largely due to not knowing where the
1070  * active stack frame ends.  On such platforms, we reimplement the
1071  * core scrubbing logic in assembly, in this case here:
1072  */
1073         .text
1074         .align  align_16byte,0x90
1075         .globl GNAME(arch_scrub_control_stack)
1076         TYPE(GNAME(arch_scrub_control_stack))
1077 GNAME(arch_scrub_control_stack):
1078         /* We are passed three parameters:
1079          * A (struct thread *) at [ESP+4],
1080          * the address of the guard page at [ESP+8], and
1081          * the address of the hard guard page at [ESP+12].
1082          * We may trash EAX, ECX, and EDX with impunity.
1083          * [ESP] is our return address, [ESP-4] is the first
1084          * stack slot to scrub. */
1085
1086         /* We start by setting up our scrub pointer in EAX, our
1087          * guard page upper bound in ECX, and our hard guard
1088          * page upper bound in EDX. */
1089         lea     -4(%esp), %eax
1090         mov     GNAME(os_vm_page_size),%edx
1091         mov     %edx, %ecx
1092         add     8(%esp), %ecx
1093         add     12(%esp), %edx
1094
1095         /* We need to do a memory operation relative to the
1096          * thread pointer, so put it in %ecx and our guard
1097          * page upper bound in 4(%esp). */
1098         xchg    4(%esp), %ecx
1099
1100         /* Now we begin our main scrub loop. */
1101 ascs_outer_loop:
1102
1103         /* If we're about to scrub the hard guard page, exit. */
1104         cmp     %edx, %eax
1105         jae     ascs_check_guard_page
1106         cmp     12(%esp), %eax
1107         ja      ascs_finished
1108
1109 ascs_check_guard_page:
1110         /* If we're about to scrub the guard page, and the guard
1111          * page is protected, exit. */
1112         cmp     4(%esp), %eax
1113         jae     ascs_clear_loop
1114         cmp     8(%esp), %eax
1115         jbe     ascs_clear_loop
1116         cmpl    $(NIL), THREAD_CONTROL_STACK_GUARD_PAGE_PROTECTED_OFFSET(%ecx)
1117         jne     ascs_finished
1118
1119         /* Clear memory backwards to the start of the (4KiB) page */
1120 ascs_clear_loop:
1121         movl    $0, (%eax)
1122         test    $0xfff, %eax
1123         lea     -4(%eax), %eax
1124         jnz     ascs_clear_loop
1125
1126         /* If we're about to hit the hard guard page, exit. */
1127         cmp     %edx, %eax
1128         jae     ascs_finished
1129
1130         /* If the next (previous?) 4KiB page contains a non-zero
1131          * word, continue scrubbing. */
1132 ascs_check_loop:
1133         testl   $-1, (%eax)
1134         jnz     ascs_outer_loop
1135         test    $0xfff, %eax
1136         lea     -4(%eax), %eax
1137         jnz     ascs_check_loop
1138
1139 ascs_finished:
1140         ret
1141         SIZE(GNAME(arch_scrub_control_stack))