3 dnl m4 macros for x86 assembler.
5 dnl Copyright 1999-2003, 2007, 2010, 2012, 2014 Free Software Foundation, Inc.
7 dnl This file is part of the GNU MP Library.
9 dnl The GNU MP Library is free software; you can redistribute it and/or modify
10 dnl it under the terms of either:
12 dnl * the GNU Lesser General Public License as published by the Free
13 dnl Software Foundation; either version 3 of the License, or (at your
14 dnl option) any later version.
18 dnl * the GNU General Public License as published by the Free Software
19 dnl Foundation; either version 2 of the License, or (at your option) any
22 dnl or both in parallel, as here.
24 dnl The GNU MP Library is distributed in the hope that it will be useful, but
25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
29 dnl You should have received copies of the GNU General Public License and the
30 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
31 dnl see https://www.gnu.org/licenses/.
36 dnl m4 isn't perfect for processing BSD style x86 assembler code, the main
39 dnl 1. Doing define(foo,123) and then using foo in an addressing mode like
40 dnl foo(%ebx) expands as a macro rather than a constant. This is worked
41 dnl around by using deflit() from asm-defs.m4, instead of define().
43 dnl 2. Immediates in macro definitions need a space or `' to stop the $
44 dnl looking like a macro parameter. For example,
46 dnl define(foo, `mov $ 123, %eax')
48 dnl This is only a problem in macro definitions, not in ordinary text,
49 dnl and not in macro parameters like text passed to forloop() or ifdef().
dnl Limb size in bytes for this target. deflit() rather than define() is
dnl used so that a use such as GMP_LIMB_BYTES(%ebx) in an addressing mode
dnl is treated as a constant rather than as a macro call with arguments.
52 deflit(GMP_LIMB_BYTES, 4)
55 dnl Libtool gives -DPIC -DDLL_EXPORT to indicate a cygwin or mingw DLL. We
56 dnl undefine PIC since we don't need to be position independent in this
57 dnl case and definitely don't want the ELF style _GLOBAL_OFFSET_TABLE_ etc.
59 ifdef(`DLL_EXPORT',`undefine(`PIC')')
62 dnl Usage: CPUVEC_FUNCS_LIST
64 dnl A list of the functions from gmp-impl.h x86 struct cpuvec_t, in the
65 dnl order they appear in that structure.
67 define(CPUVEC_FUNCS_LIST,
107 dnl Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
109 dnl In the x86 code we use explicit TEXT and ALIGN() calls in the code,
110 dnl since different alignments are wanted in various circumstances. So for
115 dnl PROLOGUE(mpn_add_n)
119 define(`PROLOGUE_cpu',
121 m4_assert_defined(`WANT_PROFILING')
126 ifelse(WANT_PROFILING,`prof', ` call_mcount')
127 ifelse(WANT_PROFILING,`gprof', ` call_mcount')
128 ifelse(WANT_PROFILING,`instrument',` call_instrument(enter)')
132 dnl Usage: COFF_TYPE(GSYM_PREFIX`'foo)
134 dnl Emit COFF style ".def ... .endef" type information for a function, when
135 dnl supported. The argument should include any GSYM_PREFIX.
137 dnl See autoconf macro GMP_ASM_COFF_TYPE for HAVE_COFF_TYPE.
141 m4_assert_defined(`HAVE_COFF_TYPE')
142 `ifelse(HAVE_COFF_TYPE,yes,
149 dnl Usage: call_mcount
151 dnl For `gprof' style profiling, %ebp is setup as a frame pointer. None of
152 dnl the assembler routines use %ebp this way, so it's done only for the
153 dnl benefit of mcount. glibc sysdeps/i386/i386-mcount.S shows how mcount
154 dnl gets the current function from (%esp) and the parent from 4(%ebp).
156 dnl For `prof' style profiling gcc generates mcount calls without setting
157 dnl up %ebp, and the same is done here.
159 define(`call_mcount',
160 m4_assert_numargs(-1)
161 m4_assert_defined(`WANT_PROFILING')
162 m4_assert_defined(`MCOUNT_PIC_REG')
163 m4_assert_defined(`MCOUNT_NONPIC_REG')
164 m4_assert_defined(`MCOUNT_PIC_CALL')
165 m4_assert_defined(`MCOUNT_NONPIC_CALL')
166 `ifelse(ifdef(`PIC',`MCOUNT_PIC_REG',`MCOUNT_NONPIC_REG'),,,
169 L(mcount_data_`'mcount_counter):
173 ifelse(WANT_PROFILING,`gprof',
180 L(mcount_here_`'mcount_counter):
181 addl $_GLOBAL_OFFSET_TABLE_+[.-L(mcount_here_`'mcount_counter)], %ebx
182 ifelse(MCOUNT_PIC_REG,,,
183 ` leal L(mcount_data_`'mcount_counter)@GOTOFF(%ebx), MCOUNT_PIC_REG')
187 ifelse(MCOUNT_NONPIC_REG,,,
188 ` movl `$'L(mcount_data_`'mcount_counter), MCOUNT_NONPIC_REG
192 ifelse(WANT_PROFILING,`gprof',
195 define(`mcount_counter',incr(mcount_counter))
dnl Starting value of the counter that call_mcount appends to its
dnl L(mcount_data_N) and L(mcount_here_N) labels and then increments with
dnl incr(), so that each expansion gets distinct labels.
198 define(mcount_counter,1)
201 dnl Usage: call_instrument(enter|exit)
203 dnl Call __cyg_profile_func_enter or __cyg_profile_func_exit.
205 dnl For PIC, most routines don't require _GLOBAL_OFFSET_TABLE_ themselves
206 dnl so %ebx is just setup for these calls. It's a bit wasteful to repeat
207 dnl the setup for the exit call having done it earlier for the enter, but
208 dnl there's nowhere very convenient to hold %ebx through the length of a
209 dnl routine, in general.
211 dnl For PIC, because instrument_current_function will be within the current
212 dnl object file we can get it just as an offset from %eip, there's no need
215 dnl No attempt is made to maintain the stack alignment gcc generates with
216 dnl -mpreferred-stack-boundary. This wouldn't be hard, but it seems highly
217 dnl unlikely the instrumenting functions would be doing anything that'd
218 dnl benefit from alignment, in particular they're unlikely to be using
219 dnl doubles or long doubles on the stack.
221 dnl The FRAME scheme is used to conveniently account for the register saves
222 dnl before accessing the return address. Any previous value is saved and
223 dnl restored, since plenty of code keeps a value across a "ret" in the
224 dnl middle of a routine.
226 define(call_instrument,
230 ` pushl %eax FRAME_pushl() C return value
233 ` pushl %ebx FRAME_pushl()
235 L(instrument_here_`'instrument_count):
237 addl $_GLOBAL_OFFSET_TABLE_+[.-L(instrument_here_`'instrument_count)], %ebx
238 C use addl rather than leal to avoid old gas bugs, see mpn/x86/README
239 addl $instrument_current_function-L(instrument_here_`'instrument_count), %ecx
240 pushl m4_empty_if_zero(FRAME)(%esp) FRAME_pushl() C return addr
241 pushl %ecx FRAME_pushl() C this function
242 call GSYM_PREFIX`'__cyg_profile_func_$1@PLT
247 pushl m4_empty_if_zero(FRAME)(%esp) FRAME_pushl() C return addr
248 pushl $instrument_current_function FRAME_pushl() C this function
249 call GSYM_PREFIX`'__cyg_profile_func_$1
253 ` popl %eax C return value
256 define(`instrument_count',incr(instrument_count))
dnl Starting value of the counter that call_instrument appends to its
dnl L(instrument_here_N) label and then increments with incr(), so that
dnl each expansion gets a distinct label.
258 define(instrument_count,1)
261 dnl Usage: instrument_current_function
263 dnl Return the current function name for instrumenting purposes. This is
264 dnl PROLOGUE_current_function, but it sticks at the first such name seen.
266 dnl Sticking to the first name seen ensures that multiple-entrypoint
267 dnl functions like mpn_add_nc and mpn_add_n will make enter and exit calls
268 dnl giving the same function address.
dnl Implementation: if instrument_current_function_seen is already defined,
dnl expand to it. Otherwise define it to the present expansion of
dnl PROLOGUE_current_function (the unquoted second argument is expanded at
dnl the time of the define) and expand to PROLOGUE_current_function.
dnl NOTE(review): m4_assert_numargs(-1) is defined elsewhere; presumably it
dnl insists the macro is used without an argument list -- confirm.
270 define(instrument_current_function,
271 m4_assert_numargs(-1)
272 `ifdef(`instrument_current_function_seen',
273 `instrument_current_function_seen',
274 `define(`instrument_current_function_seen',PROLOGUE_current_function)dnl
275 PROLOGUE_current_function')')
278 dnl Usage: call_movl_eip_to_ebx
280 dnl Generate a call to L(movl_eip_to_ebx), and record the need for that
dnl Expands to "call L(movl_eip_to_ebx)" and, as a side effect, defines
dnl movl_eip_to_ebx_needed as 1. generate_movl_eip_to_ebx tests that flag
dnl to decide whether the L(movl_eip_to_ebx) routine has to be emitted.
283 define(call_movl_eip_to_ebx,
284 m4_assert_numargs(-1)
285 `call L(movl_eip_to_ebx)
286 define(`movl_eip_to_ebx_needed',1)')
288 dnl Usage: generate_movl_eip_to_ebx
290 dnl Emit a L(movl_eip_to_ebx) routine, if needed and not already generated.
292 define(generate_movl_eip_to_ebx,
293 m4_assert_numargs(-1)
294 `ifelse(movl_eip_to_ebx_needed,1,
295 `ifelse(movl_eip_to_ebx_done,1,,
299 define(`movl_eip_to_ebx_done',1)
305 dnl Generate a "ret", but if doing instrumented profiling then call
306 dnl __cyg_profile_func_exit first.
309 m4_assert_numargs(-1)
310 m4_assert_defined(`WANT_PROFILING')
311 `ifelse(WANT_PROFILING,instrument,
314 generate_movl_eip_to_ebx
318 dnl Usage: ret_internal
320 dnl A plain "ret", without any __cyg_profile_func_exit call. This can be
321 dnl used for a return which is internal to some function, such as when
322 dnl getting %eip for PIC.
325 m4_assert_numargs(-1)
329 dnl Usage: ret_instrument
331 dnl Generate call to __cyg_profile_func_exit and then a ret. If a ret has
332 dnl already been seen from this function then jump to that chunk of code,
333 dnl rather than emitting it again.
335 define(ret_instrument,
336 m4_assert_numargs(-1)
337 `ifelse(m4_unquote(ret_instrument_seen_`'instrument_current_function),1,
338 `jmp L(instrument_exit_`'instrument_current_function)',
339 `define(ret_instrument_seen_`'instrument_current_function,1)
340 L(instrument_exit_`'instrument_current_function):
341 call_instrument(exit)
345 dnl Usage: _GLOBAL_OFFSET_TABLE_
347 dnl Expand to _GLOBAL_OFFSET_TABLE_ plus any necessary underscore prefix.
348 dnl This lets us write plain _GLOBAL_OFFSET_TABLE_ in SVR4 style, but still
349 dnl work with systems requiring an extra underscore such as OpenBSD.
351 dnl deflit is used so "leal _GLOBAL_OFFSET_TABLE_(%eax), %ebx" will come
352 dnl out right, though that form doesn't work properly in gas (see
dnl The expansion is GOT_GSYM_PREFIX immediately followed by the literal
dnl text _GLOBAL_OFFSET_TABLE_. The inner quotes around the literal keep it
dnl from being expanded again as this same macro when the result is
dnl rescanned.
dnl NOTE(review): m4_assert_defined is defined elsewhere; presumably it
dnl reports an error if GOT_GSYM_PREFIX is not defined -- confirm.
355 deflit(_GLOBAL_OFFSET_TABLE_,
356 m4_assert_defined(`GOT_GSYM_PREFIX')
357 `GOT_GSYM_PREFIX`_GLOBAL_OFFSET_TABLE_'')
360 dnl --------------------------------------------------------------------------
361 dnl Various x86 macros.
365 dnl Usage: ALIGN_OFFSET(bytes,offset)
367 dnl Align to `offset' away from a multiple of `bytes'.
369 dnl This is useful for testing, for example align to something very strict
370 dnl and see what effect offsets from it have, "ALIGN_OFFSET(256,32)".
372 dnl Generally you wouldn't execute across the padding, but it's done with
373 dnl nop's so it'll work.
378 forloop(`i',1,$2,` nop
382 dnl Usage: defframe(name,offset)
384 dnl Make a definition like the following with which to access a parameter
385 dnl or variable on the stack.
387 dnl define(name,`FRAME+offset(%esp)')
389 dnl Actually m4_empty_if_zero(FRAME+offset) is used, which will save one
390 dnl byte if FRAME+offset is zero, by putting (%esp) rather than 0(%esp).
391 dnl Use define(`defframe_empty_if_zero_disabled',1) if for some reason the
392 dnl zero offset is wanted.
394 dnl The new macro also gets a check that when it's used FRAME is actually
395 dnl defined, and that the final %esp offset isn't negative, which would
396 dnl mean an attempt to access something below the current %esp.
398 dnl deflit() is used rather than a plain define(), so the new macro won't
399 dnl delete any following parenthesized expression. name(%edi) will come
400 dnl out say as 16(%esp)(%edi). This isn't valid assembler and should
401 dnl provoke an error, which is better than silently giving just 16(%esp).
403 dnl See README for more on the suggested way to access the stack frame.
408 m4_assert_defined(`FRAME')
409 `defframe_check_notbelow(`$1',$2,FRAME)dnl
410 defframe_empty_if_zero(FRAME+($2))(%esp)')')
412 dnl Called: defframe_empty_if_zero(expression)
dnl Expand to the %esp displacement for a defframe() variable.
dnl
dnl Normally the expression goes through m4_empty_if_zero() so a zero
dnl displacement is dropped, giving (%esp) rather than 0(%esp). When
dnl defframe_empty_if_zero_disabled is defined to 1 the expression is just
dnl eval()ed, so an explicit 0 displacement is kept.
dnl
dnl The "disabled" alternative was missing, which made the macro apply
dnl m4_empty_if_zero() only when disabled and expand to nothing otherwise.
413 define(defframe_empty_if_zero,
415 `ifelse(defframe_empty_if_zero_disabled,1,
`eval($1)',
417 `m4_empty_if_zero($1)')')
419 dnl Called: defframe_check_notbelow(`name',offset,FRAME)
420 define(defframe_check_notbelow,
422 `ifelse(eval(($3)+($2)<0),1,
423 `m4_error(`$1 at frame offset $2 used when FRAME is only $3 bytes
427 dnl Usage: FRAME_pushl()
429 dnl FRAME_addl_esp(n)
430 dnl FRAME_subl_esp(n)
432 dnl Adjust FRAME appropriately for a pushl or popl, or for an addl or subl
435 dnl Using these macros is completely optional. Sometimes it makes more
436 dnl sense to put explicit deflit(`FRAME',N) forms, especially when there's
437 dnl jumps and different sequences of FRAME values need to be used in
438 dnl different places.
442 m4_assert_defined(`FRAME')
443 `deflit(`FRAME',eval(FRAME+4))')
447 m4_assert_defined(`FRAME')
448 `deflit(`FRAME',eval(FRAME-4))')
dnl FRAME_addl_esp(n): redefine FRAME (with deflit) as its current value
dnl minus n, for use after an "addl $n, %esp". The argument is put in
dnl parentheses so an expression can be given.
450 define(FRAME_addl_esp,
452 m4_assert_defined(`FRAME')
453 `deflit(`FRAME',eval(FRAME-($1)))')
dnl FRAME_subl_esp(n): redefine FRAME (with deflit) as its current value
dnl plus n, for use after a "subl $n, %esp". The argument is put in
dnl parentheses so an expression can be given.
455 define(FRAME_subl_esp,
457 m4_assert_defined(`FRAME')
458 `deflit(`FRAME',eval(FRAME+($1)))')
461 dnl Usage: defframe_pushl(name)
463 dnl Do a combination FRAME_pushl() and a defframe() to name the stack
464 dnl location just pushed. This should come after a pushl instruction.
465 dnl Putting it on the same line works and avoids lengthening the code. For
468 dnl pushl %eax defframe_pushl(VAR_COUNTER)
470 dnl Notice the defframe() is done with an unquoted -FRAME thus giving its
471 dnl current value without tracking future changes.
dnl Implementation: FRAME_pushl() is expanded first so FRAME already
dnl accounts for the push, then defframe() names the slot at offset -FRAME.
473 define(defframe_pushl,
475 `FRAME_pushl()defframe(`$1',-FRAME)')
478 dnl --------------------------------------------------------------------------
479 dnl Assembler instruction macros.
483 dnl Usage: emms_or_femms
484 dnl femms_available_p
486 dnl femms_available_p expands to 1 or 0 according to whether the AMD 3DNow
487 dnl femms instruction is available. emms_or_femms expands to femms if
488 dnl available, or emms if not.
490 dnl emms_or_femms is meant for use in the K6 directory where plain K6
491 dnl (without femms) and K6-2 and K6-3 (with a slightly faster femms) are
492 dnl supported together.
494 dnl On K7 femms is no longer faster and is just an alias for emms, so plain
495 dnl emms may as well be used.
497 define(femms_available_p,
498 m4_assert_numargs(-1)
502 `HAVE_HOST_CPU_athlon')')
dnl Expands to femms when femms_available_p expands to 1, and to emms for
dnl any other value.
504 define(emms_or_femms,
505 m4_assert_numargs(-1)
506 `ifelse(femms_available_p,1,`femms',`emms')')
511 dnl Gas 2.9.1 which comes with FreeBSD 3.4 doesn't support femms, so the
512 dnl following is a replacement using .byte.
515 m4_assert_numargs(-1)
516 `.byte 15,14 C AMD 3DNow femms')
519 dnl Usage: jadcl0(op)
521 dnl Generate a jnc/incl as a substitute for adcl $0,op. Note this isn't an
522 dnl exact replacement, since it doesn't set the flags like adcl does.
524 dnl This finds a use in K6 mpn_addmul_1, mpn_submul_1, mpn_mul_basecase and
525 dnl mpn_sqr_basecase because on K6 an adcl is slow, the branch
526 dnl misprediction penalty is small, and the multiply algorithm used leads
527 dnl to a carry bit on average only 1/4 of the time.
529 dnl jadcl0_disabled can be set to 1 to instead generate an ordinary adcl
530 dnl for comparison. For example,
532 dnl define(`jadcl0_disabled',1)
534 dnl When using a register operand, eg. "jadcl0(%edx)", the jnc/incl code is
535 dnl the same size as an adcl. This makes it possible to use the exact same
536 dnl computed jump code when testing the relative speed of the two.
540 `ifelse(jadcl0_disabled,1,
542 `jnc L(jadcl0_`'jadcl0_counter)
544 L(jadcl0_`'jadcl0_counter):
545 define(`jadcl0_counter',incr(jadcl0_counter))')')
dnl Starting value of the counter that jadcl0 appends to its L(jadcl0_N)
dnl label and then increments with incr(), so that each expansion gets a
dnl distinct label.
547 define(jadcl0_counter,1)
550 dnl Usage: x86_lookup(target, key,value, key,value, ...)
551 dnl x86_lookup_p(target, key,value, key,value, ...)
553 dnl Look for `target' among the `key' parameters.
555 dnl x86_lookup expands to the corresponding `value', or generates an error
556 dnl if `target' isn't found.
558 dnl x86_lookup_p expands to 1 if `target' is found, or 0 if not.
561 m4_assert_numargs_range(1,999)
562 `ifelse(eval($#<3),1,
563 `m4_error(`unrecognised part of x86 instruction: $1
565 `ifelse(`$1',`$2', `$3',
566 `x86_lookup(`$1',shift(shift(shift($@))))')')')
569 m4_assert_numargs_range(1,999)
570 `ifelse(eval($#<3),1, `0',
571 `ifelse(`$1',`$2', `1',
572 `x86_lookup_p(`$1',shift(shift(shift($@))))')')')
575 dnl Usage: x86_opcode_reg32(reg)
576 dnl x86_opcode_reg32_p(reg)
578 dnl x86_opcode_reg32 expands to the standard 3 bit encoding for the given
579 dnl 32-bit register, eg. `%ebp' turns into 5.
581 dnl x86_opcode_reg32_p expands to 1 if reg is a valid 32-bit register, or 0
dnl Implemented as an x86_lookup() of the register name in the key,value
dnl pairs of x86_opcode_reg32_list; x86_lookup raises an error when the
dnl name is not among the keys.
584 define(x86_opcode_reg32,
586 `x86_lookup(`$1',x86_opcode_reg32_list)')
dnl Implemented as an x86_lookup_p() of the register name in the key,value
dnl pairs of x86_opcode_reg32_list, giving 1 when found and 0 when not.
588 define(x86_opcode_reg32_p,
590 `x86_lookup_p(`$1',x86_opcode_reg32_list)')
592 define(x86_opcode_reg32_list,
603 dnl Usage: x86_opcode_tttn(cond)
605 dnl Expand to the 4-bit "tttn" field value for the given x86 branch
606 dnl condition (like `c', `ae', etc).
dnl Implemented as an x86_lookup() of the condition name in the key,value
dnl pairs of x86_opcode_tttn_list; x86_lookup raises an error when the
dnl condition is not among the keys.
dnl
dnl The table name was previously misspelled x86_opcode_ttn_list. That name
dnl is not defined, so it was passed through as a single literal argument,
dnl x86_lookup saw fewer than 3 arguments, and every call ended in the
dnl "unrecognised part of x86 instruction" error.
608 define(x86_opcode_tttn,
610 `x86_lookup(`$1',x86_opcode_tttn_list)')
612 define(x86_opcode_tttn_list,
615 `b', 2, `c', 2, `nae',2,
616 `nb', 3, `nc', 3, `ae', 3,
623 `p', 10, `pe', 10, `npo',10,
624 `np', 11, `npe',11, `po', 11,
631 dnl Usage: cmovCC(%srcreg,%dstreg)
633 dnl Emit a cmov instruction, using a .byte sequence, since various past
634 dnl versions of gas don't know cmov. For example,
636 dnl cmovz( %eax, %ebx)
638 dnl The source operand can only be a plain register. (m4 code implementing
639 dnl full memory addressing modes exists, believe it or not, but isn't
640 dnl currently needed and isn't included.)
642 dnl All the standard conditions are defined. Attempting to use one without
643 dnl the macro parentheses, such as just "cmovbe %eax, %ebx", will provoke
644 dnl an error. This protects against writing something old gas wouldn't
647 dnl Called: define_cmov_many(cond,tttn,cond,tttn,...)
dnl Recursive helper: when the first argument is empty (m4_length is 0) the
dnl recursion stops and nothing is produced. Otherwise define_cmov() is
dnl applied to the first cond,tttn pair and define_cmov_many is re-invoked
dnl on the argument list with those two arguments shifted off.
648 define(define_cmov_many,
649 `ifelse(m4_length(`$1'),0,,
650 `define_cmov(`$1',`$2')define_cmov_many(shift(shift($@)))')')
652 dnl Called: define_cmov(cond,tttn)
653 dnl Emit basically define(cmov<cond>,`cmov_internal(<cond>,<tttn>,`$1',`$2')')
657 m4_instruction_wrapper()
659 `cmov_internal'(m4_doublequote($`'0),``$2'',dnl
660 m4_doublequote($`'1),m4_doublequote($`'2)))')
dnl Create one cmov<cond> macro for every cond,tttn pair listed in
dnl x86_opcode_tttn_list.
662 define_cmov_many(x86_opcode_tttn_list)
664 dnl Called: cmov_internal(name,tttn,src,dst)
665 define(cmov_internal,
670 eval(192+8*x86_opcode_reg32(`$4')+x86_opcode_reg32(`$3')) dnl
674 dnl Usage: x86_opcode_regmmx(reg)
676 dnl Validate the given mmx register, and return its number, 0 to 7.
dnl Implemented as an x86_lookup() of the register name in the key,value
dnl pairs of x86_opcode_regmmx_list; x86_lookup raises an error when the
dnl name is not among the keys, which is what validates the register.
678 define(x86_opcode_regmmx,
680 `x86_lookup(`$1',x86_opcode_regmmx_list)')
682 define(x86_opcode_regmmx_list,
693 dnl Usage: psadbw(%srcreg,%dstreg)
695 dnl Oldish versions of gas don't know psadbw, in particular gas 2.9.1 on
696 dnl FreeBSD 3.3 and 3.4 doesn't, so instead emit .byte sequences. For
699 dnl psadbw( %mm1, %mm2)
701 dnl Only register->register forms are supported here, which suffices for
702 dnl the current code.
705 m4_instruction_wrapper()
708 eval(192+x86_opcode_regmmx(`$2')*8+x86_opcode_regmmx(`$1')) dnl
712 dnl Usage: Zdisp(inst,op,op,op)
714 dnl Generate explicit .byte sequences if necessary to force a byte-sized
715 dnl zero displacement on an instruction. For example,
717 dnl Zdisp( movl, 0,(%esi), %eax)
721 dnl .byte 139,70,0 C movl 0(%esi), %eax
723 dnl If the displacement given isn't 0, then normal assembler code is
724 dnl generated. For example,
726 dnl Zdisp( movl, 4,(%esi), %eax)
730 dnl movl 4(%esi), %eax
732 dnl This means a single Zdisp() form can be used with an expression for the
733 dnl displacement, and .byte will be used only if necessary. The
734 dnl displacement argument is eval()ed.
736 dnl Because there aren't many places a 0(reg) form is wanted, Zdisp is
737 dnl implemented with a table of instructions and encodings. A new entry is
738 dnl needed for any different operation or registers. The table is split
739 dnl into separate macros to avoid overflowing BSD m4 macro expansion space.
743 `define(`Zdisp_found',0)dnl
748 ifelse(Zdisp_found,0,
749 `m4_error(`unrecognised instruction in Zdisp: $1 $2 $3 $4
753 Zdisp_match( adcl, 0,(%edx), %eax, `0x13,0x42,0x00', $@)`'dnl
754 Zdisp_match( adcl, 0,(%edx), %ebx, `0x13,0x5a,0x00', $@)`'dnl
755 Zdisp_match( adcl, 0,(%edx), %esi, `0x13,0x72,0x00', $@)`'dnl
756 Zdisp_match( addl, %ebx, 0,(%edi), `0x01,0x5f,0x00', $@)`'dnl
757 Zdisp_match( addl, %ecx, 0,(%edi), `0x01,0x4f,0x00', $@)`'dnl
758 Zdisp_match( addl, %esi, 0,(%edi), `0x01,0x77,0x00', $@)`'dnl
759 Zdisp_match( sbbl, 0,(%edx), %eax, `0x1b,0x42,0x00', $@)`'dnl
760 Zdisp_match( sbbl, 0,(%edx), %esi, `0x1b,0x72,0x00', $@)`'dnl
761 Zdisp_match( subl, %ecx, 0,(%edi), `0x29,0x4f,0x00', $@)`'dnl
762 Zdisp_match( movzbl, 0,(%eax,%ebp), %eax, `0x0f,0xb6,0x44,0x28,0x00', $@)`'dnl
763 Zdisp_match( movzbl, 0,(%ecx,%edi), %edi, `0x0f,0xb6,0x7c,0x39,0x00', $@)`'dnl
764 Zdisp_match( adc, 0,(%ebx,%ecx,4), %eax, `0x13,0x44,0x8b,0x00', $@)`'dnl
765 Zdisp_match( sbb, 0,(%ebx,%ecx,4), %eax, `0x1b,0x44,0x8b,0x00', $@)`'dnl
768 Zdisp_match( movl, %eax, 0,(%edi), `0x89,0x47,0x00', $@)`'dnl
769 Zdisp_match( movl, %ebx, 0,(%edi), `0x89,0x5f,0x00', $@)`'dnl
770 Zdisp_match( movl, %esi, 0,(%edi), `0x89,0x77,0x00', $@)`'dnl
771 Zdisp_match( movl, 0,(%ebx), %eax, `0x8b,0x43,0x00', $@)`'dnl
772 Zdisp_match( movl, 0,(%ebx), %esi, `0x8b,0x73,0x00', $@)`'dnl
773 Zdisp_match( movl, 0,(%edx), %eax, `0x8b,0x42,0x00', $@)`'dnl
774 Zdisp_match( movl, 0,(%esi), %eax, `0x8b,0x46,0x00', $@)`'dnl
775 Zdisp_match( movl, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00', $@)`'dnl
776 Zdisp_match( mov, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00', $@)`'dnl
777 Zdisp_match( mov, %eax, 0,(%edi,%ecx,4), `0x89,0x44,0x8f,0x00', $@)`'dnl
780 Zdisp_match( movq, 0,(%eax,%ecx,8), %mm0, `0x0f,0x6f,0x44,0xc8,0x00', $@)`'dnl
781 Zdisp_match( movq, 0,(%ebx,%eax,4), %mm0, `0x0f,0x6f,0x44,0x83,0x00', $@)`'dnl
782 Zdisp_match( movq, 0,(%ebx,%eax,4), %mm2, `0x0f,0x6f,0x54,0x83,0x00', $@)`'dnl
783 Zdisp_match( movq, 0,(%ebx,%ecx,4), %mm0, `0x0f,0x6f,0x44,0x8b,0x00', $@)`'dnl
784 Zdisp_match( movq, 0,(%edx), %mm0, `0x0f,0x6f,0x42,0x00', $@)`'dnl
785 Zdisp_match( movq, 0,(%esi), %mm0, `0x0f,0x6f,0x46,0x00', $@)`'dnl
786 Zdisp_match( movq, %mm0, 0,(%edi), `0x0f,0x7f,0x47,0x00', $@)`'dnl
787 Zdisp_match( movq, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7f,0x54,0x81,0x00', $@)`'dnl
788 Zdisp_match( movq, %mm2, 0,(%edx,%eax,4), `0x0f,0x7f,0x54,0x82,0x00', $@)`'dnl
789 Zdisp_match( movq, %mm0, 0,(%edx,%ecx,8), `0x0f,0x7f,0x44,0xca,0x00', $@)`'dnl
792 Zdisp_match( movd, 0,(%eax,%ecx,4), %mm0, `0x0f,0x6e,0x44,0x88,0x00', $@)`'dnl
793 Zdisp_match( movd, 0,(%eax,%ecx,8), %mm1, `0x0f,0x6e,0x4c,0xc8,0x00', $@)`'dnl
794 Zdisp_match( movd, 0,(%edx,%ecx,8), %mm0, `0x0f,0x6e,0x44,0xca,0x00', $@)`'dnl
795 Zdisp_match( movd, %mm0, 0,(%eax,%ecx,4), `0x0f,0x7e,0x44,0x88,0x00', $@)`'dnl
796 Zdisp_match( movd, %mm0, 0,(%ecx,%eax,4), `0x0f,0x7e,0x44,0x81,0x00', $@)`'dnl
797 Zdisp_match( movd, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7e,0x54,0x81,0x00', $@)`'dnl
798 Zdisp_match( movd, %mm0, 0,(%edx,%ecx,4), `0x0f,0x7e,0x44,0x8a,0x00', $@)`'dnl
803 `ifelse(eval(m4_stringequal_p(`$1',`$6')
804 && m4_stringequal_p(`$2',0)
805 && m4_stringequal_p(`$3',`$8')
806 && m4_stringequal_p(`$4',`$9')),1,
807 `define(`Zdisp_found',1)dnl
809 ` .byte $5 C `$1 0$3, $4'',
812 `ifelse(eval(m4_stringequal_p(`$1',`$6')
813 && m4_stringequal_p(`$2',`$7')
814 && m4_stringequal_p(`$3',0)
815 && m4_stringequal_p(`$4',`$9')),1,
816 `define(`Zdisp_found',1)dnl
818 ` .byte $5 C `$1 $2, 0$4'',
819 ` $6 $7, $8$9')')')')
822 dnl Usage: shldl(count,src,dst)
823 dnl shrdl(count,src,dst)
824 dnl shldw(count,src,dst)
825 dnl shrdw(count,src,dst)
827 dnl Generate a double-shift instruction, possibly omitting a %cl count
828 dnl parameter if that's what the assembler requires, as indicated by
829 dnl WANT_SHLDL_CL in config.m4. For example,
831 dnl shldl( %cl, %eax, %ebx)
833 dnl turns into either
835 dnl shldl %cl, %eax, %ebx
839 dnl Immediate counts are always passed through unchanged. For example,
841 dnl shrdl( $2, %esi, %edi)
843 dnl shrdl $2, %esi, %edi
846 dnl If you forget to use the macro form "shldl( ...)" and instead write
847 dnl just a plain "shldl ...", an error results. This ensures the necessary
848 dnl variant treatment of %cl isn't accidentally bypassed.
850 define(define_shd_instruction,
853 m4_instruction_wrapper()
855 `shd_instruction'(m4_doublequote($`'0),m4_doublequote($`'1),dnl
856 m4_doublequote($`'2),m4_doublequote($`'3)))')
858 dnl Effectively: define(shldl,`shd_instruction(`$0',`$1',`$2',`$3')') etc
859 define_shd_instruction(shldl)
860 define_shd_instruction(shrdl)
861 define_shd_instruction(shldw)
862 define_shd_instruction(shrdw)
864 dnl Called: shd_instruction(op,count,src,dst)
dnl Emit the double-shift instruction op with operands count, src, dst.
dnl
dnl When the count is exactly %cl and WANT_SHLDL_CL is 0 (the assembler
dnl wants the %cl operand left implicit) the count is omitted and only
dnl "op src, dst" is emitted. In every other case, including all immediate
dnl counts, the three operands are passed through unchanged.
dnl
dnl The "omit %cl" alternative was missing, which made the macro emit the
dnl three-operand form only for the %cl-with-!WANT_SHLDL_CL case and expand
dnl to nothing for everything else.
865 define(shd_instruction,
867 m4_assert_defined(`WANT_SHLDL_CL')
868 `ifelse(eval(m4_stringequal_p(`$2',`%cl') && !WANT_SHLDL_CL),1,
``$1' `$3', `$4'',
870 ``$1' `$2', `$3', `$4'')')
873 dnl Usage: ASSERT([cond][,instructions])
875 dnl If WANT_ASSERT is 1, output the given instructions and expect the given
876 dnl flags condition to then be satisfied. For example,
878 dnl ASSERT(ne, `cmpl %eax, %ebx')
880 dnl The instructions can be omitted to just assert a flags condition with
881 dnl no extra calculation. For example,
885 dnl When `instructions' is not empty, a pushf/popf is added to preserve the
886 dnl flags, but the instructions themselves must preserve any registers that
887 dnl matter. FRAME is adjusted for the push and pop, so the instructions
888 dnl given can use defframe() stack variables.
890 dnl The condition can be omitted to just output the given instructions when
891 dnl assertion checking is wanted. In this case the pushf/popf is omitted.
894 dnl ASSERT(, `movl %eax, VAR_KEEPVAL')
897 m4_assert_numargs_range(1,2)
898 m4_assert_defined(`WANT_ASSERT')
899 `ifelse(WANT_ASSERT,1,
903 ifelse(`$2',,,` pushf ifdef(`FRAME',`FRAME_pushl()')')
905 j`$1' L(ASSERT_ok`'ASSERT_counter)
906 ud2 C assertion failed
907 L(ASSERT_ok`'ASSERT_counter):
908 ifelse(`$2',,,` popf ifdef(`FRAME',`FRAME_popl()')')
909 define(`ASSERT_counter',incr(ASSERT_counter))')')')
dnl Starting value of the counter that ASSERT appends to its L(ASSERT_okN)
dnl label and then increments with incr(), so that each expansion gets a
dnl distinct label.
911 define(ASSERT_counter,1)
914 dnl Usage: movl_text_address(label,register)
916 dnl Get the address of a text segment label, using either a plain movl or a
917 dnl position-independent calculation, as necessary. For example,
919 dnl movl_text_address(L(foo),%eax)
921 dnl This macro is only meant for use in ASSERT()s or when testing, since
922 dnl the PIC sequence it generates will want to be done with a ret balancing
923 dnl the call on CPUs with return address branch prediction.
925 dnl The addl generated here has a backward reference to the label, and so
926 dnl won't suffer from the two forwards references bug in old gas (described
927 dnl in mpn/x86/README).
929 define(movl_text_address,
932 `call L(movl_text_address_`'movl_text_address_counter)
933 L(movl_text_address_`'movl_text_address_counter):
935 addl `$'$1-L(movl_text_address_`'movl_text_address_counter), $2
936 define(`movl_text_address_counter',incr(movl_text_address_counter))',
dnl Starting value of the counter that movl_text_address appends to its
dnl L(movl_text_address_N) label and then increments with incr(), so that
dnl each expansion gets a distinct label.
939 define(movl_text_address_counter,1)
942 dnl Usage: notl_or_xorl_GMP_NUMB_MASK(reg)
944 dnl Expand to either "notl `reg'" or "xorl $GMP_NUMB_MASK,`reg'" as
945 dnl appropriate for nails in use or not.
dnl Complement the numb bits of reg.
dnl
dnl With GMP_NAIL_BITS equal to 0 every bit of the limb is a numb bit, so a
dnl plain notl is emitted. With nails in use only the numb bits are flipped,
dnl by xorl with GMP_NUMB_MASK.
dnl
dnl The notl alternative was missing, which made the macro emit the xorl
dnl form only when there are no nails and expand to nothing when nails are
dnl in use.
947 define(notl_or_xorl_GMP_NUMB_MASK,
949 `ifelse(GMP_NAIL_BITS,0,
`notl `$1'',
951 `xorl $GMP_NUMB_MASK, `$1'')')
954 dnl Usage: LEA(symbol,reg)
955 dnl Usage: LEAL(symbol_local_to_file,reg)
960 ifelse(index(defn(`load_eip'), `$2'),-1,
961 `m4append(`load_eip',
964 L(movl_eip_`'substr($2,1)):
968 call L(movl_eip_`'substr($2,1))
969 addl $_GLOBAL_OFFSET_TABLE_, $2
978 ifelse(index(defn(`load_eip'), `$2'),-1,
979 `m4append(`load_eip',
982 L(movl_eip_`'substr($2,1)):
986 call L(movl_eip_`'substr($2,1))
987 addl $_GLOBAL_OFFSET_TABLE_, $2
988 leal $1@GOTOFF($2), $2
dnl ASM_END expands to whatever text has accumulated in load_eip. load_eip
dnl starts out empty here and is extended with m4append() the first time a
dnl given register is used by LEA or LEAL.
995 define(`ASM_END',`load_eip')
997 define(`load_eip', `') dnl updated in LEA/LEAL
1000 define(`DEF_OBJECT',
1001 m4_assert_numargs_range(1,2)
1003 ALIGN(ifelse($#,1,2,$2))
dnl Usage: END_OBJECT(name)
dnl
dnl Expands to SIZE(name,.-name), i.e. the size expression handed to SIZE
dnl is the distance from the label `name' to the current location.
dnl NOTE(review): SIZE and m4_assert_numargs are defined elsewhere;
dnl presumably m4_assert_numargs(1) insists on exactly one argument --
dnl confirm.
1007 define(`END_OBJECT',
1008 m4_assert_numargs(1)
1009 ` SIZE(`$1',.-`$1')')
1011 dnl Usage: CALL(funcname)
1015 m4_assert_numargs(1)
1017 `call GSYM_PREFIX`'$1@PLT',
1018 `call GSYM_PREFIX`'$1')')
1021 `define(`PIC_WITH_EBX')',
1022 `undefine(`PIC_WITH_EBX')')