3 dnl m4 macros for amd64 assembler.
5 dnl Copyright 1999-2005, 2008, 2009, 2011-2013 Free Software Foundation, Inc.
7 dnl This file is part of the GNU MP Library.
9 dnl The GNU MP Library is free software; you can redistribute it and/or modify
10 dnl it under the terms of either:
12 dnl * the GNU Lesser General Public License as published by the Free
13 dnl Software Foundation; either version 3 of the License, or (at your
14 dnl option) any later version.
18 dnl * the GNU General Public License as published by the Free Software
19 dnl Foundation; either version 2 of the License, or (at your option) any
22 dnl or both in parallel, as here.
24 dnl The GNU MP Library is distributed in the hope that it will be useful, but
25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
29 dnl You should have received copies of the GNU General Public License and the
30 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
31 dnl see https://www.gnu.org/licenses/.
34 dnl Usage: CPUVEC_FUNCS_LIST
36 dnl A list of the functions from gmp-impl.h x86 struct cpuvec_t, in the
37 dnl order they appear in that structure.
39 define(CPUVEC_FUNCS_LIST,
79 dnl Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
81 dnl In the amd64 code we use explicit TEXT and ALIGN() calls in the code,
82 dnl since different alignments are wanted in various circumstances. So for
87 dnl PROLOGUE(mpn_add_n)
91 define(`PROLOGUE_cpu',
99 dnl Usage: ASSERT([cond][,instructions])
101 dnl If WANT_ASSERT is 1, output the given instructions and expect the given
102 dnl flags condition to then be satisfied. For example,
104 dnl ASSERT(ne, `cmpq %rax, %rbx')
106 dnl The instructions can be omitted to just assert a flags condition with
107 dnl no extra calculation. For example,
111 dnl When `instructions' is not empty, a pushfq/popfq is added for
112 dnl convenience to preserve the flags, but the instructions themselves must
113 dnl preserve any registers that matter.
115 dnl The condition can be omitted to just output the given instructions when
116 dnl assertion checking is wanted. In this case the pushfq/popfq is omitted.
119 dnl ASSERT(, `movq %rax, VAR_KEEPVAL')
122 m4_assert_numargs_range(1,2)
123 m4_assert_defined(`WANT_ASSERT')
124 `ifelse(WANT_ASSERT,1,
130 `j$1' L(ASSERT_ok`'ASSERT_counter)
131 ud2 C assertion failed
132 L(ASSERT_ok`'ASSERT_counter):
133 ifelse(`$2',,,` popfq')
134 define(`ASSERT_counter',incr(ASSERT_counter))')')')
dnl  ASSERT_counter starts at 1.  When assertions are enabled, each ASSERT
dnl  expansion pastes the current value onto its ASSERT_ok label name and then
dnl  redefines the counter to the incremented value, so successive assertions
dnl  get distinct labels.
136 define(ASSERT_counter,1)
140 `mov $1@GOTPCREL(%rip), $2'
147 m4_assert_numargs_range(1,2)
149 ALIGN(ifelse($#,1,2,$2))
155 ` SIZE(`$1',.-`$1')')
159 `ifelse($1,`%rax',`%eax',
175 `ifelse($1,`%rax',`%al',
192 dnl Usage: CALL(funcname)
197 `call GSYM_PREFIX`'$1@PLT'
199 `call GSYM_PREFIX`'$1'
dnl  JUMPTABSECT expands to a .section directive that selects
dnl  .data.rel.ro.local with flags "aw" and type @progbits.  Presumably this is
dnl  the section used for jump tables built with JMPENT -- confirm against the
dnl  callers.
203 define(`JUMPTABSECT', `.section .data.rel.ro.local,"aw",@progbits')
206 dnl Usage: JMPENT(targlabel,tablabel)
216 dnl These macros are defined just for DOS64, where they provide calling
217 dnl sequence glue code.
dnl  In this file both macros do nothing: each expands to the empty string, so
dnl  no glue code is emitted where they are used.
219 define(`FUNC_ENTRY',`')
220 define(`FUNC_EXIT',`')
223 dnl Target ABI macros.
dnl  IFSTD and IFELF each expand to their first argument, so text wrapped in
dnl  either of them is kept in the output.
226 define(`IFSTD', `$1')
227 define(`IFELF', `$1')
230 dnl Usage: PROTECT(symbol)
232 dnl Used for private GMP symbols that should never be overridden by users.
233 dnl This can save reloc entries and improve shlib sharing as well as
234 dnl application startup times
dnl  Expands to a .hidden directive naming the given symbol.
236 define(`PROTECT', `.hidden $1')
239 dnl Usage: x86_lookup(target, key,value, key,value, ...)
241 dnl Look for `target' among the `key' parameters.
243 dnl x86_lookup expands to the corresponding `value', or generates an error
244 dnl if `target' isn't found.
247 m4_assert_numargs_range(1,999)
248 `ifelse(eval($#<3),1,
249 `m4_error(`unrecognised part of x86 instruction: $1
251 `ifelse(`$1',`$2', `$3',
252 `x86_lookup(`$1',shift(shift(shift($@))))')')')
255 dnl Usage: x86_opcode_regxmm(reg)
257 dnl Validate the given xmm register, and return its number, 0 to 15.
dnl  The work is delegated to x86_lookup, with x86_opcode_regxmm_list supplying
dnl  the key,value pairs.  A register name that is missing from that list
dnl  therefore ends up in the error path of x86_lookup.
259 define(x86_opcode_regxmm,
261 `x86_lookup(`$1',x86_opcode_regxmm_list)')
263 define(x86_opcode_regxmm_list,
281 dnl Usage: palignr($imm,%srcreg,%dstreg)
283 dnl Emit a palignr instruction, using a .byte sequence, since obsolete but
284 dnl still distributed versions of gas don't know SSSE3 instructions.
289 ifelse(eval(x86_opcode_regxmm($3) >= 8 || x86_opcode_regxmm($2) >= 8),1,
290 `eval(0x40+x86_opcode_regxmm($3)/8*4+x86_opcode_regxmm($2)/8),')dnl
292 eval(0xc0+x86_opcode_regxmm($3)%8*8+x86_opcode_regxmm($2)%8),dnl
298 dnl regnum(op) raw operand index (so slightly misnamed)
299 dnl regnumh(op) high bit of register operand number
300 dnl ix(op) 0 for reg operand, 1 for plain pointer operand.
dnl  regnum looks the operand up in oplist through x86_lookup and expands to
dnl  the index stored there, so an operand that is not listed triggers the
dnl  x86_lookup error.
303 define(`regnum',`x86_lookup(`$1',oplist)')
dnl  regnumh divides that index by 8 and keeps the low bit of the quotient,
dnl  giving 1 for indexes 8-15 and 24-31 and 0 for indexes 0-7 and 16-23.
304 define(`regnumh',`eval(regnum($1)/8 & 1)')
dnl  ix divides the index by 16, giving 0 for indexes 0-15 and 1 for indexes
dnl  16-31.
305 define(`ix',`eval(regnum($1)/16)')
307 ``%rax', 0, `%rcx', 1, `%rdx', 2, `%rbx', 3,
308 `%rsp', 4, `%rbp', 5, `%rsi', 6, `%rdi', 7,
309 `%r8', 8, `%r9', 9, `%r10', 10, `%r11', 11,
310 `%r12', 12, `%r13', 13, `%r14', 14, `%r15', 15,
311 `(%rax)',16, `(%rcx)',17, `(%rdx)',18, `(%rbx)',19,
312 `(%rsp)',20, `(%rbp)',21, `(%rsi)',22, `(%rdi)',23,
313 `(%r8)', 24, `(%r9)', 25, `(%r10)',26, `(%r11)',27,
314 `(%r12)',28, `(%r13)',29, `(%r14)',30, `(%r15)',31')
319 dnl mulx(reg1,reg2,reg3)
323 dnl mulx((reg1),reg2,reg3)
325 dnl where reg1 is any register but rsp,rbp,r12,r13, or
327 dnl mulx(off,(reg1),reg2,reg3)
329 dnl where reg1 is any register but rsp,r12.
331 dnl The exceptions are due to special coding needed for some registers; rsp
332 dnl and r12 need an extra byte 0x24 at the end while rbp and r13 lack the
333 dnl offset-less form.
335 dnl Other addressing forms are not handled. Invalid forms are not properly
336 dnl detected. Offsets that don't fit one byte are not handled correctly.
341 ,eval(0xe2^32*regnumh($1)^128*regnumh($3))`'dnl
342 ,eval(0xfb-8*regnum($2))`'dnl
344 ,eval(0xc0+(7 & regnum($1))+8*(7 & regnum($3))-0xc0*ix($1))`'dnl
346 ,eval(0xe2^32*regnumh($2)^128*regnumh($4))`'dnl
347 ,eval(0xfb-8*regnum($3))`'dnl
349 ,eval(0x40+(7 & regnum($2))+8*(7 & regnum($4)))`'dnl
350 ,eval(($1 + 256) % 256)`'dnl
360 dnl adcx((reg1),reg2)
361 dnl adox((reg1),reg2)
363 dnl where reg1 is any register but rsp,rbp,r12,r13, or
365 dnl adcx(off,(reg1),reg2)
366 dnl adox(off,(reg1),reg2)
368 dnl where reg1 is any register but rsp,r12.
370 dnl The exceptions are due to special coding needed for some registers; rsp
371 dnl and r12 need an extra byte 0x24 at the end while rbp and r13 lack the
372 dnl offset-less form.
374 dnl Other addressing forms are not handled. Invalid forms are not properly
375 dnl detected. Offsets that don't fit one byte are not handled correctly.
377 define(`adx_helper',`dnl
378 ,eval(0x48+regnumh($1)+4*regnumh($2))`'dnl
387 ,eval(0xc0+(7 & regnum($1))+8*(7 & regnum($2))-0xc0*ix($1))`'dnl
390 ,eval(0x40+(7 & regnum($2))+8*(7 & regnum($3)))`'dnl
391 ,eval(($1 + 256) % 256)`'dnl