beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86 / fat / fat_entry.asm
blob25655cfbff9a8cc78d50747c96c682d7941c781a
dnl  x86 fat binary entrypoints.

dnl  Copyright 2003, 2012, 2014 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


dnl  Forcibly disable profiling.
dnl
dnl  The entrypoints and inits are small enough not to worry about, the real
dnl  routines arrived at will have any profiling.  Also, the way the code
dnl  here ends with a jump means we won't work properly with the
dnl  "instrument" profiling scheme anyway.

define(`WANT_PROFILING',no)


dnl  Everything below is code, so start in the text section.

	TEXT


dnl  Usage: FAT_ENTRY(name, offset)
dnl
dnl  Emit a fat binary entrypoint function of the given name.  This is the
dnl  normal entry for applications, eg. __gmpn_add_n.
dnl
dnl  The code simply jumps through the function pointer in __gmpn_cpuvec at
dnl  the given "offset" (in bytes).
dnl
dnl  Three variants are generated, selected at m4 time:
dnl
dnl    PIC, DARWIN   L(movl_eip_edx) leaves the address following the call in
dnl                  %edx, from which the non-lazy pointer holding the address
dnl                  of ___gmpn_cpuvec is loaded.
dnl    PIC, other    L(movl_eip_edx) likewise, then %edx is turned into the
dnl                  GOT address and the __gmpn_cpuvec address is loaded from
dnl                  its GOT slot.
dnl    non-PIC       A single indirect jump through the absolute address
dnl                  __gmpn_cpuvec+offset.
dnl
dnl  Both PIC variants use %edx as scratch; the non-PIC variant touches no
dnl  registers.  None of them touch the stack contents seen by the target.
dnl
dnl  For non-PIC, the jumps are 5 bytes each, aligning them to 8 should be
dnl  fine for all x86s.
dnl
dnl  For PIC, the jumps are 20 bytes each, and are best aligned to 16 to
dnl  ensure at least the first two instructions don't cross a cache line
dnl  boundary.
dnl
dnl  Note the extra `' ahead of PROLOGUE obscures it from the HAVE_NATIVE
dnl  grepping in configure, stopping that code trying to eval something with
dnl  $1 in it.
dnl
dnl  Each quoted alternative below is closed by a line consisting of just a
dnl  closing quote and parenthesis; those closers must be kept or the macro
dnl  body swallows the rest of the file.

define(FAT_ENTRY,
m4_assert_numargs(2)
`	ALIGN(ifdef(`PIC',16,8))
`'PROLOGUE($1)dnl
ifdef(`PIC',`dnl
ifdef(`DARWIN',`
	call	L(movl_eip_edx)
	movl	L(___gmpn_cpuvec)$non_lazy_ptr-.(%edx), %edx
	jmp	*m4_empty_if_zero($2)(%edx)
',`dnl
	call	L(movl_eip_edx)
L(entry_here$2):
	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(entry_here$2)], %edx
	movl	GSYM_PREFIX`'__gmpn_cpuvec@GOT(%edx), %edx
	jmp	*m4_empty_if_zero($2)(%edx)
')
',`dnl non-PIC
	jmp	*GSYM_PREFIX`'__gmpn_cpuvec+$2
')
EPILOGUE()
')


dnl  FAT_ENTRY for each CPUVEC_FUNCS_LIST
dnl
dnl  One entrypoint is emitted per name in CPUVEC_FUNCS_LIST.  CPUVEC_offset
dnl  is the byte offset passed to FAT_ENTRY; it starts at 0 and is advanced
dnl  by 4 after each entry.

define(`CPUVEC_offset',0)
foreach(i,
`FAT_ENTRY(MPN(i),CPUVEC_offset)
define(`CPUVEC_offset',eval(CPUVEC_offset + 4))',
CPUVEC_FUNCS_LIST)

dnl  PIC support code shared by all the FAT_ENTRY stubs.
dnl
dnl  L(movl_eip_edx) copies its own return address into %edx, giving the
dnl  calling stub the address of the instruction following its call.
dnl
dnl  For DARWIN a non-lazy symbol pointer for ___gmpn_cpuvec is also emitted
dnl  in the __IMPORT,__pointers section, after which assembly switches back
dnl  to the text section.
dnl
dnl  Both ifdef alternatives are closed by their own closer line at the end.

ifdef(`PIC',`
	ALIGN(8)
L(movl_eip_edx):
	movl	(%esp), %edx	C edx = return address, ie. the insn after the call
	ret_internal

ifdef(`DARWIN',`
	.section	__IMPORT,__pointers,non_lazy_symbol_pointers
L(___gmpn_cpuvec)$non_lazy_ptr:
	.indirect_symbol	___gmpn_cpuvec
	.long	0
	TEXT
')
')


dnl  Usage: FAT_INIT(name, offset)
dnl
dnl  Emit a fat binary initializer function of the given name.  These
dnl  functions are the initial values for the pointers in __gmpn_cpuvec.
dnl
dnl  The code simply calls __gmpn_cpuvec_init, and then jumps back through
dnl  the __gmpn_cpuvec pointer, at the given "offset" (in bytes).
dnl  __gmpn_cpuvec_init will have stored the address of the selected
dnl  implementation there.
dnl
dnl  Only one of these routines will be executed, and only once, since after
dnl  that all the __gmpn_cpuvec pointers go to real routines.  So there's no
dnl  need for anything special here, just something small and simple.  To
dnl  keep code size down, "fat_init" is a shared bit of code, arrived at
dnl  with the offset in %al.  %al is used since the movb instruction is 2
dnl  bytes where %eax would be 4.
dnl
dnl  Because the offset travels in %al and is zero extended by fat_init, it
dnl  must fit in one byte, ie. be at most 255.
dnl
dnl  Note having `PROLOGUE in FAT_INIT obscures that PROLOGUE from the
dnl  HAVE_NATIVE grepping in configure, preventing that code trying to eval
dnl  something with $1 in it.

define(FAT_INIT,
m4_assert_numargs(2)
`PROLOGUE($1)dnl
	movb	$`'$2, %al
	jmp	L(fat_init)
EPILOGUE()
')

dnl  L(fat_init) -- shared tail of every FAT_INIT stub.
dnl
dnl  Entered by a jump with the __gmpn_cpuvec byte offset in %al, and with
dnl  the stack exactly as the application left it for the real routine.  The
dnl  offset is saved on the stack across the call to __gmpn_cpuvec_init, then
dnl  popped and used to jump through the now initialized __gmpn_cpuvec slot.
dnl  Everything pushed here is popped again before that final jump, so the
dnl  target sees the original return address and arguments.
dnl
dnl  Stack alignment at the call to __gmpn_cpuvec_init, assuming the stub was
dnl  entered with esp+4 a multiple of 16 as after an ordinary aligned call:
dnl
dnl    PIC, DARWIN   4 (offset) + 8 (pad)              = 12, aligned
dnl    PIC, other    4 (offset) + 4 (ebx) + 4 (pad)    = 12, aligned
dnl    non-PIC       4 (offset) + 8 (pad)              = 12, aligned
dnl
dnl  Registers used: eax, edx, plus whatever __gmpn_cpuvec_init clobbers.
dnl  ebx is used as the GOT pointer in the non-Darwin PIC case and is saved
dnl  and restored around that use.

L(fat_init):
	C al	__gmpn_cpuvec byte offset

	movzbl	%al, %eax
	pushl	%eax		C keep the offset across the call

ifdef(`PIC',`dnl
ifdef(`DARWIN',`
	sub	$8, %esp
	CALL(	__gmpn_cpuvec_init)
	add	$8, %esp
	call	L(movl_eip_edx)
	movl	L(___gmpn_cpuvec)$non_lazy_ptr-.(%edx), %edx
',`dnl
	pushl	%ebx
	call	L(movl_eip_ebx)
L(init_here):
	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(init_here)], %ebx
	sub	$4, %esp	C pad so esp is 16-byte aligned at the call
	CALL(	__gmpn_cpuvec_init)
	add	$4, %esp
	movl	GSYM_PREFIX`'__gmpn_cpuvec@GOT(%ebx), %edx
	popl	%ebx
')
	popl	%eax		C eax = byte offset again
	jmp	*(%edx,%eax)

L(movl_eip_ebx):
	movl	(%esp), %ebx	C ebx = return address, ie. the insn after the call
	ret_internal
',`dnl non-PIC
	sub	$8, %esp	C needed on Darwin, harmless elsewhere
	CALL(	__gmpn_cpuvec_init)
	add	$8, %esp	C needed on Darwin, harmless elsewhere
	popl	%eax
	jmp	*GSYM_PREFIX`'__gmpn_cpuvec(%eax)
')


dnl  FAT_INIT for each CPUVEC_FUNCS_LIST
dnl
dnl  One initializer named MPN(<func>_init) is emitted per name in
dnl  CPUVEC_FUNCS_LIST.  CPUVEC_offset is the byte offset passed to FAT_INIT;
dnl  it starts at 0 and is advanced by 4 after each entry.

define(`CPUVEC_offset',0)
foreach(i,
`FAT_INIT(MPN(i`'_init),CPUVEC_offset)
define(`CPUVEC_offset',eval(CPUVEC_offset + 4))',
CPUVEC_FUNCS_LIST)



C long __gmpn_cpuid (char dst[12], int id);
C
C Execute cpuid with eax=id and store the resulting ebx, edx, ecx (in that
C order, 4 bytes each) to dst[0..11].  eax is left as cpuid wrote it and so
C is the return value.
C
C esi and ebx are saved and restored here; ecx and edx are clobbered.
C
C This is called only once, so just something simple and compact is fine.

defframe(PARAM_ID,  8)
defframe(PARAM_DST, 4)
deflit(`FRAME',0)

PROLOGUE(__gmpn_cpuid)
	pushl	%esi		FRAME_pushl()
	pushl	%ebx		FRAME_pushl()
	movl	PARAM_ID, %eax
	cpuid
	movl	PARAM_DST, %esi
	movl	%ebx, (%esi)
	movl	%edx, 4(%esi)
	movl	%ecx, 8(%esi)
	popl	%ebx
	popl	%esi
	ret
EPILOGUE()


C int __gmpn_cpuid_available (void);
C
C Return non-zero if the cpuid instruction is available, which means late
C model 80486 and higher.  80386 and early 80486 don't have cpuid.
C
C The test follows Intel AP-485 application note, namely that if bit 21 is
C modifiable then cpuid is supported.  This test is reentrant and thread
C safe, since of course any interrupt or context switch will preserve the
C flags while we're tinkering with them.
C
C Returns 1 in eax if the flags word read back differs from the original,
C 0 otherwise.  Clobbers ecx and edx.
C
C This is called only once, so just something simple and compact is fine.

PROLOGUE(__gmpn_cpuid_available)
	pushf
	popl	%ecx		C old flags

	movl	%ecx, %edx
	xorl	$0x200000, %edx	C toggle bit 21
	pushl	%edx
	popf
	pushf
	popl	%edx		C tweaked flags

	movl	$1, %eax
	cmpl	%ecx, %edx
	jne	L(available)
	xorl	%eax, %eax	C not changed, so cpuid not available

L(available):
	ret
EPILOGUE()
ASM_END()