hadamard: Add 4x4 test.
[aom.git] / aom_ports / x86_abi_support.asm
blobf1a65f53e54172ffa99700e819283dbf29cd079c
2 ; Copyright (c) 2016, Alliance for Open Media. All rights reserved
4 ; This source code is subject to the terms of the BSD 2 Clause License and
5 ; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 ; was not distributed with this source code in the LICENSE file, you can
7 ; obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 ; Media Patent License 1.0 was not distributed with this source code in the
9 ; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
15 %include "config/aom_config.asm"
; 32/64 bit compatibility macros
;
; Sources are written with 64 bit syntax. On 32 bit targets the preprocessor
; definitions in this file rewrite that syntax into its 32 bit form.

; ABI_IS_32BIT
; 1 when the output object format is one of the 32 bit formats listed below,
; 0 for any other output format.
%ifidn __OUTPUT_FORMAT__,aout
  %define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,win32
  %define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,macho32
  %define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,elf32
  %define ABI_IS_32BIT 1
%else
  %define ABI_IS_32BIT 0
%endif
%if ABI_IS_32BIT
  ; Alias every 64 bit general purpose register name to its 32 bit register.
  %define rsp esp
  %define rbp ebp
  %define rsi esi
  %define rdi edi
  %define rax eax
  %define rbx ebx
  %define rcx ecx
  %define rdx edx

  ; movsxd is replaced by a plain mov on 32 bit targets.
  %define movsxd mov

  ; movq <dst>, <src>
  ; If either operand is one of the eight 32 bit general purpose registers
  ; the instruction is emitted as movd; any other operand pair is emitted as
  ; movq unchanged. Every matching branch emits the same instruction, so the
  ; order of the tests does not matter.
  %macro movq 2
    ; destination operand is a general purpose register
    %ifidn %1,eax
      movd %1,%2
    %elifidn %1,ebx
      movd %1,%2
    %elifidn %1,ecx
      movd %1,%2
    %elifidn %1,edx
      movd %1,%2
    %elifidn %1,esi
      movd %1,%2
    %elifidn %1,edi
      movd %1,%2
    %elifidn %1,esp
      movd %1,%2
    %elifidn %1,ebp
      movd %1,%2
    ; source operand is a general purpose register
    %elifidn %2,eax
      movd %1,%2
    %elifidn %2,ebx
      movd %1,%2
    %elifidn %2,ecx
      movd %1,%2
    %elifidn %2,edx
      movd %1,%2
    %elifidn %2,esi
      movd %1,%2
    %elifidn %2,edi
      movd %1,%2
    %elifidn %2,esp
      movd %1,%2
    %elifidn %2,ebp
      movd %1,%2
    %else
      movq %1,%2
    %endif
  %endmacro
%endif
; LIBAOM_YASM_WIN64
; 1 when the output format is 64 bit Windows, 0 otherwise. Both spellings of
; the format name (x64 and win64) are accepted, so either may be given on the
; Yasm command line.
%ifidn __OUTPUT_FORMAT__,x64
  %define LIBAOM_YASM_WIN64 1
%elifidn __OUTPUT_FORMAT__,win64
  %define LIBAOM_YASM_WIN64 1
%else
  %define LIBAOM_YASM_WIN64 0
%endif
; Platform groups
;
; LIBAOM_ELF   - 1 for the elf32, elfx32 and elf64 output formats, else 0.
; LIBAOM_MACHO - 1 for the macho32 and macho64 output formats, else 0.
%ifidn __OUTPUT_FORMAT__,elf64
  %define LIBAOM_ELF 1
%elifidn __OUTPUT_FORMAT__,elfx32
  %define LIBAOM_ELF 1
%elifidn __OUTPUT_FORMAT__,elf32
  %define LIBAOM_ELF 1
%else
  %define LIBAOM_ELF 0
%endif

%ifidn __OUTPUT_FORMAT__,macho64
  %define LIBAOM_MACHO 1
%elifidn __OUTPUT_FORMAT__,macho32
  %define LIBAOM_MACHO 1
%else
  %define LIBAOM_MACHO 0
%endif
; sym(x)
; Expands to the symbol name to use for C symbol x on the target ABI.
;
; Some ABIs, notably MS COFF and Darwin Mach-O, require symbols with C
; linkage to carry a leading underscore. ELF and 64 bit Windows targets use
; the name unchanged.
%if LIBAOM_ELF
  %define sym(x) x
%elif LIBAOM_YASM_WIN64
  %define sym(x) x
%else
  ; Mach-O / COFF
  %define sym(x) _ %+ x
%endif
; globalsym(x)
; Expands to a global declaration of sym(x), decorated for the target ABI.
;
; When CHROMIUM is defined the declaration also carries attributes that hide
; the symbol from the global namespace: Chromium doesn't like exported global
; symbols because they can clash with plugins, among other things.
;
; The hidden variants require Chromium's patched copy of yasm:
;   http://src.chromium.org/viewvc/chrome?view=rev&revision=73761
;   http://www.tortall.net/projects/yasm/ticket/236
; or nasm > 2.14.
%ifndef CHROMIUM
  %define globalsym(x) global sym(x)
%else
  %ifdef __NASM_VER__
    %if __NASM_VERSION_ID__ < 0x020e0000 ; 2.14
      ; nasm < 2.14 does not support :private_extern directive
      %fatal Must use nasm 2.14 or newer
    %endif
  %endif

  %if LIBAOM_ELF
    %define globalsym(x) global sym(x) %+ :function hidden
  %elif LIBAOM_MACHO
    %define globalsym(x) global sym(x) %+ :private_extern
  %else
    ; COFF / PE32+ : no hiding attribute is added
    %define globalsym(x) global sym(x)
  %endif
%endif
; arg(x)
; Expands to the memory operand holding argument number x (zero based),
; addressed relative to the frame pointer.
;
; The parameter is parenthesized in every expansion so that a compound
; argument index such as arg(n+1) is scaled as a whole rather than only its
; first term.
%if ABI_IS_32BIT
  ; 32 bit: arguments are read upwards from ebp+8, 4 bytes apart.
  %define arg(x) [ebp+8+4*(x)]
%else
  ; 64 bit ABI passes arguments in registers. This is a workaround to get up
  ; and running quickly. Relies on SHADOW_ARGS_TO_STACK having stored the
  ; register arguments in memory first.
  %if LIBAOM_YASM_WIN64
    ; slots are read upwards from rbp+16, 8 bytes apart
    %define arg(x) [rbp+16+8*(x)]
  %else
    ; slots are read downwards from rbp-8, 8 bytes apart
    %define arg(x) [rbp-8-8*(x)]
  %endif
%endif
; REG_SZ_BYTES, REG_SZ_BITS
; Width of a general purpose register for the selected ABI, expressed in
; bytes and in bits.
%if ABI_IS_32BIT = 0
  %define REG_SZ_BYTES 8
  %define REG_SZ_BITS 64
%else
  %define REG_SZ_BYTES 4
  %define REG_SZ_BITS 32
%endif
; ALIGN_STACK <alignment> <register>
; Aligns the stack pointer to <alignment> bytes. On exit the previous value
; of the stack pointer is the first item on the stack (ie, the inverse of
; this macro is 'pop rsp').
;
;   <alignment>  alignment in bytes. The and-mask below only yields an
;                aligned address when this is a power of two. It is
;                parenthesized wherever it is used, so a compound expression
;                may be passed.
;   <register>   temporary register that receives the old stack pointer. It
;                is not preserved, which is why the caller must name it.
%macro ALIGN_STACK 2
    mov         %2, rsp                             ; remember the old rsp
    and         rsp, -(%1)                          ; round rsp down
    ; Leave room so that the push below lands on an aligned address.
    lea         rsp, [rsp - ((%1) - REG_SZ_BYTES)]
    push        %2                                  ; old rsp at top of stack
%endmacro
; The Microsoft assembler tries to impose a certain amount of type safety in
; its register usage through the keywords below. YASM doesn't recognize them,
; so they are defined (case-insensitively) to expand to nothing. This keeps
; the source as close as possible to the original inline assembler it was
; ported from.
%idefine MMWORD
%idefine XMMWORD
%idefine PTR
; PIC macros
;
; GET_GOT <reg>   - save <reg> and make it usable as the base for GLOBAL().
; GLOBAL(x)       - expands to the address expression for data symbol x.
; RESTORE_GOT     - undo GET_GOT (restores the register it saved).
; WRT_PLT         - suffix for calls that must go through the PLT.
; HIDDEN_DATA(x)  - decorates data symbol x with a hidden-visibility
;                   attribute where one is defined below.
;
; Any of these that are not defined by this block for the current
; configuration are expected to be given default definitions later on.
%if ABI_IS_32BIT
  %if CONFIG_PIC=1
    %ifidn __OUTPUT_FORMAT__,elf32
      %define WRT_PLT wrt ..plt
      %macro GET_GOT 1
        extern _GLOBAL_OFFSET_TABLE_
        push %1                     ; saved copy, popped by RESTORE_GOT
        call %%get_got              ; pushes the address of sub_offset
        %%sub_offset:
        jmp %%exitGG                ; reached via the ret below
        %%get_got:
        mov %1, [esp]               ; return address = address of sub_offset
        add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc
        ; Pop the return address pushed by the call. Without this the stack
        ; stays unbalanced and RESTORE_GOT would pop the return address
        ; instead of the register saved above.
        ret
        %%exitGG:
        %undef GLOBAL
        %define GLOBAL(x) x + %1 wrt ..gotoff
        %undef RESTORE_GOT
        %define RESTORE_GOT pop %1
      %endmacro
    %elifidn __OUTPUT_FORMAT__,macho32
      %macro GET_GOT 1
        push %1                     ; saved copy, popped by RESTORE_GOT
        call %%get_got
        %%get_got:
        pop %1                      ; register = address of the get_got label
        %undef GLOBAL
        %define GLOBAL(x) x + %1 - %%get_got
        %undef RESTORE_GOT
        %define RESTORE_GOT pop %1
      %endmacro
    %endif
  %endif

  %ifdef CHROMIUM
    %ifidn __OUTPUT_FORMAT__,macho32
      %define HIDDEN_DATA(x) x:private_extern
    %else
      %define HIDDEN_DATA(x) x
    %endif
  %else
    %define HIDDEN_DATA(x) x
  %endif
%else
  ; 64 bit: data is addressed with rel, so no register has to be set up.
  %macro GET_GOT 1
  %endmacro
  %define GLOBAL(x) rel x
  %ifidn __OUTPUT_FORMAT__,elf64
    %define WRT_PLT wrt ..plt
    %define HIDDEN_DATA(x) x:data hidden
  %elifidn __OUTPUT_FORMAT__,elfx32
    %define WRT_PLT wrt ..plt
    %define HIDDEN_DATA(x) x:data hidden
  %elifidn __OUTPUT_FORMAT__,macho64
    %ifdef CHROMIUM
      %define HIDDEN_DATA(x) x:private_extern
    %else
      %define HIDDEN_DATA(x) x
    %endif
  %else
    %define HIDDEN_DATA(x) x
  %endif
%endif
; Defaults for the PIC helpers: anything not defined yet at this point is
; defined so that it has no effect.

; WRT_PLT expands to nothing.
%ifndef WRT_PLT
  %define WRT_PLT
%endif

; RESTORE_GOT expands to nothing.
%ifndef RESTORE_GOT
  %define RESTORE_GOT
%endif

; GET_GOT emits no code and GLOBAL(x) is the bare symbol.
%ifnmacro GET_GOT
  %macro GET_GOT 1
  %endmacro
  %define GLOBAL(x) x
%endif
; SHADOW_ARGS_TO_STACK <argc>
; Stores the first <argc> arguments in memory so that arg(n) can address
; them.
;
; UNSHADOW_ARGS
; Undoes SHADOW_ARGS_TO_STACK.
%if ABI_IS_32BIT
  ; 32 bit: nothing to copy, so both helpers are empty.
  %macro SHADOW_ARGS_TO_STACK 1
  %endmacro
  %define UNSHADOW_ARGS
%else
  %if LIBAOM_YASM_WIN64
    ; Win64: write up to four register arguments into the arg() slots.
    %macro SHADOW_ARGS_TO_STACK 1 ; argc
      %if %1 > 0
        mov arg(0),rcx
      %endif
      %if %1 > 1
        mov arg(1),rdx
      %endif
      %if %1 > 2
        mov arg(2),r8
      %endif
      %if %1 > 3
        mov arg(3),r9
      %endif
    %endmacro
  %else
    ; Other 64 bit targets: push up to six register arguments in order.
    %macro SHADOW_ARGS_TO_STACK 1 ; argc
      %if %1 > 0
        push rdi
      %endif
      %if %1 > 1
        push rsi
      %endif
      %if %1 > 2
        push rdx
      %endif
      %if %1 > 3
        push rcx
      %endif
      %if %1 > 4
        push r8
      %endif
      %if %1 > 5
        push r9
      %endif
      ; Arguments past the sixth are copied one at a time, starting at
      ; rbp+16 and stepping 8 bytes, then pushed. rax is overwritten.
      %if %1 > 6
        %assign i %1-6
        %assign off 16
        %rep i
          mov rax,[rbp+off]
          push rax
          %assign off off+8
        %endrep
      %endif
    %endmacro
  %endif
  ; Discards whatever was pushed by resetting rsp to the frame pointer.
  %define UNSHADOW_ARGS mov rsp, rbp
%endif
; SAVE_XMM n, [u]
; RESTORE_XMM
;
; The Win64 ABI requires that XMM6:XMM15 are callee saved.
;
; SAVE_XMM reserves stack space and stores registers xmm6 through xmm<n> in
; it. RESTORE_XMM reloads the same registers and releases the space. If the
; optional second argument u is given, unaligned moves (movdqu) are used
; instead of the default aligned moves (movdqa).
;
; The Win64 ABI requires 16 byte stack alignment at a call, but the call then
; pushes an 8 byte return address. Typically this is followed by 'push rbp',
; which re-aligns the stack; where that is not done, the unaligned form must
; be used.
;
; On every other target both macros expand to nothing.
%if LIBAOM_YASM_WIN64
  %macro SAVE_XMM 1-2 a
    %if %1 < 6
      %error Only xmm registers 6-15 must be preserved
    %else
      ; Remembered for the matching RESTORE_XMM.
      %assign last_xmm %1
      %define movxmm movdq %+ %2
      %assign xmm_stack_space ((last_xmm - 5) * 16)
      sub rsp, xmm_stack_space
      ; Store xmm6 upwards, one 16 byte slot per register.
      %assign i 6
      %rep (last_xmm - 5)
        movxmm [rsp + ((i - 6) * 16)], xmm %+ i
        %assign i i+1
      %endrep
    %endif
  %endmacro

  %macro RESTORE_XMM 0
    %ifndef last_xmm
      %error RESTORE_XMM must be paired with SAVE_XMM n
    %else
      ; Reload from the highest saved register down to xmm6.
      %assign i last_xmm
      %rep (last_xmm - 5)
        movxmm xmm %+ i, [rsp +((i - 6) * 16)]
        %assign i i-1
      %endrep
      add rsp, xmm_stack_space
      ; there are a couple functions which return from multiple places.
      ; otherwise, we could uncomment these:
      ; %undef last_xmm
      ; %undef xmm_stack_space
      ; %undef movxmm
    %endif
  %endmacro
%else
  %macro SAVE_XMM 1-2
  %endmacro

  %macro RESTORE_XMM 0
  %endmacro
%endif
; SECTION_RODATA
; Switches to the section used for read-only data.
;
; .rodata seems to be an elf-ism, as it doesn't work on OSX, so the Mach-O
; formats use .text and aout uses .data. Every other format uses .rodata.
%ifidn __OUTPUT_FORMAT__,aout
  %define SECTION_RODATA section .data
%elifidn __OUTPUT_FORMAT__,macho32
  %macro SECTION_RODATA 0
    section .text
  %endmacro
%elifidn __OUTPUT_FORMAT__,macho64
  %define SECTION_RODATA section .text
%else
  %define SECTION_RODATA section .rodata
%endif
; Tell GNU ld that we don't require an executable stack. The note section is
; emitted for the ELF output formats (elf32, elfx32 and elf64), after which
; the current section is switched back to .text.
%if LIBAOM_ELF
  section .note.GNU-stack noalloc noexec nowrite progbits
  section .text
%endif