Added vp9_short_idct1_32x32_c
[aom.git] / vpx_ports / x86_abi_support.asm
blob0c9fe377405693d7fcb406d823519d3c2d623a9e
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
12 %include "vpx_config.asm"
14 ; 32/64 bit compatibility macros
16 ; In general, we make the source use 64 bit syntax, then twiddle with it using
17 ; the preprocessor to get the 32 bit syntax on 32 bit platforms.
; ABI_IS_32BIT
;   1 when the selected output format is one of the 32-bit object formats
;   tested below, 0 for every other format.
%ifidn __OUTPUT_FORMAT__,win32
  %define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,aout
  %define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,macho32
  %define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,elf32
  %define ABI_IS_32BIT 1
%else
  %define ABI_IS_32BIT 0
%endif
%if ABI_IS_32BIT
  ; Alias each 64-bit general-purpose register name to its 32-bit
  ; counterpart so sources written with 64-bit names assemble unchanged.
  %define rsp esp
  %define rbp ebp
  %define rdi edi
  %define rsi esi
  %define rdx edx
  %define rcx ecx
  %define rbx ebx
  %define rax eax

  ; On 32-bit targets movsxd is replaced by a plain mov.
  %define movsxd mov

  ; movq <dst>, <src>
  ;   Emits movd when either operand is one of the eight 32-bit
  ;   general-purpose registers (which is what the aliases above turn the
  ;   r-names into); any other operand pair is passed to the real movq.
  %macro movq 2
    ; --- destination is a general-purpose register ---
    %ifidn %1,eax
      movd %1,%2
    %elifidn %1,ebx
      movd %1,%2
    %elifidn %1,ecx
      movd %1,%2
    %elifidn %1,edx
      movd %1,%2
    %elifidn %1,esi
      movd %1,%2
    %elifidn %1,edi
      movd %1,%2
    %elifidn %1,esp
      movd %1,%2
    %elifidn %1,ebp
      movd %1,%2
    ; --- source is a general-purpose register ---
    %elifidn %2,eax
      movd %1,%2
    %elifidn %2,ebx
      movd %1,%2
    %elifidn %2,ecx
      movd %1,%2
    %elifidn %2,edx
      movd %1,%2
    %elifidn %2,esi
      movd %1,%2
    %elifidn %2,edi
      movd %1,%2
    %elifidn %2,esp
      movd %1,%2
    %elifidn %2,ebp
      movd %1,%2
    ; --- neither operand is a general-purpose register ---
    %else
      movq %1,%2
    %endif
  %endmacro
%endif
81 ; sym()
82 ; Return the proper symbol name for the target ABI.
84 ; Certain ABIs, notably MS COFF and Darwin MACH-O, require that symbols
85 ; with C linkage be prefixed with an underscore.
; The ELF formats and x64 use the C name unchanged; every other output
; format gets a leading underscore pasted onto the name.
%ifidn __OUTPUT_FORMAT__,x64
  %define sym(x) x
%elifidn __OUTPUT_FORMAT__,elfx32
  %define sym(x) x
%elifidn __OUTPUT_FORMAT__,elf64
  %define sym(x) x
%elifidn __OUTPUT_FORMAT__,elf32
  %define sym(x) x
%else
  %define sym(x) _ %+ x
%endif
99 ; PRIVATE
100 ; Macro for the attribute to hide a global symbol for the target ABI.
101 ; This is only active if CHROMIUM is defined.
103 ; Chromium doesn't like exported global symbols due to symbol clashing with
104 ; plugins among other things.
106 ; Requires Chromium's patched copy of yasm:
107 ; http://src.chromium.org/viewvc/chrome?view=rev&revision=73761
108 ; http://www.tortall.net/projects/yasm/ticket/236
%ifndef CHROMIUM
  ; Outside Chromium builds PRIVATE expands to nothing.
  %define PRIVATE
%else
  ; Chromium builds: pick the visibility suffix for the output format.
  %ifidn __OUTPUT_FORMAT__,x64
    %define PRIVATE
  %elifidn __OUTPUT_FORMAT__,elfx32
    %define PRIVATE :hidden
  %elifidn __OUTPUT_FORMAT__,elf64
    %define PRIVATE :hidden
  %elifidn __OUTPUT_FORMAT__,elf32
    %define PRIVATE :hidden
  %else
    %define PRIVATE :private_extern
  %endif
%endif
126 ; arg()
127 ; Return the address specification of the given argument
; arg(x) expands to the memory operand holding argument number x (0-based).
; The index is parenthesised so that an expression such as arg(n+1) scales
; the whole index rather than only its first term.
; All three forms address relative to the frame pointer, so they are only
; meaningful once the function has set up ebp/rbp as its frame pointer.
%if ABI_IS_32BIT
  ; 4-byte argument slots starting at ebp+8.
  %define arg(x) [ebp+8+4*(x)]
%else
  ; 64 bit ABI passes arguments in registers. This is a workaround to get up
  ; and running quickly. Relies on SHADOW_ARGS_TO_STACK
  %ifidn __OUTPUT_FORMAT__,x64
    ; 8-byte slots above the frame pointer, starting at rbp+16.
    %define arg(x) [rbp+16+8*(x)]
  %else
    ; 8-byte slots below the frame pointer, starting at rbp-8, in the order
    ; SHADOW_ARGS_TO_STACK pushes them.
    %define arg(x) [rbp-8-8*(x)]
  %endif
%endif
141 ; REG_SZ_BYTES, REG_SZ_BITS
142 ; Size of a register
; Width of a general-purpose register on the target, in bits and in bytes.
%if ABI_IS_32BIT
  %define REG_SZ_BITS  32
  %define REG_SZ_BYTES 4
%else
  %define REG_SZ_BITS  64
  %define REG_SZ_BYTES 8
%endif
152 ; ALIGN_STACK <alignment> <register>
; This macro aligns the stack to the given alignment (in bytes). The stack
; is left such that the previous value of the stack pointer is the first
; argument on the stack (i.e., the inverse of this macro is 'pop rsp').
156 ; This macro uses one temporary register, which is not preserved, and thus
157 ; must be specified as an argument.
; ALIGN_STACK <alignment>, <scratch register>
;   %1  alignment in bytes; the AND mask below only works for a power of two
;   %2  scratch register; overwritten with the stack pointer value on entry
; On exit rsp is a multiple of %1 and [rsp] holds the entry value of rsp.
; Flags are modified by the AND.
%macro ALIGN_STACK 2
    mov     %2, rsp                         ; remember the incoming stack pointer
    ; Parenthesise the argument so an expression (e.g. 8+8) is negated as a
    ; whole instead of only its first term.
    and     rsp, -(%1)                      ; round rsp down to a multiple of %1
    ; Leave room so that the push below lands on an aligned address.
    lea     rsp, [rsp - (%1 - REG_SZ_BYTES)]
    push    %2                              ; [rsp] = original stack pointer
%endmacro
167 ; The Microsoft assembler tries to impose a certain amount of type safety in
168 ; its register usage. YASM doesn't recognize these directives, so we just
169 ; %define them away to maintain as much compatibility as possible with the
170 ; original inline assembler we're porting from.
; MASM size/type keywords, defined case-insensitively as empty so that they
; vanish from operands written in the original MASM style.
%idefine MMWORD
%idefine XMMWORD
%idefine PTR
176 ; PIC macros
; GET_GOT <reg>     make position-independent data addressable through <reg>
; GLOBAL(x)         address expression for data symbol x
; RESTORE_GOT       undo GET_GOT (restores the register it pushed)
; WRT_PLT           suffix for calls that must go through the PLT
; HIDDEN_DATA(x)    declaration of data symbol x with hidden visibility
%if ABI_IS_32BIT
  %if CONFIG_PIC=1
    %ifidn __OUTPUT_FORMAT__,elf32
      ; NOTE(review): GET_GOT_SAVE_ARG presumably tells callers that GET_GOT
      ; pushes its register; it is not consumed in this file.
      %define GET_GOT_SAVE_ARG 1
      %define WRT_PLT wrt ..plt
      %macro GET_GOT 1
        extern _GLOBAL_OFFSET_TABLE_
        push %1                     ; saved here, popped by RESTORE_GOT
        call %%get_got              ; pushes the address of %%sub_offset
        %%sub_offset:
        jmp %%exitGG                ; reached via the ret below
        %%get_got:
        mov %1, [esp]               ; %1 = run-time address of %%sub_offset
        add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc
        ; The ret is required: it consumes the return address pushed by the
        ; call above. Without it that address would stay on the stack and
        ; RESTORE_GOT's pop would fetch it instead of the saved register.
        ret
        %%exitGG:
        %undef GLOBAL
        %define GLOBAL(x) x + %1 wrt ..gotoff
        %undef RESTORE_GOT
        %define RESTORE_GOT pop %1
      %endmacro
    %elifidn __OUTPUT_FORMAT__,macho32
      %define GET_GOT_SAVE_ARG 1
      %macro GET_GOT 1
        push %1                     ; saved here, popped by RESTORE_GOT
        call %%get_got              ; pushes the address of %%get_got
        %%get_got:
        pop %1                      ; %1 = run-time address of %%get_got
        %undef GLOBAL
        ; Symbols are addressed by their distance from %%get_got.
        %define GLOBAL(x) x + %1 - %%get_got
        %undef RESTORE_GOT
        %define RESTORE_GOT pop %1
      %endmacro
    %endif
  %endif

  %ifdef CHROMIUM
    %ifidn __OUTPUT_FORMAT__,macho32
      %define HIDDEN_DATA(x) x:private_extern
    %else
      %define HIDDEN_DATA(x) x
    %endif
  %else
    %define HIDDEN_DATA(x) x
  %endif
%else
  ; 64-bit targets: GET_GOT expands to nothing and data is addressed with
  ; 'rel'.
  %macro GET_GOT 1
  %endmacro
  %define GLOBAL(x) rel x
  %ifidn __OUTPUT_FORMAT__,elf64
    %define WRT_PLT wrt ..plt
    %define HIDDEN_DATA(x) x:data hidden
  %elifidn __OUTPUT_FORMAT__,elfx32
    %define WRT_PLT wrt ..plt
    %define HIDDEN_DATA(x) x:data hidden
  %else
    %define HIDDEN_DATA(x) x
  %endif
%endif
; Fallbacks for whatever is still undefined at this point: each name
; becomes empty, and GLOBAL(x) is the bare symbol.
%ifndef WRT_PLT
  %define WRT_PLT
%endif
%ifndef RESTORE_GOT
  %define RESTORE_GOT
%endif
%ifnmacro GET_GOT
  %define GLOBAL(x) x
  %macro GET_GOT 1
  %endmacro
%endif
; SHADOW_ARGS_TO_STACK <argc>
;   Copies the first <argc> arguments to the stack slots that arg() reads.
; UNSHADOW_ARGS
;   Undoes SHADOW_ARGS_TO_STACK.
%if ABI_IS_32BIT
  ; 32-bit: both macros expand to nothing.
  %define UNSHADOW_ARGS
  %macro SHADOW_ARGS_TO_STACK 1
  %endmacro
%else
  ; 64-bit: drop everything below the frame pointer.
  %define UNSHADOW_ARGS mov rsp, rbp

  %ifidn __OUTPUT_FORMAT__,x64
    ; x64: store the register arguments rcx, rdx, r8, r9 into arg(0)..arg(3).
    ; %1 = argc
    %macro SHADOW_ARGS_TO_STACK 1
      %if %1 >= 1
        mov arg(0),rcx
      %endif
      %if %1 >= 2
        mov arg(1),rdx
      %endif
      %if %1 >= 3
        mov arg(2),r8
      %endif
      %if %1 >= 4
        mov arg(3),r9
      %endif
    %endmacro
  %else
    ; Other 64-bit formats: push rdi, rsi, rdx, rcx, r8, r9 in argument
    ; order, then copy any further arguments from [rbp+16] upwards.
    ; rax is used as the temporary for those copies and is overwritten.
    ; The preprocessor variables i and off are reassigned.
    ; %1 = argc
    %macro SHADOW_ARGS_TO_STACK 1
      %if %1 >= 1
        push rdi
      %endif
      %if %1 >= 2
        push rsi
      %endif
      %if %1 >= 3
        push rdx
      %endif
      %if %1 >= 4
        push rcx
      %endif
      %if %1 >= 5
        push r8
      %endif
      %if %1 >= 6
        push r9
      %endif
      %if %1 >= 7
        %assign i %1-6
        %assign off 16
        %rep i
          mov rax,[rbp+off]
          push rax
          %assign off off+8
        %endrep
      %endif
    %endmacro
  %endif
%endif
303 ; Win64 ABI requires that XMM6:XMM15 are callee saved
304 ; SAVE_XMM n, [u]
305 ; store registers 6-n on the stack
306 ; if u is specified, use unaligned movs.
; Win64 ABI requires 16 byte stack alignment, but the call then pushes an 8 byte
; return address. Typically we follow this up with 'push rbp' - re-aligning the
; stack - but in some cases this is not done and unaligned movs must be used.
%ifidn __OUTPUT_FORMAT__,x64
  ; SAVE_XMM n, [u]
  ;   Reserves 16 bytes of stack per register and stores xmm6..xmm<n> there,
  ;   lowest register at [rsp]. The optional second argument is the movdq
  ;   suffix: 'a' (the default) or 'u'.
  ;   Sets the preprocessor symbols last_xmm, movxmm and xmm_stack_space,
  ;   which RESTORE_XMM reads, and reassigns i.
  %macro SAVE_XMM 1-2 a
    %if %1 >= 6
      %assign last_xmm %1
      %define movxmm movdq %+ %2
      %assign xmm_stack_space ((last_xmm - 5) * 16)
      sub rsp, xmm_stack_space
      %assign i 6
      %rep (last_xmm - 5)
        movxmm [rsp + (16 * (i - 6))], xmm %+ i
        %assign i i+1
      %endrep
    %else
      %error Only xmm registers 6-15 must be preserved
    %endif
  %endmacro

  ; RESTORE_XMM
  ;   Reloads the registers stored by the most recent SAVE_XMM, highest
  ;   register first, then releases the stack space.
  %macro RESTORE_XMM 0
    %ifdef last_xmm
      %assign i last_xmm
      %rep (last_xmm - 5)
        movxmm xmm %+ i, [rsp + (16 * (i - 6))]
        %assign i i-1
      %endrep
      add rsp, xmm_stack_space
      ; there are a couple functions which return from multiple places.
      ; otherwise, we could uncomment these:
      ; %undef last_xmm
      ; %undef xmm_stack_space
      ; %undef movxmm
    %else
      %error RESTORE_XMM must be paired with SAVE_XMM n
    %endif
  %endmacro
%else
  ; Every other output format: both macros expand to nothing.
  %macro SAVE_XMM 1-2
  %endmacro
  %macro RESTORE_XMM 0
  %endmacro
%endif
350 ; Name of the rodata section
352 ; .rodata seems to be an elf-ism, as it doesn't work on OSX.
; SECTION_RODATA switches to the section used for read-only data:
; .text for the two Mach-O formats, .data for aout, .rodata otherwise.
%ifidn __OUTPUT_FORMAT__,aout
  %define SECTION_RODATA section .data
%elifidn __OUTPUT_FORMAT__,macho32
  %macro SECTION_RODATA 0
    section .text
  %endmacro
%elifidn __OUTPUT_FORMAT__,macho64
  %define SECTION_RODATA section .text
%else
  %define SECTION_RODATA section .rodata
%endif
367 ; Tell GNU ld that we don't require an executable stack.
; For the ELF formats, emit the .note.GNU-stack section and then switch
; back to .text.
%ifidn __OUTPUT_FORMAT__,elfx32
  section .note.GNU-stack noalloc noexec nowrite progbits
  section .text
%elifidn __OUTPUT_FORMAT__,elf64
  section .note.GNU-stack noalloc noexec nowrite progbits
  section .text
%elifidn __OUTPUT_FORMAT__,elf32
  section .note.GNU-stack noalloc noexec nowrite progbits
  section .text
%endif