Add sse2/ssse3 intra predictors for 16x4
[aom.git] / aom_ports / x86_abi_support.asm
blob6aeee60a06cfc58d609e4b55bb3894f55e1b195c
2 ; Copyright (c) 2016, Alliance for Open Media. All rights reserved
4 ; This source code is subject to the terms of the BSD 2 Clause License and
5 ; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 ; was not distributed with this source code in the LICENSE file, you can
7 ; obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 ; Media Patent License 1.0 was not distributed with this source code in the
9 ; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
15 %include "aom_config.asm"
; 32/64 bit compatibility macros
;
; The sources are written in 64 bit syntax; on 32 bit platforms the
; preprocessor rewrites them into 32 bit syntax.
;
; ABI_IS_32BIT is 1 for the 32 bit object formats tested below and 0 for
; every other output format.
%ifidn __OUTPUT_FORMAT__,aout
  %define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,win32
  %define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,macho32
  %define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,elf32
  %define ABI_IS_32BIT 1
%else
  %define ABI_IS_32BIT 0
%endif
; On 32 bit targets, alias the 64 bit register names (and movsxd) to their
; 32 bit equivalents so that code written in 64 bit syntax still assembles.
%if ABI_IS_32BIT
  %define rax eax
  %define rbx ebx
  %define rcx ecx
  %define rdx edx
  %define rsi esi
  %define rdi edi
  %define rsp esp
  %define rbp ebp
  %define movsxd mov

  ; movq <dst>, <src>
  ; Emits movd when either operand is one of the 32 bit general purpose
  ; registers tested below, and the plain movq instruction otherwise.
  ; Every matching branch emits the same instruction, so the order of the
  ; tests does not matter.
  %macro movq 2
    ; destination is a 32 bit general purpose register
    %ifidn %1,eax
      movd %1,%2
    %elifidn %1,ebx
      movd %1,%2
    %elifidn %1,ecx
      movd %1,%2
    %elifidn %1,edx
      movd %1,%2
    %elifidn %1,esi
      movd %1,%2
    %elifidn %1,edi
      movd %1,%2
    %elifidn %1,esp
      movd %1,%2
    %elifidn %1,ebp
      movd %1,%2
    ; source is a 32 bit general purpose register
    %elifidn %2,eax
      movd %1,%2
    %elifidn %2,ebx
      movd %1,%2
    %elifidn %2,ecx
      movd %1,%2
    %elifidn %2,edx
      movd %1,%2
    %elifidn %2,esi
      movd %1,%2
    %elifidn %2,edi
      movd %1,%2
    %elifidn %2,esp
      movd %1,%2
    %elifidn %2,ebp
      movd %1,%2
    %else
      movq %1,%2
    %endif
  %endmacro
%endif
; LIBAOM_YASM_WIN64
; 1 when the output format is 64 bit Windows, 0 otherwise. Both the x64 and
; the win64 format names are accepted, so the code works whichever of the
; two is given on the Yasm command line.
%ifidn __OUTPUT_FORMAT__,x64
  %define LIBAOM_YASM_WIN64 1
%elifidn __OUTPUT_FORMAT__,win64
  %define LIBAOM_YASM_WIN64 1
%else
  %define LIBAOM_YASM_WIN64 0
%endif
; sym(x)
; Expands to the proper symbol name for the target ABI.
;
; Certain ABIs, notably MS COFF and Darwin MACH-O, require symbols with C
; linkage to carry a leading underscore. The ELF formats and Win64 use the
; name unchanged; every other format gets the underscore prefix.
%if LIBAOM_YASM_WIN64
  %define sym(x) x
%elifidn __OUTPUT_FORMAT__,elfx32
  %define sym(x) x
%elifidn __OUTPUT_FORMAT__,elf64
  %define sym(x) x
%elifidn __OUTPUT_FORMAT__,elf32
  %define sym(x) x
%else
  %define sym(x) _ %+ x
%endif
; PRIVATE
; Attribute that hides a global symbol for the target ABI. It expands to
; nothing unless CHROMIUM is defined.
;
; Chromium doesn't like exported global symbols due to symbol clashing with
; plugins among other things.
;
; Requires Chromium's patched copy of yasm:
;   http://src.chromium.org/viewvc/chrome?view=rev&revision=73761
;   http://www.tortall.net/projects/yasm/ticket/236
%ifndef CHROMIUM
  %define PRIVATE
%elifidn __OUTPUT_FORMAT__,elf32
  %define PRIVATE :hidden
%elifidn __OUTPUT_FORMAT__,elf64
  %define PRIVATE :hidden
%elifidn __OUTPUT_FORMAT__,elfx32
  %define PRIVATE :hidden
%elif LIBAOM_YASM_WIN64
  %define PRIVATE
%else
  %define PRIVATE :private_extern
%endif
; arg(x)
; Expands to the memory operand that addresses argument number x.
;
; The 64 bit ABIs pass arguments in registers. Addressing them through the
; frame pointer is a workaround to get up and running quickly, and it
; relies on SHADOW_ARGS_TO_STACK having stored the arguments first.
%if ABI_IS_32BIT
  %define arg(x) [ebp+8+4*x]
%elif LIBAOM_YASM_WIN64
  %define arg(x) [rbp+16+8*x]
%else
  %define arg(x) [rbp-8-8*x]
%endif
; REG_SZ_BYTES, REG_SZ_BITS
; Size of a general purpose register on the target, in bytes and in bits.
%if ABI_IS_32BIT = 0
  %define REG_SZ_BYTES 8
  %define REG_SZ_BITS  64
%else
  %define REG_SZ_BYTES 4
  %define REG_SZ_BITS  32
%endif
; ALIGN_STACK <alignment> <register>
; Aligns the stack pointer to <alignment> bytes. On exit the previous value
; of the stack pointer is the first item on the stack, so the inverse of
; this macro is 'pop rsp'.
;
; <register> is used as a temporary and is not preserved, which is why the
; caller has to name it.
%macro ALIGN_STACK 2
    mov         %2, rsp                           ; remember the incoming rsp
    and         rsp, -%1                          ; round rsp down to a multiple of %1
    lea         rsp, [rsp - (%1 - REG_SZ_BYTES)]  ; leave room so the push below ends on a multiple of %1
    push        %2                                ; [rsp] = incoming rsp
%endmacro
; The Microsoft assembler imposes a certain amount of type safety on its
; register usage through size directives. YASM does not recognize those
; directives, so they are defined away (case-insensitively) to stay as
; compatible as possible with the original inline assembler this code was
; ported from.
%idefine MMWORD
%idefine XMMWORD
%idefine PTR
190 ; PIC macros
192 %if ABI_IS_32BIT
193 %if CONFIG_PIC=1
194 %ifidn __OUTPUT_FORMAT__,elf32
%define WRT_PLT wrt ..plt

; GET_GOT <register>                                   (elf32, CONFIG_PIC=1)
; Saves <register> on the stack and loads it with the address of the global
; offset table. As a side effect the macro (re)defines:
;   GLOBAL(x)    - x addressed relative to the GOT held in <register>
;   RESTORE_GOT  - 'pop <register>', undoing the push done here
;
; The program counter is obtained through a matched call/ret pair: the call
; pushes the address of %%sub_offset, the thunk at %%get_got copies that
; address from the top of the stack and turns it into the GOT address, and
; the ret consumes the pushed address again. The ret is essential: without
; it execution falls through to %%exitGG with the return address still on
; the stack, so RESTORE_GOT would pop that address instead of the saved
; register and the stack would stay off by one slot.
%macro GET_GOT 1
    extern _GLOBAL_OFFSET_TABLE_
    push %1                         ; saved here, restored by RESTORE_GOT
    call %%get_got
  %%sub_offset:
    jmp %%exitGG                    ; skip over the thunk once it has returned
  %%get_got:
    mov %1, [esp]                   ; %1 = return address = %%sub_offset
    add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc
    ret                             ; pop the return address, resume at %%sub_offset
  %%exitGG:
    %undef GLOBAL
    %define GLOBAL(x) x + %1 wrt ..gotoff
    %undef RESTORE_GOT
    %define RESTORE_GOT pop %1
%endmacro
212 %elifidn __OUTPUT_FORMAT__,macho32
; GET_GOT <register>                                 (macho32, CONFIG_PIC=1)
; Saves <register> on the stack and loads it with the address of the local
; label %%get_got, obtained by calling the next instruction and popping the
; return address. As a side effect the macro (re)defines:
;   RESTORE_GOT  - 'pop <register>', undoing the push done here
;   GLOBAL(x)    - x addressed relative to the %%get_got label
%macro GET_GOT 1
    push %1                         ; saved here, restored by RESTORE_GOT
    call %%get_got
  %%get_got:
    pop %1                          ; %1 = address of %%get_got
    %undef RESTORE_GOT
    %define RESTORE_GOT pop %1
    %undef GLOBAL
    %define GLOBAL(x) x + %1 - %%get_got
%endmacro
223 %endif
224 %endif
; HIDDEN_DATA(x) for 32 bit targets
; Expands to the symbol name x; only a CHROMIUM build for macho32 appends
; the private_extern attribute.
%ifndef CHROMIUM
  %define HIDDEN_DATA(x) x
%elifidn __OUTPUT_FORMAT__,macho32
  %define HIDDEN_DATA(x) x:private_extern
%else
  %define HIDDEN_DATA(x) x
%endif
235 %else
; 64 bit targets: GET_GOT expands to nothing and GLOBAL(x) addresses x with
; the 'rel' keyword.
%macro GET_GOT 1
%endmacro
%define GLOBAL(x) rel x

; WRT_PLT and HIDDEN_DATA(x) for 64 bit targets. WRT_PLT is defined here
; only for the ELF formats. HIDDEN_DATA marks data as hidden on ELF, as
; private_extern on macho64 when CHROMIUM is defined, and expands to the
; bare symbol name otherwise.
%ifidn __OUTPUT_FORMAT__,elfx32
  %define WRT_PLT wrt ..plt
  %define HIDDEN_DATA(x) x:data hidden
%elifidn __OUTPUT_FORMAT__,elf64
  %define WRT_PLT wrt ..plt
  %define HIDDEN_DATA(x) x:data hidden
%elifidn __OUTPUT_FORMAT__,macho64
  %ifndef CHROMIUM
    %define HIDDEN_DATA(x) x
  %else
    %define HIDDEN_DATA(x) x:private_extern
  %endif
%else
  %define HIDDEN_DATA(x) x
%endif
254 %endif
; Fallbacks for whatever the platform specific PIC section left undefined,
; so that users of these names assemble on every target.
%ifndef WRT_PLT
  %define WRT_PLT
%endif
%ifndef RESTORE_GOT
  %define RESTORE_GOT
%endif
; Without a GET_GOT macro there is no GOT register: GET_GOT expands to
; nothing and GLOBAL(x) is the bare symbol.
%ifnmacro GET_GOT
  %macro GET_GOT 1
  %endmacro
  %define GLOBAL(x) x
%endif
; SHADOW_ARGS_TO_STACK <argc>
; Stores the first <argc> arguments where arg(x) expects to find them.
; UNSHADOW_ARGS
; Undoes SHADOW_ARGS_TO_STACK.
;
; 32 bit: both expand to nothing.
; Win64:  the four register arguments are stored to their arg(x) slots;
;         UNSHADOW_ARGS resets rsp to rbp.
; Other 64 bit targets: the six register arguments are pushed in order, and
;         arguments beyond the sixth are copied from [rbp+16] upwards via
;         rax and pushed as well; UNSHADOW_ARGS resets rsp to rbp.
;         NOTE(review): the pushes only line up with arg(x) = [rbp-8-8*x]
;         if rsp equals rbp when the macro is invoked - confirm at the
;         call sites.
%if ABI_IS_32BIT
  %macro SHADOW_ARGS_TO_STACK 1
  %endmacro
  %define UNSHADOW_ARGS
%elif LIBAOM_YASM_WIN64
  %macro SHADOW_ARGS_TO_STACK 1 ; argc
    %if %1 > 0
      mov arg(0),rcx
    %endif
    %if %1 > 1
      mov arg(1),rdx
    %endif
    %if %1 > 2
      mov arg(2),r8
    %endif
    %if %1 > 3
      mov arg(3),r9
    %endif
  %endmacro
  %define UNSHADOW_ARGS mov rsp, rbp
%else
  %macro SHADOW_ARGS_TO_STACK 1 ; argc
    %if %1 > 0
      push rdi
    %endif
    %if %1 > 1
      push rsi
    %endif
    %if %1 > 2
      push rdx
    %endif
    %if %1 > 3
      push rcx
    %endif
    %if %1 > 4
      push r8
    %endif
    %if %1 > 5
      push r9
    %endif
    %if %1 > 6
      ; i   = number of arguments beyond the sixth
      ; off = offset from rbp of the next one to copy
      %assign i %1-6
      %assign off 16
      %rep i
        mov rax,[rbp+off]
        push rax
        %assign off off+8
      %endrep
    %endif
  %endmacro
  %define UNSHADOW_ARGS mov rsp, rbp
%endif
; The Win64 ABI requires that XMM6:XMM15 are callee saved.
;
; SAVE_XMM n, [u]
;   Reserves stack space and stores registers xmm6..xmm<n> in it. If u is
;   given as the second argument, unaligned moves (movdqu) are used instead
;   of the default aligned ones (movdqa).
;   The Win64 ABI requires 16 byte stack alignment, but the call then
;   pushes an 8 byte return address. Typically this is followed by
;   'push rbp', which re-aligns the stack, but where that is not done the
;   unaligned moves must be used.
; RESTORE_XMM
;   Reloads the registers stored by the most recent SAVE_XMM and releases
;   the stack space. The bookkeeping symbols stay defined afterwards.
;
; On every other target both macros expand to nothing.
%if LIBAOM_YASM_WIN64
  %macro SAVE_XMM 1-2 a
    %if %1 >= 6
      %assign last_xmm %1
      %define movxmm movdq %+ %2
      %assign xmm_stack_space ((last_xmm - 5) * 16)
      sub rsp, xmm_stack_space
      ; store xmm6 first, at the lowest address
      %assign i 6
      %rep (last_xmm - 5)
        movxmm [rsp + ((i - 6) * 16)], xmm %+ i
        %assign i i+1
      %endrep
    %else
      %error Only xmm registers 6-15 must be preserved
    %endif
  %endmacro

  %macro RESTORE_XMM 0
    %ifdef last_xmm
      ; reload from the highest register down to xmm6
      %assign i last_xmm
      %rep (last_xmm - 5)
        movxmm xmm %+ i, [rsp +((i - 6) * 16)]
        %assign i i-1
      %endrep
      add rsp, xmm_stack_space
      ; there are a couple functions which return from multiple places.
      ; otherwise, we could uncomment these:
      ; %undef last_xmm
      ; %undef xmm_stack_space
      ; %undef movxmm
    %else
      %error RESTORE_XMM must be paired with SAVE_XMM n
    %endif
  %endmacro
%else
  %macro SAVE_XMM 1-2
  %endmacro

  %macro RESTORE_XMM 0
  %endmacro
%endif
; SECTION_RODATA
; Switches to the section used for read-only data on the target.
;
; .rodata seems to be an elf-ism, as it doesn't work on OSX: both macho
; formats use .text instead, and aout uses .data.
%ifidn __OUTPUT_FORMAT__,aout
  %define SECTION_RODATA section .data
%elifidn __OUTPUT_FORMAT__,macho32
  %macro SECTION_RODATA 0
    section .text
  %endmacro
%elifidn __OUTPUT_FORMAT__,macho64
  %define SECTION_RODATA section .text
%else
  %define SECTION_RODATA section .rodata
%endif
; Tell GNU ld that we don't require an executable stack. On the ELF formats
; the marker section is emitted and assembly then continues in .text.
%ifidn __OUTPUT_FORMAT__,elfx32
  section .note.GNU-stack noalloc noexec nowrite progbits
  section .text
%elifidn __OUTPUT_FORMAT__,elf64
  section .note.GNU-stack noalloc noexec nowrite progbits
  section .text
%elifidn __OUTPUT_FORMAT__,elf32
  section .note.GNU-stack noalloc noexec nowrite progbits
  section .text
%endif