/* Assembler macros for ARM.
   Copyright (C) 1997-2014 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <http://www.gnu.org/licenses/>.  */
#include <sysdeps/generic/sysdep.h>
#include <features.h>

#ifndef __ASSEMBLER__
# include <stdint.h>
#else
# include <arm-features.h>
#endif
/* The __ARM_ARCH define is provided by gcc 4.8.  Construct it otherwise.  */
#ifndef __ARM_ARCH
# ifdef __ARM_ARCH_2__
#  define __ARM_ARCH 2
# elif defined (__ARM_ARCH_3__) || defined (__ARM_ARCH_3M__)
#  define __ARM_ARCH 3
# elif defined (__ARM_ARCH_4__) || defined (__ARM_ARCH_4T__)
#  define __ARM_ARCH 4
# elif defined (__ARM_ARCH_5__) || defined (__ARM_ARCH_5E__) \
       || defined (__ARM_ARCH_5T__) || defined (__ARM_ARCH_5TE__) \
       || defined (__ARM_ARCH_5TEJ__)
#  define __ARM_ARCH 5
# elif defined (__ARM_ARCH_6__) || defined (__ARM_ARCH_6J__) \
       || defined (__ARM_ARCH_6Z__) || defined (__ARM_ARCH_6ZK__) \
       || defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6T2__)
#  define __ARM_ARCH 6
# elif defined (__ARM_ARCH_7__) || defined (__ARM_ARCH_7A__) \
       || defined (__ARM_ARCH_7R__) || defined (__ARM_ARCH_7M__) \
       || defined (__ARM_ARCH_7EM__)
#  define __ARM_ARCH 7
# else
#  error unknown arm architecture
# endif
#endif
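
/* For example, compiling with -march=armv7-a makes GCC predefine
   __ARM_ARCH_7A__, so the chain above yields __ARM_ARCH == 7.  */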
#if __ARM_ARCH > 4 || defined (__ARM_ARCH_4T__)
# define ARCH_HAS_BX
#endif
#if __ARM_ARCH > 4
# define ARCH_HAS_BLX
#endif
#if __ARM_ARCH > 6 || defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6ZK__)
# define ARCH_HAS_HARD_TP
#endif
#if __ARM_ARCH > 6 || defined (__ARM_ARCH_6T2__)
# define ARCH_HAS_T2
#endif
#ifdef __ASSEMBLER__

/* Syntactic details of assembler.  */

#define ALIGNARG(log2) log2
#define ASM_SIZE_DIRECTIVE(name) .size name,.-name

#define PLTJMP(_x) _x##(PLT)
#ifdef ARCH_HAS_BX
# define BX(R)		bx R
# define BXC(C, R)	bx##C R
# ifdef ARCH_HAS_BLX
#  define BLX(R)	blx R
# else
#  define BLX(R)	mov lr, pc; bx R
# endif
#else
# define BX(R)		mov pc, R
# define BXC(C, R)	mov##C pc, R
# define BLX(R)		mov lr, pc; mov pc, R
#endif

#define DO_RET(R)	BX(R)
#define RETINSTR(C, R)	BXC(C, R)
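
/* For example, RETINSTR (eq, lr) assembles to "bxeq lr" when BX is
   available, and to "moveq pc, lr" on pre-v4T processors.  */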
/* Define an entry point visible from C.  */
#define ENTRY(name) \
	.globl C_SYMBOL_NAME(name); \
	.type C_SYMBOL_NAME(name),%function; \
	.align ALIGNARG(4); \
	C_LABEL(name) \
	CFI_SECTIONS; \
	cfi_startproc; \
	CALL_MCOUNT

#define CFI_SECTIONS \
	.cfi_sections .debug_frame

#undef END
#define END(name) \
	cfi_endproc; \
	ASM_SIZE_DIRECTIVE(name)
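
/* Typical use in an assembly source file:

	ENTRY (memcpy)
		... function body ...
	END (memcpy)

   which emits the symbol directives, the CFI bracketing, and (under
   PROF) the mcount call prologue.  */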
/* If compiled for profiling, call `mcount' at the start of each function.  */
#ifdef PROF
/* Call __gnu_mcount_nc if GCC >= 4.4; it pops the saved lr itself,
   so no pop is needed here.  */
#if __GNUC_PREREQ(4,4)
#define CALL_MCOUNT \
	push {lr}; \
	cfi_adjust_cfa_offset (4); \
	cfi_rel_offset (lr, 0); \
	bl PLTJMP(mcount); \
	cfi_adjust_cfa_offset (-4); \
	cfi_restore (lr)
#else /* else call _mcount */
#define CALL_MCOUNT \
	push {lr}; \
	cfi_adjust_cfa_offset (4); \
	cfi_rel_offset (lr, 0); \
	bl PLTJMP(mcount); \
	pop {lr}; \
	cfi_adjust_cfa_offset (-4); \
	cfi_restore (lr)
#endif
#else
#define CALL_MCOUNT	/* Do nothing.  */
#endif
/* Since C identifiers are not normally prefixed with an underscore
   on this system, the asm identifier `syscall_error' intrudes on the
   C name space.  Make sure we use an innocuous name.  */
#define syscall_error __syscall_error
#if __GNUC_PREREQ(4,4)
#define mcount __gnu_mcount_nc
#else
#define mcount _mcount
#endif
/* Tag_ABI_align8_preserved: This code preserves 8-byte
   alignment in any callee.  */
	.eabi_attribute 25, 1
/* Tag_ABI_align8_needed: This code may require 8-byte alignment from
   the caller.  */
	.eabi_attribute 24, 1

/* The thumb2 encoding is reasonably complete.  Unless suppressed, use it.  */
	.syntax unified
# if defined (__thumb2__) && !defined (NO_THUMB)
	.thumb
# else
#  undef __thumb__
#  undef __thumb2__
	.arm
# endif
/* Load or store to/from address X + Y into/from R, (maybe) using T.
   X or Y can use T freely; T can be R if OP is a load.  The first
   version eschews the two-register addressing mode, while the
   second version uses it.  */
# define LDST_INDEXED_NOINDEX(OP, R, T, X, Y) \
	add T, X, Y; \
	sfi_breg T, \
	OP R, [\B]
# define LDST_INDEXED_INDEX(OP, R, X, Y) \
	OP R, [X, Y]

# ifdef ARM_NO_INDEX_REGISTER
/* We're never using the two-register addressing mode, so this
   always uses an intermediate add.  */
#  define LDST_INDEXED(OP, R, T, X, Y)	LDST_INDEXED_NOINDEX (OP, R, T, X, Y)
#  define LDST_PC_INDEXED(OP, R, T, X)	LDST_INDEXED_NOINDEX (OP, R, T, pc, X)
# else
/* The two-register addressing mode is OK, except on Thumb with pc.  */
#  define LDST_INDEXED(OP, R, T, X, Y)	LDST_INDEXED_INDEX (OP, R, X, Y)
#  ifdef __thumb2__
#   define LDST_PC_INDEXED(OP, R, T, X)	LDST_INDEXED_NOINDEX (OP, R, T, pc, X)
#  else
#   define LDST_PC_INDEXED(OP, R, T, X)	LDST_INDEXED_INDEX (OP, R, pc, X)
#  endif
# endif
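
/* For illustration, LDST_INDEXED (ldr, r0, r2, r1, r3) becomes
   "ldr r0, [r1, r3]" when the index register is usable, and
   "add r2, r1, r3; ldr r0, [r2]" (via sfi_breg) when
   ARM_NO_INDEX_REGISTER forces the intermediate add.  */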
/* Load or store to/from a pc-relative EXPR into/from R, using T.  */
# ifdef __thumb2__
#  define LDST_PCREL(OP, R, T, EXPR) \
	ldr T, 98f; \
	.subsection 2; \
98:	.word EXPR - 99f - PC_OFS; \
	.previous; \
99:	add T, T, pc; \
	sfi_breg T, \
	OP R, [\B]
# elif defined (ARCH_HAS_T2) && ARM_PCREL_MOVW_OK
#  define LDST_PCREL(OP, R, T, EXPR) \
	movw T, #:lower16:EXPR - 99f - PC_OFS; \
	movt T, #:upper16:EXPR - 99f - PC_OFS; \
99:	LDST_PC_INDEXED (OP, R, T, T)
# else
#  define LDST_PCREL(OP, R, T, EXPR) \
	ldr T, 98f; \
	.subsection 2; \
98:	.word EXPR - 99f - PC_OFS; \
	.previous; \
99:	OP R, [pc, T]
# endif
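
/* A sketch of the movw/movt variant in the common case, with OP = ldr,
   R = r0, T = r1 (register choices are illustrative):

	movw	r1, #:lower16:EXPR - 99f - PC_OFS
	movt	r1, #:upper16:EXPR - 99f - PC_OFS
   99:	ldr	r0, [pc, r1]

   At label 99 the pc reads as 99b + PC_OFS, so the displacement in r1
   cancels exactly and the load hits EXPR with no literal-pool word.  */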
/* Load from a global SYMBOL + CONSTANT into R, using T.  */
# if defined (ARCH_HAS_T2) && !defined (PIC)
#  define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \
	movw T, #:lower16:SYMBOL; \
	movt T, #:upper16:SYMBOL; \
	sfi_breg T, \
	ldr R, [\B, $CONSTANT]
# elif defined (ARCH_HAS_T2) && defined (PIC) && ARM_PCREL_MOVW_OK
#  define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \
	movw R, #:lower16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS; \
	movw T, #:lower16:99f - 98f - PC_OFS; \
	movt R, #:upper16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS; \
	movt T, #:upper16:99f - 98f - PC_OFS; \
	.pushsection .rodata.cst4, "aM", %progbits, 4; \
	.balign 4; \
99:	.word SYMBOL##(GOT); \
	.popsection; \
97:	add R, R, pc; \
98:	LDST_PC_INDEXED (ldr, T, T, T); \
	LDST_INDEXED (ldr, R, T, R, T); \
	sfi_breg R, \
	ldr R, [\B, $CONSTANT]
# else
#  define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \
	ldr T, 99f; \
	ldr R, 100f; \
98:	add T, T, pc; \
	sfi_breg T, \
	ldr T, [\B, R]; \
	.subsection 2; \
99:	.word _GLOBAL_OFFSET_TABLE_ - 98b - PC_OFS; \
100:	.word SYMBOL##(GOT); \
	.previous; \
	sfi_breg T, \
	ldr R, [\B, $CONSTANT]
# endif
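
/* In the static (non-PIC) case, LDR_GLOBAL (r0, r1, foo, 4) reduces,
   in the default (non-SFI) configuration, to

	movw	r1, #:lower16:foo
	movt	r1, #:upper16:foo
	ldr	r0, [r1, $4]

   (registers and symbol chosen for illustration): two moves build the
   absolute address, avoiding a literal-pool load.  The PIC variants
   instead go through the GOT, at the cost of one or two extra loads.  */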
/* This is the same as LDR_GLOBAL, but for a SYMBOL that is known to
   be in the same linked object (as for one with hidden visibility).
   We can avoid the GOT indirection in the PIC case.  For the pure
   static case, LDR_GLOBAL is already optimal.  */
# ifdef PIC
#  define LDR_HIDDEN(R, T, SYMBOL, CONSTANT) \
	LDST_PCREL (ldr, R, T, SYMBOL + CONSTANT)
# else
#  define LDR_HIDDEN(R, T, SYMBOL, CONSTANT) \
	LDR_GLOBAL (R, T, SYMBOL, CONSTANT)
# endif
/* Cope with negative memory offsets, which thumb can't encode.
   Use NEGOFF_ADJ_BASE to (conditionally) alter the base register,
   and then NEGOFF_OFF1 to use 0 for thumb and the offset for arm,
   or NEGOFF_OFF2 to use A-B for thumb and A for arm.  */
# ifdef __thumb2__
#  define NEGOFF_ADJ_BASE(R, OFF)	add R, R, $OFF
#  define NEGOFF_ADJ_BASE2(D, S, OFF)	add D, S, $OFF
#  define NEGOFF_OFF1(R, OFF)		[R]
#  define NEGOFF_OFF2(R, OFFA, OFFB)	[R, $((OFFA) - (OFFB))]
# else
#  define NEGOFF_ADJ_BASE(R, OFF)
#  define NEGOFF_ADJ_BASE2(D, S, OFF)	mov D, S
#  define NEGOFF_OFF1(R, OFF)		[R, $OFF]
#  define NEGOFF_OFF2(R, OFFA, OFFB)	[R, $OFFA]
# endif
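
/* For example, to get the effect of the ARM-mode "ldr r0, [r1, #-4]"
   in a way that also assembles for thumb (illustrative registers):

	NEGOFF_ADJ_BASE (r1, -4)
	ldr	r0, NEGOFF_OFF1 (r1, -4)

   On thumb this becomes "add r1, r1, $-4; ldr r0, [r1]", clobbering
   the base; use NEGOFF_ADJ_BASE2 with a scratch destination when the
   original base must stay intact.  */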
/* Helper to get the TLS base pointer.  The interface is that TMP is a
   register that may be used to hold the LR, if necessary.  TMP may be
   LR itself to indicate that LR need not be saved.  The base pointer
   is returned in R0.  Only R0 and TMP are modified.  */

# ifdef ARCH_HAS_HARD_TP
/* If the cpu has cp15 available, use it.  */
#  define GET_TLS(TMP)		mrc p15, 0, r0, c13, c0, 3
# else
/* At this generic level we have no tricks to pull.  Call the ABI routine.  */
#  define GET_TLS(TMP) \
	push { r1, r2, r3, lr }; \
	cfi_remember_state; \
	cfi_adjust_cfa_offset (16); \
	cfi_rel_offset (r1, 0); \
	cfi_rel_offset (r2, 4); \
	cfi_rel_offset (r3, 8); \
	cfi_rel_offset (lr, 12); \
	bl __aeabi_read_tp; \
	pop { r1, r2, r3, lr }; \
	cfi_restore_state
# endif /* ARCH_HAS_HARD_TP */
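
/* A typical call site is "GET_TLS (lr)" when the return address is
   dead; afterwards r0 holds the thread pointer.  On ARCH_HAS_HARD_TP
   cpus this is a single mrc read of the cp15 user-read-only thread
   register; otherwise __aeabi_read_tp does the work, with the
   call-clobbered r1-r3 and lr saved and restored around the call.  */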
# ifndef ARM_SFI_MACROS
# define ARM_SFI_MACROS 1
/* This assembly macro is prepended to any load/store instruction,
   pulling the base register out of the addressing mode syntax and
   making it the first operand of the macro.  For example:
	ldr r0, [r1]
   becomes:
	sfi_breg r1, ldr r0, [\B]
   The \B stands in for the base register that is the first operand
   to the macro, so we can avoid error-prone repetition of the base
   register in two places on the line.

   This is used for all memory access through a base register other
   than PC or SP.  It's intended to support SFI schemes such as
   Native Client, where the OS will enforce that all load/store
   instructions use a special form.  In any such configuration,
   another sysdep.h file will have defined ARM_SFI_MACROS and
   provided its own assembly macros with the same interface.  */

	.macro sfi_breg basereg, insn, operands:vararg
	.macro _sfi_breg_doit B
	\insn \operands
	.endm
	_sfi_breg_doit \basereg
	.purgem _sfi_breg_doit
	.endm
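
/* To trace the default expansion: "sfi_breg r1, ldr r0, [\B, #4]"
   defines the inner one-shot macro with B bound to r1, so the body
   emits exactly "ldr r0, [r1, #4]" and the wrapper costs nothing.  */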
/* This assembly macro replaces the "pld" instruction.
   The syntax:
	sfi_pld REGISTER, #OFFSET
   is exactly equivalent to:
	sfi_breg REGISTER, pld [\B, #OFFSET]
   (and ", #OFFSET" is optional).  We have a separate macro
   only to work around a bug in GAS versions prior to 2.23.2,
   that misparses the sfi_breg macro expansion in this case.  */

	.macro sfi_pld basereg, offset=#0
	pld [\basereg, \offset]
	.endm
/* This macro precedes any instruction that directly changes the SP.
   It's not needed for push/pop or for any kind of load or store that
   implicitly changes the SP via the ! syntax.  */
# define sfi_sp /* Nothing to do.  */

# endif
/* These are the directives used for EABI unwind info.
   Wrap them in macros so another configuration's sysdep.h
   file can define them away if it doesn't use EABI unwind info.  */
# define eabi_fnstart		.fnstart
# define eabi_fnend		.fnend
# define eabi_save(...)		.save __VA_ARGS__
# define eabi_cantunwind	.cantunwind
# define eabi_pad(n)		.pad n
#endif /* __ASSEMBLER__ */

/* This number is the offset from the pc at the current location.  */
#ifdef __thumb__
# define PC_OFS 4
#else
# define PC_OFS 8
#endif
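
/* That is, an ARM-mode instruction at address A that reads the pc
   sees A + 8 (two instructions ahead, a relic of the original
   three-stage pipeline); in thumb state it sees A + 4.  The
   pc-relative arithmetic above adds PC_OFS so the bias cancels.  */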
/* Pointer mangling support.  */
#if (defined IS_IN_rtld \
     || (!defined SHARED && (!defined NOT_IN_libc || defined IS_IN_libpthread)))
# ifdef __ASSEMBLER__
#  define PTR_MANGLE_LOAD(guard, tmp) \
	LDR_HIDDEN (guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard_local), 0)
#  define PTR_MANGLE(dst, src, guard, tmp) \
	PTR_MANGLE_LOAD(guard, tmp); \
	PTR_MANGLE2(dst, src, guard)
/* Use PTR_MANGLE2 for efficiency if guard is already loaded.  */
#  define PTR_MANGLE2(dst, src, guard) \
	eor dst, src, guard
#  define PTR_DEMANGLE(dst, src, guard, tmp) \
	PTR_MANGLE (dst, src, guard, tmp)
#  define PTR_DEMANGLE2(dst, src, guard) \
	PTR_MANGLE2 (dst, src, guard)
# else
extern uintptr_t __pointer_chk_guard_local attribute_relro attribute_hidden;
#  define PTR_MANGLE(var) \
	(var) = (__typeof (var)) ((uintptr_t) (var) ^ __pointer_chk_guard_local)
#  define PTR_DEMANGLE(var) PTR_MANGLE (var)
# endif
#else
# ifdef __ASSEMBLER__
#  define PTR_MANGLE_LOAD(guard, tmp) \
	LDR_GLOBAL (guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard), 0)
#  define PTR_MANGLE(dst, src, guard, tmp) \
	PTR_MANGLE_LOAD(guard, tmp); \
	PTR_MANGLE2(dst, src, guard)
/* Use PTR_MANGLE2 for efficiency if guard is already loaded.  */
#  define PTR_MANGLE2(dst, src, guard) \
	eor dst, src, guard
#  define PTR_DEMANGLE(dst, src, guard, tmp) \
	PTR_MANGLE (dst, src, guard, tmp)
#  define PTR_DEMANGLE2(dst, src, guard) \
	PTR_MANGLE2 (dst, src, guard)
# else
extern uintptr_t __pointer_chk_guard attribute_relro;
#  define PTR_MANGLE(var) \
	(var) = (__typeof (var)) ((uintptr_t) (var) ^ __pointer_chk_guard)
#  define PTR_DEMANGLE(var) PTR_MANGLE (var)
# endif
#endif
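
/* Usage from C (illustrative): mangling is a plain XOR with the guard
   value, and XOR is its own inverse, which is why PTR_DEMANGLE is
   defined as PTR_MANGLE.

	void (*fp) (void) = &some_handler;   hypothetical callback
	PTR_MANGLE (fp);                     obfuscate before storing
	...
	PTR_DEMANGLE (fp);                   recover before calling
	fp ();
*/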