/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001,2002,2003 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combined
executable.)

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
#include "xtensa/xtensa-config.h"

# Define macros for the ABS and ADDX* instructions to handle cases
# where they are not included in the Xtensa processor configuration.
	.macro	do_abs dst, src, tmp
#if XCHAL_HAVE_ABS
	abs	\dst, \src
#else
	neg	\tmp, \src
	movgez	\tmp, \src, \src
	mov	\dst, \tmp
#endif
	.endm

	.macro	do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx2	\dst, \as, \at
#else
	slli	\tmp, \as, 1
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx4	\dst, \as, \at
#else
	slli	\tmp, \as, 2
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx8	\dst, \as, \at
#else
	slli	\tmp, \as, 3
	add	\dst, \tmp, \at
#endif
	.endm
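
/* For reference, a minimal C sketch of what the fallback sequences above
   compute; the helper names are made up for illustration and are not part
   of libgcc:

     static inline int abs_ref (int x) { return x < 0 ? -x : x; }
     static inline unsigned int addx2_ref (unsigned int s, unsigned int t)
     { return (s << 1) + t; }
     static inline unsigned int addx4_ref (unsigned int s, unsigned int t)
     { return (s << 2) + t; }
     static inline unsigned int addx8_ref (unsigned int s, unsigned int t)
     { return (s << 3) + t; }

   When the processor configuration includes the ABS and ADDX2/4/8
   instructions, each macro expands to the single corresponding
   instruction instead of the fallback sequence.  */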
#ifdef L_mulsi3
	.align	4
	.global	__mulsi3
	.type	__mulsi3,@function
__mulsi3:
	entry	sp, 16

#if XCHAL_HAVE_MUL16
	or	a4, a2, a3
	srai	a4, a4, 16
	bnez	a4, .LMUL16
	mul16u	a2, a2, a3
	retw
.LMUL16:
	srai	a4, a2, 16
	srai	a5, a3, 16
	mul16u	a7, a4, a3
	mul16u	a6, a5, a2
	mul16u	a4, a2, a3
	add	a7, a7, a6
	slli	a7, a7, 16
	add	a2, a7, a4

#elif XCHAL_HAVE_MAC16
	mul.aa.hl a2, a3
	mula.aa.lh a2, a3
	rsr	a5, 16		# ACCLO
	umul.aa.ll a2, a3
	rsr	a4, 16		# ACCLO
	slli	a5, a5, 16
	add	a2, a4, a5

#else /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

	# Multiply one bit at a time, but unroll the loop 4x to better
	# exploit the addx instructions and avoid overhead.
	# Peel the first iteration to save a cycle on init.

	# Avoid negative numbers.
	xor	a5, a2, a3	# top bit is 1 iff one of the inputs is negative
	do_abs	a3, a3, a6
	do_abs	a2, a2, a6

	# Swap so the second argument is smaller.
	sub	a7, a2, a3
	mov	a4, a3
	movgez	a4, a2, a7	# a4 = max(a2, a3)
	movltz	a3, a2, a7	# a3 = min(a2, a3)

	movi	a2, 0
	extui	a6, a3, 0, 1
	movnez	a2, a4, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop
	neg	a3, a2
	movltz	a2, a3, a5
	retw

	.align	4
.Lmult_main_loop:
	srli	a3, a3, 4
	slli	a4, a4, 4

	add	a7, a4, a2
	extui	a6, a3, 0, 1
	movnez	a2, a7, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop

	neg	a3, a2
	movltz	a2, a3, a5

#endif /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

	retw
	.size	__mulsi3,.-__mulsi3

#endif /* L_mulsi3 */
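
/* For reference, a minimal C sketch of the generic shift-and-add path
   above; the name mulsi3_ref is made up for illustration.  Since the
   product is only needed modulo 2^32, the low 32 bits are the same for
   signed and unsigned inputs:

     unsigned int
     mulsi3_ref (unsigned int a, unsigned int b)
     {
       unsigned int result = 0;
       while (b != 0)
         {
           if (b & 1)
             result += a;
           a <<= 1;
           b >>= 1;
         }
       return result;
     }

   The assembly version additionally takes absolute values and keeps the
   smaller operand as the bit source so the loop exits sooner, and it
   retires four bits per iteration using the ADDX macros.  The MUL16 path
   instead splits each operand into 16-bit halves and combines three
   mul16u products:  a*b == ((ah*bl + al*bh) << 16) + al*bl  (mod 2^32).  */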
# Define a macro for the NSAU (unsigned normalize shift amount)
# instruction, which computes the number of leading zero bits,
# to handle cases where it is not included in the Xtensa processor
# configuration.
	.macro	do_nsau cnt, val, tmp, a
#if XCHAL_HAVE_NSA
	nsau	\cnt, \val
#else
	mov	\a, \val
	movi	\cnt, 0
	extui	\tmp, \a, 16, 16
	bnez	\tmp, 0f
	movi	\cnt, 16
	slli	\a, \a, 16
0:
	extui	\tmp, \a, 24, 8
	bnez	\tmp, 1f
	addi	\cnt, \cnt, 8
	slli	\a, \a, 8
1:
	movi	\tmp, __nsau_data
	extui	\a, \a, 24, 8
	add	\tmp, \tmp, \a
	l8ui	\tmp, \tmp, 0
	add	\cnt, \cnt, \tmp
#endif /* !XCHAL_HAVE_NSA */
	.endm
#ifdef L_nsau
	.section .rodata
	.align	4
	.global	__nsau_data
	.type	__nsau_data,@object
__nsau_data:
#if !XCHAL_HAVE_NSA
	.byte	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
	.byte	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#endif /* !XCHAL_HAVE_NSA */
	.size	__nsau_data,.-__nsau_data
	.hidden	__nsau_data
#endif /* L_nsau */
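
/* For reference, a minimal C sketch of the table-based fallback used by
   do_nsau when the NSA option is absent; the name nsau_ref is made up for
   illustration.  __nsau_data[k] is the number of leading zero bits in the
   byte value k, so nsau_ref (0) == 32:

     extern const unsigned char __nsau_data[256];

     unsigned int
     nsau_ref (unsigned int x)
     {
       unsigned int count = 0;
       if ((x >> 16) == 0)
         {
           count = 16;
           x <<= 16;
         }
       if ((x >> 24) == 0)
         {
           count += 8;
           x <<= 8;
         }
       return count + __nsau_data[x >> 24];
     }
*/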
#ifdef L_udivsi3
	.align	4
	.global	__udivsi3
	.type	__udivsi3,@function
__udivsi3:
	entry	sp, 16
	bltui	a3, 2, .Lle_one	# check if the divisor <= 1

	mov	a6, a2		# keep dividend in a6
	do_nsau	a5, a6, a2, a7	# dividend_shift = nsau(dividend)
	do_nsau	a4, a3, a2, a7	# divisor_shift = nsau(divisor)
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	# count = divisor_shift - dividend_shift
	ssl	a4
	sll	a3, a3		# divisor <<= count
	movi	a2, 0		# quotient = 0

	# test-subtract-and-shift loop; one quotient bit on each iteration
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	# increment quotient if dividend >= divisor
.Lreturn:
	retw

.Lspecial:
	# return dividend >= divisor
	movi	a2, 0
	bltu	a6, a3, .Lreturn2
	movi	a2, 1
.Lreturn2:
	retw

.Lle_one:
	beqz	a3, .Lerror	# if divisor == 1, return the dividend
	retw
.Lerror:
	movi	a2, 0		# just return 0; could throw an exception
	retw
	.size	__udivsi3,.-__udivsi3

#endif /* L_udivsi3 */
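
/* For reference, a minimal C sketch of the shift-subtract division above;
   the name udivsi3_ref is made up for illustration.  Like the assembly, it
   first lines the divisor up under the dividend's leading bit and then
   produces one quotient bit per step:

     unsigned int
     udivsi3_ref (unsigned int n, unsigned int d)
     {
       unsigned int q = 0, bit = 1;

       if (d <= 1)
         return d ? n : 0;
       while (d < n && (d >> 31) == 0)
         {
           d <<= 1;
           bit <<= 1;
         }
       while (bit != 0)
         {
           if (n >= d)
             {
               n -= d;
               q |= bit;
             }
           d >>= 1;
           bit >>= 1;
         }
       return q;
     }

   The assembly computes the alignment count directly with do_nsau and uses
   a zero-overhead loop when XCHAL_HAVE_LOOPS is set; division by zero
   deliberately returns 0 rather than trapping.  */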
#ifdef L_divsi3
	.align	4
	.global	__divsi3
	.type	__divsi3,@function
__divsi3:
	entry	sp, 16
	xor	a7, a2, a3	# sign = dividend ^ divisor
	do_abs	a6, a2, a4	# udividend = abs(dividend)
	do_abs	a3, a3, a4	# udivisor = abs(divisor)
	bltui	a3, 2, .Lle_one	# check if udivisor <= 1
	do_nsau	a5, a6, a2, a8	# udividend_shift = nsau(udividend)
	do_nsau	a4, a3, a2, a8	# udivisor_shift = nsau(udivisor)
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	# count = udivisor_shift - udividend_shift
	ssl	a4
	sll	a3, a3		# udivisor <<= count
	movi	a2, 0		# quotient = 0

	# test-subtract-and-shift loop; one quotient bit on each iteration
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	# increment quotient if udividend >= udivisor
.Lreturn:
	neg	a5, a2
	movltz	a2, a5, a7	# return (sign < 0) ? -quotient : quotient
	retw

.Lspecial:
	movi	a2, 0
	bltu	a6, a3, .Lreturn2 # if dividend < divisor, return 0
	movi	a2, 1
	movi	a4, -1
	movltz	a2, a4, a7	# else return (sign < 0) ? -1 : 1
.Lreturn2:
	retw

.Lle_one:
	beqz	a3, .Lerror
	neg	a2, a6		# if udivisor == 1, then return...
	movgez	a2, a6, a7	# (sign < 0) ? -udividend : udividend
	retw
.Lerror:
	movi	a2, 0		# just return 0; could throw an exception
	retw
	.size	__divsi3,.-__divsi3

#endif /* L_divsi3 */
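
/* For reference, the signed quotient above is the unsigned quotient of the
   absolute values with the usual sign rule applied; a minimal sketch, with
   divsi3_ref and udivsi3_ref as illustrative names only:

     extern unsigned int udivsi3_ref (unsigned int, unsigned int);

     int
     divsi3_ref (int a, int b)
     {
       unsigned int ua = a < 0 ? - (unsigned int) a : (unsigned int) a;
       unsigned int ub = b < 0 ? - (unsigned int) b : (unsigned int) b;
       unsigned int uq = udivsi3_ref (ua, ub);
       return ((a ^ b) < 0) ? - (int) uq : (int) uq;
     }

   This matches the assembly's use of (dividend ^ divisor) as the sign word
   and its final conditional negation of the quotient.  */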
#ifdef L_umodsi3
	.align	4
	.global	__umodsi3
	.type	__umodsi3,@function
__umodsi3:
	entry	sp, 16
	bltui	a3, 2, .Lle_one	# check if the divisor is <= 1

	do_nsau	a5, a2, a6, a7	# dividend_shift = nsau(dividend)
	do_nsau	a4, a3, a6, a7	# divisor_shift = nsau(divisor)
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	# count = divisor_shift - dividend_shift
	ssl	a4
	sll	a3, a3		# divisor <<= count

	# test-subtract-and-shift loop
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	# subtract once more if dividend >= divisor
.Lreturn:
	retw

.Lspecial:
	bltu	a2, a3, .Lreturn2
	sub	a2, a2, a3	# subtract once if dividend >= divisor
.Lreturn2:
	retw

.Lle_one:
	# the divisor is either 0 or 1, so just return 0.
	# someday we may want to throw an exception if the divisor is 0.
	movi	a2, 0
	retw
	.size	__umodsi3,.-__umodsi3

#endif /* L_umodsi3 */
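
/* For reference, the loop above is the same shift-subtract scheme as in
   __udivsi3, except that no quotient bits are collected and the reduced
   dividend is returned; a minimal sketch, with umodsi3_ref as an
   illustrative name only:

     unsigned int
     umodsi3_ref (unsigned int n, unsigned int d)
     {
       unsigned int bit = 1;

       if (d <= 1)
         return 0;
       while (d < n && (d >> 31) == 0)
         {
           d <<= 1;
           bit <<= 1;
         }
       while (bit != 0)
         {
           if (n >= d)
             n -= d;
           d >>= 1;
           bit >>= 1;
         }
       return n;
     }
*/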
#ifdef L_modsi3
	.align	4
	.global	__modsi3
	.type	__modsi3,@function
__modsi3:
	entry	sp, 16
	mov	a7, a2		# save original (signed) dividend
	do_abs	a2, a2, a4	# udividend = abs(dividend)
	do_abs	a3, a3, a4	# udivisor = abs(divisor)
	bltui	a3, 2, .Lle_one	# check if udivisor <= 1
	do_nsau	a5, a2, a6, a8	# udividend_shift = nsau(udividend)
	do_nsau	a4, a3, a6, a8	# udivisor_shift = nsau(udivisor)
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	# count = udivisor_shift - udividend_shift
	ssl	a4
	sll	a3, a3		# udivisor <<= count

	# test-subtract-and-shift loop
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	# subtract once more if udividend >= udivisor
.Lreturn:
	bgez	a7, .Lpositive
	neg	a2, a2		# if (dividend < 0), return -udividend
.Lpositive:
	retw

.Lspecial:
	bltu	a2, a3, .Lreturn2
	sub	a2, a2, a3	# subtract once if dividend >= divisor
.Lreturn2:
	bgez	a7, .Lpositive2
	neg	a2, a2		# if (dividend < 0), return -udividend
.Lpositive2:
	retw

.Lle_one:
	# udivisor is either 0 or 1, so just return 0.
	# someday we may want to throw an exception if udivisor is 0.
	movi	a2, 0
	retw
	.size	__modsi3,.-__modsi3

#endif /* L_modsi3 */
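
/* For reference, the sign handling above gives the remainder the sign of
   the dividend, matching the behavior of C's % operator; a minimal sketch,
   with modsi3_ref and umodsi3_ref as illustrative names only:

     extern unsigned int umodsi3_ref (unsigned int, unsigned int);

     int
     modsi3_ref (int a, int b)
     {
       unsigned int ua = a < 0 ? - (unsigned int) a : (unsigned int) a;
       unsigned int ub = b < 0 ? - (unsigned int) b : (unsigned int) b;
       unsigned int ur = umodsi3_ref (ua, ub);
       return (a < 0) ? - (int) ur : (int) ur;
     }
*/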