/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001,2002,2003 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combined
executable.)

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

#include "xtensa-config.h"

# Note: These functions use a minimum stack frame size of 32.  This is
# necessary for Xtensa configurations that only support a fixed register
# window size of 8, where even leaf functions (such as these) need to
# allocate space for a 4-word "extra save area".

# Define macros for the ABS and ADDX* instructions to handle cases
# where they are not included in the Xtensa processor configuration.

	.macro	do_abs dst, src, tmp
#if XCHAL_HAVE_ABS
	abs	\dst, \src
#else
	neg	\tmp, \src
	movgez	\tmp, \src, \src
	mov	\dst, \tmp
#endif
	.endm
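	# The fallback works because "movgez \tmp, \src, \src" overwrites
	# the negated copy with \src whenever \src >= 0; in C terms,
	# dst = (src >= 0) ? src : -src.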

	.macro	do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx2	\dst, \as, \at
#else
	slli	\tmp, \as, 1
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx4	\dst, \as, \at
#else
	slli	\tmp, \as, 2
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx8	\dst, \as, \at
#else
	slli	\tmp, \as, 3
	add	\dst, \tmp, \at
#endif
	.endm
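	# In both variants, do_addxN computes dst = (as << log2(N)) + at,
	# i.e., N*as + at; the ADDX instructions just fold the shift and
	# add into a single instruction.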

# Define macros for function entry and return, supporting either the
# standard register windowed ABI or the non-windowed call0 ABI.  These
# macros do not allocate any extra stack space, so they only work for
# leaf functions that do not need to spill anything to the stack.

	.macro	abi_entry reg, size
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	entry	\reg, \size
#else
	/* do nothing */
#endif
	.endm

	.macro	abi_return
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	retw
#else
	ret
#endif
	.endm


#ifdef L_mulsi3
	.align	4
	.global	__mulsi3
	.type	__mulsi3,@function
__mulsi3:
	abi_entry sp, 32

#if XCHAL_HAVE_MUL16
	or	a4, a2, a3
	srai	a4, a4, 16
	bnez	a4, .LMUL16
	mul16u	a2, a2, a3
	abi_return
.LMUL16:
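	# At least one operand needs more than 16 bits, so build the
	# product from 16x16 partial products.  Writing a2 = (ah<<16)+al
	# and a3 = (bh<<16)+bl, the code below computes, mod 2^32:
	#   a2*a3 = ((ah*bl + bh*al) << 16) + al*bl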
	srai	a4, a2, 16
	srai	a5, a3, 16
	mul16u	a7, a4, a3
	mul16u	a6, a5, a2
	mul16u	a4, a2, a3
	add	a7, a7, a6
	slli	a7, a7, 16
	add	a2, a7, a4

#elif XCHAL_HAVE_MAC16
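	# Same partial-product identity, but using the MAC16 accumulator:
	# load acc with hi(a2)*lo(a3), accumulate lo(a2)*hi(a3), and save
	# the low accumulator word before reusing the accumulator for the
	# unsigned lo*lo product; the result is lo_lo + (cross << 16).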
	mul.aa.hl a2, a3
	mula.aa.lh a2, a3
	rsr	a5, 16	# ACCLO
	umul.aa.ll a2, a3
	rsr	a4, 16	# ACCLO
	slli	a5, a5, 16
	add	a2, a4, a5

#else /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

	# Multiply one bit at a time, but unroll the loop 4x to better
	# exploit the addx instructions and avoid overhead.
	# Peel the first iteration to save a cycle on init.
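	# In C terms, the unrolled shift-add scheme below behaves like this
	# sketch (with x = max(|a|,|b|), y = min(|a|,|b|), and the sign
	# applied at the end):
	#
	#	unsigned r = 0;
	#	while (y != 0) {
	#	  if (y & 1)
	#	    r += x;
	#	  x <<= 1;
	#	  y >>= 1;
	#	}
	#
	# except that four bits of y are consumed per iteration, with
	# addx2/addx4/addx8 supplying the shifted addends.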
	# Avoid negative numbers.
	xor	a5, a2, a3	# top bit is 1 iff exactly one input is negative
	do_abs	a3, a3, a6
	do_abs	a2, a2, a6

	# Swap so the second argument is smaller.
	sub	a7, a2, a3
	mov	a4, a3
	movgez	a4, a2, a7	# a4 = max(a2, a3)
	movltz	a3, a2, a7	# a3 = min(a2, a3)

	movi	a2, 0
	extui	a6, a3, 0, 1
	movnez	a2, a4, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop
	neg	a3, a2
	movltz	a2, a3, a5
	abi_return

	.align	4
.Lmult_main_loop:
	srli	a3, a3, 4
	slli	a4, a4, 4

	add	a7, a4, a2
	extui	a6, a3, 0, 1
	movnez	a2, a7, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop

	neg	a3, a2
	movltz	a2, a3, a5

#endif /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

	abi_return
	.size	__mulsi3,.-__mulsi3

#endif /* L_mulsi3 */

# Define a macro for the NSAU (unsigned normalize shift amount)
# instruction, which computes the number of leading zero bits,
# to handle cases where it is not included in the Xtensa processor
# configuration.

	.macro	do_nsau cnt, val, tmp, a
#if XCHAL_HAVE_NSA
	nsau	\cnt, \val
#else
	mov	\a, \val
	movi	\cnt, 0
	extui	\tmp, \a, 16, 16
	bnez	\tmp, 0f
	movi	\cnt, 16
	slli	\a, \a, 16
0:
	extui	\tmp, \a, 24, 8
	bnez	\tmp, 1f
	addi	\cnt, \cnt, 8
	slli	\a, \a, 8
1:
	movi	\tmp, __nsau_data
	extui	\a, \a, 24, 8
	add	\tmp, \tmp, \a
	l8ui	\tmp, \tmp, 0
	add	\cnt, \cnt, \tmp
#endif /* !XCHAL_HAVE_NSA */
	.endm
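	# A C sketch of the table-based fallback above (an explanatory
	# model, not part of the build):
	#
	#	unsigned nsau (unsigned val)
	#	{
	#	  unsigned cnt = 0;
	#	  if ((val >> 16) == 0) { cnt = 16; val <<= 16; }
	#	  if ((val >> 24) == 0) { cnt += 8; val <<= 8; }
	#	  return cnt + __nsau_data[val >> 24];
	#	}
	#
	# For val == 0 this yields 16 + 8 + __nsau_data[0] == 32, matching
	# the NSAU instruction.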

#ifdef L_nsau
	.section .rodata
	.align	4
	.global	__nsau_data
	.type	__nsau_data,@object
__nsau_data:
#if !XCHAL_HAVE_NSA
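	# Entry i holds the number of leading zero bits in the 8-bit
	# value i, so the table has 256 one-byte entries.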
	.byte	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
	.byte	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#endif /* !XCHAL_HAVE_NSA */
	.size	__nsau_data,.-__nsau_data
	.hidden	__nsau_data
#endif /* L_nsau */

#ifdef L_udivsi3
	.align	4
	.global	__udivsi3
	.type	__udivsi3,@function
__udivsi3:
	abi_entry sp, 32
	bltui	a3, 2, .Lle_one	# check if the divisor <= 1

	mov	a6, a2		# keep dividend in a6
	do_nsau	a5, a6, a2, a7	# dividend_shift = nsau(dividend)
	do_nsau	a4, a3, a2, a7	# divisor_shift = nsau(divisor)
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	# count = divisor_shift - dividend_shift
	ssl	a4
	sll	a3, a3		# divisor <<= count
	movi	a2, 0		# quotient = 0

	# test-subtract-and-shift loop; one quotient bit on each iteration
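	# A C model of the loop plus the final adjustment below (an
	# explanatory sketch; count+1 quotient bits are produced in all):
	#
	#	while (count-- != 0) {
	#	  if (dividend >= divisor) { dividend -= divisor; quotient++; }
	#	  quotient <<= 1;
	#	  divisor >>= 1;
	#	}
	#	if (dividend >= divisor)
	#	  quotient++;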
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	# increment quotient if dividend >= divisor
.Lreturn:
	abi_return

.Lspecial:
	# the quotient is 0 or 1 here; return (dividend >= divisor)
	movi	a2, 0
	bltu	a6, a3, .Lreturn2
	movi	a2, 1
.Lreturn2:
	abi_return

.Lle_one:
	beqz	a3, .Lerror	# divisor == 0 is an error
	abi_return		# divisor == 1: return the dividend
.Lerror:
	movi	a2, 0		# just return 0; could throw an exception
	abi_return
	.size	__udivsi3,.-__udivsi3

#endif /* L_udivsi3 */

#ifdef L_divsi3
	.align	4
	.global	__divsi3
	.type	__divsi3,@function
__divsi3:
	abi_entry sp, 32
	xor	a7, a2, a3	# sign = dividend ^ divisor
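	# Only bit 31 of a7 is ever used: it is set exactly when the
	# operands have opposite signs, i.e., when the quotient is negative.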
	do_abs	a6, a2, a4	# udividend = abs(dividend)
	do_abs	a3, a3, a4	# udivisor = abs(divisor)
	bltui	a3, 2, .Lle_one	# check if udivisor <= 1
	do_nsau	a5, a6, a2, a8	# udividend_shift = nsau(udividend)
	do_nsau	a4, a3, a2, a8	# udivisor_shift = nsau(udivisor)
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	# count = udivisor_shift - udividend_shift
	ssl	a4
	sll	a3, a3		# udivisor <<= count
	movi	a2, 0		# quotient = 0

	# test-subtract-and-shift loop; one quotient bit on each iteration
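	# (same shift-subtract scheme as __udivsi3; see the C sketch there)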
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	# increment quotient if udividend >= udivisor
.Lreturn:
	neg	a5, a2
	movltz	a2, a5, a7	# return (sign < 0) ? -quotient : quotient
	abi_return

.Lspecial:
	movi	a2, 0
	bltu	a6, a3, .Lreturn2 # if udividend < udivisor, return 0
	movi	a2, 1
	movi	a4, -1
	movltz	a2, a4, a7	# else return (sign < 0) ? -1 : 1
.Lreturn2:
	abi_return

.Lle_one:
	beqz	a3, .Lerror	# udivisor == 0 is an error
	neg	a2, a6		# udivisor == 1: return...
	movgez	a2, a6, a7	# (sign < 0) ? -udividend : udividend
	abi_return
.Lerror:
	movi	a2, 0		# just return 0; could throw an exception
	abi_return
	.size	__divsi3,.-__divsi3

#endif /* L_divsi3 */

#ifdef L_umodsi3
	.align	4
	.global	__umodsi3
	.type	__umodsi3,@function
__umodsi3:
	abi_entry sp, 32
	bltui	a3, 2, .Lle_one	# check if the divisor is <= 1

	do_nsau	a5, a2, a6, a7	# dividend_shift = nsau(dividend)
	do_nsau	a4, a3, a6, a7	# divisor_shift = nsau(divisor)
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	# count = divisor_shift - dividend_shift
	ssl	a4
	sll	a3, a3		# divisor <<= count

	# test-subtract-and-shift loop
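	# (as in __udivsi3, but no quotient is formed: only the remainder,
	# left in a2, is of interest)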
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	# subtract once more if dividend >= divisor
.Lreturn:
	abi_return

.Lspecial:
	bltu	a2, a3, .Lreturn2
	sub	a2, a2, a3	# subtract once if dividend >= divisor
.Lreturn2:
	abi_return

.Lle_one:
	# the divisor is either 0 or 1, so just return 0.
	# someday we may want to throw an exception if the divisor is 0.
	movi	a2, 0
	abi_return
	.size	__umodsi3,.-__umodsi3

#endif /* L_umodsi3 */

#ifdef L_modsi3
	.align	4
	.global	__modsi3
	.type	__modsi3,@function
__modsi3:
	abi_entry sp, 32
	mov	a7, a2		# save original (signed) dividend
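	# The remainder takes the sign of the dividend (truncating-division
	# semantics, where (a/b)*b + a%b == a), so the original dividend is
	# kept in order to fix up the sign of the unsigned result at
	# .Lreturn/.Lreturn2 below.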
	do_abs	a2, a2, a4	# udividend = abs(dividend)
	do_abs	a3, a3, a4	# udivisor = abs(divisor)
	bltui	a3, 2, .Lle_one	# check if udivisor <= 1
	do_nsau	a5, a2, a6, a8	# udividend_shift = nsau(udividend)
	do_nsau	a4, a3, a6, a8	# udivisor_shift = nsau(udivisor)
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	# count = udivisor_shift - udividend_shift
	ssl	a4
	sll	a3, a3		# udivisor <<= count

	# test-subtract-and-shift loop
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	# subtract once more if udividend >= udivisor
.Lreturn:
	bgez	a7, .Lpositive
	neg	a2, a2		# if (dividend < 0), negate the remainder
.Lpositive:
	abi_return

.Lspecial:
	bltu	a2, a3, .Lreturn2
	sub	a2, a2, a3	# subtract once if udividend >= udivisor
.Lreturn2:
	bgez	a7, .Lpositive2
	neg	a2, a2		# if (dividend < 0), negate the remainder
.Lpositive2:
	abi_return

.Lle_one:
	# udivisor is either 0 or 1, so just return 0.
	# someday we may want to throw an exception if udivisor is 0.
	movi	a2, 0
	abi_return
	.size	__modsi3,.-__modsi3

#endif /* L_modsi3 */