1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2008 by Jens Arnold
11 * Copyright (C) 2009 by Andrew Mahone
13 * Optimised replacements for libgcc functions
15 * Based on: libgcc routines for ARM cpu, additional algorithms from ARM System
17 * Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
18 * Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
19 * Free Software Foundation, Inc.
21 * This program is free software; you can redistribute it and/or
22 * modify it under the terms of the GNU General Public License
23 * as published by the Free Software Foundation; either version 2
24 * of the License, or (at your option) any later version.
26 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
27 * KIND, either express or implied.
29 ****************************************************************************/
/* ARM_SDIV32_PRE: sign handling performed ahead of a signed division.
   On exit:
     \divisor   = -|divisor|  (negated only when it was non-negative)
     \numerator = |numerator| (negated only when it was negative)
     \sign      = bit 31 holds the XOR of the two operand signs,
                  bits 0..30 hold copies of the numerator's sign bit.
   The condition flags are overwritten. */
33 .macro ARM_SDIV32_PRE numerator, divisor, sign
34 /* sign[31] = divisor sign */
35 ands \sign, \divisor, #1<<31
/* Z is set when the divisor was non-negative; only then is it negated, so
   the divisor always leaves this macro as a non-positive value. */
36 rsbeq \divisor, \divisor, #0
37 /* sign[31] = result sign, sign[0:30], C = numerator sign */
/* "asr 32" yields 32 copies of the numerator's sign bit and shifts that same
   bit out into C. */
38 eors \sign, \sign, \numerator, asr #32
/* C set means the numerator was negative: replace it by its magnitude. */
39 rsbcs \numerator, \numerator, #0
/* ARM_SDIV32_POST: apply the signs recorded in \sign to unsigned results.
   \sign is shifted left by one bit (and therefore destroyed); the condition
   flags are overwritten.
   NOTE(review): callers in this file pass "" for \quotient on remainder-only
   paths, but no guard around the quotient negation is visible in this
   excerpt - confirm that one exists in the full file. */
42 .macro ARM_SDIV32_POST quotient, remainder, sign
/* After the shift: C = old sign[31], N = old sign[30]. */
43 movs \sign, \sign, lsl #1
/* Negate the quotient when C is set. */
45 rsbcs \quotient, \quotient, #0
/* The remainder fix-up is only assembled when a remainder register is given. */
47 .ifnc "", "\remainder"
/* Negate the remainder when N is set. */
48 rsbmi \remainder, \remainder, #0
/* ARMV4_UDIV32_BODY: unsigned 32-bit division built from shift-and-add steps
   on a negated divisor.
   Arguments:
     numerator, divisor  - input registers (both are overwritten)
     quotient, remainder - result registers; the .ifc/.ifnc tests below allow
                           either to be passed as "" and allow the remainder
                           to share the numerator's register
     tmp, bits           - working registers
     div0label           - optional label; when supplied the divisor is
                           negated with rsbs so that the flags are updated
     return              - NOTE(review): its use is not visible in this
                           excerpt
   NOTE(review): several directives, labels and instructions of this macro
   are not visible here, so not every conditional instruction can be traced
   back to the instruction that sets its flags. */
53 .macro ARMV4_UDIV32_BODY numerator, divisor, quotient, remainder, tmp, bits, div0label, return
54 .ifnc "", "\div0label"
/* Negate the divisor and set the flags from the result. */
55 rsbs \divisor, \divisor, #0
/* Negate the divisor without touching the flags. */
58 rsb \divisor, \divisor, #0
60 /* This SWAR divider requires a numerator less than 1<<31, because it must
61 be able to shift the remainder left at each step without shifting out
62 topmost bit. Since a shift might be needed for the aligned remainder to
63 exceed the divisor, the topmost bit must be unset at the start to avoid
64 this overflow case. The original numerator is saved so that the result
65 can be corrected after the reduced division completes. */
/* The divisor is negated at this point, so adding it compares the numerator
   against the original divisor. */
66 cmn \numerator, \divisor
68 .ifc "\numerator", "\remainder"
81 movmi \numerator, \numerator, lsr #1
/* Alignment step for the current value of the assembler symbol "shift"
   (halved by the .set below): when the negated divisor plus
   numerator >> shift carries, the divisor is moved up by shift bits and
   shift is subtracted from bits. */
85 cmn \divisor, \numerator, lsr #shift
86 subcs \bits, \bits, #shift
87 movcs \divisor, \divisor, lsl #shift
88 .set shift, shift >> 1
/* Trial subtraction: add the negated divisor and back it out again when the
   addition did not carry. */
90 adds \numerator, \numerator, \divisor
91 subcc \numerator, \numerator, \divisor
/* Computed jump: advance pc by bits * 8 bytes, i.e. bits pairs of 4-byte
   instructions. */
92 add pc, pc, \bits, lsl #3
/* One division step: numerator = (numerator << 1) + divisor + C; the divisor
   is backed out again when the addition did not carry. */
95 adcs \numerator, \divisor, \numerator, lsl #1
96 subcc \numerator, \numerator, \divisor
/* Shift the carry of the last step into bit 0. */
98 adc \numerator, \numerator, \numerator
99 movs \tmp, \tmp, asr #1
/* bits = 31 - bits: the shift distance used by the extraction code below. */
100 rsb \bits, \bits, #31
103 mov \remainder, \numerator, lsr \bits
105 .ifc "", "\remainder"
/* No remainder register: reuse divisor to hold numerator >> bits, then clear
   those upper bits so that only the low part is left as the quotient. */
106 mov \divisor, \numerator, lsr \bits
107 eor \quotient, \numerator, \divisor, lsl \bits
/* Remainder = upper part of numerator, quotient = remaining low bits. */
109 mov \remainder, \numerator, lsr \bits
110 eor \quotient, \numerator, \remainder, lsl \bits
/* Same split, with the upper part kept in tmp and the low part left in
   numerator. */
119 mov \tmp, \numerator, lsr \bits
120 eor \numerator, \numerator, \tmp, lsl \bits
/* One more trial step on the upper part; its carry becomes the lowest
   quotient bit in the adc below. */
123 adds \tmp, \tmp, \divisor, asr \bits
124 .ifnc "", "\quotient"
125 adc \quotient, \numerator, \numerator
127 .ifnc "", "\remainder"
/* Without a carry the divisor term is subtracted back out; with a carry tmp
   is taken as the remainder unchanged. */
128 subcc \remainder, \tmp, \divisor, asr \bits
129 movcs \remainder, \tmp
137 .ifnc "", "\remainder"
138 .ifnc "\remainder", "\numerator"
/* Copy is only needed when remainder and numerator are different registers. */
139 mov \remainder, \numerator
142 .ifnc "", "\quotient"
/* ARMV4_SDIV32_BODY: signed 32-bit division. ARM_SDIV32_PRE moves the operand
   signs into \sign, the magnitudes are divided with shift-and-add steps on
   the negated divisor, and ARM_SDIV32_POST re-applies the signs.
   Arguments:
     numerator, divisor  - input registers (both are overwritten)
     quotient, remainder - result registers; the .ifc/.ifnc tests below allow
                           either to be passed as ""
     bits                - working register
     sign                - register holding the sign word shared by
                           ARM_SDIV32_PRE and ARM_SDIV32_POST
     div0label           - optional label; when supplied the divisor is
                           examined with tst
     return              - NOTE(review): its use is not visible in this
                           excerpt
   NOTE(review): several directives, labels and instructions of this macro
   are not visible here. */
152 .macro ARMV4_SDIV32_BODY numerator, divisor, quotient, remainder, bits, sign, div0label, return
153 /* When this is wrapped for signed division, the wrapper code will handle
154 inverting the divisor, and also the zero divisor test. */
155 ARM_SDIV32_PRE \numerator, \divisor, \sign
156 .ifnc "", "\div0label"
/* Sets Z when the divisor is zero. */
157 tst \divisor, \divisor
160 /* This SWAR divider requires a numerator less than 1<<31, because it must
161 be able to shift the remainder left at each step without shifting out
162 topmost bit. With signed inputs, whose absolute value may not exceed
163 1<<31, this may be accomplished simply by subtracting the divisor before
164 beginning division, and adding 1 to the quotient. */
/* ARM_SDIV32_PRE leaves the divisor non-positive, so this addition is the
   subtraction described above. */
165 adds \numerator, \numerator, \divisor
/* Alignment step for the current value of the assembler symbol "shift"
   (halved by the .set below). */
170 cmn \divisor, \numerator, lsr #shift
171 subcs \bits, \bits, #shift
172 movcs \divisor, \divisor, lsl #shift
173 .set shift, shift >> 1
/* Trial subtraction, backed out when the addition did not carry. */
175 adds \numerator, \numerator, \divisor
176 subcc \numerator, \numerator, \divisor
/* Computed jump: advance pc by bits * 8 bytes. */
177 add pc, pc, \bits, lsl #3
/* One division step: numerator = (numerator << 1) + divisor + C, with the
   divisor backed out when the addition did not carry. */
180 adcs \numerator, \divisor, \numerator, lsl #1
181 subcc \numerator, \numerator, \divisor
/* rsb leaves the flags alone, so the adc still sees the carry of the last
   step. */
183 rsb \bits, \bits, #31
184 adc \numerator, \numerator, \numerator
186 mov \remainder, \numerator, lsr \bits
188 .ifc "", "\remainder"
/* No remainder register: divisor temporarily holds numerator >> bits. The
   added 1 is the quotient correction for the initial subtraction. */
189 mov \divisor, \numerator, lsr \bits
190 add \numerator, \numerator, #1
191 sub \quotient, \numerator, \divisor, lsl \bits
/* Same extraction with the upper part kept as the remainder. */
193 mov \remainder, \numerator, lsr \bits
194 add \numerator, \numerator, #1
195 sub \quotient, \numerator, \remainder, lsl \bits
199 ARM_SDIV32_POST \quotient, \remainder, \sign
205 .ifnc "", "\remainder"
/* The divisor is non-positive here, so this adds its magnitude back. */
206 sub \remainder, \numerator, \divisor
208 .ifnc "", "\quotient"
/* Remainder-only sign fix-up: the quotient argument is passed as "". */
212 ARM_SDIV32_POST "", \remainder, \sign
216 ARM_SDIV32_POST \quotient, \remainder, \sign
/* ARMV5_UDIV32_BODY: unsigned 32-bit division that loads a reciprocal
   estimate from a byte table, refines it, multiplies it by the numerator and
   then corrects the approximate result.
   Arguments:
     numerator, divisor  - input registers (both are overwritten)
     quotient, remainder - result registers; the .ifnc tests below allow
                           either to be passed as ""
     bits, inv, neg      - working registers
     div0label           - optional label, tested with .ifnc in the
                           power-of-two section
     return              - NOTE(review): its use is not visible in this
                           excerpt
   NOTE(review): several directives, local labels and instructions of this
   macro are not visible here, so not every conditional instruction can be
   traced back to the instruction that sets its flags. */
221 .macro ARMV5_UDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg, div0label, return
222 cmp \numerator, \divisor
/* inv = divisor shifted left by bits. */
225 mov \inv, \divisor, lsl \bits
/* neg = pc + (inv >> 25): base address for the table load below. */
226 add \neg, pc, \inv, lsr #25
227 /* Test whether divisor is 2^N */
229 /* Load approximate reciprocal */
230 ldrhib \inv, [\neg, #.L_udiv_est_table-.-64]
/* bits -= 7 with the flags updated; neg = -divisor. */
232 subs \bits, \bits, #7
233 rsb \neg, \divisor, #0
234 /* Scale approximate reciprocal, or else branch to large-divisor path */
235 movpl \divisor, \inv, lsl \bits
237 /* Newton-Raphson iteration to improve reciprocal accuracy */
238 mul \inv, \divisor, \neg
239 smlawt \divisor, \divisor, \inv, \divisor
240 mul \inv, \divisor, \neg
241 /* Complete N-R math and produce approximate quotient. Use smmla/smmul on
244 tst \numerator, \numerator
245 smmla \divisor, \divisor, \inv, \divisor
246 /* Branch to large-numerator handler, or else use smmul if sign bit is not
247 set. This wins on average with random numerators, and should be no
248 slower than using umull for small numerator, even if prediction fails.
251 smmul \inv, \numerator, \divisor
253 /* ARMv5e lacks smmul, so always uses umull. */
255 smlal \bits, \divisor, \inv, \divisor
/* inv receives the high word of numerator * divisor; bits takes the low word. */
256 umull \bits, \inv, \numerator, \divisor
258 /* Calculate remainder and correct result. */
/* neg holds the negated original divisor, so this subtracts the divisor. */
259 add \numerator, \numerator, \neg
260 .ifnc "", "\remainder"
/* remainder = inv * neg + numerator. */
261 mla \remainder, \inv, \neg, \numerator
262 .ifnc "", "\quotient"
265 subcs \remainder, \remainder, \neg
266 addpl \remainder, \remainder, \neg, lsl #1
267 addcc \quotient, \quotient, #1
268 addpl \quotient, \quotient, #2
271 subcs \remainder, \remainder, \neg
272 addpl \remainder, \remainder, \neg, lsl #1
/* Quotient-only variant: divisor is reused to hold inv * neg + numerator. */
275 mla \divisor, \inv, \neg, \numerator
278 addcc \quotient, \quotient, #1
279 addpl \quotient, \quotient, #2
287 /* Very large divisors can be handled without further improving the
288 reciprocal. First the reciprocal must be reduced to ensure that it
289 underestimates the correct value. */
292 mov \divisor, \inv, lsr \bits
293 /* Calculate approximate quotient and remainder */
294 umull \bits, \inv, \numerator, \divisor
295 /* Correct quotient and remainder */
296 .ifnc "", "\remainder"
297 mla \remainder, \inv, \neg, \numerator
298 .ifnc "", "\quotient"
/* Carry is set when (remainder >> 1) + neg carries out; the remainder is then
   reduced by two divisors and the quotient raised by 2. */
300 cmn \neg, \remainder, lsr #1
301 addcs \remainder, \remainder, \neg, lsl #1
302 addcs \quotient, \quotient, #2
304 addcs \remainder, \remainder, \neg
305 addcs \quotient, \quotient, #1
307 cmn \neg, \remainder, lsr #1
308 addcs \remainder, \remainder, \neg, lsl #1
310 addcs \remainder, \remainder, \neg
313 mla \divisor, \inv, \neg, \numerator
315 cmn \neg, \divisor, lsr #1
316 addcs \divisor, \divisor, \neg, lsl #1
317 addcs \quotient, \quotient, #2
319 addcs \quotient, \quotient, #1
327 /* Handle division by powers of two by shifting right. Mod is handled
328 by using divisor-1 as a bitmask. */
329 .ifnc "", "\remainder"
330 .ifnc "", "\div0label"
333 .ifnc "", "\quotient"
/* divisor - 1 is the remainder mask; 31 - bits is the right-shift distance
   applied to the numerator. */
334 sub \divisor, \divisor, #1
335 rsb \bits, \bits, #31
336 and \remainder, \numerator, \divisor
337 mov \quotient, \numerator, lsr \bits
339 sub \divisor, \divisor, #1
340 and \remainder, \numerator, \divisor
343 rsb \bits, \bits, #31
344 .ifnc "", "\div0label"
347 mov \quotient, \numerator, lsr \bits
355 /* Handle numerator < divisor - quotient is zero, remainder is numerator,
356 which must be restored to its original value on ARMv6. */
357 .ifnc "", "\remainder"
358 mov \remainder, \numerator
360 .ifnc "", "\quotient"
368 /* Handle large (sign bit set) numerators. Works exactly as the ARMv5e code
370 umull \bits, \inv, \numerator, \divisor
371 add \numerator, \numerator, \neg
372 .ifnc "", "\remainder"
373 mla \remainder, \inv, \neg, \numerator
374 .ifnc "", "\quotient"
375 mla \remainder, \inv, \neg, \numerator
378 subcs \remainder, \remainder, \neg
379 addpl \remainder, \remainder, \neg, lsl #1
380 addcc \quotient, \quotient, #1
381 addpl \quotient, \quotient, #2
384 subcs \remainder, \remainder, \neg
385 addpl \remainder, \remainder, \neg, lsl #1
388 mla \divisor, \inv, \neg, \numerator
391 addcc \quotient, \quotient, #1
392 addpl \quotient, \quotient, #2
/* ARMV5_SDIV32_BODY: signed 32-bit division. The operand signs are moved into
   \sign and both operands are replaced by their magnitudes, the magnitudes
   are divided with a table-based reciprocal estimate that is refined and
   corrected, and ARM_SDIV32_POST re-applies the signs.
   Arguments:
     numerator, divisor  - input registers (both are overwritten)
     quotient, remainder - result registers; the .ifnc tests below allow
                           either to be passed as ""
     bits, inv, neg      - working registers
     sign                - register holding the sign word consumed by
                           ARM_SDIV32_POST
     div0label           - optional label, tested with .ifnc in the
                           power-of-two section
     return              - NOTE(review): its use is not visible in this
                           excerpt
   NOTE(review): several directives, local labels and instructions of this
   macro are not visible here, so not every conditional instruction can be
   traced back to the instruction that sets its flags. */
403 .macro ARMV5_SDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg, sign, div0label, return
404 /* sign[31] = divisor sign */
405 ands \sign, \divisor, #1<<31
/* Unlike ARM_SDIV32_PRE, a negative divisor is negated here, so the divisor
   continues as a magnitude. */
406 rsbne \divisor, \divisor, #0
407 /* sign[31] = result sign, sign[0:30], C = numerator sign */
408 eors \sign, \sign, \numerator, asr #32
/* C set means the numerator was negative: replace it by its magnitude. */
410 rsbcs \numerator, \numerator, #0
411 /* On ARMv6, subtract divisor before performing division, which ensures
412 numerator sign bit is clear and smmul may be used in place of umull. The
413 fixup for the results can be fit entirely into existing delay slots on
414 the main division paths. It costs 1c in the num<div path if the
415 remainder is to be produced in the numerator's register, and 1c in
416 the power-of-2-divisor path only if producing both remainder and
419 subs \numerator, \numerator, \divisor
421 cmp \numerator, \divisor
423 movcs \inv, \divisor, lsl \bits
425 /* Test whether divisor is 2^N */
/* inv = pc + (inv >> 25): base address for the table load below. */
427 add \inv, pc, \inv, lsr #25
429 /* Load approximate reciprocal */
430 ldrb \inv, [\inv, #.L_udiv_est_table-.-64]
/* bits -= 7 with the flags updated; neg = -divisor. */
431 subs \bits, \bits, #7
432 rsb \neg, \divisor, #0
433 /* Scale approximate reciprocal, or else branch to large-divisor path */
434 movpl \divisor, \inv, lsl \bits
436 /* Newton-Raphson iteration to improve reciprocal accuracy */
437 mul \inv, \divisor, \neg
438 smlawt \divisor, \divisor, \inv, \divisor
439 mul \inv, \divisor, \neg
440 /* Complete N-R math and produce approximate quotient. Use smmla/smmul on
443 smmla \divisor, \divisor, \inv, \divisor
444 smmul \inv, \numerator, \divisor
447 smlal \bits, \divisor, \inv, \divisor
448 umull \bits, \inv, \numerator, \divisor
450 /* Calculate remainder and correct quotient. */
/* neg holds the negated divisor magnitude, so this subtracts the divisor. */
451 add \numerator, \numerator, \neg
452 .ifnc "", "\remainder"
/* remainder = inv * neg + numerator. */
453 mla \remainder, \inv, \neg, \numerator
454 .ifnc "", "\quotient"
/* Quotient starts one above the estimate held in inv. */
456 add \quotient, \inv, #1
461 subcs \remainder, \remainder, \neg
462 addpl \remainder, \remainder, \neg, lsl #1
463 addcc \quotient, \quotient, #1
464 addpl \quotient, \quotient, #2
467 subcs \remainder, \remainder, \neg
468 addpl \remainder, \remainder, \neg, lsl #1
/* Quotient-only variant: divisor is reused to hold inv * neg + numerator. */
471 mla \divisor, \inv, \neg, \numerator
473 add \quotient, \inv, #1
478 addcc \quotient, \quotient, #1
479 addpl \quotient, \quotient, #2
481 ARM_SDIV32_POST \quotient, \remainder, \sign
488 /* Very large divisors can be handled without further improving the
489 reciprocal. First the reciprocal must be reduced to ensure that it
490 underestimates the correct value. */
493 mov \divisor, \inv, lsr \bits
494 /* Calculate approximate quotient and remainder */
496 smmul \inv, \numerator, \divisor
498 umull \bits, \inv, \numerator, \divisor
500 /* Correct quotient and remainder */
501 .ifnc "", "\remainder"
502 mla \remainder, \inv, \neg, \numerator
503 .ifnc "", "\quotient"
505 add \quotient, \inv, #1
/* Carry is set when (remainder >> 1) + neg carries out; the remainder is then
   reduced by two divisors and the quotient raised by 2. */
509 cmn \neg, \remainder, lsr #1
510 addcs \remainder, \remainder, \neg, lsl #1
511 addcs \quotient, \quotient, #2
513 addcs \remainder, \remainder, \neg
514 addcs \quotient, \quotient, #1
516 cmn \neg, \remainder, lsr #1
517 addcs \remainder, \remainder, \neg, lsl #1
519 addcs \remainder, \remainder, \neg
522 mla \divisor, \inv, \neg, \numerator
524 add \quotient, \inv, #1
528 cmn \neg, \divisor, lsr #1
529 addcs \divisor, \divisor, \neg, lsl #1
530 addcs \quotient, \quotient, #2
532 addcs \quotient, \quotient, #1
534 ARM_SDIV32_POST \quotient, \remainder, \sign
541 /* Handle division by powers of two by shifting right. Mod is handled
542 by using divisor-1 as a bitmask. */
543 .ifnc "", "\div0label"
546 .ifnc "", "\remainder"
547 .ifnc "", "\quotient"
/* 31 - bits is the right-shift distance applied to the numerator. */
548 rsb \bits, \bits, #31
/* Adds the divisor back onto the numerator before it is masked and shifted. */
550 add \numerator, \numerator, \divisor
552 sub \divisor, \divisor, #1
553 and \remainder, \numerator, \divisor
554 mov \quotient, \numerator, lsr \bits
556 sub \divisor, \divisor, #1
557 and \remainder, \numerator, \divisor
560 rsb \bits, \bits, #31
562 add \numerator, \numerator, \divisor
564 mov \quotient, \numerator, lsr \bits
566 ARM_SDIV32_POST \quotient, \remainder, \sign
573 /* Handle numerator < divisor - quotient is zero, remainder is numerator,
574 which must be restored to its original value on ARMv6. */
575 .ifnc "", "\remainder"
577 add \remainder, \numerator, \divisor
579 .ifnc "\remainder", "\numerator"
/* Copy is only needed when remainder and numerator are different registers. */
580 mov \remainder, \numerator
584 .ifnc "", "\quotient"
587 .ifnc "", "\remainder"
/* Remainder-only sign fix-up: the quotient argument is passed as "". */
588 ARM_SDIV32_POST "", \remainder, \sign
603 .size __div0_wrap_s, . - __div0_wrap_s
609 .size __div0_wrap, . - __div0_wrap
613 .type __divsi3,%function
615 .type __udivsi3,%function
617 .type __udivsi3,%function
619 /* The div+mod averages a fraction of a cycle worse for signed values, and
620 slightly better for unsigned, so just alias div to divmod. */
621 .global __aeabi_uidivmod
622 .type __aeabi_uidivmod,%function
623 .global __aeabi_uidiv
624 .type __aeabi_uidiv,%function
625 .set __aeabi_uidiv,__aeabi_uidivmod
626 .global __aeabi_idivmod
627 .type __aeabi_idivmod,%function
629 .type __aeabi_idiv,%function
630 .set __aeabi_idiv,__aeabi_idivmod
/* __clzsi2: table-driven routine that transforms r0 in place and finishes
   with a byte load from a 64-entry table. An input of 0 stays 0 through every
   step and therefore selects table entry 0, which is 32.
   NOTE(review): by its name this is the libgcc count-leading-zeros helper;
   the entry label and the return instruction are not visible in this
   excerpt - confirm. */
636 .type __clzsi2, %function
/* Smear every set bit over the 15 bit positions below it (offsets 8, 4, 2 and
   1 combine to cover 1..15). */
639 orr r0, r0, r0, lsr #8
640 orr r0, r0, r0, lsr #4
641 orr r0, r0, r0, lsr #2
642 orr r0, r0, r0, lsr #1
/* Clear every bit that also has a set bit 16 positions above it. */
643 bic r0, r0, r0, lsr #16
/* Multiply by (2^14 - 1), (2^11 - 1) and (2^9 - 1) in turn, modulo 2^32. */
644 rsb r0, r0, r0, lsl #14
645 rsb r0, r0, r0, lsl #11
646 rsb r0, r0, r0, lsl #9
/* The top 6 bits of the product (0..63) index a byte relative to pc. */
647 ldrb r0, [pc, r0, lsr #26]
/* 64-entry lookup table, 16 bytes per line. */
649 .byte 32, 20, 19, 0, 0, 18, 0, 7, 10, 17, 0, 0, 14, 0, 6, 0
650 .byte 0, 9, 0, 16, 0, 0, 1, 26, 0, 13, 0, 0, 24, 5, 0, 0
651 .byte 0, 21, 0, 8, 11, 0, 15, 0, 0, 0, 0, 2, 27, 0, 25, 0
652 .byte 22, 0, 12, 0, 0, 3, 28, 0, 23, 0, 4, 29, 0, 0, 30, 31
653 .size __clzsi2, .-__clzsi2
/* __udivsi3 body built from ARMV4_UDIV32_BODY: r0 = numerator and quotient,
   r1 = divisor, no remainder register (""), r2 = tmp, r3 = bits;
   __div0_wrap is passed as div0label and 1 as the return argument. */
657 ARMV4_UDIV32_BODY r0, r1, r0, "", r2, r3, __div0_wrap, 1
/* Symbol size = distance from __udivsi3 to the current location. */
658 .size __udivsi3, . - __udivsi3
/* __divsi3 body built from ARMV4_SDIV32_BODY: r0 = numerator and quotient,
   r1 = divisor, no remainder register (""), r2 = bits, r3 = sign;
   __div0_wrap is passed as div0label and 1 as the return argument. */
661 ARMV4_SDIV32_BODY r0, r1, r0, "", r2, r3, __div0_wrap, 1
/* Symbol size = distance from __divsi3 to the current location. */
662 .size __divsi3, . - __divsi3
/* __aeabi_uidivmod body built from ARMV4_UDIV32_BODY: r0 = numerator and
   quotient, r1 = divisor and remainder, r2 = tmp, r3 = bits; __div0_wrap is
   passed as div0label and 1 as the return argument. */
666 ARMV4_UDIV32_BODY r0, r1, r0, r1, r2, r3, __div0_wrap, 1
/* Symbol size = distance from __aeabi_uidivmod to the current location. The
   expression must name the function's own symbol. */
667 .size __aeabi_uidivmod, . - __aeabi_uidivmod
/* __aeabi_idivmod body built from ARMV4_SDIV32_BODY: r0 = numerator and
   quotient, r1 = divisor and remainder, r2 = bits, r3 = sign; __div0_wrap is
   passed as div0label and 1 as the return argument. */
670 ARMV4_SDIV32_BODY r0, r1, r0, r1, r2, r3, __div0_wrap, 1
/* Symbol size = distance from __aeabi_idivmod to the current location. */
671 .size __aeabi_idivmod, . - __aeabi_idivmod
/* __udivsi3 body built from ARMV5_UDIV32_BODY: r0 = numerator and quotient,
   r1 = divisor, no remainder register (""), r2 = bits, r3 = inv, ip = neg;
   __div0_wrap is passed as div0label and 1 as the return argument. */
677 ARMV5_UDIV32_BODY r0, r1, r0, "", r2, r3, ip, __div0_wrap, 1
/* Symbol size = distance from __udivsi3 to the current location. */
678 .size __udivsi3, . - __udivsi3
/* __divsi3 body built from ARMV5_SDIV32_BODY: r0 = numerator and quotient,
   r1 = divisor, no remainder register (""), r2 = bits, lr = inv, ip = neg,
   r3 = sign; __div0_wrap_s is passed as div0label. The return argument is an
   instruction that reloads pc from [sp, #-4].
   NOTE(review): lr is used as a working register here; the store that places
   the return address at [sp, #-4] is not visible in this excerpt - confirm. */
682 ARMV5_SDIV32_BODY r0, r1, r0, "", r2, lr, ip, r3, __div0_wrap_s, "ldr pc, [sp, #-4]"
/* Symbol size = distance from __divsi3 to the current location. */
683 .size __divsi3, . - __divsi3
/* __aeabi_uidivmod body built from ARMV5_UDIV32_BODY: r0 = numerator and
   quotient, r1 = divisor and remainder, r2 = bits, r3 = inv, ip = neg;
   __div0_wrap is passed as div0label and 1 as the return argument. */
687 ARMV5_UDIV32_BODY r0, r1, r0, r1, r2, r3, ip, __div0_wrap, 1
/* Symbol size = distance from __aeabi_uidivmod to the current location. The
   expression must name the function's own symbol. */
688 .size __aeabi_uidivmod, . - __aeabi_uidivmod
/* __aeabi_idivmod body built from ARMV5_SDIV32_BODY: r0 = numerator and
   quotient, r1 = divisor and remainder, r2 = bits, lr = inv, ip = neg,
   r3 = sign; __div0_wrap_s is passed as div0label. The return argument is an
   instruction that reloads pc from [sp, #-4].
   NOTE(review): lr is used as a working register here; the store that places
   the return address at [sp, #-4] is not visible in this excerpt - confirm. */
692 ARMV5_SDIV32_BODY r0, r1, r0, r1, r2, lr, ip, r3, __div0_wrap_s, "ldr pc, [sp, #-4]"
/* Symbol size = distance from __aeabi_idivmod to the current location. */
693 .size __aeabi_idivmod, . - __aeabi_idivmod
/* 64-entry byte table of reciprocal estimates, 8 entries per line. The
   spot-checked entries equal floor(2^14 / (64 + i)) for index i, with entry 0
   capped at 0xff.
   NOTE(review): presumably this is the .L_udiv_est_table read by the
   ldrb/ldrhib lookups in the ARMv5 division macros, which index it with a
   value shifted right by 25 and biased by -64; the label itself is not
   visible in this excerpt - confirm. */
697 .byte 0xff, 0xfc, 0xf8, 0xf4, 0xf0, 0xed, 0xea, 0xe6
698 .byte 0xe3, 0xe0, 0xdd, 0xda, 0xd7, 0xd4, 0xd2, 0xcf
699 .byte 0xcc, 0xca, 0xc7, 0xc5, 0xc3, 0xc0, 0xbe, 0xbc
700 .byte 0xba, 0xb8, 0xb6, 0xb4, 0xb2, 0xb0, 0xae, 0xac
701 .byte 0xaa, 0xa8, 0xa7, 0xa5, 0xa3, 0xa2, 0xa0, 0x9f
702 .byte 0x9d, 0x9c, 0x9a, 0x99, 0x97, 0x96, 0x94, 0x93
703 .byte 0x92, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8a, 0x89
704 .byte 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81