1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2008 by Jens Arnold
11 * Copyright (C) 2009 by Andrew Mahone
13 * Optimised unsigned integer division for ARMv4
15 * Based on: libgcc routines for ARM cpu, additional algorithms from ARM System
17 * Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
18 * Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
19 * Free Software Foundation, Inc.
21 * This program is free software; you can redistribute it and/or
22 * modify it under the terms of the GNU General Public License
23 * as published by the Free Software Foundation; either version 2
24 * of the License, or (at your option) any later version.
26 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
27 * KIND, either express or implied.
29 ****************************************************************************/
32 /* Codecs should not normally do this, but we need to check a macro, and
33 * codecs.h would confuse the assembler. */
37 .section .icode,"ax",%progbits
43 .type udiv32_arm,%function
46 /* Adapted from an algorithm given in ARM System Developer's Guide (7.3.1.2)
47 for dividing a 30-bit value by a 15-bit value, with two operations per
48 iteration by storing quotient and remainder together and adding the previous
49 quotient bit during trial subtraction. Modified to work with any dividend
50 and divisor both less than 1 << 30, and to skip trials by calculating the
51 number of bits required before starting. */
52 .macro ARM_DIV_31_BODY dividend, divisor, result, bits, curbit, quotient, remainder
55 /* Shift the divisor left until it aligns with the numerator. If it already
56 has the high bit set, this is fine: everything inside .rept will be
57 skipped, and the add before and adcs after will set the one-bit result
58 on their own. */
59 cmn \divisor, \dividend, lsr #16
60 movcs \divisor, \divisor, lsl #16
61 addcs \bits, \bits, #16
62 cmn \divisor, \dividend, lsr #8
63 movcs \divisor, \divisor, lsl #8
64 addcs \bits, \bits, #8
65 cmn \divisor, \dividend, lsr #4
66 movcs \divisor, \divisor, lsl #4
67 addcs \bits, \bits, #4
68 cmn \divisor, \dividend, lsr #2
69 movcs \divisor, \divisor, lsl #2
70 addcs \bits, \bits, #2
71 cmn \divisor, \dividend, lsr #1
72 movcs \divisor, \divisor, lsl #1
73 addcs \bits, \bits, #1
74 adds \result, \dividend, \divisor
75 subcc \result, \result, \divisor
76 rsb \curbit, \bits, #31
77 add pc, pc, \curbit, lsl #3
80 adcs \result, \divisor, \result, lsl #1
81 /* Fix the remainder portion of the result. This must be done because the
82 handler for 32-bit numerators needs the remainder. */
83 subcc \result, \result, \divisor
85 /* Shift remainder/quotient left one, add final quotient bit */
86 adc \result, \result, \result
87 mov \remainder, \result, lsr \bits
88 eor \quotient, \result, \remainder, lsl \bits
92 #if CONFIG_CPU == PP5020
94 #elif CONFIG_CPU == PP5002
99 #elif CONFIG_CPU == AS3525
100 .set recip_max, 42752
101 #elif CONFIG_CPU == S5L8701
102 .set recip_max, 13184
103 #elif CONFIG_CPU == S5L8700
113 adr r3, .L_udiv_recip_table-12
114 ldr r2, [r3, r1, lsl #2]
129 /* Invert divisor. ARM_DIV_31_BODY uses adc to both subtract the divisor
130 and add the next bit of the result. The correction code at .L_udiv32
131 does not need the divisor inverted, but can be modified to work with it,
132 and this allows the zero divisor test to be done early and without an
133 explicit comparison. */
139 /* High bit must be unset, otherwise shift numerator right, calculate,
140 and correct results. As this case is very uncommon we want to avoid
141 any other delays on the main path in handling it, so the long divide
142 calls the short divide as a function. */
145 ARM_DIV_31_BODY r0, r1, r2, r3, ip, r0, r1
148 /* Store the original numerator and divisor; we'll need them to correct the
149 results. */
150 stmdb sp, { r0, r1, lr }
151 /* Call __div0 here if the divisor is zero, otherwise it would report the wrong
152 calling address. */
155 ldmdb sp, { r2, r3, lr }
156 /* Move the low bit of the original numerator to the carry bit */
158 /* Shift the remainder left one and add in the carry bit */
160 /* Subtract the original divisor from the remainder, setting carry if the
161 result is non-negative */
163 /* Shift quotient left one and add carry bit */
167 /* __div0 expects the calling address on the top of the stack */
170 #if defined(__ARM_EABI__) || !defined(USE_IRAM)
181 .set q, 0x40000000 / div
182 .set r, (0x40000000 - (q * div))<<1
196 .set q, 0x40000000 / div * 4
202 .size udiv32_arm, . - udiv32_arm
/* ARMV5_UDIV32_BODY: macro body for unsigned 32-bit division. It loads an
   8-bit estimate from .L_udiv_est_table (apparently a reciprocal estimate of
   the shifted divisor), refines it with multiply instructions, multiplies the
   numerator by it and then applies small corrections to the quotient.
   NOTE(review): the embedded line numbers in this listing skip, so labels,
   branches and the closing .endm are not visible here; the comments below
   describe only the instructions that are shown.

   Macro parameters (all substituted textually by the assembler):
     numerator - register compared against \divisor and multiplied by the
                 estimate
     divisor   - register holding the divisor on entry; later overwritten
                 with the estimate as it is refined
     quotient  - register receiving the quotient and its +1/+2 corrections
     bits      - register holding a shift count; how it is initialised is not
                 visible here (presumably a leading-zero count - TODO confirm)
     inv       - scratch register: table estimate, then multiply results
     neg       - scratch register: table address first, then 0 - \divisor
     div0label - optional label; the code under .ifnc is assembled only when
                 this argument is non-empty (presumably the divide-by-zero
                 target - TODO confirm) */
205 .macro ARMV5_UDIV32_BODY numerator, divisor, quotient, bits, inv, neg, div0label
206 cmp \numerator, \divisor
/* inv = divisor << bits; neg = pc + (inv >> 25), i.e. the top seven bits of
   the shifted divisor become a byte offset from pc. */
209 mov \inv, \divisor, lsl \bits
210 add \neg, pc, \inv, lsr #25
/* Conditional (HI) byte load of the estimate. The "-.-64" part of the offset
   rebases the pc-relative address onto the table; presumably the 64 accounts
   for the index being in 64..127 when the top bit is set - TODO confirm. */
212 ldrhib \inv, [\neg, #.L_udiv_est_table-.-64]
/* bits -= 7 (setting flags); neg = 0 - divisor; if the subtraction result is
   not negative, divisor = inv << bits. */
214 subs \bits, \bits, #7
215 rsb \neg, \divisor, #0
216 movpl \divisor, \inv, lsl \bits
/* Refinement of the estimate held in \divisor: inv = divisor * neg, then
   divisor += (divisor * top halfword of inv) >> 16, then inv is recomputed.
   This looks like a Newton-Raphson style step - TODO confirm. */
218 mul \inv, \divisor, \neg
219 smlawt \divisor, \divisor, \inv, \divisor
220 mul \inv, \divisor, \neg
221 /* This will save a cycle on ARMv6, but does not produce a correct result
222 if numerator sign bit is set. This case accounts for about 1 in 10^7 of
223 divisions, done by the APE decoder, so we specialize for the more common
224 case and handle the uncommon large-numerator case separately */
/* tst sets N from the numerator's sign bit. smmla: divisor += high word of
   (divisor * inv). smmul: inv = high word of (numerator * divisor), signed. */
226 tst \numerator, \numerator
227 smmla \divisor, \divisor, \inv, \divisor
229 smmul \inv, \numerator, \divisor
/* smlal: 64-bit accumulate, divisor:bits += inv * divisor (signed).
   umull: inv:bits = numerator * divisor (unsigned); inv is the high word. */
232 smlal \bits, \divisor, \inv, \divisor
233 umull \bits, \inv, \numerator, \divisor
/* numerator += neg, then divisor = inv * neg + numerator. With neg holding
   0 - (original divisor) this forms a remainder-like value from the
   estimated quotient in inv. */
235 add \numerator, \numerator, \neg
236 mla \divisor, \inv, \neg, \numerator
/* Conditional quotient corrections; the instruction that sets the flags
   tested here is not visible in this listing. */
239 addcc \quotient, \quotient, #1
240 addpl \quotient, \quotient, #2
/* divisor = inv >> bits, followed by the same multiply / multiply-accumulate
   pair as above. */
245 mov \divisor, \inv, lsr \bits
246 umull \bits, \inv, \numerator, \divisor
247 mla \divisor, \inv, \neg, \numerator
/* cmn adds neg and (divisor >> 1) and sets carry on unsigned overflow; when
   carry is set, divisor += neg << 1 and the quotient is bumped by 2. The
   flag source for the final +1 is not visible here. */
249 cmn \neg, \divisor, lsr #1
250 addcs \divisor, \divisor, \neg, lsl #1
251 addcs \quotient, \quotient, #2
253 addcs \quotient, \quotient, #1
/* Assembled only when a div0label argument was supplied. */
256 .ifnc "", "\div0label"
/* bits = 31 - bits; quotient = numerator >> bits. */
257 rsb \bits, \bits, #31
260 mov \quotient, \numerator, lsr \bits
/* Same estimate-multiply and correction sequence as earlier in the macro. */
267 umull \bits, \inv, \numerator, \divisor
268 add \numerator, \numerator, \neg
269 mla \divisor, \inv, \neg, \numerator
272 addcc \quotient, \quotient, #1
273 addpl \quotient, \quotient, #2
279 ARMV5_UDIV32_BODY r0, r1, r0, r2, r3, ip, .L_div0
281 /* __div0 expects the calling address on the top of the stack */
284 #if defined(__ARM_EABI__) || !defined(USE_IRAM)
/* 64-entry byte table of estimates. The values match
   floor(2^14 / (64 + i)) for entry i, clamped to 0xff (entry 0 would
   otherwise be 0x100), i.e. an 8-bit reciprocal estimate for a value whose
   top seven bits are 64 + i.
   NOTE(review): the table's label is not visible in this listing; presumably
   this is .L_udiv_est_table referenced by ARMV5_UDIV32_BODY - confirm. */
291 .byte 0xff, 0xfc, 0xf8, 0xf4, 0xf0, 0xed, 0xea, 0xe6
292 .byte 0xe3, 0xe0, 0xdd, 0xda, 0xd7, 0xd4, 0xd2, 0xcf
293 .byte 0xcc, 0xca, 0xc7, 0xc5, 0xc3, 0xc0, 0xbe, 0xbc
294 .byte 0xba, 0xb8, 0xb6, 0xb4, 0xb2, 0xb0, 0xae, 0xac
295 .byte 0xaa, 0xa8, 0xa7, 0xa5, 0xa3, 0xa2, 0xa0, 0x9f
296 .byte 0x9d, 0x9c, 0x9a, 0x99, 0x97, 0x96, 0x94, 0x93
297 .byte 0x92, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8a, 0x89
298 .byte 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81
300 .size udiv32_arm, . - udiv32_arm