apps/codecs/lib/udiv32_armv4.S

   1 /***************************************************************************
   2  *             __________               __   ___.
   3  *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4  *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5  *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6  *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7  *                     \/            \/     \/    \/            \/
   8  * $Id$
   9  *
  10  * Copyright (C) 2008 by Jens Arnold
  11  *
  12  * Optimised unsigned integer division for ARMv4
  13  *
  14  * Based on: libgcc routines for ARM cpu.
  15  * Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
  16  * Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
  17  * Free Software Foundation, Inc.
  18  *
  19  * This program is free software; you can redistribute it and/or
  20  * modify it under the terms of the GNU General Public License
  21  * as published by the Free Software Foundation; either version 2
  22  * of the License, or (at your option) any later version.
  23  *
  24  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  25  * KIND, either express or implied.
  26  *
  27  ****************************************************************************/
  28
  29 #include "config.h"
  30 /* Codecs should not normally do this, but we need to check a macro, and
  31  * codecs.h would confuse the assembler. */
  32
   33 .macro ARM_DIV_BODY dividend, divisor, result, curbit
      @ Unrolled shift-and-subtract unsigned division.
      @   dividend  in: dividend; out: partially reduced dividend (the last
      @             subtraction is omitted, so it is not the true remainder)
      @   divisor   in: divisor; not written by this macro
      @   result    out: quotient (used as scratch during the estimate)
      @   curbit    scratch: number of instructions of the unrolled sequence
      @             that the computed jump skips
      @ The condition flags are overwritten. The single invocation in this
      @ file is reached only with divisor > 1, dividend > divisor and a
      @ divisor that is not a power of two.
   34
   35     mov     \result, \dividend    @ scratch copy used for the size estimate
   36     mov     \curbit, #90          @ 3 * 30: by default skip 30 steps of 3 insns
      @ Each test below asks whether the quotient needs k more bits
      @ (divisor <= scratch >> k). If so, the scratch value is shifted down
      @ and 3 * k fewer instructions are skipped, so k more steps are executed.
   37     cmp     \divisor, \result, lsr #16
   38     movls   \result,\result, lsr #16
   39     subls   \curbit, \curbit, #48     @ 3 * 16
   40     cmp     \divisor, \result, lsr #8
   41     movls   \result,\result, lsr #8
   42     subls   \curbit, \curbit, #24     @ 3 * 8
   43     cmp     \divisor, \result, lsr #4
   44     movls   \result,\result, lsr #4
   45     subls   \curbit, \curbit, #12     @ 3 * 4
   46     cmp     \divisor, \result, lsr #2
   47     subls   \curbit, \curbit, #6      @ 3 * 2; scratch is not needed any more
   48     @ Calculation is only done down to shift=2, because the shift=1 step
   49     @ would need 3 more cycles, but would only gain 1.5 cycles on average.
   50     mov     \result, #0           @ quotient bits are ORed in from here on
      @ Computed jump into the unrolled sequence. In ARM state pc reads as the
      @ address of this instruction + 8, which is the first cmp after the nop,
      @ so curbit * 4 bytes of the sequence are skipped. The nop only fills the
      @ word at +4 and is never executed.
   51     add     pc, pc, \curbit, lsl #2
   52     nop
   53     .set    shift, 32
   54     .rept   31                    @ emits the steps for shift = 31 down to 1
   55     .set    shift, shift - 1
   56     cmp     \divisor, \dividend, lsr #shift           @ does divisor << shift fit?
   57     orrls   \result, \result, #(1 << shift)           @ yes: set quotient bit
   58     subls   \dividend, \dividend, \divisor, lsl #shift @ and subtract it
   59     .endr   @ shift==0 in the .rept would cause a warning for lsr #0
   60     cmp     \divisor, \dividend   @ final step (shift = 0), written out by hand
   61     orrls   \result, \result, #1
   62     @subls  \dividend, \dividend, \divisor  @ correct remainder not needed
   63 .endm
  64
   65 .macro ARM_DIV2_ORDER divisor, order
      @ Looks up the bit index of a divisor that has exactly one bit set.
      @   divisor   in: value with a single bit set; overwritten with a hash
      @   order     out: byte loaded from L_ffs_table (the bit index)
      @ L_ffs_table must be within reach of the pc-relative adr below.
   66
   67     @ There's exactly one bit set in the divisor, so ffs() can be used
   68     @ This is the ffs algorithm devised by D.Seal and posted to
   69     @ comp.sys.arm on 16 Feb 1994.
   70     adr     \order, L_ffs_table                    @ table base address
   71     orr     \divisor, \divisor, \divisor, lsl #4   @  = X * 0x11
   72     orr     \divisor, \divisor, \divisor, lsl #6   @  = X * 0x451
   73     rsb     \divisor, \divisor, \divisor, lsl #16  @  = X * 0x0450fbaf
   74
      @ The top 6 bits of the product select one of the 64 table bytes.
   75     ldrb    \order, [\order, \divisor, lsr #26]
   76 .endm
  77
  78
   79 #ifdef USE_IRAM
      @ With USE_IRAM defined the routine is emitted into the .icode section
      @ (presumably placed in internal RAM by the linker script - confirm),
      @ otherwise into the regular .text section.
   80     .section    .icode,"ax",%progbits
   81 #else
   82     .text
   83 #endif
   84     .align
   85     .global udiv32_arm
   86     .type   udiv32_arm,%function
   87
      @ udiv32_arm: unsigned 32-bit integer division.
      @   in:   r0 = dividend, r1 = divisor
      @   out:  r0 = quotient; a divisor of zero yields 0
      @   r1, r2, r3 and the condition flags may be overwritten.
      @ NOTE(review): presumably called from C with two unsigned 32-bit
      @ arguments - confirm against the declaring header.
   88 udiv32_arm:
   89     subs    r2, r1, #1            @ r2 = divisor - 1, flags classify divisor
   90     bxeq    lr                    @ divisor == 1: r0 already is the quotient
   91     bcc     20f                   @ borrow: divisor == 0 (Z clear -> movne at 20)
   92     cmp     r0, r1
   93     bls     10f                   @ dividend <= divisor: quotient is 1 or 0
   94     tst     r1, r2                @ divisor & (divisor - 1)
   95     beq     30f                   @ zero: divisor is a power of two
   96
      @ General case: full division, quotient is produced in r2.
   97     ARM_DIV_BODY r0, r1, r2, r3
   98     mov     r0, r2
   99     bx      lr
  100
      @ Reached with the flags of "cmp r0, r1": equal -> 1, lower -> 0.
  101 10:
  102     moveq   r0, #1
      @ Also entered directly for divisor == 0, with Z clear from the subs.
  103 20:
  104     movne   r0, #0
  105     bx      lr
  106
      @ Power-of-two divisor: divide by shifting right by its bit index.
  107 30:
  108     ARM_DIV2_ORDER r1, r2
  109     mov     r0, r0, lsr r2
  110     bx      lr
 111
  112 L_ffs_table:
      @ 64-byte lookup table read by ARM_DIV2_ORDER. The index is the top
      @ 6 bits of X * 0x0450fbaf; for X with exactly one bit set the entry
      @ is the index of that bit (e.g. X = 1 -> index 1 -> 0, and
      @ X = 0x80000000 -> index 32 -> 31). Index 0, reached for X = 0,
      @ holds 32. The remaining 0 entries are presumably never selected by
      @ a single-bit input - verify before relying on them.
  113     @        0   1   2   3   4   5   6   7
  114     @----------------------------------------------
  115     .byte   32,  0,  1, 12,  2,  6,  0, 13  @  0- 7
  116     .byte    3,  0,  7,  0,  0,  0,  0, 14  @  8-15
  117     .byte   10,  4,  0,  0,  8,  0,  0, 25  @ 16-23
  118     .byte    0,  0,  0,  0,  0, 21, 27, 15  @ 24-31
  119     .byte   31, 11,  5,  0,  0,  0,  0,  0  @ 32-39
  120     .byte    9,  0,  0, 24,  0,  0, 20, 26  @ 40-47
  121     .byte   30,  0,  0,  0,  0, 23,  0, 19  @ 48-55
  122     .byte   29,  0, 22, 18, 28, 17, 16,  0  @ 56-63