source/libs/gmp/gmp-src/mpn/arm/v6t2/divrem_1.asm

   1 dnl  ARM v6t2 mpn_divrem_1 and mpn_preinv_divrem_1.
   2
   3 dnl  Contributed to the GNU project by Torbjörn Granlund.
   4
   5 dnl  Copyright 2012 Free Software Foundation, Inc.
   6
   7 dnl  This file is part of the GNU MP Library.
   8 dnl
   9 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
  10 dnl  it under the terms of either:
  11 dnl
  12 dnl    * the GNU Lesser General Public License as published by the Free
  13 dnl      Software Foundation; either version 3 of the License, or (at your
  14 dnl      option) any later version.
  15 dnl
  16 dnl  or
  17 dnl
  18 dnl    * the GNU General Public License as published by the Free Software
  19 dnl      Foundation; either version 2 of the License, or (at your option) any
  20 dnl      later version.
  21 dnl
  22 dnl  or both in parallel, as here.
  23 dnl
  24 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
  25 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  26 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  27 dnl  for more details.
  28 dnl
  29 dnl  You should have received copies of the GNU General Public License and the
  30 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
  31 dnl  see https://www.gnu.org/licenses/.
  32
  33 include(`../config.m4')
  34
  35 C               norm    unorm   frac
  36 C StrongARM      -       -       -
  37 C XScale         -       -       -
  38 C Cortex-A7      ?       ?       ?
  39 C Cortex-A8      ?       ?       ?
  40 C Cortex-A9     13      14      13
  41 C Cortex-A15    11.4    11.8    11.1
  42
  43 C TODO
  44 C  * Optimise inner-loops better, they could likely run a cycle or two faster.
  45 C  * Decrease register usage, streamline non-loop code.
  46
   47 define(`qp_arg',  `r0')                 C in: base address used to place qp
   48 define(`fn',      `r1')                 C in: L(ftop) iteration count; code uses r1 directly
   49 define(`up_arg',  `r2')                 C in: base address used to place up
   50 define(`n_arg',   `r3')                 C in: limb count of U; code uses r3 directly
   51 define(`d_arg',   `0')                  C byte offset of d from the entry sp
   52 define(`dinv_arg',`4')                  C byte offset of dinv (preinv entry only)
   53 define(`cnt_arg', `8')                  C byte offset of cnt (preinv entry only)
   54
      C The offsets above are used as [sp, #9*4+offset] because both entry
      C points first push nine registers.
   55 define(`n',       `r9')                 C loop counter, initialised to n_arg - 1
   56 define(`qp',      `r5')                 C quotient store pointer, stepped down by 4
   57 define(`up',      `r6')                 C dividend load pointer, stepped down by 4
   58 define(`cnt',     `r7')                 C left shift applied to d and to U
   59 define(`tnc',     `r10')                C 32 - cnt
   60 define(`dinv',    `r0')                 C multiplier for the quotient estimate; same register as qp_arg
   61 define(`d',       `r4')                 C divisor limb
   62
   63 ASM_START()
   64 PROLOGUE(mpn_preinv_divrem_1)
      C Entry point for callers that supply dinv and cnt themselves.
      C In:  r0 = qp_arg, r1 = fn, r2 = up_arg, r3 = n_arg,
      C      [sp+0] = d, [sp+4] = dinv, [sp+8] = cnt.
      C This routine only sets up registers and the stack frame, then branches
      C into the body of mpn_divrem_1 at L(nent) or L(uent), skipping the
      C mpn_invert_limb call made there.  The frame layout (nine pushed words,
      C fn stored in the d slot) must therefore match mpn_divrem_1 exactly.
   65         stmfd   sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}  C push 9 words
   66         ldr     d,    [sp, #9*4+d_arg]  C d, first stack argument
   67         ldr     cnt,  [sp, #9*4+cnt_arg]  C cnt, third stack argument
   68         str     r1, [sp, #9*4+d_arg]    C reuse d stack slot for fn
   69         sub     n, r3, #1               C n = n_arg - 1
   70         add     r3, r1, n               C r3 = fn + n_arg - 1
   71         cmp     d, #0                   C flags are consumed by blt below
   72         add     qp, qp_arg, r3, lsl #2  C put qp at Q[] end
   73         add     up, up_arg, n, lsl #2   C put up at U[] end
   74         ldr     dinv, [sp, #9*4+dinv_arg]  C overwrites r0; qp_arg was consumed above
   75         blt     L(nent)                 C top bit of d set: path without shifting
   76         b       L(uent)                 C otherwise: d gets shifted by cnt at L(uent)
   77 EPILOGUE()
  78
   79 PROLOGUE(mpn_divrem_1)
      C In:  r0 = qp_arg, r1 = fn, r2 = up_arg, r3 = n_arg, [sp+0] = d.
      C Out: r0 = final remainder, shifted right by cnt again.
      C Quotient limbs are stored downwards starting at qp_arg + 4*(fn + n_arg - 1):
      C first one limb per limb of U (read downwards from up_arg + 4*(n_arg - 1)),
      C then fn more limbs produced in L(ftop) with a zero low limb.
      C Two paths: L(normalised) when the top bit of d is set (no shifting), and
      C L(unnorm) otherwise, where d and the limbs of U are shifted left by cnt on
      C the fly.  Each step forms a quotient estimate with umlal and dinv, gets the
      C remainder with mls, and then adjusts quotient and remainder.
      C mpn_preinv_divrem_1 branches into this body at L(nent) and L(uent), so the
      C register roles and the nine-word stack frame are shared with it.
   80         stmfd   sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}  C push 9 words
   81         sub     n, r3, #1               C n = n_arg - 1
   82         ldr     d, [sp, #9*4+d_arg]     C d
   83         str     r1, [sp, #9*4+d_arg]    C reuse d stack slot for fn
   84         add     r3, r1, n               C r3 = fn + n_arg - 1
   85         cmp     d, #0                   C flags are consumed by blt below
   86         add     qp, qp_arg, r3, lsl #2  C put qp at Q[] end
   87         add     up, up_arg, n, lsl #2   C put up at U[] end
   88         blt     L(normalised)           C top bit of d set: no shift needed
   89
   90 L(unnorm):
   91         clz     cnt, d                  C cnt = number of leading zero bits of d
   92         mov     r0, d, lsl cnt          C pass d << cnt
      C Values needed after the call (d, qp, up, cnt, n) are kept in r4-r9.
      C NOTE(review): nine words are on the stack here, so sp is not 8-byte
      C aligned if it was on entry - confirm mpn_invert_limb does not rely on that.
   93         bl      mpn_invert_limb         C r0 on return is used as dinv
   94 L(uent):
   95         mov     d, d, lsl cnt           C d <<= cnt
   96         cmp     n, #0                   C flags used by blt and by beq L(uend) below
   97         mov     r1, #0                  C r
   98         blt     L(frac)                 C n < 0: no limbs of U, only fn limbs
   99
  100         ldr     r11, [up, #0]           C limb at the top end of U
  101
  102         rsb     tnc, cnt, #32           C tnc = 32 - cnt
  103         mov     r1, r11, lsr tnc        C r = bits shifted out at the top
  104         mov     r11, r11, lsl cnt       C remaining bits, moved up by cnt
  105         beq     L(uend)                 C n = 0 (cmp above): one limb only
  106
  107         ldr     r3, [up, #-4]!          C next lower limb, up -= 4
  108         orr     r2, r11, r3, lsr tnc    C r2 = shifted limb to divide
  109         b       L(mid)                  C enter the loop at the multiply
  110
  111 L(utop):
  112         mls     r1, d, r8, r11          C r = r11 - d * q
  113         mov     r11, r3, lsl cnt        C start assembling next shifted limb
  114         ldr     r3, [up, #-4]!          C next lower limb, up -= 4
  115         cmp     r1, r2                  C r > low word of the estimate?
  116         addhi   r1, r1, d               C   then r += d
  117         subhi   r8, r8, #1              C   and q -= 1
  118         orr     r2, r11, r3, lsr tnc    C r2 = next shifted limb
  119         cmp     r1, d
  120         bcs     L(ufx)                  C r >= d: fix up out of line
  121 L(uok): str     r8, [qp], #-4           C store q limb, qp -= 4
  122 L(mid): add     r8, r1, #1              C high word starts as r + 1
  123         mov     r11, r2                 C keep limb, umlal overwrites r2
  124         umlal   r2, r8, r1, dinv        C r8:r2 += r * dinv; r8 = q estimate
  125         subs    n, n, #1
  126         bne     L(utop)
  127
      C Loop exit: finish the step that is in flight.  Same sequence as at
      C L(utop) but without a further load, and with the r >= d fix-up done
      C inline by conditional instructions.
  128         mls     r1, d, r8, r11          C r = r11 - d * q
  129         mov     r11, r3, lsl cnt        C low bits of the last limb, shifted
  130         cmp     r1, r2
  131         addhi   r1, r1, d
  132         subhi   r8, r8, #1
  133         cmp     r1, d
  134         rsbcs   r1, d, r1               C r >= d: r -= d
  135         addcs   r8, r8, #1              C   and q += 1
  136         str     r8, [qp], #-4
  137
      C Final step for the limb in r11.
  138 L(uend):add     r8, r1, #1
  139         mov     r2, r11
  140         umlal   r2, r8, r1, dinv        C r8:r2 += r * dinv
  141         mls     r1, d, r8, r11          C r = r11 - d * q
  142         cmp     r1, r2
  143         addhi   r1, r1, d
  144         subhi   r8, r8, #1
  145         cmp     r1, d
  146         rsbcs   r1, d, r1
  147         addcs   r8, r8, #1
  148         str     r8, [qp], #-4
      C Both paths arrive here with r in r1 and the shift count in cnt.
  149 L(frac):
  150         ldr     r2, [sp, #9*4+d_arg]    C fn
  151         cmp     r2, #0
  152         beq     L(fend)                 C fn = 0: nothing more to store
  153
  154 L(ftop):mov     r6, #0                  C low limb is zero; r6 is no longer needed as up
  155         add     r3, r1, #1
  156         umlal   r6, r3, r1, dinv        C r3:r6 += r * dinv; r3 = q estimate
  157         mov     r8, #0
  158         mls     r1, d, r3, r8           C r = 0 - d * q
  159         cmp     r1, r6
  160         addhi   r1, r1, d
  161         subhi   r3, r3, #1
  162         subs    r2, r2, #1              C flags survive the str, used by bne
  163         str     r3, [qp], #-4
  164         bne     L(ftop)
  165
  166 L(fend):mov     r11, r1, lsr cnt        C shift r back down by cnt
  167 L(rtn): mov     r0, r11                 C return value
  168         ldmfd   sp!, {r4, r5, r6, r7, r8, r9, r10, r11, pc}  C pop 9 words, return
  169
  170 L(normalised):
  171         mov     r0, d                   C pass d unchanged
      C NOTE(review): same stack state as at the call in L(unnorm).
  172         bl      mpn_invert_limb         C r0 on return is used as dinv
  173 L(nent):
  174         cmp     n, #0
  175         mov     r11, #0                 C r
  176         blt     L(nend)                 C n < 0: no limbs of U
  177
  178         ldr     r11, [up, #0]           C limb at the top end of U
  179         cmp     r11, d
  180         movlo   r2, #0                  C hi q limb
  181         movhs   r2, #1                  C hi q limb
  182         subhs   r11, r11, d             C r = limb - d when limb >= d
  183
  184         str     r2, [qp], #-4
  185         cmp     n, #0
  186         beq     L(nend)                 C that was the only limb
  187
  188 L(ntop):ldr     r1, [up, #-4]!          C next lower limb, up -= 4
  189         add     r12, r11, #1            C high word starts as r + 1
  190         umlal   r1, r12, r11, dinv      C r12:r1 += r * dinv; r12 = q estimate
  191         ldr     r3, [up, #0]            C same limb again, r1 was overwritten
  192         mls     r11, d, r12, r3         C r = limb - d * q
  193         cmp     r11, r1                 C r > low word of the estimate?
  194         addhi   r11, r11, d             C   then r += d
  195         subhi   r12, r12, #1            C   and q -= 1
  196         cmp     d, r11
  197         bls     L(nfx)                  C d <= r: fix up out of line
  198 L(nok): str     r12, [qp], #-4          C store q limb, qp -= 4
  199         subs    n, n, #1
  200         bne     L(ntop)
  201
  202 L(nend):mov     r1, r11                 C r
  203         mov     cnt, #0                 C shift cnt
  204         b       L(frac)
  205
      C Out-of-line fix-ups for the two main loops: q += 1, r -= d.
  206 L(nfx): add     r12, r12, #1
  207         rsb     r11, d, r11
  208         b       L(nok)
  209 L(ufx): rsb     r1, d, r1
  210         add     r8, r8, #1
  211         b       L(uok)
  212 EPILOGUE()