dnl PowerPC-64 mpn_invert_limb -- Invert a normalized limb.

dnl Copyright 2004-2006, 2008, 2010, 2013 Free Software Foundation, Inc.

dnl This file is part of the GNU MP Library.
dnl
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of either:
dnl
dnl   * the GNU Lesser General Public License as published by the Free
dnl     Software Foundation; either version 3 of the License, or (at your
dnl     option) any later version.
dnl
dnl or
dnl
dnl   * the GNU General Public License as published by the Free Software
dnl     Foundation; either version 2 of the License, or (at your option) any
dnl     later version.
dnl
dnl or both in parallel, as here.
dnl
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl for more details.
dnl
dnl You should have received copies of the GNU General Public License and the
dnl GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl see https://www.gnu.org/licenses/.
include(`../config.m4')

C                    cycles/limb (approximate)
C POWER3/PPC630          80
C POWER4/PPC970          86
C POWER5                 86
C POWER6                170
C POWER7                 66
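
C INPUT PARAMETER
C d      r3      (a normalized limb, i.e. with its most significant bit set)
C
C Returns floor((B^2 - 1) / d) - B in r3, where B = 2^64: the reciprocal
C used by the mpn division routines.  The value is obtained from a table
C lookup giving a short initial approximation, refined by Newton-style
C iteration steps in the manner of Moller and Granlund, "Improved division
C by invariant integers".  Bit counts quoted in the comments below are
C approximate.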

ASM_START()
PROLOGUE(mpn_invert_limb,toc)
	LEAL(	r12, approx_tab)
	srdi	r9, r3, 32
	rlwinm	r9, r9, 10, 23, 30	C ((d >> 55) - 256) << 1, byte offset into approx_tab
	srdi	r10, r3, 24		C d >> 24
	lis	r11, 0x1000
	rldicl	r8, r3, 0, 63		C d mod 2
	addi	r10, r10, 1		C d40
	sldi	r11, r11, 32		C 2^60
	srdi	r7, r3, 1		C d/2
	add	r7, r7, r8		C d63 = ceil(d/2)
	neg	r8, r8			C mask = -(d mod 2)
	lhzx	r0, r9, r12
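C r0 = v0 = approx_tab[(d >> 55) - 256] = floor(0x7fd00 / (d >> 55)), an
C initial reciprocal approximation of about 11 bits (table at end of file).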
	mullw	r9, r0, r0		C v0*v0
	sldi	r6, r0, 11		C v0 << 11
	addi	r0, r6, -1		C (v0 << 11) - 1
	mulld	r9, r9, r10		C v0*v0*d40
	srdi	r9, r9, 40		C v0*v0*d40 >> 40
	subf	r9, r9, r0		C v1 = (v0 << 11) - (v0*v0*d40 >> 40) - 1
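C v1 is the first refinement step, roughly 2^60 / d40 (about 21 bits).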
	mulld	r0, r9, r10		C v1*d40
	sldi	r6, r9, 13		C v1 << 13
	subf	r0, r0, r11		C 2^60 - v1*d40
	mulld	r0, r0, r9		C v1 * (2^60 - v1*d40)
	srdi	r0, r0, 47		C v1 * (2^60 - v1*d40) >> 47
	add	r0, r0, r6		C v2 = (v1 << 13) + (v1 * (2^60 - v1*d40) >> 47)
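C v2 is the second refinement step, roughly 2^97 / d (about 34 bits).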
	mulld	r11, r0, r7		C v2 * d63
	srdi	r10, r0, 1		C v2 >> 1
	sldi	r9, r0, 31		C v2 << 31
	and	r8, r10, r8		C (v2 >> 1) & mask
	subf	r8, r11, r8		C ((v2 >> 1) & mask) - v2 * d63
	mulhdu	r0, r8, r0		C p1 = high64(v2 * (((v2 >> 1) & mask) - v2 * d63))
	srdi	r0, r0, 1		C p1 >> 1
	add	r0, r0, r9		C v3 = (v2 << 31) + (p1 >> 1)
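C v3 approximates B^2 / d - B to within a small error; the final
C multiply-and-subtract below removes it.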
	nop
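C Final adjustment: compute floor((v3 + B + 1) * d / B) mod B and subtract
C it from v3, leaving the return value floor((B^2 - 1) / d) - B in r3.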
	mulld	r11, r0, r3		C lo(v3 * d)
	mulhdu	r9, r0, r3		C hi(v3 * d)
	addc	r10, r11, r3		C lo(v3 * d) + d, set carry
	adde	r3, r9, r3		C hi(v3 * d) + d + carry
	subf	r3, r3, r0		C v3 - (hi(v3 * d) + d + carry)
	blr
EPILOGUE()
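
C approx_tab[i - 256] = floor((2^19 - 3*2^8) / i) = floor(0x7fd00 / i) for
C i = 256..511: one halfword entry for each value of the normalized
C divisor's nine most significant bits.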
DEF_OBJECT(approx_tab)
forloop(i,256,512-1,dnl
`	.short	eval(0x7fd00/i)
')dnl
END_OBJECT(approx_tab)
ASM_END()