source/libs/gmp/gmp-src/mpn/ia64/invert_limb.asm

   1 dnl  IA-64 mpn_invert_limb -- Invert a normalized limb.
   2
   3 dnl  Contributed to the GNU project by Torbjorn Granlund and Kevin Ryde.
   4
   5 dnl  Copyright 2000, 2002, 2004 Free Software Foundation, Inc.
   6
   7 dnl  This file is part of the GNU MP Library.
   8 dnl
   9 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
  10 dnl  it under the terms of either:
  11 dnl
  12 dnl    * the GNU Lesser General Public License as published by the Free
  13 dnl      Software Foundation; either version 3 of the License, or (at your
  14 dnl      option) any later version.
  15 dnl
  16 dnl  or
  17 dnl
  18 dnl    * the GNU General Public License as published by the Free Software
  19 dnl      Foundation; either version 2 of the License, or (at your option) any
  20 dnl      later version.
  21 dnl
  22 dnl  or both in parallel, as here.
  23 dnl
  24 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
  25 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  26 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  27 dnl  for more details.
  28 dnl
  29 dnl  You should have received copies of the GNU General Public License and the
  30 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
  31 dnl  see https://www.gnu.org/licenses/.
  32
  33 include(`../config.m4')
  34
  35 C INPUT PARAMETERS
  36 C d = r32
  37
  38 C           cycles
  39 C Itanium:    74
  40 C Itanium 2:  50+6
  41
  42 C It should be possible to avoid the xmpy.hu and the following tests by
  43 C explicitly chopping in the last fma.  That would save about 10 cycles.
  44
  45 ASM_START()
  46         .sdata                  C short data; mpn_invert_limb addresses it via @gprel off gp
  47         .align 16
     C Floating-point constants loaded with ldfe by mpn_invert_limb:
     C   .LC0 = 2^64,  .LC1 = 2^128.
     C Each constant is written as four data4 words (16 bytes).  The nonzero
     C parts are the significand 0x8000000000000000 and the biased exponent
     C 0x403f (0x3fff + 64) or 0x407f (0x3fff + 128).  Only the word layout
     C differs between the two variants below, which are selected by the
     C HAVE_DOUBLE_IEEE_LITTLE_ENDIAN / HAVE_DOUBLE_IEEE_BIG_ENDIAN macros
     C (presumably provided through ../config.m4 -- confirm).  With neither
     C macro defined, m4_error is invoked.
  48 ifdef(`HAVE_DOUBLE_IEEE_LITTLE_ENDIAN',`
  49 .LC0:   data4 0x00000000, 0x80000000, 0x0000403f, 0x00000000    C 2^64
  50 .LC1:   data4 0x00000000, 0x80000000, 0x0000407f, 0x00000000    C 2^128
  51
  52 ',`ifdef(`HAVE_DOUBLE_IEEE_BIG_ENDIAN',`
  53 .LC0:   data4 0x403f8000, 0x00000000, 0x00000000, 0x00000000    C 2^64
  54 .LC1:   data4 0x407f8000, 0x00000000, 0x00000000, 0x00000000    C 2^128
  55
  56 ',`m4_error(`Oops, need to know float endianness
  57 ')')')
  58
  59
  60 PROLOGUE(mpn_invert_limb)
     C In:   r32 = d, the limb to invert.  The file header calls it a
     C       normalized limb; presumably that means bit 63 is set -- confirm.
     C Out:  r8 = di.  When d + d wraps to 0 the result is -1 (all ones).
     C       Otherwise it is the floating-point quotient (2^128 - d*2^64) / d
     C       truncated to an unsigned integer, then lowered by one if the
     C       integer check at the end finds it too large.
     C Uses: r9, r14, r15, f6-f11, p6.
     C
     C The quotient comes from frcpa followed by fma refinement steps, all
     C predicated on the p6 written by frcpa.  Notation used below:
     C   N = 2^128 - d*2^64      y = approximation to 1/d
     C   q = approximation to N/d      e = 1 - d*y0
     C The "C nn" notes after each stop (;;) are kept from the original; they
     C look like the issue cycle of the following group -- confirm.
  61                 C 00
  62         addl            r14 = @gprel(.LC0), gp  C r14 = address of .LC0
  63         addl            r15 = @gprel(.LC1), gp  C r15 = address of .LC1
  64         setf.sig        f7 = r32                C f7 = d, placed in the significand
  65         add             r9 = r32, r32           C r9 = d + d; wraps to 0 for d = 2^63 (and for d = 0)
  66         ;;      C 01
  67         ldfe            f10 = [r14]             C f10 = 2^64
  68         ldfe            f8 = [r15]              C f8 = 2^128
  69         cmp.eq          p6, p0 = 0, r9          C p6 = (d + d == 0), the d = 2^63 case
  70         mov             r8 = -1                 C retval for 2^63, overwritten later otherwise
  71    (p6) br.ret.spnt.many b0                     C early return with r8 = -1
  72         ;;      C 07
     C NOTE(review): the f11 value from the next instruction is never read;
     C f11 is rewritten (f11 = e^2) before its next use.  Looks removable, but
     C confirm against the intended schedule before touching it.
  73         fmpy.s1         f11 = f7, f10           C f11 = d * 2^64
  74         fnma.s1         f6 = f7, f10, f8        C f6 = N = 2^128 - d * 2^64
  75         ;;      C 11
  76         frcpa.s1        f8, p6 = f6, f7         C f8 = y0, approx 1/d; p6 gates the refinement below
  77         ;;      C 15
  78    (p6) fnma.s1         f9 = f7, f8, f1         C f9 = e = 1 - d*y0  (f1 supplies the constant 1.0)
  79    (p6) fmpy.s1         f10 = f6, f8            C f10 = q0 = N*y0
  80         ;;      C 19
  81    (p6) fmpy.s1         f11 = f9, f9            C f11 = e^2
  82    (p6) fma.s1          f10 = f9, f10, f10      C f10 = q1 = q0 + e*q0
  83         ;;      C 23
  84    (p6) fma.s1          f8 = f9, f8, f8         C f8 = y1 = y0 + e*y0
  85    (p6) fma.s1          f9 = f11, f10, f10      C f9 = q2 = q1 + e^2*q1
  86         ;;      C 27
  87    (p6) fma.s1          f8 = f11, f8, f8        C f8 = y2 = y1 + e^2*y1
  88    (p6) fnma.s1         f10 = f7, f9, f6        C f10 = r = N - d*q2, the remainder
  89         ;;      C 31
  90    (p6) fma.s1          f8 = f10, f8, f9        C f8 = q3 = q2 + r*y2
  91         ;;      C 35
  92         fcvt.fxu.trunc.s1 f8 = f8               C f8 = quotient truncated to an unsigned integer
  93         ;;      C 39
  94         getf.sig        r8 = f8                 C r8 = di candidate
  95         xmpy.hu         f10 = f8, f7            C f10 = high 64 bits of di * d
  96         ;;      C 43
  97         getf.sig        r14 = f10               C r14 = high(di * d)
  98         andcm           r9 = -1, r32            C r9 = ~d = 2^64 - 1 - d
  99         ;;      C 48
     C If high(di*d) > 2^64 - 1 - d then (di + 2^64) * d >= 2^128, so the
     C candidate is too large and is lowered by one.
 100         cmp.ltu         p6, p0 = r9, r14        C p6 = (~d < high(di*d)), unsigned: got overflow?
 101         ;;      C 49
 102    (p6) add             r8 = -1, r8             C adjust di down by one
 103         br.ret.sptk.many b0                     C return di in r8
 104 EPILOGUE()
 105 ASM_END()