1 dnl x86 mpn_invert_limb
3 dnl Contributed to the GNU project by Niels Möller
5 dnl Copyright 2009, 2011, 2015 Free Software Foundation, Inc.
7 dnl This file is part of the GNU MP Library.
9 dnl The GNU MP Library is free software; you can redistribute it and/or modify
10 dnl it under the terms of either:
12 dnl * the GNU Lesser General Public License as published by the Free
13 dnl Software Foundation; either version 3 of the License, or (at your
14 dnl option) any later version.
18 dnl * the GNU General Public License as published by the Free Software
19 dnl Foundation; either version 2 of the License, or (at your option) any
dnl later version.
22 dnl or both in parallel, as here.
24 dnl The GNU MP Library is distributed in the hope that it will be useful, but
25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
dnl for more details.
29 dnl You should have received copies of the GNU General Public License and the
30 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
31 dnl see https://www.gnu.org/licenses/.
33 include(`..
/config.m4
')
37 C P6 model 0-8,10-12 ?
38 C P6 model 9 (Banias) ?
39 C P6 model 13 (Dothan) ?
40 C P4 model 0 (Willamette) ?
42 C P4 model 2 (Northwood) ?
43 C P4 model 3 (Prescott) ?
44 C P4 model 4 (Nocona) ?
50 C * These c/l numbers are for a non-PIC build. Consider falling back to using
51 C the 'div' instruction for PIC builds.
52 C * Perhaps use this file--or at least the algorithm--for more machines than k7.
56 C Current approximation is in %eax and/or %ecx
57 C %ebx and %edx are temporaries
58 C %esi and %ebp are unused
C Frame and symbol definitions used by mpn_invert_limb.
C PARAM_DIVISOR names the operand the function loads its divisor from.
C NOTE(review): the 4 is presumably the argument offset from the entry stack
C pointer - confirm against the defframe macro definition.
60 defframe(PARAM_DIVISOR,4)
64 C Make approx_tab global to work around Apple relocation bug.
C The deflit below maps the name approx_tab onto MPN(invert_limb_tab).
C NOTE(review): m4 only collects arguments when the opening parenthesis
C directly follows the macro name. Here the macro name and its argument list
C sit on separate lines and the quoted name contains a line break, so this
C will not expand as intended - confirm against the upstream file. Any
C condition that guards this definition is not visible here.
66 deflit
(`approx_tab
', MPN(invert_limb_tab))
C mpn_invert_limb
C
C Loads the divisor from PARAM_DIVISOR, fetches a 16-bit starting estimate v0
C from approx_tab and refines it in steps v1, v2 and v3. The last visible
C instruction leaves v3 in %eax.
C
C NOTE(review): this listing is incomplete. The instructions that derive the
C table index, most of the arithmetic between the commented steps, the stack
C restore and the return are not visible here, and the macro names PROLOGUE
C and LEAL are separated from their argument lists. Confirm every step
C against the upstream file before relying on this text.
71 PROLOGUE
(mpn_invert_limb
)
73 mov PARAM_DIVISOR, %eax
74 C Avoid push/pop on k7: reserve 8 bytes of stack with a single sub.
C NOTE(review): presumably the space is used to save registers; the stores
C are not visible here.
75 sub $8, %esp FRAME_subl_esp(8)
C Table lookup of the starting estimate. Entries are 2 bytes wide and the
C address carries a -1024 byte bias, so index 512 selects the first entry.
C NOTE(review): the next two loads look like a base-register form and an
C absolute-address form of the same lookup. The condition choosing between
C them is not visible here - confirm.
82 LEAL
( approx_tab
, %ebx)
83 movzwl
-1024(%ebx, %eax, 2), %eax
85 movzwl -1024+approx_tab(%eax, %eax), %eax C %eax = v0
88 C v1 = (v0 << 4) - ((v0*v0*d_21) >> 32) - 1
95 mov %edi, %ebx C Prepare
98 sub %eax, %ebx C %ebx = d_31, %eax = mask
101 sub %edx, %ecx C %ecx = v1
103 C v2 = (v1 << 15) + ((v1 *(2^48 - v1 * d_31 + (v1 >> 1) & mask)) >> 33)
109 mov %edi, %eax C Prepare for next mul
112 add %edx, %ecx C %ecx = v2
118 sub %edx, %eax C %eax = v3
C approx_tab: 512 starting estimates for mpn_invert_limb, 16 bits each.
C
C The lookup in mpn_invert_limb addresses this table with 2-byte entries and
C a -1024 byte bias, so table index 512 selects the first entry below and
C index 1023 the last.
C
C NOTE(review): the first two entries, the entry at offset 16 and the last
C entry equal floor((2^24 - 2^14 + 2^9) / (512 + i)) for entry number i.
C Presumably the whole table follows that rule - confirm before regenerating.
C NOTE(review): the second DEF_OBJECT argument is presumably the alignment -
C confirm against the macro definition.
DEF_OBJECT(approx_tab,2)
	.value	0x7fe1,0x7fa1,0x7f61,0x7f22,0x7ee3,0x7ea4,0x7e65,0x7e27
	.value	0x7de9,0x7dab,0x7d6d,0x7d30,0x7cf3,0x7cb6,0x7c79,0x7c3d
	.value	0x7c00,0x7bc4,0x7b89,0x7b4d,0x7b12,0x7ad7,0x7a9c,0x7a61
	.value	0x7a27,0x79ec,0x79b2,0x7979,0x793f,0x7906,0x78cc,0x7894
	.value	0x785b,0x7822,0x77ea,0x77b2,0x777a,0x7742,0x770b,0x76d3
	.value	0x769c,0x7665,0x762f,0x75f8,0x75c2,0x758c,0x7556,0x7520
	.value	0x74ea,0x74b5,0x7480,0x744b,0x7416,0x73e2,0x73ad,0x7379
	.value	0x7345,0x7311,0x72dd,0x72aa,0x7277,0x7243,0x7210,0x71de
	.value	0x71ab,0x7179,0x7146,0x7114,0x70e2,0x70b1,0x707f,0x704e
	.value	0x701c,0x6feb,0x6fba,0x6f8a,0x6f59,0x6f29,0x6ef9,0x6ec8
	.value	0x6e99,0x6e69,0x6e39,0x6e0a,0x6ddb,0x6dab,0x6d7d,0x6d4e
	.value	0x6d1f,0x6cf1,0x6cc2,0x6c94,0x6c66,0x6c38,0x6c0a,0x6bdd
	.value	0x6bb0,0x6b82,0x6b55,0x6b28,0x6afb,0x6acf,0x6aa2,0x6a76
	.value	0x6a49,0x6a1d,0x69f1,0x69c6,0x699a,0x696e,0x6943,0x6918
	.value	0x68ed,0x68c2,0x6897,0x686c,0x6842,0x6817,0x67ed,0x67c3
	.value	0x6799,0x676f,0x6745,0x671b,0x66f2,0x66c8,0x669f,0x6676
	.value	0x664d,0x6624,0x65fc,0x65d3,0x65aa,0x6582,0x655a,0x6532
	.value	0x650a,0x64e2,0x64ba,0x6493,0x646b,0x6444,0x641c,0x63f5
	.value	0x63ce,0x63a7,0x6381,0x635a,0x6333,0x630d,0x62e7,0x62c1
	.value	0x629a,0x6275,0x624f,0x6229,0x6203,0x61de,0x61b8,0x6193
	.value	0x616e,0x6149,0x6124,0x60ff,0x60da,0x60b6,0x6091,0x606d
	.value	0x6049,0x6024,0x6000,0x5fdc,0x5fb8,0x5f95,0x5f71,0x5f4d
	.value	0x5f2a,0x5f07,0x5ee3,0x5ec0,0x5e9d,0x5e7a,0x5e57,0x5e35
	.value	0x5e12,0x5def,0x5dcd,0x5dab,0x5d88,0x5d66,0x5d44,0x5d22
	.value	0x5d00,0x5cde,0x5cbd,0x5c9b,0x5c7a,0x5c58,0x5c37,0x5c16
	.value	0x5bf5,0x5bd4,0x5bb3,0x5b92,0x5b71,0x5b51,0x5b30,0x5b10
	.value	0x5aef,0x5acf,0x5aaf,0x5a8f,0x5a6f,0x5a4f,0x5a2f,0x5a0f
	.value	0x59ef,0x59d0,0x59b0,0x5991,0x5972,0x5952,0x5933,0x5914
	.value	0x58f5,0x58d6,0x58b7,0x5899,0x587a,0x585b,0x583d,0x581f
	.value	0x5800,0x57e2,0x57c4,0x57a6,0x5788,0x576a,0x574c,0x572e
	.value	0x5711,0x56f3,0x56d5,0x56b8,0x569b,0x567d,0x5660,0x5643
	.value	0x5626,0x5609,0x55ec,0x55cf,0x55b2,0x5596,0x5579,0x555d
	.value	0x5540,0x5524,0x5507,0x54eb,0x54cf,0x54b3,0x5497,0x547b
	.value	0x545f,0x5443,0x5428,0x540c,0x53f0,0x53d5,0x53b9,0x539e
	.value	0x5383,0x5368,0x534c,0x5331,0x5316,0x52fb,0x52e0,0x52c6
	.value	0x52ab,0x5290,0x5276,0x525b,0x5240,0x5226,0x520c,0x51f1
	.value	0x51d7,0x51bd,0x51a3,0x5189,0x516f,0x5155,0x513b,0x5121
	.value	0x5108,0x50ee,0x50d5,0x50bb,0x50a2,0x5088,0x506f,0x5056
	.value	0x503c,0x5023,0x500a,0x4ff1,0x4fd8,0x4fbf,0x4fa6,0x4f8e
	.value	0x4f75,0x4f5c,0x4f44,0x4f2b,0x4f13,0x4efa,0x4ee2,0x4eca
	.value	0x4eb1,0x4e99,0x4e81,0x4e69,0x4e51,0x4e39,0x4e21,0x4e09
	.value	0x4df1,0x4dda,0x4dc2,0x4daa,0x4d93,0x4d7b,0x4d64,0x4d4d
	.value	0x4d35,0x4d1e,0x4d07,0x4cf0,0x4cd8,0x4cc1,0x4caa,0x4c93
	.value	0x4c7d,0x4c66,0x4c4f,0x4c38,0x4c21,0x4c0b,0x4bf4,0x4bde
	.value	0x4bc7,0x4bb1,0x4b9a,0x4b84,0x4b6e,0x4b58,0x4b41,0x4b2b
	.value	0x4b15,0x4aff,0x4ae9,0x4ad3,0x4abd,0x4aa8,0x4a92,0x4a7c
	.value	0x4a66,0x4a51,0x4a3b,0x4a26,0x4a10,0x49fb,0x49e5,0x49d0
	.value	0x49bb,0x49a6,0x4990,0x497b,0x4966,0x4951,0x493c,0x4927
	.value	0x4912,0x48fe,0x48e9,0x48d4,0x48bf,0x48ab,0x4896,0x4881
	.value	0x486d,0x4858,0x4844,0x482f,0x481b,0x4807,0x47f3,0x47de
	.value	0x47ca,0x47b6,0x47a2,0x478e,0x477a,0x4766,0x4752,0x473e
	.value	0x472a,0x4717,0x4703,0x46ef,0x46db,0x46c8,0x46b4,0x46a1
	.value	0x468d,0x467a,0x4666,0x4653,0x4640,0x462c,0x4619,0x4606
	.value	0x45f3,0x45e0,0x45cd,0x45ba,0x45a7,0x4594,0x4581,0x456e
	.value	0x455b,0x4548,0x4536,0x4523,0x4510,0x44fe,0x44eb,0x44d8
	.value	0x44c6,0x44b3,0x44a1,0x448f,0x447c,0x446a,0x4458,0x4445
	.value	0x4433,0x4421,0x440f,0x43fd,0x43eb,0x43d9,0x43c7,0x43b5
	.value	0x43a3,0x4391,0x437f,0x436d,0x435c,0x434a,0x4338,0x4327
	.value	0x4315,0x4303,0x42f2,0x42e0,0x42cf,0x42bd,0x42ac,0x429b
	.value	0x4289,0x4278,0x4267,0x4256,0x4244,0x4233,0x4222,0x4211
	.value	0x4200,0x41ef,0x41de,0x41cd,0x41bc,0x41ab,0x419a,0x418a
	.value	0x4179,0x4168,0x4157,0x4147,0x4136,0x4125,0x4115,0x4104
	.value	0x40f4,0x40e3,0x40d3,0x40c2,0x40b2,0x40a2,0x4091,0x4081
	.value	0x4071,0x4061,0x4050,0x4040,0x4030,0x4020,0x4010,0x4000
END_OBJECT(approx_tab)