1 /* x86_64 fat binary initializers.
3 Contributed to the GNU project by Kevin Ryde (original x86_32 code) and
4 Torbjorn Granlund (port to x86_64)
6 THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY.
7 THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
8 COMPLETELY IN FUTURE GNU MP RELEASES.
10 Copyright 2003, 2004, 2009, 2011-2015 Free Software Foundation, Inc.
12 This file is part of the GNU MP Library.
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of either:
17 * the GNU Lesser General Public License as published by the Free
18 Software Foundation; either version 3 of the License, or (at your
19 option) any later version.
21 or
23 * the GNU General Public License as published by the Free Software
24 Foundation; either version 2 of the License, or (at your option) any
25 later version.
27 or both in parallel, as here.
29 The GNU MP Library is distributed in the hope that it will be useful, but
30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
32 for more details.
34 You should have received copies of the GNU General Public License and the
35 GNU Lesser General Public License along with the GNU MP Library. If not,
36 see https://www.gnu.org/licenses/. */
38 #include <stdio.h> /* for printf */
39 #include <stdlib.h> /* for getenv */
45 /* Change this to "#define TRACE(x) x" for some traces. */
/* cpuid helper.  As used in this file, the first argument is a
   caller-supplied byte buffer that the call fills in and the second selects
   the cpuid leaf; leaves 0, 1, 7 and 0x80000001 are requested below.  The
   buffer from leaf 0 is used as the vendor string, and the value returned
   for leaf 1 is decoded into family and model.  */
50 long __gmpn_cpuid (char [12], int);
54 /* The "name"s in the table are values for the GMP_CPU_TYPE environment
55 variable. Anything can be used, but for now it's the canonical cpu types
56 as per config.guess/config.sub. */
/* Redirect every later use of __gmpn_cpuid in this file to fake_cpuid, so
   that the answers come from fake_cpuid_table (entry chosen through the
   GMP_CPU_TYPE environment variable).
   NOTE(review): the conditional that presumably guards this define is not
   visible here -- confirm before relying on it.  */
58 #define __gmpn_cpuid fake_cpuid
/* Pack FAMILY and MODEL into one signature word for fake_cpuid_table.

   Field placement (high to low):
     bits 24..31  FAMILY bits 4..11
     bits 16..19  MODEL  bits 4..7
     bits  8..11  FAMILY bits 0..3
     bits  4..7   MODEL  bits 0..3

   This is the inverse of the family/model extraction done in
   __gmpn_cpuvec_init, which adds the low and high parts back together.
   Both arguments are evaluated twice, so pass side-effect-free
   expressions.  */
#define MAKE_FMS(family, model)                                         \
  (  (((family) & 0xff0) << 20)                                         \
   + (((model)  & 0x0f0) << 12)                                         \
   + (((family) & 0x00f) << 8)                                          \
   + (((model)  & 0x00f) << 4))
68 } fake_cpuid_table
[] = {
69 { "core2", "GenuineIntel", MAKE_FMS (6, 0xf) },
70 { "nehalem", "GenuineIntel", MAKE_FMS (6, 0x1a) },
71 { "nhm", "GenuineIntel", MAKE_FMS (6, 0x1a) },
72 { "atom", "GenuineIntel", MAKE_FMS (6, 0x1c) },
73 { "westmere", "GenuineIntel", MAKE_FMS (6, 0x25) },
74 { "wsm", "GenuineIntel", MAKE_FMS (6, 0x25) },
75 { "sandybridge","GenuineIntel", MAKE_FMS (6, 0x2a) },
76 { "sbr", "GenuineIntel", MAKE_FMS (6, 0x2a) },
77 { "silvermont", "GenuineIntel", MAKE_FMS (6, 0x37) },
78 { "slm", "GenuineIntel", MAKE_FMS (6, 0x37) },
79 { "haswell", "GenuineIntel", MAKE_FMS (6, 0x3c) },
80 { "hwl", "GenuineIntel", MAKE_FMS (6, 0x3c) },
81 { "broadwell", "GenuineIntel", MAKE_FMS (6, 0x3d) },
82 { "bwl", "GenuineIntel", MAKE_FMS (6, 0x3d) },
83 { "skylake", "GenuineIntel", MAKE_FMS (6, 0x5e) },
84 { "sky", "GenuineIntel", MAKE_FMS (6, 0x5e) },
85 { "pentium4", "GenuineIntel", MAKE_FMS (15, 3) },
87 { "k8", "AuthenticAMD", MAKE_FMS (15, 0) },
88 { "k10", "AuthenticAMD", MAKE_FMS (16, 0) },
89 { "bobcat", "AuthenticAMD", MAKE_FMS (20, 1) },
90 { "bulldozer", "AuthenticAMD", MAKE_FMS (21, 1) },
91 { "piledriver", "AuthenticAMD", MAKE_FMS (21, 2) },
92 { "steamroller","AuthenticAMD", MAKE_FMS (21, 0x30) },
93 { "excavator", "AuthenticAMD", MAKE_FMS (21, 0x60) },
94 { "jaguar", "AuthenticAMD", MAKE_FMS (22, 1) },
96 { "nano", "CentaurHauls", MAKE_FMS (6, 15) },
100 fake_cpuid_lookup (void)
105 s
= getenv ("GMP_CPU_TYPE");
108 printf ("Need GMP_CPU_TYPE environment variable for fake cpuid\n");
112 for (i
= 0; i
< numberof (fake_cpuid_table
); i
++)
113 if (strcmp (s
, fake_cpuid_table
[i
].name
) == 0)
116 printf ("GMP_CPU_TYPE=%s unknown\n", s
);
121 fake_cpuid (char dst
[12], unsigned int id
)
123 int i
= fake_cpuid_lookup();
127 memcpy (dst
, fake_cpuid_table
[i
].vendor
, 12);
130 return fake_cpuid_table
[i
].fms
;
132 dst
[0] = 0xff; /* BMI1, AVX2, etc */
133 dst
[1] = 0xff; /* BMI2, etc */
136 dst
[4 + 29 / 8] = (1 << (29 % 8)); /* "long" mode */
139 printf ("fake_cpuid(): oops, unknown id %d\n", id
);
/* Function-pointer types matching the preinv_divrem_1 and preinv_mod_1
   members of struct cpuvec_t.  __gmpn_cpuvec_init uses them to cast the
   plain divrem_1 / mod_1 pointers into those members when they are still
   NULL.  The prototypes themselves come from the DECL_* macros, which are
   not defined in the visible part of this file.  */
146 typedef DECL_preinv_divrem_1 ((*preinv_divrem_1_t
));
147 typedef DECL_preinv_mod_1 ((*preinv_mod_1_t
));
149 struct cpuvec_t __gmpn_cpuvec
= {
151 __MPN(addlsh1_n_init
),
152 __MPN(addlsh2_n_init
),
153 __MPN(addmul_1_init
),
154 __MPN(addmul_2_init
),
155 __MPN(bdiv_dbm1c_init
),
156 __MPN(cnd_add_n_init
),
157 __MPN(cnd_sub_n_init
),
161 __MPN(divexact_1_init
),
162 __MPN(divrem_1_init
),
167 __MPN(mod_1_1p_init
),
168 __MPN(mod_1_1p_cps_init
),
169 __MPN(mod_1s_2p_init
),
170 __MPN(mod_1s_2p_cps_init
),
171 __MPN(mod_1s_4p_init
),
172 __MPN(mod_1s_4p_cps_init
),
173 __MPN(mod_34lsub1_init
),
174 __MPN(modexact_1c_odd_init
),
176 __MPN(mul_basecase_init
),
177 __MPN(mullo_basecase_init
),
178 __MPN(preinv_divrem_1_init
),
179 __MPN(preinv_mod_1_init
),
183 __MPN(sqr_basecase_init
),
185 __MPN(sublsh1_n_init
),
186 __MPN(submul_1_init
),
/* Starts at zero.  __gmpn_cpuvec_init stores 1 here, through a
   volatile-qualified pointer, after CPUVEC_INSTALL has copied the decided
   vector into place.  */
190 int __gmpn_cpuvec_initialized
= 0;
192 /* The following setups start with generic x86, then overwrite with
193 specifics for a chip, and higher versions of that chip.
195 The arrangement of the setups here will normally be the same as the $path
196 selections in configure.in for the respective chips.
198 This code is reentrant and thread safe. We always calculate the same
199 decided_cpuvec, so if two copies of the code are running it doesn't
200 matter which completes first, both write the same to __gmpn_cpuvec.
202 We need to go via decided_cpuvec because if one thread has completed
203 __gmpn_cpuvec then it may be making use of the threshold values in that
204 vector. If another thread is still running __gmpn_cpuvec_init then we
205 don't want it to write different values to those fields since some of the
206 asm routines only operate correctly up to their own defined threshold,
207 not an arbitrary value. */
210 __gmpn_cpuvec_init (void)
212 struct cpuvec_t decided_cpuvec
;
213 char vendor_string
[13];
214 char dummy_string
[12];
218 TRACE (printf ("__gmpn_cpuvec_init:\n"));
220 memset (&decided_cpuvec
, '\0', sizeof (decided_cpuvec
));
225 __gmpn_cpuid (vendor_string
, 0);
226 vendor_string
[12] = 0;
228 fms
= __gmpn_cpuid (dummy_string
, 1);
229 family
= ((fms
>> 8) & 0xf) + ((fms
>> 20) & 0xff);
230 model
= ((fms
>> 4) & 0xf) + ((fms
>> 12) & 0xf0);
232 /* Check extended feature flags */
233 __gmpn_cpuid (dummy_string
, 0x80000001);
234 if ((dummy_string
[4 + 29 / 8] & (1 << (29 % 8))) == 0)
235 abort (); /* longmode-capable-bit turned off! */
237 /*********************************************************/
238 /*** WARNING: keep this list in sync with config.guess ***/
239 /*********************************************************/
240 if (strcmp (vendor_string
, "GenuineIntel") == 0)
247 case 0x0f: /* Conroe Merom Kentsfield Allendale */
255 case 0x17: /* PNR Wolfdale Yorkfield */
258 case 0x1d: /* PNR Dunnington */
262 case 0x1c: /* Atom Silverthorne */
263 case 0x26: /* Atom Lincroft */
264 case 0x27: /* Atom Saltwell? */
265 case 0x36: /* Atom Cedarview/Saltwell */
269 case 0x1a: /* NHM Gainestown */
271 case 0x1e: /* NHM Lynnfield/Jasper */
278 case 0x25: /* WSM Clarkdale/Arrandale */
282 case 0x2c: /* WSM Gulftown */
283 case 0x2e: /* NHM Beckton */
284 case 0x2f: /* WSM Eagleton */
285 case 0x37: /* Silvermont */
286 case 0x4a: /* Silvermont */
287 case 0x4c: /* Airmont */
288 case 0x4d: /* Silvermont/Avoton */
289 case 0x5a: /* Silvermont */
290 case 0x5c: /* Goldmont */
291 case 0x5f: /* Goldmont */
293 CPUVEC_SETUP_coreinhm
;
297 case 0x2d: /* SBC-EP */
299 case 0x3e: /* IBR Ivytown */
301 CPUVEC_SETUP_coreinhm
;
302 CPUVEC_SETUP_coreisbr
;
304 case 0x3c: /* Haswell client */
305 case 0x3f: /* Haswell server */
306 case 0x45: /* Haswell ULT */
307 case 0x46: /* Crystal Well */
309 CPUVEC_SETUP_coreinhm
;
310 CPUVEC_SETUP_coreisbr
;
311 /* Some Haswells lack BMI2. Let them appear as Sandybridges for
312 now. */
313 __gmpn_cpuid (dummy_string
, 7);
314 if ((dummy_string
[0 + 8 / 8] & (1 << (8 % 8))) != 0)
315 CPUVEC_SETUP_coreihwl
;
317 case 0x3d: /* Broadwell */
318 case 0x47: /* Broadwell */
319 case 0x4f: /* Broadwell server */
320 case 0x56: /* Broadwell microserver */
322 CPUVEC_SETUP_coreinhm
;
323 CPUVEC_SETUP_coreisbr
;
324 CPUVEC_SETUP_coreihwl
;
325 CPUVEC_SETUP_coreibwl
;
327 case 0x4e: /* Skylake client */
328 case 0x55: /* Skylake server */
329 case 0x5e: /* Skylake */
330 case 0x8e: /* Cabylake */
331 case 0x9e: /* Cabylake */
333 CPUVEC_SETUP_coreinhm
;
334 CPUVEC_SETUP_coreisbr
;
335 CPUVEC_SETUP_coreihwl
;
336 CPUVEC_SETUP_coreibwl
;
337 CPUVEC_SETUP_skylake
;
343 CPUVEC_SETUP_pentium4
;
347 else if (strcmp (vendor_string
, "AuthenticAMD") == 0)
352 case 0x11: /* "fam 11h", mix of k8 and k10 */
359 case 0x12: /* k10 (llano) */
364 case 0x14: /* bobcat */
365 case 0x16: /* jaguar */
371 case 0x15: /* bulldozer, piledriver, steamroller, excavator */
377 else if (strcmp (vendor_string
, "CentaurHauls") == 0)
388 /* There's no x86 generic mpn_preinv_divrem_1 or mpn_preinv_mod_1.
389 Instead default to the plain versions from whichever CPU we detected.
390 The function arguments are compatible, no need for any glue code. */
391 if (decided_cpuvec
.preinv_divrem_1
== NULL
)
392 decided_cpuvec
.preinv_divrem_1
=(preinv_divrem_1_t
)decided_cpuvec
.divrem_1
;
393 if (decided_cpuvec
.preinv_mod_1
== NULL
)
394 decided_cpuvec
.preinv_mod_1
=(preinv_mod_1_t
) decided_cpuvec
.mod_1
;
396 ASSERT_CPUVEC (decided_cpuvec
);
397 CPUVEC_INSTALL (decided_cpuvec
);
399 /* Set this once the threshold fields are ready.
400 Use volatile to prevent it getting moved. */
401 *((volatile int *) &__gmpn_cpuvec_initialized
) = 1;