beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86_64 / fat / fat.c
blob30cd2104ff50c9bc3d16c11e81fe52b91a4e5de0
/* x86_64 fat binary initializers.

   Contributed to the GNU project by Kevin Ryde (original x86_32 code) and
   Torbjorn Granlund (port to x86_64)

   THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY.
   THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
   COMPLETELY IN FUTURE GNU MP RELEASES.

Copyright 2003, 2004, 2009, 2011-2015 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
38 #include <stdio.h> /* for printf */
39 #include <stdlib.h> /* for getenv */
40 #include <string.h>
42 #include "gmp.h"
43 #include "gmp-impl.h"
/* Tracing hook.  As defined here TRACE(x) expands to nothing, so its
   argument is neither compiled nor evaluated.
   Change this to "#define TRACE(x) x" for some traces.  */
#define TRACE(x)
/* fat_entry.asm

   As used in this file: DST is a 12-byte output buffer and ID selects the
   cpuid leaf.  Leaf 0 yields the 12-character vendor string in DST (the
   caller adds the terminating NUL); for leaf 1 the return value is the
   family/model/stepping word.  For leaves 7 and 0x80000001 the callers
   test individual feature bits in DST.
   NOTE(review): which cpuid output registers land in which bytes of DST
   is decided by the asm, not visible here -- confirm in fat_entry.asm.  */
long __gmpn_cpuid (char dst[12], int id);
53 #if WANT_FAKE_CPUID
/* The "name"s in the table are values for the GMP_CPU_TYPE environment
   variable.  Anything can be used, but for now it's the canonical cpu types
   as per config.guess/config.sub.  */

/* From here on every use of __gmpn_cpuid in this file resolves to the
   table-driven fake_cpuid instead of the real routine.  */
#define __gmpn_cpuid fake_cpuid

/* Pack FAMILY and MODEL into a single word laid out so that the
   family/model extraction in __gmpn_cpuvec_init recovers them:
     family bits 0-3  -> word bits 8-11
     family bits 4-11 -> word bits 24-31
     model  bits 0-3  -> word bits 4-7
     model  bits 4-7  -> word bits 16-19
   The stepping field (word bits 0-3) is left as zero.  */
#define MAKE_FMS(family, model)                                 \
  ((((family) & 0xf) << 8) + (((family) & 0xff0) << 20)         \
   + (((model) & 0xf) << 4) + (((model) & 0xf0) << 12))

/* One entry per accepted GMP_CPU_TYPE value.  Each vendor string is exactly
   12 characters, which is the amount fake_cpuid copies out for leaf 0.
   Several names are aliases sharing the same family/model word.  */
static const struct {
  const char  *name;    /* value matched against GMP_CPU_TYPE */
  const char  *vendor;  /* 12-character cpuid vendor string */
  unsigned    fms;      /* family/model word built with MAKE_FMS */
} fake_cpuid_table[] = {
  { "core2",      "GenuineIntel", MAKE_FMS (6, 0xf) },
  { "nehalem",    "GenuineIntel", MAKE_FMS (6, 0x1a) },
  { "nhm",        "GenuineIntel", MAKE_FMS (6, 0x1a) },
  { "atom",       "GenuineIntel", MAKE_FMS (6, 0x1c) },
  { "westmere",   "GenuineIntel", MAKE_FMS (6, 0x25) },
  { "wsm",        "GenuineIntel", MAKE_FMS (6, 0x25) },
  { "sandybridge","GenuineIntel", MAKE_FMS (6, 0x2a) },
  { "sbr",        "GenuineIntel", MAKE_FMS (6, 0x2a) },
  { "silvermont", "GenuineIntel", MAKE_FMS (6, 0x37) },
  { "slm",        "GenuineIntel", MAKE_FMS (6, 0x37) },
  { "haswell",    "GenuineIntel", MAKE_FMS (6, 0x3c) },
  { "hwl",        "GenuineIntel", MAKE_FMS (6, 0x3c) },
  { "broadwell",  "GenuineIntel", MAKE_FMS (6, 0x3d) },
  { "bwl",        "GenuineIntel", MAKE_FMS (6, 0x3d) },
  { "skylake",    "GenuineIntel", MAKE_FMS (6, 0x5e) },
  { "sky",        "GenuineIntel", MAKE_FMS (6, 0x5e) },
  { "pentium4",   "GenuineIntel", MAKE_FMS (15, 3) },

  { "k8",         "AuthenticAMD", MAKE_FMS (15, 0) },
  { "k10",        "AuthenticAMD", MAKE_FMS (16, 0) },
  { "bobcat",     "AuthenticAMD", MAKE_FMS (20, 1) },
  { "bulldozer",  "AuthenticAMD", MAKE_FMS (21, 1) },
  { "piledriver", "AuthenticAMD", MAKE_FMS (21, 2) },
  { "steamroller","AuthenticAMD", MAKE_FMS (21, 0x30) },
  { "excavator",  "AuthenticAMD", MAKE_FMS (21, 0x60) },
  { "jaguar",     "AuthenticAMD", MAKE_FMS (22, 1) },

  { "nano",       "CentaurHauls", MAKE_FMS (6, 15) },
};
99 static int
100 fake_cpuid_lookup (void)
102 char *s;
103 int i;
105 s = getenv ("GMP_CPU_TYPE");
106 if (s == NULL)
108 printf ("Need GMP_CPU_TYPE environment variable for fake cpuid\n");
109 abort ();
112 for (i = 0; i < numberof (fake_cpuid_table); i++)
113 if (strcmp (s, fake_cpuid_table[i].name) == 0)
114 return i;
116 printf ("GMP_CPU_TYPE=%s unknown\n", s);
117 abort ();
120 static long
121 fake_cpuid (char dst[12], unsigned int id)
123 int i = fake_cpuid_lookup();
125 switch (id) {
126 case 0:
127 memcpy (dst, fake_cpuid_table[i].vendor, 12);
128 return 0;
129 case 1:
130 return fake_cpuid_table[i].fms;
131 case 7:
132 dst[0] = 0xff; /* BMI1, AVX2, etc */
133 dst[1] = 0xff; /* BMI2, etc */
134 return 0;
135 case 0x80000001:
136 dst[4 + 29 / 8] = (1 << (29 % 8)); /* "long" mode */
137 return 0;
138 default:
139 printf ("fake_cpuid(): oops, unknown id %d\n", id);
140 abort ();
143 #endif
146 typedef DECL_preinv_divrem_1 ((*preinv_divrem_1_t));
147 typedef DECL_preinv_mod_1 ((*preinv_mod_1_t));
149 struct cpuvec_t __gmpn_cpuvec = {
150 __MPN(add_n_init),
151 __MPN(addlsh1_n_init),
152 __MPN(addlsh2_n_init),
153 __MPN(addmul_1_init),
154 __MPN(addmul_2_init),
155 __MPN(bdiv_dbm1c_init),
156 __MPN(cnd_add_n_init),
157 __MPN(cnd_sub_n_init),
158 __MPN(com_init),
159 __MPN(copyd_init),
160 __MPN(copyi_init),
161 __MPN(divexact_1_init),
162 __MPN(divrem_1_init),
163 __MPN(gcd_1_init),
164 __MPN(lshift_init),
165 __MPN(lshiftc_init),
166 __MPN(mod_1_init),
167 __MPN(mod_1_1p_init),
168 __MPN(mod_1_1p_cps_init),
169 __MPN(mod_1s_2p_init),
170 __MPN(mod_1s_2p_cps_init),
171 __MPN(mod_1s_4p_init),
172 __MPN(mod_1s_4p_cps_init),
173 __MPN(mod_34lsub1_init),
174 __MPN(modexact_1c_odd_init),
175 __MPN(mul_1_init),
176 __MPN(mul_basecase_init),
177 __MPN(mullo_basecase_init),
178 __MPN(preinv_divrem_1_init),
179 __MPN(preinv_mod_1_init),
180 __MPN(redc_1_init),
181 __MPN(redc_2_init),
182 __MPN(rshift_init),
183 __MPN(sqr_basecase_init),
184 __MPN(sub_n_init),
185 __MPN(sublsh1_n_init),
186 __MPN(submul_1_init),
/* Zero until __gmpn_cpuvec_init has installed the vector chosen for the
   running CPU; that function stores 1 here as its very last step.  */
int __gmpn_cpuvec_initialized = 0;
192 /* The following setups start with generic x86, then overwrite with
193 specifics for a chip, and higher versions of that chip.
195 The arrangement of the setups here will normally be the same as the $path
196 selections in configure.in for the respective chips.
198 This code is reentrant and thread safe. We always calculate the same
199 decided_cpuvec, so if two copies of the code are running it doesn't
200 matter which completes first, both write the same to __gmpn_cpuvec.
202 We need to go via decided_cpuvec because if one thread has completed
203 __gmpn_cpuvec then it may be making use of the threshold values in that
204 vector. If another thread is still running __gmpn_cpuvec_init then we
205 don't want it to write different values to those fields since some of the
206 asm routines only operate correctly up to their own defined threshold,
207 not an arbitrary value. */
209 void
210 __gmpn_cpuvec_init (void)
212 struct cpuvec_t decided_cpuvec;
213 char vendor_string[13];
214 char dummy_string[12];
215 long fms;
216 int family, model;
218 TRACE (printf ("__gmpn_cpuvec_init:\n"));
220 memset (&decided_cpuvec, '\0', sizeof (decided_cpuvec));
222 CPUVEC_SETUP_x86_64;
223 CPUVEC_SETUP_fat;
225 __gmpn_cpuid (vendor_string, 0);
226 vendor_string[12] = 0;
228 fms = __gmpn_cpuid (dummy_string, 1);
229 family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);
230 model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);
232 /* Check extended feature flags */
233 __gmpn_cpuid (dummy_string, 0x80000001);
234 if ((dummy_string[4 + 29 / 8] & (1 << (29 % 8))) == 0)
235 abort (); /* longmode-capable-bit turned off! */
237 /*********************************************************/
238 /*** WARNING: keep this list in sync with config.guess ***/
239 /*********************************************************/
240 if (strcmp (vendor_string, "GenuineIntel") == 0)
242 switch (family)
244 case 6:
245 switch (model)
247 case 0x0f: /* Conroe Merom Kentsfield Allendale */
248 case 0x10:
249 case 0x11:
250 case 0x12:
251 case 0x13:
252 case 0x14:
253 case 0x15:
254 case 0x16:
255 case 0x17: /* PNR Wolfdale Yorkfield */
256 case 0x18:
257 case 0x19:
258 case 0x1d: /* PNR Dunnington */
259 CPUVEC_SETUP_core2;
260 break;
262 case 0x1c: /* Atom Silverthorne */
263 case 0x26: /* Atom Lincroft */
264 case 0x27: /* Atom Saltwell? */
265 case 0x36: /* Atom Cedarview/Saltwell */
266 CPUVEC_SETUP_atom;
267 break;
269 case 0x1a: /* NHM Gainestown */
270 case 0x1b:
271 case 0x1e: /* NHM Lynnfield/Jasper */
272 case 0x1f:
273 case 0x20:
274 case 0x21:
275 case 0x22:
276 case 0x23:
277 case 0x24:
278 case 0x25: /* WSM Clarkdale/Arrandale */
279 case 0x28:
280 case 0x29:
281 case 0x2b:
282 case 0x2c: /* WSM Gulftown */
283 case 0x2e: /* NHM Beckton */
284 case 0x2f: /* WSM Eagleton */
285 case 0x37: /* Silvermont */
286 case 0x4a: /* Silvermont */
287 case 0x4c: /* Airmont */
288 case 0x4d: /* Silvermont/Avoton */
289 case 0x5a: /* Silvermont */
290 case 0x5c: /* Goldmont */
291 case 0x5f: /* Goldmont */
292 CPUVEC_SETUP_core2;
293 CPUVEC_SETUP_coreinhm;
294 break;
296 case 0x2a: /* SB */
297 case 0x2d: /* SBC-EP */
298 case 0x3a: /* IBR */
299 case 0x3e: /* IBR Ivytown */
300 CPUVEC_SETUP_core2;
301 CPUVEC_SETUP_coreinhm;
302 CPUVEC_SETUP_coreisbr;
303 break;
304 case 0x3c: /* Haswell client */
305 case 0x3f: /* Haswell server */
306 case 0x45: /* Haswell ULT */
307 case 0x46: /* Crystal Well */
308 CPUVEC_SETUP_core2;
309 CPUVEC_SETUP_coreinhm;
310 CPUVEC_SETUP_coreisbr;
311 /* Some Haswells lack BMI2. Let them appear as Sandybridges for
312 now. */
313 __gmpn_cpuid (dummy_string, 7);
314 if ((dummy_string[0 + 8 / 8] & (1 << (8 % 8))) != 0)
315 CPUVEC_SETUP_coreihwl;
316 break;
317 case 0x3d: /* Broadwell */
318 case 0x47: /* Broadwell */
319 case 0x4f: /* Broadwell server */
320 case 0x56: /* Broadwell microserver */
321 CPUVEC_SETUP_core2;
322 CPUVEC_SETUP_coreinhm;
323 CPUVEC_SETUP_coreisbr;
324 CPUVEC_SETUP_coreihwl;
325 CPUVEC_SETUP_coreibwl;
326 break;
327 case 0x4e: /* Skylake client */
328 case 0x55: /* Skylake server */
329 case 0x5e: /* Skylake */
330 case 0x8e: /* Cabylake */
331 case 0x9e: /* Cabylake */
332 CPUVEC_SETUP_core2;
333 CPUVEC_SETUP_coreinhm;
334 CPUVEC_SETUP_coreisbr;
335 CPUVEC_SETUP_coreihwl;
336 CPUVEC_SETUP_coreibwl;
337 CPUVEC_SETUP_skylake;
338 break;
340 break;
342 case 15:
343 CPUVEC_SETUP_pentium4;
344 break;
347 else if (strcmp (vendor_string, "AuthenticAMD") == 0)
349 switch (family)
351 case 0x0f: /* k8 */
352 case 0x11: /* "fam 11h", mix of k8 and k10 */
353 case 0x13:
354 case 0x17:
355 CPUVEC_SETUP_k8;
356 break;
358 case 0x10: /* k10 */
359 case 0x12: /* k10 (llano) */
360 CPUVEC_SETUP_k8;
361 CPUVEC_SETUP_k10;
362 break;
364 case 0x14: /* bobcat */
365 case 0x16: /* jaguar */
366 CPUVEC_SETUP_k8;
367 CPUVEC_SETUP_k10;
368 CPUVEC_SETUP_bobcat;
369 break;
371 case 0x15: /* bulldozer, piledriver, steamroller, excavator */
372 CPUVEC_SETUP_k8;
373 CPUVEC_SETUP_k10;
374 CPUVEC_SETUP_bd1;
377 else if (strcmp (vendor_string, "CentaurHauls") == 0)
379 switch (family)
381 case 6:
382 if (model >= 15)
383 CPUVEC_SETUP_nano;
384 break;
388 /* There's no x86 generic mpn_preinv_divrem_1 or mpn_preinv_mod_1.
389 Instead default to the plain versions from whichever CPU we detected.
390 The function arguments are compatible, no need for any glue code. */
391 if (decided_cpuvec.preinv_divrem_1 == NULL)
392 decided_cpuvec.preinv_divrem_1 =(preinv_divrem_1_t)decided_cpuvec.divrem_1;
393 if (decided_cpuvec.preinv_mod_1 == NULL)
394 decided_cpuvec.preinv_mod_1 =(preinv_mod_1_t) decided_cpuvec.mod_1;
396 ASSERT_CPUVEC (decided_cpuvec);
397 CPUVEC_INSTALL (decided_cpuvec);
399 /* Set this once the threshold fields are ready.
400 Use volatile to prevent it getting moved. */
401 *((volatile int *) &__gmpn_cpuvec_initialized) = 1;