arch/x86/kernel/platform/i86pc/os/cpuid.c
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
25 * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
28 * Copyright (c) 2010, Intel Corporation.
29 * All rights reserved.
32 * Portions Copyright 2009 Advanced Micro Devices, Inc.
35 * Copyright 2018 Joyent, Inc.
38 * Various routines to handle identification
39 * and classification of x86 processors.
42 #include <sys/types.h>
43 #include <sys/archsystm.h>
44 #include <sys/x86_archext.h>
45 #include <sys/kmem.h>
46 #include <sys/systm.h>
47 #include <sys/cmn_err.h>
48 #include <sys/sunddi.h>
49 #include <sys/sunndi.h>
50 #include <sys/cpuvar.h>
51 #include <sys/processor.h>
52 #include <sys/sysmacros.h>
53 #include <sys/pg.h>
54 #include <sys/fp.h>
55 #include <sys/controlregs.h>
56 #include <sys/bitmap.h>
57 #include <sys/auxv_386.h>
58 #include <sys/memnode.h>
59 #include <sys/pci_cfgspace.h>
60 #include <sys/comm_page.h>
61 #include <sys/mach_mmu.h>
62 #include <sys/tsc.h>
64 #include <sys/ontrap.h>
67 * Pass 0 of cpuid feature analysis happens in locore. It contains special code
68 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with
69 * them accordingly. For most modern processors, feature detection occurs here
70 * in pass 1.
72 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup()
73 * for the boot CPU and does the basic analysis that the early kernel needs.
74 * x86_featureset is set based on the return value of cpuid_pass1() of the boot
75 * CPU.
77 * Pass 1 includes:
79 * o Determining vendor/model/family/stepping and setting x86_type and
80 * x86_vendor accordingly.
81 * o Processing the feature flags returned by the cpuid instruction while
82 * applying any workarounds or tricks for the specific processor.
83 * o Mapping the feature flags into illumos feature bits (X86_*).
84 * o Processing extended feature flags if supported by the processor,
85 * again while applying specific processor knowledge.
86 * o Determining the CMT characteristics of the system.
88 * Pass 1 is done on non-boot CPUs during their initialization and the results
89 * are used only as a meager attempt at ensuring that all processors within the
90 * system support the same features.
92 * Pass 2 of cpuid feature analysis happens just at the beginning
93 * of startup(). It just copies in and corrects the remainder
94 * of the cpuid data we depend on: standard cpuid functions that we didn't
95 * need for pass1 feature analysis, and extended cpuid functions beyond the
96 * simple feature processing done in pass1.
98 * Pass 3 of cpuid analysis is invoked after basic kernel services; in
99 * particular kernel memory allocation has been made available. It creates a
100 * readable brand string based on the data collected in the first two passes.
102 * Pass 4 of cpuid analysis is invoked after post_startup() when all
103 * the support infrastructure for various hardware features has been
104 * initialized. It determines which processor features will be reported
105 * to userland via the aux vector.
107 * All passes are executed on all CPUs, but only the boot CPU determines what
108 * features the kernel will use.
110 * Much of the worst junk in this file is for the support of processors
111 * that didn't really implement the cpuid instruction properly.
113 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon,
114 * the pass numbers. Accordingly, changes to the pass code may require changes
115 * to the accessor code.
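/*
 * Illustrative usage sketch (not part of the original logic): once pass 1
 * has populated x86_featureset on the boot CPU, later kernel code can test
 * a capability with the accessors defined below, e.g.
 *
 *	if (is_x86_feature(x86_featureset, X86FSET_SSE2))
 *		... take an SSE2-capable path ...
 *
 * The consumer shown here is hypothetical; only the feature test itself
 * comes from this file.
 */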
118 uint_t x86_vendor = X86_VENDOR_IntelClone;
119 uint_t x86_type = X86_TYPE_OTHER;
120 uint_t x86_clflush_size = 0;
122 #if defined(__xpv)
123 int x86_use_pcid = 0;
124 int x86_use_invpcid = 0;
125 #else
126 int x86_use_pcid = -1;
127 int x86_use_invpcid = -1;
128 #endif
130 uint_t pentiumpro_bug4046376;
132 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
134 static char *x86_feature_names[NUM_X86_FEATURES] = {
135 "lgpg",
136 "tsc",
137 "msr",
138 "mtrr",
139 "pge",
140 "de",
141 "cmov",
142 "mmx",
143 "mca",
144 "pae",
145 "cv8",
146 "pat",
147 "sep",
148 "sse",
149 "sse2",
150 "htt",
151 "asysc",
152 "nx",
153 "sse3",
154 "cx16",
155 "cmp",
156 "tscp",
157 "mwait",
158 "sse4a",
159 "cpuid",
160 "ssse3",
161 "sse4_1",
162 "sse4_2",
163 "1gpg",
164 "clfsh",
165 "64",
166 "aes",
167 "pclmulqdq",
168 "xsave",
169 "avx",
170 "vmx",
171 "svm",
172 "topoext",
173 "f16c",
174 "rdrand",
175 "x2apic",
176 "avx2",
177 "bmi1",
178 "bmi2",
179 "fma",
180 "smep",
181 "smap",
182 "adx",
183 "rdseed",
184 "mpx",
185 "avx512f",
186 "avx512dq",
187 "avx512pf",
188 "avx512er",
189 "avx512cd",
190 "avx512bw",
191 "avx512vl",
192 "avx512fma",
193 "avx512vbmi",
194 "avx512_vpopcntdq",
195 "avx512_4vnniw",
196 "avx512_4fmaps",
197 "xsaveopt",
198 "xsavec",
199 "xsaves",
200 "sha",
201 "umip",
202 "pku",
203 "ospke",
204 "pcid",
205 "invpcid",
208 boolean_t
209 is_x86_feature(void *featureset, uint_t feature)
211 ASSERT(feature < NUM_X86_FEATURES);
212 return (BT_TEST((ulong_t *)featureset, feature));
215 void
216 add_x86_feature(void *featureset, uint_t feature)
218 ASSERT(feature < NUM_X86_FEATURES);
219 BT_SET((ulong_t *)featureset, feature);
222 void
223 remove_x86_feature(void *featureset, uint_t feature)
225 ASSERT(feature < NUM_X86_FEATURES);
226 BT_CLEAR((ulong_t *)featureset, feature);
229 boolean_t
230 compare_x86_featureset(void *setA, void *setB)
233 * We assume that the unused bits of the bitmap are always zero.
235 if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
236 return (B_TRUE);
237 } else {
238 return (B_FALSE);
242 void
243 print_x86_featureset(void *featureset)
245 uint_t i;
247 for (i = 0; i < NUM_X86_FEATURES; i++) {
248 if (is_x86_feature(featureset, i)) {
249 cmn_err(CE_CONT, "?x86_feature: %s\n",
250 x86_feature_names[i]);
255 /* Note: This is the maximum size for the CPU, not the size of the structure. */
256 static size_t xsave_state_size = 0;
257 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
258 boolean_t xsave_force_disable = B_FALSE;
259 extern int disable_smap;
262 * This is set to platform type we are running on.
264 static int platform_type = -1;
267 * Variable to patch if hypervisor platform detection needs to be
268 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0).
270 int enable_platform_detection = 1;
273 * monitor/mwait info.
275 * size_actual and buf_actual are the real address and size allocated to get
276 * proper mwait_buf alignment. buf_actual and size_actual should be passed
277 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use
278 * processor cache-line alignment, but this is not guaranteed in the future.
280 struct mwait_info {
281 size_t mon_min; /* min size to avoid missed wakeups */
282 size_t mon_max; /* size to avoid false wakeups */
283 size_t size_actual; /* size actually allocated */
284 void *buf_actual; /* memory actually allocated */
285 uint32_t support; /* processor support of monitor/mwait */
289 * xsave/xrestor info.
291 * This structure contains HW feature bits and the size of the xsave save area.
292 * Note: the kernel declares a fixed size (AVX_XSAVE_SIZE) structure
293 * (xsave_state) to describe the xsave layout. However, at runtime the
294 * per-lwp xsave area is dynamically allocated based on xsav_max_size. The
295 * xsave_state structure simply represents the legacy layout of the beginning
296 * of the xsave area.
298 struct xsave_info {
299 uint32_t xsav_hw_features_low; /* Supported HW features */
300 uint32_t xsav_hw_features_high; /* Supported HW features */
301 size_t xsav_max_size; /* max size save area for HW features */
302 size_t ymm_size; /* AVX: size of ymm save area */
303 size_t ymm_offset; /* AVX: offset for ymm save area */
304 size_t bndregs_size; /* MPX: size of bndregs save area */
305 size_t bndregs_offset; /* MPX: offset for bndregs save area */
306 size_t bndcsr_size; /* MPX: size of bndcsr save area */
307 size_t bndcsr_offset; /* MPX: offset for bndcsr save area */
308 size_t opmask_size; /* AVX512: size of opmask save */
309 size_t opmask_offset; /* AVX512: offset for opmask save */
310 size_t zmmlo_size; /* AVX512: size of zmm 256 save */
311 size_t zmmlo_offset; /* AVX512: offset for zmm 256 save */
312 size_t zmmhi_size; /* AVX512: size of zmm hi reg save */
313 size_t zmmhi_offset; /* AVX512: offset for zmm hi reg save */
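/*
 * Illustrative note (a sketch, not original text): the offset/size pairs
 * above mirror what CPUID leaf 0xD reports for each state component in
 * cpuid_pass2(), and xsav_max_size (CPUID.0DH:ECX with subleaf 0) is what
 * drives the dynamic per-lwp save-area allocation described in the
 * comment above.
 */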
318 * These constants determine how many of the elements of the
319 * cpuid we cache in the cpuid_info data structure; the
320 * remaining elements are accessible via the cpuid instruction.
323 #define NMAX_CPI_STD 8 /* eax = 0 .. 7 */
324 #define NMAX_CPI_EXTD 0x1f /* eax = 0x80000000 .. 0x8000001e */
327 * Some terminology needs to be explained:
328 * - Socket: Something that can be plugged into a motherboard.
329 * - Package: Same as socket
330 * - Chip: Same as socket. Note that AMD's documentation uses term "chip"
331 * differently: there, chip is the same as processor node (below)
332 * - Processor node: Some AMD processors have more than one
333 * "subprocessor" embedded in a package. These subprocessors (nodes)
334 * are fully-functional processors themselves with cores, caches,
335 * memory controllers, PCI configuration spaces. They are connected
336 * inside the package with Hypertransport links. On single-node
337 * processors, processor node is equivalent to chip/socket/package.
338 * - Compute Unit: Some AMD processors pair cores in "compute units" that
339 * share the FPU and the I$ and L2 caches.
342 struct cpuid_info {
343 uint_t cpi_pass; /* last pass completed */
345 * standard function information
347 uint_t cpi_maxeax; /* fn 0: %eax */
348 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */
349 uint_t cpi_vendor; /* enum of cpi_vendorstr */
351 uint_t cpi_family; /* fn 1: extended family */
352 uint_t cpi_model; /* fn 1: extended model */
353 uint_t cpi_step; /* fn 1: stepping */
354 chipid_t cpi_chipid; /* fn 1: %ebx: Intel: chip # */
355 /* AMD: package/socket # */
356 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */
357 int cpi_clogid; /* fn 1: %ebx: thread # */
358 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */
359 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */
360 uint_t cpi_ncache; /* fn 2: number of elements */
361 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */
362 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */
363 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */
364 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */
365 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 7 */
367 * extended function information
369 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */
370 char cpi_brandstr[49]; /* fn 0x8000000[234] */
371 uint8_t cpi_pabits; /* fn 0x80000006: %eax */
372 uint8_t cpi_vabits; /* fn 0x80000006: %eax */
373 uint8_t cpi_fp_amd_save; /* AMD: FP error pointer save rqd. */
374 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x800000XX */
376 id_t cpi_coreid; /* same coreid => strands share core */
377 int cpi_pkgcoreid; /* core number within single package */
378 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */
379 /* Intel: fn 4: %eax[31-26] */
381 * supported feature information
383 uint32_t cpi_support[6];
384 #define STD_EDX_FEATURES 0
385 #define AMD_EDX_FEATURES 1
386 #define TM_EDX_FEATURES 2
387 #define STD_ECX_FEATURES 3
388 #define AMD_ECX_FEATURES 4
389 #define STD_EBX_FEATURES 5
391 * Synthesized information, where known.
393 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */
394 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */
395 uint32_t cpi_socket; /* Chip package/socket type */
397 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */
398 uint32_t cpi_apicid;
399 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */
400 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */
401 /* Intel: 1 */
402 uint_t cpi_compunitid; /* AMD: ComputeUnit ID, Intel: coreid */
403 uint_t cpi_cores_per_compunit; /* AMD: # of cores in the ComputeUnit */
405 struct xsave_info cpi_xsave; /* fn D: xsave/xrestor info */
409 static struct cpuid_info cpuid_info0;
412 * These bit fields are defined by the Intel Application Note AP-485
413 * "Intel Processor Identification and the CPUID Instruction"
415 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
416 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
417 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
418 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
419 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
420 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4)
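/*
 * Worked example (illustrative): a leaf-1 %eax signature of 0x000306A9
 * decodes as CPI_FAMILY = 0x6, CPI_MODEL = 0xA, CPI_STEP = 0x9 and
 * CPI_MODEL_XTD = 0x3; since family 6 uses the extended model field,
 * the effective model becomes (0x3 << 4) + 0xA = 0x3A.
 */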
422 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx)
423 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx)
424 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx)
425 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx)
426 #define CPI_FEATURES_7_0_EBX(cpi) ((cpi)->cpi_std[7].cp_ebx)
427 #define CPI_FEATURES_7_0_ECX(cpi) ((cpi)->cpi_std[7].cp_ecx)
428 #define CPI_FEATURES_7_0_EDX(cpi) ((cpi)->cpi_std[7].cp_edx)
430 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
431 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7)
432 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
433 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)
435 #define CPI_MAXEAX_MAX 0x100 /* sanity control */
436 #define CPI_XMAXEAX_MAX 0x80000100
437 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */
438 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */
441 * Function 4 (Deterministic Cache Parameters) macros
442 * Defined by Intel Application Note AP-485
444 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26)
445 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14)
446 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9)
447 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8)
448 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5)
449 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0)
450 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8)
452 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22)
453 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12)
454 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0)
456 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0)
458 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0)
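/*
 * Worked example (illustrative): the function-4 fields are "minus one"
 * encoded, so a 32KB, 8-way L1 data cache with 64-byte lines and 64 sets
 * reports ways = 7, parts = 0, line size = 63 and sets = 63; the cache
 * size is (CPI_CACHE_WAYS + 1) * (CPI_CACHE_PARTS + 1) *
 * (CPI_CACHE_COH_LN_SZ + 1) * (CPI_CACHE_SETS + 1) = 8 * 1 * 64 * 64 =
 * 32768 bytes.
 */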
462 * A couple of shorthand macros to identify "later" P6-family chips
463 * like the Pentium M and Core. First, the "older" P6-based stuff
464 * (loosely defined as "pre-Pentium-4"):
465 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
467 #define IS_LEGACY_P6(cpi) ( \
468 cpi->cpi_family == 6 && \
469 (cpi->cpi_model == 1 || \
470 cpi->cpi_model == 3 || \
471 cpi->cpi_model == 5 || \
472 cpi->cpi_model == 6 || \
473 cpi->cpi_model == 7 || \
474 cpi->cpi_model == 8 || \
475 cpi->cpi_model == 0xA || \
476 cpi->cpi_model == 0xB) \
479 /* A "new F6" is everything with family 6 that's not the above */
480 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))
482 /* Extended family/model support */
483 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
484 cpi->cpi_family >= 0xf)
487 * Info for monitor/mwait idle loop.
489 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
490 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
491 * 2006.
492 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
493 * Documentation Updates" #33633, Rev 2.05, December 2006.
495 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */
496 #define MWAIT_EXTENSIONS (0x00000002) /* extensions supported */
497 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */
498 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
499 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2)
500 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1)
501 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
502 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
504 * Number of sub-cstates for a given c-state.
506 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \
507 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
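/*
 * Illustrative note: leaf 5 %edx packs a 4-bit sub-C-state count per
 * C-state, so the c_state argument here is a bit offset; e.g., assuming
 * callers pass a multiple of 4, MWAIT_NUM_SUBC_STATES(cpi, 4) extracts
 * %edx[7:4], the number of C1 sub-states.
 */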
510 * XSAVE leaf 0xD enumeration
512 #define CPUID_LEAFD_2_YMM_OFFSET 576
513 #define CPUID_LEAFD_2_YMM_SIZE 256
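/*
 * Illustrative note: these values match the conventional non-compacted
 * XSAVE layout, where the 512-byte legacy FXSAVE region plus the 64-byte
 * XSAVE header occupy bytes 0-575, so the YMM (AVX) component is expected
 * at offset 576 with a size of 16 registers x 16 bytes = 256 bytes.
 */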
516 * Functions we consume from cpuid_subr.c; don't publish these in a header
517 * file to try and keep people using the expected cpuid_* interfaces.
519 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
520 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
521 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
522 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
523 extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
526 * Apply various platform-dependent restrictions where the
527 * underlying platform constraints mean the CPU can be marked
528 * as less capable than its cpuid instruction would imply.
530 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */
533 * Some undocumented ways of patching the results of the cpuid
534 * instruction to permit running Solaris 10 on future cpus that
535 * we don't currently support. Could be set to non-zero values
536 * via settings in eeprom.
539 uint32_t cpuid_feature_ecx_include;
540 uint32_t cpuid_feature_ecx_exclude;
541 uint32_t cpuid_feature_edx_include;
542 uint32_t cpuid_feature_edx_exclude;
545 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
547 void
548 cpuid_alloc_space(cpu_t *cpu)
551 * By convention, cpu0 is the boot cpu, which is set up
552 * before memory allocation is available. All other cpus get
553 * their cpuid_info struct allocated here.
555 ASSERT(cpu->cpu_id != 0);
556 ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
557 cpu->cpu_m.mcpu_cpi =
558 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
561 void
562 cpuid_free_space(cpu_t *cpu)
564 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
565 int i;
567 ASSERT(cpi != NULL);
568 ASSERT(cpi != &cpuid_info0);
571 * Free up any function 4 related dynamic storage
573 for (i = 1; i < cpi->cpi_std_4_size; i++)
574 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs));
575 if (cpi->cpi_std_4_size > 0)
576 kmem_free(cpi->cpi_std_4,
577 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *));
579 kmem_free(cpi, sizeof (*cpi));
580 cpu->cpu_m.mcpu_cpi = NULL;
584 * Determine the type of the underlying platform. This is used to customize
585 * initialization of various subsystems (e.g. TSC). determine_platform() must
586 * only ever be called once to prevent two processors from seeing different
587 * values of platform_type. Must be called before cpuid_pass1(), the earliest
588 * consumer to execute (uses _cpuid_chiprev --> synth_amd_info --> get_hwenv).
590 void
591 determine_platform(void)
593 struct cpuid_regs cp;
594 uint32_t base;
595 uint32_t regs[4];
596 char *hvstr = (char *)regs;
598 ASSERT(platform_type == -1);
600 platform_type = HW_NATIVE;
602 if (!enable_platform_detection)
603 return;
606 * If Hypervisor CPUID bit is set, try to determine hypervisor
607 * vendor signature, and set platform type accordingly.
609 * References:
610 * http://lkml.org/lkml/2008/10/1/246
611 * http://kb.vmware.com/kb/1009458
613 cp.cp_eax = 0x1;
614 (void) __cpuid_insn(&cp);
615 if ((cp.cp_ecx & CPUID_INTC_ECX_HV) != 0) {
616 cp.cp_eax = 0x40000000;
617 (void) __cpuid_insn(&cp);
618 regs[0] = cp.cp_ebx;
619 regs[1] = cp.cp_ecx;
620 regs[2] = cp.cp_edx;
621 regs[3] = 0;
622 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0) {
623 platform_type = HW_XEN_HVM;
624 return;
626 if (strcmp(hvstr, HVSIG_VMWARE) == 0) {
627 platform_type = HW_VMWARE;
628 return;
630 if (strcmp(hvstr, HVSIG_KVM) == 0) {
631 platform_type = HW_KVM;
632 return;
634 if (strcmp(hvstr, HVSIG_MICROSOFT) == 0)
635 platform_type = HW_MICROSOFT;
636 } else {
638 * Check older VMware hardware versions. VMware hypervisor is
639 * detected by performing an IN operation to VMware hypervisor
640 * port and checking that the value returned in %ebx is the VMware
641 * hypervisor magic value.
643 * References: http://kb.vmware.com/kb/1009458
645 vmware_port(VMWARE_HVCMD_GETVERSION, regs);
646 if (regs[1] == VMWARE_HVMAGIC) {
647 platform_type = HW_VMWARE;
648 return;
653 * Check Xen hypervisor. In a fully virtualized domain,
654 * Xen's pseudo-cpuid function returns a string representing the
655 * Xen signature in %ebx, %ecx, and %edx. %eax contains the maximum
656 * supported cpuid function. We need at least a (base + 2) leaf value
657 * to do what we want to do. Try different base values, since the
658 * hypervisor might use a different one depending on whether Hyper-V
659 * emulation is switched on by default or not.
661 for (base = 0x40000000; base < 0x40010000; base += 0x100) {
662 cp.cp_eax = base;
663 (void) __cpuid_insn(&cp);
664 regs[0] = cp.cp_ebx;
665 regs[1] = cp.cp_ecx;
666 regs[2] = cp.cp_edx;
667 regs[3] = 0;
668 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0 &&
669 cp.cp_eax >= (base + 2)) {
670 platform_type &= ~HW_NATIVE;
671 platform_type |= HW_XEN_HVM;
672 return;
678 get_hwenv(void)
680 ASSERT(platform_type != -1);
681 return (platform_type);
685 is_controldom(void)
687 return (0);
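/*
 * Illustrative usage sketch: later code keys platform-specific handling
 * off of the cached platform type, e.g.
 *
 *	if (get_hwenv() == HW_KVM)
 *		... apply hypervisor-specific handling ...
 *
 * as is done below when deciding whether extended cpuid can be trusted.
 */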
691 static void
692 cpuid_intel_getids(cpu_t *cpu, void *feature)
694 uint_t i;
695 uint_t chipid_shift = 0;
696 uint_t coreid_shift = 0;
697 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
699 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
700 chipid_shift++;
702 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
703 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
705 if (is_x86_feature(feature, X86FSET_CMP)) {
707 * Multi-core (and possibly multi-threaded)
708 * processors.
710 uint_t ncpu_per_core;
711 if (cpi->cpi_ncore_per_chip == 1)
712 ncpu_per_core = cpi->cpi_ncpu_per_chip;
713 else if (cpi->cpi_ncore_per_chip > 1)
714 ncpu_per_core = cpi->cpi_ncpu_per_chip /
715 cpi->cpi_ncore_per_chip;
717 * 8bit APIC IDs on dual core Pentiums
718 * look like this:
720 * +-----------------------+------+------+
721 * | Physical Package ID | MC | HT |
722 * +-----------------------+------+------+
723 * <------- chipid -------->
724 * <------- coreid --------------->
725 * <--- clogid -->
726 * <------>
727 * pkgcoreid
729 * Where the number of bits necessary to
730 * represent MC and HT fields together equals
731 * to the minimum number of bits necessary to
732 * store the value of cpi->cpi_ncpu_per_chip.
733 * Of those bits, the MC part uses the number
734 * of bits necessary to store the value of
735 * cpi->cpi_ncore_per_chip.
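/*
 * Worked example (illustrative): with 2 cores of 2 threads each,
 * cpi_ncpu_per_chip = 4 gives chipid_shift = 2, and ncpu_per_core = 2
 * gives coreid_shift = 1; an APIC ID of 6 (binary 0110) then yields
 * chipid = 1, clogid = 2, coreid = 3 and pkgcoreid = 1.
 */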
737 for (i = 1; i < ncpu_per_core; i <<= 1)
738 coreid_shift++;
739 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
740 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
741 } else if (is_x86_feature(feature, X86FSET_HTT)) {
743 * Single-core multi-threaded processors.
745 cpi->cpi_coreid = cpi->cpi_chipid;
746 cpi->cpi_pkgcoreid = 0;
748 cpi->cpi_procnodeid = cpi->cpi_chipid;
749 cpi->cpi_compunitid = cpi->cpi_coreid;
752 static void
753 cpuid_amd_getids(cpu_t *cpu)
755 int i, first_half, coreidsz;
756 uint32_t nb_caps_reg;
757 uint_t node2_1;
758 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
759 struct cpuid_regs *cp;
762 * AMD CMP chips currently have a single thread per core.
764 * Since no two cpus share a core we must assign a distinct coreid
765 * per cpu, and we do this by using the cpu_id. This scheme does not,
766 * however, guarantee that sibling cores of a chip will have sequential
767 * coreids starting at a multiple of the number of cores per chip -
768 * that is usually the case, but if the ACPI MADT table is presented
769 * in a different order then we need to perform a few more gymnastics
770 * for the pkgcoreid.
772 * All processors in the system have the same number of enabled
773 * cores. Cores within a processor are always numbered sequentially
774 * from 0 regardless of how many or which are disabled, and there
775 * is no way for the operating system to discover the real core id when some
776 * are disabled.
778 * In family 0x15, the cores come in pairs called compute units. They
779 * share I$ and L2 caches and the FPU. Enumeration of this feature is
780 * simplified by the new topology extensions CPUID leaf, indicated by
781 * the X86 feature X86FSET_TOPOEXT.
784 cpi->cpi_coreid = cpu->cpu_id;
785 cpi->cpi_compunitid = cpu->cpu_id;
787 if (cpi->cpi_xmaxeax >= 0x80000008) {
789 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);
792 * In AMD parlance chip is really a node while Solaris
793 * sees chip as equivalent to socket/package.
795 cpi->cpi_ncore_per_chip =
796 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
797 if (coreidsz == 0) {
798 /* Use legacy method */
799 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
800 coreidsz++;
801 if (coreidsz == 0)
802 coreidsz = 1;
804 } else {
805 /* Assume single-core part */
806 cpi->cpi_ncore_per_chip = 1;
807 coreidsz = 1;
810 cpi->cpi_clogid = cpi->cpi_pkgcoreid =
811 cpi->cpi_apicid & ((1<<coreidsz) - 1);
812 cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip;
814 /* Get node ID, compute unit ID */
815 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
816 cpi->cpi_xmaxeax >= 0x8000001e) {
817 cp = &cpi->cpi_extd[0x1e];
818 cp->cp_eax = 0x8000001e;
819 (void) __cpuid_insn(cp);
821 cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
822 cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
823 cpi->cpi_cores_per_compunit = BITX(cp->cp_ebx, 15, 8) + 1;
824 cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0)
825 + (cpi->cpi_ncore_per_chip / cpi->cpi_cores_per_compunit)
826 * (cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg);
827 } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
828 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
829 } else if (cpi->cpi_family == 0x10) {
831 * See if we are a multi-node processor.
832 * All processors in the system have the same number of nodes
834 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8);
835 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
836 /* Single-node */
837 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
838 coreidsz);
839 } else {
842 * Multi-node revision D (2 nodes per package
843 * are supported)
845 cpi->cpi_procnodes_per_pkg = 2;
847 first_half = (cpi->cpi_pkgcoreid <=
848 (cpi->cpi_ncore_per_chip/2 - 1));
850 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
851 /* We are BSP */
852 cpi->cpi_procnodeid = (first_half ? 0 : 1);
853 } else {
855 /* We are AP */
856 /* NodeId[2:1] bits to use for reading F3xe8 */
857 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;
859 nb_caps_reg =
860 pci_getl_func(0, 24 + node2_1, 3, 0xe8);
863 * Check IntNodeNum bit (31:30, but bit 31 is
864 * always 0 on dual-node processors)
866 if (BITX(nb_caps_reg, 30, 30) == 0)
867 cpi->cpi_procnodeid = node2_1 +
868 !first_half;
869 else
870 cpi->cpi_procnodeid = node2_1 +
871 first_half;
874 } else {
875 cpi->cpi_procnodeid = 0;
878 cpi->cpi_chipid =
879 cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg;
883 * Setup XFeature_Enabled_Mask register. Required by xsave feature.
885 void
886 setup_xfem(void)
888 uint64_t flags = XFEATURE_LEGACY_FP;
890 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
892 if (is_x86_feature(x86_featureset, X86FSET_SSE))
893 flags |= XFEATURE_SSE;
895 if (is_x86_feature(x86_featureset, X86FSET_AVX))
896 flags |= XFEATURE_AVX;
898 if (is_x86_feature(x86_featureset, X86FSET_AVX512F))
899 flags |= XFEATURE_AVX512;
901 set_xcr(XFEATURE_ENABLED_MASK, flags);
903 xsave_bv_all = flags;
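/*
 * Worked example (illustrative): on a CPU with SSE and AVX but no AVX-512,
 * the flags computed above are XFEATURE_LEGACY_FP | XFEATURE_SSE |
 * XFEATURE_AVX; assuming those macros carry the architectural XCR0 bit
 * values (0x1, 0x2, 0x4), XCR0 is programmed to 0x7.
 */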
906 void
907 cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
909 uint32_t mask_ecx, mask_edx;
910 struct cpuid_info *cpi;
911 struct cpuid_regs *cp;
912 int xcpuid;
913 extern int idle_cpu_prefer_mwait;
916 * Space statically allocated for BSP, ensure pointer is set
918 if (cpu->cpu_id == 0) {
919 if (cpu->cpu_m.mcpu_cpi == NULL)
920 cpu->cpu_m.mcpu_cpi = &cpuid_info0;
923 add_x86_feature(featureset, X86FSET_CPUID);
925 cpi = cpu->cpu_m.mcpu_cpi;
926 ASSERT(cpi != NULL);
927 cp = &cpi->cpi_std[0];
928 cp->cp_eax = 0;
929 cpi->cpi_maxeax = __cpuid_insn(cp);
931 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
932 *iptr++ = cp->cp_ebx;
933 *iptr++ = cp->cp_edx;
934 *iptr++ = cp->cp_ecx;
935 *(char *)&cpi->cpi_vendorstr[12] = '\0';
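/*
 * Illustrative note: the vendor string is assembled in %ebx, %edx, %ecx
 * order, so genuine Intel parts yield "Genu" + "ineI" + "ntel" =
 * "GenuineIntel", and AMD parts yield "AuthenticAMD".
 */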
938 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
939 x86_vendor = cpi->cpi_vendor; /* for compatibility */
942 * Limit the range in case of weird hardware
944 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
945 cpi->cpi_maxeax = CPI_MAXEAX_MAX;
946 if (cpi->cpi_maxeax < 1)
947 goto pass1_done;
949 cp = &cpi->cpi_std[1];
950 cp->cp_eax = 1;
951 (void) __cpuid_insn(cp);
954 * Extract identifying constants for easy access.
956 cpi->cpi_model = CPI_MODEL(cpi);
957 cpi->cpi_family = CPI_FAMILY(cpi);
959 if (cpi->cpi_family == 0xf)
960 cpi->cpi_family += CPI_FAMILY_XTD(cpi);
963 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
964 * Intel, and presumably everyone else, uses model == 0xf, as
965 * one would expect (max value means possible overflow). Sigh.
968 switch (cpi->cpi_vendor) {
969 case X86_VENDOR_Intel:
970 if (IS_EXTENDED_MODEL_INTEL(cpi))
971 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
972 break;
973 case X86_VENDOR_AMD:
974 if (CPI_FAMILY(cpi) == 0xf)
975 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
976 break;
977 default:
978 if (cpi->cpi_model == 0xf)
979 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
980 break;
983 cpi->cpi_step = CPI_STEP(cpi);
984 cpi->cpi_brandid = CPI_BRANDID(cpi);
987 * *default* assumptions:
988 * - believe %edx feature word
989 * - ignore %ecx feature word
990 * - 32-bit virtual and physical addressing
992 mask_edx = 0xffffffff;
993 mask_ecx = 0;
995 cpi->cpi_pabits = cpi->cpi_vabits = 32;
997 switch (cpi->cpi_vendor) {
998 case X86_VENDOR_Intel:
999 if (cpi->cpi_family == 5)
1000 x86_type = X86_TYPE_P5;
1001 else if (IS_LEGACY_P6(cpi)) {
1002 x86_type = X86_TYPE_P6;
1003 pentiumpro_bug4046376 = 1;
1005 * Clear the SEP bit when it was set erroneously
1007 if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
1008 cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
1009 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
1010 x86_type = X86_TYPE_P4;
1012 * We don't currently depend on any of the %ecx
1013 * features until Prescott, so we'll only check
1014 * this from P4 onwards. We might want to revisit
1015 * that idea later.
1017 mask_ecx = 0xffffffff;
1018 } else if (cpi->cpi_family > 0xf)
1019 mask_ecx = 0xffffffff;
1021 * We don't support MONITOR/MWAIT if leaf 5 is not available
1022 * to obtain the monitor linesize.
1024 if (cpi->cpi_maxeax < 5)
1025 mask_ecx &= ~CPUID_INTC_ECX_MON;
1026 break;
1027 case X86_VENDOR_IntelClone:
1028 default:
1029 break;
1030 case X86_VENDOR_AMD:
1031 #if defined(OPTERON_ERRATUM_108)
1032 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
1033 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
1034 cpi->cpi_model = 0xc;
1035 } else
1036 #endif
1037 if (cpi->cpi_family == 5) {
1039 * AMD K5 and K6
1041 * These CPUs have an incomplete implementation
1042 * of MCA/MCE which we mask away.
1044 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
1047 * Model 0 uses the wrong (APIC) bit
1048 * to indicate PGE. Fix it here.
1050 if (cpi->cpi_model == 0) {
1051 if (cp->cp_edx & 0x200) {
1052 cp->cp_edx &= ~0x200;
1053 cp->cp_edx |= CPUID_INTC_EDX_PGE;
1058 * Early models had problems w/ MMX; disable.
1060 if (cpi->cpi_model < 6)
1061 mask_edx &= ~CPUID_INTC_EDX_MMX;
1065 * For newer families, SSE3 and CX16, at least, are valid;
1066 * enable all
1068 if (cpi->cpi_family >= 0xf)
1069 mask_ecx = 0xffffffff;
1071 * We don't support MONITOR/MWAIT if leaf 5 is not available
1072 * to obtain the monitor linesize.
1074 if (cpi->cpi_maxeax < 5)
1075 mask_ecx &= ~CPUID_INTC_ECX_MON;
1078 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD
1079 * processors. AMD does not intend MWAIT to be used in the cpu
1080 * idle loop on current and future processors. 10h and future
1081 * AMD processors use more power in MWAIT than HLT.
1082 * Pre-family-10h Opterons do not have the MWAIT instruction.
1084 idle_cpu_prefer_mwait = 0;
1086 break;
1087 case X86_VENDOR_TM:
1089 * workaround the NT workaround in CMS 4.1
1091 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
1092 (cpi->cpi_step == 2 || cpi->cpi_step == 3))
1093 cp->cp_edx |= CPUID_INTC_EDX_CX8;
1094 break;
1095 case X86_VENDOR_Centaur:
1097 * workaround the NT workarounds again
1099 if (cpi->cpi_family == 6)
1100 cp->cp_edx |= CPUID_INTC_EDX_CX8;
1101 break;
1102 case X86_VENDOR_Cyrix:
1104 * We rely heavily on the probing in locore
1105 * to actually figure out what parts, if any,
1106 * of the Cyrix cpuid instruction to believe.
1108 switch (x86_type) {
1109 case X86_TYPE_CYRIX_486:
1110 mask_edx = 0;
1111 break;
1112 case X86_TYPE_CYRIX_6x86:
1113 mask_edx = 0;
1114 break;
1115 case X86_TYPE_CYRIX_6x86L:
1116 mask_edx =
1117 CPUID_INTC_EDX_DE |
1118 CPUID_INTC_EDX_CX8;
1119 break;
1120 case X86_TYPE_CYRIX_6x86MX:
1121 mask_edx =
1122 CPUID_INTC_EDX_DE |
1123 CPUID_INTC_EDX_MSR |
1124 CPUID_INTC_EDX_CX8 |
1125 CPUID_INTC_EDX_PGE |
1126 CPUID_INTC_EDX_CMOV |
1127 CPUID_INTC_EDX_MMX;
1128 break;
1129 case X86_TYPE_CYRIX_GXm:
1130 mask_edx =
1131 CPUID_INTC_EDX_MSR |
1132 CPUID_INTC_EDX_CX8 |
1133 CPUID_INTC_EDX_CMOV |
1134 CPUID_INTC_EDX_MMX;
1135 break;
1136 case X86_TYPE_CYRIX_MediaGX:
1137 break;
1138 case X86_TYPE_CYRIX_MII:
1139 case X86_TYPE_VIA_CYRIX_III:
1140 mask_edx =
1141 CPUID_INTC_EDX_DE |
1142 CPUID_INTC_EDX_TSC |
1143 CPUID_INTC_EDX_MSR |
1144 CPUID_INTC_EDX_CX8 |
1145 CPUID_INTC_EDX_PGE |
1146 CPUID_INTC_EDX_CMOV |
1147 CPUID_INTC_EDX_MMX;
1148 break;
1149 default:
1150 break;
1152 break;
1156 if (xsave_force_disable) {
1157 mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
1158 mask_ecx &= ~CPUID_INTC_ECX_AVX;
1159 mask_ecx &= ~CPUID_INTC_ECX_F16C;
1160 mask_ecx &= ~CPUID_INTC_ECX_FMA;
1164 * Now we've figured out the masks that determine
1165 * which bits we choose to believe, apply the masks
1166 * to the feature words, then map the kernel's view
1167 * of these feature words into its feature word.
1169 cp->cp_edx &= mask_edx;
1170 cp->cp_ecx &= mask_ecx;
1173 * apply any platform restrictions (we don't call this
1174 * immediately after __cpuid_insn here, because we need the
1175 * workarounds applied above first)
1177 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
1180 * In addition to ecx and edx, Intel is storing a bunch of instruction
1181 * set extensions in leaf 7's ebx, ecx, and edx.
1183 if (cpi->cpi_vendor == X86_VENDOR_Intel && cpi->cpi_maxeax >= 7) {
1184 struct cpuid_regs *ecp;
1185 ecp = &cpi->cpi_std[7];
1186 ecp->cp_eax = 7;
1187 ecp->cp_ecx = 0;
1188 (void) __cpuid_insn(ecp);
1190 * If XSAVE has been disabled, just ignore all of the
1191 * extended-save-area dependent flags here.
1193 if (xsave_force_disable) {
1194 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
1195 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
1196 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
1197 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_MPX;
1198 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_ALL_AVX512;
1199 ecp->cp_ecx &= ~CPUID_INTC_ECX_7_0_ALL_AVX512;
1200 ecp->cp_edx &= ~CPUID_INTC_EDX_7_0_ALL_AVX512;
1203 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMEP)
1204 add_x86_feature(featureset, X86FSET_SMEP);
1206 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_INVPCID) {
1207 add_x86_feature(featureset, X86FSET_INVPCID);
1211 * We check disable_smap here in addition to in startup_smap()
1212 * to ensure CPUs that aren't the boot CPU don't accidentally
1213 * include it in the feature set and thus generate a mismatched
1214 * x86 feature set across CPUs. Note that at this time we only
1215 * enable SMAP for the 64-bit kernel.
1217 #if defined(__amd64)
1218 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMAP &&
1219 disable_smap == 0)
1220 add_x86_feature(featureset, X86FSET_SMAP);
1221 #endif
1222 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_MPX)
1223 add_x86_feature(featureset, X86FSET_MPX);
1225 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_RDSEED)
1226 add_x86_feature(featureset, X86FSET_RDSEED);
1228 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_ADX)
1229 add_x86_feature(featureset, X86FSET_ADX);
1233 * fold in overrides from the "eeprom" mechanism
1235 cp->cp_edx |= cpuid_feature_edx_include;
1236 cp->cp_edx &= ~cpuid_feature_edx_exclude;
1238 cp->cp_ecx |= cpuid_feature_ecx_include;
1239 cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
1241 if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
1242 add_x86_feature(featureset, X86FSET_LARGEPAGE);
1244 if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
1245 add_x86_feature(featureset, X86FSET_TSC);
1247 if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
1248 add_x86_feature(featureset, X86FSET_MSR);
1250 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
1251 add_x86_feature(featureset, X86FSET_MTRR);
1253 if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
1254 add_x86_feature(featureset, X86FSET_PGE);
1256 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
1257 add_x86_feature(featureset, X86FSET_CMOV);
1259 if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
1260 add_x86_feature(featureset, X86FSET_MMX);
1262 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
1263 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
1264 add_x86_feature(featureset, X86FSET_MCA);
1266 if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
1267 add_x86_feature(featureset, X86FSET_PAE);
1269 if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
1270 add_x86_feature(featureset, X86FSET_CX8);
1272 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
1273 add_x86_feature(featureset, X86FSET_CX16);
1275 if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
1276 add_x86_feature(featureset, X86FSET_PAT);
1278 if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
1279 add_x86_feature(featureset, X86FSET_SEP);
1281 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
1283 * In our implementation, fxsave/fxrstor
1284 * are prerequisites before we'll even
1285 * try and do SSE things.
1287 if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
1288 add_x86_feature(featureset, X86FSET_SSE);
1290 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
1291 add_x86_feature(featureset, X86FSET_SSE2);
1293 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
1294 add_x86_feature(featureset, X86FSET_SSE3);
1296 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
1297 add_x86_feature(featureset, X86FSET_SSSE3);
1299 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
1300 add_x86_feature(featureset, X86FSET_SSE4_1);
1302 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
1303 add_x86_feature(featureset, X86FSET_SSE4_2);
1305 if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
1306 add_x86_feature(featureset, X86FSET_AES);
1308 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
1309 add_x86_feature(featureset, X86FSET_PCLMULQDQ);
1312 if (cpi->cpi_std[7].cp_ebx & CPUID_INTC_EBX_7_0_SHA)
1313 add_x86_feature(featureset, X86FSET_SHA);
1315 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_UMIP)
1316 add_x86_feature(featureset, X86FSET_UMIP);
1317 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_PKU)
1318 add_x86_feature(featureset, X86FSET_PKU);
1319 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_OSPKE)
1320 add_x86_feature(featureset, X86FSET_OSPKE);
1322 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
1323 add_x86_feature(featureset, X86FSET_XSAVE);
1325 /* We only test AVX & AVX512 when there is XSAVE */
1327 if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
1328 add_x86_feature(featureset,
1329 X86FSET_AVX);
1332 * Intel says we can't check these without also
1333 * checking AVX.
1335 if (cp->cp_ecx & CPUID_INTC_ECX_F16C)
1336 add_x86_feature(featureset,
1337 X86FSET_F16C);
1339 if (cp->cp_ecx & CPUID_INTC_ECX_FMA)
1340 add_x86_feature(featureset,
1341 X86FSET_FMA);
1343 if (cpi->cpi_std[7].cp_ebx &
1344 CPUID_INTC_EBX_7_0_BMI1)
1345 add_x86_feature(featureset,
1346 X86FSET_BMI1);
1348 if (cpi->cpi_std[7].cp_ebx &
1349 CPUID_INTC_EBX_7_0_BMI2)
1350 add_x86_feature(featureset,
1351 X86FSET_BMI2);
1353 if (cpi->cpi_std[7].cp_ebx &
1354 CPUID_INTC_EBX_7_0_AVX2)
1355 add_x86_feature(featureset,
1356 X86FSET_AVX2);
1359 if (cpi->cpi_std[7].cp_ebx &
1360 CPUID_INTC_EBX_7_0_AVX512F) {
1361 add_x86_feature(featureset, X86FSET_AVX512F);
1363 if (cpi->cpi_std[7].cp_ebx &
1364 CPUID_INTC_EBX_7_0_AVX512DQ)
1365 add_x86_feature(featureset,
1366 X86FSET_AVX512DQ);
1367 if (cpi->cpi_std[7].cp_ebx &
1368 CPUID_INTC_EBX_7_0_AVX512IFMA)
1369 add_x86_feature(featureset,
1370 X86FSET_AVX512FMA);
1371 if (cpi->cpi_std[7].cp_ebx &
1372 CPUID_INTC_EBX_7_0_AVX512PF)
1373 add_x86_feature(featureset,
1374 X86FSET_AVX512PF);
1375 if (cpi->cpi_std[7].cp_ebx &
1376 CPUID_INTC_EBX_7_0_AVX512ER)
1377 add_x86_feature(featureset,
1378 X86FSET_AVX512ER);
1379 if (cpi->cpi_std[7].cp_ebx &
1380 CPUID_INTC_EBX_7_0_AVX512CD)
1381 add_x86_feature(featureset,
1382 X86FSET_AVX512CD);
1383 if (cpi->cpi_std[7].cp_ebx &
1384 CPUID_INTC_EBX_7_0_AVX512BW)
1385 add_x86_feature(featureset,
1386 X86FSET_AVX512BW);
1387 if (cpi->cpi_std[7].cp_ebx &
1388 CPUID_INTC_EBX_7_0_AVX512VL)
1389 add_x86_feature(featureset,
1390 X86FSET_AVX512VL);
1392 if (cpi->cpi_std[7].cp_ecx &
1393 CPUID_INTC_ECX_7_0_AVX512VBMI)
1394 add_x86_feature(featureset,
1395 X86FSET_AVX512VBMI);
1396 if (cpi->cpi_std[7].cp_ecx &
1397 CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
1398 add_x86_feature(featureset,
1399 X86FSET_AVX512VPOPCDQ);
1401 if (cpi->cpi_std[7].cp_edx &
1402 CPUID_INTC_EDX_7_0_AVX5124NNIW)
1403 add_x86_feature(featureset,
1404 X86FSET_AVX512NNIW);
1405 if (cpi->cpi_std[7].cp_edx &
1406 CPUID_INTC_EDX_7_0_AVX5124FMAPS)
1407 add_x86_feature(featureset,
1408 X86FSET_AVX512FMAPS);
1413 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
1414 if (cp->cp_ecx & CPUID_INTC_ECX_PCID) {
1415 add_x86_feature(featureset, X86FSET_PCID);
1419 if (cp->cp_ecx & CPUID_INTC_ECX_X2APIC) {
1420 add_x86_feature(featureset, X86FSET_X2APIC);
1422 if (cp->cp_edx & CPUID_INTC_EDX_DE) {
1423 add_x86_feature(featureset, X86FSET_DE);
1425 if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
1428 * We require the CLFLUSH instruction for erratum workaround
1429 * to use MONITOR/MWAIT.
1431 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1432 cpi->cpi_mwait.support |= MWAIT_SUPPORT;
1433 add_x86_feature(featureset, X86FSET_MWAIT);
1434 } else {
1435 extern int idle_cpu_assert_cflush_monitor;
1438 * All processors we are aware of which have
1439 * MONITOR/MWAIT also have CLFLUSH.
1441 if (idle_cpu_assert_cflush_monitor) {
1442 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
1443 (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
1448 if (cp->cp_ecx & CPUID_INTC_ECX_VMX) {
1449 add_x86_feature(featureset, X86FSET_VMX);
1452 if (cp->cp_ecx & CPUID_INTC_ECX_RDRAND)
1453 add_x86_feature(featureset, X86FSET_RDRAND);
1456 * Only needed the first time; the rest of the cpus follow suit.
1457 * We only capture this for the boot cpu.
1459 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1460 add_x86_feature(featureset, X86FSET_CLFSH);
1461 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
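/*
 * Illustrative note: leaf 1 %ebx[15:8] reports the CLFLUSH line size in
 * 8-byte units, so the common encoding of 8 gives x86_clflush_size = 64.
 */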
1463 if (is_x86_feature(featureset, X86FSET_PAE))
1464 cpi->cpi_pabits = 36;
1467 * Hyperthreading configuration is slightly tricky on Intel
1468 * and pure clones, and even trickier on AMD.
1470 * (AMD chose to set the HTT bit on their CMP processors,
1471 * even though they're not actually hyperthreaded. Thus it
1472 * takes a bit more work to figure out what's really going
1473 * on ... see the handling of the CMP_LGCY bit below)
1475 if (cp->cp_edx & CPUID_INTC_EDX_HTT) {
1476 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
1477 if (cpi->cpi_ncpu_per_chip > 1)
1478 add_x86_feature(featureset, X86FSET_HTT);
1479 } else {
1480 cpi->cpi_ncpu_per_chip = 1;
1483 if (cpi->cpi_vendor == X86_VENDOR_Intel && cpi->cpi_maxeax >= 0xD &&
1484 !xsave_force_disable) {
1485 struct cpuid_regs r, *ecp;
1487 ecp = &r;
1488 ecp->cp_eax = 0xD;
1489 ecp->cp_ecx = 1;
1490 ecp->cp_edx = ecp->cp_ebx = 0;
1491 (void) __cpuid_insn(ecp);
1493 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEOPT)
1494 add_x86_feature(featureset, X86FSET_XSAVEOPT);
1495 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEC)
1496 add_x86_feature(featureset, X86FSET_XSAVEC);
1497 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVES)
1498 add_x86_feature(featureset, X86FSET_XSAVES);
1502 * Work on the "extended" feature information, doing
1503 * some basic initialization for cpuid_pass2()
1505 xcpuid = 0;
1506 switch (cpi->cpi_vendor) {
1507 case X86_VENDOR_Intel:
1509 * On KVM we know we will have proper support for extended
1510 * cpuid.
1512 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf ||
1513 (get_hwenv() == HW_KVM && cpi->cpi_family == 6 &&
1514 (cpi->cpi_model == 6 || cpi->cpi_model == 2)))
1515 xcpuid++;
1516 break;
1517 case X86_VENDOR_AMD:
1518 if (cpi->cpi_family > 5 ||
1519 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
1520 xcpuid++;
1521 break;
1522 case X86_VENDOR_Cyrix:
1524 * Only these Cyrix CPUs are -known- to support
1525 * extended cpuid operations.
1527 if (x86_type == X86_TYPE_VIA_CYRIX_III ||
1528 x86_type == X86_TYPE_CYRIX_GXm)
1529 xcpuid++;
1530 break;
1531 case X86_VENDOR_Centaur:
1532 case X86_VENDOR_TM:
1533 default:
1534 xcpuid++;
1535 break;
1538 if (xcpuid) {
1539 cp = &cpi->cpi_extd[0];
1540 cp->cp_eax = 0x80000000;
1541 cpi->cpi_xmaxeax = __cpuid_insn(cp);
1544 if (cpi->cpi_xmaxeax & 0x80000000) {
1546 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
1547 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
1549 switch (cpi->cpi_vendor) {
1550 case X86_VENDOR_Intel:
1551 case X86_VENDOR_AMD:
1552 if (cpi->cpi_xmaxeax < 0x80000001)
1553 break;
1554 cp = &cpi->cpi_extd[1];
1555 cp->cp_eax = 0x80000001;
1556 (void) __cpuid_insn(cp);
1558 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1559 cpi->cpi_family == 5 &&
1560 cpi->cpi_model == 6 &&
1561 cpi->cpi_step == 6) {
1563 * K6 model 6 uses bit 10 to indicate SYSC
1564 * Later models use bit 11. Fix it here.
1566 if (cp->cp_edx & 0x400) {
1567 cp->cp_edx &= ~0x400;
1568 cp->cp_edx |= CPUID_AMD_EDX_SYSC;
1572 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
1575 * Compute the additions to the kernel's feature word.
1577 if (cp->cp_edx & CPUID_AMD_EDX_NX) {
1578 add_x86_feature(featureset, X86FSET_NX);
1582 * Regardless whether or not we boot 64-bit,
1583 * we should have a way to identify whether
1584 * the CPU is capable of running 64-bit.
1586 if (cp->cp_edx & CPUID_AMD_EDX_LM) {
1587 add_x86_feature(featureset, X86FSET_64);
1590 #if defined(__amd64)
1591 /* 1 GB large page - enable only for 64 bit kernel */
1592 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
1593 add_x86_feature(featureset, X86FSET_1GPG);
1595 #endif
1597 if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
1598 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
1599 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
1600 add_x86_feature(featureset, X86FSET_SSE4A);
1604 * If both the HTT and CMP_LGCY bits are set,
1605 * then we're not actually HyperThreaded. Read
1606 * "AMD CPUID Specification" for more details.
1608 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1609 is_x86_feature(featureset, X86FSET_HTT) &&
1610 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) {
1611 remove_x86_feature(featureset, X86FSET_HTT);
1612 add_x86_feature(featureset, X86FSET_CMP);
1614 #if defined(__amd64)
1616 * It's really tricky to support syscall/sysret in
1617 * the i386 kernel; we rely on sysenter/sysexit
1618 * instead. In the amd64 kernel, things are -way-
1619 * better.
1621 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
1622 add_x86_feature(featureset, X86FSET_ASYSC);
1626 * While we're thinking about system calls, note
1627 * that AMD processors don't support sysenter
1628 * in long mode at all, so don't try to program them.
1630 if (x86_vendor == X86_VENDOR_AMD) {
1631 remove_x86_feature(featureset, X86FSET_SEP);
1633 #endif
1634 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
1635 add_x86_feature(featureset, X86FSET_TSCP);
1638 if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
1639 add_x86_feature(featureset, X86FSET_SVM);
1642 if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
1643 add_x86_feature(featureset, X86FSET_TOPOEXT);
1645 break;
1646 default:
1647 break;
1651 * Get CPUID data about processor cores and hyperthreads.
1653 switch (cpi->cpi_vendor) {
1654 case X86_VENDOR_Intel:
1655 if (cpi->cpi_maxeax >= 4) {
1656 cp = &cpi->cpi_std[4];
1657 cp->cp_eax = 4;
1658 cp->cp_ecx = 0;
1659 (void) __cpuid_insn(cp);
1660 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
1662 /*FALLTHROUGH*/
1663 case X86_VENDOR_AMD:
1664 if (cpi->cpi_xmaxeax < 0x80000008)
1665 break;
1666 cp = &cpi->cpi_extd[8];
1667 cp->cp_eax = 0x80000008;
1668 (void) __cpuid_insn(cp);
1669 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp);
1672 * Virtual and physical address limits from
1673 * cpuid override previously guessed values.
1675 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
1676 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
1677 break;
1678 default:
1679 break;
1683 * Derive the number of cores per chip
1685 switch (cpi->cpi_vendor) {
1686 case X86_VENDOR_Intel:
1687 if (cpi->cpi_maxeax < 4) {
1688 cpi->cpi_ncore_per_chip = 1;
1689 break;
1690 } else {
1691 cpi->cpi_ncore_per_chip =
1692 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1;
1694 break;
1695 case X86_VENDOR_AMD:
1696 if (cpi->cpi_xmaxeax < 0x80000008) {
1697 cpi->cpi_ncore_per_chip = 1;
1698 break;
1699 } else {
1701 * On family 0xf cpuid fn 0x80000008 ECX[7:0] "NC" is
1702 * 1 less than the number of physical cores on
1703 * the chip. In family 0x10 this value can
1704 * be affected by "downcoring" - it reflects
1705 * 1 less than the number of cores actually
1706 * enabled on this node.
1708 cpi->cpi_ncore_per_chip =
1709 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
1711 break;
1712 default:
1713 cpi->cpi_ncore_per_chip = 1;
1714 break;
1718 * Get CPUID data about TSC Invariance in Deep C-State.
1720 switch (cpi->cpi_vendor) {
1721 case X86_VENDOR_Intel:
1722 if (cpi->cpi_maxeax >= 7) {
1723 cp = &cpi->cpi_extd[7];
1724 cp->cp_eax = 0x80000007;
1725 cp->cp_ecx = 0;
1726 (void) __cpuid_insn(cp);
1728 break;
1729 default:
1730 break;
1732 } else {
1733 cpi->cpi_ncore_per_chip = 1;
1737 * If more than one core, then this processor is CMP.
1739 if (cpi->cpi_ncore_per_chip > 1) {
1740 add_x86_feature(featureset, X86FSET_CMP);
1744 * If the number of cores is the same as the number
1745 * of CPUs, then we cannot have HyperThreading.
1747 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) {
1748 remove_x86_feature(featureset, X86FSET_HTT);
1751 cpi->cpi_apicid = CPI_APIC_ID(cpi);
1752 cpi->cpi_procnodes_per_pkg = 1;
1753 cpi->cpi_cores_per_compunit = 1;
1754 if (is_x86_feature(featureset, X86FSET_HTT) == B_FALSE &&
1755 is_x86_feature(featureset, X86FSET_CMP) == B_FALSE) {
1757 * Single-core single-threaded processors.
1759 cpi->cpi_chipid = -1;
1760 cpi->cpi_clogid = 0;
1761 cpi->cpi_coreid = cpu->cpu_id;
1762 cpi->cpi_pkgcoreid = 0;
1763 if (cpi->cpi_vendor == X86_VENDOR_AMD)
1764 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
1765 else
1766 cpi->cpi_procnodeid = cpi->cpi_chipid;
1767 } else if (cpi->cpi_ncpu_per_chip > 1) {
1768 if (cpi->cpi_vendor == X86_VENDOR_Intel)
1769 cpuid_intel_getids(cpu, featureset);
1770 else if (cpi->cpi_vendor == X86_VENDOR_AMD)
1771 cpuid_amd_getids(cpu);
1772 else {
1774 * All other processors are currently
1775 * assumed to have single cores.
1777 cpi->cpi_coreid = cpi->cpi_chipid;
1778 cpi->cpi_pkgcoreid = 0;
1779 cpi->cpi_procnodeid = cpi->cpi_chipid;
1780 cpi->cpi_compunitid = cpi->cpi_chipid;
1785 * Synthesize chip "revision" and socket type
1787 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
1788 cpi->cpi_model, cpi->cpi_step);
1789 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
1790 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
1791 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
1792 cpi->cpi_model, cpi->cpi_step);
1795 * While we're here, check for the AMD "Error Pointer Zero/Restore"
1796 * feature. This can be used to setup the FP save handlers
1797 * appropriately.
1799 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
1800 if (cpi->cpi_xmaxeax >= 0x80000008 &&
1801 cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
1802 /* Special handling for AMD FP not necessary. */
1803 cpi->cpi_fp_amd_save = 0;
1804 } else {
1805 cpi->cpi_fp_amd_save = 1;
1809 pass1_done:
1810 cpi->cpi_pass = 1;
1814 * Make copies of the cpuid table entries we depend on, in
1815 * part for ease of parsing now, in part so that we have only
1816 * one place to correct any of it, in part for ease of
1817 * later export to userland, and in part so we can look at
1818 * this stuff in a crash dump.
1821 /*ARGSUSED*/
1822 void
1823 cpuid_pass2(cpu_t *cpu)
1825 uint_t n, nmax;
1826 int i;
1827 struct cpuid_regs *cp;
1828 uint8_t *dp;
1829 uint32_t *iptr;
1830 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1832 ASSERT(cpi->cpi_pass == 1);
1834 if (cpi->cpi_maxeax < 1)
1835 goto pass2_done;
1837 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
1838 nmax = NMAX_CPI_STD;
1840 * (We already handled n == 0 and n == 1 in pass 1)
1842 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
1843 cp->cp_eax = n;
1846 * CPUID function 4 expects %ecx to be initialized
1847 * with an index which indicates which cache to return
1848 * information about. The OS is expected to call function 4
1849 * with %ecx set to 0, 1, 2, ... until it returns with
1850 * EAX[4:0] set to 0, which indicates there are no more
1851 * caches.
1853 * Here, populate cpi_std[4] with the information returned by
1854 * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
1855 * when dynamic memory allocation becomes available.
1857 * Note: we need to explicitly initialize %ecx here, since
1858 * function 4 may have been previously invoked.
1860 * The same is all true for CPUID function 7.
1862 if (n == 4 || n == 7)
1863 cp->cp_ecx = 0;
1865 (void) __cpuid_insn(cp);
1866 platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
1867 switch (n) {
1868 case 2:
1870 * "the lower 8 bits of the %eax register
1871 * contain a value that identifies the number
1872 * of times the cpuid [instruction] has to be
1873 * executed to obtain a complete image of the
1874 * processor's caching systems."
1876 * How *do* they make this stuff up?
1878 cpi->cpi_ncache = sizeof (*cp) *
1879 BITX(cp->cp_eax, 7, 0);
1880 if (cpi->cpi_ncache == 0)
1881 break;
1882 cpi->cpi_ncache--; /* skip count byte */
1885 * Well, for now, rather than attempt to implement
1886 * this slightly dubious algorithm, we just look
1887 * at the first 15 ..
1889 if (cpi->cpi_ncache > (sizeof (*cp) - 1))
1890 cpi->cpi_ncache = sizeof (*cp) - 1;
1892 dp = cpi->cpi_cacheinfo;
1893 if (BITX(cp->cp_eax, 31, 31) == 0) {
1894 uint8_t *p = (void *)&cp->cp_eax;
1895 for (i = 1; i < 4; i++)
1896 if (p[i] != 0)
1897 *dp++ = p[i];
1899 if (BITX(cp->cp_ebx, 31, 31) == 0) {
1900 uint8_t *p = (void *)&cp->cp_ebx;
1901 for (i = 0; i < 4; i++)
1902 if (p[i] != 0)
1903 *dp++ = p[i];
1905 if (BITX(cp->cp_ecx, 31, 31) == 0) {
1906 uint8_t *p = (void *)&cp->cp_ecx;
1907 for (i = 0; i < 4; i++)
1908 if (p[i] != 0)
1909 *dp++ = p[i];
1911 if (BITX(cp->cp_edx, 31, 31) == 0) {
1912 uint8_t *p = (void *)&cp->cp_edx;
1913 for (i = 0; i < 4; i++)
1914 if (p[i] != 0)
1915 *dp++ = p[i];
1917 break;
1919 case 3: /* Processor serial number, if PSN supported */
1920 break;
1922 case 4: /* Deterministic cache parameters */
1923 break;
1925 case 5: /* Monitor/Mwait parameters */
1927 size_t mwait_size;
1930 * check cpi_mwait.support which was set in cpuid_pass1
1932 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
1933 break;
1936 * Protect ourselves from an insane mwait line size.
1937 * Workaround for incomplete hardware emulator(s).
1939 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
1940 if (mwait_size < sizeof (uint32_t) ||
1941 !ISP2(mwait_size)) {
1942 #if DEBUG
1943 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
1944 "size %ld", cpu->cpu_id, (long)mwait_size);
1945 #endif
1946 break;
1949 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
1950 cpi->cpi_mwait.mon_max = mwait_size;
1951 if (MWAIT_EXTENSION(cpi)) {
1952 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
1953 if (MWAIT_INT_ENABLE(cpi))
1954 cpi->cpi_mwait.support |=
1955 MWAIT_ECX_INT_ENABLE;
1957 break;
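/*
 * Architecturally (CPUID function 5), the smallest monitor-line size is
 * reported in EAX[15:0] and the largest in EBX[15:0], with ECX bit 0
 * advertising the MWAIT extensions and ECX bit 1 the ability to treat
 * interrupts as break events; these are the fields the MWAIT_* macros
 * used above are expected to decode.  As a worked example of the sanity
 * check: an EBX[15:0] of 0x40 (64 bytes) is at least sizeof (uint32_t)
 * and a power of two, so it is accepted as mon_max, whereas a value of
 * 0 or 3 from a broken emulator trips the ISP2/minimum test and the
 * monitor sizes are never filled in.
 */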
1959 default:
1960 break;
1964 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) {
1965 struct cpuid_regs regs;
1967 cp = &regs;
1968 cp->cp_eax = 0xB;
1969 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1971 (void) __cpuid_insn(cp);
1974 * Check that CPUID.(EAX=0BH, ECX=0H):EBX is non-zero, which
1975 * indicates that the extended topology enumeration leaf is
1976 * available.
1978 if (cp->cp_ebx) {
1979 uint32_t x2apic_id;
1980 uint_t coreid_shift = 0;
1981 uint_t ncpu_per_core = 1;
1982 uint_t chipid_shift = 0;
1983 uint_t ncpu_per_chip = 1;
1984 uint_t i;
1985 uint_t level;
1987 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
1988 cp->cp_eax = 0xB;
1989 cp->cp_ecx = i;
1991 (void) __cpuid_insn(cp);
1992 level = CPI_CPU_LEVEL_TYPE(cp);
1994 if (level == 1) {
1995 x2apic_id = cp->cp_edx;
1996 coreid_shift = BITX(cp->cp_eax, 4, 0);
1997 ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
1998 } else if (level == 2) {
1999 x2apic_id = cp->cp_edx;
2000 chipid_shift = BITX(cp->cp_eax, 4, 0);
2001 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
2005 cpi->cpi_apicid = x2apic_id;
2006 cpi->cpi_ncpu_per_chip = ncpu_per_chip;
2007 cpi->cpi_ncore_per_chip = ncpu_per_chip /
2008 ncpu_per_core;
2009 cpi->cpi_chipid = x2apic_id >> chipid_shift;
2010 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
2011 cpi->cpi_coreid = x2apic_id >> coreid_shift;
2012 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
2015 /* Make cp NULL so that we don't stumble on others */
2016 cp = NULL;
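/*
 * As a worked example of the topology math above: if leaf 0xB reports a
 * coreid_shift of 1 (two logical CPUs per core), a chipid_shift of 4 and
 * an ncpu_per_chip of 12, then a CPU whose x2APIC id is 0x1d ends up with
 * cpi_chipid = 0x1d >> 4 = 1, cpi_clogid = 0x1d & 0xf = 0xd,
 * cpi_coreid = 0x1d >> 1 = 0xe, cpi_pkgcoreid = 0xd >> 1 = 6 and
 * cpi_ncore_per_chip = 12 / 2 = 6.  (The numbers are illustrative, not
 * taken from any particular part.)
 */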
2020 * XSAVE enumeration
2022 if (cpi->cpi_maxeax >= 0xD) {
2023 struct cpuid_regs regs;
2024 boolean_t cpuid_d_valid = B_TRUE;
2026 cp = &regs;
2027 cp->cp_eax = 0xD;
2028 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
2030 (void) __cpuid_insn(cp);
2033 * Sanity checks for debug
2035 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
2036 (cp->cp_eax & XFEATURE_SSE) == 0) {
2037 cpuid_d_valid = B_FALSE;
2040 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
2041 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
2042 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
2045 * If the hw supports AVX, get the size and offset in the save
2046 * area for the ymm state.
2048 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
2049 cp->cp_eax = 0xD;
2050 cp->cp_ecx = 2;
2051 cp->cp_edx = cp->cp_ebx = 0;
2053 (void) __cpuid_insn(cp);
2055 if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
2056 cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
2057 cpuid_d_valid = B_FALSE;
2060 cpi->cpi_xsave.ymm_size = cp->cp_eax;
2061 cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
2065 * If the hw supports MPX, get the size and offset in the
2066 * save area for BNDREGS and BNDCSR.
2068 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_MPX) {
2069 cp->cp_eax = 0xD;
2070 cp->cp_ecx = 3;
2071 cp->cp_edx = cp->cp_ebx = 0;
2073 (void) __cpuid_insn(cp);
2075 cpi->cpi_xsave.bndregs_size = cp->cp_eax;
2076 cpi->cpi_xsave.bndregs_offset = cp->cp_ebx;
2078 cp->cp_eax = 0xD;
2079 cp->cp_ecx = 4;
2080 cp->cp_edx = cp->cp_ebx = 0;
2082 (void) __cpuid_insn(cp);
2084 cpi->cpi_xsave.bndcsr_size = cp->cp_eax;
2085 cpi->cpi_xsave.bndcsr_offset = cp->cp_ebx;
2089 * If the hw supports AVX512, get the size and offset in the
2090 * save area for the opmask registers and zmm state.
2092 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX512) {
2093 cp->cp_eax = 0xD;
2094 cp->cp_ecx = 5;
2095 cp->cp_edx = cp->cp_ebx = 0;
2097 (void) __cpuid_insn(cp);
2099 cpi->cpi_xsave.opmask_size = cp->cp_eax;
2100 cpi->cpi_xsave.opmask_offset = cp->cp_ebx;
2102 cp->cp_eax = 0xD;
2103 cp->cp_ecx = 6;
2104 cp->cp_edx = cp->cp_ebx = 0;
2106 (void) __cpuid_insn(cp);
2108 cpi->cpi_xsave.zmmlo_size = cp->cp_eax;
2109 cpi->cpi_xsave.zmmlo_offset = cp->cp_ebx;
2111 cp->cp_eax = 0xD;
2112 cp->cp_ecx = 7;
2113 cp->cp_edx = cp->cp_ebx = 0;
2115 (void) __cpuid_insn(cp);
2117 cpi->cpi_xsave.zmmhi_size = cp->cp_eax;
2118 cpi->cpi_xsave.zmmhi_offset = cp->cp_ebx;
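/*
 * The same sub-leaf queries can be made from userland; a minimal sketch
 * assuming GCC/clang's __get_cpuid_count() from <cpuid.h>.  Sub-leaf 2
 * describes the AVX (ymm) state and should report the size and offset
 * that the CPUID_LEAFD_2_YMM_* checks above compare against (nominally
 * 256 bytes at offset 576, i.e. right after the 512-byte legacy area
 * and the 64-byte XSAVE header):
 *
 *	#include <cpuid.h>
 *	#include <stdio.h>
 *
 *	unsigned int eax, ebx, ecx, edx;
 *
 *	if (__get_cpuid_count(0xd, 2, &eax, &ebx, &ecx, &edx) != 0)
 *		(void) printf("ymm: %u bytes at offset %u\n", eax, ebx);
 */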
2121 if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
2122 xsave_state_size = 0;
2123 } else if (cpuid_d_valid) {
2124 xsave_state_size = cpi->cpi_xsave.xsav_max_size;
2125 } else {
2126 /* Broken CPUID 0xD, probably in HVM */
2127 cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
2128 "value: hw_low = %d, hw_high = %d, xsave_size = %d"
2129 ", ymm_size = %d, ymm_offset = %d\n",
2130 cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
2131 cpi->cpi_xsave.xsav_hw_features_high,
2132 (int)cpi->cpi_xsave.xsav_max_size,
2133 (int)cpi->cpi_xsave.ymm_size,
2134 (int)cpi->cpi_xsave.ymm_offset);
2136 if (xsave_state_size != 0) {
2138 * This must be a non-boot CPU. We cannot
2139 * continue, because the boot CPU has already
2140 * enabled XSAVE.
2142 ASSERT(cpu->cpu_id != 0);
2143 cmn_err(CE_PANIC, "cpu%d: we have already "
2144 "enabled XSAVE on boot cpu, cannot "
2145 "continue.", cpu->cpu_id);
2146 } else {
2148 * If we reached here on the boot CPU, it's almost
2149 * certain that we'll also reach here on the
2150 * non-boot CPUs. When we're here on the boot CPU
2151 * we should disable the feature; on a non-boot
2152 * CPU we need to confirm that it has already been disabled.
2154 if (cpu->cpu_id == 0) {
2155 remove_x86_feature(x86_featureset,
2156 X86FSET_XSAVE);
2157 remove_x86_feature(x86_featureset,
2158 X86FSET_AVX);
2159 remove_x86_feature(x86_featureset,
2160 X86FSET_F16C);
2161 remove_x86_feature(x86_featureset,
2162 X86FSET_BMI1);
2163 remove_x86_feature(x86_featureset,
2164 X86FSET_BMI2);
2165 remove_x86_feature(x86_featureset,
2166 X86FSET_FMA);
2167 remove_x86_feature(x86_featureset,
2168 X86FSET_AVX2);
2169 remove_x86_feature(x86_featureset,
2170 X86FSET_MPX);
2171 remove_x86_feature(x86_featureset,
2172 X86FSET_AVX512F);
2173 remove_x86_feature(x86_featureset,
2174 X86FSET_AVX512DQ);
2175 remove_x86_feature(x86_featureset,
2176 X86FSET_AVX512PF);
2177 remove_x86_feature(x86_featureset,
2178 X86FSET_AVX512ER);
2179 remove_x86_feature(x86_featureset,
2180 X86FSET_AVX512CD);
2181 remove_x86_feature(x86_featureset,
2182 X86FSET_AVX512BW);
2183 remove_x86_feature(x86_featureset,
2184 X86FSET_AVX512VL);
2185 remove_x86_feature(x86_featureset,
2186 X86FSET_AVX512FMA);
2187 remove_x86_feature(x86_featureset,
2188 X86FSET_AVX512VBMI);
2189 remove_x86_feature(x86_featureset,
2190 X86FSET_AVX512VPOPCDQ);
2191 remove_x86_feature(x86_featureset,
2192 X86FSET_AVX512NNIW);
2193 remove_x86_feature(x86_featureset,
2194 X86FSET_AVX512FMAPS);
2196 CPI_FEATURES_ECX(cpi) &=
2197 ~CPUID_INTC_ECX_XSAVE;
2198 CPI_FEATURES_ECX(cpi) &=
2199 ~CPUID_INTC_ECX_AVX;
2200 CPI_FEATURES_ECX(cpi) &=
2201 ~CPUID_INTC_ECX_F16C;
2202 CPI_FEATURES_ECX(cpi) &=
2203 ~CPUID_INTC_ECX_FMA;
2204 CPI_FEATURES_7_0_EBX(cpi) &=
2205 ~CPUID_INTC_EBX_7_0_BMI1;
2206 CPI_FEATURES_7_0_EBX(cpi) &=
2207 ~CPUID_INTC_EBX_7_0_BMI2;
2208 CPI_FEATURES_7_0_EBX(cpi) &=
2209 ~CPUID_INTC_EBX_7_0_AVX2;
2210 CPI_FEATURES_7_0_EBX(cpi) &=
2211 ~CPUID_INTC_EBX_7_0_MPX;
2212 CPI_FEATURES_7_0_EBX(cpi) &=
2213 ~CPUID_INTC_EBX_7_0_ALL_AVX512;
2215 CPI_FEATURES_7_0_ECX(cpi) &=
2216 ~CPUID_INTC_ECX_7_0_ALL_AVX512;
2218 CPI_FEATURES_7_0_EDX(cpi) &=
2219 ~CPUID_INTC_EDX_7_0_ALL_AVX512;
2221 xsave_force_disable = B_TRUE;
2222 } else {
2223 VERIFY(is_x86_feature(x86_featureset,
2224 X86FSET_XSAVE) == B_FALSE);
2231 if ((cpi->cpi_xmaxeax & 0x80000000) == 0)
2232 goto pass2_done;
2234 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD)
2235 nmax = NMAX_CPI_EXTD;
2237 * Copy the extended properties, fixing them as we go.
2238 * (We already handled n == 0 and n == 1 in pass 1)
2240 iptr = (void *)cpi->cpi_brandstr;
2241 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
2242 cp->cp_eax = 0x80000000 + n;
2243 (void) __cpuid_insn(cp);
2244 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp);
2245 switch (n) {
2246 case 2:
2247 case 3:
2248 case 4:
2250 * Extract the brand string
2252 *iptr++ = cp->cp_eax;
2253 *iptr++ = cp->cp_ebx;
2254 *iptr++ = cp->cp_ecx;
2255 *iptr++ = cp->cp_edx;
2256 break;
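/*
 * Assembled in order, the three extended functions yield a 48-byte,
 * nominally NUL-terminated brand string.  A minimal userland sketch,
 * assuming GCC/clang's <cpuid.h> (not part of this file):
 *
 *	#include <cpuid.h>
 *	#include <stdio.h>
 *
 *	unsigned int brand[13] = { 0 };		(52 bytes, last word stays 0)
 *	unsigned int *p = brand;
 *
 *	for (unsigned int fn = 0x80000002; fn <= 0x80000004; fn++) {
 *		unsigned int eax, ebx, ecx, edx;
 *
 *		if (__get_cpuid(fn, &eax, &ebx, &ecx, &edx) == 0)
 *			break;
 *		*p++ = eax; *p++ = ebx; *p++ = ecx; *p++ = edx;
 *	}
 *	(void) printf("%s\n", (char *)brand);
 */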
2257 case 5:
2258 switch (cpi->cpi_vendor) {
2259 case X86_VENDOR_AMD:
2261 * The Athlon and Duron were the first
2262 * parts to report the sizes of the
2263 * TLB for large pages. Before then,
2264 * we don't trust the data.
2266 if (cpi->cpi_family < 6 ||
2267 (cpi->cpi_family == 6 &&
2268 cpi->cpi_model < 1))
2269 cp->cp_eax = 0;
2270 break;
2271 default:
2272 break;
2274 break;
2275 case 6:
2276 switch (cpi->cpi_vendor) {
2277 case X86_VENDOR_AMD:
2279 * The Athlon and Duron were the first
2280 * AMD parts with L2 TLBs.
2281 * Before then, don't trust the data.
2283 if (cpi->cpi_family < 6 ||
2284 cpi->cpi_family == 6 &&
2285 cpi->cpi_model < 1)
2286 cp->cp_eax = cp->cp_ebx = 0;
2288 * AMD Duron rev A0 reports L2
2289 * cache size incorrectly as 1K
2290 * when it is really 64K
2292 if (cpi->cpi_family == 6 &&
2293 cpi->cpi_model == 3 &&
2294 cpi->cpi_step == 0) {
2295 cp->cp_ecx &= 0xffff;
2296 cp->cp_ecx |= 0x400000;
2298 break;
2299 case X86_VENDOR_Cyrix: /* VIA C3 */
2301 * VIA C3 processors are a bit messed
2302 * up w.r.t. encoding cache sizes in %ecx
2304 if (cpi->cpi_family != 6)
2305 break;
2307 * models 7 and 8 were incorrectly encoded
2309 * xxx is model 8 really broken?
2311 if (cpi->cpi_model == 7 ||
2312 cpi->cpi_model == 8)
2313 cp->cp_ecx =
2314 BITX(cp->cp_ecx, 31, 24) << 16 |
2315 BITX(cp->cp_ecx, 23, 16) << 12 |
2316 BITX(cp->cp_ecx, 15, 8) << 8 |
2317 BITX(cp->cp_ecx, 7, 0);
2319 * model 9 stepping 1 has wrong associativity
2321 if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
2322 cp->cp_ecx |= 8 << 12;
2323 break;
2324 case X86_VENDOR_Intel:
2326 * Extended L2 Cache features function.
2327 * First appeared on Prescott.
2329 default:
2330 break;
2332 break;
2333 default:
2334 break;
2338 pass2_done:
2339 cpi->cpi_pass = 2;
2342 static const char *
2343 intel_cpubrand(const struct cpuid_info *cpi)
2345 int i;
2347 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2348 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
2349 return ("i486");
2351 switch (cpi->cpi_family) {
2352 case 5:
2353 return ("Intel Pentium(r)");
2354 case 6:
2355 switch (cpi->cpi_model) {
2356 uint_t celeron, xeon;
2357 const struct cpuid_regs *cp;
2358 case 0:
2359 case 1:
2360 case 2:
2361 return ("Intel Pentium(r) Pro");
2362 case 3:
2363 case 4:
2364 return ("Intel Pentium(r) II");
2365 case 6:
2366 return ("Intel Celeron(r)");
2367 case 5:
2368 case 7:
2369 celeron = xeon = 0;
2370 cp = &cpi->cpi_std[2]; /* cache info */
2372 for (i = 1; i < 4; i++) {
2373 uint_t tmp;
2375 tmp = (cp->cp_eax >> (8 * i)) & 0xff;
2376 if (tmp == 0x40)
2377 celeron++;
2378 if (tmp >= 0x44 && tmp <= 0x45)
2379 xeon++;
2382 for (i = 0; i < 2; i++) {
2383 uint_t tmp;
2385 tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
2386 if (tmp == 0x40)
2387 celeron++;
2388 else if (tmp >= 0x44 && tmp <= 0x45)
2389 xeon++;
2392 for (i = 0; i < 4; i++) {
2393 uint_t tmp;
2395 tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
2396 if (tmp == 0x40)
2397 celeron++;
2398 else if (tmp >= 0x44 && tmp <= 0x45)
2399 xeon++;
2402 for (i = 0; i < 4; i++) {
2403 uint_t tmp;
2405 tmp = (cp->cp_edx >> (8 * i)) & 0xff;
2406 if (tmp == 0x40)
2407 celeron++;
2408 else if (tmp >= 0x44 && tmp <= 0x45)
2409 xeon++;
2412 if (celeron)
2413 return ("Intel Celeron(r)");
2414 if (xeon)
2415 return (cpi->cpi_model == 5 ?
2416 "Intel Pentium(r) II Xeon(tm)" :
2417 "Intel Pentium(r) III Xeon(tm)");
2418 return (cpi->cpi_model == 5 ?
2419 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
2420 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
2421 default:
2422 break;
2424 default:
2425 break;
2428 /* BrandID is present if the field is nonzero */
2429 if (cpi->cpi_brandid != 0) {
2430 static const struct {
2431 uint_t bt_bid;
2432 const char *bt_str;
2433 } brand_tbl[] = {
2434 { 0x1, "Intel(r) Celeron(r)" },
2435 { 0x2, "Intel(r) Pentium(r) III" },
2436 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" },
2437 { 0x4, "Intel(r) Pentium(r) III" },
2438 { 0x6, "Mobile Intel(r) Pentium(r) III" },
2439 { 0x7, "Mobile Intel(r) Celeron(r)" },
2440 { 0x8, "Intel(r) Pentium(r) 4" },
2441 { 0x9, "Intel(r) Pentium(r) 4" },
2442 { 0xa, "Intel(r) Celeron(r)" },
2443 { 0xb, "Intel(r) Xeon(tm)" },
2444 { 0xc, "Intel(r) Xeon(tm) MP" },
2445 { 0xe, "Mobile Intel(r) Pentium(r) 4" },
2446 { 0xf, "Mobile Intel(r) Celeron(r)" },
2447 { 0x11, "Mobile Genuine Intel(r)" },
2448 { 0x12, "Intel(r) Celeron(r) M" },
2449 { 0x13, "Mobile Intel(r) Celeron(r)" },
2450 { 0x14, "Intel(r) Celeron(r)" },
2451 { 0x15, "Mobile Genuine Intel(r)" },
2452 { 0x16, "Intel(r) Pentium(r) M" },
2453 { 0x17, "Mobile Intel(r) Celeron(r)" }
2455 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
2456 uint_t sgn;
2458 sgn = (cpi->cpi_family << 8) |
2459 (cpi->cpi_model << 4) | cpi->cpi_step;
2461 for (i = 0; i < btblmax; i++)
2462 if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
2463 break;
2464 if (i < btblmax) {
2465 if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
2466 return ("Intel(r) Celeron(r)");
2467 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
2468 return ("Intel(r) Xeon(tm) MP");
2469 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
2470 return ("Intel(r) Xeon(tm)");
2471 return (brand_tbl[i].bt_str);
2475 return (NULL);
2478 static const char *
2479 amd_cpubrand(const struct cpuid_info *cpi)
2481 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2482 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
2483 return ("i486 compatible");
2485 switch (cpi->cpi_family) {
2486 case 5:
2487 switch (cpi->cpi_model) {
2488 case 0:
2489 case 1:
2490 case 2:
2491 case 3:
2492 case 4:
2493 case 5:
2494 return ("AMD-K5(r)");
2495 case 6:
2496 case 7:
2497 return ("AMD-K6(r)");
2498 case 8:
2499 return ("AMD-K6(r)-2");
2500 case 9:
2501 return ("AMD-K6(r)-III");
2502 default:
2503 return ("AMD (family 5)");
2505 case 6:
2506 switch (cpi->cpi_model) {
2507 case 1:
2508 return ("AMD-K7(tm)");
2509 case 0:
2510 case 2:
2511 case 4:
2512 return ("AMD Athlon(tm)");
2513 case 3:
2514 case 7:
2515 return ("AMD Duron(tm)");
2516 case 6:
2517 case 8:
2518 case 10:
2520 * Use the L2 cache size to distinguish
2522 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
2523 "AMD Athlon(tm)" : "AMD Duron(tm)");
2524 default:
2525 return ("AMD (family 6)");
2527 default:
2528 break;
2531 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
2532 cpi->cpi_brandid != 0) {
2533 switch (BITX(cpi->cpi_brandid, 7, 5)) {
2534 case 3:
2535 return ("AMD Opteron(tm) UP 1xx");
2536 case 4:
2537 return ("AMD Opteron(tm) DP 2xx");
2538 case 5:
2539 return ("AMD Opteron(tm) MP 8xx");
2540 default:
2541 return ("AMD Opteron(tm)");
2545 return (NULL);
2548 static const char *
2549 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
2551 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2552 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
2553 type == X86_TYPE_CYRIX_486)
2554 return ("i486 compatible");
2556 switch (type) {
2557 case X86_TYPE_CYRIX_6x86:
2558 return ("Cyrix 6x86");
2559 case X86_TYPE_CYRIX_6x86L:
2560 return ("Cyrix 6x86L");
2561 case X86_TYPE_CYRIX_6x86MX:
2562 return ("Cyrix 6x86MX");
2563 case X86_TYPE_CYRIX_GXm:
2564 return ("Cyrix GXm");
2565 case X86_TYPE_CYRIX_MediaGX:
2566 return ("Cyrix MediaGX");
2567 case X86_TYPE_CYRIX_MII:
2568 return ("Cyrix M2");
2569 case X86_TYPE_VIA_CYRIX_III:
2570 return ("VIA Cyrix M3");
2571 default:
2573 * Have another wild guess ..
2575 if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
2576 return ("Cyrix 5x86");
2577 else if (cpi->cpi_family == 5) {
2578 switch (cpi->cpi_model) {
2579 case 2:
2580 return ("Cyrix 6x86"); /* Cyrix M1 */
2581 case 4:
2582 return ("Cyrix MediaGX");
2583 default:
2584 break;
2586 } else if (cpi->cpi_family == 6) {
2587 switch (cpi->cpi_model) {
2588 case 0:
2589 return ("Cyrix 6x86MX"); /* Cyrix M2? */
2590 case 5:
2591 case 6:
2592 case 7:
2593 case 8:
2594 case 9:
2595 return ("VIA C3");
2596 default:
2597 break;
2600 break;
2602 return (NULL);
2606 * This only gets called when the CPU extended feature
2607 * brand string (functions 0x80000002, 0x80000003, 0x80000004)
2608 * isn't available, or contains null bytes for some reason.
2610 static void
2611 fabricate_brandstr(struct cpuid_info *cpi)
2613 const char *brand = NULL;
2615 switch (cpi->cpi_vendor) {
2616 case X86_VENDOR_Intel:
2617 brand = intel_cpubrand(cpi);
2618 break;
2619 case X86_VENDOR_AMD:
2620 brand = amd_cpubrand(cpi);
2621 break;
2622 case X86_VENDOR_Cyrix:
2623 brand = cyrix_cpubrand(cpi, x86_type);
2624 break;
2625 case X86_VENDOR_NexGen:
2626 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2627 brand = "NexGen Nx586";
2628 break;
2629 case X86_VENDOR_Centaur:
2630 if (cpi->cpi_family == 5)
2631 switch (cpi->cpi_model) {
2632 case 4:
2633 brand = "Centaur C6";
2634 break;
2635 case 8:
2636 brand = "Centaur C2";
2637 break;
2638 case 9:
2639 brand = "Centaur C3";
2640 break;
2641 default:
2642 break;
2644 break;
2645 case X86_VENDOR_Rise:
2646 if (cpi->cpi_family == 5 &&
2647 (cpi->cpi_model == 0 || cpi->cpi_model == 2))
2648 brand = "Rise mP6";
2649 break;
2650 case X86_VENDOR_SiS:
2651 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2652 brand = "SiS 55x";
2653 break;
2654 case X86_VENDOR_TM:
2655 if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
2656 brand = "Transmeta Crusoe TM3x00 or TM5x00";
2657 break;
2658 case X86_VENDOR_NSC:
2659 case X86_VENDOR_UMC:
2660 default:
2661 break;
2663 if (brand) {
2664 (void) strcpy((char *)cpi->cpi_brandstr, brand);
2665 return;
2669 * If all else fails ...
2671 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
2672 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
2673 cpi->cpi_model, cpi->cpi_step);
2677 * This routine is called just after kernel memory allocation
2678 * becomes available on cpu0, and as part of mp_startup() on
2679 * the other cpus.
2681 * Fixup the brand string, and collect any information from cpuid
2682 * that requires dynamically allocated storage to represent.
2684 /*ARGSUSED*/
2685 void
2686 cpuid_pass3(cpu_t *cpu)
2688 int i, max, shft, level, size;
2689 struct cpuid_regs regs;
2690 struct cpuid_regs *cp;
2691 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2693 ASSERT(cpi->cpi_pass == 2);
2696 * Function 4: Deterministic cache parameters
2698 * Take this opportunity to detect the number of threads
2699 * sharing the last level cache, and construct a corresponding
2700 * cache id. The respective cpuid_info members are initialized
2701 * to the default case of "no last level cache sharing".
2703 cpi->cpi_ncpu_shr_last_cache = 1;
2704 cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
2706 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) {
2709 * Find the # of elements (size) returned by fn 4, and along
2710 * the way detect last level cache sharing details.
2712 bzero(&regs, sizeof (regs));
2713 cp = &regs;
2714 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
2715 cp->cp_eax = 4;
2716 cp->cp_ecx = i;
2718 (void) __cpuid_insn(cp);
2720 if (CPI_CACHE_TYPE(cp) == 0)
2721 break;
2722 level = CPI_CACHE_LVL(cp);
2723 if (level > max) {
2724 max = level;
2725 cpi->cpi_ncpu_shr_last_cache =
2726 CPI_NTHR_SHR_CACHE(cp) + 1;
2729 cpi->cpi_std_4_size = size = i;
2732 * Allocate the cpi_std_4 array. The first element
2733 * references the regs for fn 4, %ecx == 0, which
2734 * cpuid_pass2() stashed in cpi->cpi_std[4].
2736 if (size > 0) {
2737 cpi->cpi_std_4 =
2738 kmem_alloc(size * sizeof (cp), KM_SLEEP);
2739 cpi->cpi_std_4[0] = &cpi->cpi_std[4];
2742 * Allocate storage to hold the additional regs
2743 * for function 4, %ecx == 1 .. cpi_std_4_size.
2745 * The regs for fn 4, %ecx == 0 have already
2746 * been allocated as indicated above.
2748 for (i = 1; i < size; i++) {
2749 cp = cpi->cpi_std_4[i] =
2750 kmem_zalloc(sizeof (regs), KM_SLEEP);
2751 cp->cp_eax = 4;
2752 cp->cp_ecx = i;
2754 (void) __cpuid_insn(cp);
2758 * Determine the number of bits needed to represent
2759 * the number of CPUs sharing the last level cache.
2761 * Shift off that number of bits from the APIC id to
2762 * derive the cache id.
2764 shft = 0;
2765 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
2766 shft++;
2767 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
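/*
 * For example, with cpi_ncpu_shr_last_cache == 12 the loop above runs
 * through i = 1, 2, 4, 8 and leaves shft == 4 (the ceiling of log2(12)),
 * so an APIC id of 0x1d maps to last-level cache id 0x1d >> 4 == 1.
 */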
2771 * Now fixup the brand string
2773 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) {
2774 fabricate_brandstr(cpi);
2775 } else {
2778 * If we successfully extracted a brand string from the cpuid
2779 * instruction, clean it up by removing leading spaces and
2780 * similar junk.
2782 if (cpi->cpi_brandstr[0]) {
2783 size_t maxlen = sizeof (cpi->cpi_brandstr);
2784 char *src, *dst;
2786 dst = src = (char *)cpi->cpi_brandstr;
2787 src[maxlen - 1] = '\0';
2789 * strip leading spaces
2791 while (*src == ' ')
2792 src++;
2794 * Remove any "Genuine " or "Authentic " prefixes
2796 if (strncmp(src, "Genuine ", 8) == 0)
2797 src += 8;
2798 if (strncmp(src, "Authentic ", 10) == 0)
2799 src += 10;
2802 * Now do an in-place copy.
2803 * Map (R) to (r) and (TM) to (tm).
2804 * The era of teletypes is long gone, and there's
2805 * -really- no need to shout.
2807 while (*src != '\0') {
2808 if (src[0] == '(') {
2809 if (strncmp(src + 1, "R)", 2) == 0) {
2810 (void) strncpy(dst, "(r)", 3);
2811 src += 3;
2812 dst += 3;
2813 continue;
2815 if (strncmp(src + 1, "TM)", 3) == 0) {
2816 (void) strncpy(dst, "(tm)", 4);
2817 src += 4;
2818 dst += 4;
2819 continue;
2822 *dst++ = *src++;
2824 *dst = '\0';
2827 * Finally, remove any trailing spaces
2829 while (--dst > cpi->cpi_brandstr)
2830 if (*dst == ' ')
2831 *dst = '\0';
2832 else
2833 break;
2834 } else
2835 fabricate_brandstr(cpi);
2837 cpi->cpi_pass = 3;
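/*
 * As an illustration of the cleanup above, a hypothetical raw string of
 * "  Genuine Intel(R) Pentium(R) 4 CPU 1.80GHz  " comes out as
 * "Intel(r) Pentium(r) 4 CPU 1.80GHz": the leading blanks and the
 * "Genuine " prefix are stripped, "(R)" is folded to "(r)", and the
 * trailing blanks are trimmed.
 */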
2841 * This routine is called out of bind_hwcap() much later in the life
2842 * of the kernel (post_startup()). The job of this routine is to resolve
2843 * the hardware feature support and kernel support for those features into
2844 * what we're actually going to tell applications via the aux vector.
2846 void
2847 cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out)
2849 struct cpuid_info *cpi;
2850 uint_t hwcap_flags = 0, hwcap_flags_2 = 0;
2852 if (cpu == NULL)
2853 cpu = CPU;
2854 cpi = cpu->cpu_m.mcpu_cpi;
2856 ASSERT(cpi->cpi_pass == 3);
2858 if (cpi->cpi_maxeax >= 1) {
2859 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
2860 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
2861 uint32_t *ebx = &cpi->cpi_support[STD_EBX_FEATURES];
2863 *edx = CPI_FEATURES_EDX(cpi);
2864 *ecx = CPI_FEATURES_ECX(cpi);
2865 *ebx = CPI_FEATURES_7_0_EBX(cpi);
2868 * [these require explicit kernel support]
2870 if (!is_x86_feature(x86_featureset, X86FSET_SEP))
2871 *edx &= ~CPUID_INTC_EDX_SEP;
2873 if (!is_x86_feature(x86_featureset, X86FSET_SSE))
2874 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
2875 if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
2876 *edx &= ~CPUID_INTC_EDX_SSE2;
2878 if (!is_x86_feature(x86_featureset, X86FSET_HTT))
2879 *edx &= ~CPUID_INTC_EDX_HTT;
2881 if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
2882 *ecx &= ~CPUID_INTC_ECX_SSE3;
2884 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
2885 *ecx &= ~CPUID_INTC_ECX_SSSE3;
2886 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
2887 *ecx &= ~CPUID_INTC_ECX_SSE4_1;
2888 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
2889 *ecx &= ~CPUID_INTC_ECX_SSE4_2;
2890 if (!is_x86_feature(x86_featureset, X86FSET_AES))
2891 *ecx &= ~CPUID_INTC_ECX_AES;
2892 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
2893 *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
2894 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
2895 *ecx &= ~(CPUID_INTC_ECX_XSAVE |
2896 CPUID_INTC_ECX_OSXSAVE);
2897 if (!is_x86_feature(x86_featureset, X86FSET_AVX))
2898 *ecx &= ~CPUID_INTC_ECX_AVX;
2899 if (!is_x86_feature(x86_featureset, X86FSET_F16C))
2900 *ecx &= ~CPUID_INTC_ECX_F16C;
2901 if (!is_x86_feature(x86_featureset, X86FSET_FMA))
2902 *ecx &= ~CPUID_INTC_ECX_FMA;
2903 if (!is_x86_feature(x86_featureset, X86FSET_BMI1))
2904 *ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
2905 if (!is_x86_feature(x86_featureset, X86FSET_BMI2))
2906 *ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
2907 if (!is_x86_feature(x86_featureset, X86FSET_AVX2))
2908 *ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
2909 if (!is_x86_feature(x86_featureset, X86FSET_RDSEED))
2910 *ebx &= ~CPUID_INTC_EBX_7_0_RDSEED;
2911 if (!is_x86_feature(x86_featureset, X86FSET_ADX))
2912 *ebx &= ~CPUID_INTC_EBX_7_0_ADX;
2915 * [no explicit support required beyond x87 fp context]
2917 if (!fpu_exists)
2918 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
2921 * Now map the supported feature vector to things that we
2922 * think userland will care about.
2924 if (*edx & CPUID_INTC_EDX_SEP)
2925 hwcap_flags |= AV_386_SEP;
2926 if (*edx & CPUID_INTC_EDX_SSE)
2927 hwcap_flags |= AV_386_FXSR | AV_386_SSE;
2928 if (*edx & CPUID_INTC_EDX_SSE2)
2929 hwcap_flags |= AV_386_SSE2;
2930 if (*ecx & CPUID_INTC_ECX_SSE3)
2931 hwcap_flags |= AV_386_SSE3;
2932 if (*ecx & CPUID_INTC_ECX_SSSE3)
2933 hwcap_flags |= AV_386_SSSE3;
2934 if (*ecx & CPUID_INTC_ECX_SSE4_1)
2935 hwcap_flags |= AV_386_SSE4_1;
2936 if (*ecx & CPUID_INTC_ECX_SSE4_2)
2937 hwcap_flags |= AV_386_SSE4_2;
2938 if (*ecx & CPUID_INTC_ECX_MOVBE)
2939 hwcap_flags |= AV_386_MOVBE;
2940 if (*ecx & CPUID_INTC_ECX_AES)
2941 hwcap_flags |= AV_386_AES;
2942 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
2943 hwcap_flags |= AV_386_PCLMULQDQ;
2944 if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
2945 (*ecx & CPUID_INTC_ECX_OSXSAVE)) {
2946 hwcap_flags |= AV_386_XSAVE;
2948 if (*ecx & CPUID_INTC_ECX_AVX) {
2949 uint32_t *ecx_7 = &CPI_FEATURES_7_0_ECX(cpi);
2950 uint32_t *edx_7 = &CPI_FEATURES_7_0_EDX(cpi);
2952 hwcap_flags |= AV_386_AVX;
2953 if (*ecx & CPUID_INTC_ECX_F16C)
2954 hwcap_flags_2 |= AV_386_2_F16C;
2955 if (*ecx & CPUID_INTC_ECX_FMA)
2956 hwcap_flags_2 |= AV_386_2_FMA;
2958 if (*ebx & CPUID_INTC_EBX_7_0_BMI1)
2959 hwcap_flags_2 |= AV_386_2_BMI1;
2960 if (*ebx & CPUID_INTC_EBX_7_0_BMI2)
2961 hwcap_flags_2 |= AV_386_2_BMI2;
2962 if (*ebx & CPUID_INTC_EBX_7_0_AVX2)
2963 hwcap_flags_2 |= AV_386_2_AVX2;
2964 if (*ebx & CPUID_INTC_EBX_7_0_AVX512F)
2965 hwcap_flags_2 |= AV_386_2_AVX512F;
2966 if (*ebx & CPUID_INTC_EBX_7_0_AVX512DQ)
2967 hwcap_flags_2 |= AV_386_2_AVX512DQ;
2968 if (*ebx & CPUID_INTC_EBX_7_0_AVX512IFMA)
2969 hwcap_flags_2 |= AV_386_2_AVX512IFMA;
2970 if (*ebx & CPUID_INTC_EBX_7_0_AVX512PF)
2971 hwcap_flags_2 |= AV_386_2_AVX512PF;
2972 if (*ebx & CPUID_INTC_EBX_7_0_AVX512ER)
2973 hwcap_flags_2 |= AV_386_2_AVX512ER;
2974 if (*ebx & CPUID_INTC_EBX_7_0_AVX512CD)
2975 hwcap_flags_2 |= AV_386_2_AVX512CD;
2976 if (*ebx & CPUID_INTC_EBX_7_0_AVX512BW)
2977 hwcap_flags_2 |= AV_386_2_AVX512BW;
2978 if (*ebx & CPUID_INTC_EBX_7_0_AVX512VL)
2979 hwcap_flags_2 |= AV_386_2_AVX512VL;
2981 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VBMI)
2982 hwcap_flags_2 |= AV_386_2_AVX512VBMI;
2983 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
2984 hwcap_flags_2 |= AV_386_2_AVX512VPOPCDQ;
2986 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124NNIW)
2987 hwcap_flags_2 |= AV_386_2_AVX512_4NNIW;
2988 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124FMAPS)
2989 hwcap_flags_2 |= AV_386_2_AVX512_4FMAPS;
2992 if (*ecx & CPUID_INTC_ECX_VMX)
2993 hwcap_flags |= AV_386_VMX;
2994 if (*ecx & CPUID_INTC_ECX_POPCNT)
2995 hwcap_flags |= AV_386_POPCNT;
2996 if (*edx & CPUID_INTC_EDX_FPU)
2997 hwcap_flags |= AV_386_FPU;
2998 if (*edx & CPUID_INTC_EDX_MMX)
2999 hwcap_flags |= AV_386_MMX;
3001 if (*edx & CPUID_INTC_EDX_TSC)
3002 hwcap_flags |= AV_386_TSC;
3003 if (*edx & CPUID_INTC_EDX_CX8)
3004 hwcap_flags |= AV_386_CX8;
3005 if (*edx & CPUID_INTC_EDX_CMOV)
3006 hwcap_flags |= AV_386_CMOV;
3007 if (*ecx & CPUID_INTC_ECX_CX16)
3008 hwcap_flags |= AV_386_CX16;
3010 if (*ecx & CPUID_INTC_ECX_RDRAND)
3011 hwcap_flags_2 |= AV_386_2_RDRAND;
3012 if (*ebx & CPUID_INTC_EBX_7_0_ADX)
3013 hwcap_flags_2 |= AV_386_2_ADX;
3014 if (*ebx & CPUID_INTC_EBX_7_0_RDSEED)
3015 hwcap_flags_2 |= AV_386_2_RDSEED;
3019 if (cpi->cpi_xmaxeax < 0x80000001)
3020 goto pass4_done;
3022 switch (cpi->cpi_vendor) {
3023 struct cpuid_regs cp;
3024 uint32_t *edx, *ecx;
3026 case X86_VENDOR_Intel:
3028 * Intel seems to have duplicated what was necessary
3029 * here to make the initial crop of 64-bit OSes work.
3030 * Hopefully, those are the only "extended" bits
3031 * they'll add.
3033 /*FALLTHROUGH*/
3035 case X86_VENDOR_AMD:
3036 edx = &cpi->cpi_support[AMD_EDX_FEATURES];
3037 ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
3039 *edx = CPI_FEATURES_XTD_EDX(cpi);
3040 *ecx = CPI_FEATURES_XTD_ECX(cpi);
3043 * [these features require explicit kernel support]
3045 switch (cpi->cpi_vendor) {
3046 case X86_VENDOR_Intel:
3047 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
3048 *edx &= ~CPUID_AMD_EDX_TSCP;
3049 break;
3051 case X86_VENDOR_AMD:
3052 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
3053 *edx &= ~CPUID_AMD_EDX_TSCP;
3054 if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
3055 *ecx &= ~CPUID_AMD_ECX_SSE4A;
3056 break;
3058 default:
3059 break;
3063 * [no explicit support required beyond
3064 * x87 fp context and exception handlers]
3066 if (!fpu_exists)
3067 *edx &= ~(CPUID_AMD_EDX_MMXamd |
3068 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
3070 if (!is_x86_feature(x86_featureset, X86FSET_NX))
3071 *edx &= ~CPUID_AMD_EDX_NX;
3072 #if !defined(__amd64)
3073 *edx &= ~CPUID_AMD_EDX_LM;
3074 #endif
3076 * Now map the supported feature vector to
3077 * things that we think userland will care about.
3079 #if defined(__amd64)
3080 if (*edx & CPUID_AMD_EDX_SYSC)
3081 hwcap_flags |= AV_386_AMD_SYSC;
3082 #endif
3083 if (*edx & CPUID_AMD_EDX_MMXamd)
3084 hwcap_flags |= AV_386_AMD_MMX;
3085 if (*edx & CPUID_AMD_EDX_3DNow)
3086 hwcap_flags |= AV_386_AMD_3DNow;
3087 if (*edx & CPUID_AMD_EDX_3DNowx)
3088 hwcap_flags |= AV_386_AMD_3DNowx;
3089 if (*ecx & CPUID_AMD_ECX_SVM)
3090 hwcap_flags |= AV_386_AMD_SVM;
3092 switch (cpi->cpi_vendor) {
3093 case X86_VENDOR_AMD:
3094 if (*edx & CPUID_AMD_EDX_TSCP)
3095 hwcap_flags |= AV_386_TSCP;
3096 if (*ecx & CPUID_AMD_ECX_AHF64)
3097 hwcap_flags |= AV_386_AHF;
3098 if (*ecx & CPUID_AMD_ECX_SSE4A)
3099 hwcap_flags |= AV_386_AMD_SSE4A;
3100 if (*ecx & CPUID_AMD_ECX_LZCNT)
3101 hwcap_flags |= AV_386_AMD_LZCNT;
3102 break;
3104 case X86_VENDOR_Intel:
3105 if (*edx & CPUID_AMD_EDX_TSCP)
3106 hwcap_flags |= AV_386_TSCP;
3108 * Aarrgh.
3109 * Intel uses a different bit in the same word.
3111 if (*ecx & CPUID_INTC_ECX_AHF64)
3112 hwcap_flags |= AV_386_AHF;
3113 break;
3115 default:
3116 break;
3118 break;
3120 case X86_VENDOR_TM:
3121 cp.cp_eax = 0x80860001;
3122 (void) __cpuid_insn(&cp);
3123 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
3124 break;
3126 default:
3127 break;
3130 pass4_done:
3131 cpi->cpi_pass = 4;
3132 if (hwcap_out != NULL) {
3133 hwcap_out[0] = hwcap_flags;
3134 hwcap_out[1] = hwcap_flags_2;
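/*
 * Userland consumes the two hwcap words computed here through the aux
 * vector; a minimal sketch, assuming the getisax(3C) interface (the
 * use_*_path() calls are placeholders):
 *
 *	#include <sys/auxv.h>
 *
 *	uint32_t hw[2] = { 0, 0 };
 *
 *	(void) getisax(hw, 2);
 *	if (hw[0] & AV_386_SSE2)
 *		use_sse2_path();
 *	if (hw[1] & AV_386_2_AVX2)
 *		use_avx2_path();
 */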
3140 * Simulate the cpuid instruction using the data we previously
3141 * captured about this CPU. We try our best to return the truth
3142 * about the hardware, independently of kernel support.
3144 uint32_t
3145 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
3147 struct cpuid_info *cpi;
3148 struct cpuid_regs *xcp;
3150 if (cpu == NULL)
3151 cpu = CPU;
3152 cpi = cpu->cpu_m.mcpu_cpi;
3154 ASSERT(cpuid_checkpass(cpu, 3));
3157 * CPUID data is cached in two separate places: cpi_std for standard
3158 * CPUID functions, and cpi_extd for extended CPUID functions.
3160 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD)
3161 xcp = &cpi->cpi_std[cp->cp_eax];
3162 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax &&
3163 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD)
3164 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000];
3165 else
3167 * The caller is asking for data from an input parameter which
3168 * the kernel has not cached. In this case we go fetch from
3169 * the hardware and return the data directly to the user.
3171 return (__cpuid_insn(cp));
3173 cp->cp_eax = xcp->cp_eax;
3174 cp->cp_ebx = xcp->cp_ebx;
3175 cp->cp_ecx = xcp->cp_ecx;
3176 cp->cp_edx = xcp->cp_edx;
3177 return (cp->cp_eax);
3181 cpuid_checkpass(cpu_t *cpu, int pass)
3183 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
3184 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
3188 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
3190 ASSERT(cpuid_checkpass(cpu, 3));
3192 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
3196 cpuid_is_cmt(cpu_t *cpu)
3198 if (cpu == NULL)
3199 cpu = CPU;
3201 ASSERT(cpuid_checkpass(cpu, 1));
3203 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
3207 * AMD and Intel both implement the 64-bit variant of the syscall
3208 * instruction (syscallq), so if there's -any- support for syscall,
3209 * cpuid currently says "yes, we support this".
3211 * However, Intel decided to -not- implement the 32-bit variant of the
3212 * syscall instruction, so we provide a predicate to allow our caller
3213 * to test that subtlety here.
3215 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor,
3216 * even in the case where the hardware would in fact support it.
3218 /*ARGSUSED*/
3220 cpuid_syscall32_insn(cpu_t *cpu)
3222 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
3224 if (cpu == NULL)
3225 cpu = CPU;
3227 /*CSTYLED*/
3229 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3231 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
3232 cpi->cpi_xmaxeax >= 0x80000001 &&
3233 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
3234 return (1);
3236 return (0);
3240 cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
3242 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3244 static const char fmt[] =
3245 "x86 (%s %X family %d model %d step %d clock %d MHz)";
3246 static const char fmt_ht[] =
3247 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
3249 ASSERT(cpuid_checkpass(cpu, 1));
3251 if (cpuid_is_cmt(cpu))
3252 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
3253 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
3254 cpi->cpi_family, cpi->cpi_model,
3255 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
3256 return (snprintf(s, n, fmt,
3257 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
3258 cpi->cpi_family, cpi->cpi_model,
3259 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
3262 const char *
3263 cpuid_getvendorstr(cpu_t *cpu)
3265 ASSERT(cpuid_checkpass(cpu, 1));
3266 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
3269 uint_t
3270 cpuid_getvendor(cpu_t *cpu)
3272 ASSERT(cpuid_checkpass(cpu, 1));
3273 return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
3276 uint_t
3277 cpuid_getfamily(cpu_t *cpu)
3279 ASSERT(cpuid_checkpass(cpu, 1));
3280 return (cpu->cpu_m.mcpu_cpi->cpi_family);
3283 uint_t
3284 cpuid_getmodel(cpu_t *cpu)
3286 ASSERT(cpuid_checkpass(cpu, 1));
3287 return (cpu->cpu_m.mcpu_cpi->cpi_model);
3290 uint_t
3291 cpuid_get_ncpu_per_chip(cpu_t *cpu)
3293 ASSERT(cpuid_checkpass(cpu, 1));
3294 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
3297 uint_t
3298 cpuid_get_ncore_per_chip(cpu_t *cpu)
3300 ASSERT(cpuid_checkpass(cpu, 1));
3301 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
3304 uint_t
3305 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
3307 ASSERT(cpuid_checkpass(cpu, 2));
3308 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
3311 id_t
3312 cpuid_get_last_lvl_cacheid(cpu_t *cpu)
3314 ASSERT(cpuid_checkpass(cpu, 2));
3315 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
3318 uint_t
3319 cpuid_getstep(cpu_t *cpu)
3321 ASSERT(cpuid_checkpass(cpu, 1));
3322 return (cpu->cpu_m.mcpu_cpi->cpi_step);
3325 uint_t
3326 cpuid_getsig(struct cpu *cpu)
3328 ASSERT(cpuid_checkpass(cpu, 1));
3329 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
3332 uint32_t
3333 cpuid_getchiprev(struct cpu *cpu)
3335 ASSERT(cpuid_checkpass(cpu, 1));
3336 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
3339 const char *
3340 cpuid_getchiprevstr(struct cpu *cpu)
3342 ASSERT(cpuid_checkpass(cpu, 1));
3343 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
3346 uint32_t
3347 cpuid_getsockettype(struct cpu *cpu)
3349 ASSERT(cpuid_checkpass(cpu, 1));
3350 return (cpu->cpu_m.mcpu_cpi->cpi_socket);
3353 const char *
3354 cpuid_getsocketstr(cpu_t *cpu)
3356 static const char *socketstr = NULL;
3357 struct cpuid_info *cpi;
3359 ASSERT(cpuid_checkpass(cpu, 1));
3360 cpi = cpu->cpu_m.mcpu_cpi;
3362 /* Assume that socket types are the same across the system */
3363 if (socketstr == NULL)
3364 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
3365 cpi->cpi_model, cpi->cpi_step);
3368 return (socketstr);
3372 cpuid_get_chipid(cpu_t *cpu)
3374 ASSERT(cpuid_checkpass(cpu, 1));
3376 if (cpuid_is_cmt(cpu))
3377 return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
3378 return (cpu->cpu_id);
3381 id_t
3382 cpuid_get_coreid(cpu_t *cpu)
3384 ASSERT(cpuid_checkpass(cpu, 1));
3385 return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
3389 cpuid_get_pkgcoreid(cpu_t *cpu)
3391 ASSERT(cpuid_checkpass(cpu, 1));
3392 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
3396 cpuid_get_clogid(cpu_t *cpu)
3398 ASSERT(cpuid_checkpass(cpu, 1));
3399 return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
3403 cpuid_get_cacheid(cpu_t *cpu)
3405 ASSERT(cpuid_checkpass(cpu, 1));
3406 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
3409 uint_t
3410 cpuid_get_procnodeid(cpu_t *cpu)
3412 ASSERT(cpuid_checkpass(cpu, 1));
3413 return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
3416 uint_t
3417 cpuid_get_procnodes_per_pkg(cpu_t *cpu)
3419 ASSERT(cpuid_checkpass(cpu, 1));
3420 return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
3423 uint_t
3424 cpuid_get_compunitid(cpu_t *cpu)
3426 ASSERT(cpuid_checkpass(cpu, 1));
3427 return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
3430 uint_t
3431 cpuid_get_cores_per_compunit(cpu_t *cpu)
3433 ASSERT(cpuid_checkpass(cpu, 1));
3434 return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
3437 /*ARGSUSED*/
3439 cpuid_have_cr8access(cpu_t *cpu)
3441 #if defined(__amd64)
3442 return (1);
3443 #else
3444 struct cpuid_info *cpi;
3446 ASSERT(cpu != NULL);
3447 cpi = cpu->cpu_m.mcpu_cpi;
3448 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
3449 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
3450 return (1);
3451 return (0);
3452 #endif
3455 uint32_t
3456 cpuid_get_apicid(cpu_t *cpu)
3458 ASSERT(cpuid_checkpass(cpu, 1));
3459 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
3460 return (UINT32_MAX);
3461 } else {
3462 return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
3466 void
3467 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
3469 struct cpuid_info *cpi;
3471 if (cpu == NULL)
3472 cpu = CPU;
3473 cpi = cpu->cpu_m.mcpu_cpi;
3475 ASSERT(cpuid_checkpass(cpu, 1));
3477 if (pabits)
3478 *pabits = cpi->cpi_pabits;
3479 if (vabits)
3480 *vabits = cpi->cpi_vabits;
3483 size_t
3484 cpuid_get_xsave_size()
3486 return (MAX(cpuid_info0.cpi_xsave.xsav_max_size,
3487 sizeof (struct xsave_state)));
3491 * Return true if the CPUs on this system require 'pointer clearing' for the
3492 * floating point error pointer exception handling. In the past, this has been
3493 * true for all AMD K7 & K8 CPUs, although newer AMD CPUs have been changed to
3494 * behave the same as Intel. This is checked via the CPUID_AMD_EBX_ERR_PTR_ZERO
3495 * feature bit and is reflected in the cpi_fp_amd_save member. Once this has
3496 * been confirmed on hardware which supports that feature, this test should be
3497 * narrowed. In the meantime, we always follow the existing behavior on any AMD
3498 * CPU.
3500 boolean_t
3501 cpuid_need_fp_excp_handling()
3503 return (cpuid_info0.cpi_vendor == X86_VENDOR_AMD);
3507 * Returns the number of data TLB entries for the given
3508 * pagesize. If it can't be computed, or isn't known, the
3509 * routine returns zero. If you ask about an architecturally
3510 * impossible pagesize, the routine will panic (so that the
3511 * hat implementor knows that things are inconsistent.)
3513 uint_t
3514 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
3516 struct cpuid_info *cpi;
3517 uint_t dtlb_nent = 0;
3519 if (cpu == NULL)
3520 cpu = CPU;
3521 cpi = cpu->cpu_m.mcpu_cpi;
3523 ASSERT(cpuid_checkpass(cpu, 1));
3526 * Check the L2 TLB info
3528 if (cpi->cpi_xmaxeax >= 0x80000006) {
3529 struct cpuid_regs *cp = &cpi->cpi_extd[6];
3531 switch (pagesize) {
3533 case 4 * 1024:
3535 * All zero in the top 16 bits of the register
3536 * indicates a unified TLB. Size is in low 16 bits.
3538 if ((cp->cp_ebx & 0xffff0000) == 0)
3539 dtlb_nent = cp->cp_ebx & 0x0000ffff;
3540 else
3541 dtlb_nent = BITX(cp->cp_ebx, 27, 16);
3542 break;
3544 case 2 * 1024 * 1024:
3545 if ((cp->cp_eax & 0xffff0000) == 0)
3546 dtlb_nent = cp->cp_eax & 0x0000ffff;
3547 else
3548 dtlb_nent = BITX(cp->cp_eax, 27, 16);
3549 break;
3551 default:
3552 panic("unknown L2 pagesize");
3553 /*NOTREACHED*/
3557 if (dtlb_nent != 0)
3558 return (dtlb_nent);
3561 * No L2 TLB support for this size, try L1.
3563 if (cpi->cpi_xmaxeax >= 0x80000005) {
3564 struct cpuid_regs *cp = &cpi->cpi_extd[5];
3566 switch (pagesize) {
3567 case 4 * 1024:
3568 dtlb_nent = BITX(cp->cp_ebx, 23, 16);
3569 break;
3570 case 2 * 1024 * 1024:
3571 dtlb_nent = BITX(cp->cp_eax, 23, 16);
3572 break;
3573 default:
3574 panic("unknown L1 d-TLB pagesize");
3575 /*NOTREACHED*/
3579 return (dtlb_nent);
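/*
 * As a worked example of the 0x80000006 decode above: an %ebx of
 * 0x42004200 (hypothetical) has a non-zero upper half, so the 4K data
 * TLB count is BITX(ebx, 27, 16) == 0x200 == 512 entries; had the upper
 * 16 bits been zero, the low 16 bits would instead have given the size
 * of a unified L2 TLB.
 */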
3583 * Return 0 if the erratum is not present or not applicable, positive
3584 * if it is, and negative if the status of the erratum is unknown.
3586 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
3587 * Processors" #25759, Rev 3.57, August 2005
3590 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
3592 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3593 uint_t eax;
3596 * Bail out if this CPU isn't an AMD CPU, or if it's
3597 * a legacy (32-bit) AMD CPU.
3599 if (cpi->cpi_vendor != X86_VENDOR_AMD ||
3600 cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
3601 cpi->cpi_family == 6)
3603 return (0);
3605 eax = cpi->cpi_std[1].cp_eax;
3607 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50)
3608 #define SH_B3(eax) (eax == 0xf51)
3609 #define B(eax) (SH_B0(eax) || SH_B3(eax))
3611 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58)
3613 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
3614 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
3615 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2)
3616 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax))
3618 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
3619 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0)
3620 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0)
3621 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax))
3623 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
3624 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */
3625 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0)
3626 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71)
3627 #define BH_E4(eax) (eax == 0x20fb1)
3628 #define SH_E5(eax) (eax == 0x20f42)
3629 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2)
3630 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32)
3631 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
3632 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
3633 DH_E6(eax) || JH_E6(eax))
3635 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
3636 #define DR_B0(eax) (eax == 0x100f20)
3637 #define DR_B1(eax) (eax == 0x100f21)
3638 #define DR_BA(eax) (eax == 0x100f2a)
3639 #define DR_B2(eax) (eax == 0x100f22)
3640 #define DR_B3(eax) (eax == 0x100f23)
3641 #define RB_C0(eax) (eax == 0x100f40)
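/*
 * For example, an %eax signature of 0x20f42 matches SH_E5 and therefore
 * EX, so erratum 110 (D0 || EX) reports present for that part, while
 * erratum 72 (SH_B0 only) does not.
 */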
3643 switch (erratum) {
3644 case 1:
3645 return (cpi->cpi_family < 0x10);
3646 case 51: /* what does the asterisk mean? */
3647 return (B(eax) || SH_C0(eax) || CG(eax));
3648 case 52:
3649 return (B(eax));
3650 case 57:
3651 return (cpi->cpi_family <= 0x11);
3652 case 58:
3653 return (B(eax));
3654 case 60:
3655 return (cpi->cpi_family <= 0x11);
3656 case 61:
3657 case 62:
3658 case 63:
3659 case 64:
3660 case 65:
3661 case 66:
3662 case 68:
3663 case 69:
3664 case 70:
3665 case 71:
3666 return (B(eax));
3667 case 72:
3668 return (SH_B0(eax));
3669 case 74:
3670 return (B(eax));
3671 case 75:
3672 return (cpi->cpi_family < 0x10);
3673 case 76:
3674 return (B(eax));
3675 case 77:
3676 return (cpi->cpi_family <= 0x11);
3677 case 78:
3678 return (B(eax) || SH_C0(eax));
3679 case 79:
3680 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3681 case 80:
3682 case 81:
3683 case 82:
3684 return (B(eax));
3685 case 83:
3686 return (B(eax) || SH_C0(eax) || CG(eax));
3687 case 85:
3688 return (cpi->cpi_family < 0x10);
3689 case 86:
3690 return (SH_C0(eax) || CG(eax));
3691 case 88:
3692 #if !defined(__amd64)
3693 return (0);
3694 #else
3695 return (B(eax) || SH_C0(eax));
3696 #endif
3697 case 89:
3698 return (cpi->cpi_family < 0x10);
3699 case 90:
3700 return (B(eax) || SH_C0(eax) || CG(eax));
3701 case 91:
3702 case 92:
3703 return (B(eax) || SH_C0(eax));
3704 case 93:
3705 return (SH_C0(eax));
3706 case 94:
3707 return (B(eax) || SH_C0(eax) || CG(eax));
3708 case 95:
3709 #if !defined(__amd64)
3710 return (0);
3711 #else
3712 return (B(eax) || SH_C0(eax));
3713 #endif
3714 case 96:
3715 return (B(eax) || SH_C0(eax) || CG(eax));
3716 case 97:
3717 case 98:
3718 return (SH_C0(eax) || CG(eax));
3719 case 99:
3720 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3721 case 100:
3722 return (B(eax) || SH_C0(eax));
3723 case 101:
3724 case 103:
3725 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3726 case 104:
3727 return (SH_C0(eax) || CG(eax) || D0(eax));
3728 case 105:
3729 case 106:
3730 case 107:
3731 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3732 case 108:
3733 return (DH_CG(eax));
3734 case 109:
3735 return (SH_C0(eax) || CG(eax) || D0(eax));
3736 case 110:
3737 return (D0(eax) || EX(eax));
3738 case 111:
3739 return (CG(eax));
3740 case 112:
3741 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3742 case 113:
3743 return (eax == 0x20fc0);
3744 case 114:
3745 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3746 case 115:
3747 return (SH_E0(eax) || JH_E1(eax));
3748 case 116:
3749 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3750 case 117:
3751 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3752 case 118:
3753 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
3754 JH_E6(eax));
3755 case 121:
3756 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3757 case 122:
3758 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
3759 case 123:
3760 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
3761 case 131:
3762 return (cpi->cpi_family < 0x10);
3763 case 6336786:
3765 * Test for AdvPowerMgmtInfo.TscPStateInvariant
3766 * if this is a K8 family or newer processor
3768 if (CPI_FAMILY(cpi) == 0xf) {
3769 struct cpuid_regs regs;
3770 regs.cp_eax = 0x80000007;
3771 (void) __cpuid_insn(&regs);
3772 return (!(regs.cp_edx & 0x100));
3774 return (0);
3775 case 6323525:
3776 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
3777 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
3779 case 6671130:
3781 * check for processors (pre-Shanghai) that do not provide
3782 * optimal management of 1gb ptes in their tlb.
3784 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
3786 case 298:
3787 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
3788 DR_B2(eax) || RB_C0(eax));
3790 case 721:
3791 #if defined(__amd64)
3792 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12);
3793 #else
3794 return (0);
3795 #endif
3797 default:
3798 return (-1);
3804 * Determine if specified erratum is present via OSVW (OS Visible Workaround).
3805 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
3808 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
3810 struct cpuid_info *cpi;
3811 uint_t osvwid;
3812 static int osvwfeature = -1;
3813 uint64_t osvwlength;
3816 cpi = cpu->cpu_m.mcpu_cpi;
3818 /* confirm OSVW supported */
3819 if (osvwfeature == -1) {
3820 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
3821 } else {
3822 /* assert that osvw feature setting is consistent on all cpus */
3823 ASSERT(osvwfeature ==
3824 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
3826 if (!osvwfeature)
3827 return (-1);
3829 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
3831 switch (erratum) {
3832 case 298: /* osvwid is 0 */
3833 osvwid = 0;
3834 if (osvwlength <= (uint64_t)osvwid) {
3835 /* osvwid 0 is unknown */
3836 return (-1);
3840 * Check the OSVW STATUS MSR to determine the state
3841 * of the erratum where:
3842 * 0 - fixed by HW
3843 * 1 - BIOS has applied the workaround when BIOS
3844 * workaround is available. (Or for other errata,
3845 * OS workaround is required.)
3846 * For a value of 1, caller will confirm that the
3847 * erratum 298 workaround has indeed been applied by BIOS.
3849 * A 1 may be set in cpus that have a HW fix
3850 * in a mixed cpu system. Regarding erratum 298:
3851 * In a multiprocessor platform, the workaround above
3852 * should be applied to all processors regardless of
3853 * silicon revision when an affected processor is
3854 * present.
3857 return (rdmsr(MSR_AMD_OSVW_STATUS +
3858 (osvwid / OSVW_ID_CNT_PER_MSR)) &
3859 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
3861 default:
3862 return (-1);
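/*
 * For erratum 298 the osvwid is 0, so the code above reads
 * MSR_AMD_OSVW_STATUS + 0 and tests bit 0.  More generally, with the
 * usual 64 status bits per MSR (OSVW_ID_CNT_PER_MSR), an osvwid of 70
 * would be found in MSR_AMD_OSVW_STATUS + 1, bit 6.
 */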
3866 static const char assoc_str[] = "associativity";
3867 static const char line_str[] = "line-size";
3868 static const char size_str[] = "size";
3870 static void
3871 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
3872 uint32_t val)
3874 char buf[128];
3877 * ndi_prop_update_int() is used because it is desirable for
3878 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
3880 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
3881 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
3885 * Intel-style cache/tlb description
3887 * Standard cpuid level 2 gives a randomly ordered
3888 * selection of tags that index into a table that describes
3889 * cache and tlb properties.
3892 static const char l1_icache_str[] = "l1-icache";
3893 static const char l1_dcache_str[] = "l1-dcache";
3894 static const char l2_cache_str[] = "l2-cache";
3895 static const char l3_cache_str[] = "l3-cache";
3896 static const char itlb4k_str[] = "itlb-4K";
3897 static const char dtlb4k_str[] = "dtlb-4K";
3898 static const char itlb2M_str[] = "itlb-2M";
3899 static const char itlb4M_str[] = "itlb-4M";
3900 static const char dtlb4M_str[] = "dtlb-4M";
3901 static const char dtlb24_str[] = "dtlb0-2M-4M";
3902 static const char itlb424_str[] = "itlb-4K-2M-4M";
3903 static const char itlb24_str[] = "itlb-2M-4M";
3904 static const char dtlb44_str[] = "dtlb-4K-4M";
3905 static const char sl1_dcache_str[] = "sectored-l1-dcache";
3906 static const char sl2_cache_str[] = "sectored-l2-cache";
3907 static const char itrace_str[] = "itrace-cache";
3908 static const char sl3_cache_str[] = "sectored-l3-cache";
3909 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";
3911 static const struct cachetab {
3912 uint8_t ct_code;
3913 uint8_t ct_assoc;
3914 uint16_t ct_line_size;
3915 size_t ct_size;
3916 const char *ct_label;
3917 } intel_ctab[] = {
3919 * maintain descending order!
3921 * Codes ignored - Reason
3922 * ----------------------
3923 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
3924 * f0H/f1H - Currently we do not interpret prefetch size by design
3926 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
3927 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
3928 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
3929 { 0xde, 12, 64, 6*1024*1024, l3_cache_str},
3930 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
3931 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
3932 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
3933 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
3934 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
3935 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
3936 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
3937 { 0xd0, 4, 64, 512*1024, l3_cache_str},
3938 { 0xca, 4, 0, 512, sh_l2_tlb4k_str},
3939 { 0xc0, 4, 0, 8, dtlb44_str },
3940 { 0xba, 4, 0, 64, dtlb4k_str },
3941 { 0xb4, 4, 0, 256, dtlb4k_str },
3942 { 0xb3, 4, 0, 128, dtlb4k_str },
3943 { 0xb2, 4, 0, 64, itlb4k_str },
3944 { 0xb0, 4, 0, 128, itlb4k_str },
3945 { 0x87, 8, 64, 1024*1024, l2_cache_str},
3946 { 0x86, 4, 64, 512*1024, l2_cache_str},
3947 { 0x85, 8, 32, 2*1024*1024, l2_cache_str},
3948 { 0x84, 8, 32, 1024*1024, l2_cache_str},
3949 { 0x83, 8, 32, 512*1024, l2_cache_str},
3950 { 0x82, 8, 32, 256*1024, l2_cache_str},
3951 { 0x80, 8, 64, 512*1024, l2_cache_str},
3952 { 0x7f, 2, 64, 512*1024, l2_cache_str},
3953 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
3954 { 0x7c, 8, 64, 1024*1024, sl2_cache_str},
3955 { 0x7b, 8, 64, 512*1024, sl2_cache_str},
3956 { 0x7a, 8, 64, 256*1024, sl2_cache_str},
3957 { 0x79, 8, 64, 128*1024, sl2_cache_str},
3958 { 0x78, 8, 64, 1024*1024, l2_cache_str},
3959 { 0x73, 8, 0, 64*1024, itrace_str},
3960 { 0x72, 8, 0, 32*1024, itrace_str},
3961 { 0x71, 8, 0, 16*1024, itrace_str},
3962 { 0x70, 8, 0, 12*1024, itrace_str},
3963 { 0x68, 4, 64, 32*1024, sl1_dcache_str},
3964 { 0x67, 4, 64, 16*1024, sl1_dcache_str},
3965 { 0x66, 4, 64, 8*1024, sl1_dcache_str},
3966 { 0x60, 8, 64, 16*1024, sl1_dcache_str},
3967 { 0x5d, 0, 0, 256, dtlb44_str},
3968 { 0x5c, 0, 0, 128, dtlb44_str},
3969 { 0x5b, 0, 0, 64, dtlb44_str},
3970 { 0x5a, 4, 0, 32, dtlb24_str},
3971 { 0x59, 0, 0, 16, dtlb4k_str},
3972 { 0x57, 4, 0, 16, dtlb4k_str},
3973 { 0x56, 4, 0, 16, dtlb4M_str},
3974 { 0x55, 0, 0, 7, itlb24_str},
3975 { 0x52, 0, 0, 256, itlb424_str},
3976 { 0x51, 0, 0, 128, itlb424_str},
3977 { 0x50, 0, 0, 64, itlb424_str},
3978 { 0x4f, 0, 0, 32, itlb4k_str},
3979 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
3980 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
3981 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
3982 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
3983 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
3984 { 0x49, 16, 64, 4*1024*1024, l3_cache_str},
3985 { 0x48, 12, 64, 3*1024*1024, l2_cache_str},
3986 { 0x47, 8, 64, 8*1024*1024, l3_cache_str},
3987 { 0x46, 4, 64, 4*1024*1024, l3_cache_str},
3988 { 0x45, 4, 32, 2*1024*1024, l2_cache_str},
3989 { 0x44, 4, 32, 1024*1024, l2_cache_str},
3990 { 0x43, 4, 32, 512*1024, l2_cache_str},
3991 { 0x42, 4, 32, 256*1024, l2_cache_str},
3992 { 0x41, 4, 32, 128*1024, l2_cache_str},
3993 { 0x3e, 4, 64, 512*1024, sl2_cache_str},
3994 { 0x3d, 6, 64, 384*1024, sl2_cache_str},
3995 { 0x3c, 4, 64, 256*1024, sl2_cache_str},
3996 { 0x3b, 2, 64, 128*1024, sl2_cache_str},
3997 { 0x3a, 6, 64, 192*1024, sl2_cache_str},
3998 { 0x39, 4, 64, 128*1024, sl2_cache_str},
3999 { 0x30, 8, 64, 32*1024, l1_icache_str},
4000 { 0x2c, 8, 64, 32*1024, l1_dcache_str},
4001 { 0x29, 8, 64, 4096*1024, sl3_cache_str},
4002 { 0x25, 8, 64, 2048*1024, sl3_cache_str},
4003 { 0x23, 8, 64, 1024*1024, sl3_cache_str},
4004 { 0x22, 4, 64, 512*1024, sl3_cache_str},
4005 { 0x0e, 6, 64, 24*1024, l1_dcache_str},
4006 { 0x0d, 4, 32, 16*1024, l1_dcache_str},
4007 { 0x0c, 4, 32, 16*1024, l1_dcache_str},
4008 { 0x0b, 4, 0, 4, itlb4M_str},
4009 { 0x0a, 2, 32, 8*1024, l1_dcache_str},
4010 { 0x08, 4, 32, 16*1024, l1_icache_str},
4011 { 0x06, 4, 32, 8*1024, l1_icache_str},
4012 { 0x05, 4, 0, 32, dtlb4M_str},
4013 { 0x04, 4, 0, 8, dtlb4M_str},
4014 { 0x03, 4, 0, 64, dtlb4k_str},
4015 { 0x02, 4, 0, 2, itlb4M_str},
4016 { 0x01, 4, 0, 32, itlb4k_str},
4017 { 0 }
4020 static const struct cachetab cyrix_ctab[] = {
4021 { 0x70, 4, 0, 32, "tlb-4K" },
4022 { 0x80, 4, 16, 16*1024, "l1-cache" },
4023 { 0 }
4027 * Search a cache table for a matching entry
4029 static const struct cachetab *
4030 find_cacheent(const struct cachetab *ct, uint_t code)
4032 if (code != 0) {
4033 for (; ct->ct_code != 0; ct++)
4034 if (ct->ct_code <= code)
4035 break;
4036 if (ct->ct_code == code)
4037 return (ct);
4039 return (NULL);
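/*
 * Because the tables are kept in descending ct_code order, the loop
 * above stops at the first entry whose code is <= the one sought:
 * looking up 0x7b in intel_ctab lands exactly on the 0x7b entry, while
 * an unknown 0x74 stops at 0x73, fails the equality test, and returns
 * NULL.
 */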
4043 * Populate cachetab entry with L2 or L3 cache-information using
4044 * cpuid function 4. This function is called from intel_walk_cacheinfo()
4045 * when descriptor 0x49 is encountered. It returns 0 if no such cache
4046 * information is found.
4048 static int
4049 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
4051 uint32_t level, i;
4052 int ret = 0;
4054 for (i = 0; i < cpi->cpi_std_4_size; i++) {
4055 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]);
4057 if (level == 2 || level == 3) {
4058 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1;
4059 ct->ct_line_size =
4060 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1;
4061 ct->ct_size = ct->ct_assoc *
4062 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) *
4063 ct->ct_line_size *
4064 (cpi->cpi_std_4[i]->cp_ecx + 1);
4066 if (level == 2) {
4067 ct->ct_label = l2_cache_str;
4068 } else if (level == 3) {
4069 ct->ct_label = l3_cache_str;
4071 ret = 1;
4075 return (ret);
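/*
 * As a worked example of the size computation above: a 16-way cache with
 * 64-byte lines, a single partition and an %ecx (sets - 1) value of 8191
 * works out to 16 * 1 * 64 * 8192 bytes, i.e. an 8MB l2-cache or
 * l3-cache depending on CPI_CACHE_LVL.
 */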
4079 * Walk the cacheinfo descriptors, applying 'func' to every valid element.
4080 * The walk is terminated if the walker returns non-zero.
4082 static void
4083 intel_walk_cacheinfo(struct cpuid_info *cpi,
4084 void *arg, int (*func)(void *, const struct cachetab *))
4086 const struct cachetab *ct;
4087 struct cachetab des_49_ct, des_b1_ct;
4088 uint8_t *dp;
4089 int i;
4091 if ((dp = cpi->cpi_cacheinfo) == NULL)
4092 return;
4093 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
4095 * For overloaded descriptor 0x49 we use cpuid function 4
4096 * if supported by the current processor, to create
4097 * cache information.
4098 * For overloaded descriptor 0xb1 we use X86_PAE flag
4099 * to disambiguate the cache information.
4101 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
4102 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
4103 ct = &des_49_ct;
4104 } else if (*dp == 0xb1) {
4105 des_b1_ct.ct_code = 0xb1;
4106 des_b1_ct.ct_assoc = 4;
4107 des_b1_ct.ct_line_size = 0;
4108 if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
4109 des_b1_ct.ct_size = 8;
4110 des_b1_ct.ct_label = itlb2M_str;
4111 } else {
4112 des_b1_ct.ct_size = 4;
4113 des_b1_ct.ct_label = itlb4M_str;
4115 ct = &des_b1_ct;
4116 } else {
4117 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
4118 continue;
4122 if (func(arg, ct) != 0) {
4123 break;
4129 * (Like the Intel one, except for Cyrix CPUs)
4131 static void
4132 cyrix_walk_cacheinfo(struct cpuid_info *cpi,
4133 void *arg, int (*func)(void *, const struct cachetab *))
4135 const struct cachetab *ct;
4136 uint8_t *dp;
4137 int i;
4139 if ((dp = cpi->cpi_cacheinfo) == NULL)
4140 return;
4141 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
4143 * Search Cyrix-specific descriptor table first ..
4145 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
4146 if (func(arg, ct) != 0)
4147 break;
4148 continue;
4151 * .. else fall back to the Intel one
4153 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
4154 if (func(arg, ct) != 0)
4155 break;
4156 continue;
4162 * A cacheinfo walker that adds associativity, line-size, and size properties
4163 * to the devinfo node it is passed as an argument.
4165 static int
4166 add_cacheent_props(void *arg, const struct cachetab *ct)
4168 dev_info_t *devi = arg;
4170 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
4171 if (ct->ct_line_size != 0)
4172 add_cache_prop(devi, ct->ct_label, line_str,
4173 ct->ct_line_size);
4174 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
4175 return (0);
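/*
 * add_cacheent_props() is the walker handed to intel_walk_cacheinfo()
 * and cyrix_walk_cacheinfo() by cpuid_set_cpu_properties() below; it
 * always returns 0 so the walk continues and every recognized
 * descriptor contributes its properties to the devinfo node.
 */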
4179 static const char fully_assoc[] = "fully-associative?";
4182 * AMD style cache/tlb description
4184 * Extended functions 5 and 6 directly describe properties of
4185 * tlbs and various cache levels.
4187 static void
4188 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
4190 switch (assoc) {
4191 case 0: /* reserved; ignore */
4192 break;
4193 default:
4194 add_cache_prop(devi, label, assoc_str, assoc);
4195 break;
4196 case 0xff:
4197 add_cache_prop(devi, label, fully_assoc, 1);
4198 break;
4202 static void
4203 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
4205 if (size == 0)
4206 return;
4207 add_cache_prop(devi, label, size_str, size);
4208 add_amd_assoc(devi, label, assoc);
4211 static void
4212 add_amd_cache(dev_info_t *devi, const char *label,
4213 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
4215 if (size == 0 || line_size == 0)
4216 return;
4217 add_amd_assoc(devi, label, assoc);
4219 * Most AMD parts have a sectored cache. Multiple cache lines are
4220 * associated with each tag. A sector consists of all cache lines
4221 * associated with a tag. For example, the AMD K6-III has a sector
4222 * size of 2 cache lines per tag.
4224 if (lines_per_tag != 0)
4225 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
4226 add_cache_prop(devi, label, line_str, line_size);
4227 add_cache_prop(devi, label, size_str, size * 1024);
4230 static void
4231 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
4233 switch (assoc) {
4234 case 0: /* off */
4235 break;
4236 case 1:
4237 case 2:
4238 case 4:
4239 add_cache_prop(devi, label, assoc_str, assoc);
4240 break;
4241 case 6:
4242 add_cache_prop(devi, label, assoc_str, 8);
4243 break;
4244 case 8:
4245 add_cache_prop(devi, label, assoc_str, 16);
4246 break;
4247 case 0xf:
4248 add_cache_prop(devi, label, fully_assoc, 1);
4249 break;
4250 default: /* reserved; ignore */
4251 break;
4255 static void
4256 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
4258 if (size == 0 || assoc == 0)
4259 return;
4260 add_amd_l2_assoc(devi, label, assoc);
4261 add_cache_prop(devi, label, size_str, size);
4264 static void
4265 add_amd_l2_cache(dev_info_t *devi, const char *label,
4266 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
4268 if (size == 0 || assoc == 0 || line_size == 0)
4269 return;
4270 add_amd_l2_assoc(devi, label, assoc);
4271 if (lines_per_tag != 0)
4272 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
4273 add_cache_prop(devi, label, line_str, line_size);
4274 add_cache_prop(devi, label, size_str, size * 1024);
4277 static void
4278 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
4280 struct cpuid_regs *cp;
4282 if (cpi->cpi_xmaxeax < 0x80000005)
4283 return;
4284 cp = &cpi->cpi_extd[5];
4287 * 4M/2M L1 TLB configuration
4289 * We report the size for 2M pages because AMD uses two
4290 * TLB entries for one 4M page.
4292 add_amd_tlb(devi, "dtlb-2M",
4293 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
4294 add_amd_tlb(devi, "itlb-2M",
4295 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
4298 * 4K L1 TLB configuration
4301 switch (cpi->cpi_vendor) {
4302 uint_t nentries;
4303 case X86_VENDOR_TM:
4304 if (cpi->cpi_family >= 5) {
4306 * Crusoe processors have 256 TLB entries, but
4307 * cpuid data format constrains them to only
4308 * reporting 255 of them.
4310 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
4311 nentries = 256;
4313 * Crusoe processors also have a unified TLB
4315 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
4316 nentries);
4317 break;
4319 /*FALLTHROUGH*/
4320 default:
4321 add_amd_tlb(devi, itlb4k_str,
4322 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
4323 add_amd_tlb(devi, dtlb4k_str,
4324 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
4325 break;
4329 * data L1 cache configuration
4332 add_amd_cache(devi, l1_dcache_str,
4333 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
4334 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
4337 * code L1 cache configuration
4340 add_amd_cache(devi, l1_icache_str,
4341 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
4342 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
4344 if (cpi->cpi_xmaxeax < 0x80000006)
4345 return;
4346 cp = &cpi->cpi_extd[6];
4348 /* Check for a unified L2 TLB for large pages */
4350 if (BITX(cp->cp_eax, 31, 16) == 0)
4351 add_amd_l2_tlb(devi, "l2-tlb-2M",
4352 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
4353 else {
4354 add_amd_l2_tlb(devi, "l2-dtlb-2M",
4355 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
4356 add_amd_l2_tlb(devi, "l2-itlb-2M",
4357 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
4360 /* Check for a unified L2 TLB for 4K pages */
4362 if (BITX(cp->cp_ebx, 31, 16) == 0) {
4363 add_amd_l2_tlb(devi, "l2-tlb-4K",
4364 BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
4365 } else {
4366 add_amd_l2_tlb(devi, "l2-dtlb-4K",
4367 BITX(cp->cp_ebx, 31, 28), BITX(cp->cp_ebx, 27, 16));
4368 add_amd_l2_tlb(devi, "l2-itlb-4K",
4369 BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
4372 add_amd_l2_cache(devi, l2_cache_str,
4373 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
4374 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
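/*
 * Worked example (hypothetical register value): had extended function
 * 6 returned cp_ecx == 0x02004140, the add_amd_l2_cache() call above
 * would decode it as
 *	size		BITX(ecx, 31, 16) = 0x200 = 512 (KB)
 *	assoc		BITX(ecx, 15, 12) = 0x4   (4-way)
 *	lines-per-tag	BITX(ecx, 11, 8)  = 1
 *	line-size	BITX(ecx, 7, 0)   = 0x40  (64 bytes)
 * and publish l2-cache properties of 64-byte lines and 524288 bytes
 * total size.
 */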
4378 * There are two basic ways that the x86 world describes its cache
4379 * and tlb architecture - Intel's way and AMD's way.
4381 * Return which flavor of cache architecture we should use
4383 static int
4384 x86_which_cacheinfo(struct cpuid_info *cpi)
4386 switch (cpi->cpi_vendor) {
4387 case X86_VENDOR_Intel:
4388 if (cpi->cpi_maxeax >= 2)
4389 return (X86_VENDOR_Intel);
4390 break;
4391 case X86_VENDOR_AMD:
4393 * The K5 model 1 was the first part from AMD that reported
4394 * cache sizes via extended cpuid functions.
4396 if (cpi->cpi_family > 5 ||
4397 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
4398 return (X86_VENDOR_AMD);
4399 break;
4400 case X86_VENDOR_TM:
4401 if (cpi->cpi_family >= 5)
4402 return (X86_VENDOR_AMD);
4403 /*FALLTHROUGH*/
4404 default:
4406 * If they have extended CPU data for 0x80000005
4407 * then we assume they have AMD-format cache
4408 * information.
4410 * If not, and the vendor happens to be Cyrix,
4411 * then try our Cyrix-specific handler.
4413 * If we're not Cyrix, then assume we're using Intel's
4414 * table-driven format instead.
4416 if (cpi->cpi_xmaxeax >= 0x80000005)
4417 return (X86_VENDOR_AMD);
4418 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
4419 return (X86_VENDOR_Cyrix);
4420 else if (cpi->cpi_maxeax >= 2)
4421 return (X86_VENDOR_Intel);
4422 break;
4424 return (-1);
4427 void
4428 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
4429 struct cpuid_info *cpi)
4431 dev_info_t *cpu_devi;
4432 int create;
4434 cpu_devi = (dev_info_t *)dip;
4436 /* device_type */
4437 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4438 "device_type", "cpu");
4440 /* reg */
4441 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4442 "reg", cpu_id);
4444 /* cpu-mhz, and clock-frequency */
4445 if (cpu_freq > 0) {
4446 long long mul;
4448 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4449 "cpu-mhz", cpu_freq);
4450 if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
4451 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4452 "clock-frequency", (int)mul);
4455 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
4456 return;
4459 /* vendor-id */
4460 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4461 "vendor-id", cpi->cpi_vendorstr);
4463 if (cpi->cpi_maxeax == 0) {
4464 return;
4468 * family, model, and step
4470 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4471 "family", CPI_FAMILY(cpi));
4472 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4473 "cpu-model", CPI_MODEL(cpi));
4474 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4475 "stepping-id", CPI_STEP(cpi));
4477 /* type */
4478 switch (cpi->cpi_vendor) {
4479 case X86_VENDOR_Intel:
4480 create = 1;
4481 break;
4482 default:
4483 create = 0;
4484 break;
4486 if (create)
4487 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4488 "type", CPI_TYPE(cpi));
4490 /* ext-family */
4491 switch (cpi->cpi_vendor) {
4492 case X86_VENDOR_Intel:
4493 case X86_VENDOR_AMD:
4494 create = cpi->cpi_family >= 0xf;
4495 break;
4496 default:
4497 create = 0;
4498 break;
4500 if (create)
4501 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4502 "ext-family", CPI_FAMILY_XTD(cpi));
4504 /* ext-model */
4505 switch (cpi->cpi_vendor) {
4506 case X86_VENDOR_Intel:
4507 create = IS_EXTENDED_MODEL_INTEL(cpi);
4508 break;
4509 case X86_VENDOR_AMD:
4510 create = CPI_FAMILY(cpi) == 0xf;
4511 break;
4512 default:
4513 create = 0;
4514 break;
4516 if (create)
4517 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4518 "ext-model", CPI_MODEL_XTD(cpi));
4520 /* generation */
4521 switch (cpi->cpi_vendor) {
4522 case X86_VENDOR_AMD:
4524 * AMD K5 model 1 was the first part to support this
4526 create = cpi->cpi_xmaxeax >= 0x80000001;
4527 break;
4528 default:
4529 create = 0;
4530 break;
4532 if (create)
4533 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4534 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
4536 /* brand-id */
4537 switch (cpi->cpi_vendor) {
4538 case X86_VENDOR_Intel:
4540 * brand id first appeared on Pentium III Xeon model 8 and
4541 * Celeron model 8 processors, and on Opteron
4543 create = cpi->cpi_family > 6 ||
4544 (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
4545 break;
4546 case X86_VENDOR_AMD:
4547 create = cpi->cpi_family >= 0xf;
4548 break;
4549 default:
4550 create = 0;
4551 break;
4553 if (create && cpi->cpi_brandid != 0) {
4554 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4555 "brand-id", cpi->cpi_brandid);
4558 /* chunks, and apic-id */
4559 switch (cpi->cpi_vendor) {
4561 * first available on Pentium IV and Opteron (K8)
4563 case X86_VENDOR_Intel:
4564 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4565 break;
4566 case X86_VENDOR_AMD:
4567 create = cpi->cpi_family >= 0xf;
4568 break;
4569 default:
4570 create = 0;
4571 break;
4573 if (create) {
4574 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4575 "chunks", CPI_CHUNKS(cpi));
4576 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4577 "apic-id", cpi->cpi_apicid);
4578 if (cpi->cpi_chipid >= 0) {
4579 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4580 "chip#", cpi->cpi_chipid);
4581 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4582 "clog#", cpi->cpi_clogid);
4586 /* cpuid-features */
4587 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4588 "cpuid-features", CPI_FEATURES_EDX(cpi));
4591 /* cpuid-features-ecx */
4592 switch (cpi->cpi_vendor) {
4593 case X86_VENDOR_Intel:
4594 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4595 break;
4596 case X86_VENDOR_AMD:
4597 create = cpi->cpi_family >= 0xf;
4598 break;
4599 default:
4600 create = 0;
4601 break;
4603 if (create)
4604 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4605 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
4607 /* ext-cpuid-features */
4608 switch (cpi->cpi_vendor) {
4609 case X86_VENDOR_Intel:
4610 case X86_VENDOR_AMD:
4611 case X86_VENDOR_Cyrix:
4612 case X86_VENDOR_TM:
4613 case X86_VENDOR_Centaur:
4614 create = cpi->cpi_xmaxeax >= 0x80000001;
4615 break;
4616 default:
4617 create = 0;
4618 break;
4620 if (create) {
4621 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4622 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
4623 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4624 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
4628 * Brand String first appeared in Intel Pentium IV, AMD K5
4629 * model 1, and Cyrix GXm. On earlier models we try and
4630 * simulate something similar .. so this string should always
4631 * say -something- about the processor, however lame.
4633 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4634 "brand-string", cpi->cpi_brandstr);
4637 * Finally, cache and tlb information
4639 switch (x86_which_cacheinfo(cpi)) {
4640 case X86_VENDOR_Intel:
4641 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4642 break;
4643 case X86_VENDOR_Cyrix:
4644 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4645 break;
4646 case X86_VENDOR_AMD:
4647 amd_cache_info(cpi, cpu_devi);
4648 break;
4649 default:
4650 break;
4654 struct l2info {
4655 int *l2i_csz;
4656 int *l2i_lsz;
4657 int *l2i_assoc;
4658 int l2i_ret;
4662 * A cacheinfo walker that fetches the size, line-size and associativity
4663 * of the L2 cache
4665 static int
4666 intel_l2cinfo(void *arg, const struct cachetab *ct)
4668 struct l2info *l2i = arg;
4669 int *ip;
4671 if (ct->ct_label != l2_cache_str &&
4672 ct->ct_label != sl2_cache_str)
4673 return (0); /* not an L2 -- keep walking */
4675 if ((ip = l2i->l2i_csz) != NULL)
4676 *ip = ct->ct_size;
4677 if ((ip = l2i->l2i_lsz) != NULL)
4678 *ip = ct->ct_line_size;
4679 if ((ip = l2i->l2i_assoc) != NULL)
4680 *ip = ct->ct_assoc;
4681 l2i->l2i_ret = ct->ct_size;
4682 return (1); /* was an L2 -- terminate walk */
4686 * AMD L2/L3 Cache and TLB Associativity Field Definition:
4688 * Unlike the associativity for the L1 cache and tlb where the 8 bit
4689 * value is the associativity, the associativity for the L2 cache and
4690 * tlb is encoded in the following table. The 4 bit L2 value serves as
4691 * an index into the amd_afd[] array to determine the associativity.
4692 * -1 is undefined. 0 is fully associative.
4695 static int amd_afd[] =
4696 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};
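/*
 * For example, an encoding of 6 maps to amd_afd[6] == 8 (8-way) and
 * 0xa maps to 32-way; encodings 3, 5, 7 and 9 are reserved (-1), and
 * 0xf maps to 0, which the comment above documents as fully
 * associative.
 */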
4698 static void
4699 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
4701 struct cpuid_regs *cp;
4702 uint_t size, assoc;
4703 int i;
4704 int *ip;
4706 if (cpi->cpi_xmaxeax < 0x80000006)
4707 return;
4708 cp = &cpi->cpi_extd[6];
4710 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
4711 (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
4712 uint_t cachesz = size * 1024;
4713 assoc = amd_afd[i];
4715 ASSERT(assoc != -1);
4717 if ((ip = l2i->l2i_csz) != NULL)
4718 *ip = cachesz;
4719 if ((ip = l2i->l2i_lsz) != NULL)
4720 *ip = BITX(cp->cp_ecx, 7, 0);
4721 if ((ip = l2i->l2i_assoc) != NULL)
4722 *ip = assoc;
4723 l2i->l2i_ret = cachesz;
4728 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
4730 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4731 struct l2info __l2info, *l2i = &__l2info;
4733 l2i->l2i_csz = csz;
4734 l2i->l2i_lsz = lsz;
4735 l2i->l2i_assoc = assoc;
4736 l2i->l2i_ret = -1;
4738 switch (x86_which_cacheinfo(cpi)) {
4739 case X86_VENDOR_Intel:
4740 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4741 break;
4742 case X86_VENDOR_Cyrix:
4743 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4744 break;
4745 case X86_VENDOR_AMD:
4746 amd_l2cacheinfo(cpi, l2i);
4747 break;
4748 default:
4749 break;
4751 return (l2i->l2i_ret);
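/*
 * Minimal usage sketch (hypothetical caller): any of the output
 * pointers may be NULL when that datum is not of interest, e.g.
 *
 *	int l2_line_size;
 *	(void) getl2cacheinfo(CPU, NULL, &l2_line_size, NULL);
 *
 * The return value is the L2 size in bytes, or -1 if no L2 cache
 * information could be determined.
 */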
4756 cpuid_mwait_alloc(cpu_t *cpu)
4758 uint32_t *buf;
4759 uint32_t *mwait;
4760 size_t size;
4762 ASSERT(cpuid_checkpass(CPU, 2));
4764 size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
4765 if (size == 0)
4766 return (EINVAL);
4769 * kmem_alloc() returns cache line size aligned data for size
4770 * allocations. size is currently cache line sized. Neither of
4771 * these implementation details is guaranteed to be true in the
4772 * future.
4774 * First try allocating size as kmem_alloc() currently returns
4775 * correctly aligned memory. If kmem_alloc() does not return
4776 * size aligned memory, then use size ROUNDUP.
4778 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
4779 * decide to free this memory.
4781 buf = kmem_zalloc(size, KM_SLEEP);
4782 if (buf == (uint32_t *)P2ROUNDUP((uintptr_t)buf, size)) {
4783 mwait = buf;
4784 } else {
4785 kmem_free(buf, size);
4786 buf = kmem_zalloc(size * 2, KM_SLEEP);
4788 mwait = (uint32_t *)P2ROUNDUP((uintptr_t)buf, size);
4789 size *= 2;
4792 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = buf;
4793 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = size;
4795 *mwait = MWAIT_RUNNING;
4797 cpu->cpu_m.mcpu_mwait = mwait;
4799 return (0);
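/*
 * Alignment example (illustrative): if mon_max were 64 and
 * kmem_zalloc() happened to return a 64-byte aligned buffer,
 * P2ROUNDUP() leaves the pointer unchanged and it is used directly.
 * Otherwise the buffer is freed, twice the size is allocated, and the
 * pointer is rounded up to the next size-aligned boundary within the
 * larger buffer; buf_actual/size_actual record what was really
 * allocated so cpuid_mwait_free() can release it.
 */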
4802 void
4803 cpuid_mwait_free(cpu_t *cpu)
4805 if (cpu->cpu_m.mcpu_cpi == NULL) {
4806 return;
4809 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
4810 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
4811 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
4812 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
4815 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
4816 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
4819 void
4820 patch_tsc_read(int flag)
4822 size_t cnt;
4824 switch (flag) {
4825 case TSC_NONE:
4826 cnt = &_no_rdtsc_end - &_no_rdtsc_start;
4827 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
4828 break;
4829 case TSC_RDTSC_MFENCE:
4830 cnt = &_tsc_mfence_end - &_tsc_mfence_start;
4831 (void) memcpy((void *)tsc_read,
4832 (void *)&_tsc_mfence_start, cnt);
4833 break;
4834 case TSC_RDTSC_LFENCE:
4835 cnt = &_tsc_lfence_end - &_tsc_lfence_start;
4836 (void) memcpy((void *)tsc_read,
4837 (void *)&_tsc_lfence_start, cnt);
4838 break;
4839 case TSC_TSCP:
4840 cnt = &_tscp_end - &_tscp_start;
4841 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
4842 break;
4843 default:
4844 /* Bail for unexpected TSC types. (TSC_NONE covers 0) */
4845 cmn_err(CE_PANIC, "Unrecognized TSC type: %d", flag);
4846 break;
4848 tsc_type = flag;
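/*
 * Background note: rdtsc is not a serializing instruction, so the
 * TSC_RDTSC_MFENCE and TSC_RDTSC_LFENCE variants prepend a fence to
 * keep the TSC read from being reordered with surrounding memory
 * operations, while TSC_TSCP uses rdtscp, which waits for prior
 * instructions to complete.  TSC_NONE patches in a fallback for CPUs
 * without a usable TSC.
 */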
4852 cpuid_deep_cstates_supported(void)
4854 struct cpuid_info *cpi;
4855 struct cpuid_regs regs;
4857 ASSERT(cpuid_checkpass(CPU, 1));
4859 cpi = CPU->cpu_m.mcpu_cpi;
4861 if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
4862 return (0);
4864 switch (cpi->cpi_vendor) {
4865 case X86_VENDOR_Intel:
4866 if (cpi->cpi_xmaxeax < 0x80000007)
4867 return (0);
4870 * Does the TSC run at a constant rate in all ACPI C-states?
4872 regs.cp_eax = 0x80000007;
4873 (void) __cpuid_insn(&regs);
4874 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
4876 default:
4877 return (0);
4882 void
4883 post_startup_cpu_fixups(void)
4886 * Some AMD processors support C1E state. Entering this state will
4887 * cause the local APIC timer to stop, which we can't deal with at
4888 * this time.
4890 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
4891 on_trap_data_t otd;
4892 uint64_t reg;
4894 if (!on_trap(&otd, OT_DATA_ACCESS)) {
4895 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
4896 /* Disable C1E state if it is enabled by BIOS */
4897 if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
4898 AMD_ACTONCMPHALT_MASK) {
4899 reg &= ~(AMD_ACTONCMPHALT_MASK <<
4900 AMD_ACTONCMPHALT_SHIFT);
4901 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
4904 no_trap();
4908 void
4909 enable_pcid(void)
4911 if (x86_use_pcid == -1)
4912 x86_use_pcid = is_x86_feature(x86_featureset, X86FSET_PCID);
4914 if (x86_use_invpcid == -1) {
4915 x86_use_invpcid = is_x86_feature(x86_featureset,
4916 X86FSET_INVPCID);
4919 if (!x86_use_pcid)
4920 return;
4923 * Intel say that on setting PCIDE, it immediately starts using the PCID
4924 * bits; better make sure there's nothing there.
4926 ASSERT((getcr3() & MMU_PAGEOFFSET) == PCID_NONE);
4928 setcr4(getcr4() | CR4_PCIDE);
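/*
 * Note: once CR4.PCIDE is set, the low 12 bits of CR3 are interpreted
 * as the current PCID, which is why the ASSERT above insists those
 * bits already read as PCID_NONE before the switch is made.
 */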
4932 * Setup necessary registers to enable XSAVE feature on this processor.
4933 * This function needs to be called early enough, so that no xsave/xrstor
4934 * ops will execute on the processor before the MSRs are properly set up.
4936 * Current implementation has the following assumption:
4937 * - cpuid_pass1() is done, so that X86 features are known.
4938 * - fpu_probe() is done, so that fp_save_mech is chosen.
4940 void
4941 xsave_setup_msr(cpu_t *cpu)
4943 ASSERT(fp_save_mech == FP_XSAVE);
4944 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
4946 /* Enable OSXSAVE in CR4. */
4947 setcr4(getcr4() | CR4_OSXSAVE);
4949 * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
4950 * correct value.
4952 cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
4953 setup_xfem();
4957 * Starting with the Westmere processor the local
4958 * APIC timer will continue running in all C-states,
4959 * including the deepest C-states.
4962 cpuid_arat_supported(void)
4964 struct cpuid_info *cpi;
4965 struct cpuid_regs regs;
4967 ASSERT(cpuid_checkpass(CPU, 1));
4968 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4970 cpi = CPU->cpu_m.mcpu_cpi;
4972 switch (cpi->cpi_vendor) {
4973 case X86_VENDOR_Intel:
4975 * Always-running Local APIC Timer is
4976 * indicated by CPUID.6.EAX[2].
4978 if (cpi->cpi_maxeax >= 6) {
4979 regs.cp_eax = 6;
4980 (void) cpuid_insn(NULL, &regs);
4981 return (regs.cp_eax & CPUID_CSTATE_ARAT);
4982 } else {
4983 return (0);
4985 default:
4986 return (0);
4991 * Check support for Intel ENERGY_PERF_BIAS feature
4994 cpuid_iepb_supported(struct cpu *cp)
4996 struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
4997 struct cpuid_regs regs;
4999 ASSERT(cpuid_checkpass(cp, 1));
5001 if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
5002 !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
5003 return (0);
5007 * Intel ENERGY_PERF_BIAS MSR is indicated by
5008 * capability bit CPUID.6.ECX.3
5010 if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
5011 return (0);
5013 regs.cp_eax = 0x6;
5014 (void) cpuid_insn(NULL, &regs);
5015 return (regs.cp_ecx & CPUID_EPB_SUPPORT);
5019 * Check support for TSC deadline timer
5021 * TSC deadline timer provides a superior software programming
5022 * model over the local APIC timer that eliminates "time drifts".
5023 * Instead of specifying a relative time, software specifies an
5024 * absolute time as the target at which the processor should
5025 * generate a timer event.
5028 cpuid_deadline_tsc_supported(void)
5030 struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
5031 struct cpuid_regs regs;
5033 ASSERT(cpuid_checkpass(CPU, 1));
5034 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
5036 switch (cpi->cpi_vendor) {
5037 case X86_VENDOR_Intel:
5038 if (cpi->cpi_maxeax >= 1) {
5039 regs.cp_eax = 1;
5040 (void) cpuid_insn(NULL, &regs);
5041 return (regs.cp_ecx & CPUID_DEADLINE_TSC);
5042 } else {
5043 return (0);
5045 default:
5046 return (0);
5050 #if defined(__amd64) && !defined(__xpv)
5052 * Patch in versions of bcopy for high performance Intel Nhm processors
5053 * and later...
5055 void
5056 patch_memops(uint_t vendor)
5058 size_t cnt, i;
5059 caddr_t to, from;
5061 if ((vendor == X86_VENDOR_Intel) &&
5062 is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
5063 cnt = &bcopy_patch_end - &bcopy_patch_start;
5064 to = &bcopy_ck_size;
5065 from = &bcopy_patch_start;
5066 for (i = 0; i < cnt; i++) {
5067 *to++ = *from++;
5071 #endif /* __amd64 && !__xpv */
5074 * This function finds the number of bits to represent the number of cores per
5075 * chip and the number of strands per core for Intel platforms.
5076 * It re-uses the x2APIC cpuid code from cpuid_pass2().
5078 void
5079 cpuid_get_ext_topo(uint_t vendor, uint_t *core_nbits, uint_t *strand_nbits)
5081 struct cpuid_regs regs;
5082 struct cpuid_regs *cp = &regs;
5084 if (vendor != X86_VENDOR_Intel) {
5085 return;
5088 /* if the cpuid level is 0xB or greater, extended topo is available. */
5089 cp->cp_eax = 0;
5090 if (__cpuid_insn(cp) >= 0xB) {
5092 cp->cp_eax = 0xB;
5093 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
5094 (void) __cpuid_insn(cp);
5097 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
5098 * indicates that the extended topology enumeration leaf is
5099 * available.
5101 if (cp->cp_ebx) {
5102 uint_t coreid_shift = 0;
5103 uint_t chipid_shift = 0;
5104 uint_t i;
5105 uint_t level;
5107 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
5108 cp->cp_eax = 0xB;
5109 cp->cp_ecx = i;
5111 (void) __cpuid_insn(cp);
5112 level = CPI_CPU_LEVEL_TYPE(cp);
5114 if (level == 1) {
5116 * Thread level processor topology
5117 * Number of bits shift right APIC ID
5118 * to get the coreid.
5120 coreid_shift = BITX(cp->cp_eax, 4, 0);
5121 } else if (level == 2) {
5123 * Core level processor topology
5124 * Number of bits shift right APIC ID
5125 * to get the chipid.
5127 chipid_shift = BITX(cp->cp_eax, 4, 0);
5131 if (coreid_shift > 0 && chipid_shift > coreid_shift) {
5132 *strand_nbits = coreid_shift;
5133 *core_nbits = chipid_shift - coreid_shift;
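/*
 * Worked example (hypothetical topology): for a part reporting
 * coreid_shift == 1 at the thread level and chipid_shift == 5 at the
 * core level, this returns *strand_nbits = 1 (two strands per core)
 * and *core_nbits = 5 - 1 = 4 (up to sixteen cores per chip).
 */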