/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011 by Delphix. All rights reserved.
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
 */
/*
 * Portions Copyright 2009 Advanced Micro Devices, Inc.
 */
/*
 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
 */
/*
 * Various routines to handle identification
 * and classification of x86 processors.
 */
#include <sys/types.h>
#include <sys/archsystm.h>
#include <sys/x86_archext.h>
#include <sys/kmem.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/cpuvar.h>
#include <sys/processor.h>
#include <sys/sysmacros.h>
#include <sys/pg.h>
#include <sys/fp.h>
#include <sys/controlregs.h>
#include <sys/bitmap.h>
#include <sys/auxv_386.h>
#include <sys/memnode.h>
#include <sys/pci_cfgspace.h>

#ifdef __xpv
#include <sys/hypervisor.h>
#else
#include <sys/ontrap.h>
#endif
/*
 * Pass 0 of cpuid feature analysis happens in locore. It contains special code
 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with
 * them accordingly. For most modern processors, feature detection occurs here
 * in pass 1.
 *
 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup()
 * for the boot CPU and does the basic analysis that the early kernel needs.
 * x86_featureset is set based on the return value of cpuid_pass1() of the boot
 * CPU.  In this pass, the following things are done:
 *
 *	o Determining vendor/model/family/stepping and setting x86_type and
 *	  x86_vendor accordingly.
 *	o Processing the feature flags returned by the cpuid instruction while
 *	  applying any workarounds or tricks for the specific processor.
 *	o Mapping the feature flags into Solaris feature bits (X86_*).
 *	o Processing extended feature flags if supported by the processor,
 *	  again while applying specific processor knowledge.
 *	o Determining the CMT characteristics of the system.
 *
 * Pass 1 is done on non-boot CPUs during their initialization and the results
 * are used only as a meager attempt at ensuring that all processors within the
 * system support the same features.
 *
 * Pass 2 of cpuid feature analysis happens just at the beginning
 * of startup().  It just copies in and corrects the remainder
 * of the cpuid data we depend on: standard cpuid functions that we didn't
 * need for pass1 feature analysis, and extended cpuid functions beyond the
 * simple feature processing done in pass1.
 *
 * Pass 3 of cpuid analysis is invoked after basic kernel services; in
 * particular kernel memory allocation has been made available. It creates a
 * readable brand string based on the data collected in the first two passes.
 *
 * Pass 4 of cpuid analysis is invoked after post_startup() when all
 * the support infrastructure for various hardware features has been
 * initialized. It determines which processor features will be reported
 * to userland via the aux vector.
 *
 * All passes are executed on all CPUs, but only the boot CPU determines what
 * features the kernel will use.
 *
 * Much of the worst junk in this file is for the support of processors
 * that didn't really implement the cpuid instruction properly.
 *
 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon,
 * the pass numbers.  Accordingly, changes to the pass code may require changes
 * to the accessor code.
 */
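/*
 * For illustration only -- a minimal userland sketch of the featureset
 * pattern the passes rely on.  The BT_* macros here are simplified
 * stand-ins for the real ones in <sys/bitmap.h>, and MY_FSET_* is a
 * hypothetical feature enum, not this file's X86FSET_* values:
 *
 *	#include <limits.h>
 *
 *	#define	NBIPUL		(sizeof (unsigned long) * CHAR_BIT)
 *	#define	BT_TEST(map, b) \
 *		(((map)[(b) / NBIPUL] >> ((b) % NBIPUL)) & 1)
 *	#define	BT_SET(map, b) \
 *		((map)[(b) / NBIPUL] |= 1UL << ((b) % NBIPUL))
 *
 *	enum { MY_FSET_TSC, MY_FSET_SSE2, MY_NUM_FEATURES };
 *	unsigned long fset[(MY_NUM_FEATURES + NBIPUL - 1) / NBIPUL];
 *
 *	BT_SET(fset, MY_FSET_SSE2);		// pass 1 analogue
 *	if (BT_TEST(fset, MY_FSET_SSE2))	// later consumers
 *		...
 *
 * The kernel accessors below additionally ASSERT on the feature index,
 * and the cpuid_get*() accessors ASSERT on the pass number.
 */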
uint_t x86_vendor = X86_VENDOR_IntelClone;
uint_t x86_type = X86_TYPE_OTHER;
uint_t x86_clflush_size = 0;

uint_t pentiumpro_bug4046376;
uint_t pentiumpro_bug4064495;

uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
static char *x86_feature_names[NUM_X86_FEATURES] = {
	"lgpg",
	"tsc",
	"msr",
	"mtrr",
	"pge",
	"de",
	"cmov",
	"mmx",
	"mca",
	"pae",
	"cx8",
	"pat",
	"sep",
	"sse",
	"sse2",
	"htt",
	"asysc",
	"nx",
	"sse3",
	"cx16",
	"cmp",
	"tscp",
	"mwait",
	"sse4a",
	"cpuid",
	"ssse3",
	"sse4_1",
	"sse4_2",
	"1gpg",
	"clfsh",
	"64",
	"aes",
	"pclmulqdq",
	"xsave",
	"avx",
	"vmx",
	"svm",
	"topoext"
};
boolean_t
is_x86_feature(void *featureset, uint_t feature)
{
	ASSERT(feature < NUM_X86_FEATURES);
	return (BT_TEST((ulong_t *)featureset, feature));
}

void
add_x86_feature(void *featureset, uint_t feature)
{
	ASSERT(feature < NUM_X86_FEATURES);
	BT_SET((ulong_t *)featureset, feature);
}

void
remove_x86_feature(void *featureset, uint_t feature)
{
	ASSERT(feature < NUM_X86_FEATURES);
	BT_CLEAR((ulong_t *)featureset, feature);
}

boolean_t
compare_x86_featureset(void *setA, void *setB)
{
	/*
	 * We assume that the unused bits of the bitmap are always zero.
	 */
	if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
		return (B_TRUE);
	} else {
		return (B_FALSE);
	}
}

void
print_x86_featureset(void *featureset)
{
	uint_t i;

	for (i = 0; i < NUM_X86_FEATURES; i++) {
		if (is_x86_feature(featureset, i)) {
			cmn_err(CE_CONT, "?x86_feature: %s\n",
			    x86_feature_names[i]);
		}
	}
}
static size_t xsave_state_size = 0;
uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
boolean_t xsave_force_disable = B_FALSE;
/*
 * This is set to the platform type on which Solaris is running.
 */
static int platform_type = -1;

#if !defined(__xpv)
/*
 * Variable to patch if hypervisor platform detection needs to be
 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0).
 */
int enable_platform_detection = 1;
#endif
/*
 * monitor/mwait info.
 *
 * size_actual and buf_actual are the real address and size allocated to get
 * proper mwait_buf alignment.  buf_actual and size_actual should be passed
 * to kmem_free().  Currently kmem_alloc() and mwait happen to both use
 * processor cache-line alignment, but this is not guaranteed in the future.
 */
struct mwait_info {
	size_t		mon_min;	/* min size to avoid missed wakeups */
	size_t		mon_max;	/* size to avoid false wakeups */
	size_t		size_actual;	/* size actually allocated */
	void		*buf_actual;	/* memory actually allocated */
	uint32_t	support;	/* processor support of monitor/mwait */
};
/*
 * xsave/xrestor info.
 *
 * This structure contains HW feature bits and the size of the xsave save area.
 * Note: the kernel will use the maximum size required for all hardware
 * features.  It is not optimized for potential memory savings if features
 * at the end of the save area are not enabled.
 */
struct xsave_info {
	uint32_t	xsav_hw_features_low;   /* Supported HW features */
	uint32_t	xsav_hw_features_high;  /* Supported HW features */
	size_t		xsav_max_size;  /* max size save area for HW features */
	size_t		ymm_size;	/* AVX: size of ymm save area */
	size_t		ymm_offset;	/* AVX: offset for ymm save area */
};
/*
 * These constants determine how many of the elements of the
 * cpuid we cache in the cpuid_info data structure; the
 * remaining elements are accessible via the cpuid instruction.
 */

#define	NMAX_CPI_STD	6		/* eax = 0 .. 5 */
#define	NMAX_CPI_EXTD	0x1f		/* eax = 0x80000000 .. 0x8000001e */
/*
 * Some terminology needs to be explained:
 *  - Socket: Something that can be plugged into a motherboard.
 *  - Package: Same as socket.
 *  - Chip: Same as socket.  Note that AMD's documentation uses the term "chip"
 *    differently: there, chip is the same as processor node (below).
 *  - Processor node: Some AMD processors have more than one
 *    "subprocessor" embedded in a package.  These subprocessors (nodes)
 *    are fully-functional processors themselves with cores, caches,
 *    memory controllers, PCI configuration spaces.  They are connected
 *    inside the package with Hypertransport links.  On single-node
 *    processors, processor node is equivalent to chip/socket/package.
 *  - Compute Unit: Some AMD processors pair cores in "compute units" that
 *    share the FPU and the I$ and L2 caches.
 */
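/*
 * A concrete instance of these terms, for illustration (nothing in this
 * file special-cases the part): a twelve-core AMD "Magny-Cours" family
 * 0x10 processor is one socket/package containing two processor nodes of
 * six cores each, so such a part ends up with
 * cpi_procnodes_per_pkg == 2 and cpi_ncore_per_chip == 12.
 */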
struct cpuid_info {
	uint_t cpi_pass;		/* last pass completed */
	/*
	 * standard function information
	 */
	uint_t cpi_maxeax;		/* fn 0: %eax */
	char cpi_vendorstr[13];		/* fn 0: %ebx:%ecx:%edx */
	uint_t cpi_vendor;		/* enum of cpi_vendorstr */

	uint_t cpi_family;		/* fn 1: extended family */
	uint_t cpi_model;		/* fn 1: extended model */
	uint_t cpi_step;		/* fn 1: stepping */
	chipid_t cpi_chipid;		/* fn 1: %ebx:  Intel: chip # */
					/*		AMD: package/socket # */
	uint_t cpi_brandid;		/* fn 1: %ebx: brand ID */
	int cpi_clogid;			/* fn 1: %ebx: thread # */
	uint_t cpi_ncpu_per_chip;	/* fn 1: %ebx: logical cpu count */
	uint8_t cpi_cacheinfo[16];	/* fn 2: intel-style cache desc */
	uint_t cpi_ncache;		/* fn 2: number of elements */
	uint_t cpi_ncpu_shr_last_cache;	/* fn 4: %eax: ncpus sharing cache */
	id_t cpi_last_lvl_cacheid;	/* fn 4: %eax: derived cache id */
	uint_t cpi_std_4_size;		/* fn 4: number of fn 4 elements */
	struct cpuid_regs **cpi_std_4;	/* fn 4: %ecx == 0 .. fn4_size */
	struct cpuid_regs cpi_std[NMAX_CPI_STD];	/* 0 .. 5 */
	/*
	 * extended function information
	 */
	uint_t cpi_xmaxeax;		/* fn 0x80000000: %eax */
	char cpi_brandstr[49];		/* fn 0x8000000[234] */
	uint8_t cpi_pabits;		/* fn 0x80000008: %eax */
	uint8_t cpi_vabits;		/* fn 0x80000008: %eax */
	struct cpuid_regs cpi_extd[NMAX_CPI_EXTD];	/* 0x800000XX */

	id_t cpi_coreid;		/* same coreid => strands share core */
	int cpi_pkgcoreid;		/* core number within single package */
	uint_t cpi_ncore_per_chip;	/* AMD: fn 0x80000008: %ecx[7-0] */
					/* Intel: fn 4: %eax[31-26] */
	/*
	 * supported feature information
	 */
	uint32_t cpi_support[5];
#define	STD_EDX_FEATURES	0
#define	AMD_EDX_FEATURES	1
#define	TM_EDX_FEATURES		2
#define	STD_ECX_FEATURES	3
#define	AMD_ECX_FEATURES	4
	/*
	 * Synthesized information, where known.
	 */
	uint32_t cpi_chiprev;		/* See X86_CHIPREV_* in x86_archext.h */
	const char *cpi_chiprevstr;	/* May be NULL if chiprev unknown */
	uint32_t cpi_socket;		/* Chip package/socket type */

	struct mwait_info cpi_mwait;	/* fn 5: monitor/mwait info */
	uint_t cpi_apicid;
	uint_t cpi_procnodeid;		/* AMD: nodeID on HT, Intel: chipid */
	uint_t cpi_procnodes_per_pkg;	/* AMD: # of nodes in the package */

	uint_t cpi_compunitid;		/* AMD: ComputeUnit ID, Intel: coreid */
	uint_t cpi_cores_per_compunit;	/* AMD: # of cores in the ComputeUnit */

	struct xsave_info cpi_xsave;	/* fn D: xsave/xrestor info */
};

static struct cpuid_info cpuid_info0;
/*
 * These bit fields are defined by the Intel Application Note AP-485
 * "Intel Processor Identification and the CPUID Instruction"
 */
#define	CPI_FAMILY_XTD(cpi)	BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
#define	CPI_MODEL_XTD(cpi)	BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
#define	CPI_TYPE(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
#define	CPI_FAMILY(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
#define	CPI_STEP(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
#define	CPI_MODEL(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 7, 4)

#define	CPI_FEATURES_EDX(cpi)		((cpi)->cpi_std[1].cp_edx)
#define	CPI_FEATURES_ECX(cpi)		((cpi)->cpi_std[1].cp_ecx)
#define	CPI_FEATURES_XTD_EDX(cpi)	((cpi)->cpi_extd[1].cp_edx)
#define	CPI_FEATURES_XTD_ECX(cpi)	((cpi)->cpi_extd[1].cp_ecx)

#define	CPI_BRANDID(cpi)	BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
#define	CPI_CHUNKS(cpi)		BITX((cpi)->cpi_std[1].cp_ebx, 15, 8)
#define	CPI_CPU_COUNT(cpi)	BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
#define	CPI_APIC_ID(cpi)	BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)

#define	CPI_MAXEAX_MAX		0x100		/* sanity control */
#define	CPI_XMAXEAX_MAX		0x80000100
#define	CPI_FN4_ECX_MAX		0x20		/* sanity: max fn 4 levels */
#define	CPI_FNB_ECX_MAX		0x20		/* sanity: max fn B levels */
/*
 * Function 4 (Deterministic Cache Parameters) macros
 * Defined by Intel Application Note AP-485
 */
#define	CPI_NUM_CORES(regs)		BITX((regs)->cp_eax, 31, 26)
#define	CPI_NTHR_SHR_CACHE(regs)	BITX((regs)->cp_eax, 25, 14)
#define	CPI_FULL_ASSOC_CACHE(regs)	BITX((regs)->cp_eax, 9, 9)
#define	CPI_SELF_INIT_CACHE(regs)	BITX((regs)->cp_eax, 8, 8)
#define	CPI_CACHE_LVL(regs)		BITX((regs)->cp_eax, 7, 5)
#define	CPI_CACHE_TYPE(regs)		BITX((regs)->cp_eax, 4, 0)
#define	CPI_CPU_LEVEL_TYPE(regs)	BITX((regs)->cp_ecx, 15, 8)

#define	CPI_CACHE_WAYS(regs)		BITX((regs)->cp_ebx, 31, 22)
#define	CPI_CACHE_PARTS(regs)		BITX((regs)->cp_ebx, 21, 12)
#define	CPI_CACHE_COH_LN_SZ(regs)	BITX((regs)->cp_ebx, 11, 0)

#define	CPI_CACHE_SETS(regs)		BITX((regs)->cp_ecx, 31, 0)

#define	CPI_PREFCH_STRIDE(regs)		BITX((regs)->cp_edx, 9, 0)
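/*
 * Worked example of the fn-4 fields (a sketch; no such helper exists in
 * this file): AP-485 gives the total size of a cache level as
 *	(ways + 1) * (partitions + 1) * (line size + 1) * (sets + 1)
 * which, using the macros above, a hypothetical helper could compute as:
 *
 *	static size_t
 *	cpi_cache_size(const struct cpuid_regs *regs)
 *	{
 *		return ((size_t)(CPI_CACHE_WAYS(regs) + 1) *
 *		    (CPI_CACHE_PARTS(regs) + 1) *
 *		    (CPI_CACHE_COH_LN_SZ(regs) + 1) *
 *		    (CPI_CACHE_SETS(regs) + 1));
 *	}
 *
 * e.g. ways == 7, parts == 0, line size == 63, sets == 63 describes an
 * 8-way cache with 64-byte lines and 64 sets: 8 * 1 * 64 * 64 = 32K.
 */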
/*
 * A couple of shorthand macros to identify "later" P6-family chips
 * like the Pentium M and Core.  First, the "older" P6-based stuff
 * (loosely defined as "pre-Pentium-4"):
 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
 */
#define	IS_LEGACY_P6(cpi) (			\
	cpi->cpi_family == 6 &&			\
		(cpi->cpi_model == 1 ||		\
		cpi->cpi_model == 3 ||		\
		cpi->cpi_model == 5 ||		\
		cpi->cpi_model == 6 ||		\
		cpi->cpi_model == 7 ||		\
		cpi->cpi_model == 8 ||		\
		cpi->cpi_model == 0xA ||	\
		cpi->cpi_model == 0xB)		\
)

/* A "new F6" is everything with family 6 that's not the above */
#define	IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))

/* Extended family/model support */
#define	IS_EXTENDED_MODEL_INTEL(cpi)	(cpi->cpi_family == 0x6 || \
	cpi->cpi_family >= 0xf)
/*
 * Info for monitor/mwait idle loop.
 *
 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
 * 2006.
 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
 * Documentation Updates" #33633, Rev 2.05, December 2006.
 */
#define	MWAIT_SUPPORT		(0x00000001)	/* mwait supported */
#define	MWAIT_EXTENSIONS	(0x00000002)	/* extensions supported */
#define	MWAIT_ECX_INT_ENABLE	(0x00000004)	/* ecx 1 extension supported */
#define	MWAIT_SUPPORTED(cpi)	((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
#define	MWAIT_INT_ENABLE(cpi)	((cpi)->cpi_std[5].cp_ecx & 0x2)
#define	MWAIT_EXTENSION(cpi)	((cpi)->cpi_std[5].cp_ecx & 0x1)
#define	MWAIT_SIZE_MIN(cpi)	BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
#define	MWAIT_SIZE_MAX(cpi)	BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)

/*
 * Number of sub-cstates for a given c-state.
 */
#define	MWAIT_NUM_SUBC_STATES(cpi, c_state)			\
	BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
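/*
 * Worked example: callers pass the low bit index of the nibble for the
 * C-state of interest (0, 4, 8, ...), not the C-state number itself.
 * With cp_edx == 0x00002220, MWAIT_NUM_SUBC_STATES(cpi, 4) extracts
 * bits 7:4 == 2 sub-C-states for C1, and (cpi, 8) bits 11:8 == 2 for C2.
 */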
/*
 * XSAVE leaf 0xD enumeration
 */
#define	CPUID_LEAFD_2_YMM_OFFSET	576
#define	CPUID_LEAFD_2_YMM_SIZE		256
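/*
 * These constants follow from the standard-format XSAVE layout (a worked
 * check, not an extra source of truth): the legacy FXSAVE image occupies
 * bytes 0-511 and the XSAVE header bytes 512-575, so the AVX (upper-ymm)
 * component begins at offset 512 + 64 == 576; sixteen 128-bit upper
 * halves of %ymm0-15 need 16 * 16 == 256 bytes.
 */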
/*
 * Functions we consume from cpuid_subr.c; don't publish these in a header
 * file to try and keep people using the expected cpuid_* interfaces.
 */
extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
#if defined(__xpv)
/*
 * Apply various platform-dependent restrictions where the
 * underlying platform restrictions mean the CPU can be marked
 * as less capable than its cpuid instruction would imply.
 */
static void
platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
{
	switch (eax) {
	case 1: {
		uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
		    0 : CPUID_INTC_EDX_MCA;
		cp->cp_edx &=
		    ~(mcamask |
		    CPUID_INTC_EDX_PSE |
		    CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
		    CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR |
		    CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT |
		    CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
		    CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT);
		break;
	}

	case 0x80000001:
		cp->cp_edx &=
		    ~(CPUID_AMD_EDX_PSE |
		    CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
		    CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE |
		    CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 |
		    CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
		    CPUID_AMD_EDX_TSCP);
		cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY;
		break;
	default:
		break;
	}

	switch (vendor) {
	case X86_VENDOR_Intel:
		switch (eax) {
		case 4:
			/*
			 * Zero out the (ncores-per-chip - 1) field
			 */
			cp->cp_eax &= 0x03fffffff;
			break;
		default:
			break;
		}
		break;
	case X86_VENDOR_AMD:
		switch (eax) {

		case 0x80000001:
			cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D;
			break;

		case 0x80000008:
			/*
			 * Zero out the (ncores-per-chip - 1) field
			 */
			cp->cp_ecx &= 0xffffff00;
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}
}
#else	/* __xpv */
#define	platform_cpuid_mangle(vendor, eax, cp)	/* nothing */
#endif
/*
 * Some undocumented ways of patching the results of the cpuid
 * instruction to permit running Solaris 10 on future cpus that
 * we don't currently support.  Could be set to non-zero values
 * via settings in eeprom.
 */

uint32_t cpuid_feature_ecx_include;
uint32_t cpuid_feature_ecx_exclude;
uint32_t cpuid_feature_edx_include;
uint32_t cpuid_feature_edx_exclude;
/*
 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
 */
void
cpuid_alloc_space(cpu_t *cpu)
{
	/*
	 * By convention, cpu0 is the boot cpu, which is set up
	 * before memory allocation is available.  All other cpus get
	 * their cpuid_info struct allocated here.
	 */
	ASSERT(cpu->cpu_id != 0);
	ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
	cpu->cpu_m.mcpu_cpi =
	    kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
}
void
cpuid_free_space(cpu_t *cpu)
{
	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
	int i;

	ASSERT(cpi != NULL);
	ASSERT(cpi != &cpuid_info0);

	/*
	 * Free up any function 4 related dynamic storage
	 */
	for (i = 1; i < cpi->cpi_std_4_size; i++)
		kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs));
	if (cpi->cpi_std_4_size > 0)
		kmem_free(cpi->cpi_std_4,
		    cpi->cpi_std_4_size * sizeof (struct cpuid_regs *));

	kmem_free(cpi, sizeof (*cpi));
	cpu->cpu_m.mcpu_cpi = NULL;
}
#if !defined(__xpv)
/*
 * Determine the type of the underlying platform. This is used to customize
 * initialization of various subsystems (e.g. TSC). determine_platform() must
 * only ever be called once to prevent two processors from seeing different
 * values of platform_type; it must be called before cpuid_pass1(), the
 * earliest consumer to execute.
 */
void
determine_platform(void)
{
	struct cpuid_regs cp;
	char *xen_str;
	uint32_t xen_signature[4], base;

	ASSERT(platform_type == -1);

	platform_type = HW_NATIVE;

	if (!enable_platform_detection)
		return;

	/*
	 * In a fully virtualized domain, Xen's pseudo-cpuid function
	 * returns a string representing the Xen signature in %ebx, %ecx,
	 * and %edx. %eax contains the maximum supported cpuid function.
	 * We need at least a (base + 2) leaf value to do what we want
	 * to do. Try different base values, since the hypervisor might
	 * use a different one depending on whether hyper-v emulation
	 * is switched on by default or not.
	 */
	for (base = 0x40000000; base < 0x40010000; base += 0x100) {
		cp.cp_eax = base;
		(void) __cpuid_insn(&cp);
		xen_signature[0] = cp.cp_ebx;
		xen_signature[1] = cp.cp_ecx;
		xen_signature[2] = cp.cp_edx;
		xen_signature[3] = 0;
		xen_str = (char *)xen_signature;
		if (strcmp("XenVMMXenVMM", xen_str) == 0 &&
		    cp.cp_eax >= (base + 2)) {
			platform_type = HW_XEN_HVM;
			return;
		}
	}

	if (vmware_platform()) /* running under vmware hypervisor? */
		platform_type = HW_VMWARE;
}
int
get_hwenv(void)
{
	ASSERT(platform_type != -1);
	return (platform_type);
}

int
is_controldom(void)
{
	return (0);
}

#else

int
get_hwenv(void)
{
	return (HW_XEN_PV);
}

int
is_controldom(void)
{
	return (DOMAIN_IS_INITDOMAIN(xen_info));
}

#endif	/* __xpv */
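/*
 * For illustration, the same hypervisor probe as a standalone userland
 * sketch (GCC/Clang only; __cpuid here is the <cpuid.h> compiler macro,
 * not a kernel interface):
 *
 *	#include <cpuid.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *
 *	int
 *	main(void)
 *	{
 *		unsigned int eax, ebx, ecx, edx;
 *		char sig[13];
 *
 *		__cpuid(0x40000000, eax, ebx, ecx, edx);
 *		(void) memcpy(sig, &ebx, 4);
 *		(void) memcpy(sig + 4, &ecx, 4);
 *		(void) memcpy(sig + 8, &edx, 4);
 *		sig[12] = '\0';
 *		(void) printf("%s\n", sig);	// "XenVMMXenVMM" on Xen HVM
 *		return (0);
 *	}
 *
 * On bare metal the 0x40000000 leaf is undefined and typically echoes
 * garbage, which is why determine_platform() insists on both the
 * signature and a plausible max-leaf value in %eax.
 */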
static void
cpuid_intel_getids(cpu_t *cpu, void *feature)
{
	int i;
	uint_t chipid_shift = 0;
	uint_t coreid_shift = 0;
	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;

	for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
		chipid_shift++;

	cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
	cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);

	if (is_x86_feature(feature, X86FSET_CMP)) {
		/*
		 * Multi-core (and possibly multi-threaded)
		 * processors.
		 */
		uint_t ncpu_per_core;
		if (cpi->cpi_ncore_per_chip == 1)
			ncpu_per_core = cpi->cpi_ncpu_per_chip;
		else if (cpi->cpi_ncore_per_chip > 1)
			ncpu_per_core = cpi->cpi_ncpu_per_chip /
			    cpi->cpi_ncore_per_chip;
		/*
		 * 8bit APIC IDs on dual core Pentiums
		 * look like this:
		 *
		 * +-----------------------+------+------+
		 * | Physical Package ID   |  MC  |  HT  |
		 * +-----------------------+------+------+
		 * <------- chipid -------->
		 * <------- coreid --------------->
		 *			   <--- clogid -->
		 *
		 * Where the number of bits necessary to
		 * represent MC and HT fields together equals
		 * the minimum number of bits necessary to
		 * store the value of cpi->cpi_ncpu_per_chip.
		 * Of those bits, the MC part uses the number
		 * of bits necessary to store the value of
		 * cpi->cpi_ncore_per_chip.
		 */
		for (i = 1; i < ncpu_per_core; i <<= 1)
			coreid_shift++;
		cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
		cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
	} else if (is_x86_feature(feature, X86FSET_HTT)) {
		/*
		 * Single-core multi-threaded processors.
		 */
		cpi->cpi_coreid = cpi->cpi_chipid;
		cpi->cpi_pkgcoreid = 0;
	}
	cpi->cpi_procnodeid = cpi->cpi_chipid;
	cpi->cpi_compunitid = cpi->cpi_coreid;
}
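/*
 * Worked example with made-up numbers: a package of 2 cores x 2 threads
 * has cpi_ncpu_per_chip == 4, giving chipid_shift == 2.  For apicid 0x7:
 * chipid == 7 >> 2 == 1 and clogid == 7 & 3 == 3; with ncpu_per_core ==
 * 2, coreid_shift == 1, so coreid == 7 >> 1 == 3 and pkgcoreid ==
 * 3 >> 1 == 1 -- the second thread of the second core in the second
 * package.
 */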
static void
cpuid_amd_getids(cpu_t *cpu)
{
	int i, first_half, coreidsz;
	uint32_t nb_caps_reg;
	uint_t node2_1;
	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
	struct cpuid_regs *cp;

	/*
	 * AMD CMP chips currently have a single thread per core.
	 *
	 * Since no two cpus share a core we must assign a distinct coreid
	 * per cpu, and we do this by using the cpu_id.  This scheme does not,
	 * however, guarantee that sibling cores of a chip will have sequential
	 * coreids starting at a multiple of the number of cores per chip -
	 * that is usually the case, but if the ACPI MADT table is presented
	 * in a different order then we need to perform a few more gymnastics
	 * for the pkgcoreid.
	 *
	 * All processors in the system have the same number of enabled
	 * cores.  Cores within a processor are always numbered sequentially
	 * from 0 regardless of how many or which are disabled, and there
	 * is no way for the operating system to discover the real core id
	 * when some are disabled.
	 *
	 * In family 0x15, the cores come in pairs called compute units. They
	 * share I$ and L2 caches and the FPU.  Enumeration of this feature is
	 * simplified by the new topology extensions CPUID leaf, indicated by
	 * the X86 feature X86FSET_TOPOEXT.
	 */

	cpi->cpi_coreid = cpu->cpu_id;
	cpi->cpi_compunitid = cpu->cpu_id;

	if (cpi->cpi_xmaxeax >= 0x80000008) {

		coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);

		/*
		 * In AMD parlance chip is really a node while Solaris
		 * sees chip as equivalent to socket/package.
		 */
		cpi->cpi_ncore_per_chip =
		    BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
		if (coreidsz == 0) {
			/* Use legacy method */
			for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
				coreidsz++;
			if (coreidsz == 0)
				coreidsz = 1;
		}
	} else {
		/* Assume single-core part */
		cpi->cpi_ncore_per_chip = 1;
		coreidsz = 1;
	}

	cpi->cpi_clogid = cpi->cpi_pkgcoreid =
	    cpi->cpi_apicid & ((1 << coreidsz) - 1);
	cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip;

	/* Get node ID, compute unit ID */
	if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
	    cpi->cpi_xmaxeax >= 0x8000001e) {
		cp = &cpi->cpi_extd[0x1e];
		cp->cp_eax = 0x8000001e;
		(void) __cpuid_insn(cp);

		cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
		cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
		cpi->cpi_cores_per_compunit = BITX(cp->cp_ebx, 15, 8) + 1;
		cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0)
		    + (cpi->cpi_ncore_per_chip / cpi->cpi_cores_per_compunit)
		    * (cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg);
	} else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
		cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
	} else if (cpi->cpi_family == 0x10) {
		/*
		 * See if we are a multi-node processor.
		 * All processors in the system have the same number of nodes
		 */
		nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8);
		if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
			/* Single-node */
			cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
			    coreidsz);
		} else {

			/*
			 * Multi-node revision D (2 nodes per package
			 * are supported)
			 */
			cpi->cpi_procnodes_per_pkg = 2;

			first_half = (cpi->cpi_pkgcoreid <=
			    (cpi->cpi_ncore_per_chip/2 - 1));

			if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
				/* We are BSP */
				cpi->cpi_procnodeid = (first_half ? 0 : 1);
			} else {

				/* We are AP */
				/* NodeId[2:1] bits to use for reading F3xe8 */
				node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;

				nb_caps_reg =
				    pci_getl_func(0, 24 + node2_1, 3, 0xe8);

				/*
				 * Check IntNodeNum bit (31:30, but bit 31 is
				 * always 0 on dual-node processors)
				 */
				if (BITX(nb_caps_reg, 30, 30) == 0)
					cpi->cpi_procnodeid = node2_1 +
					    !first_half;
				else
					cpi->cpi_procnodeid = node2_1 +
					    first_half;
			}
		}
	} else {
		cpi->cpi_procnodeid = 0;
	}

	cpi->cpi_chipid =
	    cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg;
}
/*
 * Set up the XFeature_Enabled_Mask register.  Required by the xsave feature.
 */
void
setup_xfem(void)
{
	uint64_t flags = XFEATURE_LEGACY_FP;

	ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));

	if (is_x86_feature(x86_featureset, X86FSET_SSE))
		flags |= XFEATURE_SSE;

	if (is_x86_feature(x86_featureset, X86FSET_AVX))
		flags |= XFEATURE_AVX;

	set_xcr(XFEATURE_ENABLED_MASK, flags);

	xsave_bv_all = flags;
}
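/*
 * For reference (a userland illustration, not a kernel interface): the
 * value set_xcr() writes is XCR0, readable with the xgetbv instruction,
 * e.g. via the _xgetbv() intrinsic from <immintrin.h> when compiling
 * with -mxsave:
 *
 *	uint64_t xcr0 = _xgetbv(0);	// 0x7 => x87 | SSE | AVX enabled
 *
 * With all three features above enabled, flags == 0x7: bit 0 (x87,
 * always set), bit 1 (SSE/%xmm), bit 2 (AVX upper-%ymm).
 */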
void
cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
{
	uint32_t mask_ecx, mask_edx;
	struct cpuid_info *cpi;
	struct cpuid_regs *cp;
	int xcpuid;
#if !defined(__xpv)
	extern int idle_cpu_prefer_mwait;
#endif

	/*
	 * Space statically allocated for BSP, ensure pointer is set
	 */
	if (cpu->cpu_id == 0) {
		if (cpu->cpu_m.mcpu_cpi == NULL)
			cpu->cpu_m.mcpu_cpi = &cpuid_info0;
	}

	add_x86_feature(featureset, X86FSET_CPUID);

	cpi = cpu->cpu_m.mcpu_cpi;

	cp = &cpi->cpi_std[0];
	cp->cp_eax = 0;
	cpi->cpi_maxeax = __cpuid_insn(cp);
	{
		uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
		*iptr++ = cp->cp_ebx;
		*iptr++ = cp->cp_edx;
		*iptr++ = cp->cp_ecx;
		*(char *)&cpi->cpi_vendorstr[12] = '\0';
	}
	cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
	x86_vendor = cpi->cpi_vendor;		/* for compatibility */

	/*
	 * Limit the range in case of weird hardware
	 */
	if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
		cpi->cpi_maxeax = CPI_MAXEAX_MAX;
	if (cpi->cpi_maxeax < 1)
		goto pass1_done;

	cp = &cpi->cpi_std[1];
	cp->cp_eax = 1;
	(void) __cpuid_insn(cp);
	/*
	 * Extract identifying constants for easy access.
	 */
	cpi->cpi_model = CPI_MODEL(cpi);
	cpi->cpi_family = CPI_FAMILY(cpi);

	if (cpi->cpi_family == 0xf)
		cpi->cpi_family += CPI_FAMILY_XTD(cpi);

	/*
	 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
	 * Intel, and presumably everyone else, uses model == 0xf, as
	 * one would expect (max value means possible overflow).  Sigh.
	 */
	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
		if (IS_EXTENDED_MODEL_INTEL(cpi))
			cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
		break;
	case X86_VENDOR_AMD:
		if (CPI_FAMILY(cpi) == 0xf)
			cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
		break;
	default:
		if (cpi->cpi_model == 0xf)
			cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
		break;
	}

	cpi->cpi_step = CPI_STEP(cpi);
	cpi->cpi_brandid = CPI_BRANDID(cpi);
	/*
	 * *default* assumptions:
	 * - believe %edx feature word
	 * - ignore %ecx feature word
	 * - 32-bit virtual and physical addressing
	 */
	mask_edx = 0xffffffff;
	mask_ecx = 0;

	cpi->cpi_pabits = cpi->cpi_vabits = 32;
->cpi_vendor
) {
983 case X86_VENDOR_Intel
:
984 if (cpi
->cpi_family
== 5)
985 x86_type
= X86_TYPE_P5
;
986 else if (IS_LEGACY_P6(cpi
)) {
987 x86_type
= X86_TYPE_P6
;
988 pentiumpro_bug4046376
= 1;
989 pentiumpro_bug4064495
= 1;
991 * Clear the SEP bit when it was set erroneously
993 if (cpi
->cpi_model
< 3 && cpi
->cpi_step
< 3)
994 cp
->cp_edx
&= ~CPUID_INTC_EDX_SEP
;
995 } else if (IS_NEW_F6(cpi
) || cpi
->cpi_family
== 0xf) {
996 x86_type
= X86_TYPE_P4
;
998 * We don't currently depend on any of the %ecx
999 * features until Prescott, so we'll only check
1000 * this from P4 onwards. We might want to revisit
1003 mask_ecx
= 0xffffffff;
1004 } else if (cpi
->cpi_family
> 0xf)
1005 mask_ecx
= 0xffffffff;
1007 * We don't support MONITOR/MWAIT if leaf 5 is not available
1008 * to obtain the monitor linesize.
1010 if (cpi
->cpi_maxeax
< 5)
1011 mask_ecx
&= ~CPUID_INTC_ECX_MON
;
1013 case X86_VENDOR_IntelClone
:
	case X86_VENDOR_AMD:
#if defined(OPTERON_ERRATUM_108)
		if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
			cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
			cpi->cpi_model = 0xc;
		}
#endif
		if (cpi->cpi_family == 5) {
			/*
			 * AMD K5 and K6
			 *
			 * These CPUs have an incomplete implementation
			 * of MCA/MCE which we mask away.
			 */
			mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);

			/*
			 * Model 0 uses the wrong (APIC) bit
			 * to indicate PGE.  Fix it here.
			 */
			if (cpi->cpi_model == 0) {
				if (cp->cp_edx & 0x200) {
					cp->cp_edx &= ~0x200;
					cp->cp_edx |= CPUID_INTC_EDX_PGE;
				}
			}

			/*
			 * Early models had problems w/ MMX; disable.
			 */
			if (cpi->cpi_model < 6)
				mask_edx &= ~CPUID_INTC_EDX_MMX;
		}

		/*
		 * For newer families, SSE3 and CX16, at least, are valid;
		 * enable all
		 */
		if (cpi->cpi_family >= 0xf)
			mask_ecx = 0xffffffff;
		/*
		 * We don't support MONITOR/MWAIT if leaf 5 is not available
		 * to obtain the monitor linesize.
		 */
		if (cpi->cpi_maxeax < 5)
			mask_ecx &= ~CPUID_INTC_ECX_MON;

#if !defined(__xpv)
		/*
		 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD
		 * processors.  AMD does not intend MWAIT to be used in the cpu
		 * idle loop on current and future processors.  10h and future
		 * AMD processors use more power in MWAIT than HLT.
		 * Pre-family-10h Opterons do not have the MWAIT instruction.
		 */
		idle_cpu_prefer_mwait = 0;
#endif

		break;
	case X86_VENDOR_TM:
		/*
		 * workaround the NT workaround in CMS 4.1
		 */
		if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
		    (cpi->cpi_step == 2 || cpi->cpi_step == 3))
			cp->cp_edx |= CPUID_INTC_EDX_CX8;
		break;
	case X86_VENDOR_Centaur:
		/*
		 * workaround the NT workarounds again
		 */
		if (cpi->cpi_family == 6)
			cp->cp_edx |= CPUID_INTC_EDX_CX8;
		break;
	case X86_VENDOR_Cyrix:
		/*
		 * We rely heavily on the probing in locore
		 * to actually figure out what parts, if any,
		 * of the Cyrix cpuid instruction to believe.
		 */
		switch (x86_type) {
		case X86_TYPE_CYRIX_486:
			mask_edx = 0;
			break;
		case X86_TYPE_CYRIX_6x86:
			mask_edx = 0;
			break;
		case X86_TYPE_CYRIX_6x86L:
			mask_edx =
			    CPUID_INTC_EDX_DE |
			    CPUID_INTC_EDX_CX8;
			break;
		case X86_TYPE_CYRIX_6x86MX:
			mask_edx =
			    CPUID_INTC_EDX_DE |
			    CPUID_INTC_EDX_MSR |
			    CPUID_INTC_EDX_CX8 |
			    CPUID_INTC_EDX_PGE |
			    CPUID_INTC_EDX_CMOV |
			    CPUID_INTC_EDX_MMX;
			break;
		case X86_TYPE_CYRIX_GXm:
			mask_edx =
			    CPUID_INTC_EDX_MSR |
			    CPUID_INTC_EDX_CX8 |
			    CPUID_INTC_EDX_CMOV |
			    CPUID_INTC_EDX_MMX;
			break;
		case X86_TYPE_CYRIX_MediaGX:
			break;
		case X86_TYPE_CYRIX_MII:
		case X86_TYPE_VIA_CYRIX_III:
			mask_edx =
			    CPUID_INTC_EDX_DE |
			    CPUID_INTC_EDX_TSC |
			    CPUID_INTC_EDX_MSR |
			    CPUID_INTC_EDX_CX8 |
			    CPUID_INTC_EDX_PGE |
			    CPUID_INTC_EDX_CMOV |
			    CPUID_INTC_EDX_MMX;
			break;
		default:
			break;
		}
		break;
	}

#if defined(__xpv)
	/*
	 * Do not support MONITOR/MWAIT under a hypervisor
	 */
	mask_ecx &= ~CPUID_INTC_ECX_MON;
	/*
	 * Do not support XSAVE under a hypervisor for now
	 */
	xsave_force_disable = B_TRUE;

#endif	/* __xpv */

	if (xsave_force_disable) {
		mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
		mask_ecx &= ~CPUID_INTC_ECX_AVX;
	}
	/*
	 * Now we've figured out the masks that determine
	 * which bits we choose to believe, apply the masks
	 * to the feature words, then map the kernel's view
	 * of these feature words into its feature word.
	 */
	cp->cp_edx &= mask_edx;
	cp->cp_ecx &= mask_ecx;

	/*
	 * apply any platform restrictions (we don't call this
	 * immediately after __cpuid_insn here, because we need the
	 * workarounds applied above first)
	 */
	platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);

	/*
	 * fold in overrides from the "eeprom" mechanism
	 */
	cp->cp_edx |= cpuid_feature_edx_include;
	cp->cp_edx &= ~cpuid_feature_edx_exclude;

	cp->cp_ecx |= cpuid_feature_ecx_include;
	cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
	if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
		add_x86_feature(featureset, X86FSET_LARGEPAGE);
	}
	if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
		add_x86_feature(featureset, X86FSET_TSC);
	}
	if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
		add_x86_feature(featureset, X86FSET_MSR);
	}
	if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
		add_x86_feature(featureset, X86FSET_MTRR);
	}
	if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
		add_x86_feature(featureset, X86FSET_PGE);
	}
	if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
		add_x86_feature(featureset, X86FSET_CMOV);
	}
	if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
		add_x86_feature(featureset, X86FSET_MMX);
	}
	if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
	    (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
		add_x86_feature(featureset, X86FSET_MCA);
	}
	if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
		add_x86_feature(featureset, X86FSET_PAE);
	}
	if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
		add_x86_feature(featureset, X86FSET_CX8);
	}
	if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
		add_x86_feature(featureset, X86FSET_CX16);
	}
	if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
		add_x86_feature(featureset, X86FSET_PAT);
	}
	if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
		add_x86_feature(featureset, X86FSET_SEP);
	}
	if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
		/*
		 * In our implementation, fxsave/fxrstor
		 * are prerequisites before we'll even
		 * try and do SSE things.
		 */
		if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
			add_x86_feature(featureset, X86FSET_SSE);
		}
		if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
			add_x86_feature(featureset, X86FSET_SSE2);
		}
		if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
			add_x86_feature(featureset, X86FSET_SSE3);
		}
		if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
			add_x86_feature(featureset, X86FSET_SSSE3);
		}
		if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
			add_x86_feature(featureset, X86FSET_SSE4_1);
		}
		if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
			add_x86_feature(featureset, X86FSET_SSE4_2);
		}
		if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
			add_x86_feature(featureset, X86FSET_AES);
		}
		if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
			add_x86_feature(featureset, X86FSET_PCLMULQDQ);
		}

		if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
			add_x86_feature(featureset, X86FSET_XSAVE);
			/* We only test AVX when there is XSAVE */
			if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
				add_x86_feature(featureset,
				    X86FSET_AVX);
			}
		}
	}
	if (cp->cp_edx & CPUID_INTC_EDX_DE) {
		add_x86_feature(featureset, X86FSET_DE);
	}

	if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
		/*
		 * We require the CLFLUSH instruction for erratum workaround
		 * to use MONITOR/MWAIT.
		 */
		if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
			cpi->cpi_mwait.support |= MWAIT_SUPPORT;
			add_x86_feature(featureset, X86FSET_MWAIT);
		} else {
			extern int idle_cpu_assert_cflush_monitor;

			/*
			 * All processors we are aware of which have
			 * MONITOR/MWAIT also have CLFLUSH.
			 */
			if (idle_cpu_assert_cflush_monitor) {
				ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
				    (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
			}
		}
	}

	if (cp->cp_ecx & CPUID_INTC_ECX_VMX) {
		add_x86_feature(featureset, X86FSET_VMX);
	}

	/*
	 * Only need it the first time; the rest of the cpus would follow
	 * suit.  We only capture this for the boot cpu.
	 */
	if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
		add_x86_feature(featureset, X86FSET_CLFSH);
		x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
	}

	if (is_x86_feature(featureset, X86FSET_PAE))
		cpi->cpi_pabits = 36;
	/*
	 * Hyperthreading configuration is slightly tricky on Intel
	 * and pure clones, and even trickier on AMD.
	 *
	 * (AMD chose to set the HTT bit on their CMP processors,
	 * even though they're not actually hyperthreaded.  Thus it
	 * takes a bit more work to figure out what's really going
	 * on ... see the handling of the CMP_LGCY bit below)
	 */
	if (cp->cp_edx & CPUID_INTC_EDX_HTT) {
		cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
		if (cpi->cpi_ncpu_per_chip > 1)
			add_x86_feature(featureset, X86FSET_HTT);
	} else {
		cpi->cpi_ncpu_per_chip = 1;
	}
	/*
	 * Work on the "extended" feature information, doing
	 * some basic initialization for cpuid_pass2()
	 */
	xcpuid = 0;
	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
		if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf)
			xcpuid++;
		break;
	case X86_VENDOR_AMD:
		if (cpi->cpi_family > 5 ||
		    (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
			xcpuid++;
		break;
	case X86_VENDOR_Cyrix:
		/*
		 * Only these Cyrix CPUs are -known- to support
		 * extended cpuid operations.
		 */
		if (x86_type == X86_TYPE_VIA_CYRIX_III ||
		    x86_type == X86_TYPE_CYRIX_GXm)
			xcpuid++;
		break;
	case X86_VENDOR_Centaur:
	case X86_VENDOR_TM:
	default:
		xcpuid++;
		break;
	}

	if (xcpuid) {
		cp = &cpi->cpi_extd[0];
		cp->cp_eax = 0x80000000;
		cpi->cpi_xmaxeax = __cpuid_insn(cp);
	}

	if (cpi->cpi_xmaxeax & 0x80000000) {

		if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
			cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;

		switch (cpi->cpi_vendor) {
		case X86_VENDOR_Intel:
		case X86_VENDOR_AMD:
			if (cpi->cpi_xmaxeax < 0x80000001)
				break;
			cp = &cpi->cpi_extd[1];
			cp->cp_eax = 0x80000001;
			(void) __cpuid_insn(cp);

			if (cpi->cpi_vendor == X86_VENDOR_AMD &&
			    cpi->cpi_family == 5 &&
			    cpi->cpi_model == 6 &&
			    cpi->cpi_step == 6) {
				/*
				 * K6 model 6 uses bit 10 to indicate SYSC
				 * Later models use bit 11. Fix it here.
				 */
				if (cp->cp_edx & 0x400) {
					cp->cp_edx &= ~0x400;
					cp->cp_edx |= CPUID_AMD_EDX_SYSC;
				}
			}

			platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
			/*
			 * Compute the additions to the kernel's feature word.
			 */
			if (cp->cp_edx & CPUID_AMD_EDX_NX) {
				add_x86_feature(featureset, X86FSET_NX);
			}

			/*
			 * Regardless of whether or not we boot 64-bit,
			 * we should have a way to identify whether
			 * the CPU is capable of running 64-bit.
			 */
			if (cp->cp_edx & CPUID_AMD_EDX_LM) {
				add_x86_feature(featureset, X86FSET_64);
			}

#if defined(__amd64)
			/* 1 GB large page - enable only for 64 bit kernel */
			if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
				add_x86_feature(featureset, X86FSET_1GPG);
			}
#endif

			if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
			    (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
			    (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
				add_x86_feature(featureset, X86FSET_SSE4A);
			}

			/*
			 * If both the HTT and CMP_LGCY bits are set,
			 * then we're not actually HyperThreaded.  Read
			 * "AMD CPUID Specification" for more details.
			 */
			if (cpi->cpi_vendor == X86_VENDOR_AMD &&
			    is_x86_feature(featureset, X86FSET_HTT) &&
			    (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) {
				remove_x86_feature(featureset, X86FSET_HTT);
				add_x86_feature(featureset, X86FSET_CMP);
			}
#if defined(__amd64)
			/*
			 * It's really tricky to support syscall/sysret in
			 * the i386 kernel; we rely on sysenter/sysexit
			 * instead.  In the amd64 kernel, things are -way-
			 * better.
			 */
			if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
				add_x86_feature(featureset, X86FSET_ASYSC);
			}

			/*
			 * While we're thinking about system calls, note
			 * that AMD processors don't support sysenter
			 * in long mode at all, so don't try to program them.
			 */
			if (x86_vendor == X86_VENDOR_AMD) {
				remove_x86_feature(featureset, X86FSET_SEP);
			}
#endif
			if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
				add_x86_feature(featureset, X86FSET_TSCP);
			}

			if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
				add_x86_feature(featureset, X86FSET_SVM);
			}

			if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
				add_x86_feature(featureset, X86FSET_TOPOEXT);
			}
			break;
		default:
			break;
		}
		/*
		 * Get CPUID data about processor cores and hyperthreads.
		 */
		switch (cpi->cpi_vendor) {
		case X86_VENDOR_Intel:
			if (cpi->cpi_maxeax >= 4) {
				cp = &cpi->cpi_std[4];
				cp->cp_eax = 4;
				cp->cp_ecx = 0;
				(void) __cpuid_insn(cp);
				platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
			}
			/*FALLTHROUGH*/
		case X86_VENDOR_AMD:
			if (cpi->cpi_xmaxeax < 0x80000008)
				break;
			cp = &cpi->cpi_extd[8];
			cp->cp_eax = 0x80000008;
			(void) __cpuid_insn(cp);
			platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp);

			/*
			 * Virtual and physical address limits from
			 * cpuid override previously guessed values.
			 */
			cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
			cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
			break;
		default:
			break;
		}
		/*
		 * Derive the number of cores per chip
		 */
		switch (cpi->cpi_vendor) {
		case X86_VENDOR_Intel:
			if (cpi->cpi_maxeax < 4) {
				cpi->cpi_ncore_per_chip = 1;
				break;
			} else {
				cpi->cpi_ncore_per_chip =
				    BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1;
			}
			break;
		case X86_VENDOR_AMD:
			if (cpi->cpi_xmaxeax < 0x80000008) {
				cpi->cpi_ncore_per_chip = 1;
				break;
			} else {
				/*
				 * On family 0xf cpuid fn 0x80000008 ECX[7:0]
				 * "NC" is 1 less than the number of physical
				 * cores on the chip.  In family 0x10 this
				 * value can be affected by "downcoring" - it
				 * reflects 1 less than the number of cores
				 * actually enabled on this node.
				 */
				cpi->cpi_ncore_per_chip =
				    BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
			}
			break;
		default:
			cpi->cpi_ncore_per_chip = 1;
			break;
		}
		/*
		 * Get CPUID data about TSC Invariance in Deep C-State.
		 */
		switch (cpi->cpi_vendor) {
		case X86_VENDOR_Intel:
			if (cpi->cpi_maxeax >= 7) {
				cp = &cpi->cpi_extd[7];
				cp->cp_eax = 0x80000007;
				cp->cp_ecx = 0;
				(void) __cpuid_insn(cp);
			}
			break;
		default:
			break;
		}
	} else {
		cpi->cpi_ncore_per_chip = 1;
	}
	/*
	 * If more than one core, then this processor is CMP.
	 */
	if (cpi->cpi_ncore_per_chip > 1) {
		add_x86_feature(featureset, X86FSET_CMP);
	}

	/*
	 * If the number of cores is the same as the number
	 * of CPUs, then we cannot have HyperThreading.
	 */
	if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) {
		remove_x86_feature(featureset, X86FSET_HTT);
	}
	cpi->cpi_apicid = CPI_APIC_ID(cpi);
	cpi->cpi_procnodes_per_pkg = 1;
	cpi->cpi_cores_per_compunit = 1;
	if (is_x86_feature(featureset, X86FSET_HTT) == B_FALSE &&
	    is_x86_feature(featureset, X86FSET_CMP) == B_FALSE) {
		/*
		 * Single-core single-threaded processors.
		 */
		cpi->cpi_chipid = -1;
		cpi->cpi_clogid = 0;
		cpi->cpi_coreid = cpu->cpu_id;
		cpi->cpi_pkgcoreid = 0;
		if (cpi->cpi_vendor == X86_VENDOR_AMD)
			cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
		else
			cpi->cpi_procnodeid = cpi->cpi_chipid;
	} else if (cpi->cpi_ncpu_per_chip > 1) {
		if (cpi->cpi_vendor == X86_VENDOR_Intel)
			cpuid_intel_getids(cpu, featureset);
		else if (cpi->cpi_vendor == X86_VENDOR_AMD)
			cpuid_amd_getids(cpu);
		else {
			/*
			 * All other processors are currently
			 * assumed to have single cores.
			 */
			cpi->cpi_coreid = cpi->cpi_chipid;
			cpi->cpi_pkgcoreid = 0;
			cpi->cpi_procnodeid = cpi->cpi_chipid;
			cpi->cpi_compunitid = cpi->cpi_chipid;
		}
	}
	/*
	 * Synthesize chip "revision" and socket type
	 */
	cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
	    cpi->cpi_model, cpi->cpi_step);
	cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
	    cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
	cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
	    cpi->cpi_model, cpi->cpi_step);

pass1_done:
	cpi->cpi_pass = 1;
}
/*
 * Make copies of the cpuid table entries we depend on, in
 * part for ease of parsing now, in part so that we have only
 * one place to correct any of it, in part for ease of
 * later export to userland, and in part so we can look at
 * this stuff in a crash dump.
 */

void
cpuid_pass2(cpu_t *cpu)
{
	uint_t n, nmax;
	int i;
	struct cpuid_regs *cp;
	uint8_t *dp;
	uint32_t *iptr;
	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;

	ASSERT(cpi->cpi_pass == 1);

	if (cpi->cpi_maxeax < 1)
		goto pass2_done;

	if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
		nmax = NMAX_CPI_STD;
	/*
	 * (We already handled n == 0 and n == 1 in pass 1)
	 */
	for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
		cp->cp_eax = n;

		/*
		 * CPUID function 4 expects %ecx to be initialized
		 * with an index which indicates which cache to return
		 * information about. The OS is expected to call function 4
		 * with %ecx set to 0, 1, 2, ... until it returns with
		 * EAX[4:0] set to 0, which indicates there are no more
		 * caches.
		 *
		 * Here, populate cpi_std[4] with the information returned by
		 * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
		 * when dynamic memory allocation becomes available.
		 *
		 * Note: we need to explicitly initialize %ecx here, since
		 * function 4 may have been previously invoked.
		 */
		if (n == 4)
			cp->cp_ecx = 0;

		(void) __cpuid_insn(cp);
		platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
		switch (n) {
		case 2:
			/*
			 * "the lower 8 bits of the %eax register
			 * contain a value that identifies the number
			 * of times the cpuid [instruction] has to be
			 * executed to obtain a complete image of the
			 * processor's caching systems."
			 *
			 * How *do* they make this stuff up?
			 */
			cpi->cpi_ncache = sizeof (*cp) *
			    BITX(cp->cp_eax, 7, 0);
			if (cpi->cpi_ncache == 0)
				break;
			cpi->cpi_ncache--;	/* skip count byte */

			/*
			 * Well, for now, rather than attempt to implement
			 * this slightly dubious algorithm, we just look
			 * at the first 15 ..
			 */
			if (cpi->cpi_ncache > (sizeof (*cp) - 1))
				cpi->cpi_ncache = sizeof (*cp) - 1;

			dp = cpi->cpi_cacheinfo;
			if (BITX(cp->cp_eax, 31, 31) == 0) {
				uint8_t *p = (void *)&cp->cp_eax;
				for (i = 1; i < 4; i++)
					if (p[i] != 0)
						*dp++ = p[i];
			}
			if (BITX(cp->cp_ebx, 31, 31) == 0) {
				uint8_t *p = (void *)&cp->cp_ebx;
				for (i = 0; i < 4; i++)
					if (p[i] != 0)
						*dp++ = p[i];
			}
			if (BITX(cp->cp_ecx, 31, 31) == 0) {
				uint8_t *p = (void *)&cp->cp_ecx;
				for (i = 0; i < 4; i++)
					if (p[i] != 0)
						*dp++ = p[i];
			}
			if (BITX(cp->cp_edx, 31, 31) == 0) {
				uint8_t *p = (void *)&cp->cp_edx;
				for (i = 0; i < 4; i++)
					if (p[i] != 0)
						*dp++ = p[i];
			}
			break;
		case 3:	/* Processor serial number, if PSN supported */
			break;

		case 4:	/* Deterministic cache parameters */
			break;

		case 5:	/* Monitor/Mwait parameters */
		{
			size_t mwait_size;

			/*
			 * check cpi_mwait.support which was set in cpuid_pass1
			 */
			if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
				break;

			/*
			 * Protect ourselves from insane mwait line size.
			 * Workaround for incomplete hardware emulator(s).
			 */
			mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
			if (mwait_size < sizeof (uint32_t) ||
			    !ISP2(mwait_size)) {
#if DEBUG
				cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
				    "size %ld", cpu->cpu_id, (long)mwait_size);
#endif
				break;
			}

			cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
			cpi->cpi_mwait.mon_max = mwait_size;
			if (MWAIT_EXTENSION(cpi)) {
				cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
				if (MWAIT_INT_ENABLE(cpi))
					cpi->cpi_mwait.support |=
					    MWAIT_ECX_INT_ENABLE;
			}
			break;
		}
		default:
			break;
		}
	}
	if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) {
		struct cpuid_regs regs;

		cp = &regs;
		cp->cp_eax = 0xB;
		cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;

		(void) __cpuid_insn(cp);

		/*
		 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
		 * indicates that the extended topology enumeration leaf is
		 * available.
		 */
		if (cp->cp_ebx) {
			uint32_t x2apic_id;
			uint_t coreid_shift = 0;
			uint_t ncpu_per_core = 1;
			uint_t chipid_shift = 0;
			uint_t ncpu_per_chip = 1;
			uint_t i;
			uint_t level;

			for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
				cp->cp_eax = 0xB;
				cp->cp_ecx = i;

				(void) __cpuid_insn(cp);
				level = CPI_CPU_LEVEL_TYPE(cp);

				if (level == 1) {
					x2apic_id = cp->cp_edx;
					coreid_shift = BITX(cp->cp_eax, 4, 0);
					ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
				} else if (level == 2) {
					x2apic_id = cp->cp_edx;
					chipid_shift = BITX(cp->cp_eax, 4, 0);
					ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
				}
			}

			cpi->cpi_apicid = x2apic_id;
			cpi->cpi_ncpu_per_chip = ncpu_per_chip;
			cpi->cpi_ncore_per_chip = ncpu_per_chip /
			    ncpu_per_core;
			cpi->cpi_chipid = x2apic_id >> chipid_shift;
			cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
			cpi->cpi_coreid = x2apic_id >> coreid_shift;
			cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
		}

		/* Make cp NULL so that we don't stumble on others */
		cp = NULL;
	}
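	/*
	 * Worked example with made-up values: on a 4-core/8-thread part,
	 * level 1 (SMT) typically reports a shift of 1 and level 2 (core)
	 * a shift of 3.  For x2apic_id 0x5: chipid == 5 >> 3 == 0,
	 * clogid == 5 & 7 == 5, coreid == 5 >> 1 == 2, and
	 * pkgcoreid == 5 >> 1 == 2 -- package 0, core 2, second thread.
	 */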
	/*
	 * XSAVE enumeration
	 */
	if (cpi->cpi_maxeax >= 0xD) {
		struct cpuid_regs regs;
		boolean_t cpuid_d_valid = B_TRUE;

		cp = &regs;
		cp->cp_eax = 0xD;
		cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;

		(void) __cpuid_insn(cp);

		/*
		 * Sanity checks for debug
		 */
		if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
		    (cp->cp_eax & XFEATURE_SSE) == 0) {
			cpuid_d_valid = B_FALSE;
		}

		cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
		cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
		cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;

		/*
		 * If the hw supports AVX, get the size and offset in the save
		 * area for the ymm state.
		 */
		if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
			cp->cp_eax = 0xD;
			cp->cp_ecx = 2;
			cp->cp_edx = cp->cp_ebx = 0;

			(void) __cpuid_insn(cp);

			if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
			    cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
				cpuid_d_valid = B_FALSE;
			}

			cpi->cpi_xsave.ymm_size = cp->cp_eax;
			cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
		}

		if (!is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
			xsave_state_size = 0;
		} else if (cpuid_d_valid) {
			xsave_state_size = cpi->cpi_xsave.xsav_max_size;
		} else {
			/* Broken CPUID 0xD, probably in HVM */
			cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
			    "value: hw_low = %d, hw_high = %d, xsave_size = %d"
			    ", ymm_size = %d, ymm_offset = %d\n",
			    cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
			    cpi->cpi_xsave.xsav_hw_features_high,
			    (int)cpi->cpi_xsave.xsav_max_size,
			    (int)cpi->cpi_xsave.ymm_size,
			    (int)cpi->cpi_xsave.ymm_offset);

			if (xsave_state_size != 0) {
				/*
				 * This must be a non-boot CPU. We cannot
				 * continue, because boot cpu has already
				 * enabled XSAVE.
				 */
				ASSERT(cpu->cpu_id != 0);
				cmn_err(CE_PANIC, "cpu%d: we have already "
				    "enabled XSAVE on boot cpu, cannot "
				    "continue.", cpu->cpu_id);
			} else {
				/*
				 * Must be from boot CPU, OK to disable XSAVE.
				 */
				ASSERT(cpu->cpu_id == 0);
				remove_x86_feature(x86_featureset,
				    X86FSET_XSAVE);
				remove_x86_feature(x86_featureset, X86FSET_AVX);
				CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_XSAVE;
				CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_AVX;
				xsave_force_disable = B_TRUE;
			}
		}
	}
	if ((cpi->cpi_xmaxeax & 0x80000000) == 0)
		goto pass2_done;

	if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD)
		nmax = NMAX_CPI_EXTD;
	/*
	 * Copy the extended properties, fixing them as we go.
	 * (We already handled n == 0 and n == 1 in pass 1)
	 */
	iptr = (void *)cpi->cpi_brandstr;
	for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
		cp->cp_eax = 0x80000000 + n;
		(void) __cpuid_insn(cp);
		platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp);
		switch (n) {
		case 2:
		case 3:
		case 4:
			/*
			 * Extract the brand string
			 */
			*iptr++ = cp->cp_eax;
			*iptr++ = cp->cp_ebx;
			*iptr++ = cp->cp_ecx;
			*iptr++ = cp->cp_edx;
			break;
		case 5:
			switch (cpi->cpi_vendor) {
			case X86_VENDOR_AMD:
				/*
				 * The Athlon and Duron were the first
				 * parts to report the sizes of the
				 * TLB for large pages. Before then,
				 * we don't trust the data.
				 */
				if (cpi->cpi_family < 6 ||
				    (cpi->cpi_family == 6 &&
				    cpi->cpi_model < 1))
					cp->cp_eax = cp->cp_ebx = 0;
				break;
			default:
				break;
			}
			break;
		case 6:
			switch (cpi->cpi_vendor) {
			case X86_VENDOR_AMD:
				/*
				 * The Athlon and Duron were the first
				 * AMD parts with L2 TLB's.
				 * Before then, don't trust the data.
				 */
				if (cpi->cpi_family < 6 ||
				    cpi->cpi_family == 6 &&
				    cpi->cpi_model < 1)
					cp->cp_eax = cp->cp_ebx = 0;
				/*
				 * AMD Duron rev A0 reports L2
				 * cache size incorrectly as 1K
				 * when it is really 64K
				 */
				if (cpi->cpi_family == 6 &&
				    cpi->cpi_model == 3 &&
				    cpi->cpi_step == 0) {
					cp->cp_ecx &= 0xffff;
					cp->cp_ecx |= 0x400000;
				}
				break;
			case X86_VENDOR_Cyrix:	/* VIA C3 */
				/*
				 * VIA C3 processors are a bit messed
				 * up w.r.t. encoding cache sizes in %ecx
				 */
				if (cpi->cpi_family != 6)
					break;
				/*
				 * model 7 and 8 were incorrectly encoded
				 *
				 * xxx is model 8 really broken?
				 */
				if (cpi->cpi_model == 7 ||
				    cpi->cpi_model == 8)
					cp->cp_ecx =
					    BITX(cp->cp_ecx, 31, 24) << 16 |
					    BITX(cp->cp_ecx, 23, 16) << 12 |
					    BITX(cp->cp_ecx, 15, 8) << 8 |
					    BITX(cp->cp_ecx, 7, 0);
				/*
				 * model 9 stepping 1 has wrong associativity
				 */
				if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
					cp->cp_ecx |= 8 << 12;
				break;
			case X86_VENDOR_Intel:
				/*
				 * Extended L2 Cache features function.
				 * First appeared on Prescott.
				 */
			default:
				break;
			}
			break;
		default:
			break;
		}
	}

pass2_done:
	cpi->cpi_pass = 2;
}
static const char *
intel_cpubrand(const struct cpuid_info *cpi)
{
	int i;

	if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
	    cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
		return ("i486");

	switch (cpi->cpi_family) {
	case 5:
		return ("Intel Pentium(r)");
	case 6:
		switch (cpi->cpi_model) {
			uint_t celeron, xeon;
			const struct cpuid_regs *cp;
		case 0:
		case 1:
		case 2:
			return ("Intel Pentium(r) Pro");
		case 3:
		case 4:
			return ("Intel Pentium(r) II");
		case 6:
			return ("Intel Celeron(r)");
		case 5:
		case 7:
			celeron = xeon = 0;
			cp = &cpi->cpi_std[2];	/* cache info */

			for (i = 1; i < 4; i++) {
				uint_t tmp;

				tmp = (cp->cp_eax >> (8 * i)) & 0xff;
				if (tmp == 0x40)
					celeron++;
				if (tmp >= 0x44 && tmp <= 0x45)
					xeon++;
			}

			for (i = 0; i < 2; i++) {
				uint_t tmp;

				tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
				if (tmp == 0x40)
					celeron++;
				else if (tmp >= 0x44 && tmp <= 0x45)
					xeon++;
			}

			for (i = 0; i < 4; i++) {
				uint_t tmp;

				tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
				if (tmp == 0x40)
					celeron++;
				else if (tmp >= 0x44 && tmp <= 0x45)
					xeon++;
			}

			for (i = 0; i < 4; i++) {
				uint_t tmp;

				tmp = (cp->cp_edx >> (8 * i)) & 0xff;
				if (tmp == 0x40)
					celeron++;
				else if (tmp >= 0x44 && tmp <= 0x45)
					xeon++;
			}

			if (celeron)
				return ("Intel Celeron(r)");
			if (xeon)
				return (cpi->cpi_model == 5 ?
				    "Intel Pentium(r) II Xeon(tm)" :
				    "Intel Pentium(r) III Xeon(tm)");
			return (cpi->cpi_model == 5 ?
			    "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
			    "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
		default:
			break;
		}
	default:
		break;
	}
	/* BrandID is present if the field is nonzero */
	if (cpi->cpi_brandid != 0) {
		static const struct {
			uint_t bt_bid;
			const char *bt_str;
		} brand_tbl[] = {
			{ 0x1,	"Intel(r) Celeron(r)" },
			{ 0x2,	"Intel(r) Pentium(r) III" },
			{ 0x3,	"Intel(r) Pentium(r) III Xeon(tm)" },
			{ 0x4,	"Intel(r) Pentium(r) III" },
			{ 0x6,	"Mobile Intel(r) Pentium(r) III" },
			{ 0x7,	"Mobile Intel(r) Celeron(r)" },
			{ 0x8,	"Intel(r) Pentium(r) 4" },
			{ 0x9,	"Intel(r) Pentium(r) 4" },
			{ 0xa,	"Intel(r) Celeron(r)" },
			{ 0xb,	"Intel(r) Xeon(tm)" },
			{ 0xc,	"Intel(r) Xeon(tm) MP" },
			{ 0xe,	"Mobile Intel(r) Pentium(r) 4" },
			{ 0xf,	"Mobile Intel(r) Celeron(r)" },
			{ 0x11, "Mobile Genuine Intel(r)" },
			{ 0x12, "Intel(r) Celeron(r) M" },
			{ 0x13, "Mobile Intel(r) Celeron(r)" },
			{ 0x14, "Intel(r) Celeron(r)" },
			{ 0x15, "Mobile Genuine Intel(r)" },
			{ 0x16, "Intel(r) Pentium(r) M" },
			{ 0x17, "Mobile Intel(r) Celeron(r)" }
		};
		uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
		uint_t sgn;

		sgn = (cpi->cpi_family << 8) |
		    (cpi->cpi_model << 4) | cpi->cpi_step;

		for (i = 0; i < btblmax; i++)
			if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
				break;
		if (i < btblmax) {
			if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
				return ("Intel(r) Celeron(r)");
			if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
				return ("Intel(r) Xeon(tm) MP");
			if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
				return ("Intel(r) Xeon(tm)");
			return (brand_tbl[i].bt_str);
		}
	}

	return (NULL);
}
static const char *
amd_cpubrand(const struct cpuid_info *cpi)
{
	if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
	    cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
		return ("i486 compatible");

	switch (cpi->cpi_family) {
	case 5:
		switch (cpi->cpi_model) {
		case 0:
		case 1:
		case 2:
		case 3:
		case 4:
		case 5:
			return ("AMD-K5(r)");
		case 6:
		case 7:
			return ("AMD-K6(r)");
		case 8:
			return ("AMD-K6(r)-2");
		case 9:
			return ("AMD-K6(r)-III");
		default:
			return ("AMD (family 5)");
		}
	case 6:
		switch (cpi->cpi_model) {
		case 1:
			return ("AMD-K7(tm)");
		case 0:
		case 2:
		case 4:
			return ("AMD Athlon(tm)");
		case 3:
		case 7:
			return ("AMD Duron(tm)");
		case 6:
		case 8:
		case 10:
			/*
			 * Use the L2 cache size to distinguish
			 */
			return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
			    "AMD Athlon(tm)" : "AMD Duron(tm)");
		default:
			return ("AMD (family 6)");
		}
	default:
		break;
	}

	if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
	    cpi->cpi_brandid != 0) {
		switch (BITX(cpi->cpi_brandid, 7, 5)) {
		case 3:
			return ("AMD Opteron(tm) UP 1xx");
		case 4:
			return ("AMD Opteron(tm) DP 2xx");
		case 5:
			return ("AMD Opteron(tm) MP 8xx");
		default:
			return ("AMD Opteron(tm)");
		}
	}

	return (NULL);
}
static const char *
cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
{
	if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
	    cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
	    type == X86_TYPE_CYRIX_486)
		return ("i486 compatible");

	switch (type) {
	case X86_TYPE_CYRIX_6x86:
		return ("Cyrix 6x86");
	case X86_TYPE_CYRIX_6x86L:
		return ("Cyrix 6x86L");
	case X86_TYPE_CYRIX_6x86MX:
		return ("Cyrix 6x86MX");
	case X86_TYPE_CYRIX_GXm:
		return ("Cyrix GXm");
	case X86_TYPE_CYRIX_MediaGX:
		return ("Cyrix MediaGX");
	case X86_TYPE_CYRIX_MII:
		return ("Cyrix M2");
	case X86_TYPE_VIA_CYRIX_III:
		return ("VIA Cyrix M3");
	default:
		/*
		 * Have another wild guess ..
		 */
		if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
			return ("Cyrix 5x86");
		else if (cpi->cpi_family == 5) {
			switch (cpi->cpi_model) {
			case 2:
				return ("Cyrix 6x86");	/* Cyrix M1 */
			case 4:
				return ("Cyrix MediaGX");
			default:
				break;
			}
		} else if (cpi->cpi_family == 6) {
			switch (cpi->cpi_model) {
			case 0:
				return ("Cyrix 6x86MX"); /* Cyrix M2? */
			default:
				break;
			}
		}
		break;
	}

	return (NULL);
}
/*
 * This only gets called in the case that the CPU extended
 * feature brand strings (0x80000002, 0x80000003, 0x80000004)
 * aren't available, or contain null bytes for some reason.
 */
static void
fabricate_brandstr(struct cpuid_info *cpi)
{
	const char *brand = NULL;

	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
		brand = intel_cpubrand(cpi);
		break;
	case X86_VENDOR_AMD:
		brand = amd_cpubrand(cpi);
		break;
	case X86_VENDOR_Cyrix:
		brand = cyrix_cpubrand(cpi, x86_type);
		break;
	case X86_VENDOR_NexGen:
		if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
			brand = "NexGen Nx586";
		break;
	case X86_VENDOR_Centaur:
		if (cpi->cpi_family == 5)
			switch (cpi->cpi_model) {
			case 4:
				brand = "Centaur C6";
				break;
			case 8:
				brand = "Centaur C2";
				break;
			case 9:
				brand = "Centaur C3";
				break;
			default:
				break;
			}
		break;
	case X86_VENDOR_Rise:
		if (cpi->cpi_family == 5 &&
		    (cpi->cpi_model == 0 || cpi->cpi_model == 2))
			brand = "Rise mP6";
		break;
	case X86_VENDOR_SiS:
		if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
			brand = "SiS 55x";
		break;
	case X86_VENDOR_TM:
		if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
			brand = "Transmeta Crusoe TM3x00 or TM5x00";
		break;
	case X86_VENDOR_NSC:
	case X86_VENDOR_UMC:
	default:
		break;
	}
	if (brand) {
		(void) strcpy((char *)cpi->cpi_brandstr, brand);
		return;
	}

	/*
	 * If all else fails ...
	 */
	(void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
	    "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
	    cpi->cpi_model, cpi->cpi_step);
}
/*
 * This routine is called just after kernel memory allocation
 * becomes available on cpu0, and as part of mp_startup() on
 * the other cpus.
 *
 * Fixup the brand string, and collect any information from cpuid
 * that requires dynamically allocated storage to represent.
 */
void
cpuid_pass3(cpu_t *cpu)
{
	int	i, max, shft, level, size;
	struct cpuid_regs regs;
	struct cpuid_regs *cp;
	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;

	ASSERT(cpi->cpi_pass == 2);

	/*
	 * Function 4: Deterministic cache parameters
	 *
	 * Take this opportunity to detect the number of threads
	 * sharing the last level cache, and construct a corresponding
	 * cache id. The respective cpuid_info members are initialized
	 * to the default case of "no last level cache sharing".
	 */
	cpi->cpi_ncpu_shr_last_cache = 1;
	cpi->cpi_last_lvl_cacheid = cpu->cpu_id;

	if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) {

		/*
		 * Find the # of elements (size) returned by fn 4, and along
		 * the way detect last level cache sharing details.
		 */
		bzero(&regs, sizeof (regs));
		cp = &regs;
		for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
			cp->cp_eax = 4;
			cp->cp_ecx = i;

			(void) __cpuid_insn(cp);

			if (CPI_CACHE_TYPE(cp) == 0)
				break;
			level = CPI_CACHE_LVL(cp);
			if (level > max) {
				max = level;
				cpi->cpi_ncpu_shr_last_cache =
				    CPI_NTHR_SHR_CACHE(cp) + 1;
			}
		}
		cpi->cpi_std_4_size = size = i;

		/*
		 * Allocate the cpi_std_4 array. The first element
		 * references the regs for fn 4, %ecx == 0, which
		 * cpuid_pass2() stashed in cpi->cpi_std[4].
		 */
		if (size > 0) {
			cpi->cpi_std_4 =
			    kmem_alloc(size * sizeof (cp), KM_SLEEP);
			cpi->cpi_std_4[0] = &cpi->cpi_std[4];

			/*
			 * Allocate storage to hold the additional regs
			 * for function 4, %ecx == 1 .. cpi_std_4_size.
			 *
			 * The regs for fn 4, %ecx == 0 has already
			 * been allocated as indicated above.
			 */
			for (i = 1; i < size; i++) {
				cp = cpi->cpi_std_4[i] =
				    kmem_zalloc(sizeof (regs), KM_SLEEP);
				cp->cp_eax = 4;
				cp->cp_ecx = i;

				(void) __cpuid_insn(cp);
			}
		}
		/*
		 * Determine the number of bits needed to represent
		 * the number of CPUs sharing the last level cache.
		 *
		 * Shift off that number of bits from the APIC id to
		 * derive the cache id.
		 */
		shft = 0;
		for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
			shft++;
		cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
	}

	/*
	 * Now fixup the brand string
	 */
	if ((cpi->cpi_xmaxeax & 0x80000000) == 0) {
		fabricate_brandstr(cpi);
	} else {

		/*
		 * If we successfully extracted a brand string from the cpuid
		 * instruction, clean it up by removing leading spaces and
		 * similar junk.
		 */
		if (cpi->cpi_brandstr[0]) {
			size_t maxlen = sizeof (cpi->cpi_brandstr);
			char *src, *dst;

			dst = src = (char *)cpi->cpi_brandstr;
			src[maxlen - 1] = '\0';
			/*
			 * strip leading spaces
			 */
			while (*src == ' ')
				src++;
			/*
			 * Remove any 'Genuine' or "Authentic" prefixes
			 */
			if (strncmp(src, "Genuine ", 8) == 0)
				src += 8;
			if (strncmp(src, "Authentic ", 10) == 0)
				src += 10;

			/*
			 * Now do an in-place copy.
			 * Map (R) to (r) and (TM) to (tm).
			 * The era of teletypes is long gone, and there's
			 * -really- no need to shout.
			 */
			while (*src != '\0') {
				if (src[0] == '(') {
					if (strncmp(src + 1, "R)", 2) == 0) {
						(void) strncpy(dst, "(r)", 3);
						src += 3;
						dst += 3;
						continue;
					}
					if (strncmp(src + 1, "TM)", 3) == 0) {
						(void) strncpy(dst, "(tm)", 4);
						src += 4;
						dst += 4;
						continue;
					}
				}
				*dst++ = *src++;
			}
			*dst = '\0';

			/*
			 * Finally, remove any trailing spaces
			 */
			while (--dst > cpi->cpi_brandstr)
				if (*dst == ' ')
					*dst = '\0';
				else
					break;
		} else
			fabricate_brandstr(cpi);
	}
	cpi->cpi_pass = 3;
}
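
/*
 * Worked example of the cache id derivation in cpuid_pass3()
 * (illustrative only): with 8 threads sharing the last level cache,
 * the shift loop leaves shft == 3, so APIC ids 0..7 map to cache id 0,
 * ids 8..15 to cache id 1, and so on.
 */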
/*
 * This routine is called out of bind_hwcap() much later in the life
 * of the kernel (post_startup()). The job of this routine is to resolve
 * the hardware feature support and kernel support for those features into
 * what we're actually going to tell applications via the aux vector.
 */
uint_t
cpuid_pass4(cpu_t *cpu)
{
	struct cpuid_info *cpi;
	uint_t hwcap_flags = 0;

	if (cpu == NULL)
		cpu = CPU;
	cpi = cpu->cpu_m.mcpu_cpi;

	ASSERT(cpi->cpi_pass == 3);

	if (cpi->cpi_maxeax >= 1) {
		uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
		uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];

		*edx = CPI_FEATURES_EDX(cpi);
		*ecx = CPI_FEATURES_ECX(cpi);

		/*
		 * [these require explicit kernel support]
		 */
		if (!is_x86_feature(x86_featureset, X86FSET_SEP))
			*edx &= ~CPUID_INTC_EDX_SEP;

		if (!is_x86_feature(x86_featureset, X86FSET_SSE))
			*edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
		if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
			*edx &= ~CPUID_INTC_EDX_SSE2;

		if (!is_x86_feature(x86_featureset, X86FSET_HTT))
			*edx &= ~CPUID_INTC_EDX_HTT;

		if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
			*ecx &= ~CPUID_INTC_ECX_SSE3;

		if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
			*ecx &= ~CPUID_INTC_ECX_SSSE3;
		if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
			*ecx &= ~CPUID_INTC_ECX_SSE4_1;
		if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
			*ecx &= ~CPUID_INTC_ECX_SSE4_2;
		if (!is_x86_feature(x86_featureset, X86FSET_AES))
			*ecx &= ~CPUID_INTC_ECX_AES;
		if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
			*ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
		if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
			*ecx &= ~(CPUID_INTC_ECX_XSAVE |
			    CPUID_INTC_ECX_OSXSAVE);
		if (!is_x86_feature(x86_featureset, X86FSET_AVX))
			*ecx &= ~CPUID_INTC_ECX_AVX;

		/*
		 * [no explicit support required beyond x87 fp context]
		 */
		if (!fpu_exists)
			*edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);

		/*
		 * Now map the supported feature vector to things that we
		 * think userland will care about.
		 */
		if (*edx & CPUID_INTC_EDX_SEP)
			hwcap_flags |= AV_386_SEP;
		if (*edx & CPUID_INTC_EDX_SSE)
			hwcap_flags |= AV_386_FXSR | AV_386_SSE;
		if (*edx & CPUID_INTC_EDX_SSE2)
			hwcap_flags |= AV_386_SSE2;
		if (*ecx & CPUID_INTC_ECX_SSE3)
			hwcap_flags |= AV_386_SSE3;
		if (*ecx & CPUID_INTC_ECX_SSSE3)
			hwcap_flags |= AV_386_SSSE3;
		if (*ecx & CPUID_INTC_ECX_SSE4_1)
			hwcap_flags |= AV_386_SSE4_1;
		if (*ecx & CPUID_INTC_ECX_SSE4_2)
			hwcap_flags |= AV_386_SSE4_2;
		if (*ecx & CPUID_INTC_ECX_MOVBE)
			hwcap_flags |= AV_386_MOVBE;
		if (*ecx & CPUID_INTC_ECX_AES)
			hwcap_flags |= AV_386_AES;
		if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
			hwcap_flags |= AV_386_PCLMULQDQ;
		if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
		    (*ecx & CPUID_INTC_ECX_OSXSAVE)) {
			hwcap_flags |= AV_386_XSAVE;

			if (*ecx & CPUID_INTC_ECX_AVX)
				hwcap_flags |= AV_386_AVX;
		}
		if (*ecx & CPUID_INTC_ECX_VMX)
			hwcap_flags |= AV_386_VMX;
		if (*ecx & CPUID_INTC_ECX_POPCNT)
			hwcap_flags |= AV_386_POPCNT;
		if (*edx & CPUID_INTC_EDX_FPU)
			hwcap_flags |= AV_386_FPU;
		if (*edx & CPUID_INTC_EDX_MMX)
			hwcap_flags |= AV_386_MMX;

		if (*edx & CPUID_INTC_EDX_TSC)
			hwcap_flags |= AV_386_TSC;
		if (*edx & CPUID_INTC_EDX_CX8)
			hwcap_flags |= AV_386_CX8;
		if (*edx & CPUID_INTC_EDX_CMOV)
			hwcap_flags |= AV_386_CMOV;
		if (*ecx & CPUID_INTC_ECX_CX16)
			hwcap_flags |= AV_386_CX16;
	}

	if (cpi->cpi_xmaxeax < 0x80000001)
		goto pass4_done;

	switch (cpi->cpi_vendor) {
		struct cpuid_regs cp;
		uint32_t *edx, *ecx;

	case X86_VENDOR_Intel:
		/*
		 * Seems like Intel duplicated what was necessary
		 * here to make the initial crop of 64-bit OS's work.
		 * Hopefully, those are the only "extended" bits
		 * we'll need from Intel.
		 */
		/*FALLTHROUGH*/

	case X86_VENDOR_AMD:
		edx = &cpi->cpi_support[AMD_EDX_FEATURES];
		ecx = &cpi->cpi_support[AMD_ECX_FEATURES];

		*edx = CPI_FEATURES_XTD_EDX(cpi);
		*ecx = CPI_FEATURES_XTD_ECX(cpi);

		/*
		 * [these features require explicit kernel support]
		 */
		switch (cpi->cpi_vendor) {
		case X86_VENDOR_Intel:
			if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
				*edx &= ~CPUID_AMD_EDX_TSCP;
			break;

		case X86_VENDOR_AMD:
			if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
				*edx &= ~CPUID_AMD_EDX_TSCP;
			if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
				*ecx &= ~CPUID_AMD_ECX_SSE4A;
			break;

		default:
			break;
		}

		/*
		 * [no explicit support required beyond
		 * x87 fp context and exception handlers]
		 */
		if (!fpu_exists)
			*edx &= ~(CPUID_AMD_EDX_MMXamd |
			    CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);

		if (!is_x86_feature(x86_featureset, X86FSET_NX))
			*edx &= ~CPUID_AMD_EDX_NX;
#if !defined(__amd64)
		*edx &= ~CPUID_AMD_EDX_LM;
#endif
		/*
		 * Now map the supported feature vector to
		 * things that we think userland will care about.
		 */
#if defined(__amd64)
		if (*edx & CPUID_AMD_EDX_SYSC)
			hwcap_flags |= AV_386_AMD_SYSC;
#endif
		if (*edx & CPUID_AMD_EDX_MMXamd)
			hwcap_flags |= AV_386_AMD_MMX;
		if (*edx & CPUID_AMD_EDX_3DNow)
			hwcap_flags |= AV_386_AMD_3DNow;
		if (*edx & CPUID_AMD_EDX_3DNowx)
			hwcap_flags |= AV_386_AMD_3DNowx;
		if (*ecx & CPUID_AMD_ECX_SVM)
			hwcap_flags |= AV_386_AMD_SVM;

		switch (cpi->cpi_vendor) {
		case X86_VENDOR_AMD:
			if (*edx & CPUID_AMD_EDX_TSCP)
				hwcap_flags |= AV_386_TSCP;
			if (*ecx & CPUID_AMD_ECX_AHF64)
				hwcap_flags |= AV_386_AHF;
			if (*ecx & CPUID_AMD_ECX_SSE4A)
				hwcap_flags |= AV_386_AMD_SSE4A;
			if (*ecx & CPUID_AMD_ECX_LZCNT)
				hwcap_flags |= AV_386_AMD_LZCNT;
			break;

		case X86_VENDOR_Intel:
			if (*edx & CPUID_AMD_EDX_TSCP)
				hwcap_flags |= AV_386_TSCP;
			/*
			 * Aarrgh.
			 * Intel uses a different bit in the same word.
			 */
			if (*ecx & CPUID_INTC_ECX_AHF64)
				hwcap_flags |= AV_386_AHF;
			break;

		default:
			break;
		}
		break;

	case X86_VENDOR_TM:
		cp.cp_eax = 0x80860001;
		(void) __cpuid_insn(&cp);
		cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
		break;

	default:
		break;
	}

pass4_done:
	cpi->cpi_pass = 4;
	return (hwcap_flags);
}
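
/*
 * Illustration of the two-step gating above (not part of the original
 * logic): if the kernel lacks SSE2 support, the CPUID_INTC_EDX_SSE2
 * bit is first cleared from the support vector, so the later test can
 * never set AV_386_SSE2 and userland never sees the capability in its
 * aux vector, even though the hardware advertises it.
 */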
/*
 * Simulate the cpuid instruction using the data we previously
 * captured about this CPU. We try our best to return the truth
 * about the hardware, independently of kernel support.
 */
uint32_t
cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
{
	struct cpuid_info *cpi;
	struct cpuid_regs *xcp;

	if (cpu == NULL)
		cpu = CPU;
	cpi = cpu->cpu_m.mcpu_cpi;

	ASSERT(cpuid_checkpass(cpu, 3));

	/*
	 * CPUID data is cached in two separate places: cpi_std for standard
	 * CPUID functions, and cpi_extd for extended CPUID functions.
	 */
	if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD)
		xcp = &cpi->cpi_std[cp->cp_eax];
	else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax &&
	    cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD)
		xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000];
	else
		/*
		 * The caller is asking for data from an input parameter which
		 * the kernel has not cached. In this case we go fetch from
		 * the hardware and return the data directly to the user.
		 */
		return (__cpuid_insn(cp));

	cp->cp_eax = xcp->cp_eax;
	cp->cp_ebx = xcp->cp_ebx;
	cp->cp_ecx = xcp->cp_ecx;
	cp->cp_edx = xcp->cp_edx;
	return (cp->cp_eax);
}
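
/*
 * Hypothetical usage sketch (names from this file; not original code):
 *
 *	struct cpuid_regs cp = { 0 };
 *	cp.cp_eax = 1;			(standard feature leaf)
 *	(void) cpuid_insn(NULL, &cp);	(NULL means "current CPU")
 *
 * Cached leaves are answered from cpi_std/cpi_extd; anything else
 * falls through to the hardware via __cpuid_insn().
 */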
int
cpuid_checkpass(cpu_t *cpu, int pass)
{
	return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
	    cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
}
int
cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
{
	ASSERT(cpuid_checkpass(cpu, 3));

	return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
}
int
cpuid_is_cmt(cpu_t *cpu)
{
	if (cpu == NULL)
		cpu = CPU;

	ASSERT(cpuid_checkpass(cpu, 1));

	return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
}
/*
 * AMD and Intel both implement the 64-bit variant of the syscall
 * instruction (syscallq), so if there's -any- support for syscall,
 * cpuid currently says "yes, we support this".
 *
 * However, Intel decided to -not- implement the 32-bit variant of the
 * syscall instruction, so we provide a predicate to allow our caller
 * to test that subtlety here.
 *
 * XXPV	Currently, 32-bit syscall instructions don't work via the hypervisor,
 *	even in the case where the hardware would in fact support it.
 */
/*ARGSUSED*/
int
cpuid_syscall32_insn(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));

#if !defined(__xpv)
	if (cpu == NULL)
		cpu = CPU;

	/*CSTYLED*/
	{
		struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;

		if (cpi->cpi_vendor == X86_VENDOR_AMD &&
		    cpi->cpi_xmaxeax >= 0x80000001 &&
		    (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
			return (1);
	}
#endif
	return (0);
}
int
cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
{
	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;

	static const char fmt[] =
	    "x86 (%s %X family %d model %d step %d clock %d MHz)";
	static const char fmt_ht[] =
	    "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";

	ASSERT(cpuid_checkpass(cpu, 1));

	if (cpuid_is_cmt(cpu))
		return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
		    cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
		    cpi->cpi_family, cpi->cpi_model,
		    cpi->cpi_step, cpu->cpu_type_info.pi_clock));
	return (snprintf(s, n, fmt,
	    cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
	    cpi->cpi_family, cpi->cpi_model,
	    cpi->cpi_step, cpu->cpu_type_info.pi_clock));
}
const char *
cpuid_getvendorstr(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
}

uint_t
cpuid_getvendor(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
}

uint_t
cpuid_getfamily(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_family);
}

uint_t
cpuid_getmodel(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_model);
}

uint_t
cpuid_get_ncpu_per_chip(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
}

uint_t
cpuid_get_ncore_per_chip(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
}

uint_t
cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 2));
	return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
}

id_t
cpuid_get_last_lvl_cacheid(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 2));
	return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
}

uint_t
cpuid_getstep(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_step);
}

uint_t
cpuid_getsig(struct cpu *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
}

uint32_t
cpuid_getchiprev(struct cpu *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
}

const char *
cpuid_getchiprevstr(struct cpu *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
}

uint32_t
cpuid_getsockettype(struct cpu *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_socket);
}

const char *
cpuid_getsocketstr(cpu_t *cpu)
{
	static const char *socketstr = NULL;
	struct cpuid_info *cpi;

	ASSERT(cpuid_checkpass(cpu, 1));
	cpi = cpu->cpu_m.mcpu_cpi;

	/* Assume that socket types are the same across the system */
	if (socketstr == NULL)
		socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
		    cpi->cpi_model, cpi->cpi_step);

	return (socketstr);
}

int
cpuid_get_chipid(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));

	if (cpuid_is_cmt(cpu))
		return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
	return (cpu->cpu_id);
}

id_t
cpuid_get_coreid(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
}

int
cpuid_get_pkgcoreid(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
}

int
cpuid_get_clogid(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
}

int
cpuid_get_cacheid(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
}

uint_t
cpuid_get_procnodeid(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
}

uint_t
cpuid_get_procnodes_per_pkg(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
}

uint_t
cpuid_get_compunitid(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
}

uint_t
cpuid_get_cores_per_compunit(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
}
/*ARGSUSED*/
int
cpuid_have_cr8access(cpu_t *cpu)
{
#if defined(__amd64)
	return (1);
#else
	struct cpuid_info *cpi;

	ASSERT(cpu != NULL);
	cpi = cpu->cpu_m.mcpu_cpi;
	if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
	    (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
		return (1);
	return (0);
#endif
}
uint32_t
cpuid_get_apicid(cpu_t *cpu)
{
	ASSERT(cpuid_checkpass(cpu, 1));
	if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
		return (UINT32_MAX);
	}

	return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
}
void
cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
{
	struct cpuid_info *cpi;

	if (cpu == NULL)
		cpu = CPU;
	cpi = cpu->cpu_m.mcpu_cpi;

	ASSERT(cpuid_checkpass(cpu, 1));

	if (pabits)
		*pabits = cpi->cpi_pabits;
	if (vabits)
		*vabits = cpi->cpi_vabits;
}
/*
 * Returns the number of data TLB entries for a corresponding
 * pagesize.  If it can't be computed, or isn't known, the
 * routine returns zero.  If you ask about an architecturally
 * impossible pagesize, the routine will panic (so that the
 * hat implementor knows that things are inconsistent.)
 */
uint_t
cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
{
	struct cpuid_info *cpi;
	uint_t dtlb_nent = 0;

	if (cpu == NULL)
		cpu = CPU;
	cpi = cpu->cpu_m.mcpu_cpi;

	ASSERT(cpuid_checkpass(cpu, 1));

	/*
	 * Check the L2 TLB info
	 */
	if (cpi->cpi_xmaxeax >= 0x80000006) {
		struct cpuid_regs *cp = &cpi->cpi_extd[6];

		switch (pagesize) {

		case 4 * 1024:
			/*
			 * All zero in the top 16 bits of the register
			 * indicates a unified TLB. Size is in low 16 bits.
			 */
			if ((cp->cp_ebx & 0xffff0000) == 0)
				dtlb_nent = cp->cp_ebx & 0x0000ffff;
			else
				dtlb_nent = BITX(cp->cp_ebx, 27, 16);
			break;

		case 2 * 1024 * 1024:
			if ((cp->cp_eax & 0xffff0000) == 0)
				dtlb_nent = cp->cp_eax & 0x0000ffff;
			else
				dtlb_nent = BITX(cp->cp_eax, 27, 16);
			break;

		default:
			panic("unknown L2 pagesize");
			/*NOTREACHED*/
		}
	}

	if (dtlb_nent != 0)
		return (dtlb_nent);

	/*
	 * No L2 TLB support for this size, try L1.
	 */
	if (cpi->cpi_xmaxeax >= 0x80000005) {
		struct cpuid_regs *cp = &cpi->cpi_extd[5];

		switch (pagesize) {
		case 4 * 1024:
			dtlb_nent = BITX(cp->cp_ebx, 23, 16);
			break;
		case 2 * 1024 * 1024:
			dtlb_nent = BITX(cp->cp_eax, 23, 16);
			break;
		default:
			panic("unknown L1 d-TLB pagesize");
			/*NOTREACHED*/
		}
	}

	return (dtlb_nent);
}
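
/*
 * Example decode for the L2 TLB logic above (illustrative value): an
 * fn 0x80000006 %ebx of 0x42004200 has non-zero high 16 bits, i.e.
 * separate I and D TLBs, so the 4K d-TLB count is BITX(ebx, 27, 16) =
 * 0x200 entries; a value with zero high bits would instead describe a
 * unified TLB whose size sits in the low 16 bits.
 */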
/*
 * Return 0 if the erratum is not present or not applicable, positive
 * if it is, and negative if the status of the erratum is unknown.
 *
 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
 * Processors" #25759, Rev 3.57, August 2005
 */
int
cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
{
	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
	uint_t eax;

	/*
	 * Bail out if this CPU isn't an AMD CPU, or if it's
	 * a legacy (32-bit) AMD CPU.
	 */
	if (cpi->cpi_vendor != X86_VENDOR_AMD ||
	    cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
	    cpi->cpi_family == 6)
		return (0);

	eax = cpi->cpi_std[1].cp_eax;

#define	SH_B0(eax)	(eax == 0xf40 || eax == 0xf50)
#define	SH_B3(eax)	(eax == 0xf51)
#define	B(eax)		(SH_B0(eax) || SH_B3(eax))

#define	SH_C0(eax)	(eax == 0xf48 || eax == 0xf58)

#define	SH_CG(eax)	(eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
#define	DH_CG(eax)	(eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
#define	CH_CG(eax)	(eax == 0xf82 || eax == 0xfb2)
#define	CG(eax)		(SH_CG(eax) || DH_CG(eax) || CH_CG(eax))

#define	SH_D0(eax)	(eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
#define	DH_D0(eax)	(eax == 0x10fc0 || eax == 0x10ff0)
#define	CH_D0(eax)	(eax == 0x10f80 || eax == 0x10fb0)
#define	D0(eax)		(SH_D0(eax) || DH_D0(eax) || CH_D0(eax))

#define	SH_E0(eax)	(eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
#define	JH_E1(eax)	(eax == 0x20f10)	/* JH8_E0 had 0x20f30 */
#define	DH_E3(eax)	(eax == 0x20fc0 || eax == 0x20ff0)
#define	SH_E4(eax)	(eax == 0x20f51 || eax == 0x20f71)
#define	BH_E4(eax)	(eax == 0x20fb1)
#define	SH_E5(eax)	(eax == 0x20f42)
#define	DH_E6(eax)	(eax == 0x20ff2 || eax == 0x20fc2)
#define	JH_E6(eax)	(eax == 0x20f12 || eax == 0x20f32)
#define	EX(eax)		(SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
			    SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
			    DH_E6(eax) || JH_E6(eax))

#define	DR_AX(eax)	(eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
#define	DR_B0(eax)	(eax == 0x100f20)
#define	DR_B1(eax)	(eax == 0x100f21)
#define	DR_BA(eax)	(eax == 0x100f2a)
#define	DR_B2(eax)	(eax == 0x100f22)
#define	DR_B3(eax)	(eax == 0x100f23)
#define	RB_C0(eax)	(eax == 0x100f40)

	switch (erratum) {
	case 1:
		return (cpi->cpi_family < 0x10);
	case 51:	/* what does the asterisk mean? */
		return (B(eax) || SH_C0(eax) || CG(eax));
	case 52:
		return (B(eax));
	case 57:
		return (cpi->cpi_family <= 0x11);
	case 58:
		return (B(eax));
	case 60:
		return (cpi->cpi_family <= 0x11);
	case 61:
	case 62:
	case 63:
	case 64:
	case 65:
	case 66:
	case 68:
	case 69:
	case 70:
	case 71:
		return (B(eax));
	case 72:
		return (SH_B0(eax));
	case 74:
		return (B(eax));
	case 75:
		return (cpi->cpi_family < 0x10);
	case 76:
		return (B(eax));
	case 77:
		return (cpi->cpi_family <= 0x11);
	case 78:
		return (B(eax) || SH_C0(eax));
	case 79:
		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
	case 80:
	case 81:
	case 82:
		return (B(eax));
	case 83:
		return (B(eax) || SH_C0(eax) || CG(eax));
	case 85:
		return (cpi->cpi_family < 0x10);
	case 86:
		return (SH_C0(eax) || CG(eax));
	case 88:
#if !defined(__amd64)
		return (0);
#else
		return (B(eax) || SH_C0(eax));
#endif
	case 89:
		return (cpi->cpi_family < 0x10);
	case 90:
		return (B(eax) || SH_C0(eax) || CG(eax));
	case 91:
	case 92:
		return (B(eax) || SH_C0(eax));
	case 93:
		return (SH_C0(eax));
	case 94:
		return (B(eax) || SH_C0(eax) || CG(eax));
	case 95:
#if !defined(__amd64)
		return (0);
#else
		return (B(eax) || SH_C0(eax));
#endif
	case 96:
		return (B(eax) || SH_C0(eax) || CG(eax));
	case 97:
	case 98:
		return (SH_C0(eax) || CG(eax));
	case 99:
		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
	case 100:
		return (B(eax) || SH_C0(eax));
	case 101:
	case 103:
		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
	case 104:
		return (SH_C0(eax) || CG(eax) || D0(eax));
	case 105:
	case 106:
	case 107:
		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
	case 108:
		return (DH_CG(eax));
	case 109:
		return (SH_C0(eax) || CG(eax) || D0(eax));
	case 110:
		return (D0(eax) || EX(eax));
	case 111:
		return (CG(eax));
	case 112:
		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
	case 113:
		return (eax == 0x20fc0);
	case 114:
		return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
	case 115:
		return (SH_E0(eax) || JH_E1(eax));
	case 116:
		return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
	case 117:
		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
	case 118:
		return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
		    JH_E6(eax));
	case 121:
		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
	case 122:
		return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
	case 123:
		return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
	case 131:
		return (cpi->cpi_family < 0x10);
	case 6336786:
		/*
		 * Test for AdvPowerMgmtInfo.TscPStateInvariant
		 * if this is a K8 family or newer processor
		 */
		if (CPI_FAMILY(cpi) == 0xf) {
			struct cpuid_regs regs;
			regs.cp_eax = 0x80000007;
			(void) __cpuid_insn(&regs);
			return (!(regs.cp_edx & 0x100));
		}
		return (0);
	case 6323525:
		return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
		    (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
	case 6671130:
		/*
		 * check for processors (pre-Shanghai) that do not provide
		 * optimal management of 1gb ptes in its tlb.
		 */
		return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
	case 298:
		return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
		    DR_B2(eax) || RB_C0(eax));
	case 721:
#if defined(__amd64)
		return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12);
#else
		return (0);
#endif
	default:
		return (-1);
	}
}
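
/*
 * Concrete reading of the signature macros above (illustrative): an
 * eax of 0x20f42 matches SH_E5, and SH_E5 feeds EX(), so errata that
 * test EX(eax), e.g. 79, 112 or 121, report present on that part.
 */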
/*
 * Determine if specified erratum is present via OSVW (OS Visible Workaround).
 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
 */
int
osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
{
	struct cpuid_info *cpi;
	uint_t osvwid;
	static int osvwfeature = -1;
	uint64_t osvwlength;

	cpi = cpu->cpu_m.mcpu_cpi;

	/* confirm OSVW supported */
	if (osvwfeature == -1) {
		osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
	} else {
		/* assert that osvw feature setting is consistent on all cpus */
		ASSERT(osvwfeature ==
		    (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
	}
	if (!osvwfeature)
		return (-1);

	osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;

	switch (erratum) {
	case 298:	/* osvwid is 0 */
		osvwid = 0;
		if (osvwlength <= (uint64_t)osvwid) {
			/* osvwid 0 is unknown */
			return (-1);
		}

		/*
		 * Check the OSVW STATUS MSR to determine the state
		 * of the erratum where:
		 *   0 - fixed by HW
		 *   1 - BIOS has applied the workaround when BIOS
		 *   workaround is available. (Or for other errata,
		 *   OS workaround is required.)
		 * For a value of 1, caller will confirm that the
		 * erratum 298 workaround has indeed been applied by BIOS.
		 *
		 * A 1 may be set in cpus that have a HW fix
		 * in a mixed cpu system. Regarding erratum 298:
		 *   In a multiprocessor platform, the workaround above
		 *   should be applied to all processors regardless of
		 *   silicon revision when an affected processor is
		 *   present.
		 */

		return (rdmsr(MSR_AMD_OSVW_STATUS +
		    (osvwid / OSVW_ID_CNT_PER_MSR)) &
		    (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));

	default:
		return (-1);
	}
}
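
/*
 * Status bit arithmetic, worked through (illustrative): for osvwid 0
 * the code reads MSR_AMD_OSVW_STATUS + (0 / OSVW_ID_CNT_PER_MSR),
 * i.e. the first status MSR, and tests bit (0 % OSVW_ID_CNT_PER_MSR),
 * i.e. bit 0. In general id N lives in status MSR N / OSVW_ID_CNT_PER_MSR
 * at bit N % OSVW_ID_CNT_PER_MSR.
 */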
static const char assoc_str[] = "associativity";
static const char line_str[] = "line-size";
static const char size_str[] = "size";

static void
add_cache_prop(dev_info_t *devi, const char *label, const char *type,
    uint32_t val)
{
	char buf[128];

	/*
	 * ndi_prop_update_int() is used because it is desirable for
	 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
	 */
	if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
		(void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
}
/*
 * Intel-style cache/tlb description
 *
 * Standard cpuid level 2 gives a randomly ordered
 * selection of tags that index into a table that describes
 * cache and tlb properties.
 */

static const char l1_icache_str[] = "l1-icache";
static const char l1_dcache_str[] = "l1-dcache";
static const char l2_cache_str[] = "l2-cache";
static const char l3_cache_str[] = "l3-cache";
static const char itlb4k_str[] = "itlb-4K";
static const char dtlb4k_str[] = "dtlb-4K";
static const char itlb2M_str[] = "itlb-2M";
static const char itlb4M_str[] = "itlb-4M";
static const char dtlb4M_str[] = "dtlb-4M";
static const char dtlb24_str[] = "dtlb0-2M-4M";
static const char itlb424_str[] = "itlb-4K-2M-4M";
static const char itlb24_str[] = "itlb-2M-4M";
static const char dtlb44_str[] = "dtlb-4K-4M";
static const char sl1_dcache_str[] = "sectored-l1-dcache";
static const char sl2_cache_str[] = "sectored-l2-cache";
static const char itrace_str[] = "itrace-cache";
static const char sl3_cache_str[] = "sectored-l3-cache";
static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";

static const struct cachetab {
	uint8_t		ct_code;
	uint8_t		ct_assoc;
	uint16_t	ct_line_size;
	size_t		ct_size;
	const char	*ct_label;
} intel_ctab[] = {
	/*
	 * maintain descending order!
	 *
	 * Codes ignored - Reason
	 * ----------------------
	 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
	 * f0H/f1H - Currently we do not interpret prefetch size by design
	 */
	{ 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
	{ 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
	{ 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
	{ 0xde, 12, 64, 6*1024*1024, l3_cache_str},
	{ 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
	{ 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
	{ 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
	{ 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
	{ 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
	{ 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
	{ 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
	{ 0xd0, 4, 64, 512*1024, l3_cache_str},
	{ 0xca, 4, 0, 512, sh_l2_tlb4k_str},
	{ 0xc0, 4, 0, 8, dtlb44_str },
	{ 0xba, 4, 0, 64, dtlb4k_str },
	{ 0xb4, 4, 0, 256, dtlb4k_str },
	{ 0xb3, 4, 0, 128, dtlb4k_str },
	{ 0xb2, 4, 0, 64, itlb4k_str },
	{ 0xb0, 4, 0, 128, itlb4k_str },
	{ 0x87, 8, 64, 1024*1024, l2_cache_str},
	{ 0x86, 4, 64, 512*1024, l2_cache_str},
	{ 0x85, 8, 32, 2*1024*1024, l2_cache_str},
	{ 0x84, 8, 32, 1024*1024, l2_cache_str},
	{ 0x83, 8, 32, 512*1024, l2_cache_str},
	{ 0x82, 8, 32, 256*1024, l2_cache_str},
	{ 0x80, 8, 64, 512*1024, l2_cache_str},
	{ 0x7f, 2, 64, 512*1024, l2_cache_str},
	{ 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
	{ 0x7c, 8, 64, 1024*1024, sl2_cache_str},
	{ 0x7b, 8, 64, 512*1024, sl2_cache_str},
	{ 0x7a, 8, 64, 256*1024, sl2_cache_str},
	{ 0x79, 8, 64, 128*1024, sl2_cache_str},
	{ 0x78, 8, 64, 1024*1024, l2_cache_str},
	{ 0x73, 8, 0, 64*1024, itrace_str},
	{ 0x72, 8, 0, 32*1024, itrace_str},
	{ 0x71, 8, 0, 16*1024, itrace_str},
	{ 0x70, 8, 0, 12*1024, itrace_str},
	{ 0x68, 4, 64, 32*1024, sl1_dcache_str},
	{ 0x67, 4, 64, 16*1024, sl1_dcache_str},
	{ 0x66, 4, 64, 8*1024, sl1_dcache_str},
	{ 0x60, 8, 64, 16*1024, sl1_dcache_str},
	{ 0x5d, 0, 0, 256, dtlb44_str},
	{ 0x5c, 0, 0, 128, dtlb44_str},
	{ 0x5b, 0, 0, 64, dtlb44_str},
	{ 0x5a, 4, 0, 32, dtlb24_str},
	{ 0x59, 0, 0, 16, dtlb4k_str},
	{ 0x57, 4, 0, 16, dtlb4k_str},
	{ 0x56, 4, 0, 16, dtlb4M_str},
	{ 0x55, 0, 0, 7, itlb24_str},
	{ 0x52, 0, 0, 256, itlb424_str},
	{ 0x51, 0, 0, 128, itlb424_str},
	{ 0x50, 0, 0, 64, itlb424_str},
	{ 0x4f, 0, 0, 32, itlb4k_str},
	{ 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
	{ 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
	{ 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
	{ 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
	{ 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
	{ 0x49, 16, 64, 4*1024*1024, l3_cache_str},
	{ 0x48, 12, 64, 3*1024*1024, l2_cache_str},
	{ 0x47, 8, 64, 8*1024*1024, l3_cache_str},
	{ 0x46, 4, 64, 4*1024*1024, l3_cache_str},
	{ 0x45, 4, 32, 2*1024*1024, l2_cache_str},
	{ 0x44, 4, 32, 1024*1024, l2_cache_str},
	{ 0x43, 4, 32, 512*1024, l2_cache_str},
	{ 0x42, 4, 32, 256*1024, l2_cache_str},
	{ 0x41, 4, 32, 128*1024, l2_cache_str},
	{ 0x3e, 4, 64, 512*1024, sl2_cache_str},
	{ 0x3d, 6, 64, 384*1024, sl2_cache_str},
	{ 0x3c, 4, 64, 256*1024, sl2_cache_str},
	{ 0x3b, 2, 64, 128*1024, sl2_cache_str},
	{ 0x3a, 6, 64, 192*1024, sl2_cache_str},
	{ 0x39, 4, 64, 128*1024, sl2_cache_str},
	{ 0x30, 8, 64, 32*1024, l1_icache_str},
	{ 0x2c, 8, 64, 32*1024, l1_dcache_str},
	{ 0x29, 8, 64, 4096*1024, sl3_cache_str},
	{ 0x25, 8, 64, 2048*1024, sl3_cache_str},
	{ 0x23, 8, 64, 1024*1024, sl3_cache_str},
	{ 0x22, 4, 64, 512*1024, sl3_cache_str},
	{ 0x0e, 6, 64, 24*1024, l1_dcache_str},
	{ 0x0d, 4, 32, 16*1024, l1_dcache_str},
	{ 0x0c, 4, 32, 16*1024, l1_dcache_str},
	{ 0x0b, 4, 0, 4, itlb4M_str},
	{ 0x0a, 2, 32, 8*1024, l1_dcache_str},
	{ 0x08, 4, 32, 16*1024, l1_icache_str},
	{ 0x06, 4, 32, 8*1024, l1_icache_str},
	{ 0x05, 4, 0, 32, dtlb4M_str},
	{ 0x04, 4, 0, 8, dtlb4M_str},
	{ 0x03, 4, 0, 64, dtlb4k_str},
	{ 0x02, 4, 0, 2, itlb4M_str},
	{ 0x01, 4, 0, 32, itlb4k_str},
	{ 0 }
};

static const struct cachetab cyrix_ctab[] = {
	{ 0x70, 4, 0, 32, "tlb-4K" },
	{ 0x80, 4, 16, 16*1024, "l1-cache" },
	{ 0 }
};

/*
 * Search a cache table for a matching entry
 */
static const struct cachetab *
find_cacheent(const struct cachetab *ct, uint_t code)
{
	if (code != 0) {
		for (; ct->ct_code != 0; ct++)
			if (ct->ct_code <= code)
				break;
		if (ct->ct_code == code)
			return (ct);
	}
	return (NULL);
}
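
/*
 * Why the "descending order" rule above matters (illustrative lookup):
 * find_cacheent() stops at the first ct_code <= code, so a lookup of
 * 0x45 walks past 0xe4 .. 0x46 and stops exactly on the 0x45 entry,
 * while a code absent from the table, say 0x31, stops on the smaller
 * 0x30 entry, fails the equality test, and returns NULL.
 */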
/*
 * Populate cachetab entry with L2 or L3 cache-information using
 * cpuid function 4. This function is called from intel_walk_cacheinfo()
 * when descriptor 0x49 is encountered. It returns 0 if no such cache
 * information is found.
 */
static int
intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
{
	uint32_t level, i;
	int ret = 0;

	for (i = 0; i < cpi->cpi_std_4_size; i++) {
		level = CPI_CACHE_LVL(cpi->cpi_std_4[i]);

		if (level == 2 || level == 3) {
			ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1;
			ct->ct_line_size =
			    CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1;
			ct->ct_size = ct->ct_assoc *
			    (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) *
			    ct->ct_line_size *
			    (cpi->cpi_std_4[i]->cp_ecx + 1);

			if (level == 2) {
				ct->ct_label = l2_cache_str;
			} else if (level == 3) {
				ct->ct_label = l3_cache_str;
			}
			ret = 1;
		}
	}

	return (ret);
}
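
/*
 * The size computation above is the usual leaf-4 identity,
 *
 *	size = ways * partitions * line size * sets
 *
 * e.g. (illustrative numbers) 8 ways * 1 partition * 64-byte lines *
 * 4096 sets = 2MB for a level 2 cache.
 */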
/*
 * Walk the cacheinfo descriptor, applying 'func' to every valid element
 * The walk is terminated if the walker returns non-zero.
 */
static void
intel_walk_cacheinfo(struct cpuid_info *cpi,
    void *arg, int (*func)(void *, const struct cachetab *))
{
	const struct cachetab *ct;
	struct cachetab des_49_ct, des_b1_ct;
	uint8_t *dp;
	int i;

	if ((dp = cpi->cpi_cacheinfo) == NULL)
		return;
	for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
		/*
		 * For overloaded descriptor 0x49 we use cpuid function 4
		 * if supported by the current processor, to create
		 * cache information.
		 * For overloaded descriptor 0xb1 we use X86_PAE flag
		 * to disambiguate the cache information.
		 */
		if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
		    intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
			ct = &des_49_ct;
		} else if (*dp == 0xb1) {
			des_b1_ct.ct_code = 0xb1;
			des_b1_ct.ct_assoc = 4;
			des_b1_ct.ct_line_size = 0;
			if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
				des_b1_ct.ct_size = 8;
				des_b1_ct.ct_label = itlb2M_str;
			} else {
				des_b1_ct.ct_size = 4;
				des_b1_ct.ct_label = itlb4M_str;
			}
			ct = &des_b1_ct;
		} else {
			if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
				continue;
			}
		}

		if (func(arg, ct) != 0) {
			break;
		}
	}
}
/*
 * (Like the Intel one, except for Cyrix CPUs)
 */
static void
cyrix_walk_cacheinfo(struct cpuid_info *cpi,
    void *arg, int (*func)(void *, const struct cachetab *))
{
	const struct cachetab *ct;
	uint8_t *dp;
	int i;

	if ((dp = cpi->cpi_cacheinfo) == NULL)
		return;
	for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
		/*
		 * Search Cyrix-specific descriptor table first ..
		 */
		if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
			if (func(arg, ct) != 0)
				break;
			continue;
		}

		/*
		 * .. else fall back to the Intel one
		 */
		if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
			if (func(arg, ct) != 0)
				break;
			continue;
		}
	}
}
/*
 * A cacheinfo walker that adds associativity, line-size, and size properties
 * to the devinfo node it is passed as an argument.
 */
static int
add_cacheent_props(void *arg, const struct cachetab *ct)
{
	dev_info_t *devi = arg;

	add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
	if (ct->ct_line_size != 0)
		add_cache_prop(devi, ct->ct_label, line_str,
		    ct->ct_line_size);
	add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
	return (0);
}
static const char fully_assoc[] = "fully-associative?";

/*
 * AMD style cache/tlb description
 *
 * Extended functions 5 and 6 directly describe properties of
 * tlbs and various cache levels.
 */
static void
add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
{
	switch (assoc) {
	case 0:	/* reserved; ignore */
		break;
	default:
		add_cache_prop(devi, label, assoc_str, assoc);
		break;
	case 0xff:
		add_cache_prop(devi, label, fully_assoc, 1);
		break;
	}
}

static void
add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
{
	if (size == 0)
		return;
	add_cache_prop(devi, label, size_str, size);
	add_amd_assoc(devi, label, assoc);
}

static void
add_amd_cache(dev_info_t *devi, const char *label,
    uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
{
	if (size == 0 || line_size == 0)
		return;
	add_amd_assoc(devi, label, assoc);
	/*
	 * Most AMD parts have a sectored cache. Multiple cache lines are
	 * associated with each tag. A sector consists of all cache lines
	 * associated with a tag. For example, the AMD K6-III has a sector
	 * size of 2 cache lines per tag.
	 */
	if (lines_per_tag != 0)
		add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
	add_cache_prop(devi, label, line_str, line_size);
	add_cache_prop(devi, label, size_str, size * 1024);
}

static void
add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
{
	switch (assoc) {
	case 0:	/* off */
		break;
	case 1:
	case 2:
	case 4:
		add_cache_prop(devi, label, assoc_str, assoc);
		break;
	case 6:
		add_cache_prop(devi, label, assoc_str, 8);
		break;
	case 8:
		add_cache_prop(devi, label, assoc_str, 16);
		break;
	case 0xf:
		add_cache_prop(devi, label, fully_assoc, 1);
		break;
	default: /* reserved; ignore */
		break;
	}
}

static void
add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
{
	if (size == 0 || assoc == 0)
		return;
	add_amd_l2_assoc(devi, label, assoc);
	add_cache_prop(devi, label, size_str, size);
}

static void
add_amd_l2_cache(dev_info_t *devi, const char *label,
    uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
{
	if (size == 0 || assoc == 0 || line_size == 0)
		return;
	add_amd_l2_assoc(devi, label, assoc);
	if (lines_per_tag != 0)
		add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
	add_cache_prop(devi, label, line_str, line_size);
	add_cache_prop(devi, label, size_str, size * 1024);
}
static void
amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
{
	struct cpuid_regs *cp;

	if (cpi->cpi_xmaxeax < 0x80000005)
		return;
	cp = &cpi->cpi_extd[5];

	/*
	 * 4M/2M L1 TLB configuration
	 *
	 * We report the size for 2M pages because AMD uses two
	 * TLB entries for one 4M page.
	 */
	add_amd_tlb(devi, "dtlb-2M",
	    BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
	add_amd_tlb(devi, "itlb-2M",
	    BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));

	/*
	 * 4K L1 TLB configuration
	 */

	switch (cpi->cpi_vendor) {
		uint_t nentries;
	case X86_VENDOR_TM:
		if (cpi->cpi_family >= 5) {
			/*
			 * Crusoe processors have 256 TLB entries, but
			 * cpuid data format constrains them to only
			 * reporting 255 of them.
			 */
			if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
				nentries = 256;
			/*
			 * Crusoe processors also have a unified TLB
			 */
			add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
			    nentries);
			break;
		}
		/*FALLTHROUGH*/
	default:
		add_amd_tlb(devi, itlb4k_str,
		    BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
		add_amd_tlb(devi, dtlb4k_str,
		    BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
		break;
	}

	/*
	 * data L1 cache configuration
	 */

	add_amd_cache(devi, l1_dcache_str,
	    BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
	    BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));

	/*
	 * code L1 cache configuration
	 */

	add_amd_cache(devi, l1_icache_str,
	    BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
	    BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));

	if (cpi->cpi_xmaxeax < 0x80000006)
		return;
	cp = &cpi->cpi_extd[6];

	/* Check for a unified L2 TLB for large pages */

	if (BITX(cp->cp_eax, 31, 16) == 0)
		add_amd_l2_tlb(devi, "l2-tlb-2M",
		    BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
	else {
		add_amd_l2_tlb(devi, "l2-dtlb-2M",
		    BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
		add_amd_l2_tlb(devi, "l2-itlb-2M",
		    BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
	}

	/* Check for a unified L2 TLB for 4K pages */

	if (BITX(cp->cp_ebx, 31, 16) == 0) {
		add_amd_l2_tlb(devi, "l2-tlb-4K",
		    BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
	} else {
		add_amd_l2_tlb(devi, "l2-dtlb-4K",
		    BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
		add_amd_l2_tlb(devi, "l2-itlb-4K",
		    BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
	}

	add_amd_l2_cache(devi, l2_cache_str,
	    BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
	    BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
}
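
/*
 * Note on the 2M reporting convention above (illustrative): since one
 * 4M page consumes two large-page TLB entries, a part advertising
 * eight large-page entries can map eight 2M pages but only four 4M
 * pages; reporting the 2M figure is thus the conservative choice.
 */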
/*
 * There are two basic ways that the x86 world describes its cache
 * and tlb architecture - Intel's way and AMD's way.
 *
 * Return which flavor of cache architecture we should use
 */
static int
x86_which_cacheinfo(struct cpuid_info *cpi)
{
	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
		if (cpi->cpi_maxeax >= 2)
			return (X86_VENDOR_Intel);
		break;
	case X86_VENDOR_AMD:
		/*
		 * The K5 model 1 was the first part from AMD that reported
		 * cache sizes via extended cpuid functions.
		 */
		if (cpi->cpi_family > 5 ||
		    (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
			return (X86_VENDOR_AMD);
		break;
	case X86_VENDOR_TM:
		if (cpi->cpi_family >= 5)
			return (X86_VENDOR_AMD);
		/*FALLTHROUGH*/
	default:
		/*
		 * If they have extended CPU data for 0x80000005
		 * then we assume they have AMD-format cache
		 * information.
		 *
		 * If not, and the vendor happens to be Cyrix,
		 * then try our Cyrix-specific handler.
		 *
		 * If we're not Cyrix, then assume we're using Intel's
		 * table-driven format instead.
		 */
		if (cpi->cpi_xmaxeax >= 0x80000005)
			return (X86_VENDOR_AMD);
		else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
			return (X86_VENDOR_Cyrix);
		else if (cpi->cpi_maxeax >= 2)
			return (X86_VENDOR_Intel);
		break;
	}
	return (-1);
}
void
cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
    struct cpuid_info *cpi)
{
	dev_info_t *cpu_devi;
	int create;

	cpu_devi = (dev_info_t *)dip;

	/* device_type */
	(void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
	    "device_type", "cpu");

	/* reg */
	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
	    "reg", cpu_id);

	/* cpu-mhz, and clock-frequency */
	if (cpu_freq > 0) {
		long long mul;

		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "cpu-mhz", cpu_freq);
		if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
			(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
			    "clock-frequency", (int)mul);
	}

	if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
		return;
	}

	/* vendor-id */
	(void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
	    "vendor-id", cpi->cpi_vendorstr);

	if (cpi->cpi_maxeax == 0) {
		return;
	}

	/*
	 * family, model, and step
	 */
	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
	    "family", CPI_FAMILY(cpi));
	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
	    "cpu-model", CPI_MODEL(cpi));
	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
	    "stepping-id", CPI_STEP(cpi));

	/* type */
	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
		create = 1;
		break;
	default:
		create = 0;
		break;
	}
	if (create)
		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "type", CPI_TYPE(cpi));

	/* ext-family */
	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
	case X86_VENDOR_AMD:
		create = cpi->cpi_family >= 0xf;
		break;
	default:
		create = 0;
		break;
	}
	if (create)
		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "ext-family", CPI_FAMILY_XTD(cpi));

	/* ext-model */
	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
		create = IS_EXTENDED_MODEL_INTEL(cpi);
		break;
	case X86_VENDOR_AMD:
		create = CPI_FAMILY(cpi) == 0xf;
		break;
	default:
		create = 0;
		break;
	}
	if (create)
		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "ext-model", CPI_MODEL_XTD(cpi));

	/* generation */
	switch (cpi->cpi_vendor) {
	case X86_VENDOR_AMD:
		/*
		 * AMD K5 model 1 was the first part to support this
		 */
		create = cpi->cpi_xmaxeax >= 0x80000001;
		break;
	default:
		create = 0;
		break;
	}
	if (create)
		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));

	/* brand-id */
	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
		/*
		 * brand id first appeared on Pentium III Xeon model 8,
		 * and Celeron model 8 processors and Opteron
		 */
		create = cpi->cpi_family > 6 ||
		    (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
		break;
	case X86_VENDOR_AMD:
		create = cpi->cpi_family >= 0xf;
		break;
	default:
		create = 0;
		break;
	}
	if (create && cpi->cpi_brandid != 0) {
		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "brand-id", cpi->cpi_brandid);
	}

	/* chunks, and apic-id */
	switch (cpi->cpi_vendor) {
		/*
		 * first available on Pentium IV and Opteron (K8)
		 */
	case X86_VENDOR_Intel:
		create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
		break;
	case X86_VENDOR_AMD:
		create = cpi->cpi_family >= 0xf;
		break;
	default:
		create = 0;
		break;
	}
	if (create) {
		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "chunks", CPI_CHUNKS(cpi));
		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "apic-id", cpi->cpi_apicid);
		if (cpi->cpi_chipid >= 0) {
			(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
			    "chip#", cpi->cpi_chipid);
			(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
			    "clog#", cpi->cpi_clogid);
		}
	}

	/* cpuid-features */
	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
	    "cpuid-features", CPI_FEATURES_EDX(cpi));


	/* cpuid-features-ecx */
	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
		create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
		break;
	case X86_VENDOR_AMD:
		create = cpi->cpi_family >= 0xf;
		break;
	default:
		create = 0;
		break;
	}
	if (create)
		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));

	/* ext-cpuid-features */
	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
	case X86_VENDOR_AMD:
	case X86_VENDOR_Cyrix:
	case X86_VENDOR_TM:
	case X86_VENDOR_Centaur:
		create = cpi->cpi_xmaxeax >= 0x80000001;
		break;
	default:
		create = 0;
		break;
	}
	if (create) {
		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
		    "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
	}

	/*
	 * Brand String first appeared in Intel Pentium IV, AMD K5
	 * model 1, and Cyrix GXm.  On earlier models we try and
	 * simulate something similar .. so this string should always
	 * say -something- about the processor, however lame.
	 */
	(void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
	    "brand-string", cpi->cpi_brandstr);

	/*
	 * Finally, cache and tlb information
	 */
	switch (x86_which_cacheinfo(cpi)) {
	case X86_VENDOR_Intel:
		intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
		break;
	case X86_VENDOR_Cyrix:
		cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
		break;
	case X86_VENDOR_AMD:
		amd_cache_info(cpi, cpu_devi);
		break;
	default:
		break;
	}
}
struct l2info {
	int *l2i_csz;
	int *l2i_lsz;
	int *l2i_assoc;
	int l2i_ret;
};

/*
 * A cacheinfo walker that fetches the size, line-size and associativity
 * of the L2 cache
 */
static int
intel_l2cinfo(void *arg, const struct cachetab *ct)
{
	struct l2info *l2i = arg;
	int *ip;

	if (ct->ct_label != l2_cache_str &&
	    ct->ct_label != sl2_cache_str)
		return (0);	/* not an L2 -- keep walking */

	if ((ip = l2i->l2i_csz) != NULL)
		*ip = ct->ct_size;
	if ((ip = l2i->l2i_lsz) != NULL)
		*ip = ct->ct_line_size;
	if ((ip = l2i->l2i_assoc) != NULL)
		*ip = ct->ct_assoc;
	l2i->l2i_ret = ct->ct_size;
	return (1);		/* was an L2 -- terminate walk */
}
/*
 * AMD L2/L3 Cache and TLB Associativity Field Definition:
 *
 *	Unlike the associativity for the L1 cache and tlb where the 8 bit
 *	value is the associativity, the associativity for the L2 cache and
 *	tlb is encoded in the following table. The 4 bit L2 value serves as
 *	an index into the amd_afd[] array to determine the associativity.
 *	-1 is undefined. 0 is fully associative.
 */

static int amd_afd[] =
	{-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};

static void
amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
{
	struct cpuid_regs *cp;
	uint_t size, assoc;
	int i;
	int *ip;

	if (cpi->cpi_xmaxeax < 0x80000006)
		return;
	cp = &cpi->cpi_extd[6];

	if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
	    (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
		uint_t cachesz = size * 1024;
		assoc = amd_afd[i];

		ASSERT(assoc != -1);

		if ((ip = l2i->l2i_csz) != NULL)
			*ip = cachesz;
		if ((ip = l2i->l2i_lsz) != NULL)
			*ip = BITX(cp->cp_ecx, 7, 0);
		if ((ip = l2i->l2i_assoc) != NULL)
			*ip = assoc;
		l2i->l2i_ret = cachesz;
	}
}
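
/*
 * Decoding example for amd_afd[] (illustrative): an encoded L2
 * associativity field of 6 indexes amd_afd[6] == 8, i.e. 8-way set
 * associative; 0xf maps to 0, the fully-associative encoding, and the
 * -1 slots are reserved encodings that should never appear (hence the
 * ASSERT above).
 */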
int
getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
{
	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
	struct l2info __l2info, *l2i = &__l2info;

	l2i->l2i_csz = csz;
	l2i->l2i_lsz = lsz;
	l2i->l2i_assoc = assoc;
	l2i->l2i_ret = -1;

	switch (x86_which_cacheinfo(cpi)) {
	case X86_VENDOR_Intel:
		intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
		break;
	case X86_VENDOR_Cyrix:
		cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
		break;
	case X86_VENDOR_AMD:
		amd_l2cacheinfo(cpi, l2i);
		break;
	default:
		break;
	}
	return (l2i->l2i_ret);
}
uint32_t *
cpuid_mwait_alloc(cpu_t *cpu)
{
	uint32_t	*ret;
	size_t		mwait_size;

	ASSERT(cpuid_checkpass(CPU, 2));

	mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
	if (mwait_size == 0)
		return (NULL);

	/*
	 * kmem_alloc() returns cache line size aligned data for mwait_size
	 * allocations.  mwait_size is currently cache line sized.  Neither
	 * of these implementation details are guaranteed to be true in the
	 * future.
	 *
	 * First try allocating mwait_size as kmem_alloc() currently returns
	 * correctly aligned memory.  If kmem_alloc() does not return
	 * mwait_size aligned memory, then use mwait_size ROUNDUP.
	 *
	 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
	 * decide to free this memory.
	 */
	ret = kmem_zalloc(mwait_size, KM_SLEEP);
	if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
		cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
		cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
		*ret = MWAIT_RUNNING;
		return (ret);
	} else {
		kmem_free(ret, mwait_size);
		ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
		cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
		cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
		ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
		*ret = MWAIT_RUNNING;
		return (ret);
	}
}

void
cpuid_mwait_free(cpu_t *cpu)
{
	if (cpu->cpu_m.mcpu_cpi == NULL) {
		return;
	}

	if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
	    cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
		kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
		    cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
	}

	cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
	cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
}
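
/*
 * Alignment fallback, worked through (illustrative): with a 64-byte
 * mon_max, a kmem_zalloc() result that is already 64-byte aligned is
 * used directly; otherwise the double-sized (128-byte) allocation is
 * guaranteed to contain a 64-byte boundary, and P2ROUNDUP selects that
 * aligned interior address, while buf_actual/size_actual remember the
 * real allocation for the eventual kmem_free().
 */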
void
patch_tsc_read(int flag)
{
	size_t cnt;

	switch (flag) {
	case X86_NO_TSC:
		cnt = &_no_rdtsc_end - &_no_rdtsc_start;
		(void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
		break;
	case X86_HAVE_TSCP:
		cnt = &_tscp_end - &_tscp_start;
		(void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
		break;
	case X86_TSC_MFENCE:
		cnt = &_tsc_mfence_end - &_tsc_mfence_start;
		(void) memcpy((void *)tsc_read,
		    (void *)&_tsc_mfence_start, cnt);
		break;
	case X86_TSC_LFENCE:
		cnt = &_tsc_lfence_end - &_tsc_lfence_start;
		(void) memcpy((void *)tsc_read,
		    (void *)&_tsc_lfence_start, cnt);
		break;
	default:
		break;
	}
}
int
cpuid_deep_cstates_supported(void)
{
	struct cpuid_info *cpi;
	struct cpuid_regs regs;

	ASSERT(cpuid_checkpass(CPU, 1));

	cpi = CPU->cpu_m.mcpu_cpi;

	if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
		return (0);

	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
		if (cpi->cpi_xmaxeax < 0x80000007)
			return (0);

		/*
		 * Does the TSC run at a constant rate in all ACPI C-states?
		 */
		regs.cp_eax = 0x80000007;
		(void) __cpuid_insn(&regs);
		return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);

	default:
		return (0);
	}
}
void
post_startup_cpu_fixups(void)
{
	/*
	 * Some AMD processors support C1E state. Entering this state will
	 * cause the local APIC timer to stop, which we can't deal with at
	 * this time.
	 */
	if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
		on_trap_data_t otd;
		uint64_t reg;

		if (!on_trap(&otd, OT_DATA_ACCESS)) {
			reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
			/* Disable C1E state if it is enabled by BIOS */
			if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
			    AMD_ACTONCMPHALT_MASK) {
				reg &= ~(AMD_ACTONCMPHALT_MASK <<
				    AMD_ACTONCMPHALT_SHIFT);
				wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
			}
		}
		no_trap();
	}
}
/*
 * Setup necessary registers to enable XSAVE feature on this processor.
 * This function needs to be called early enough, so that no xsave/xrstor
 * ops will execute on the processor before the MSRs are properly set up.
 *
 * Current implementation has the following assumption:
 * - cpuid_pass1() is done, so that X86 features are known.
 * - fpu_probe() is done, so that fp_save_mech is chosen.
 */
void
xsave_setup_msr(cpu_t *cpu)
{
	ASSERT(fp_save_mech == FP_XSAVE);
	ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));

	/* Enable OSXSAVE in CR4. */
	setcr4(getcr4() | CR4_OSXSAVE);
	/*
	 * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
	 * the OSXSAVE feature properly.
	 */
	cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;

	setup_xfem();
}
/*
 * Starting with the Westmere processor the local
 * APIC timer will continue running in all C-states,
 * including the deepest C-states.
 */
int
cpuid_arat_supported(void)
{
	struct cpuid_info *cpi;
	struct cpuid_regs regs;

	ASSERT(cpuid_checkpass(CPU, 1));
	ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));

	cpi = CPU->cpu_m.mcpu_cpi;

	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
		/*
		 * Always-running Local APIC Timer is
		 * indicated by CPUID.6.EAX[2].
		 */
		if (cpi->cpi_maxeax >= 6) {
			regs.cp_eax = 6;
			(void) cpuid_insn(NULL, &regs);
			return (regs.cp_eax & CPUID_CSTATE_ARAT);
		} else {
			return (0);
		}
	default:
		return (0);
	}
}
/*
 * Check support for Intel ENERGY_PERF_BIAS feature
 */
int
cpuid_iepb_supported(struct cpu *cp)
{
	struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
	struct cpuid_regs regs;

	ASSERT(cpuid_checkpass(cp, 1));

	if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
	    !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
		return (0);
	}

	/*
	 * Intel ENERGY_PERF_BIAS MSR is indicated by
	 * capability bit CPUID.6.ECX.3
	 */
	if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
		return (0);

	regs.cp_eax = 0x6;
	(void) cpuid_insn(NULL, &regs);
	return (regs.cp_ecx & CPUID_EPB_SUPPORT);
}
/*
 * Check support for TSC deadline timer
 *
 * TSC deadline timer provides a superior software programming
 * model over local APIC timer that eliminates "time drifts".
 * Instead of specifying a relative time, software specifies an
 * absolute time as the target at which the processor should
 * generate a timer event.
 */
int
cpuid_deadline_tsc_supported(void)
{
	struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
	struct cpuid_regs regs;

	ASSERT(cpuid_checkpass(CPU, 1));
	ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));

	switch (cpi->cpi_vendor) {
	case X86_VENDOR_Intel:
		if (cpi->cpi_maxeax >= 1) {
			regs.cp_eax = 1;
			(void) cpuid_insn(NULL, &regs);
			return (regs.cp_ecx & CPUID_DEADLINE_TSC);
		} else {
			return (0);
		}
	default:
		return (0);
	}
}
#if defined(__amd64) && !defined(__xpv)
/*
 * Patch in versions of bcopy for high performance Intel Nhm processors
 * and later.
 */
void
patch_memops(uint_t vendor)
{
	size_t cnt, i;
	caddr_t to, from;

	if ((vendor == X86_VENDOR_Intel) &&
	    is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
		cnt = &bcopy_patch_end - &bcopy_patch_start;
		to = &bcopy_ck_size;
		from = &bcopy_patch_start;
		for (i = 0; i < cnt; i++) {
			*to++ = *from++;
		}
	}
}
#endif  /* __amd64 && !__xpv */
/*
 * This function finds the number of bits to represent the number of cores per
 * chip and the number of strands per core for the Intel platforms.
 * It re-uses the x2APIC cpuid code of the cpuid_pass2().
 */
void
cpuid_get_ext_topo(uint_t vendor, uint_t *core_nbits, uint_t *strand_nbits)
{
	struct cpuid_regs regs;
	struct cpuid_regs *cp = &regs;

	if (vendor != X86_VENDOR_Intel) {
		return;
	}

	/* if the cpuid level is 0xB, extended topo is available. */
	cp->cp_eax = 0;
	if (__cpuid_insn(cp) >= 0xB) {

		cp->cp_eax = 0xB;
		cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
		(void) __cpuid_insn(cp);

		/*
		 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
		 * indicates that the extended topology enumeration leaf is
		 * available.
		 */
		if (cp->cp_ebx) {
			uint_t coreid_shift = 0;
			uint_t chipid_shift = 0;
			uint_t i;
			uint_t level;

			for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
				cp->cp_eax = 0xB;
				cp->cp_ecx = i;

				(void) __cpuid_insn(cp);
				level = CPI_CPU_LEVEL_TYPE(cp);

				if (level == 1) {
					/*
					 * Thread level processor topology
					 * Number of bits shift right APIC ID
					 * to get the coreid.
					 */
					coreid_shift = BITX(cp->cp_eax, 4, 0);
				} else if (level == 2) {
					/*
					 * Core level processor topology
					 * Number of bits shift right APIC ID
					 * to get the chipid.
					 */
					chipid_shift = BITX(cp->cp_eax, 4, 0);
				}
			}

			if (coreid_shift > 0 && chipid_shift > coreid_shift) {
				*strand_nbits = coreid_shift;
				*core_nbits = chipid_shift - coreid_shift;
			}
		}
	}
}
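
/*
 * Shift arithmetic example (illustrative): a part reporting
 * coreid_shift == 1 and chipid_shift == 5 ends up with 1 strand bit
 * and 5 - 1 = 4 core bits in the x2APIC id, i.e. up to 2 strands per
 * core and 16 cores per chip.
 */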