/* Define HAS_FMA with bit_FMA_Usable.
   Source: glibc.git / sysdeps / x86_64 / multiarch / init-arch.c
   blob fed5ab8982126d89a869f91d733b5b65cf40a003  */
/* Initialize CPU feature data.
   This file is part of the GNU C Library.
   Copyright (C) 2008-2012 Free Software Foundation, Inc.
   Contributed by Ulrich Drepper <drepper@redhat.com>.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
#include <atomic.h>
#include <cpuid.h>
#include "init-arch.h"
25 struct cpu_features __cpu_features attribute_hidden;
28 static void
29 get_common_indeces (unsigned int *family, unsigned int *model)
31 __cpuid (1, __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax,
32 __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx,
33 __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx,
34 __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx);
36 unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
37 *family = (eax >> 8) & 0x0f;
38 *model = (eax >> 4) & 0x0f;
42 void
43 __init_cpu_features (void)
45 unsigned int ebx;
46 unsigned int ecx;
47 unsigned int edx;
48 unsigned int family = 0;
49 unsigned int model = 0;
50 enum cpu_features_kind kind;
52 __cpuid (0, __cpu_features.max_cpuid, ebx, ecx, edx);
54 /* This spells out "GenuineIntel". */
55 if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
57 kind = arch_kind_intel;
59 get_common_indeces (&family, &model);
61 /* Intel processors prefer SSE instruction for memory/string
62 routines if they are available. */
63 __cpu_features.feature[index_Prefer_SSE_for_memop]
64 |= bit_Prefer_SSE_for_memop;
66 unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
67 unsigned int extended_family = (eax >> 20) & 0xff;
68 unsigned int extended_model = (eax >> 12) & 0xf0;
69 if (family == 0x0f)
71 family += extended_family;
72 model += extended_model;
74 else if (family == 0x06)
76 ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
77 model += extended_model;
78 switch (model)
80 case 0x1c:
81 case 0x26:
82 /* BSF is slow on Atom. */
83 __cpu_features.feature[index_Slow_BSF] |= bit_Slow_BSF;
84 break;
86 default:
87 /* Unknown family 0x06 processors. Assuming this is one
88 of Core i3/i5/i7 processors if AVX is available. */
89 if ((ecx & bit_AVX) == 0)
90 break;
92 case 0x1a:
93 case 0x1e:
94 case 0x1f:
95 case 0x25:
96 case 0x2c:
97 case 0x2e:
98 case 0x2f:
99 /* Rep string instructions, copy backward, unaligned loads
100 and pminub are fast on Intel Core i3, i5 and i7. */
101 #if index_Fast_Rep_String != index_Fast_Copy_Backward
102 # error index_Fast_Rep_String != index_Fast_Copy_Backward
103 #endif
104 #if index_Fast_Rep_String != index_Fast_Unaligned_Load
105 # error index_Fast_Rep_String != index_Fast_Unaligned_Load
106 #endif
107 #if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
108 # error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
109 #endif
110 __cpu_features.feature[index_Fast_Rep_String]
111 |= (bit_Fast_Rep_String
112 | bit_Fast_Copy_Backward
113 | bit_Fast_Unaligned_Load
114 | bit_Prefer_PMINUB_for_stringop);
115 break;
119 /* This spells out "AuthenticAMD". */
120 else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
122 kind = arch_kind_amd;
124 get_common_indeces (&family, &model);
126 ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
128 /* AMD processors prefer SSE instructions for memory/string routines
129 if they are available, otherwise they prefer integer instructions. */
130 if ((ecx & 0x200))
131 __cpu_features.feature[index_Prefer_SSE_for_memop]
132 |= bit_Prefer_SSE_for_memop;
134 unsigned int eax;
135 __cpuid (0x80000000, eax, ebx, ecx, edx);
136 if (eax >= 0x80000001)
137 __cpuid (0x80000001,
138 __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].eax,
139 __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ebx,
140 __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx,
141 __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].edx);
143 else
144 kind = arch_kind_other;
146 /* Can we call xgetbv? */
147 if (CPUID_OSXSAVE)
149 unsigned int xcrlow;
150 unsigned int xcrhigh;
151 asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
152 /* Is YMM and XMM state usable? */
153 if ((xcrlow & (bit_YMM_state | bit_XMM_state)) ==
154 (bit_YMM_state | bit_XMM_state))
156 /* Determine if AVX is usable. */
157 if (CPUID_AVX)
158 __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable;
159 /* Determine if FMA is usable. */
160 if (CPUID_FMA)
161 __cpu_features.feature[index_FMA_Usable] |= bit_FMA_Usable;
162 /* Determine if FMA4 is usable. */
163 if (CPUID_FMA4)
164 __cpu_features.feature[index_FMA4_Usable] |= bit_FMA4_Usable;
168 __cpu_features.family = family;
169 __cpu_features.model = model;
170 atomic_write_barrier ();
171 __cpu_features.kind = kind;
174 #undef __get_cpu_features
176 const struct cpu_features *
177 __get_cpu_features (void)
179 if (__cpu_features.kind == arch_kind_unknown)
180 __init_cpu_features ();
182 return &__cpu_features;