1 /*--------------------------------------------------------------------*/
2 /*--- Machine-related stuff. m_machine.c ---*/
3 /*--------------------------------------------------------------------*/
5 /*
6 This file is part of Valgrind, a dynamic binary instrumentation
7 framework.
9 Copyright (C) 2000-2017 Julian Seward
10 jseward@acm.org
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, see <http://www.gnu.org/licenses/>.
25 The GNU General Public License is contained in the file COPYING.
28 #include "pub_core_basics.h"
29 #include "pub_core_vki.h"
30 #include "pub_core_threadstate.h"
31 #include "pub_core_libcassert.h"
32 #include "pub_core_libcbase.h"
33 #include "pub_core_libcfile.h"
34 #include "pub_core_libcprint.h"
35 #include "pub_core_libcproc.h"
36 #include "pub_core_mallocfree.h"
37 #include "pub_core_machine.h"
38 #include "pub_core_cpuid.h"
39 #include "pub_core_libcsignal.h" // for ppc32 messing with SIGILL and SIGFPE
40 #include "pub_core_debuglog.h"
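/* Convenience accessors for the guest program counter, stack pointer and
   frame pointer.  VG_INSTR_PTR, VG_STACK_PTR and VG_FRAME_PTR are the
   arch-specific guest-state field names (see pub_core_machine.h). */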
43 #define INSTR_PTR(regs) ((regs).vex.VG_INSTR_PTR)
44 #define STACK_PTR(regs) ((regs).vex.VG_STACK_PTR)
45 #define FRAME_PTR(regs) ((regs).vex.VG_FRAME_PTR)
47 #define STACK_PTR_S1(regs) ((regs).vex_shadow1.VG_STACK_PTR)
49 Addr VG_(get_IP) ( ThreadId tid ) {
50 return INSTR_PTR( VG_(threads)[tid].arch );
52 Addr VG_(get_SP) ( ThreadId tid ) {
53 return STACK_PTR( VG_(threads)[tid].arch );
55 Addr VG_(get_FP) ( ThreadId tid ) {
56 return FRAME_PTR( VG_(threads)[tid].arch );
59 Addr VG_(get_SP_s1) ( ThreadId tid ) {
60 return STACK_PTR_S1( VG_(threads)[tid].arch );
62 void VG_(set_SP_s1) ( ThreadId tid, Addr sp ) {
63 STACK_PTR_S1( VG_(threads)[tid].arch ) = sp;
66 void VG_(set_IP) ( ThreadId tid, Addr ip ) {
67 INSTR_PTR( VG_(threads)[tid].arch ) = ip;
69 void VG_(set_SP) ( ThreadId tid, Addr sp ) {
70 STACK_PTR( VG_(threads)[tid].arch ) = sp;
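/* Collect the registers an unwinder needs to start unwinding a thread's
   stack: the program counter, the stack pointer, and a few arch-specific
   extras (frame pointer, link register, and so on). */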
73 void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
74 ThreadId tid )
76 # if defined(VGA_x86)
77 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
78 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
79 regs->misc.X86.r_ebp
80 = VG_(threads)[tid].arch.vex.guest_EBP;
81 # elif defined(VGA_amd64)
82 regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
83 regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
84 regs->misc.AMD64.r_rbp
85 = VG_(threads)[tid].arch.vex.guest_RBP;
86 # elif defined(VGA_ppc32)
87 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
88 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
89 regs->misc.PPC32.r_lr
90 = VG_(threads)[tid].arch.vex.guest_LR;
91 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
92 regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
93 regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
94 regs->misc.PPC64.r_lr
95 = VG_(threads)[tid].arch.vex.guest_LR;
96 # elif defined(VGA_arm)
97 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
98 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
99 regs->misc.ARM.r14
100 = VG_(threads)[tid].arch.vex.guest_R14;
101 regs->misc.ARM.r12
102 = VG_(threads)[tid].arch.vex.guest_R12;
103 regs->misc.ARM.r11
104 = VG_(threads)[tid].arch.vex.guest_R11;
105 regs->misc.ARM.r7
106 = VG_(threads)[tid].arch.vex.guest_R7;
107 # elif defined(VGA_arm64)
108 regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
109 regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
110 regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
111 regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
112 # elif defined(VGA_s390x)
113 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
114 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
115 regs->misc.S390X.r_fp
116 = VG_(threads)[tid].arch.vex.guest_FP;
117 regs->misc.S390X.r_lr
118 = VG_(threads)[tid].arch.vex.guest_LR;
119 /* ANDREAS 3 Apr 2019 FIXME r_f0..r_f7: is this correct? */
120 regs->misc.S390X.r_f0
121 = VG_(threads)[tid].arch.vex.guest_v0.w64[0];
122 regs->misc.S390X.r_f1
123 = VG_(threads)[tid].arch.vex.guest_v1.w64[0];
124 regs->misc.S390X.r_f2
125 = VG_(threads)[tid].arch.vex.guest_v2.w64[0];
126 regs->misc.S390X.r_f3
127 = VG_(threads)[tid].arch.vex.guest_v3.w64[0];
128 regs->misc.S390X.r_f4
129 = VG_(threads)[tid].arch.vex.guest_v4.w64[0];
130 regs->misc.S390X.r_f5
131 = VG_(threads)[tid].arch.vex.guest_v5.w64[0];
132 regs->misc.S390X.r_f6
133 = VG_(threads)[tid].arch.vex.guest_v6.w64[0];
134 regs->misc.S390X.r_f7
135 = VG_(threads)[tid].arch.vex.guest_v7.w64[0];
136 # elif defined(VGA_mips32) || defined(VGP_nanomips_linux)
137 regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
138 regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
139 regs->misc.MIPS32.r30
140 = VG_(threads)[tid].arch.vex.guest_r30;
141 regs->misc.MIPS32.r31
142 = VG_(threads)[tid].arch.vex.guest_r31;
143 regs->misc.MIPS32.r28
144 = VG_(threads)[tid].arch.vex.guest_r28;
145 # elif defined(VGA_mips64)
146 regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
147 regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
148 regs->misc.MIPS64.r30
149 = VG_(threads)[tid].arch.vex.guest_r30;
150 regs->misc.MIPS64.r31
151 = VG_(threads)[tid].arch.vex.guest_r31;
152 regs->misc.MIPS64.r28
153 = VG_(threads)[tid].arch.vex.guest_r28;
154 # else
155 # error "Unknown arch"
156 # endif
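/* Copy a slice of one of a thread's register files into DST.  shadowNo
   selects the real guest state (0) or one of the two shadow register files
   (1 or 2) that tools may use to hold per-register metadata. */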
159 void
160 VG_(get_shadow_regs_area) ( ThreadId tid,
161 /*DST*/UChar* dst,
162 /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
164 void* src;
165 ThreadState* tst;
166 vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
167 vg_assert(VG_(is_valid_tid)(tid));
168 // Bounds check
169 vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
170 vg_assert(offset + size <= sizeof(VexGuestArchState));
171 // Copy
172 tst = & VG_(threads)[tid];
173 src = NULL;
174 switch (shadowNo) {
175 case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
176 case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
177 case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
179 vg_assert(src != NULL);
180 VG_(memcpy)( dst, src, size);
183 void
184 VG_(set_shadow_regs_area) ( ThreadId tid,
185 /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
186 /*SRC*/const UChar* src )
188 void* dst;
189 ThreadState* tst;
190 vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
191 vg_assert(VG_(is_valid_tid)(tid));
192 // Bounds check
193 vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
194 vg_assert(offset + size <= sizeof(VexGuestArchState));
195 // Copy
196 tst = & VG_(threads)[tid];
197 dst = NULL;
198 switch (shadowNo) {
199 case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
200 case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
201 case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
203 vg_assert(dst != NULL);
204 VG_(memcpy)( dst, src, size);
208 static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
209 const HChar*, Addr))
211 VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
212 VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %u\n", tid);
213 #if defined(VGA_x86)
214 (*f)(tid, "EAX", vex->guest_EAX);
215 (*f)(tid, "ECX", vex->guest_ECX);
216 (*f)(tid, "EDX", vex->guest_EDX);
217 (*f)(tid, "EBX", vex->guest_EBX);
218 (*f)(tid, "ESI", vex->guest_ESI);
219 (*f)(tid, "EDI", vex->guest_EDI);
220 (*f)(tid, "ESP", vex->guest_ESP);
221 (*f)(tid, "EBP", vex->guest_EBP);
222 #elif defined(VGA_amd64)
223 (*f)(tid, "RAX", vex->guest_RAX);
224 (*f)(tid, "RCX", vex->guest_RCX);
225 (*f)(tid, "RDX", vex->guest_RDX);
226 (*f)(tid, "RBX", vex->guest_RBX);
227 (*f)(tid, "RSI", vex->guest_RSI);
228 (*f)(tid, "RDI", vex->guest_RDI);
229 (*f)(tid, "RSP", vex->guest_RSP);
230 (*f)(tid, "RBP", vex->guest_RBP);
231 (*f)(tid, "R8" , vex->guest_R8 );
232 (*f)(tid, "R9" , vex->guest_R9 );
233 (*f)(tid, "R10", vex->guest_R10);
234 (*f)(tid, "R11", vex->guest_R11);
235 (*f)(tid, "R12", vex->guest_R12);
236 (*f)(tid, "R13", vex->guest_R13);
237 (*f)(tid, "R14", vex->guest_R14);
238 (*f)(tid, "R15", vex->guest_R15);
239 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
240 (*f)(tid, "GPR0" , vex->guest_GPR0 );
241 (*f)(tid, "GPR1" , vex->guest_GPR1 );
242 (*f)(tid, "GPR2" , vex->guest_GPR2 );
243 (*f)(tid, "GPR3" , vex->guest_GPR3 );
244 (*f)(tid, "GPR4" , vex->guest_GPR4 );
245 (*f)(tid, "GPR5" , vex->guest_GPR5 );
246 (*f)(tid, "GPR6" , vex->guest_GPR6 );
247 (*f)(tid, "GPR7" , vex->guest_GPR7 );
248 (*f)(tid, "GPR8" , vex->guest_GPR8 );
249 (*f)(tid, "GPR9" , vex->guest_GPR9 );
250 (*f)(tid, "GPR10", vex->guest_GPR10);
251 (*f)(tid, "GPR11", vex->guest_GPR11);
252 (*f)(tid, "GPR12", vex->guest_GPR12);
253 (*f)(tid, "GPR13", vex->guest_GPR13);
254 (*f)(tid, "GPR14", vex->guest_GPR14);
255 (*f)(tid, "GPR15", vex->guest_GPR15);
256 (*f)(tid, "GPR16", vex->guest_GPR16);
257 (*f)(tid, "GPR17", vex->guest_GPR17);
258 (*f)(tid, "GPR18", vex->guest_GPR18);
259 (*f)(tid, "GPR19", vex->guest_GPR19);
260 (*f)(tid, "GPR20", vex->guest_GPR20);
261 (*f)(tid, "GPR21", vex->guest_GPR21);
262 (*f)(tid, "GPR22", vex->guest_GPR22);
263 (*f)(tid, "GPR23", vex->guest_GPR23);
264 (*f)(tid, "GPR24", vex->guest_GPR24);
265 (*f)(tid, "GPR25", vex->guest_GPR25);
266 (*f)(tid, "GPR26", vex->guest_GPR26);
267 (*f)(tid, "GPR27", vex->guest_GPR27);
268 (*f)(tid, "GPR28", vex->guest_GPR28);
269 (*f)(tid, "GPR29", vex->guest_GPR29);
270 (*f)(tid, "GPR30", vex->guest_GPR30);
271 (*f)(tid, "GPR31", vex->guest_GPR31);
272 (*f)(tid, "CTR" , vex->guest_CTR );
273 (*f)(tid, "LR" , vex->guest_LR );
274 #elif defined(VGA_arm)
275 (*f)(tid, "R0" , vex->guest_R0 );
276 (*f)(tid, "R1" , vex->guest_R1 );
277 (*f)(tid, "R2" , vex->guest_R2 );
278 (*f)(tid, "R3" , vex->guest_R3 );
279 (*f)(tid, "R4" , vex->guest_R4 );
280 (*f)(tid, "R5" , vex->guest_R5 );
281 (*f)(tid, "R6" , vex->guest_R6 );
282 (*f)(tid, "R8" , vex->guest_R8 );
283 (*f)(tid, "R9" , vex->guest_R9 );
284 (*f)(tid, "R10", vex->guest_R10);
285 (*f)(tid, "R11", vex->guest_R11);
286 (*f)(tid, "R12", vex->guest_R12);
287 (*f)(tid, "R13", vex->guest_R13);
288 (*f)(tid, "R14", vex->guest_R14);
289 #elif defined(VGA_s390x)
290 (*f)(tid, "r0" , vex->guest_r0 );
291 (*f)(tid, "r1" , vex->guest_r1 );
292 (*f)(tid, "r2" , vex->guest_r2 );
293 (*f)(tid, "r3" , vex->guest_r3 );
294 (*f)(tid, "r4" , vex->guest_r4 );
295 (*f)(tid, "r5" , vex->guest_r5 );
296 (*f)(tid, "r6" , vex->guest_r6 );
297 (*f)(tid, "r7" , vex->guest_r7 );
298 (*f)(tid, "r8" , vex->guest_r8 );
299 (*f)(tid, "r9" , vex->guest_r9 );
300 (*f)(tid, "r10", vex->guest_r10);
301 (*f)(tid, "r11", vex->guest_r11);
302 (*f)(tid, "r12", vex->guest_r12);
303 (*f)(tid, "r13", vex->guest_r13);
304 (*f)(tid, "r14", vex->guest_r14);
305 (*f)(tid, "r15", vex->guest_r15);
306 #elif defined(VGA_mips32) || defined(VGA_mips64) || defined(VGP_nanomips_linux)
307 (*f)(tid, "r0" , vex->guest_r0 );
308 (*f)(tid, "r1" , vex->guest_r1 );
309 (*f)(tid, "r2" , vex->guest_r2 );
310 (*f)(tid, "r3" , vex->guest_r3 );
311 (*f)(tid, "r4" , vex->guest_r4 );
312 (*f)(tid, "r5" , vex->guest_r5 );
313 (*f)(tid, "r6" , vex->guest_r6 );
314 (*f)(tid, "r7" , vex->guest_r7 );
315 (*f)(tid, "r8" , vex->guest_r8 );
316 (*f)(tid, "r9" , vex->guest_r9 );
317 (*f)(tid, "r10", vex->guest_r10);
318 (*f)(tid, "r11", vex->guest_r11);
319 (*f)(tid, "r12", vex->guest_r12);
320 (*f)(tid, "r13", vex->guest_r13);
321 (*f)(tid, "r14", vex->guest_r14);
322 (*f)(tid, "r15", vex->guest_r15);
323 (*f)(tid, "r16", vex->guest_r16);
324 (*f)(tid, "r17", vex->guest_r17);
325 (*f)(tid, "r18", vex->guest_r18);
326 (*f)(tid, "r19", vex->guest_r19);
327 (*f)(tid, "r20", vex->guest_r20);
328 (*f)(tid, "r21", vex->guest_r21);
329 (*f)(tid, "r22", vex->guest_r22);
330 (*f)(tid, "r23", vex->guest_r23);
331 (*f)(tid, "r24", vex->guest_r24);
332 (*f)(tid, "r25", vex->guest_r25);
333 (*f)(tid, "r26", vex->guest_r26);
334 (*f)(tid, "r27", vex->guest_r27);
335 (*f)(tid, "r28", vex->guest_r28);
336 (*f)(tid, "r29", vex->guest_r29);
337 (*f)(tid, "r30", vex->guest_r30);
338 (*f)(tid, "r31", vex->guest_r31);
339 #elif defined(VGA_arm64)
340 (*f)(tid, "x0" , vex->guest_X0 );
341 (*f)(tid, "x1" , vex->guest_X1 );
342 (*f)(tid, "x2" , vex->guest_X2 );
343 (*f)(tid, "x3" , vex->guest_X3 );
344 (*f)(tid, "x4" , vex->guest_X4 );
345 (*f)(tid, "x5" , vex->guest_X5 );
346 (*f)(tid, "x6" , vex->guest_X6 );
347 (*f)(tid, "x7" , vex->guest_X7 );
348 (*f)(tid, "x8" , vex->guest_X8 );
349 (*f)(tid, "x9" , vex->guest_X9 );
350 (*f)(tid, "x10", vex->guest_X10);
351 (*f)(tid, "x11", vex->guest_X11);
352 (*f)(tid, "x12", vex->guest_X12);
353 (*f)(tid, "x13", vex->guest_X13);
354 (*f)(tid, "x14", vex->guest_X14);
355 (*f)(tid, "x15", vex->guest_X15);
356 (*f)(tid, "x16", vex->guest_X16);
357 (*f)(tid, "x17", vex->guest_X17);
358 (*f)(tid, "x18", vex->guest_X18);
359 (*f)(tid, "x19", vex->guest_X19);
360 (*f)(tid, "x20", vex->guest_X20);
361 (*f)(tid, "x21", vex->guest_X21);
362 (*f)(tid, "x22", vex->guest_X22);
363 (*f)(tid, "x23", vex->guest_X23);
364 (*f)(tid, "x24", vex->guest_X24);
365 (*f)(tid, "x25", vex->guest_X25);
366 (*f)(tid, "x26", vex->guest_X26);
367 (*f)(tid, "x27", vex->guest_X27);
368 (*f)(tid, "x28", vex->guest_X28);
369 (*f)(tid, "x29", vex->guest_X29);
370 (*f)(tid, "x30", vex->guest_X30);
371 #else
372 # error Unknown arch
373 #endif
377 void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
379 ThreadId tid;
381 for (tid = 1; tid < VG_N_THREADS; tid++) {
382 if (VG_(is_valid_tid)(tid)
383 || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
384 // live thread or thread instructed to die by another thread that
385 // called exit.
386 apply_to_GPs_of_tid(tid, f);
391 void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
393 *tid = (ThreadId)(-1);
396 Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
397 /*OUT*/Addr* stack_min,
398 /*OUT*/Addr* stack_max)
400 ThreadId i;
401 for (i = (*tid)+1; i < VG_N_THREADS; i++) {
402 if (i == VG_INVALID_THREADID)
403 continue;
404 if (VG_(threads)[i].status != VgTs_Empty) {
405 *tid = i;
406 *stack_min = VG_(get_SP)(i);
407 *stack_max = VG_(threads)[i].client_stack_highest_byte;
408 return True;
411 return False;
414 Addr VG_(thread_get_stack_max)(ThreadId tid)
416 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
417 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
418 return VG_(threads)[tid].client_stack_highest_byte;
421 SizeT VG_(thread_get_stack_size)(ThreadId tid)
423 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
424 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
425 return VG_(threads)[tid].client_stack_szB;
428 Addr VG_(thread_get_altstack_min)(ThreadId tid)
430 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
431 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
432 return (Addr)VG_(threads)[tid].altstack.ss_sp;
435 SizeT VG_(thread_get_altstack_size)(ThreadId tid)
437 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
438 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
439 return VG_(threads)[tid].altstack.ss_size;
442 //-------------------------------------------------------------
443 /* Details about the capabilities of the underlying (host) CPU. These
444 details are acquired (1) by enquiring with the CPU at startup, or
445 (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
446 line size). It's a bit nasty in the sense that there's no obvious
447 way to stop uses of some of this info before it's ready to go.
448 See pub_core_machine.h for more information about that.
450 VG_(machine_get_hwcaps) may use signals (although it attempts to
451 leave signal state unchanged) and therefore should only be
452 called before m_main sets up the client's signal state.
455 /* --------- State --------- */
456 static Bool hwcaps_done = False;
458 /* --- all archs --- */
459 static VexArch va = VexArch_INVALID;
460 static VexArchInfo vai;
462 #if defined(VGA_x86)
463 UInt VG_(machine_x86_have_mxcsr) = 0;
464 #endif
465 #if defined(VGA_ppc32)
466 UInt VG_(machine_ppc32_has_FP) = 0;
467 UInt VG_(machine_ppc32_has_VMX) = 0;
468 #endif
469 #if defined(VGA_ppc64be) || defined(VGA_ppc64le)
470 ULong VG_(machine_ppc64_has_VMX) = 0;
471 #endif
472 #if defined(VGA_arm)
473 Int VG_(machine_arm_archlevel) = 4;
474 #endif
477 /* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
478 testing, so we need a VG_MINIMAL_JMP_BUF. */
479 #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
480 || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32) \
481 || defined(VGA_mips64) || defined(VGA_arm64)
482 #include "pub_core_libcsetjmp.h"
483 static VG_MINIMAL_JMP_BUF(env_unsup_insn);
484 static void handler_unsup_insn ( Int x ) {
485 VG_MINIMAL_LONGJMP(env_unsup_insn);
487 #endif
490 /* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
491 * handlers are installed. Determines the sizes affected by dcbz
492 * and dcbzl instructions and updates the given VexArchInfo structure
493 * accordingly.
495 * Not very defensive: assumes that, as long as the dcbz/dcbzl
496 * instructions don't raise a SIGILL, they will zero an aligned,
497 * contiguous block of memory of a sensible size. */
498 #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
499 static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
501 Int dcbz_szB = 0;
502 Int dcbzl_szB;
503 # define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
504 char test_block[4*MAX_DCBZL_SZB];
505 char *aligned = test_block;
506 Int i;
508 /* round up to the next max block size; assumes MAX_DCBZL_SZB is a power of 2 */
509 aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
510 vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);
512 /* dcbz often clears 32B, although sometimes whatever the native cache
513 * block size is */
514 VG_(memset)(test_block, 0xff, sizeof(test_block));
515 __asm__ __volatile__("dcbz 0,%0"
516 : /*out*/
517 : "r" (aligned) /*in*/
518 : "memory" /*clobber*/);
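/* Count how many bytes the dcbz above actually zeroed; that count is the
   effective dcbz block size. */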
519 for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
520 if (!test_block[i])
521 ++dcbz_szB;
523 vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);
525 /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
526 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
527 dcbzl_szB = 0; /* indicates unsupported */
529 else {
530 VG_(memset)(test_block, 0xff, sizeof(test_block));
531 /* some older assemblers won't understand the dcbzl instruction
532 * variant, so we directly emit the instruction ourselves */
533 __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
534 : /*out*/
535 : "r" (aligned) /*in*/
536 : "memory", "r9" /*clobber*/);
537 for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
538 if (!test_block[i])
539 ++dcbzl_szB;
541 vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
544 arch_info->ppc_dcbz_szB = dcbz_szB;
545 arch_info->ppc_dcbzl_szB = dcbzl_szB;
547 VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
548 dcbz_szB, dcbzl_szB);
549 # undef MAX_DCBZL_SZB
551 #endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */
553 #ifdef VGA_s390x
555 /* Read /proc/cpuinfo. Look for lines like these
557 processor 0: version = FF, identification = 0117C9, machine = 2064
559 and return the machine model. If the machine model could not be determined
560 or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */
562 static UInt VG_(get_machine_model)(void)
564 static struct model_map {
565 const HChar name[5];
566 UInt id;
567 } model_map[] = {
568 { "2064", VEX_S390X_MODEL_Z900 },
569 { "2066", VEX_S390X_MODEL_Z800 },
570 { "2084", VEX_S390X_MODEL_Z990 },
571 { "2086", VEX_S390X_MODEL_Z890 },
572 { "2094", VEX_S390X_MODEL_Z9_EC },
573 { "2096", VEX_S390X_MODEL_Z9_BC },
574 { "2097", VEX_S390X_MODEL_Z10_EC },
575 { "2098", VEX_S390X_MODEL_Z10_BC },
576 { "2817", VEX_S390X_MODEL_Z196 },
577 { "2818", VEX_S390X_MODEL_Z114 },
578 { "2827", VEX_S390X_MODEL_ZEC12 },
579 { "2828", VEX_S390X_MODEL_ZBC12 },
580 { "2964", VEX_S390X_MODEL_Z13 },
581 { "2965", VEX_S390X_MODEL_Z13S },
582 { "3906", VEX_S390X_MODEL_Z14 },
583 { "3907", VEX_S390X_MODEL_Z14_ZR1 },
584 { "8561", VEX_S390X_MODEL_Z15 },
585 { "8562", VEX_S390X_MODEL_Z15 },
588 Int model, n, fh;
589 SysRes fd;
590 SizeT num_bytes, file_buf_size;
591 HChar *p, *m, *model_name, *file_buf;
593 /* Slurp contents of /proc/cpuinfo into FILE_BUF */
594 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
595 if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;
597 fh = sr_Res(fd);
599 /* Determine the size of /proc/cpuinfo.
600 Work around broken-ness in /proc file system implementation.
601 fstat returns a zero size for /proc/cpuinfo although it is
602 claimed to be a regular file. */
603 num_bytes = 0;
604 file_buf_size = 1000;
605 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
606 while (42) {
607 n = VG_(read)(fh, file_buf, file_buf_size);
608 if (n < 0) break;
610 num_bytes += n;
611 if (n < file_buf_size) break; /* reached EOF */
614 if (n < 0) num_bytes = 0; /* read error; ignore contents */
616 if (num_bytes > file_buf_size) {
617 VG_(free)( file_buf );
618 VG_(lseek)( fh, 0, VKI_SEEK_SET );
619 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
620 n = VG_(read)( fh, file_buf, num_bytes );
621 if (n < 0) num_bytes = 0;
624 file_buf[num_bytes] = '\0';
625 VG_(close)(fh);
627 /* Parse file */
628 model = VEX_S390X_MODEL_UNKNOWN;
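/* For each "processor" line, locate the "machine = NNNN" field, map the
   number onto a VEX_S390X_MODEL_* value, and keep the smallest (oldest)
   model seen across all CPUs. */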
629 for (p = file_buf; *p; ++p) {
630 /* Beginning of line */
631 if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;
633 m = VG_(strstr)( p, "machine" );
634 if (m == NULL) continue;
636 p = m + sizeof "machine" - 1;
637 while ( VG_(isspace)( *p ) || *p == '=') {
638 if (*p == '\n') goto next_line;
639 ++p;
642 model_name = p;
643 for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
644 struct model_map *mm = model_map + n;
645 SizeT len = VG_(strlen)( mm->name );
646 if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
647 VG_(isspace)( model_name[len] )) {
648 if (mm->id < model) model = mm->id;
649 p = model_name + len;
650 break;
653 /* Skip until end-of-line */
654 while (*p != '\n')
655 ++p;
656 next_line: ;
659 VG_(free)( file_buf );
660 VG_(debugLog)(1, "machine", "model = %s\n",
661 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
662 : model_map[model].name);
663 return model;
666 #endif /* defined(VGA_s390x) */
668 #if defined(VGA_mips32) || defined(VGA_mips64)
671  * Initialize hwcaps by parsing /proc/cpuinfo. Returns False if it cannot
672  * determine what CPU it is (it searches only for the models that are or may be
673  * supported by Valgrind).
675 static Bool VG_(parse_cpuinfo)(void)
677 const char *search_Broadcom_str = "cpu model\t\t: Broadcom";
678 const char *search_Cavium_str= "cpu model\t\t: Cavium";
679 const char *search_Ingenic_str= "cpu model\t\t: Ingenic";
680 const char *search_Loongson_str= "cpu model\t\t: ICT Loongson";
681 const char *search_MIPS_str = "cpu model\t\t: MIPS";
682 const char *search_Netlogic_str = "cpu model\t\t: Netlogic";
684 Int n, fh;
685 SysRes fd;
686 SizeT num_bytes, file_buf_size;
687 HChar *file_buf, *isa;
689 /* Slurp contents of /proc/cpuinfo into FILE_BUF */
690 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
691 if ( sr_isError(fd) ) return False;
693 fh = sr_Res(fd);
695 /* Determine the size of /proc/cpuinfo.
696 Work around broken-ness in /proc file system implementation.
697 fstat returns a zero size for /proc/cpuinfo although it is
698 claimed to be a regular file. */
699 num_bytes = 0;
700 file_buf_size = 1000;
701 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
702 while (42) {
703 n = VG_(read)(fh, file_buf, file_buf_size);
704 if (n < 0) break;
706 num_bytes += n;
707 if (n < file_buf_size) break; /* reached EOF */
710 if (n < 0) num_bytes = 0; /* read error; ignore contents */
712 if (num_bytes > file_buf_size) {
713 VG_(free)( file_buf );
714 VG_(lseek)( fh, 0, VKI_SEEK_SET );
715 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
716 n = VG_(read)( fh, file_buf, num_bytes );
717 if (n < 0) num_bytes = 0;
720 file_buf[num_bytes] = '\0';
721 VG_(close)(fh);
723 /* Parse file */
724 if (VG_(strstr)(file_buf, search_Broadcom_str) != NULL)
725 vai.hwcaps = VEX_PRID_COMP_BROADCOM;
726 else if (VG_(strstr)(file_buf, search_Netlogic_str) != NULL)
727 vai.hwcaps = VEX_PRID_COMP_NETLOGIC;
728 else if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
729 vai.hwcaps = VEX_PRID_COMP_CAVIUM;
730 else if (VG_(strstr)(file_buf, search_MIPS_str) != NULL)
731 vai.hwcaps = VEX_PRID_COMP_MIPS;
732 else if (VG_(strstr)(file_buf, search_Ingenic_str) != NULL)
733 vai.hwcaps = VEX_PRID_COMP_INGENIC_E1;
734 else if (VG_(strstr)(file_buf, search_Loongson_str) != NULL)
735 vai.hwcaps = (VEX_PRID_COMP_LEGACY | VEX_PRID_IMP_LOONGSON_64);
736 else {
737 /* Did not find string in the proc file. */
738 vai.hwcaps = 0;
739 VG_(free)(file_buf);
740 return False;
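/* If the kernel exports an "isa" line, use it to set the exact ISA level
   flags; otherwise guess them below from the CPU model. */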
743 isa = VG_(strstr)(file_buf, "isa\t\t\t: ");
745 if (NULL != isa) {
746 if (VG_(strstr) (isa, "mips32r1") != NULL)
747 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
748 if (VG_(strstr) (isa, "mips32r2") != NULL)
749 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
750 if (VG_(strstr) (isa, "mips32r6") != NULL)
751 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R6;
752 if (VG_(strstr) (isa, "mips64r1") != NULL)
753 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R1;
754 if (VG_(strstr) (isa, "mips64r2") != NULL)
755 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2;
756 if (VG_(strstr) (isa, "mips64r6") != NULL)
757 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R6;
760 * TODO(petarj): Remove this Cavium workaround once Linux kernel folks
761 * decide to change incorrect settings in
762 * mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h.
763 * The current settings show mips32r1, mips32r2 and mips64r1 as
764 * unsupported ISAs by Cavium MIPS CPUs.
766 if (VEX_MIPS_COMP_ID(vai.hwcaps) == VEX_PRID_COMP_CAVIUM) {
767 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1 | VEX_MIPS_CPU_ISA_M32R2 |
768 VEX_MIPS_CPU_ISA_M64R1;
770 } else {
772 * Kernel does not provide information about supported ISAs.
773 * Populate the isa level flags based on the CPU model. That is our
774 * best guess.
776 switch (VEX_MIPS_COMP_ID(vai.hwcaps)) {
777 case VEX_PRID_COMP_CAVIUM:
778 case VEX_PRID_COMP_NETLOGIC:
779 vai.hwcaps |= (VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1);
780 /* fallthrough */
781 case VEX_PRID_COMP_INGENIC_E1:
782 case VEX_PRID_COMP_MIPS:
783 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
784 /* fallthrough */
785 case VEX_PRID_COMP_BROADCOM:
786 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
787 break;
788 case VEX_PRID_COMP_LEGACY:
789 if ((VEX_MIPS_PROC_ID(vai.hwcaps) == VEX_PRID_IMP_LOONGSON_64))
790 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1 |
791 VEX_MIPS_CPU_ISA_M32R2 | VEX_MIPS_CPU_ISA_M32R1;
792 break;
793 default:
794 break;
797 VG_(free)(file_buf);
798 return True;
801 #endif /* defined(VGA_mips32) || defined(VGA_mips64) */
803 #if defined(VGP_arm64_linux)
805 /* Check to see whether we are running on a Cavium core, and if so auto-enable
806 the fallback LLSC implementation. See #369459. */
808 static Bool VG_(parse_cpuinfo)(void)
810 const char *search_Cavium_str = "CPU implementer\t: 0x43";
812 Int n, fh;
813 SysRes fd;
814 SizeT num_bytes, file_buf_size;
815 HChar *file_buf;
817 /* Slurp contents of /proc/cpuinfo into FILE_BUF */
818 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
819 if ( sr_isError(fd) ) return False;
821 fh = sr_Res(fd);
823 /* Determine the size of /proc/cpuinfo.
824 Work around broken-ness in /proc file system implementation.
825 fstat returns a zero size for /proc/cpuinfo although it is
826 claimed to be a regular file. */
827 num_bytes = 0;
828 file_buf_size = 1000;
829 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
830 while (42) {
831 n = VG_(read)(fh, file_buf, file_buf_size);
832 if (n < 0) break;
834 num_bytes += n;
835 if (n < file_buf_size) break; /* reached EOF */
838 if (n < 0) num_bytes = 0; /* read error; ignore contents */
840 if (num_bytes > file_buf_size) {
841 VG_(free)( file_buf );
842 VG_(lseek)( fh, 0, VKI_SEEK_SET );
843 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
844 n = VG_(read)( fh, file_buf, num_bytes );
845 if (n < 0) num_bytes = 0;
848 file_buf[num_bytes] = '\0';
849 VG_(close)(fh);
851 /* Parse file */
852 if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
853 vai.arm64_requires_fallback_LLSC = True;
855 VG_(free)(file_buf);
856 return True;
859 #endif /* defined(VGP_arm64_linux) */
861 Bool VG_(machine_get_hwcaps)( void )
863 vg_assert(hwcaps_done == False);
864 hwcaps_done = True;
866 // Whack default settings into vai, so that we only need to fill in
867 // any interesting bits.
868 LibVEX_default_VexArchInfo(&vai);
870 #if defined(VGA_x86)
871 { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
872 UInt eax, ebx, ecx, edx, max_extended;
873 HChar vstr[13];
874 vstr[0] = 0;
876 if (!VG_(has_cpuid)())
877 /* we can't do cpuid at all. Give up. */
878 return False;
880 VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
881 if (eax < 1)
882 /* we can't ask for cpuid(x) for x > 0. Give up. */
883 return False;
885 /* Get processor ID string, and max basic/extended index
886 values. */
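/* CPUID leaf 0 returns the 12-character vendor string in EBX, EDX, ECX
   (in that order), e.g. "GenuineIntel" or "AuthenticAMD". */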
887 VG_(memcpy)(&vstr[0], &ebx, 4);
888 VG_(memcpy)(&vstr[4], &edx, 4);
889 VG_(memcpy)(&vstr[8], &ecx, 4);
890 vstr[12] = 0;
892 VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
893 max_extended = eax;
895 /* get capabilities bits into edx */
896 VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);
898 have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
899 have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
900 have_sse3 = (ecx & (1<<0)) != 0; /* True => have sse3 insns */
902 /* cmpxchg8b is a minimum requirement now; if we don't have it we
903 must simply give up. But all CPUs since Pentium-I have it, so
904 that doesn't seem like much of a restriction. */
905 have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
906 if (!have_cx8)
907 return False;
909 /* Figure out if this is an AMD that can do MMXEXT. */
910 have_mmxext = False;
911 if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
912 && max_extended >= 0x80000001) {
913 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
914 /* Some older AMD processors support a sse1 subset (Integer SSE). */
915 have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
918 /* Figure out if this is an AMD or Intel that can do LZCNT. */
919 have_lzcnt = False;
920 if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
921 || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
922 && max_extended >= 0x80000001) {
923 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
924 have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
927 /* Intel processors don't define the mmxext extension, but since it
928 is just an sse1 subset, always define it when we have sse1. */
929 if (have_sse1)
930 have_mmxext = True;
932 va = VexArchX86;
933 vai.endness = VexEndnessLE;
935 if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
936 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
937 vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
938 vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
939 vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
940 if (have_lzcnt)
941 vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
942 VG_(machine_x86_have_mxcsr) = 1;
943 } else if (have_sse2 && have_sse1 && have_mmxext) {
944 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
945 vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
946 vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
947 if (have_lzcnt)
948 vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
949 VG_(machine_x86_have_mxcsr) = 1;
950 } else if (have_sse1 && have_mmxext) {
951 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
952 vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
953 VG_(machine_x86_have_mxcsr) = 1;
954 } else if (have_mmxext) {
955 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
956 VG_(machine_x86_have_mxcsr) = 0;
957 } else {
958 vai.hwcaps = 0; /*baseline - no sse at all*/
959 VG_(machine_x86_have_mxcsr) = 0;
962 VG_(machine_get_cache_info)(&vai);
964 return True;
967 #elif defined(VGA_amd64)
968 { Bool have_sse3, have_ssse3, have_cx8, have_cx16;
969 Bool have_lzcnt, have_avx, have_bmi, have_avx2;
970 Bool have_rdtscp, have_rdrand, have_f16c, have_rdseed;
971 UInt eax, ebx, ecx, edx, max_basic, max_extended;
972 ULong xgetbv_0 = 0;
973 HChar vstr[13];
974 vstr[0] = 0;
976 have_sse3 = have_ssse3 = have_cx8 = have_cx16
977 = have_lzcnt = have_avx = have_bmi = have_avx2
978 = have_rdtscp = have_rdrand = have_f16c = have_rdseed = False;
980 eax = ebx = ecx = edx = max_basic = max_extended = 0;
982 if (!VG_(has_cpuid)())
983 /* we can't do cpuid at all. Give up. */
984 return False;
986 VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
987 max_basic = eax;
988 if (max_basic < 1)
989 /* we can't ask for cpuid(x) for x > 0. Give up. */
990 return False;
992 /* Get processor ID string, and max basic/extended index
993 values. */
994 VG_(memcpy)(&vstr[0], &ebx, 4);
995 VG_(memcpy)(&vstr[4], &edx, 4);
996 VG_(memcpy)(&vstr[8], &ecx, 4);
997 vstr[12] = 0;
999 VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
1000 max_extended = eax;
1002 /* get capabilities bits into edx */
1003 VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);
1005 // we assume that SSE1 and SSE2 are available by default
1006 have_sse3 = (ecx & (1<<0)) != 0; /* True => have sse3 insns */
1007 have_ssse3 = (ecx & (1<<9)) != 0; /* True => have Sup SSE3 insns */
1008 // fma is ecx:12
1009 // sse41 is ecx:19
1010 // sse42 is ecx:20
1011 // xsave is ecx:26
1012 // osxsave is ecx:27
1013 // avx is ecx:28
1014 have_f16c = (ecx & (1<<29)) != 0; /* True => have F16C insns */
1015 have_rdrand = (ecx & (1<<30)) != 0; /* True => have RDRAND insns */
1017 have_avx = False;
1018 /* have_fma = False; */
1019 if ( (ecx & ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) {
1020 /* The processor supports AVX instructions, and both XGETBV and the
1021 AVX instructions are enabled by the OS. */
1022 ULong w;
1023 __asm__ __volatile__("movq $0,%%rcx ; "
1024 ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
1025 "movq %%rax,%0"
1026 :/*OUT*/"=r"(w) :/*IN*/
1027 :/*TRASH*/"rdx","rcx","rax");
1028 xgetbv_0 = w;
1029 if ((xgetbv_0 & 7) == 7) {
1030 /* Only say we have AVX if the XSAVE-allowable
1031 bitfield-mask allows x87, SSE and AVX state. We could
1032 actually run with a more restrictive XGETBV(0) value,
1033 but VEX's implementation of XSAVE and XRSTOR assumes
1034 that all 3 bits are enabled.
1036 Also, the VEX implementation of XSAVE/XRSTOR assumes that
1037 state component [2] (the YMM high halves) are located in
1038 the XSAVE image at offsets 576 .. 831. So we have to
1039 check that here before declaring AVX to be supported. */
1040 UInt eax2, ebx2, ecx2, edx2;
1041 VG_(cpuid)(0xD, 2, &eax2, &ebx2, &ecx2, &edx2);
1042 if (ebx2 == 576 && eax2 == 256) {
1043 have_avx = True;
1045 /* have_fma = (ecx & (1<<12)) != 0; */
1046 /* have_fma: Probably correct, but gcc complains due to
1047 unusedness. */
1051 /* cmpxchg8b is a minimum requirement now; if we don't have it we
1052 must simply give up. But all CPUs since Pentium-I have it, so
1053 that doesn't seem like much of a restriction. */
1054 have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
1055 if (!have_cx8)
1056 return False;
1058 /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
1059 have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */
1061 /* Figure out if this CPU can do LZCNT. */
1062 have_lzcnt = False;
1063 if (max_extended >= 0x80000001) {
1064 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
1065 have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
1068 /* Can we do RDTSCP? */
1069 have_rdtscp = False;
1070 if (max_extended >= 0x80000001) {
1071 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
1072 have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
1075 /* Check for BMI1 and AVX2, provided we have AVX1 (plus OS support). */
1076 have_bmi = False;
1077 have_avx2 = False;
1078 if (have_avx && max_basic >= 7) {
1079 VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
1080 have_bmi = (ebx & (1<<3)) != 0; /* True => have BMI1 */
1081 have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
1082 have_rdseed = (ebx & (1<<18)) != 0; /* True => have RDSEED insns */
1085 /* Sanity check for RDRAND and F16C. These don't actually *need* AVX, but
1086 it's convenient to restrict them to the AVX case since the simulated
1087 CPUID we'll offer them on has AVX as a base. */
1088 if (!have_avx) {
1089 have_f16c = False;
1090 have_rdrand = False;
1091 have_rdseed = False;
1094 va = VexArchAMD64;
1095 vai.endness = VexEndnessLE;
1096 vai.hwcaps = (have_sse3 ? VEX_HWCAPS_AMD64_SSE3 : 0)
1097 | (have_ssse3 ? VEX_HWCAPS_AMD64_SSSE3 : 0)
1098 | (have_cx16 ? VEX_HWCAPS_AMD64_CX16 : 0)
1099 | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0)
1100 | (have_avx ? VEX_HWCAPS_AMD64_AVX : 0)
1101 | (have_bmi ? VEX_HWCAPS_AMD64_BMI : 0)
1102 | (have_avx2 ? VEX_HWCAPS_AMD64_AVX2 : 0)
1103 | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0)
1104 | (have_f16c ? VEX_HWCAPS_AMD64_F16C : 0)
1105 | (have_rdrand ? VEX_HWCAPS_AMD64_RDRAND : 0)
1106 | (have_rdseed ? VEX_HWCAPS_AMD64_RDSEED : 0);
1108 VG_(machine_get_cache_info)(&vai);
1110 return True;
1113 #elif defined(VGA_ppc32)
1115 /* Find out which subset of the ppc32 instruction set is supported by
1116 verifying whether various ppc32 instructions generate a SIGILL
1117 or a SIGFPE. An alternative approach is to check the AT_HWCAP and
1118 AT_PLATFORM entries in the ELF auxiliary table -- see also
1119 the_iifii.client_auxv in m_main.c.
1121 vki_sigset_t saved_set, tmp_set;
1122 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
1123 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act;
1125 volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
1126 volatile Bool have_isa_2_07, have_isa_3_0;
1127 Int r;
1129 /* This is a kludge. Really we ought to back-convert saved_act
1130 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1131 since that's a no-op on all ppc32 platforms so far supported,
1132 it's not worth the typing effort. At least include the most basic
1133 sanity check: */
1134 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1136 VG_(sigemptyset)(&tmp_set);
1137 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1138 VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1140 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1141 vg_assert(r == 0);
1143 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1144 vg_assert(r == 0);
1145 tmp_sigill_act = saved_sigill_act;
1147 r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1148 vg_assert(r == 0);
1149 tmp_sigfpe_act = saved_sigfpe_act;
1151 /* NODEFER: signal handler does not return (from the kernel's point of
1152 view), hence if it is to successfully catch a signal more than once,
1153 we need the NODEFER flag. */
1154 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1155 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1156 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1157 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1158 r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1159 vg_assert(r == 0);
1161 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
1162 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
1163 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER;
1164 tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
1165 r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1166 vg_assert(r == 0);
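/* Each probe below executes one representative instruction from the
   feature group in question.  If the CPU lacks the feature, the insn
   raises SIGILL, handler_unsup_insn longjmps back to the setjmp, and the
   corresponding have_* flag is cleared. */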
1168 /* standard FP insns */
1169 have_F = True;
1170 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1171 have_F = False;
1172 } else {
1173 __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
1176 /* Altivec insns */
1177 have_V = True;
1178 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1179 have_V = False;
1180 } else {
1181 /* Unfortunately some older assemblers don't speak Altivec (or
1182 choose not to), so to be safe we directly emit the 32-bit
1183 word corresponding to "vor 0,0,0". This fixes a build
1184 problem that happens on Debian 3.1 (ppc32), and probably
1185 various other places. */
1186 __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
1189 /* General-Purpose optional (fsqrt, fsqrts) */
1190 have_FX = True;
1191 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1192 have_FX = False;
1193 } else {
1194 __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
1197 /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
1198 have_GX = True;
1199 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1200 have_GX = False;
1201 } else {
1202 __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
1205 /* VSX support implies Power ISA 2.06 */
1206 have_VX = True;
1207 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1208 have_VX = False;
1209 } else {
1210 __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
1213 /* Check for Decimal Floating Point (DFP) support. */
1214 have_DFP = True;
1215 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1216 have_DFP = False;
1217 } else {
1218 __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
1221 /* Check for ISA 2.07 support. */
1222 have_isa_2_07 = True;
1223 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1224 have_isa_2_07 = False;
1225 } else {
1226 __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
1229 /* Check for ISA 3.0 support. */
1230 have_isa_3_0 = True;
1231 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1232 have_isa_3_0 = False;
1233 } else {
1234 __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
1237 // ISA 3.1 not supported on 32-bit systems
1239 /* determine dcbz/dcbzl sizes while we still have the signal
1240 * handlers registered */
1241 find_ppc_dcbz_sz(&vai);
1243 r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
1244 vg_assert(r == 0);
1245 r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
1246 vg_assert(r == 0);
1247 r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1248 vg_assert(r == 0);
1249 VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
1250 (Int)have_F, (Int)have_V, (Int)have_FX,
1251 (Int)have_GX, (Int)have_VX, (Int)have_DFP,
1252 (Int)have_isa_2_07, (Int)have_isa_3_0);
1253 /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
1254 if (have_V && !have_F)
1255 have_V = False;
1256 if (have_FX && !have_F)
1257 have_FX = False;
1258 if (have_GX && !have_F)
1259 have_GX = False;
1261 VG_(machine_ppc32_has_FP) = have_F ? 1 : 0;
1262 VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;
1264 va = VexArchPPC32;
1265 vai.endness = VexEndnessBE;
1267 vai.hwcaps = 0;
1268 if (have_F) vai.hwcaps |= VEX_HWCAPS_PPC32_F;
1269 if (have_V) vai.hwcaps |= VEX_HWCAPS_PPC32_V;
1270 if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
1271 if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
1272 if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
1273 if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
1274 if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;
1275 if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA3_0;
1276 /* ISA 3.1 not supported on 32-bit systems. */
1278 VG_(machine_get_cache_info)(&vai);
1280 /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
1281 called before we're ready to go. */
1282 return True;
1285 #elif defined(VGA_ppc64be)|| defined(VGA_ppc64le)
1287 /* Same instruction set detection algorithm as for ppc32. */
1288 vki_sigset_t saved_set, tmp_set;
1289 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
1290 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act;
1292 volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
1293 volatile Bool have_isa_2_07, have_isa_3_0, have_isa_3_1;
1294 Int r;
1296 /* This is a kludge. Really we ought to back-convert saved_act
1297 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1298 since that's a no-op on all ppc64 platforms so far supported,
1299 it's not worth the typing effort. At least include the most basic
1300 sanity check: */
1301 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1303 VG_(sigemptyset)(&tmp_set);
1304 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1305 VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1307 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1308 vg_assert(r == 0);
1310 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1311 vg_assert(r == 0);
1312 tmp_sigill_act = saved_sigill_act;
1314 VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1315 tmp_sigfpe_act = saved_sigfpe_act;
1317 /* NODEFER: signal handler does not return (from the kernel's point of
1318 view), hence if it is to successfully catch a signal more than once,
1319 we need the NODEFER flag. */
1320 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1321 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1322 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1323 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1324 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1326 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
1327 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
1328 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER;
1329 tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
1330 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1332 /* standard FP insns */
1333 have_F = True;
1334 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1335 have_F = False;
1336 } else {
1337 __asm__ __volatile__("fmr 0,0");
1340 /* Altivec insns */
1341 have_V = True;
1342 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1343 have_V = False;
1344 } else {
1345 __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
1348 /* General-Purpose optional (fsqrt, fsqrts) */
1349 have_FX = True;
1350 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1351 have_FX = False;
1352 } else {
1353 __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
1356 /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
1357 have_GX = True;
1358 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1359 have_GX = False;
1360 } else {
1361 __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
1364 /* VSX support implies Power ISA 2.06 */
1365 have_VX = True;
1366 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1367 have_VX = False;
1368 } else {
1369 __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
1372 /* Check for Decimal Floating Point (DFP) support. */
1373 have_DFP = True;
1374 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1375 have_DFP = False;
1376 } else {
1377 __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
1380 /* Check for ISA 2.07 support. */
1381 have_isa_2_07 = True;
1382 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1383 have_isa_2_07 = False;
1384 } else {
1385 __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
1388 /* Check for ISA 3.0 support. */
1389 have_isa_3_0 = True;
1390 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1391 have_isa_3_0 = False;
1392 } else {
1393 __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
1396 /* Check for ISA 3.1 support. */
1397 have_isa_3_1 = True;
1398 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1399 have_isa_3_1 = False;
1400 } else {
1401 __asm__ __volatile__(".long 0x7f1401b6"); /* brh RA, RS */
1404 /* determine dcbz/dcbzl sizes while we still have the signal
1405 * handlers registered */
1406 find_ppc_dcbz_sz(&vai);
1408 VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
1409 VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
1410 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1411 VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d ISA3.1 %d\n",
1412 (Int)have_F, (Int)have_V, (Int)have_FX,
1413 (Int)have_GX, (Int)have_VX, (Int)have_DFP,
1414 (Int)have_isa_2_07, (Int)have_isa_3_0, (Int)have_isa_3_1);
1415 /* on ppc64be, if we don't even have FP, just give up. */
1416 if (!have_F)
1417 return False;
1419 VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;
1421 va = VexArchPPC64;
1422 # if defined(VKI_LITTLE_ENDIAN)
1423 vai.endness = VexEndnessLE;
1424 # elif defined(VKI_BIG_ENDIAN)
1425 vai.endness = VexEndnessBE;
1426 # else
1427 vai.endness = VexEndness_INVALID;
1428 # endif
1430 vai.hwcaps = 0;
1431 if (have_V) vai.hwcaps |= VEX_HWCAPS_PPC64_V;
1432 if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
1433 if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
1434 if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
1435 if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
1436 if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;
1437 if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA3_0;
1438 if (have_isa_3_1) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA3_1;
1440 VG_(machine_get_cache_info)(&vai);
1442 /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
1443 called before we're ready to go. */
1444 return True;
1447 #elif defined(VGA_s390x)
1449 # include "libvex_s390x_common.h"
1452 /* Instruction set detection code borrowed from ppc above. */
1453 vki_sigset_t saved_set, tmp_set;
1454 vki_sigaction_fromK_t saved_sigill_act;
1455 vki_sigaction_toK_t tmp_sigill_act;
1457 volatile Bool have_LDISP, have_STFLE;
1458 Int i, r, model;
1460 /* If the model is "unknown" don't treat this as an error. Assume
1461 this is a brand-new machine model for which we don't have the
1462 identification yet. Keeping fingers crossed. */
1463 model = VG_(get_machine_model)();
1465 /* Unblock SIGILL and stash away the old action for that signal */
1466 VG_(sigemptyset)(&tmp_set);
1467 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1469 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1470 vg_assert(r == 0);
1472 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1473 vg_assert(r == 0);
1474 tmp_sigill_act = saved_sigill_act;
1476 /* NODEFER: signal handler does not return (from the kernel's point of
1477 view), hence if it is to successfully catch a signal more than once,
1478 we need the NODEFER flag. */
1479 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1480 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1481 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1482 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1483 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1485 /* Determine hwcaps. Note, we cannot use the stfle insn because it
1486 is not supported on z900. */
1488 have_LDISP = True;
1489 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1490 have_LDISP = False;
1491 } else {
1492 /* BASR loads the address of the next insn into r1. Needed to avoid
1493 a segfault in XY. */
1494 __asm__ __volatile__("basr %%r1,%%r0\n\t"
1495 ".long 0xe3001000\n\t" /* XY 0,0(%r1) */
1496 ".short 0x0057" : : : "r0", "r1", "cc", "memory");
1499 /* Check availability of STFLE. If available store facility bits
1500 in hoststfle. */
1501 ULong hoststfle[S390_NUM_FACILITY_DW];
1503 for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
1504 hoststfle[i] = 0;
1506 have_STFLE = True;
1507 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1508 have_STFLE = False;
1509 } else {
1510 register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;
1512 __asm__ __volatile__(" .insn s,0xb2b00000,%0\n" /* stfle */
1513 : "=m" (hoststfle), "+d"(reg0)
1514 : : "cc", "memory");
1517 /* Restore signals */
1518 r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
1519 vg_assert(r == 0);
1520 r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1521 vg_assert(r == 0);
1522 va = VexArchS390X;
1523 vai.endness = VexEndnessBE;
1525 vai.hwcaps = model;
1526 if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
1527 if (have_LDISP) {
1528 /* Use long displacement only on machines >= z990. For all other
1529 machines it is millicoded and therefore slow. */
1530 if (model >= VEX_S390X_MODEL_Z990)
1531 vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
1534 /* Detect presence of certain facilities using the STFLE insn.
1535 Note that these facilities were introduced at the same time as, or later
1536 than, STFLE, so the absence of STFLE implies the absence of the facilities
1537 we're trying to detect. */
1538 struct fac_hwcaps_map {
1539 UInt installed;
1540 UInt facility_bit;
1541 UInt hwcaps_bit;
1542 const HChar name[6]; // may need adjustment for new facility names
1543 } fac_hwcaps[] = {
1544 { False, S390_FAC_EIMM, VEX_HWCAPS_S390X_EIMM, "EIMM" },
1545 { False, S390_FAC_GIE, VEX_HWCAPS_S390X_GIE, "GIE" },
1546 { False, S390_FAC_DFP, VEX_HWCAPS_S390X_DFP, "DFP" },
1547 { False, S390_FAC_FPSE, VEX_HWCAPS_S390X_FGX, "FGX" },
1548 { False, S390_FAC_ETF2, VEX_HWCAPS_S390X_ETF2, "ETF2" },
1549 { False, S390_FAC_ETF3, VEX_HWCAPS_S390X_ETF3, "ETF3" },
1550 { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
1551 { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
1552 { False, S390_FAC_LSC, VEX_HWCAPS_S390X_LSC, "LSC" },
1553 { False, S390_FAC_PFPO, VEX_HWCAPS_S390X_PFPO, "PFPO" },
1554 { False, S390_FAC_VX, VEX_HWCAPS_S390X_VX, "VX" },
1555 { False, S390_FAC_MSA5, VEX_HWCAPS_S390X_MSA5, "MSA5" },
1556 { False, S390_FAC_MI2, VEX_HWCAPS_S390X_MI2, "MI2" },
1557 { False, S390_FAC_LSC2, VEX_HWCAPS_S390X_LSC2, "LSC2" },
1558 { False, S390_FAC_VXE, VEX_HWCAPS_S390X_VXE, "VXE" },
1561 /* Set hwcaps according to the detected facilities */
1562 UChar dw_number = 0;
1563 UChar fac_bit = 0;
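/* STFLE numbers facility bits from the most significant bit of the first
   doubleword, so facility N lives in doubleword N/64 at bit position
   63 - (N % 64). */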
1564 for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
1565 vg_assert(fac_hwcaps[i].facility_bit <= 191); // for now
1566 dw_number = fac_hwcaps[i].facility_bit / 64;
1567 fac_bit = fac_hwcaps[i].facility_bit % 64;
1568 if (hoststfle[dw_number] & (1ULL << (63 - fac_bit))) {
1569 fac_hwcaps[i].installed = True;
1570 vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
1574 /* Build up a string showing the probed-for facilities */
1575 HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
1576 (sizeof fac_hwcaps[0].name + 3) + // %s %d
1577 7 + 1 + 4 + 2 // machine %4d
1578 + 1]; // \0
1579 HChar *p = fac_str;
1580 p += VG_(sprintf)(p, "machine %4d ", model);
1581 for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
1582 p += VG_(sprintf)(p, " %s %1u", fac_hwcaps[i].name,
1583 fac_hwcaps[i].installed);
1585 *p++ = '\0';
1587 VG_(debugLog)(1, "machine", "%s\n", fac_str);
1588 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
1590 VG_(machine_get_cache_info)(&vai);
1592 return True;
1595 #elif defined(VGA_arm)
1597 /* Same instruction set detection algorithm as for ppc32. */
1598 vki_sigset_t saved_set, tmp_set;
1599 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
1600 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act;
1602 volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON, have_V8;
1603 volatile Int archlevel;
1604 Int r;
1606 /* This is a kludge. Really we ought to back-convert saved_act
1607 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1608 since that's a no-op on all arm platforms so far supported,
1609 it's not worth the typing effort. At least include the most basic
1610 sanity check: */
1611 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1613 VG_(sigemptyset)(&tmp_set);
1614 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1615 VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1617 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1618 vg_assert(r == 0);
1620 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1621 vg_assert(r == 0);
1622 tmp_sigill_act = saved_sigill_act;
1624 VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1625 tmp_sigfpe_act = saved_sigfpe_act;
1627 /* NODEFER: signal handler does not return (from the kernel's point of
1628 view), hence if it is to successfully catch a signal more than once,
1629 we need the NODEFER flag. */
1630 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1631 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1632 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1633 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1634 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1636 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
1637 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
1638 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER;
1639 tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
1640 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1642 /* VFP insns */
1643 have_VFP = True;
1644 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1645 have_VFP = False;
1646 } else {
1647 __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
1649 /* There are several generations of the VFP extension, but they differ
1650 very little, so for now we do not distinguish between them. */
1651 have_VFP2 = have_VFP;
1652 have_VFP3 = have_VFP;
1654 /* NEON insns */
1655 have_NEON = True;
1656 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1657 have_NEON = False;
1658 } else {
1659 __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
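/* The architecture level is probed top-down: first try an ARMv7-only
   encoding (PLI), and if that faults fall back and try an ARMv6-only
   one (PKHBT).  If both fault, the v5 baseline stands. */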
1662 /* ARM architecture level */
1663 archlevel = 5; /* v5 will be base level */
1664 if (archlevel < 7) {
1665 archlevel = 7;
1666 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1667 archlevel = 5;
1668 } else {
1669 __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
1672 if (archlevel < 6) {
1673 archlevel = 6;
1674 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1675 archlevel = 5;
1676 } else {
1677 __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
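/* VMAXNM is an ARMv8-only AArch32 encoding, so a successful execution
   of the probe below implies a v8 core; NEON and VFP3 must also be
   present before the arch level is raised to 8. */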
1681 /* ARMv8 insns */
1682 have_V8 = True;
1683 if (archlevel == 7) {
1684 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1685 have_V8 = False;
1686 } else {
1687 __asm__ __volatile__(".word 0xF3044F54"); /* VMAXNM.F32 q2,q2,q2 */
1689 if (have_V8 && have_NEON && have_VFP3) {
1690 archlevel = 8;
1694 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
1695 VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
1696 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1697 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1698 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1700 VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
1701 archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
1702 (Int)have_NEON);
1704 VG_(machine_arm_archlevel) = archlevel;
1706 va = VexArchARM;
1707 vai.endness = VexEndnessLE;
1709 vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
1710 if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
1711 if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
1712 if (have_VFP) vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
1713 if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
1715 VG_(machine_get_cache_info)(&vai);
1717 return True;
1720 #elif defined(VGA_arm64)
1722 /* Use the attribute and feature registers to determine host hardware
1723 * capabilities. Only user-space features are read. Naming conventions
1724 * follow the Arm Architecture Reference Manual.
1726 * ID_AA64ISAR0_EL1 Instruction Set Attribute Register 0
1727 * ----------------
1728 * ...5544 4444 4444 3333 3333 3332 2222 2222 1111 1111 11
1729 * ...1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210
1730 * FHM DP SM4 SM3 SHA3 RDM ATOMICS
1732 * ID_AA64ISAR1_EL1 Instruction Set Attribute Register 1
1733 * ----------------
1734 * ...5555 5544 4444 4444 3333 3333 3332 2222 2222 1111 1111 11
1735 * ...5432 1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210
1736 * ...I8MM BF16 DPB
1738 * ID_AA64PFR0_EL1 Processor Feature Register 0
1739 * ---------------
1740 * 6666...2222 2222 1111 1111 11
1741 * 3210...7654 3210 9876 5432 1098 7654 3210
1742 * ASIMD FP16
1745 Bool is_base_v8 = False;
1747 Bool have_fhm, have_dp, have_sm4, have_sm3, have_sha3, have_rdm;
1748 Bool have_atomics, have_i8mm, have_bf16, have_dpbcvap, have_dpbcvadp;
1749 Bool have_vfp16, have_fp16;
1751 have_fhm = have_dp = have_sm4 = have_sm3 = have_sha3 = have_rdm
1752 = have_atomics = have_i8mm = have_bf16 = have_dpbcvap
1753 = have_dpbcvadp = have_vfp16 = have_fp16 = False;
1755 /* Some baseline v8.0 kernels do not allow reads of these registers. Use
1756 * the same SIGILL handling algorithm as other architectures for such
1757 * kernels.
1759 vki_sigset_t saved_set, tmp_set;
1760 vki_sigaction_fromK_t saved_sigill_act;
1761 vki_sigaction_toK_t tmp_sigill_act;
1763 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1765 VG_(sigemptyset)(&tmp_set);
1766 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1768 Int r;
1770 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1771 vg_assert(r == 0);
1773 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1774 vg_assert(r == 0);
1775 tmp_sigill_act = saved_sigill_act;
1777 /* NODEFER: signal handler does not return (from the kernel's point of
1778 view), hence if it is to successfully catch a signal more than once,
1779 we need the NODEFER flag. */
1780 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1781 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1782 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1783 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1784 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1786 /* Does reading ID_AA64ISAR0_EL1 register throw SIGILL on base v8.0? */
1787 if (VG_MINIMAL_SETJMP(env_unsup_insn))
1788 is_base_v8 = True;
1789 else
1790 __asm__ __volatile__("mrs x0, ID_AA64ISAR0_EL1");
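/* (On kernels that emulate user-space reads of the ID registers this
   MRS is trapped and handled transparently; on older base v8.0
   kernels it raises SIGILL and we settle for plain v8.0 below.) */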
1792 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
1793 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1794 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1796 va = VexArchARM64;
1797 vai.endness = VexEndnessLE;
1799 /* Baseline features are v8.0. */
1800 vai.hwcaps = 0;
1802 VG_(machine_get_cache_info)(&vai);
1804 /* Check whether we need to use the fallback LLSC implementation.
1805 If the check fails, give up. */
1806 if (! VG_(parse_cpuinfo)())
1807 return False;
1809 /* 0 denotes 'not set'. The range of legitimate values here,
1810 after being set that is, is 2 through 17 inclusive. */
1811 vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
1812 vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
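/* CTR_EL0.DminLine (bits [19:16]) and CTR_EL0.IminLine (bits [3:0])
   encode log2 of the smallest cache line size in 4-byte words, so
   adding 2 converts them to log2 of the size in bytes.  For example,
   DminLine == 4 means 16 words, i.e. a 64-byte D-cache line. */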
1813 ULong ctr_el0;
1814 __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
1815 vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
1816 vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >> 0) & 0xF) + 2;
1817 VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
1818 "ctr_el0.iMinLine_szB = %d\n",
1819 1 << vai.arm64_dMinLine_lg2_szB,
1820 1 << vai.arm64_iMinLine_lg2_szB);
1821 VG_(debugLog)(1, "machine", "ARM64: requires_fallback_LLSC: %s\n",
1822 vai.arm64_requires_fallback_LLSC ? "yes" : "no");
1824 if (is_base_v8)
1825 return True;
1827 /* ID_AA64ISAR0_EL1 Instruction set attribute register 0 fields */
1828 #define ID_AA64ISAR0_FHM_SHIFT 48
1829 #define ID_AA64ISAR0_DP_SHIFT 44
1830 #define ID_AA64ISAR0_SM4_SHIFT 40
1831 #define ID_AA64ISAR0_SM3_SHIFT 36
1832 #define ID_AA64ISAR0_SHA3_SHIFT 32
1833 #define ID_AA64ISAR0_RDM_SHIFT 28
1834 #define ID_AA64ISAR0_ATOMICS_SHIFT 20
1835 /* Field values */
1836 #define ID_AA64ISAR0_FHM_SUPPORTED 0x1
1837 #define ID_AA64ISAR0_DP_SUPPORTED 0x1
1838 #define ID_AA64ISAR0_SM4_SUPPORTED 0x1
1839 #define ID_AA64ISAR0_SM3_SUPPORTED 0x1
1840 #define ID_AA64ISAR0_SHA3_SUPPORTED 0x1
1841 #define ID_AA64ISAR0_RDM_SUPPORTED 0x1
1842 #define ID_AA64ISAR0_ATOMICS_SUPPORTED 0x2
1844 /* ID_AA64ISAR1_EL1 Instruction set attribute register 1 fields */
1845 #define ID_AA64ISAR1_I8MM_SHIFT 52
1846 #define ID_AA64ISAR1_BF16_SHIFT 44
1847 #define ID_AA64ISAR1_DPB_SHIFT 0
1848 /* Field values */
1849 #define ID_AA64ISAR1_I8MM_SUPPORTED 0x1
1850 #define ID_AA64ISAR1_BF16_SUPPORTED 0x1
1851 #define ID_AA64ISAR1_DPBCVAP_SUPPORTED 0x1
1852 #define ID_AA64ISAR1_DPBCVADP_SUPPORTED 0x2
1854 /* ID_AA64PFR0_EL1 Processor feature register 0 fields */
1855 #define ID_AA64PFR0_VFP16_SHIFT 20
1856 #define ID_AA64PFR0_FP16_SHIFT 16
1857 /* Field values */
1858 #define ID_AA64PFR0_VFP16_SUPPORTED 0x1
1859 #define ID_AA64PFR0_FP16_SUPPORTED 0x1
1861 #define get_cpu_ftr(id) ({ \
1862 unsigned long val; \
1863 asm("mrs %0, "#id : "=r" (val)); \
1864 VG_(debugLog)(1, "machine", "ARM64: %-20s: 0x%016lx\n", #id, val); \
1866 get_cpu_ftr(ID_AA64ISAR0_EL1);
1867 get_cpu_ftr(ID_AA64ISAR1_EL1);
1868 get_cpu_ftr(ID_AA64PFR0_EL1);
1870 #define get_ftr(id, ftr, fval, have_ftr) ({ \
1871 unsigned long rval; \
1872 asm("mrs %0, "#id : "=r" (rval)); \
1873 have_ftr = (fval & ((rval >> ftr) & 0xf)) >= fval ? True : False; \
1876 /* Read ID_AA64ISAR0_EL1 attributes */
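/* get_ftr extracts the 4-bit ID field at the given shift and sets
   have_ftr if the field, masked with the expected value, is at least
   that value.  For example, the ATOMICS field occupies bits [23:20]
   of ID_AA64ISAR0_EL1, and a field value of 0x2 (LSE atomics
   implemented) makes have_atomics True. */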
1878 /* FHM indicates support for FMLAL and FMLSL instructions.
1879 * Optional for v8.2.
1881 get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT,
1882 ID_AA64ISAR0_FHM_SUPPORTED, have_fhm);
1884 /* DP indicates support for UDOT and SDOT instructions.
1885 * Optional for v8.2.
1887 get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT,
1888 ID_AA64ISAR0_DP_SUPPORTED, have_dp);
1890 /* SM4 indicates support for SM4E and SM4EKEY instructions.
1891 * Optional for v8.2.
1893 get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT,
1894 ID_AA64ISAR0_SM4_SUPPORTED, have_sm4);
1896 /* SM3 indicates support for SM3SS1, SM3TT1A, SM3TT1B, SM3TT2A, SM3TT2B,
1897 * SM3PARTW1, and SM3PARTW2 instructions.
1898 * Optional for v8.2.
1900 get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT,
1901 ID_AA64ISAR0_SM3_SUPPORTED, have_sm3);
1903 /* SHA3 indicates support for EOR3, RAX1, XAR, and BCAX instructions.
1904 * Optional for v8.2.
1906 get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT,
1907 ID_AA64ISAR0_SHA3_SUPPORTED, have_sha3);
1909 /* RDM indicates support for SQRDMLAH and SQRDMLSH instructions.
1910 * Mandatory from v8.1 onwards.
1912 get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT,
1913 ID_AA64ISAR0_RDM_SUPPORTED, have_rdm);
1915 /* v8.1 ATOMICS indicates support for LDADD, LDCLR, LDEOR, LDSET, LDSMAX,
1916 * LDSMIN, LDUMAX, LDUMIN, CAS, CASP, and SWP instructions.
1917 * Mandatory from v8.1 onwards.
1919 get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT,
1920 ID_AA64ISAR0_ATOMICS_SUPPORTED, have_atomics);
1922 /* Read ID_AA64ISAR1_EL1 attributes */
1924 /* I8MM indicates support for SMMLA, SUDOT, UMMLA, USMMLA, and USDOT
1925 * instructions.
1926 * Optional for v8.2.
1928 get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT,
1929 ID_AA64ISAR1_I8MM_SUPPORTED, have_i8mm);
1931 /* BF16 indicates support for BFDOT, BFMLAL, BFMLAL2, BFMMLA, BFCVT, and
1932 * BFCVT2 instructions.
1933 * Optional for v8.2.
1935 get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT,
1936 ID_AA64ISAR1_BF16_SUPPORTED, have_bf16);
1938 /* DPB indicates support for DC CVAP instruction.
1939 * Mandatory from v8.2 onwards.
1941 get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT,
1942 ID_AA64ISAR1_DPBCVAP_SUPPORTED, have_dpbcvap);
1944 /* DPB indicates support for DC CVADP instruction.
1945 * Optional for v8.2.
1947 get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT,
1948 ID_AA64ISAR1_DPBCVADP_SUPPORTED, have_dpbcvadp);
1950 /* Read ID_AA64PFR0_EL1 attributes */
1952 /* VFP16 indicates support for half-precision vector arithmetic.
1953 * Optional for v8.2. Must be the same value as FP16.
1955 get_ftr(ID_AA64PFR0_EL1, ID_AA64PFR0_VFP16_SHIFT,
1956 ID_AA64PFR0_VFP16_SUPPORTED, have_vfp16);
1958 /* FP16 indicates support for half-precision scalar arithmetic.
1959 * Optional for v8.2. Must be the same value as VFP16.
1961 get_ftr(ID_AA64PFR0_EL1, ID_AA64PFR0_FP16_SHIFT,
1962 ID_AA64PFR0_FP16_SUPPORTED, have_fp16);
1964 if (have_fhm) vai.hwcaps |= VEX_HWCAPS_ARM64_FHM;
1965 if (have_dpbcvap) vai.hwcaps |= VEX_HWCAPS_ARM64_DPBCVAP;
1966 if (have_dpbcvadp) vai.hwcaps |= VEX_HWCAPS_ARM64_DPBCVADP;
1967 if (have_sm3) vai.hwcaps |= VEX_HWCAPS_ARM64_SM3;
1968 if (have_sm4) vai.hwcaps |= VEX_HWCAPS_ARM64_SM4;
1969 if (have_sha3) vai.hwcaps |= VEX_HWCAPS_ARM64_SHA3;
1970 if (have_rdm) vai.hwcaps |= VEX_HWCAPS_ARM64_RDM;
1971 if (have_i8mm) vai.hwcaps |= VEX_HWCAPS_ARM64_I8MM;
1972 if (have_atomics) vai.hwcaps |= VEX_HWCAPS_ARM64_ATOMICS;
1973 if (have_bf16) vai.hwcaps |= VEX_HWCAPS_ARM64_BF16;
1974 if (have_fp16) vai.hwcaps |= VEX_HWCAPS_ARM64_FP16;
1975 if (have_vfp16) vai.hwcaps |= VEX_HWCAPS_ARM64_VFP16;
1977 #undef get_cpu_ftr
1978 #undef get_ftr
1980 return True;
1983 #elif defined(VGA_mips32)
1985 /* Define the position of F64 bit in FIR register. */
1986 # define FP64 22
1987 va = VexArchMIPS32;
1988 if (!VG_(parse_cpuinfo)())
1989 return False;
1991 # if defined(VKI_LITTLE_ENDIAN)
1992 vai.endness = VexEndnessLE;
1993 # elif defined(VKI_BIG_ENDIAN)
1994 vai.endness = VexEndnessBE;
1995 # else
1996 vai.endness = VexEndness_INVALID;
1997 # endif
1999 /* Same instruction set detection algorithm as for ppc32/arm... */
2000 vki_sigset_t saved_set, tmp_set;
2001 vki_sigaction_fromK_t saved_sigill_act;
2002 vki_sigaction_toK_t tmp_sigill_act;
2004 volatile Bool have_DSP, have_DSPr2, have_MSA;
2005 Int r;
2007 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
2009 VG_(sigemptyset)(&tmp_set);
2010 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
2012 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
2013 vg_assert(r == 0);
2015 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
2016 vg_assert(r == 0);
2017 tmp_sigill_act = saved_sigill_act;
2019 /* NODEFER: signal handler does not return (from the kernel's point of
2020 view), hence if it is to successfully catch a signal more than once,
2021 we need the NODEFER flag. */
2022 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
2023 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
2024 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
2025 tmp_sigill_act.ksa_handler = handler_unsup_insn;
2026 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
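/* Probe from the most capable extension downwards: MSA implies a
   P5600-class core, DSPr2 a 74K, and plain DSP a 34K.  Each probe
   executes one representative instruction and relies on the SIGILL
   handler above when the extension is absent. */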
2028 if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {
2030 /* MSA instructions. */
2031 have_MSA = True;
2032 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
2033 have_MSA = False;
2034 } else {
2035 __asm__ __volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
2037 if (have_MSA) {
2038 vai.hwcaps |= VEX_PRID_IMP_P5600;
2039 } else {
2040 /* DSPr2 instructions. */
2041 have_DSPr2 = True;
2042 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
2043 have_DSPr2 = False;
2044 } else {
2045 __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
2047 if (have_DSPr2) {
2048 /* We assume it's 74K, since it can run DSPr2. */
2049 vai.hwcaps |= VEX_PRID_IMP_74K;
2050 } else {
2051 /* DSP instructions. */
2052 have_DSP = True;
2053 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
2054 have_DSP = False;
2055 } else {
2056 __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
2058 if (have_DSP) {
2059 /* We assume it's 34K, since it has support for DSP. */
2060 vai.hwcaps |= VEX_PRID_IMP_34K;
2066 # if defined(VGP_mips32_linux)
2067 Int fpmode = VG_(prctl)(VKI_PR_GET_FP_MODE, 0, 0, 0, 0);
2068 # else
2069 Int fpmode = -1;
2070 # endif
2072 if (fpmode < 0) {
2073 /* prctl(PR_GET_FP_MODE) is not supported by the kernel, so we use
2074 an alternative way to determine the FP mode. */
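/* The idea behind the probe below: load $f0 with zero, then write
   0x3FF00000 into $f1 with mtc1.  In FR=0 mode $f0/$f1 form one
   64-bit pair, so storing $f0 back yields 0x3FF0000000000000 (1.0);
   in FR=1 mode $f0 is an independent 64-bit register and the stored
   value stays zero.  The comparison below turns that into fpmode. */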
2075 ULong result = 0;
2077 if (!VG_MINIMAL_SETJMP(env_unsup_insn)) {
2078 __asm__ volatile (
2079 ".set push\n\t"
2080 ".set noreorder\n\t"
2081 ".set oddspreg\n\t"
2082 ".set hardfloat\n\t"
2083 "lui $t0, 0x3FF0\n\t"
2084 "ldc1 $f0, %0\n\t"
2085 "mtc1 $t0, $f1\n\t"
2086 "sdc1 $f0, %0\n\t"
2087 ".set pop\n\t"
2088 : "+m"(result)
2090 : "t0", "$f0", "$f1", "memory");
2092 fpmode = (result != 0x3FF0000000000000ull);
2096 if (fpmode != 0)
2097 vai.hwcaps |= VEX_MIPS_HOST_FR;
2099 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
2100 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
2101 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
2103 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
2104 VG_(machine_get_cache_info)(&vai);
2106 return True;
2109 #elif defined(VGA_mips64)
2111 va = VexArchMIPS64;
2112 if (!VG_(parse_cpuinfo)())
2113 return False;
2115 # if defined(VKI_LITTLE_ENDIAN)
2116 vai.endness = VexEndnessLE;
2117 # elif defined(VKI_BIG_ENDIAN)
2118 vai.endness = VexEndnessBE;
2119 # else
2120 vai.endness = VexEndness_INVALID;
2121 # endif
2123 vai.hwcaps |= VEX_MIPS_HOST_FR;
2125 /* Same instruction set detection algorithm as for ppc32/arm... */
2126 vki_sigset_t saved_set, tmp_set;
2127 vki_sigaction_fromK_t saved_sigill_act;
2128 vki_sigaction_toK_t tmp_sigill_act;
2130 volatile Bool have_MSA;
2131 Int r;
2133 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
2135 VG_(sigemptyset)(&tmp_set);
2136 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
2138 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
2139 vg_assert(r == 0);
2141 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
2142 vg_assert(r == 0);
2143 tmp_sigill_act = saved_sigill_act;
2145 /* NODEFER: signal handler does not return (from the kernel's point of
2146 view), hence if it is to successfully catch a signal more than once,
2147 we need the NODEFER flag. */
2148 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
2149 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
2150 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
2151 tmp_sigill_act.ksa_handler = handler_unsup_insn;
2152 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
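/* As on mips32, probe for MSA with a representative instruction and
   treat an MSA-capable MIPS core as P5600-class. */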
2154 if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {
2156 /* MSA instructions */
2157 have_MSA = True;
2158 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
2159 have_MSA = False;
2160 } else {
2161 __asm__ __volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
2163 if (have_MSA) {
2164 vai.hwcaps |= VEX_PRID_IMP_P5600;
2168 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
2169 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
2170 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
2172 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
2174 VG_(machine_get_cache_info)(&vai);
2176 return True;
2179 #elif defined(VGP_nanomips_linux)
2181 va = VexArchNANOMIPS;
2182 vai.hwcaps = 0;
2184 # if defined(VKI_LITTLE_ENDIAN)
2185 vai.endness = VexEndnessLE;
2186 # elif defined(VKI_BIG_ENDIAN)
2187 vai.endness = VexEndnessBE;
2188 # else
2189 vai.endness = VexEndness_INVALID;
2190 # endif
2192 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
2194 VG_(machine_get_cache_info)(&vai);
2196 return True;
2198 #else
2199 # error "Unknown arch"
2200 #endif
2203 /* Notify host cpu instruction cache line size. */
2204 #if defined(VGA_ppc32)
2205 void VG_(machine_ppc32_set_clszB)( Int szB )
2207 vg_assert(hwcaps_done);
2209 /* Either the value must not have been set yet (zero) or we can
2210 tolerate it being set to the same value multiple times, as the
2211 stack scanning logic in m_main is a bit stupid. */
2212 vg_assert(vai.ppc_icache_line_szB == 0
2213 || vai.ppc_icache_line_szB == szB);
2215 vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
2216 vai.ppc_icache_line_szB = szB;
2218 #endif
2221 /* Notify host cpu instruction cache line size. */
2222 #if defined(VGA_ppc64be)|| defined(VGA_ppc64le)
2223 void VG_(machine_ppc64_set_clszB)( Int szB )
2225 vg_assert(hwcaps_done);
2227 /* Either the value must not have been set yet (zero) or we can
2228 tolerate it being set to the same value multiple times, as the
2229 stack scanning logic in m_main is a bit stupid. */
2230 vg_assert(vai.ppc_icache_line_szB == 0
2231 || vai.ppc_icache_line_szB == szB);
2233 vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
2234 vai.ppc_icache_line_szB = szB;
2236 #endif
2239 /* Notify host's ability to handle NEON instructions. */
2240 #if defined(VGA_arm)
2241 void VG_(machine_arm_set_has_NEON)( Bool has_neon )
2243 vg_assert(hwcaps_done);
2244 /* There's nothing else we can sanity check. */
2246 if (has_neon) {
2247 vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
2248 } else {
2249 vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
2252 #endif
2255 /* Fetch host cpu info, once established. */
2256 void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
2257 /*OUT*/VexArchInfo* pVai )
2259 vg_assert(hwcaps_done);
2260 if (pVa) *pVa = va;
2261 if (pVai) *pVai = vai;
2265 /* Returns the size of the largest guest register that we will
2266 simulate in this run. This depends on both the guest architecture
2267 and on the specific capabilities we are simulating for that guest
2268 (e.g. AVX or non-AVX, for amd64). Should return either 4, 8, 16
2269 or 32. General rule: if in doubt, return a value larger than
2270 reality.
2272 This information is needed by Cachegrind and Callgrind to decide
2273 what the minimum cache line size they are prepared to simulate is.
2274 Basically require that the minimum cache line size is at least as
2275 large as the largest register that might get transferred to/from
2276 memory, so as to guarantee that any such transaction can straddle
2277 at most 2 cache lines.
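For example, a 32-byte AVX transfer can straddle three 16-byte lines
but at most two 32-byte lines, so with AVX the reported size of 32
forces the simulated line size up to at least 32.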
2279 Int VG_(machine_get_size_of_largest_guest_register) ( void )
2281 vg_assert(hwcaps_done);
2282 /* Once hwcaps_done is True, we can fish around inside va/vai to
2283 find the information we need. */
2285 # if defined(VGA_x86)
2286 vg_assert(va == VexArchX86);
2287 /* We don't support AVX, so 32 is out. At the other end, even if
2288 we don't support any SSE, the X87 can generate 10 byte
2289 transfers, so let's say 16 to be on the safe side. Hence the
2290 answer is always 16. */
2291 return 16;
2293 # elif defined(VGA_amd64)
2294 /* if AVX then 32 else 16 */
2295 return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;
2297 # elif defined(VGA_ppc32)
2298 /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
2299 if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
2300 if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
2301 if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
2302 return 8;
2304 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
2305 /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
2306 if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
2307 if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
2308 if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
2309 return 8;
2311 # elif defined(VGA_s390x)
2312 return 8;
2314 # elif defined(VGA_arm)
2315 /* Really it depends on whether or not we have NEON, but let's just
2316 assume we always do. */
2317 return 16;
2319 # elif defined(VGA_arm64)
2320 /* ARM64 always has Neon, AFAICS. */
2321 return 16;
2323 # elif defined(VGA_mips32) || defined(VGP_nanomips_linux)
2324 /* The guest state implies 4, but that can't really be true, can
2325 it? */
2326 return 8;
2328 # elif defined(VGA_mips64)
2329 return 8;
2331 # else
2332 # error "Unknown arch"
2333 # endif
2337 // Given a pointer to a function as obtained by "& functionname" in C,
2338 // produce a pointer to the actual entry point for the function.
2339 void* VG_(fnptr_to_fnentry)( void* f )
2341 # if defined(VGP_x86_linux) || defined(VGP_amd64_linux) \
2342 || defined(VGP_arm_linux) || defined(VGO_darwin) \
2343 || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
2344 || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
2345 || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
2346 || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris) \
2347 || defined(VGP_nanomips_linux)
2348 return f;
2349 # elif defined(VGP_ppc64be_linux)
2350 /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
2351 3-word function descriptor, of which the first word is the entry
2352 address. */
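/* (The second and third words of the descriptor hold the TOC pointer
   and the environment pointer; neither is needed just to locate the
   entry point.) */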
2353 UWord* descr = (UWord*)f;
2354 return (void*)(descr[0]);
2355 # else
2356 # error "Unknown platform"
2357 # endif
2360 /*--------------------------------------------------------------------*/
2361 /*--- end ---*/
2362 /*--------------------------------------------------------------------*/