valgrind.git / coregrind / m_machine.c
1 /*--------------------------------------------------------------------*/
2 /*--- Machine-related stuff. m_machine.c ---*/
3 /*--------------------------------------------------------------------*/
5 /*
6 This file is part of Valgrind, a dynamic binary instrumentation
7 framework.
9 Copyright (C) 2000-2017 Julian Seward
10 jseward@acm.org
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, see <http://www.gnu.org/licenses/>.
25 The GNU General Public License is contained in the file COPYING.
28 #include "pub_core_basics.h"
29 #include "pub_core_vki.h"
30 #include "pub_core_threadstate.h"
31 #include "pub_core_libcassert.h"
32 #include "pub_core_libcbase.h"
33 #include "pub_core_libcprint.h"
34 #include "pub_core_libcfile.h"
36 #include "pub_core_libcproc.h"
37 #include "pub_core_mallocfree.h"
38 #include "pub_core_machine.h"
39 #include "pub_core_cpuid.h"
40 #include "pub_core_libcsignal.h" // for ppc32 messing with SIGILL and SIGFPE
41 #include "pub_core_debuglog.h"
44 #define INSTR_PTR(regs) ((regs).vex.VG_INSTR_PTR)
45 #define STACK_PTR(regs) ((regs).vex.VG_STACK_PTR)
46 #define FRAME_PTR(regs) ((regs).vex.VG_FRAME_PTR)
48 #define STACK_PTR_S1(regs) ((regs).vex_shadow1.VG_STACK_PTR)
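/* Note: VG_INSTR_PTR, VG_STACK_PTR and VG_FRAME_PTR are per-architecture
   guest-state field names supplied by pub_core_machine.h (for example, on
   amd64 they presumably name guest_RIP, guest_RSP and guest_RBP), so the
   accessors below work unchanged on every target. */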
50 Addr VG_(get_IP) ( ThreadId tid ) {
51 return INSTR_PTR( VG_(threads)[tid].arch );
53 Addr VG_(get_SP) ( ThreadId tid ) {
54 return STACK_PTR( VG_(threads)[tid].arch );
56 Addr VG_(get_FP) ( ThreadId tid ) {
57 return FRAME_PTR( VG_(threads)[tid].arch );
60 Addr VG_(get_SP_s1) ( ThreadId tid ) {
61 return STACK_PTR_S1( VG_(threads)[tid].arch );
63 void VG_(set_SP_s1) ( ThreadId tid, Addr sp ) {
64 STACK_PTR_S1( VG_(threads)[tid].arch ) = sp;
67 void VG_(set_IP) ( ThreadId tid, Addr ip ) {
68 INSTR_PTR( VG_(threads)[tid].arch ) = ip;
70 void VG_(set_SP) ( ThreadId tid, Addr sp ) {
71 STACK_PTR( VG_(threads)[tid].arch ) = sp;
74 void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
75 ThreadId tid )
77 # if defined(VGA_x86)
78 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
79 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
80 regs->misc.X86.r_ebp
81 = VG_(threads)[tid].arch.vex.guest_EBP;
82 # elif defined(VGA_amd64)
83 regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
84 regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
85 regs->misc.AMD64.r_rbp
86 = VG_(threads)[tid].arch.vex.guest_RBP;
87 # elif defined(VGA_ppc32)
88 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
89 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
90 regs->misc.PPC32.r_lr
91 = VG_(threads)[tid].arch.vex.guest_LR;
92 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
93 regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
94 regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
95 regs->misc.PPC64.r_lr
96 = VG_(threads)[tid].arch.vex.guest_LR;
97 # elif defined(VGA_arm)
98 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
99 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
100 regs->misc.ARM.r14
101 = VG_(threads)[tid].arch.vex.guest_R14;
102 regs->misc.ARM.r12
103 = VG_(threads)[tid].arch.vex.guest_R12;
104 regs->misc.ARM.r11
105 = VG_(threads)[tid].arch.vex.guest_R11;
106 regs->misc.ARM.r7
107 = VG_(threads)[tid].arch.vex.guest_R7;
108 # elif defined(VGA_arm64)
109 regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
110 regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
111 regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
112 regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
113 # elif defined(VGA_s390x)
114 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
115 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
116 regs->misc.S390X.r_fp
117 = VG_(threads)[tid].arch.vex.guest_FP;
118 regs->misc.S390X.r_lr
119 = VG_(threads)[tid].arch.vex.guest_LR;
120 /* ANDREAS 3 Apr 2019 FIXME r_f0..r_f7: is this correct? */
121 regs->misc.S390X.r_f0
122 = VG_(threads)[tid].arch.vex.guest_v0.w64[0];
123 regs->misc.S390X.r_f1
124 = VG_(threads)[tid].arch.vex.guest_v1.w64[0];
125 regs->misc.S390X.r_f2
126 = VG_(threads)[tid].arch.vex.guest_v2.w64[0];
127 regs->misc.S390X.r_f3
128 = VG_(threads)[tid].arch.vex.guest_v3.w64[0];
129 regs->misc.S390X.r_f4
130 = VG_(threads)[tid].arch.vex.guest_v4.w64[0];
131 regs->misc.S390X.r_f5
132 = VG_(threads)[tid].arch.vex.guest_v5.w64[0];
133 regs->misc.S390X.r_f6
134 = VG_(threads)[tid].arch.vex.guest_v6.w64[0];
135 regs->misc.S390X.r_f7
136 = VG_(threads)[tid].arch.vex.guest_v7.w64[0];
137 # elif defined(VGA_mips32) || defined(VGP_nanomips_linux)
138 regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
139 regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
140 regs->misc.MIPS32.r30
141 = VG_(threads)[tid].arch.vex.guest_r30;
142 regs->misc.MIPS32.r31
143 = VG_(threads)[tid].arch.vex.guest_r31;
144 regs->misc.MIPS32.r28
145 = VG_(threads)[tid].arch.vex.guest_r28;
146 # elif defined(VGA_mips64)
147 regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
148 regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
149 regs->misc.MIPS64.r30
150 = VG_(threads)[tid].arch.vex.guest_r30;
151 regs->misc.MIPS64.r31
152 = VG_(threads)[tid].arch.vex.guest_r31;
153 regs->misc.MIPS64.r28
154 = VG_(threads)[tid].arch.vex.guest_r28;
155 # else
156 # error "Unknown arch"
157 # endif
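/* In every case above, only the minimal per-architecture register set needed
   to start a stack unwind is captured: the program counter, the stack
   pointer, and a few extras such as the frame pointer or link register in
   regs->misc. */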
160 void
161 VG_(get_shadow_regs_area) ( ThreadId tid,
162 /*DST*/UChar* dst,
163 /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
165 void* src;
166 ThreadState* tst;
167 vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
168 vg_assert(VG_(is_valid_tid)(tid));
169 // Bounds check
170 vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
171 vg_assert(offset + size <= sizeof(VexGuestArchState));
172 // Copy
173 tst = & VG_(threads)[tid];
174 src = NULL;
175 switch (shadowNo) {
176 case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
177 case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
178 case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
180 vg_assert(src != NULL);
181 VG_(memcpy)( dst, src, size);
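/* Usage sketch (illustrative only): a tool that wants 8 bytes of shadow-1
   state for a guest register whose byte offset within VexGuestArchState is
   `o` would do
       UChar buf[8];
       VG_(get_shadow_regs_area)(tid, buf, 1, o, 8);
   shadowNo 0 reads the real guest state; 1 and 2 read the two shadow
   copies. The offset/size pair must stay within the guest state, as the
   asserts above enforce. */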
184 void
185 VG_(set_shadow_regs_area) ( ThreadId tid,
186 /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
187 /*SRC*/const UChar* src )
189 void* dst;
190 ThreadState* tst;
191 vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
192 vg_assert(VG_(is_valid_tid)(tid));
193 // Bounds check
194 vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
195 vg_assert(offset + size <= sizeof(VexGuestArchState));
196 // Copy
197 tst = & VG_(threads)[tid];
198 dst = NULL;
199 switch (shadowNo) {
200 case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
201 case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
202 case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
204 vg_assert(dst != NULL);
205 VG_(memcpy)( dst, src, size);
209 static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
210 const HChar*, Addr))
212 VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
213 VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %u\n", tid);
214 #if defined(VGA_x86)
215 (*f)(tid, "EAX", vex->guest_EAX);
216 (*f)(tid, "ECX", vex->guest_ECX);
217 (*f)(tid, "EDX", vex->guest_EDX);
218 (*f)(tid, "EBX", vex->guest_EBX);
219 (*f)(tid, "ESI", vex->guest_ESI);
220 (*f)(tid, "EDI", vex->guest_EDI);
221 (*f)(tid, "ESP", vex->guest_ESP);
222 (*f)(tid, "EBP", vex->guest_EBP);
223 #elif defined(VGA_amd64)
224 (*f)(tid, "RAX", vex->guest_RAX);
225 (*f)(tid, "RCX", vex->guest_RCX);
226 (*f)(tid, "RDX", vex->guest_RDX);
227 (*f)(tid, "RBX", vex->guest_RBX);
228 (*f)(tid, "RSI", vex->guest_RSI);
229 (*f)(tid, "RDI", vex->guest_RDI);
230 (*f)(tid, "RSP", vex->guest_RSP);
231 (*f)(tid, "RBP", vex->guest_RBP);
232 (*f)(tid, "R8" , vex->guest_R8 );
233 (*f)(tid, "R9" , vex->guest_R9 );
234 (*f)(tid, "R10", vex->guest_R10);
235 (*f)(tid, "R11", vex->guest_R11);
236 (*f)(tid, "R12", vex->guest_R12);
237 (*f)(tid, "R13", vex->guest_R13);
238 (*f)(tid, "R14", vex->guest_R14);
239 (*f)(tid, "R15", vex->guest_R15);
240 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
241 (*f)(tid, "GPR0" , vex->guest_GPR0 );
242 (*f)(tid, "GPR1" , vex->guest_GPR1 );
243 (*f)(tid, "GPR2" , vex->guest_GPR2 );
244 (*f)(tid, "GPR3" , vex->guest_GPR3 );
245 (*f)(tid, "GPR4" , vex->guest_GPR4 );
246 (*f)(tid, "GPR5" , vex->guest_GPR5 );
247 (*f)(tid, "GPR6" , vex->guest_GPR6 );
248 (*f)(tid, "GPR7" , vex->guest_GPR7 );
249 (*f)(tid, "GPR8" , vex->guest_GPR8 );
250 (*f)(tid, "GPR9" , vex->guest_GPR9 );
251 (*f)(tid, "GPR10", vex->guest_GPR10);
252 (*f)(tid, "GPR11", vex->guest_GPR11);
253 (*f)(tid, "GPR12", vex->guest_GPR12);
254 (*f)(tid, "GPR13", vex->guest_GPR13);
255 (*f)(tid, "GPR14", vex->guest_GPR14);
256 (*f)(tid, "GPR15", vex->guest_GPR15);
257 (*f)(tid, "GPR16", vex->guest_GPR16);
258 (*f)(tid, "GPR17", vex->guest_GPR17);
259 (*f)(tid, "GPR18", vex->guest_GPR18);
260 (*f)(tid, "GPR19", vex->guest_GPR19);
261 (*f)(tid, "GPR20", vex->guest_GPR20);
262 (*f)(tid, "GPR21", vex->guest_GPR21);
263 (*f)(tid, "GPR22", vex->guest_GPR22);
264 (*f)(tid, "GPR23", vex->guest_GPR23);
265 (*f)(tid, "GPR24", vex->guest_GPR24);
266 (*f)(tid, "GPR25", vex->guest_GPR25);
267 (*f)(tid, "GPR26", vex->guest_GPR26);
268 (*f)(tid, "GPR27", vex->guest_GPR27);
269 (*f)(tid, "GPR28", vex->guest_GPR28);
270 (*f)(tid, "GPR29", vex->guest_GPR29);
271 (*f)(tid, "GPR30", vex->guest_GPR30);
272 (*f)(tid, "GPR31", vex->guest_GPR31);
273 (*f)(tid, "CTR" , vex->guest_CTR );
274 (*f)(tid, "LR" , vex->guest_LR );
275 #elif defined(VGA_arm)
276 (*f)(tid, "R0" , vex->guest_R0 );
277 (*f)(tid, "R1" , vex->guest_R1 );
278 (*f)(tid, "R2" , vex->guest_R2 );
279 (*f)(tid, "R3" , vex->guest_R3 );
280 (*f)(tid, "R4" , vex->guest_R4 );
281 (*f)(tid, "R5" , vex->guest_R5 );
282 (*f)(tid, "R6" , vex->guest_R6 );
283 (*f)(tid, "R8" , vex->guest_R8 );
284 (*f)(tid, "R9" , vex->guest_R9 );
285 (*f)(tid, "R10", vex->guest_R10);
286 (*f)(tid, "R11", vex->guest_R11);
287 (*f)(tid, "R12", vex->guest_R12);
288 (*f)(tid, "R13", vex->guest_R13);
289 (*f)(tid, "R14", vex->guest_R14);
290 #elif defined(VGA_s390x)
291 (*f)(tid, "r0" , vex->guest_r0 );
292 (*f)(tid, "r1" , vex->guest_r1 );
293 (*f)(tid, "r2" , vex->guest_r2 );
294 (*f)(tid, "r3" , vex->guest_r3 );
295 (*f)(tid, "r4" , vex->guest_r4 );
296 (*f)(tid, "r5" , vex->guest_r5 );
297 (*f)(tid, "r6" , vex->guest_r6 );
298 (*f)(tid, "r7" , vex->guest_r7 );
299 (*f)(tid, "r8" , vex->guest_r8 );
300 (*f)(tid, "r9" , vex->guest_r9 );
301 (*f)(tid, "r10", vex->guest_r10);
302 (*f)(tid, "r11", vex->guest_r11);
303 (*f)(tid, "r12", vex->guest_r12);
304 (*f)(tid, "r13", vex->guest_r13);
305 (*f)(tid, "r14", vex->guest_r14);
306 (*f)(tid, "r15", vex->guest_r15);
307 #elif defined(VGA_mips32) || defined(VGA_mips64) || defined(VGP_nanomips_linux)
308 (*f)(tid, "r0" , vex->guest_r0 );
309 (*f)(tid, "r1" , vex->guest_r1 );
310 (*f)(tid, "r2" , vex->guest_r2 );
311 (*f)(tid, "r3" , vex->guest_r3 );
312 (*f)(tid, "r4" , vex->guest_r4 );
313 (*f)(tid, "r5" , vex->guest_r5 );
314 (*f)(tid, "r6" , vex->guest_r6 );
315 (*f)(tid, "r7" , vex->guest_r7 );
316 (*f)(tid, "r8" , vex->guest_r8 );
317 (*f)(tid, "r9" , vex->guest_r9 );
318 (*f)(tid, "r10", vex->guest_r10);
319 (*f)(tid, "r11", vex->guest_r11);
320 (*f)(tid, "r12", vex->guest_r12);
321 (*f)(tid, "r13", vex->guest_r13);
322 (*f)(tid, "r14", vex->guest_r14);
323 (*f)(tid, "r15", vex->guest_r15);
324 (*f)(tid, "r16", vex->guest_r16);
325 (*f)(tid, "r17", vex->guest_r17);
326 (*f)(tid, "r18", vex->guest_r18);
327 (*f)(tid, "r19", vex->guest_r19);
328 (*f)(tid, "r20", vex->guest_r20);
329 (*f)(tid, "r21", vex->guest_r21);
330 (*f)(tid, "r22", vex->guest_r22);
331 (*f)(tid, "r23", vex->guest_r23);
332 (*f)(tid, "r24", vex->guest_r24);
333 (*f)(tid, "r25", vex->guest_r25);
334 (*f)(tid, "r26", vex->guest_r26);
335 (*f)(tid, "r27", vex->guest_r27);
336 (*f)(tid, "r28", vex->guest_r28);
337 (*f)(tid, "r29", vex->guest_r29);
338 (*f)(tid, "r30", vex->guest_r30);
339 (*f)(tid, "r31", vex->guest_r31);
340 #elif defined(VGA_arm64)
341 (*f)(tid, "x0" , vex->guest_X0 );
342 (*f)(tid, "x1" , vex->guest_X1 );
343 (*f)(tid, "x2" , vex->guest_X2 );
344 (*f)(tid, "x3" , vex->guest_X3 );
345 (*f)(tid, "x4" , vex->guest_X4 );
346 (*f)(tid, "x5" , vex->guest_X5 );
347 (*f)(tid, "x6" , vex->guest_X6 );
348 (*f)(tid, "x7" , vex->guest_X7 );
349 (*f)(tid, "x8" , vex->guest_X8 );
350 (*f)(tid, "x9" , vex->guest_X9 );
351 (*f)(tid, "x10", vex->guest_X10);
352 (*f)(tid, "x11", vex->guest_X11);
353 (*f)(tid, "x12", vex->guest_X12);
354 (*f)(tid, "x13", vex->guest_X13);
355 (*f)(tid, "x14", vex->guest_X14);
356 (*f)(tid, "x15", vex->guest_X15);
357 (*f)(tid, "x16", vex->guest_X16);
358 (*f)(tid, "x17", vex->guest_X17);
359 (*f)(tid, "x18", vex->guest_X18);
360 (*f)(tid, "x19", vex->guest_X19);
361 (*f)(tid, "x20", vex->guest_X20);
362 (*f)(tid, "x21", vex->guest_X21);
363 (*f)(tid, "x22", vex->guest_X22);
364 (*f)(tid, "x23", vex->guest_X23);
365 (*f)(tid, "x24", vex->guest_X24);
366 (*f)(tid, "x25", vex->guest_X25);
367 (*f)(tid, "x26", vex->guest_X26);
368 (*f)(tid, "x27", vex->guest_X27);
369 (*f)(tid, "x28", vex->guest_X28);
370 (*f)(tid, "x29", vex->guest_X29);
371 (*f)(tid, "x30", vex->guest_X30);
372 #else
373 # error Unknown arch
374 #endif
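/* The callback f receives, for one thread, the name and current guest value
   of each integer register listed above. VG_(apply_to_GP_regs) below simply
   runs it over every thread that is live (or has been told to exit). */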
378 void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
380 ThreadId tid;
382 for (tid = 1; tid < VG_N_THREADS; tid++) {
383 if (VG_(is_valid_tid)(tid)
384 || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
385 // live thread or thread instructed to die by another thread that
386 // called exit.
387 apply_to_GPs_of_tid(tid, f);
392 void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
394 *tid = (ThreadId)(-1);
397 Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
398 /*OUT*/Addr* stack_min,
399 /*OUT*/Addr* stack_max)
401 ThreadId i;
402 for (i = (*tid)+1; i < VG_N_THREADS; i++) {
403 if (i == VG_INVALID_THREADID)
404 continue;
405 if (VG_(threads)[i].status != VgTs_Empty) {
406 *tid = i;
407 *stack_min = VG_(get_SP)(i);
408 *stack_max = VG_(threads)[i].client_stack_highest_byte;
409 return True;
412 return False;
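/* Iteration idiom for the two functions above (sketch):
       ThreadId tid;
       Addr stack_min, stack_max;
       VG_(thread_stack_reset_iter)(&tid);
       while (VG_(thread_stack_next)(&tid, &stack_min, &stack_max)) {
          ... examine the stack range [stack_min, stack_max] of tid ...
       }
   Each non-empty thread is visited once, with stack_min taken from its
   current SP and stack_max from client_stack_highest_byte. */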
415 Addr VG_(thread_get_stack_max)(ThreadId tid)
417 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
418 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
419 return VG_(threads)[tid].client_stack_highest_byte;
422 SizeT VG_(thread_get_stack_size)(ThreadId tid)
424 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
425 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
426 return VG_(threads)[tid].client_stack_szB;
429 Addr VG_(thread_get_altstack_min)(ThreadId tid)
431 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
432 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
433 return (Addr)VG_(threads)[tid].altstack.ss_sp;
436 SizeT VG_(thread_get_altstack_size)(ThreadId tid)
438 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
439 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
440 return VG_(threads)[tid].altstack.ss_size;
443 //-------------------------------------------------------------
444 /* Details about the capabilities of the underlying (host) CPU. These
445 details are acquired by (1) enquiring with the CPU at startup, or
446 (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
447 line size). It's a bit nasty in the sense that there's no obvious
448 way to stop uses of some of this info before it's ready to go.
449 See pub_core_machine.h for more information about that.
451 VG_(machine_get_hwcaps) may use signals (although it attempts to
452 leave signal state unchanged) and therefore should only be
453 called before m_main sets up the client's signal state.
456 /* --------- State --------- */
457 static Bool hwcaps_done = False;
459 /* --- all archs --- */
460 static VexArch va = VexArch_INVALID;
461 static VexArchInfo vai;
463 #if defined(VGA_x86)
464 UInt VG_(machine_x86_have_mxcsr) = 0;
465 #endif
466 #if defined(VGA_ppc32)
467 UInt VG_(machine_ppc32_has_FP) = 0;
468 UInt VG_(machine_ppc32_has_VMX) = 0;
469 #endif
470 #if defined(VGA_ppc64be) || defined(VGA_ppc64le)
471 ULong VG_(machine_ppc64_has_VMX) = 0;
472 #endif
473 #if defined(VGA_arm)
474 Int VG_(machine_arm_archlevel) = 4;
475 #endif
478 /* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
479 testing, so we need a VG_MINIMAL_JMP_BUF. */
480 #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
481 || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32) \
482 || defined(VGA_mips64) || defined(VGA_arm64)
483 #include "pub_core_libcsetjmp.h"
484 static VG_MINIMAL_JMP_BUF(env_unsup_insn);
485 static void handler_unsup_insn ( Int x ) {
486 VG_MINIMAL_LONGJMP(env_unsup_insn);
488 #endif
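/* The hwcaps probes below all use the same pattern (sketch, with have_FOO
   standing for whichever feature flag is being tested): with
   handler_unsup_insn installed for SIGILL (and SIGFPE where relevant),
       have_FOO = True;
       if (VG_MINIMAL_SETJMP(env_unsup_insn)) have_FOO = False;
       else __asm__ __volatile__(<candidate instruction>);
   If the instruction traps, the handler longjmps back and the feature is
   recorded as absent. */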
491 /* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
492 * handlers are installed. Determines the sizes affected by dcbz
493 * and dcbzl instructions and updates the given VexArchInfo structure
494 * accordingly.
496 * Not very defensive: assumes that as long as the dcbz/dcbzl
497  * instructions don't raise a SIGILL, they will zero an aligned,
498 * contiguous block of memory of a sensible size. */
499 #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
500 static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
502 Int dcbz_szB = 0;
503 Int dcbzl_szB;
504 # define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
505 char test_block[4*MAX_DCBZL_SZB];
506 char *aligned = test_block;
507 Int i;
509 /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
510 aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
511 vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);
513 /* dcbz often clears 32B, although on some machines it clears whatever
514 * the native cache block size is */
515 VG_(memset)(test_block, 0xff, sizeof(test_block));
516 __asm__ __volatile__("dcbz 0,%0"
517 : /*out*/
518 : "r" (aligned) /*in*/
519 : "memory" /*clobber*/);
520 for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
521 if (!test_block[i])
522 ++dcbz_szB;
524 vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);
526 /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
527 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
528 dcbzl_szB = 0; /* indicates unsupported */
530 else {
531 VG_(memset)(test_block, 0xff, sizeof(test_block));
532 /* some older assemblers won't understand the dcbzl instruction
533 * variant, so we directly emit the instruction ourselves */
534 __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
535 : /*out*/
536 : "r" (aligned) /*in*/
537 : "memory", "r9" /*clobber*/);
538 for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
539 if (!test_block[i])
540 ++dcbzl_szB;
542 vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
545 arch_info->ppc_dcbz_szB = dcbz_szB;
546 arch_info->ppc_dcbzl_szB = dcbzl_szB;
548 VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
549 dcbz_szB, dcbzl_szB);
550 # undef MAX_DCBZL_SZB
552 #endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */
554 #ifdef VGA_s390x
556 /* Read /proc/cpuinfo. Look for lines like these
558 processor 0: version = FF, identification = 0117C9, machine = 2064
560 and return the machine model. If the machine model could not be determined
561 or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */
563 static UInt VG_(get_machine_model)(void)
565 static struct model_map {
566 const HChar name[5];
567 UInt id;
568 } model_map[] = {
569 { "2064", VEX_S390X_MODEL_Z900 },
570 { "2066", VEX_S390X_MODEL_Z800 },
571 { "2084", VEX_S390X_MODEL_Z990 },
572 { "2086", VEX_S390X_MODEL_Z890 },
573 { "2094", VEX_S390X_MODEL_Z9_EC },
574 { "2096", VEX_S390X_MODEL_Z9_BC },
575 { "2097", VEX_S390X_MODEL_Z10_EC },
576 { "2098", VEX_S390X_MODEL_Z10_BC },
577 { "2817", VEX_S390X_MODEL_Z196 },
578 { "2818", VEX_S390X_MODEL_Z114 },
579 { "2827", VEX_S390X_MODEL_ZEC12 },
580 { "2828", VEX_S390X_MODEL_ZBC12 },
581 { "2964", VEX_S390X_MODEL_Z13 },
582 { "2965", VEX_S390X_MODEL_Z13S },
583 { "3906", VEX_S390X_MODEL_Z14 },
584 { "3907", VEX_S390X_MODEL_Z14_ZR1 },
585 { "8561", VEX_S390X_MODEL_Z15 },
586 { "8562", VEX_S390X_MODEL_Z15 },
589 Int model, n, fh;
590 SysRes fd;
591 SizeT num_bytes, file_buf_size;
592 HChar *p, *m, *model_name, *file_buf;
594 /* Slurp contents of /proc/cpuinfo into FILE_BUF */
595 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
596 if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;
598 fh = sr_Res(fd);
600 /* Determine the size of /proc/cpuinfo.
601 Work around broken-ness in /proc file system implementation.
602 fstat returns a zero size for /proc/cpuinfo although it is
603 claimed to be a regular file. */
604 num_bytes = 0;
605 file_buf_size = 1000;
606 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
607 while (42) {
608 n = VG_(read)(fh, file_buf, file_buf_size);
609 if (n < 0) break;
611 num_bytes += n;
612 if (n < file_buf_size) break; /* reached EOF */
615 if (n < 0) num_bytes = 0; /* read error; ignore contents */
617 if (num_bytes > file_buf_size) {
618 VG_(free)( file_buf );
619 VG_(lseek)( fh, 0, VKI_SEEK_SET );
620 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
621 n = VG_(read)( fh, file_buf, num_bytes );
622 if (n < 0) num_bytes = 0;
625 file_buf[num_bytes] = '\0';
626 VG_(close)(fh);
628 /* Parse file */
629 model = VEX_S390X_MODEL_UNKNOWN;
630 for (p = file_buf; *p; ++p) {
631 /* Beginning of line */
632 if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;
634 m = VG_(strstr)( p, "machine" );
635 if (m == NULL) continue;
637 p = m + sizeof "machine" - 1;
638 while ( VG_(isspace)( *p ) || *p == '=') {
639 if (*p == '\n') goto next_line;
640 ++p;
643 model_name = p;
644 for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
645 struct model_map *mm = model_map + n;
646 SizeT len = VG_(strlen)( mm->name );
647 if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
648 VG_(isspace)( model_name[len] )) {
649 if (mm->id < model) model = mm->id;
650 p = model_name + len;
651 break;
654 /* Skip until end-of-line */
655 while (*p != '\n')
656 ++p;
657 next_line: ;
660 VG_(free)( file_buf );
661 VG_(debugLog)(1, "machine", "model = %s\n",
662 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
663 : model_map[model].name);
664 return model;
667 #endif /* defined(VGA_s390x) */
669 #if defined(VGA_mips32) || defined(VGA_mips64)
672 * Initialize hwcaps by parsing /proc/cpuinfo. Returns False if it cannot
673 * determine what CPU it is (it searches only for the models that are or may be
674 * supported by Valgrind).
676 static Bool VG_(parse_cpuinfo)(void)
678 const char *search_Broadcom_str = "cpu model\t\t: Broadcom";
679 const char *search_Cavium_str= "cpu model\t\t: Cavium";
680 const char *search_Ingenic_str= "cpu model\t\t: Ingenic";
681 const char *search_Loongson_str= "cpu model\t\t: ICT Loongson";
682 const char *search_MIPS_str = "cpu model\t\t: MIPS";
683 const char *search_Netlogic_str = "cpu model\t\t: Netlogic";
685 Int n, fh;
686 SysRes fd;
687 SizeT num_bytes, file_buf_size;
688 HChar *file_buf, *isa;
690 /* Slurp contents of /proc/cpuinfo into FILE_BUF */
691 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
692 if ( sr_isError(fd) ) return False;
694 fh = sr_Res(fd);
696 /* Determine the size of /proc/cpuinfo.
697 Work around broken-ness in /proc file system implementation.
698 fstat returns a zero size for /proc/cpuinfo although it is
699 claimed to be a regular file. */
700 num_bytes = 0;
701 file_buf_size = 1000;
702 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
703 while (42) {
704 n = VG_(read)(fh, file_buf, file_buf_size);
705 if (n < 0) break;
707 num_bytes += n;
708 if (n < file_buf_size) break; /* reached EOF */
711 if (n < 0) num_bytes = 0; /* read error; ignore contents */
713 if (num_bytes > file_buf_size) {
714 VG_(free)( file_buf );
715 VG_(lseek)( fh, 0, VKI_SEEK_SET );
716 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
717 n = VG_(read)( fh, file_buf, num_bytes );
718 if (n < 0) num_bytes = 0;
721 file_buf[num_bytes] = '\0';
722 VG_(close)(fh);
724 /* Parse file */
725 if (VG_(strstr)(file_buf, search_Broadcom_str) != NULL)
726 vai.hwcaps = VEX_PRID_COMP_BROADCOM;
727 else if (VG_(strstr)(file_buf, search_Netlogic_str) != NULL)
728 vai.hwcaps = VEX_PRID_COMP_NETLOGIC;
729 else if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
730 vai.hwcaps = VEX_PRID_COMP_CAVIUM;
731 else if (VG_(strstr)(file_buf, search_MIPS_str) != NULL)
732 vai.hwcaps = VEX_PRID_COMP_MIPS;
733 else if (VG_(strstr)(file_buf, search_Ingenic_str) != NULL)
734 vai.hwcaps = VEX_PRID_COMP_INGENIC_E1;
735 else if (VG_(strstr)(file_buf, search_Loongson_str) != NULL)
736 vai.hwcaps = (VEX_PRID_COMP_LEGACY | VEX_PRID_IMP_LOONGSON_64);
737 else {
738 /* Did not find string in the proc file. */
739 vai.hwcaps = 0;
740 VG_(free)(file_buf);
741 return False;
744 isa = VG_(strstr)(file_buf, "isa\t\t\t: ");
746 if (NULL != isa) {
747 if (VG_(strstr) (isa, "mips32r1") != NULL)
748 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
749 if (VG_(strstr) (isa, "mips32r2") != NULL)
750 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
751 if (VG_(strstr) (isa, "mips32r6") != NULL)
752 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R6;
753 if (VG_(strstr) (isa, "mips64r1") != NULL)
754 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R1;
755 if (VG_(strstr) (isa, "mips64r2") != NULL)
756 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2;
757 if (VG_(strstr) (isa, "mips64r6") != NULL)
758 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R6;
761 * TODO(petarj): Remove this Cavium workaround once Linux kernel folks
762 * decide to change incorrect settings in
763 * mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h.
764 * The current settings show mips32r1, mips32r2 and mips64r1 as
765 * unsupported ISAs by Cavium MIPS CPUs.
767 if (VEX_MIPS_COMP_ID(vai.hwcaps) == VEX_PRID_COMP_CAVIUM) {
768 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1 | VEX_MIPS_CPU_ISA_M32R2 |
769 VEX_MIPS_CPU_ISA_M64R1;
771 } else {
773 * Kernel does not provide information about supported ISAs.
774 * Populate the isa level flags based on the CPU model. That is our
775 * best guess.
777 switch VEX_MIPS_COMP_ID(vai.hwcaps) {
778 case VEX_PRID_COMP_CAVIUM:
779 case VEX_PRID_COMP_NETLOGIC:
780 vai.hwcaps |= (VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1);
781 /* fallthrough */
782 case VEX_PRID_COMP_INGENIC_E1:
783 case VEX_PRID_COMP_MIPS:
784 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
785 /* fallthrough */
786 case VEX_PRID_COMP_BROADCOM:
787 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
788 break;
789 case VEX_PRID_COMP_LEGACY:
790 if ((VEX_MIPS_PROC_ID(vai.hwcaps) == VEX_PRID_IMP_LOONGSON_64))
791 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1 |
792 VEX_MIPS_CPU_ISA_M32R2 | VEX_MIPS_CPU_ISA_M32R1;
793 break;
794 default:
795 break;
798 VG_(free)(file_buf);
799 return True;
802 #endif /* defined(VGA_mips32) || defined(VGA_mips64) */
804 #if defined(VGP_arm64_linux)
806 /* Check to see whether we are running on a Cavium core, and if so auto-enable
807 the fallback LLSC implementation. See #369459. */
809 static Bool VG_(parse_cpuinfo)(void)
811 const char *search_Cavium_str = "CPU implementer\t: 0x43";
813 Int n, fh;
814 SysRes fd;
815 SizeT num_bytes, file_buf_size;
816 HChar *file_buf;
818 /* Slurp contents of /proc/cpuinfo into FILE_BUF */
819 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
820 if ( sr_isError(fd) ) return False;
822 fh = sr_Res(fd);
824 /* Determine the size of /proc/cpuinfo.
825 Work around broken-ness in /proc file system implementation.
826 fstat returns a zero size for /proc/cpuinfo although it is
827 claimed to be a regular file. */
828 num_bytes = 0;
829 file_buf_size = 1000;
830 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
831 while (42) {
832 n = VG_(read)(fh, file_buf, file_buf_size);
833 if (n < 0) break;
835 num_bytes += n;
836 if (n < file_buf_size) break; /* reached EOF */
839 if (n < 0) num_bytes = 0; /* read error; ignore contents */
841 if (num_bytes > file_buf_size) {
842 VG_(free)( file_buf );
843 VG_(lseek)( fh, 0, VKI_SEEK_SET );
844 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
845 n = VG_(read)( fh, file_buf, num_bytes );
846 if (n < 0) num_bytes = 0;
849 file_buf[num_bytes] = '\0';
850 VG_(close)(fh);
852 /* Parse file */
853 if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
854 vai.arm64_requires_fallback_LLSC = True;
856 VG_(free)(file_buf);
857 return True;
860 #endif /* defined(VGP_arm64_linux) */
862 Bool VG_(machine_get_hwcaps)( void )
864 vg_assert(hwcaps_done == False);
865 hwcaps_done = True;
867 // Whack default settings into vai, so that we only need to fill in
868 // any interesting bits.
869 LibVEX_default_VexArchInfo(&vai);
871 #if defined(VGA_x86)
872 { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
873 UInt eax, ebx, ecx, edx, max_extended;
874 HChar vstr[13];
875 vstr[0] = 0;
877 if (!VG_(has_cpuid)())
878 /* we can't do cpuid at all. Give up. */
879 return False;
881 VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
882 if (eax < 1)
883 /* we can't ask for cpuid(x) for x > 0. Give up. */
884 return False;
886 /* Get processor ID string, and max basic/extended index
887 values. */
888 VG_(memcpy)(&vstr[0], &ebx, 4);
889 VG_(memcpy)(&vstr[4], &edx, 4);
890 VG_(memcpy)(&vstr[8], &ecx, 4);
891 vstr[12] = 0;
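/* CPUID leaf 0 returns the 12-character vendor string in EBX:EDX:ECX order,
   so vstr now holds e.g. "GenuineIntel" or "AuthenticAMD", which is what the
   vendor checks below compare against. */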
893 VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
894 max_extended = eax;
896 /* get capabilities bits into edx */
897 VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);
899 have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
900 have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
901 have_sse3 = (ecx & (1<<0)) != 0; /* True => have sse3 insns */
903 /* cmpxchg8b is a minimum requirement now; if we don't have it we
904 must simply give up. But all CPUs since Pentium-I have it, so
905 that doesn't seem like much of a restriction. */
906 have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
907 if (!have_cx8)
908 return False;
910 #if defined(VGP_x86_freebsd)
911 if (have_sse1 || have_sse2) {
912 Int sc, error;
913 SizeT scl;
914 /* Regardless of what cpuid says, the OS has to enable SSE first! */
915 scl = sizeof(sc);
916 error = VG_(sysctlbyname)("hw.instruction_sse", &sc, &scl, 0, 0);
917 if (error == -1 || sc != 1) {
918 have_sse1 = 0;
919 have_sse2 = 0;
920 VG_(message)(Vg_UserMsg, "Warning: cpu has SSE, but the OS has not enabled it. Disabling in valgrind!");
923 #endif
924 /* Figure out if this is an AMD that can do MMXEXT. */
925 have_mmxext = False;
926 if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
927 && max_extended >= 0x80000001) {
928 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
929 /* Some older AMD processors support a sse1 subset (Integer SSE). */
930 have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
933 /* Figure out if this is an AMD or Intel that can do LZCNT. */
934 have_lzcnt = False;
935 if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
936 || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
937 && max_extended >= 0x80000001) {
938 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
939 have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
942 /* Intel processors don't define the mmxext extension, but since it
943 is just a sse1 subset always define it when we have sse1. */
944 if (have_sse1)
945 have_mmxext = True;
947 va = VexArchX86;
948 vai.endness = VexEndnessLE;
950 if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
951 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
952 vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
953 vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
954 vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
955 if (have_lzcnt)
956 vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
957 VG_(machine_x86_have_mxcsr) = 1;
958 } else if (have_sse2 && have_sse1 && have_mmxext) {
959 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
960 vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
961 vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
962 if (have_lzcnt)
963 vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
964 VG_(machine_x86_have_mxcsr) = 1;
965 } else if (have_sse1 && have_mmxext) {
966 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
967 vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
968 VG_(machine_x86_have_mxcsr) = 1;
969 } else if (have_mmxext) {
970 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
971 VG_(machine_x86_have_mxcsr) = 0;
972 } else {
973 vai.hwcaps = 0; /*baseline - no sse at all*/
974 VG_(machine_x86_have_mxcsr) = 0;
977 VG_(machine_get_cache_info)(&vai);
979 return True;
982 #elif defined(VGA_amd64)
983 { Bool have_sse3, have_ssse3, have_cx8, have_cx16;
984 Bool have_lzcnt, have_avx, have_bmi, have_avx2;
985 Bool have_rdtscp, have_rdrand, have_f16c, have_rdseed;
986 UInt eax, ebx, ecx, edx, max_basic, max_extended;
987 ULong xgetbv_0 = 0;
988 HChar vstr[13];
989 vstr[0] = 0;
991 have_sse3 = have_ssse3 = have_cx8 = have_cx16
992 = have_lzcnt = have_avx = have_bmi = have_avx2
993 = have_rdtscp = have_rdrand = have_f16c = have_rdseed = False;
995 eax = ebx = ecx = edx = max_basic = max_extended = 0;
997 if (!VG_(has_cpuid)())
998 /* we can't do cpuid at all. Give up. */
999 return False;
1001 VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
1002 max_basic = eax;
1003 if (max_basic < 1)
1004 /* we can't ask for cpuid(x) for x > 0. Give up. */
1005 return False;
1007 /* Get processor ID string, and max basic/extended index
1008 values. */
1009 VG_(memcpy)(&vstr[0], &ebx, 4);
1010 VG_(memcpy)(&vstr[4], &edx, 4);
1011 VG_(memcpy)(&vstr[8], &ecx, 4);
1012 vstr[12] = 0;
1014 VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
1015 max_extended = eax;
1017 /* get capabilities bits into edx */
1018 VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);
1020 // we assume that SSE1 and SSE2 are available by default
1021 have_sse3 = (ecx & (1<<0)) != 0; /* True => have sse3 insns */
1022 have_ssse3 = (ecx & (1<<9)) != 0; /* True => have Sup SSE3 insns */
1023 // fma is ecx:12
1024 // sse41 is ecx:19
1025 // sse42 is ecx:20
1026 // xsave is ecx:26
1027 // osxsave is ecx:27
1028 // avx is ecx:28
1029 have_f16c = (ecx & (1<<29)) != 0; /* True => have F16C insns */
1030 have_rdrand = (ecx & (1<<30)) != 0; /* True => have RDRAND insns */
1032 have_avx = False;
1033 /* have_fma = False; */
1034 if ( (ecx & ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) {
1035 /* Processor supports AVX instructions, XGETBV is enabled by the
1036 OS, and AVX instructions are enabled by the OS. */
1037 ULong w;
1038 __asm__ __volatile__("movq $0,%%rcx ; "
1039 ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
1040 "movq %%rax,%0"
1041 :/*OUT*/"=r"(w) :/*IN*/
1042 :/*TRASH*/"rdx","rcx","rax");
1043 xgetbv_0 = w;
1044 if ((xgetbv_0 & 7) == 7) {
1045 /* Only say we have AVX if the XSAVE-allowable
1046 bitfield-mask allows x87, SSE and AVX state. We could
1047 actually run with a more restrictive XGETBV(0) value,
1048 but VEX's implementation of XSAVE and XRSTOR assumes
1049 that all 3 bits are enabled.
1051 Also, the VEX implementation of XSAVE/XRSTOR assumes that
1052 state component [2] (the YMM high halves) are located in
1053 the XSAVE image at offsets 576 .. 831. So we have to
1054 check that here before declaring AVX to be supported. */
1055 UInt eax2, ebx2, ecx2, edx2;
1056 VG_(cpuid)(0xD, 2, &eax2, &ebx2, &ecx2, &edx2);
1057 if (ebx2 == 576 && eax2 == 256) {
1058 have_avx = True;
1060 /* have_fma = (ecx & (1<<12)) != 0; */
1061 /* have_fma: Probably correct, but gcc complains due to
1062 unusedness. */
1066 /* cmpxchg8b is a minimum requirement now; if we don't have it we
1067 must simply give up. But all CPUs since Pentium-I have it, so
1068 that doesn't seem like much of a restriction. */
1069 have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
1070 if (!have_cx8)
1071 return False;
1073 /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
1074 have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */
1076 /* Figure out if this CPU can do LZCNT. */
1077 have_lzcnt = False;
1078 if (max_extended >= 0x80000001) {
1079 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
1080 have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
1083 /* Can we do RDTSCP? */
1084 have_rdtscp = False;
1085 if (max_extended >= 0x80000001) {
1086 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
1087 have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
1090 /* Check for BMI1 and AVX2, but only if we have AVX1 (plus OS support). */
1091 have_bmi = False;
1092 have_avx2 = False;
1093 if (have_avx && max_basic >= 7) {
1094 VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
1095 have_bmi = (ebx & (1<<3)) != 0; /* True => have BMI1 */
1096 have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
1097 have_rdseed = (ebx & (1<<18)) != 0; /* True => have RDSEED insns */
1100 /* Sanity check for RDRAND and F16C. These don't actually *need* AVX, but
1101 it's convenient to restrict them to the AVX case since the simulated
1102 CPUID we'll offer them on has AVX as a base. */
1103 if (!have_avx) {
1104 have_f16c = False;
1105 have_rdrand = False;
1106 have_rdseed = False;
1109 va = VexArchAMD64;
1110 vai.endness = VexEndnessLE;
1111 vai.hwcaps = (have_sse3 ? VEX_HWCAPS_AMD64_SSE3 : 0)
1112 | (have_ssse3 ? VEX_HWCAPS_AMD64_SSSE3 : 0)
1113 | (have_cx16 ? VEX_HWCAPS_AMD64_CX16 : 0)
1114 | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0)
1115 | (have_avx ? VEX_HWCAPS_AMD64_AVX : 0)
1116 | (have_bmi ? VEX_HWCAPS_AMD64_BMI : 0)
1117 | (have_avx2 ? VEX_HWCAPS_AMD64_AVX2 : 0)
1118 | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0)
1119 | (have_f16c ? VEX_HWCAPS_AMD64_F16C : 0)
1120 | (have_rdrand ? VEX_HWCAPS_AMD64_RDRAND : 0)
1121 | (have_rdseed ? VEX_HWCAPS_AMD64_RDSEED : 0);
1123 VG_(machine_get_cache_info)(&vai);
1125 return True;
1128 #elif defined(VGA_ppc32)
1130 /* Find out which subset of the ppc32 instruction set is supported by
1131 verifying whether various ppc32 instructions generate a SIGILL
1132 or a SIGFPE. An alternative approach is to check the AT_HWCAP and
1133 AT_PLATFORM entries in the ELF auxiliary table -- see also
1134 the_iifii.client_auxv in m_main.c.
1136 vki_sigset_t saved_set, tmp_set;
1137 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
1138 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act;
1140 volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
1141 volatile Bool have_isa_2_07, have_isa_3_0;
1142 Int r;
1144 /* This is a kludge. Really we ought to back-convert saved_act
1145 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1146 since that's a no-op on all ppc32 platforms so far supported,
1147 it's not worth the typing effort. At least include most basic
1148 sanity check: */
1149 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1151 VG_(sigemptyset)(&tmp_set);
1152 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1153 VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1155 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1156 vg_assert(r == 0);
1158 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1159 vg_assert(r == 0);
1160 tmp_sigill_act = saved_sigill_act;
1162 r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1163 vg_assert(r == 0);
1164 tmp_sigfpe_act = saved_sigfpe_act;
1166 /* NODEFER: signal handler does not return (from the kernel's point of
1167 view), hence if it is to successfully catch a signal more than once,
1168 we need the NODEFER flag. */
1169 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1170 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1171 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1172 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1173 r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1174 vg_assert(r == 0);
1176 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
1177 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
1178 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER;
1179 tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
1180 r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1181 vg_assert(r == 0);
1183 /* standard FP insns */
1184 have_F = True;
1185 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1186 have_F = False;
1187 } else {
1188 __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
1191 /* Altivec insns */
1192 have_V = True;
1193 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1194 have_V = False;
1195 } else {
1196 /* Unfortunately some older assemblers don't speak Altivec (or
1197 choose not to), so to be safe we directly emit the 32-bit
1198 word corresponding to "vor 0,0,0". This fixes a build
1199 problem that happens on Debian 3.1 (ppc32), and probably
1200 various other places. */
1201 __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
1204 /* General-Purpose optional (fsqrt, fsqrts) */
1205 have_FX = True;
1206 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1207 have_FX = False;
1208 } else {
1209 __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
1212 /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
1213 have_GX = True;
1214 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1215 have_GX = False;
1216 } else {
1217 __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
1220 /* VSX support implies Power ISA 2.06 */
1221 have_VX = True;
1222 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1223 have_VX = False;
1224 } else {
1225 __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
1228 /* Check for Decimal Floating Point (DFP) support. */
1229 have_DFP = True;
1230 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1231 have_DFP = False;
1232 } else {
1233 __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
1236 /* Check for ISA 2.07 support. */
1237 have_isa_2_07 = True;
1238 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1239 have_isa_2_07 = False;
1240 } else {
1241 __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
1244 /* Check for ISA 3.0 support. */
1245 have_isa_3_0 = True;
1246 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1247 have_isa_3_0 = False;
1248 } else {
1249 __asm__ __volatile__(".long 0x7f140434":::"r20"); /* cnttzw r20,r24 */
1252 // ISA 3.1 not supported on 32-bit systems
1254 // scv instruction not supported on 32-bit systems.
1256 /* determine dcbz/dcbzl sizes while we still have the signal
1257 * handlers registered */
1258 find_ppc_dcbz_sz(&vai);
1260 r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
1261 vg_assert(r == 0);
1262 r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
1263 vg_assert(r == 0);
1264 r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1265 vg_assert(r == 0);
1266 VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
1267 (Int)have_F, (Int)have_V, (Int)have_FX,
1268 (Int)have_GX, (Int)have_VX, (Int)have_DFP,
1269 (Int)have_isa_2_07, (Int)have_isa_3_0);
1270 /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
1271 if (have_V && !have_F)
1272 have_V = False;
1273 if (have_FX && !have_F)
1274 have_FX = False;
1275 if (have_GX && !have_F)
1276 have_GX = False;
1278 VG_(machine_ppc32_has_FP) = have_F ? 1 : 0;
1279 VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;
1281 va = VexArchPPC32;
1282 vai.endness = VexEndnessBE;
1284 vai.hwcaps = 0;
1285 if (have_F) vai.hwcaps |= VEX_HWCAPS_PPC32_F;
1286 if (have_V) vai.hwcaps |= VEX_HWCAPS_PPC32_V;
1287 if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
1288 if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
1289 if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
1290 if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
1291 if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;
1292 if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA3_0;
1293 /* ISA 3.1 not supported on 32-bit systems. */
1294 /* SCV not supported on PPC32 */
1296 VG_(machine_get_cache_info)(&vai);
1298 /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
1299 called before we're ready to go. */
1300 return True;
1303 #elif defined(VGA_ppc64be)|| defined(VGA_ppc64le)
1305 /* Same instruction set detection algorithm as for ppc32. */
1306 vki_sigset_t saved_set, tmp_set;
1307 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
1308 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act;
1310 volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
1311 volatile Bool have_isa_2_07, have_isa_3_0, have_isa_3_1;
1312 Int r;
1314 /* This is a kludge. Really we ought to back-convert saved_act
1315 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1316 since that's a no-op on all ppc64 platforms so far supported,
1317 it's not worth the typing effort. At least include most basic
1318 sanity check: */
1319 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1321 VG_(sigemptyset)(&tmp_set);
1322 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1323 VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1325 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1326 vg_assert(r == 0);
1328 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1329 vg_assert(r == 0);
1330 tmp_sigill_act = saved_sigill_act;
1332 VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1333 tmp_sigfpe_act = saved_sigfpe_act;
1335 /* NODEFER: signal handler does not return (from the kernel's point of
1336 view), hence if it is to successfully catch a signal more than once,
1337 we need the NODEFER flag. */
1338 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1339 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1340 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1341 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1342 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1344 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
1345 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
1346 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER;
1347 tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
1348 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1350 /* standard FP insns */
1351 have_F = True;
1352 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1353 have_F = False;
1354 } else {
1355 __asm__ __volatile__("fmr 0,0");
1358 /* Altivec insns */
1359 have_V = True;
1360 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1361 have_V = False;
1362 } else {
1363 __asm__ __volatile__(".long 0x10000484"); /* vor v0,v0,v0 */
1366 /* General-Purpose optional (fsqrt, fsqrts) */
1367 have_FX = True;
1368 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1369 have_FX = False;
1370 } else {
1371 __asm__ __volatile__(".long 0xFC00002C"); /* fsqrt f0,f0 */
1374 /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
1375 have_GX = True;
1376 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1377 have_GX = False;
1378 } else {
1379 __asm__ __volatile__(".long 0xFC000034"); /* frsqrte f0,f0 */
1382 /* VSX support implies Power ISA 2.06 */
1383 have_VX = True;
1384 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1385 have_VX = False;
1386 } else {
1387 __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp vs0,vs0 */
1390 /* Check for Decimal Floating Point (DFP) support. */
1391 have_DFP = True;
1392 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1393 have_DFP = False;
1394 } else {
1395 __asm__ __volatile__(".long 0xec0e8005"); /* dadd f0,f14,f16 */
1398 /* Check for ISA 2.07 support. */
1399 have_isa_2_07 = True;
1400 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1401 have_isa_2_07 = False;
1402 } else {
1403 __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd f0,r0 */
1406 /* Check for ISA 3.0 support. */
1407 have_isa_3_0 = True;
1408 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1409 have_isa_3_0 = False;
1410 } else {
1411 __asm__ __volatile__(".long 0x7f140434":::"r20"); /* cnttzw r20,r24 */
1414 /* Check if the host supports the scv instruction.
1415 Note, we cannot use the usual method of issuing the scv instruction and
1416 checking whether it is supported. Issuing scv on a system that does
1417 not have scv support in the HWCAPS generates a message in dmesg,
1418 "Facility 'SCV' unavailable (12), exception". It is considered bad
1419 form to issue an scv on systems that do not support it.
1421 The function VG_(machine_ppc64_set_scv_support) is called in
1422 initimg-linux.c to set the flag ppc_scv_supported based on the HWCAPS2
1423 value. The flag ppc_scv_supported is defined in struct VexArchInfo,
1424 in file libvex.h. The setting of ppc_scv_supported in VexArchInfo
1425 is checked in disInstr_PPC_WRK() to set the allow_scv flag. */
1427 /* Check for ISA 3.1 support. */
1428 have_isa_3_1 = True;
1429 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1430 have_isa_3_1 = False;
1431 } else {
1432 __asm__ __volatile__(".long 0x7f1401b6":::"r20"); /* brh r20,r24 */
1435 /* determine dcbz/dcbzl sizes while we still have the signal
1436 * handlers registered */
1437 find_ppc_dcbz_sz(&vai);
1439 VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
1440 VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
1441 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1442 VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d ISA3.1 %d\n",
1443 (Int)have_F, (Int)have_V, (Int)have_FX,
1444 (Int)have_GX, (Int)have_VX, (Int)have_DFP,
1445 (Int)have_isa_2_07, (int)have_isa_3_0, (int)have_isa_3_1);
1446 /* on ppc64be, if we don't even have FP, just give up. */
1447 if (!have_F)
1448 return False;
1450 VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;
1452 va = VexArchPPC64;
1453 # if defined(VKI_LITTLE_ENDIAN)
1454 vai.endness = VexEndnessLE;
1455 # elif defined(VKI_BIG_ENDIAN)
1456 vai.endness = VexEndnessBE;
1457 # else
1458 vai.endness = VexEndness_INVALID;
1459 # endif
1461 vai.hwcaps = 0;
1462 if (have_V) vai.hwcaps |= VEX_HWCAPS_PPC64_V;
1463 if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
1464 if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
1465 if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
1466 if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
1467 if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;
1468 if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA3_0;
1469 if (have_isa_3_1) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA3_1;
1471 VG_(machine_get_cache_info)(&vai);
1473 /* But we're not done yet: VG_(machine_ppc64_set_clszB) and
1474 VG_(machine_ppc64_set_scv_support) must be called before we're
1475 ready to go. */
1476 return True;
1479 #elif defined(VGA_s390x)
1481 # include "libvex_s390x_common.h"
1484 /* Instruction set detection code borrowed from ppc above. */
1485 vki_sigset_t saved_set, tmp_set;
1486 vki_sigaction_fromK_t saved_sigill_act;
1487 vki_sigaction_toK_t tmp_sigill_act;
1489 volatile Bool have_LDISP, have_STFLE;
1490 Int i, r, model;
1492 /* If the model is "unknown" don't treat this as an error. Assume
1493 this is a brand-new machine model for which we don't have the
1494 identification yet. Keeping fingers crossed. */
1495 model = VG_(get_machine_model)();
1497 /* Unblock SIGILL and stash away the old action for that signal */
1498 VG_(sigemptyset)(&tmp_set);
1499 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1501 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1502 vg_assert(r == 0);
1504 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1505 vg_assert(r == 0);
1506 tmp_sigill_act = saved_sigill_act;
1508 /* NODEFER: signal handler does not return (from the kernel's point of
1509 view), hence if it is to successfully catch a signal more than once,
1510 we need the NODEFER flag. */
1511 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1512 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1513 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1514 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1515 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1517 /* Determine hwcaps. Note, we cannot use the stfle insn because it
1518 is not supported on z900. */
1520 have_LDISP = True;
1521 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1522 have_LDISP = False;
1523 } else {
1524 /* BASR loads the address of the next insn into r1. Needed to avoid
1525 a segfault in XY. */
1526 __asm__ __volatile__("basr %%r1,%%r0\n\t"
1527 ".long 0xe3001000\n\t" /* XY 0,0(%r1) */
1528 ".short 0x0057" : : : "r0", "r1", "cc", "memory");
1531 /* Check availability of STFLE. If available store facility bits
1532 in hoststfle. */
1533 ULong hoststfle[S390_NUM_FACILITY_DW];
1535 for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
1536 hoststfle[i] = 0;
1538 have_STFLE = True;
1539 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1540 have_STFLE = False;
1541 } else {
1542 register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;
1544 __asm__ __volatile__(" .insn s,0xb2b00000,%0\n" /* stfle */
1545 : "=m" (hoststfle), "+d"(reg0)
1546 : : "cc", "memory");
1549 /* Restore signals */
1550 r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
1551 vg_assert(r == 0);
1552 r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1553 vg_assert(r == 0);
1554 va = VexArchS390X;
1555 vai.endness = VexEndnessBE;
1557 vai.hwcaps = model;
1558 if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
1559 if (have_LDISP) {
1560 /* Use long displacement only on machines >= z990. For all other
1561 machines it is millicoded and therefore slow. */
1562 if (model >= VEX_S390X_MODEL_Z990)
1563 vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
1566 /* Detect presence of certain facilities using the STFLE insn.
1567 Note that these facilities were introduced at the same time as, or later
1568 than, STFLE, so the absence of STFLE implies the absence of the facility
1569 we're trying to detect. */
1570 struct fac_hwcaps_map {
1571 UInt installed;
1572 UInt facility_bit;
1573 UInt hwcaps_bit;
1574 const HChar name[6]; // may need adjustment for new facility names
1575 } fac_hwcaps[] = {
1576 { False, S390_FAC_EIMM, VEX_HWCAPS_S390X_EIMM, "EIMM" },
1577 { False, S390_FAC_GIE, VEX_HWCAPS_S390X_GIE, "GIE" },
1578 { False, S390_FAC_DFP, VEX_HWCAPS_S390X_DFP, "DFP" },
1579 { False, S390_FAC_FPSE, VEX_HWCAPS_S390X_FGX, "FGX" },
1580 { False, S390_FAC_ETF2, VEX_HWCAPS_S390X_ETF2, "ETF2" },
1581 { False, S390_FAC_ETF3, VEX_HWCAPS_S390X_ETF3, "ETF3" },
1582 { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
1583 { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
1584 { False, S390_FAC_LSC, VEX_HWCAPS_S390X_LSC, "LSC" },
1585 { False, S390_FAC_PFPO, VEX_HWCAPS_S390X_PFPO, "PFPO" },
1586 { False, S390_FAC_VX, VEX_HWCAPS_S390X_VX, "VX" },
1587 { False, S390_FAC_MSA5, VEX_HWCAPS_S390X_MSA5, "MSA5" },
1588 { False, S390_FAC_MI2, VEX_HWCAPS_S390X_MI2, "MI2" },
1589 { False, S390_FAC_LSC2, VEX_HWCAPS_S390X_LSC2, "LSC2" },
1590 { False, S390_FAC_VXE, VEX_HWCAPS_S390X_VXE, "VXE" },
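/* The facility bits stored by STFLE use IBM bit numbering: bit 0 is the
   most-significant bit of the first doubleword. That is why the loop below
   locates facility_bit with (63 - fac_bit) within hoststfle[dw_number]. */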
1593 /* Set hwcaps according to the detected facilities */
1594 UChar dw_number = 0;
1595 UChar fac_bit = 0;
1596 for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
1597 vg_assert(fac_hwcaps[i].facility_bit <= 191); // for now
1598 dw_number = fac_hwcaps[i].facility_bit / 64;
1599 fac_bit = fac_hwcaps[i].facility_bit % 64;
1600 if (hoststfle[dw_number] & (1ULL << (63 - fac_bit))) {
1601 fac_hwcaps[i].installed = True;
1602 vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
1606 /* Build up a string showing the probed-for facilities */
1607 HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
1608 (sizeof fac_hwcaps[0].name + 3) + // %s %d
1609 7 + 1 + 4 + 2 // machine %4d
1610 + 1]; // \0
1611 HChar *p = fac_str;
1612 p += VG_(sprintf)(p, "machine %4d ", model);
1613 for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
1614 p += VG_(sprintf)(p, " %s %1u", fac_hwcaps[i].name,
1615 fac_hwcaps[i].installed);
1617 *p++ = '\0';
1619 VG_(debugLog)(1, "machine", "%s\n", fac_str);
1620 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
1622 VG_(machine_get_cache_info)(&vai);
1624 return True;
1627 #elif defined(VGA_arm)
1629 /* Same instruction set detection algorithm as for ppc32. */
1630 vki_sigset_t saved_set, tmp_set;
1631 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
1632 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act;
1634 volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON, have_V8;
1635 volatile Int archlevel;
1636 Int r;
1638 /* This is a kludge. Really we ought to back-convert saved_act
1639 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1640 since that's a no-op on all ppc64 platforms so far supported,
1641 it's not worth the typing effort. At least include most basic
1642 sanity check: */
1643 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1645 VG_(sigemptyset)(&tmp_set);
1646 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1647 VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1649 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1650 vg_assert(r == 0);
1652 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1653 vg_assert(r == 0);
1654 tmp_sigill_act = saved_sigill_act;
1656 VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1657 tmp_sigfpe_act = saved_sigfpe_act;
1659 /* NODEFER: signal handler does not return (from the kernel's point of
1660 view), hence if it is to successfully catch a signal more than once,
1661 we need the NODEFER flag. */
1662 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1663 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1664 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1665 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1666 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1668 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
1669 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
1670 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER;
1671 tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
1672 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
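/* For illustration: every probe below follows the same sketch.  A single
   instruction that is representative of the feature is executed; if the CPU
   does not implement it, the kernel delivers SIGILL (or SIGFPE), the handler
   installed above (handler_unsup_insn, defined earlier in this file) is
   expected to longjmp back through env_unsup_insn, VG_MINIMAL_SETJMP then
   returns nonzero, and the feature flag stays False:

       have_FOO = True;
       if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
          have_FOO = False;              // probe instruction faulted
       } else {
          __asm__ __volatile__(...);     // one instruction unique to FOO
       }

   have_FOO is just a placeholder name for this sketch. */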
1674 /* VFP insns */
1675 have_VFP = True;
1676 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1677 have_VFP = False;
1678 } else {
1679 __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
1681 /* There are several generations of the VFP extension, but they differ
1682 very little, so for now we do not distinguish between them. */
1683 have_VFP2 = have_VFP;
1684 have_VFP3 = have_VFP;
1686 /* NEON insns */
1687 have_NEON = True;
1688 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1689 have_NEON = False;
1690 } else {
1691 __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
1694 /* ARM architecture level */
1695 archlevel = 5; /* v5 will be base level */
1696 if (archlevel < 7) {
1697 archlevel = 7;
1698 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1699 archlevel = 5;
1700 } else {
1701 __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
1704 if (archlevel < 6) {
1705 archlevel = 6;
1706 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1707 archlevel = 5;
1708 } else {
1709 __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
1713 /* ARMv8 insns */
1714 have_V8 = True;
1715 if (archlevel == 7) {
1716 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1717 have_V8 = False;
1718 } else {
1719 __asm__ __volatile__(".word 0xF3044F54"); /* VMAXNM.F32 q2,q2,q2 */
1721 if (have_V8 && have_NEON && have_VFP3) {
1722 archlevel = 8;
1726 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
1727 VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
1728 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1729 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1730 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1732 VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
1733 archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
1734 (Int)have_NEON);
1736 VG_(machine_arm_archlevel) = archlevel;
1738 va = VexArchARM;
1739 vai.endness = VexEndnessLE;
1741 vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
1742 if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
1743 if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
1744 if (have_VFP) vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
1745 if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
1747 VG_(machine_get_cache_info)(&vai);
1749 return True;
1752 #elif defined(VGA_arm64)
1754 /* Use the attribute and feature registers to determine host hardware
1755 * capabilities. Only user-space features are read. Naming conventions
1756 * follow the Arm Architecture Reference Manual.
1758 * ID_AA64ISAR0_EL1 Instruction Set Attribute Register 0
1759 * ----------------
1760 * ...5544 4444 4444 3333 3333 3332 2222 2222 1111 1111 11
1761 * ...1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210
1762 * FHM DP SM4 SM3 SHA3 RDM ATOMICS
1764 * ID_AA64ISAR1_EL1 Instruction Set Attribute Register 1
1765 * ----------------
1766 * ...5555 5544 4444 4444 3333 3333 3332 2222 2222 1111 1111 11
1767 * ...5432 1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210
1768 * ...I8MM BF16 DPB
1770 * ID_AA64PFR0_EL1 Processor Feature Register 0
1771 * ---------------
1772 * 6666...2222 2222 1111 1111 11
1773 * 3210...7654 3210 9876 5432 1098 7654 3210
1774 * ASIMD FP16
1775 */
1777 Bool is_base_v8 = False;
1779 Bool have_fhm, have_dp, have_sm4, have_sm3, have_sha3, have_rdm;
1780 Bool have_atomics, have_i8mm, have_bf16, have_dpbcvap, have_dpbcvadp;
1781 Bool have_vfp16, have_fp16;
1783 have_fhm = have_dp = have_sm4 = have_sm3 = have_sha3 = have_rdm
1784 = have_atomics = have_i8mm = have_bf16 = have_dpbcvap
1785 = have_dpbcvadp = have_vfp16 = have_fp16 = False;
1787 /* Some baseline v8.0 kernels do not allow reads of these registers. Use
1788 * the same SIGILL handling algorithm as other architectures for such
1789 * kernels.
1790 */
1791 vki_sigset_t saved_set, tmp_set;
1792 vki_sigaction_fromK_t saved_sigill_act;
1793 vki_sigaction_toK_t tmp_sigill_act;
1795 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1797 VG_(sigemptyset)(&tmp_set);
1798 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1800 Int r;
1802 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1803 vg_assert(r == 0);
1805 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1806 vg_assert(r == 0);
1807 tmp_sigill_act = saved_sigill_act;
1809 /* NODEFER: signal handler does not return (from the kernel's point of
1810 view), hence if it is to successfully catch a signal more than once,
1811 we need the NODEFER flag. */
1812 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1813 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1814 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1815 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1816 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1818 /* Does reading ID_AA64ISAR0_EL1 register throw SIGILL on base v8.0? */
1819 if (VG_MINIMAL_SETJMP(env_unsup_insn))
1820 is_base_v8 = True;
1821 else
1822 __asm__ __volatile__("mrs x0, ID_AA64ISAR0_EL1");
1824 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
1825 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1826 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1828 va = VexArchARM64;
1829 vai.endness = VexEndnessLE;
1831 /* Baseline features are v8.0. */
1832 vai.hwcaps = 0;
1834 VG_(machine_get_cache_info)(&vai);
1836 /* Check whether we need to use the fallback LLSC implementation.
1837 If the check fails, give up. */
1838 if (! VG_(parse_cpuinfo)())
1839 return False;
1841 /* 0 denotes 'not set'. The range of legitimate values here,
1842 after being set that is, is 2 through 17 inclusive. */
1843 vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
1844 vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
1845 ULong ctr_el0;
1846 __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
1847 vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
1848 vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >> 0) & 0xF) + 2;
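/* Per the Arm Architecture Reference Manual, CTR_EL0.DminLine (bits [19:16])
   and CTR_EL0.IminLine (bits [3:0]) hold log2 of the smallest cache line
   size in 4-byte words; adding 2 converts that to log2 of the size in bytes.
   For example, DminLine == 4 gives arm64_dMinLine_lg2_szB == 6, i.e. a
   64-byte minimum data cache line. */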
1849 VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
1850 "ctr_el0.iMinLine_szB = %d\n",
1851 1 << vai.arm64_dMinLine_lg2_szB,
1852 1 << vai.arm64_iMinLine_lg2_szB);
1853 VG_(debugLog)(1, "machine", "ARM64: requires_fallback_LLSC: %s\n",
1854 vai.arm64_requires_fallback_LLSC ? "yes" : "no");
1856 if (is_base_v8)
1857 return True;
1859 /* ID_AA64ISAR0_EL1 Instruction set attribute register 0 fields */
1860 #define ID_AA64ISAR0_FHM_SHIFT 48
1861 #define ID_AA64ISAR0_DP_SHIFT 44
1862 #define ID_AA64ISAR0_SM4_SHIFT 40
1863 #define ID_AA64ISAR0_SM3_SHIFT 36
1864 #define ID_AA64ISAR0_SHA3_SHIFT 32
1865 #define ID_AA64ISAR0_RDM_SHIFT 28
1866 #define ID_AA64ISAR0_ATOMICS_SHIFT 20
1867 /* Field values */
1868 #define ID_AA64ISAR0_FHM_SUPPORTED 0x1
1869 #define ID_AA64ISAR0_DP_SUPPORTED 0x1
1870 #define ID_AA64ISAR0_SM4_SUPPORTED 0x1
1871 #define ID_AA64ISAR0_SM3_SUPPORTED 0x1
1872 #define ID_AA64ISAR0_SHA3_SUPPORTED 0x1
1873 #define ID_AA64ISAR0_RDM_SUPPORTED 0x1
1874 #define ID_AA64ISAR0_ATOMICS_SUPPORTED 0x2
1876 /* ID_AA64ISAR1_EL1 Instruction set attribute register 1 fields */
1877 #define ID_AA64ISAR1_I8MM_SHIFT 52
1878 #define ID_AA64ISAR1_BF16_SHIFT 44
1879 #define ID_AA64ISAR1_DPB_SHIFT 0
1880 /* Field values */
1881 #define ID_AA64ISAR1_I8MM_SUPPORTED 0x1
1882 #define ID_AA64ISAR1_BF16_SUPPORTED 0x1
1883 #define ID_AA64ISAR1_DPBCVAP_SUPPORTED 0x1
1884 #define ID_AA64ISAR1_DPBCVADP_SUPPORTED 0x2
1886 /* ID_AA64PFR0_EL1 Processor feature register 0 fields */
1887 #define ID_AA64PFR0_VFP16_SHIFT 20
1888 #define ID_AA64PFR0_FP16_SHIFT 16
1889 /* Field values */
1890 #define ID_AA64PFR0_VFP16_SUPPORTED 0x1
1891 #define ID_AA64PFR0_FP16_SUPPORTED 0x1
1893 #define get_cpu_ftr(id) ({ \
1894 unsigned long val; \
1895 asm("mrs %0, "#id : "=r" (val)); \
1896 VG_(debugLog)(1, "machine", "ARM64: %-20s: 0x%016lx\n", #id, val); \
1897 })
1898 get_cpu_ftr(ID_AA64ISAR0_EL1);
1899 get_cpu_ftr(ID_AA64ISAR1_EL1);
1900 get_cpu_ftr(ID_AA64PFR0_EL1);
1902 #define get_ftr(id, ftr, fval, have_ftr) ({ \
1903 unsigned long rval; \
1904 asm("mrs %0, "#id : "=r" (rval)); \
1905 have_ftr = (fval & ((rval >> ftr) & 0xf)) >= fval ? True : False; \
1906 })
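/* For illustration of how get_ftr is used below: each ID register field is
   4 bits wide; the macro masks the extracted field with the required value
   and checks that the result is at least that value, so for the single-bit
   requirements used here it effectively tests "is that bit set".  E.g. for
   the v8.1 atomics (shift 20, required value 0x2) it amounts to

       have_atomics = (0x2 & ((ID_AA64ISAR0_EL1 >> 20) & 0xf)) >= 0x2;

   which accepts field values with bit 1 set, in particular 0b0010 and
   0b0011. */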
1908 /* Read ID_AA64ISAR0_EL1 attributes */
1910 /* FHM indicates support for FMLAL and FMLSL instructions.
1911 * Optional for v8.2.
1912 */
1913 get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT,
1914 ID_AA64ISAR0_FHM_SUPPORTED, have_fhm);
1916 /* DP indicates support for UDOT and SDOT instructions.
1917 * Optional for v8.2.
1918 */
1919 get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT,
1920 ID_AA64ISAR0_DP_SUPPORTED, have_dp);
1922 /* SM4 indicates support for SM4E and SM4EKEY instructions.
1923 * Optional for v8.2.
1924 */
1925 get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT,
1926 ID_AA64ISAR0_SM4_SUPPORTED, have_sm4);
1928 /* SM3 indicates support for SM3SS1, SM3TT1A, SM3TT1B, SM3TT2A, SM3TT2B,
1929 * SM3PARTW1, and SM3PARTW2 instructions.
1930 * Optional for v8.2.
1931 */
1932 get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT,
1933 ID_AA64ISAR0_SM3_SUPPORTED, have_sm3);
1935 /* SHA3 indicates support for EOR3, RAX1, XAR, and BCAX instructions.
1936 * Optional for v8.2.
1937 */
1938 get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT,
1939 ID_AA64ISAR0_SHA3_SUPPORTED, have_sha3);
1941 /* RDM indicates support for SQRDMLAH and SQRDMLSH instructions.
1942 * Mandatory from v8.1 onwards.
1943 */
1944 get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT,
1945 ID_AA64ISAR0_RDM_SUPPORTED, have_rdm);
1947 /* v8.1 ATOMICS indicates support for LDADD, LDCLR, LDEOR, LDSET, LDSMAX,
1948 * LDSMIN, LDUMAX, LDUMIN, CAS, CASP, and SWP instructions.
1949 * Mandatory from v8.1 onwards.
1950 */
1951 get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT,
1952 ID_AA64ISAR0_ATOMICS_SUPPORTED, have_atomics);
1954 /* Read ID_AA64ISAR1_EL1 attributes */
1956 /* I8MM indicates support for SMMLA, SUDOT, UMMLA, USMMLA, and USDOT
1957 * instructions.
1958 * Optional for v8.2.
1959 */
1960 get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT,
1961 ID_AA64ISAR1_I8MM_SUPPORTED, have_i8mm);
1963 /* BF16 indicates support for BFDOT, BFMLAL, BFMLAL2, BFMMLA, BFCVT, and
1964 * BFCVT2 instructions.
1965 * Optional for v8.2.
1966 */
1967 get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT,
1968 ID_AA64ISAR1_BF16_SUPPORTED, have_bf16);
1970 /* DPB indicates support for DC CVAP instruction.
1971 * Mandatory from v8.2 onwards.
1972 */
1973 get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT,
1974 ID_AA64ISAR1_DPBCVAP_SUPPORTED, have_dpbcvap);
1976 /* DPB indicates support for DC CVADP instruction.
1977 * Optional for v8.2.
1978 */
1979 get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT,
1980 ID_AA64ISAR1_DPBCVADP_SUPPORTED, have_dpbcvadp);
1982 /* Read ID_AA64PFR0_EL1 attributes */
1984 /* VFP16 indicates support for half-precision vector arithmetic.
1985 * Optional for v8.2. Must be the same value as FP16.
1986 */
1987 get_ftr(ID_AA64PFR0_EL1, ID_AA64PFR0_VFP16_SHIFT,
1988 ID_AA64PFR0_VFP16_SUPPORTED, have_vfp16);
1990 /* FP16 indicates support for half-precision scalar arithmetic.
1991 * Optional for v8.2. Must be the same value as VFP16.
1992 */
1993 get_ftr(ID_AA64PFR0_EL1, ID_AA64PFR0_FP16_SHIFT,
1994 ID_AA64PFR0_FP16_SUPPORTED, have_fp16);
1996 if (have_fhm) vai.hwcaps |= VEX_HWCAPS_ARM64_FHM;
1997 if (have_dpbcvap) vai.hwcaps |= VEX_HWCAPS_ARM64_DPBCVAP;
1998 if (have_dpbcvadp) vai.hwcaps |= VEX_HWCAPS_ARM64_DPBCVADP;
1999 if (have_sm3) vai.hwcaps |= VEX_HWCAPS_ARM64_SM3;
2000 if (have_sm4) vai.hwcaps |= VEX_HWCAPS_ARM64_SM4;
2001 if (have_sha3) vai.hwcaps |= VEX_HWCAPS_ARM64_SHA3;
2002 if (have_rdm) vai.hwcaps |= VEX_HWCAPS_ARM64_RDM;
2003 if (have_i8mm) vai.hwcaps |= VEX_HWCAPS_ARM64_I8MM;
2004 if (have_atomics) vai.hwcaps |= VEX_HWCAPS_ARM64_ATOMICS;
2005 if (have_bf16) vai.hwcaps |= VEX_HWCAPS_ARM64_BF16;
2006 if (have_fp16) vai.hwcaps |= VEX_HWCAPS_ARM64_FP16;
2007 if (have_vfp16) vai.hwcaps |= VEX_HWCAPS_ARM64_VFP16;
2009 #undef get_cpu_ftr
2010 #undef get_ftr
2012 return True;
2015 #elif defined(VGA_mips32)
2017 /* Define the position of F64 bit in FIR register. */
2018 # define FP64 22
2019 va = VexArchMIPS32;
2020 if (!VG_(parse_cpuinfo)())
2021 return False;
2023 # if defined(VKI_LITTLE_ENDIAN)
2024 vai.endness = VexEndnessLE;
2025 # elif defined(VKI_BIG_ENDIAN)
2026 vai.endness = VexEndnessBE;
2027 # else
2028 vai.endness = VexEndness_INVALID;
2029 # endif
2031 /* Same instruction set detection algorithm as for ppc32/arm... */
2032 vki_sigset_t saved_set, tmp_set;
2033 vki_sigaction_fromK_t saved_sigill_act;
2034 vki_sigaction_toK_t tmp_sigill_act;
2036 volatile Bool have_DSP, have_DSPr2, have_MSA;
2037 Int r;
2039 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
2041 VG_(sigemptyset)(&tmp_set);
2042 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
2044 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
2045 vg_assert(r == 0);
2047 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
2048 vg_assert(r == 0);
2049 tmp_sigill_act = saved_sigill_act;
2051 /* NODEFER: signal handler does not return (from the kernel's point of
2052 view), hence if it is to successfully catch a signal more than once,
2053 we need the NODEFER flag. */
2054 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
2055 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
2056 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
2057 tmp_sigill_act.ksa_handler = handler_unsup_insn;
2058 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
2060 if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {
2062 /* MSA instructions. */
2063 have_MSA = True;
2064 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
2065 have_MSA = False;
2066 } else {
2067 __asm__ __volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
2069 if (have_MSA) {
2070 vai.hwcaps |= VEX_PRID_IMP_P5600;
2071 } else {
2072 /* DSPr2 instructions. */
2073 have_DSPr2 = True;
2074 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
2075 have_DSPr2 = False;
2076 } else {
2077 __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
2079 if (have_DSPr2) {
2080 /* We assume it's 74K, since it can run DSPr2. */
2081 vai.hwcaps |= VEX_PRID_IMP_74K;
2082 } else {
2083 /* DSP instructions. */
2084 have_DSP = True;
2085 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
2086 have_DSP = False;
2087 } else {
2088 __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
2090 if (have_DSP) {
2091 /* We assume it's 34K, since it has support for DSP. */
2092 vai.hwcaps |= VEX_PRID_IMP_34K;
2098 # if defined(VGP_mips32_linux)
2099 Int fpmode = VG_(prctl)(VKI_PR_GET_FP_MODE, 0, 0, 0, 0);
2100 # else
2101 Int fpmode = -1;
2102 # endif
2104 if (fpmode < 0) {
2105 /* prctl(PR_GET_FP_MODE) is not supported by the kernel,
2106 so use an alternative way to determine the FP mode. */
2107 ULong result = 0;
2109 if (!VG_MINIMAL_SETJMP(env_unsup_insn)) {
2110 __asm__ volatile (
2111 ".set push\n\t"
2112 ".set noreorder\n\t"
2113 ".set oddspreg\n\t"
2114 ".set hardfloat\n\t"
2115 "lui $t0, 0x3FF0\n\t"
2116 "ldc1 $f0, %0\n\t"
2117 "mtc1 $t0, $f1\n\t"
2118 "sdc1 $f0, %0\n\t"
2119 ".set pop\n\t"
2120 : "+m"(result)
2121 :
2122 : "t0", "$f0", "$f1", "memory");
2124 fpmode = (result != 0x3FF0000000000000ull);
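/* Reasoning behind the fallback above (a sketch of the intent, as read from
   the code): with FR=0 the even/odd single-precision registers are paired,
   so the "mtc1 $t0, $f1" lands in the upper half of the $f0/$f1 pair and the
   "sdc1 $f0" writes back 0x3FF0000000000000.  With FR=1, $f0 is a full,
   independent 64-bit register, so the stored value stays 0 and fpmode ends
   up nonzero, which the code below treats the same way as a nonzero
   prctl(PR_GET_FP_MODE) result. */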
2128 if (fpmode != 0)
2129 vai.hwcaps |= VEX_MIPS_HOST_FR;
2131 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
2132 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
2133 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
2135 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
2136 VG_(machine_get_cache_info)(&vai);
2138 return True;
2141 #elif defined(VGA_mips64)
2143 va = VexArchMIPS64;
2144 if (!VG_(parse_cpuinfo)())
2145 return False;
2147 # if defined(VKI_LITTLE_ENDIAN)
2148 vai.endness = VexEndnessLE;
2149 # elif defined(VKI_BIG_ENDIAN)
2150 vai.endness = VexEndnessBE;
2151 # else
2152 vai.endness = VexEndness_INVALID;
2153 # endif
2155 vai.hwcaps |= VEX_MIPS_HOST_FR;
2157 /* Same instruction set detection algorithm as for ppc32/arm... */
2158 vki_sigset_t saved_set, tmp_set;
2159 vki_sigaction_fromK_t saved_sigill_act;
2160 vki_sigaction_toK_t tmp_sigill_act;
2162 volatile Bool have_MSA;
2163 Int r;
2165 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
2167 VG_(sigemptyset)(&tmp_set);
2168 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
2170 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
2171 vg_assert(r == 0);
2173 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
2174 vg_assert(r == 0);
2175 tmp_sigill_act = saved_sigill_act;
2177 /* NODEFER: signal handler does not return (from the kernel's point of
2178 view), hence if it is to successfully catch a signal more than once,
2179 we need the NODEFER flag. */
2180 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
2181 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
2182 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
2183 tmp_sigill_act.ksa_handler = handler_unsup_insn;
2184 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
2186 if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {
2188 /* MSA instructions */
2189 have_MSA = True;
2190 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
2191 have_MSA = False;
2192 } else {
2193 __asm__ __volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
2195 if (have_MSA) {
2196 vai.hwcaps |= VEX_PRID_IMP_P5600;
2200 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
2201 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
2202 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
2204 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
2206 VG_(machine_get_cache_info)(&vai);
2208 return True;
2211 #elif defined(VGP_nanomips_linux)
2213 va = VexArchNANOMIPS;
2214 vai.hwcaps = 0;
2216 # if defined(VKI_LITTLE_ENDIAN)
2217 vai.endness = VexEndnessLE;
2218 # elif defined(VKI_BIG_ENDIAN)
2219 vai.endness = VexEndnessBE;
2220 # else
2221 vai.endness = VexEndness_INVALID;
2222 # endif
2224 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
2226 VG_(machine_get_cache_info)(&vai);
2228 return True;
2230 #else
2231 # error "Unknown arch"
2232 #endif
2235 /* Notify host cpu instruction cache line size. */
2236 #if defined(VGA_ppc32)
2237 void VG_(machine_ppc32_set_clszB)( Int szB )
2239 vg_assert(hwcaps_done);
2241 /* Either the value must not have been set yet (zero) or we can
2242 tolerate it being set to the same value multiple times, as the
2243 stack scanning logic in m_main is a bit stupid. */
2244 vg_assert(vai.ppc_icache_line_szB == 0
2245 || vai.ppc_icache_line_szB == szB);
2247 vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
2248 vai.ppc_icache_line_szB = szB;
2250 #endif
2253 /* Notify host cpu instruction cache line size. */
2254 #if defined(VGA_ppc64be)|| defined(VGA_ppc64le)
2255 void VG_(machine_ppc64_set_clszB)( Int szB )
2257 vg_assert(hwcaps_done);
2259 /* Either the value must not have been set yet (zero) or we can
2260 tolerate it being set to the same value multiple times, as the
2261 stack scanning logic in m_main is a bit stupid. */
2262 vg_assert(vai.ppc_icache_line_szB == 0
2263 || vai.ppc_icache_line_szB == szB);
2265 vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
2266 vai.ppc_icache_line_szB = szB;
2269 void VG_(machine_ppc64_set_scv_support)( Int is_supported )
2271 vg_assert(hwcaps_done);
2272 vai.ppc_scv_supported = is_supported;
2275 #endif
2278 /* Notify host's ability to handle NEON instructions. */
2279 #if defined(VGA_arm)
2280 void VG_(machine_arm_set_has_NEON)( Bool has_neon )
2282 vg_assert(hwcaps_done);
2283 /* There's nothing else we can sanity check. */
2285 if (has_neon) {
2286 vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
2287 } else {
2288 vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
2291 #endif
2294 /* Fetch host cpu info, once established. */
2295 void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
2296 /*OUT*/VexArchInfo* pVai )
2298 vg_assert(hwcaps_done);
2299 if (pVa) *pVa = va;
2300 if (pVai) *pVai = vai;
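/* Typical use by a caller (sketch only; assumes VG_(machine_get_hwcaps) has
   already run, otherwise the assertion above fires):

       VexArch     host_arch;
       VexArchInfo host_archinfo;
       VG_(machine_get_VexArchInfo)( &host_arch, &host_archinfo );
       ... host_archinfo.hwcaps now describes the host ...
*/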
2304 /* Returns the size of the largest guest register that we will
2305 simulate in this run. This depends on both the guest architecture
2306 and on the specific capabilities we are simulating for that guest
2307 (eg, AVX or non-AVX ?, for amd64). Should return either 4, 8, 16
2308 or 32. General rule: if in doubt, return a value larger than
2309 reality.
2311 This information is needed by Cachegrind and Callgrind to decide
2312 what the minimum cache line size they are prepared to simulate is.
2313 Basically require that the minimum cache line size is at least as
2314 large as the largest register that might get transferred to/from
2315 memory, so as to guarantee that any such transaction can straddle
2316 at most 2 cache lines.
2317 */
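/* A concrete instance of the rule above: on amd64 with AVX the largest
   transfer is 32 bytes, so requiring a simulated line size of at least 32
   bytes guarantees that even a maximally misaligned 32-byte access touches
   at most two cache lines. */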
2318 Int VG_(machine_get_size_of_largest_guest_register) ( void )
2320 vg_assert(hwcaps_done);
2321 /* Once hwcaps_done is True, we can fish around inside va/vai to
2322 find the information we need. */
2324 # if defined(VGA_x86)
2325 vg_assert(va == VexArchX86);
2326 /* We don't support AVX, so 32 is out. At the other end, even if
2327 we don't support any SSE, the X87 can generate 10 byte
2328 transfers, so let's say 16 to be on the safe side. Hence the
2329 answer is always 16. */
2330 return 16;
2332 # elif defined(VGA_amd64)
2333 /* if AVX then 32 else 16 */
2334 return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;
2336 # elif defined(VGA_ppc32)
2337 /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
2338 if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
2339 if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
2340 if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
2341 return 8;
2343 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
2344 /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
2345 if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
2346 if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
2347 if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
2348 return 8;
2350 # elif defined(VGA_s390x)
2351 return 8;
2353 # elif defined(VGA_arm)
2354 /* Really it depends whether or not we have NEON, but let's just
2355 assume we always do. */
2356 return 16;
2358 # elif defined(VGA_arm64)
2359 /* ARM64 always has Neon, AFAICS. */
2360 return 16;
2362 # elif defined(VGA_mips32) || defined(VGP_nanomips_linux)
2363 /* The guest state implies 4, but that can't really be true, can
2364 it? */
2365 return 8;
2367 # elif defined(VGA_mips64)
2368 return 8;
2370 # else
2371 # error "Unknown arch"
2372 # endif
2376 // Given a pointer to a function as obtained by "& functionname" in C,
2377 // produce a pointer to the actual entry point for the function.
2378 void* VG_(fnptr_to_fnentry)( void* f )
2380 # if defined(VGP_x86_linux) || defined(VGP_amd64_linux) \
2381 || defined(VGP_arm_linux) || defined(VGO_darwin) || defined(VGO_freebsd) \
2382 || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
2383 || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
2384 || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
2385 || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris) \
2386 || defined(VGP_nanomips_linux)
2387 return f;
2388 # elif defined(VGP_ppc64be_linux)
2389 /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
2390 3-word function descriptor, of which the first word is the entry
2391 address. */
2392 UWord* descr = (UWord*)f;
2393 return (void*)(descr[0]);
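/* The descriptor layout assumed here (illustrative, ELFv1 big-endian ABI):
       descr[0]   entry-point address   (what we return)
       descr[1]   TOC (r2) value for the callee
       descr[2]   environment pointer (unused from C)
*/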
2394 # else
2395 # error "Unknown platform"
2396 # endif
2399 /*--------------------------------------------------------------------*/
2400 /*--- end ---*/
2401 /*--------------------------------------------------------------------*/