/* -*- mode: C; c-basic-offset: 3; -*- */

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2012-2017  Florian Krohm

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/
#include <stdio.h>     // fprintf
#include <assert.h>    // assert
#if defined(__APPLE__)
#include <machine/endian.h>
#define __BYTE_ORDER    BYTE_ORDER
#define __LITTLE_ENDIAN LITTLE_ENDIAN
#elif defined(__sun)
#define __LITTLE_ENDIAN 1234
#define __BIG_ENDIAN    4321
#  if defined(_LITTLE_ENDIAN)
#  define __BYTE_ORDER    __LITTLE_ENDIAN
#  else
#  define __BYTE_ORDER    __BIG_ENDIAN
#  endif
#else
#include <endian.h>
#endif
#include <inttypes.h>  // PRIx64
#include <stdbool.h>   // bool
#include "vbits.h"
#include "vtest.h"
#include "memcheck.h"  // VALGRIND_MAKE_MEM_DEFINED
/* Return the bits of V if they fit into 64-bit. If V has fewer than
   64 bits, the bit pattern is zero-extended to the left. */
static uint64_t
get_bits64(vbits_t v)
{
   switch (v.num_bits) {
   case 1:  return v.bits.u32;
   case 8:  return v.bits.u8;
   case 16: return v.bits.u16;
   case 32: return v.bits.u32;
   case 64: return v.bits.u64;
   default:
      panic(__func__);
   }
}
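/* Worked example (illustration only): for a 16-bit value with
   v.bits.u16 == 0xabcd, get_bits64 returns 0x000000000000abcd, i.e. the
   bit pattern zero-extended to 64 bits as described above. */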
void
print_vbits(FILE *fp, vbits_t v)
{
   switch (v.num_bits) {
   case 1:  fprintf(fp, "%08x", v.bits.u32); break;
   case 8:  fprintf(fp, "%02x", v.bits.u8);  break;
   case 16: fprintf(fp, "%04x", v.bits.u16); break;
   case 32: fprintf(fp, "%08x", v.bits.u32); break;
   case 64: fprintf(fp, "%016"PRIx64, v.bits.u64); break;
   case 128:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         fprintf(fp, "%016"PRIx64, v.bits.u128[1]);
         fprintf(fp, "%016"PRIx64, v.bits.u128[0]);
      } else {
         fprintf(fp, "%016"PRIx64, v.bits.u128[0]);
         fprintf(fp, "%016"PRIx64, v.bits.u128[1]);
      }
      break;
   case 256:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         fprintf(fp, "%016"PRIx64, v.bits.u256[3]);
         fprintf(fp, "%016"PRIx64, v.bits.u256[2]);
         fprintf(fp, "%016"PRIx64, v.bits.u256[1]);
         fprintf(fp, "%016"PRIx64, v.bits.u256[0]);
      } else {
         fprintf(fp, "%016"PRIx64, v.bits.u256[0]);
         fprintf(fp, "%016"PRIx64, v.bits.u256[1]);
         fprintf(fp, "%016"PRIx64, v.bits.u256[2]);
         fprintf(fp, "%016"PRIx64, v.bits.u256[3]);
      }
      break;
   default:
      panic(__func__);
   }
}
/* Return a value where all bits are set to undefined. */
vbits_t
undefined_vbits(unsigned num_bits)
{
   vbits_t new = { .num_bits = num_bits };

   switch (num_bits) {
   case   1: new.bits.u32 = 0x01;   break;
   case   8: new.bits.u8  = 0xff;   break;
   case  16: new.bits.u16 = 0xffff; break;
   case  32: new.bits.u32 = ~0;     break;
   case  64: new.bits.u64 = ~0ull;  break;
   case 128: new.bits.u128[0] = ~0ull;
             new.bits.u128[1] = ~0ull;
             break;
   case 256: new.bits.u256[0] = ~0ull;
             new.bits.u256[1] = ~0ull;
             new.bits.u256[2] = ~0ull;
             new.bits.u256[3] = ~0ull;
             break;
   default:
      panic(__func__);
   }
   return new;
}
/* The following routines named undefined_vbits_BxE() return a 128-bit
 * vector with E elements each of size bits.  If any of the bits in an
 * element is undefined, then return a value where all bits in that
 * element are undefined.
 */
vbits_t
undefined_vbits_BxE(unsigned int bits, unsigned int elements, vbits_t v)
{
   vbits_t new = { .num_bits = v.num_bits };
   uint64_t mask = ~0ull >> (64 - bits);
   unsigned int i, j;

   assert((elements % 2) == 0);
   assert(bits <= 64);

   for (i = 0; i < 2; i++) {
      new.bits.u128[i] = 0ull;

      for (j = 0; j < elements/2; j++) {
         if ((v.bits.u128[i] & (mask << (j*bits))) != 0)
            new.bits.u128[i] |= (mask << (j*bits));
      }
   }
   return new;
}
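/* Worked example (illustration only): with bits = 16 and elements = 8,
   mask == 0xffff.  If v.bits.u128[0] == 0x0000000000100000 (one undefined
   bit inside the second 16-bit element), the result has
   new.bits.u128[0] == 0x00000000ffff0000: the whole element is poisoned. */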
/* The following routines named undefined_vbits_BxE_rotate() return a 128-bit
 * vector with E elements each of size bits.  The bits in v are rotated
 * left by the amounts in the corresponding element of val.  The rotate
 * amount field is assumed to be at most 8-bits wide.
 */
vbits_t
undefined_vbits_BxE_rotate(unsigned int bits, unsigned int elements,
                           vbits_t v, value_t val)
{
   vbits_t new = { .num_bits = v.num_bits };
   uint64_t mask = ~0ull >> (64 - bits);
   uint64_t const shift_mask = 0xFF;
   uint64_t element;
   unsigned int i, j;
   signed char shift;

   assert((elements % 2) == 0);
   assert(bits <= 64);

   for (i = 0; i < 2; i++) {
      new.bits.u128[i] = 0ull;

      for (j = 0; j < elements/2; j++) {
         element = (v.bits.u128[i] >> (j*bits)) & mask;
         shift = (int)((val.u128[i] >> (j*bits)) & shift_mask);

         if (shift < 0) {
            /* right shift */
            new.bits.u128[i] = element >> -shift;

            /* OR in the bits shifted out into the top of the element */
            new.bits.u128[i] |= element << (bits + shift);
         } else {
            /* left shift */
            /* upper bits from shift */
            new.bits.u128[i] = element << shift;

            /* OR in the bits shifted out into the bottom of the element */
            new.bits.u128[i] |= element >> (bits - shift);
         }
      }
   }
   return new;
}
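/* Worked example (illustration only): with bits = 8 and a rotate amount
   of 3, an element vbit pattern of 0b10000001 contributes
   (element << 3) | (element >> 5), i.e. the undefined bits are rotated
   left by 3 exactly like the data bits they shadow. */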
/* Only the even elements of the input are used by the Iop */
vbits_t
undefined_vbits_128_even_element(unsigned int bits, unsigned int elements,
                                 vbits_t v)
{
   unsigned int i;
   uint64_t mask;
   unsigned int const element_width = 128/elements;
   vbits_t new = { .num_bits = v.num_bits };

   assert((elements % 2) == 0);
   assert(bits <= 64);

   /* Create a 128-bit mask whose bits in the even numbered
    * elements are all ones.
    */
   mask = ~0ull >> (64 - bits);

   for (i = 2; i < elements/2; i = i+2) {
      mask |= mask << (i * element_width);
   }

   new.bits.u128[0] = mask & v.bits.u128[0];
   new.bits.u128[1] = mask & v.bits.u128[1];

   return new;
}
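/* Worked example (illustration only): with elements = 4 and bits = 32,
   element_width == 32, the loop body never runs, and mask == 0xffffffff.
   The final ANDs keep only the low half of each u128 word, i.e. the
   even-numbered elements 0 and 2; the odd elements' vbits are dropped. */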
/* Concatenate bit i from each byte j.  Place concatenated 8 bit value into
 * byte i of the result.  Do for all i from 0 to 7 and j from 0 to 7 of each
 * 64-bit element.
 */
vbits_t
undefined_vbits_64x2_transpose(vbits_t v)
{
   vbits_t new = { .num_bits = v.num_bits };
   unsigned int bit, byte, element;
   uint64_t value, new_value, select_bit;

   for (element = 0; element < 2; element++) {
      value = v.bits.u128[element];
      new_value = 0;

      for (byte = 0; byte < 8; byte++) {
         for (bit = 0; bit < 8; bit++) {
            select_bit = 1ULL & (value >> (bit + 8*byte));
            new_value |= select_bit << (bit*8 + byte);
         }
      }
      new.bits.u128[element] = new_value;
   }
   return new;
}
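/* Worked example (illustration only): a single undefined bit at position
   24 (bit 0 of byte 3) moves to position 3 (bit 3 of byte 0), so an input
   half of 0x0000000001000000 transposes to 0x0000000000000008. */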
/* The routine takes a 256-bit vector value stored across the two 128-bit
 * source operands src1 and src2.  The size of each element in the input is
 * src_num_bits.  The elements are narrowed to result_num_bits and packed
 * into the result.  If saturate is True, then all the result bits are
 * set to 1 if the source element can not be represented in result_num_bits.
 */
vbits_t
undefined_vbits_Narrow256_AtoB(unsigned int src_num_bits,
                               unsigned int result_num_bits,
                               vbits_t src1_v, value_t src1_value,
                               vbits_t src2_v, value_t src2_value,
                               bool saturate)
{
   vbits_t new = { .num_bits = src1_v.num_bits };
   unsigned int i;
   uint64_t vbits, new_value;
   uint64_t const src_mask = ~0x0ULL >> (64 - src_num_bits);
   uint64_t const result_mask = ~0x0ULL >> (64 - result_num_bits);
   unsigned int num_elements_per_64_bits = src_num_bits/64;
   unsigned int shift;

   /* NOTE:  This function assumes the saturation occurs on unsigned values.
    * The saturated value is 0xFFFF if the vbit is in one of the lower
    * 32-bits of the source.  The saturated result is 0xFFFF0000 if the
    * vbit is in the upper 32-bits of the source.  Not sure what
    * the saturated result is in general for a B-bit result.
    *
    * ONLY TESTED FOR 64 bit input, 32 bit result
    */
   uint64_t const saturated_result = 0xFFFFULL;

   /* Source elements are split between the two source operands */

   assert(src_num_bits <= 64);
   assert(result_num_bits < 64);
   assert(result_num_bits < src_num_bits);

   /* Narrow the elements from src1 to the upper 64-bits of result.
    * Do each of the 64 bit values that make up a u128
    */
   new_value = 0;
   for (i = 0; i < num_elements_per_64_bits; i++) {
      vbits = src1_v.bits.u128[0] >> (i * src_num_bits);
      vbits &= src_mask;

      shift = result_num_bits * i;

      if (vbits) {
         if (saturate) {
            /* Value will not fit in B-bits, saturate the result as needed. */
            if (vbits >> (src_num_bits/2))
               /* vbit is in the upper half of the source */
               new_value |= saturated_result << (shift + result_num_bits/2);
            else
               new_value |= saturated_result << shift;
         } else {
            new_value |= (vbits & result_mask) << shift;
         }
      }
   }

   for (i = 0; i < num_elements_per_64_bits; i++) {
      vbits = src1_v.bits.u128[1] >> (i * src_num_bits);
      vbits &= src_mask;

      shift = result_num_bits * i + (num_elements_per_64_bits
                                     * result_num_bits);

      if (vbits) {
         if (saturate) {
            /* Value will not fit in result_num_bits, saturate the result
             * as needed.
             */
            if (vbits >> (src_num_bits/2))
               /* vbit is in the upper half of the source */
               new_value |= saturated_result << (shift + result_num_bits/2);
            else
               new_value |= saturated_result << shift;
         } else {
            new_value |= (vbits & result_mask) << shift;
         }
      }
   }

   if (__BYTE_ORDER == __LITTLE_ENDIAN)
      new.bits.u128[1] = new_value;
   else
      /* Big endian, swap the upper and lower 32-bits of new_value */
      new.bits.u128[0] = (new_value << 32) | (new_value >> 32);

   /* Narrow the elements from src2 to the lower 64-bits of result.
    * Do each of the 64 bit values that make up a u128
    */
   new_value = 0;
   for (i = 0; i < num_elements_per_64_bits; i++) {
      vbits = src2_v.bits.u128[0] >> (i * src_num_bits);
      vbits &= src_mask;

      shift = result_num_bits * i;

      if (vbits) {
         if (saturate) {
            /* Value will not fit in result, saturate the result as needed. */
            if (vbits >> (src_num_bits/2))
               /* vbit is in the upper half of the source */
               new_value |= saturated_result << (shift + result_num_bits/2);
            else
               new_value |= saturated_result << shift;
         } else {
            new_value |= (vbits & result_mask) << shift;
         }
      }
   }

   for (i = 0; i < num_elements_per_64_bits; i++) {
      vbits = src2_v.bits.u128[1] >> (i * src_num_bits);
      vbits &= src_mask;

      if (vbits) {
         if (saturate) {
            /* Value will not fit in result_num_bits, saturate the result
             * as needed.
             */
            if (vbits >> (src_num_bits/2))
               /* vbit is in the upper half of the source */
               new_value |= saturated_result << (result_num_bits * i
                                                 + (num_elements_per_64_bits
                                                    * result_num_bits)
                                                 + result_num_bits/2);
            else
               new_value |= saturated_result << (result_num_bits * i
                                                 + (num_elements_per_64_bits
                                                    * result_num_bits));
         } else {
            new_value |= (vbits & result_mask) << (result_num_bits * i
                                                   + (num_elements_per_64_bits
                                                      * result_num_bits));
         }
      }
   }

   if (__BYTE_ORDER == __LITTLE_ENDIAN)
      new.bits.u128[0] = new_value;
   else
      /* Big endian, swap the upper and lower 32-bits of new_value */
      new.bits.u128[1] = (new_value << 32) | (new_value >> 32);

   return new;
}
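/* Worked example (illustration only, for the tested 64-bit to 32-bit case):
   if a source element has any undefined bit in its upper 32 bits and
   saturate is true, saturated_result (0xFFFF) is OR-ed in at
   shift + result_num_bits/2, i.e. into the upper 16 bits of the narrowed
   32-bit slot, matching the 0xFFFF0000 pattern described in the NOTE. */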
/* Return a value where all bits are set to defined. */
vbits_t
defined_vbits(unsigned num_bits)
{
   vbits_t new = { .num_bits = num_bits };

   switch (num_bits) {
   case   1: new.bits.u32 = 0x0; break;
   case   8: new.bits.u8  = 0x0; break;
   case  16: new.bits.u16 = 0x0; break;
   case  32: new.bits.u32 = 0x0; break;
   case  64: new.bits.u64 = 0x0; break;
   case 128: new.bits.u128[0] = 0x0;
             new.bits.u128[1] = 0x0;
             break;
   case 256: new.bits.u256[0] = 0x0;
             new.bits.u256[1] = 0x0;
             new.bits.u256[2] = 0x0;
             new.bits.u256[3] = 0x0;
             break;
   default:
      panic(__func__);
   }
   return new;
}
/* Return 1, if equal. */
int
equal_vbits(vbits_t v1, vbits_t v2)
{
   assert(v1.num_bits == v2.num_bits);

   switch (v1.num_bits) {
   case 1:   return v1.bits.u32 == v2.bits.u32;
   case 8:   return v1.bits.u8  == v2.bits.u8;
   case 16:  return v1.bits.u16 == v2.bits.u16;
   case 32:  return v1.bits.u32 == v2.bits.u32;
   case 64:  return v1.bits.u64 == v2.bits.u64;
   case 128: return v1.bits.u128[0] == v2.bits.u128[0] &&
                    v1.bits.u128[1] == v2.bits.u128[1];
   case 256: return v1.bits.u256[0] == v2.bits.u256[0] &&
                    v1.bits.u256[1] == v2.bits.u256[1] &&
                    v1.bits.u256[2] == v2.bits.u256[2] &&
                    v1.bits.u256[3] == v2.bits.u256[3];
   default:
      panic(__func__);
   }
}
/* Truncate the bit pattern in V1 to NUM_BITS bits */
vbits_t
truncate_vbits(vbits_t v, unsigned num_bits)
{
   assert(num_bits <= v.num_bits);

   if (num_bits == v.num_bits) return v;

   vbits_t new = { .num_bits = num_bits };

   if (num_bits <= 64) {
      uint64_t bits;

      if (v.num_bits <= 64)
         bits = get_bits64(v);
      else if (v.num_bits == 128)
         if (__BYTE_ORDER == __LITTLE_ENDIAN)
            bits = v.bits.u128[0];
         else
            bits = v.bits.u128[1];
      else if (v.num_bits == 256)
         if (__BYTE_ORDER == __LITTLE_ENDIAN)
            bits = v.bits.u256[0];
         else
            bits = v.bits.u256[3];
      else
         panic(__func__);

      switch (num_bits) {
      case 1:  new.bits.u32 = bits & 0x01;   break;
      case 8:  new.bits.u8  = bits & 0xff;   break;
      case 16: new.bits.u16 = bits & 0xffff; break;
      case 32: new.bits.u32 = bits & ~0u;    break;
      case 64: new.bits.u64 = bits & ~0ll;   break;
      default:
         panic(__func__);
      }
      return new;
   }

   if (num_bits == 128) {
      assert(v.num_bits == 256);
      /* From 256 bits to 128 */
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         new.bits.u128[0] = v.bits.u256[0];
         new.bits.u128[1] = v.bits.u256[1];
      } else {
         new.bits.u128[0] = v.bits.u256[2];
         new.bits.u128[1] = v.bits.u256[3];
      }
      return new;
   }

   /* Cannot truncate to 256 bits from something larger */
   panic(__func__);
}
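/* Worked example (illustration only): truncating a 128-bit value to 16
   bits keeps, on a little-endian host, the low 16 bits of u128[0]; e.g.
   u128[0] == 0x12345678 truncates to .u16 == 0x5678. */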
/* Helper function to compute left_vbits */
static uint64_t
left64(uint64_t x)
{
   // left(x) = x | -x
   return x | (~x + 1);
}

vbits_t
left_vbits(vbits_t v, unsigned num_bits)
{
   assert(num_bits >= v.num_bits);

   vbits_t new = { .num_bits = num_bits };

   if (v.num_bits <= 64) {
      uint64_t bits = left64(get_bits64(v));

      switch (num_bits) {
      case 8:  new.bits.u8  = bits & 0xff;   break;
      case 16: new.bits.u16 = bits & 0xffff; break;
      case 32: new.bits.u32 = bits & ~0u;    break;
      case 64: new.bits.u64 = bits & ~0ll;   break;
      case 128:
         if (__BYTE_ORDER == __LITTLE_ENDIAN) {
            new.bits.u128[0] = bits;
            if (bits & (1ull << 63)) {  // MSB is set
               new.bits.u128[1] = ~0ull;
            } else {
               new.bits.u128[1] = 0;
            }
         } else {
            new.bits.u128[1] = bits;
            if (bits & (1ull << 63)) {  // MSB is set
               new.bits.u128[0] = ~0ull;
            } else {
               new.bits.u128[0] = 0;
            }
         }
         break;
      case 256:
         if (__BYTE_ORDER == __LITTLE_ENDIAN) {
            new.bits.u256[0] = bits;
            if (bits & (1ull << 63)) {  // MSB is set
               new.bits.u256[1] = ~0ull;
               new.bits.u256[2] = ~0ull;
               new.bits.u256[3] = ~0ull;
            } else {
               new.bits.u256[1] = 0;
               new.bits.u256[2] = 0;
               new.bits.u256[3] = 0;
            }
         } else {
            new.bits.u256[3] = bits;
            if (bits & (1ull << 63)) {  // MSB is set
               new.bits.u256[0] = ~0ull;
               new.bits.u256[1] = ~0ull;
               new.bits.u256[2] = ~0ull;
            } else {
               new.bits.u256[0] = 0;
               new.bits.u256[1] = 0;
               new.bits.u256[2] = 0;
            }
         }
         break;
      default:
         panic(__func__);
      }
      return new;
   }

   if (v.num_bits == 128) {
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         if (v.bits.u128[1] != 0) {
            new.bits.u128[0] = v.bits.u128[0];
            new.bits.u128[1] = left64(v.bits.u128[1]);
         } else {
            new.bits.u128[0] = left64(v.bits.u128[0]);
            if (new.bits.u128[0] & (1ull << 63)) {  // MSB is set
               new.bits.u128[1] = ~0ull;
            } else {
               new.bits.u128[1] = 0;
            }
         }
      } else {
         if (v.bits.u128[0] != 0) {
            new.bits.u128[0] = left64(v.bits.u128[0]);
            new.bits.u128[1] = v.bits.u128[1];
         } else {
            new.bits.u128[1] = left64(v.bits.u128[1]);
            if (new.bits.u128[1] & (1ull << 63)) {  // MSB is set
               new.bits.u128[0] = ~0ull;
            } else {
               new.bits.u128[0] = 0;
            }
         }
      }

      if (num_bits == 128) return new;

      assert(num_bits == 256);

      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         uint64_t b1 = new.bits.u128[1];
         uint64_t b0 = new.bits.u128[0];

         new.bits.u256[0] = b0;
         new.bits.u256[1] = b1;

         if (new.bits.u256[1] & (1ull << 63)) {  // MSB is set
            new.bits.u256[2] = ~0ull;
            new.bits.u256[3] = ~0ull;
         } else {
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         }
      } else {
         uint64_t b1 = new.bits.u128[0];
         uint64_t b0 = new.bits.u128[1];

         new.bits.u256[2] = b0;
         new.bits.u256[3] = b1;

         if (new.bits.u256[2] & (1ull << 63)) {  // MSB is set
            new.bits.u256[0] = ~0ull;
            new.bits.u256[1] = ~0ull;
         } else {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
         }
      }
      return new;
   }

   panic(__func__);
}
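/* Worked example (illustration only): left64(0x4) == 0xfffffffffffffffc,
   i.e. every bit at or above the lowest set bit becomes 1.  left_vbits
   applies the same smear across word boundaries for 128/256-bit values. */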
vbits_t
or_vbits(vbits_t v1, vbits_t v2)
{
   assert(v1.num_bits == v2.num_bits);

   vbits_t new = { .num_bits = v1.num_bits };

   switch (v1.num_bits) {
   case 1:   new.bits.u1  = (v1.bits.u1 | v2.bits.u1) & 1; break;
   case 8:   new.bits.u8  = v1.bits.u8  | v2.bits.u8;  break;
   case 16:  new.bits.u16 = v1.bits.u16 | v2.bits.u16; break;
   case 32:  new.bits.u32 = v1.bits.u32 | v2.bits.u32; break;
   case 64:  new.bits.u64 = v1.bits.u64 | v2.bits.u64; break;
   case 128: new.bits.u128[0] = v1.bits.u128[0] | v2.bits.u128[0];
             new.bits.u128[1] = v1.bits.u128[1] | v2.bits.u128[1];
             break;
   case 256: new.bits.u256[0] = v1.bits.u256[0] | v2.bits.u256[0];
             new.bits.u256[1] = v1.bits.u256[1] | v2.bits.u256[1];
             new.bits.u256[2] = v1.bits.u256[2] | v2.bits.u256[2];
             new.bits.u256[3] = v1.bits.u256[3] | v2.bits.u256[3];
             break;
   default:
      panic(__func__);
   }

   return new;
}
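/* A minimal usage sketch (illustration only, not part of the test driver;
   the GCC-style unused attribute just keeps the compiler quiet): OR-ing two
   32-bit vbit masks unions their undefinedness bit-for-bit. */
static __attribute__((unused)) vbits_t
example_or_vbits_32(void)
{
   vbits_t a = { .num_bits = 32 };
   vbits_t b = { .num_bits = 32 };
   a.bits.u32 = 0x0000ffffu;   /* low 16 bits undefined */
   b.bits.u32 = 0xffff0000u;   /* high 16 bits undefined */
   return or_vbits(a, b);      /* all 32 bits come out undefined */
}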
vbits_t
and_vbits(vbits_t v1, vbits_t v2)
{
   assert(v1.num_bits == v2.num_bits);

   vbits_t new = { .num_bits = v1.num_bits };

   switch (v1.num_bits) {
   case 1:   new.bits.u1  = (v1.bits.u1 & v2.bits.u1) & 1; break;
   case 8:   new.bits.u8  = v1.bits.u8  & v2.bits.u8;  break;
   case 16:  new.bits.u16 = v1.bits.u16 & v2.bits.u16; break;
   case 32:  new.bits.u32 = v1.bits.u32 & v2.bits.u32; break;
   case 64:  new.bits.u64 = v1.bits.u64 & v2.bits.u64; break;
   case 128: new.bits.u128[0] = v1.bits.u128[0] & v2.bits.u128[0];
             new.bits.u128[1] = v1.bits.u128[1] & v2.bits.u128[1];
             break;
   case 256: new.bits.u256[0] = v1.bits.u256[0] & v2.bits.u256[0];
             new.bits.u256[1] = v1.bits.u256[1] & v2.bits.u256[1];
             new.bits.u256[2] = v1.bits.u256[2] & v2.bits.u256[2];
             new.bits.u256[3] = v1.bits.u256[3] & v2.bits.u256[3];
             break;
   default:
      panic(__func__);
   }

   return new;
}
vbits_t
concat_vbits(vbits_t v1, vbits_t v2)
{
   assert(v1.num_bits == v2.num_bits);

   vbits_t new = { .num_bits = v1.num_bits * 2 };

   switch (v1.num_bits) {
   case 8:   new.bits.u16 = v1.bits.u8;
             new.bits.u16 = (new.bits.u16 << 8)  | v2.bits.u8;  break;
   case 16:  new.bits.u32 = v1.bits.u16;
             new.bits.u32 = (new.bits.u32 << 16) | v2.bits.u16; break;
   case 32:  new.bits.u64 = v1.bits.u32;
             new.bits.u64 = (new.bits.u64 << 32) | v2.bits.u32; break;
   case 64:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         new.bits.u128[0] = v2.bits.u64;
         new.bits.u128[1] = v1.bits.u64;
      } else {
         new.bits.u128[0] = v1.bits.u64;
         new.bits.u128[1] = v2.bits.u64;
      }
      break;
   case 128:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         new.bits.u256[0] = v2.bits.u128[0];
         new.bits.u256[1] = v2.bits.u128[1];
         new.bits.u256[2] = v1.bits.u128[0];
         new.bits.u256[3] = v1.bits.u128[1];
      } else {
         new.bits.u256[0] = v1.bits.u128[0];
         new.bits.u256[1] = v1.bits.u128[1];
         new.bits.u256[2] = v2.bits.u128[0];
         new.bits.u256[3] = v2.bits.u128[1];
      }
      break;
   case 256: /* Fall through */
   default:
      panic(__func__);
   }

   return new;
}
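/* Worked example (illustration only): concatenating two 32-bit patterns
   v1 == 0xAAAAAAAA and v2 == 0x55555555 yields the 64-bit pattern
   0xAAAAAAAA55555555, with v1 occupying the upper half. */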
vbits_t
upper_vbits(vbits_t v)
{
   vbits_t new = { .num_bits = v.num_bits / 2 };

   switch (v.num_bits) {
   case 16:  new.bits.u8  = v.bits.u16 >> 8;  break;
   case 32:  new.bits.u16 = v.bits.u32 >> 16; break;
   case 64:  new.bits.u32 = v.bits.u64 >> 32; break;
   case 128:
      if (__BYTE_ORDER == __LITTLE_ENDIAN)
         new.bits.u64 = v.bits.u128[1];
      else
         new.bits.u64 = v.bits.u128[0];
      break;
   case 256:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         new.bits.u128[0] = v.bits.u256[2];
         new.bits.u128[1] = v.bits.u256[3];
      } else {
         new.bits.u128[0] = v.bits.u256[0];
         new.bits.u128[1] = v.bits.u256[1];
      }
      break;
   default:
      panic(__func__);
   }

   return new;
}
vbits_t
zextend_vbits(vbits_t v, unsigned num_bits)
{
   assert(num_bits >= v.num_bits);

   if (num_bits == v.num_bits) return v;

   vbits_t new = { .num_bits = num_bits };

   if (v.num_bits <= 64) {
      uint64_t bits = get_bits64(v);

      switch (num_bits) {
      case 8:  new.bits.u8  = bits; break;
      case 16: new.bits.u16 = bits; break;
      case 32: new.bits.u32 = bits; break;
      case 64: new.bits.u64 = bits; break;
      case 128:
         if (__BYTE_ORDER == __LITTLE_ENDIAN) {
            new.bits.u128[0] = bits;
            new.bits.u128[1] = 0;
         } else {
            new.bits.u128[0] = 0;
            new.bits.u128[1] = bits;
         }
         break;
      case 256:
         if (__BYTE_ORDER == __LITTLE_ENDIAN) {
            new.bits.u256[0] = bits;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         } else {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = bits;
         }
         break;
      default:
         panic(__func__);
      }
      return new;
   }

   if (v.num_bits == 128) {
      assert(num_bits == 256);

      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         new.bits.u256[0] = v.bits.u128[0];
         new.bits.u256[1] = v.bits.u128[1];
         new.bits.u256[2] = 0;
         new.bits.u256[3] = 0;
      } else {
         new.bits.u256[0] = 0;
         new.bits.u256[1] = 0;
         new.bits.u256[2] = v.bits.u128[1];
         new.bits.u256[3] = v.bits.u128[0];
      }
      return new;
   }

   /* Cannot zero-extend a 256-bit value to something larger */
   panic(__func__);
}
vbits_t
sextend_vbits(vbits_t v, unsigned num_bits)
{
   assert(num_bits >= v.num_bits);

   int sextend = 0;

   switch (v.num_bits) {
   case 8:   if (v.bits.u8  == 0x80)             sextend = 1; break;
   case 16:  if (v.bits.u16 == 0x8000)           sextend = 1; break;
   case 32:  if (v.bits.u32 == 0x80000000)       sextend = 1; break;
   case 64:  if (v.bits.u64 == (1ull << 63))     sextend = 1; break;
   case 128: if (v.bits.u128[1] == (1ull << 63)) sextend = 1; break;
   case 256: if (v.bits.u256[3] == (1ull << 63)) sextend = 1; break;
   default:
      panic(__func__);
   }

   return sextend ? left_vbits(v, num_bits) : zextend_vbits(v, num_bits);
}
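/* Illustrative sketch (not part of the test driver; unused attribute is
   GCC-style): sign-extending an 8-bit vbit pattern whose only undefined
   bit is the sign bit smears it across the new upper bits via left_vbits. */
static __attribute__((unused)) vbits_t
example_sextend_8_to_16(void)
{
   vbits_t v = { .num_bits = 8 };
   v.bits.u8 = 0x80;            /* sign bit undefined */
   return sextend_vbits(v, 16); /* yields .u16 == 0xff80 */
}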
vbits_t
onehot_vbits(unsigned bitno, unsigned num_bits)
{
   assert(bitno < num_bits);

   vbits_t new = { .num_bits = num_bits };

   switch (num_bits) {
   case 1:  new.bits.u32 = 1    << bitno; break;
   case 8:  new.bits.u8  = 1    << bitno; break;
   case 16: new.bits.u16 = 1    << bitno; break;
   case 32: new.bits.u32 = 1u   << bitno; break;
   case 64: new.bits.u64 = 1ull << bitno; break;
   case 128:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         if (bitno < 64) {
            new.bits.u128[0] = 1ull << bitno;
            new.bits.u128[1] = 0;
         } else {
            new.bits.u128[0] = 0;
            new.bits.u128[1] = 1ull << (bitno - 64);
         }
      } else {
         if (bitno < 64) {
            new.bits.u128[0] = 0;
            new.bits.u128[1] = 1ull << bitno;
         } else {
            new.bits.u128[0] = 1ull << (bitno - 64);
            new.bits.u128[1] = 0;
         }
      }
      break;
   case 256:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         if (bitno < 64) {
            new.bits.u256[0] = 1ull << bitno;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         } else if (bitno < 128) {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 1ull << (bitno - 64);
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         } else if (bitno < 192) {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 1ull << (bitno - 128);
            new.bits.u256[3] = 0;
         } else {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 1ull << (bitno - 192);
         }
      } else {
         if (bitno < 64) {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 1ull << bitno;
         } else if (bitno < 128) {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 1ull << (bitno - 64);
            new.bits.u256[3] = 0;
         } else if (bitno < 192) {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 1ull << (bitno - 128);
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         } else {
            new.bits.u256[0] = 1ull << (bitno - 192);
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         }
      }
      break;
   default:
      panic(__func__);
   }
   return new;
}
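/* Worked example (illustration only): onehot_vbits(70, 128) on a
   little-endian host sets u128[1] = 1ull << 6 and leaves u128[0] zero,
   marking exactly bit 70 of the 128-bit value undefined. */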
int
completely_defined_vbits(vbits_t v)
{
   return equal_vbits(v, defined_vbits(v.num_bits));
}
vbits_t
shl_vbits(vbits_t v, unsigned shift_amount)
{
   assert(shift_amount < v.num_bits);

   vbits_t new = v;

   switch (v.num_bits) {
   case 8:  new.bits.u8  <<= shift_amount; break;
   case 16: new.bits.u16 <<= shift_amount; break;
   case 32: new.bits.u32 <<= shift_amount; break;
   case 64: new.bits.u64 <<= shift_amount; break;
   case 128: /* fall through */
   case 256: /* fall through */
   default:
      panic(__func__);
   }

   return new;
}
vbits_t
shr_vbits(vbits_t v, unsigned shift_amount)
{
   assert(shift_amount < v.num_bits);

   vbits_t new = v;

   switch (v.num_bits) {
   case 8:  new.bits.u8  >>= shift_amount; break;
   case 16: new.bits.u16 >>= shift_amount; break;
   case 32: new.bits.u32 >>= shift_amount; break;
   case 64: new.bits.u64 >>= shift_amount; break;
   case 128: /* fall through */
   case 256: /* fall through */
   default:
      panic(__func__);
   }

   return new;
}
vbits_t
sar_vbits(vbits_t v, unsigned shift_amount)
{
   assert(shift_amount < v.num_bits);

   vbits_t new = v;
   int msb;

   switch (v.num_bits) {
   case 8:
      new.bits.u8  >>= shift_amount;
      msb = (v.bits.u8 & 0x80) != 0;
      break;
   case 16:
      new.bits.u16 >>= shift_amount;
      msb = (v.bits.u16 & 0x8000) != 0;
      break;
   case 32:
      new.bits.u32 >>= shift_amount;
      msb = (v.bits.u32 & (1u << 31)) != 0;
      break;
   case 64:
      new.bits.u64 >>= shift_amount;
      msb = (v.bits.u64 & (1ull << 63)) != 0;
      break;
   case 128: /* fall through */
   case 256: /* fall through */
   default:
      panic(__func__);
   }

   if (msb)
      new = left_vbits(new, new.num_bits);
   return new;
}
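/* Worked example (illustration only): sar_vbits on the 8-bit pattern 0x80
   with shift_amount 1 first gives 0x40; because the original MSB was
   undefined, left_vbits then smears it upward, so the result is 0xc0,
   mirroring sign replication in an arithmetic shift. */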
/* Return a value for the POWER Iop_CmpORD class iops */
vbits_t
cmpord_vbits(unsigned v1_num_bits, unsigned v2_num_bits)
{
   vbits_t new = { .num_bits = v1_num_bits };

   /* Size of values being compared must be the same */
   assert(v1_num_bits == v2_num_bits);

   /* Comparison only produces a 32-bit or 64-bit value where
    * the lower 3 bits are set to indicate less than, equal or greater than.
    */
   switch (v1_num_bits) {
   case 32:
      /* Mark the three lt/gt/eq flag bits as undefined. */
      new.bits.u32 = 0xe;
      break;
   case 64:
      new.bits.u64 = 0xe;
      break;
   default:
      panic(__func__);
   }

   return new;
}
/* Deal with precise integer EQ and NE.  Needs some helpers.  The helpers
   compute the result for 64-bit inputs, but can also be used for the
   32/16/8 bit cases, because we can zero-extend both the vbits and values
   out to 64 bits and still get the correct result. */


/* Get both vbits and values for a binary operation whose arguments are of
   the same size, namely 8, 16, 32 or 64 bit.  Unused bits are set to
   zero in both the vbit and value outputs. */
static
void get_binary_vbits_and_vals64 ( /*OUT*/uint64_t* varg1,
                                   /*OUT*/uint64_t* arg1,
                                   /*OUT*/uint64_t* varg2,
                                   /*OUT*/uint64_t* arg2,
                                   vbits_t vbits1, vbits_t vbits2,
                                   value_t val1, value_t val2 )
{
   assert(vbits1.num_bits == vbits2.num_bits);

   *varg1 = *arg1 = *varg2 = *arg2 = 0;

   switch (vbits1.num_bits) {
   case 8:  *arg1 = (uint64_t)val1.u8;  *arg2 = (uint64_t)val2.u8;  break;
   case 16: *arg1 = (uint64_t)val1.u16; *arg2 = (uint64_t)val2.u16; break;
   case 32: *arg1 = (uint64_t)val1.u32; *arg2 = (uint64_t)val2.u32; break;
   case 64: *arg1 = val1.u64;           *arg2 = val2.u64;           break;
   default: panic(__func__);
   }

   *varg1 = get_bits64(vbits1);
   *varg2 = get_bits64(vbits2);
}
static uint64_t uifu64 ( uint64_t x, uint64_t y ) { return x | y; }
/* Returns 0 (defined) or 1 (undefined). */
static uint32_t ref_CmpEQ64_with_vbits ( uint64_t arg1, uint64_t varg1,
                                         uint64_t arg2, uint64_t varg2 )
{
   uint64_t naive = uifu64(varg1, varg2);
   if (naive == 0) {
      return 0; /* defined */
   }

   // Mark the two actual arguments as fully defined, else Memcheck will
   // complain about undefinedness in them, which is correct but confusing
   // (and pollutes the output of this test program.)
   VALGRIND_MAKE_MEM_DEFINED(&arg1, sizeof(arg1));
   VALGRIND_MAKE_MEM_DEFINED(&arg2, sizeof(arg2));

   // If any bit in naive is 1, then the result is undefined.  Except,
   // if we can find two corresponding bits in arg1 and arg2 such that they
   // are different but both defined, then the overall result is defined
   // (because the two bits guarantee that the bit vectors arg1 and arg2
   // are different.)
   unsigned int i;
   for (i = 0; i < 64; i++) {
      if ((arg1 & 1) != (arg2 & 1) && (varg1 & 1) == 0 && (varg2 & 1) == 0) {
         return 0; /* defined */
      }
      arg1 >>= 1; arg2 >>= 1; varg1 >>= 1; varg2 >>= 1;
   }

   return 1; /* undefined */
}
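/* Worked example (illustration only): with arg1 == 0, arg2 == 1 and only
   the top bit of varg1 undefined, bit 0 differs and is defined in both
   inputs, so equality is provably false and the comparison result is
   reported as defined (0) despite the undefined input bit. */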
vbits_t
cmp_eq_ne_vbits(vbits_t vbits1, vbits_t vbits2, value_t val1, value_t val2)
{
   uint64_t varg1 = 0, arg1 = 0, varg2 = 0, arg2 = 0;
   get_binary_vbits_and_vals64(&varg1, &arg1, &varg2, &arg2,
                               vbits1, vbits2, val1, val2);

   vbits_t res = { .num_bits = 1 };
   res.bits.u32 = ref_CmpEQ64_with_vbits(arg1, varg1, arg2, varg2);

   return res;
}
1162 int_add_or_sub_vbits(int isAdd
,
1163 vbits_t vbits1
, vbits_t vbits2
, value_t val1
, value_t val2
)
1165 uint64_t vaa
= 0, aa
= 0, vbb
= 0, bb
= 0;
1166 get_binary_vbits_and_vals64(&vaa
, &aa
, &vbb
, &bb
,
1167 vbits1
, vbits2
, val1
, val2
);
1169 // This is derived from expensiveAddSub() in mc_translate.c.
1170 uint64_t a_min
= aa
& ~vaa
;
1171 uint64_t b_min
= bb
& ~vbb
;
1172 uint64_t a_max
= aa
| vaa
;
1173 uint64_t b_max
= bb
| vbb
;
1177 result
= (vaa
| vbb
) | ((a_min
+ b_min
) ^ (a_max
+ b_max
));
1179 result
= (vaa
| vbb
) | ((a_min
- b_max
) ^ (a_max
- b_min
));
1182 vbits_t res
= { .num_bits
= vbits1
.num_bits
};
1183 switch (res
.num_bits
) {
1184 case 8: res
.bits
.u8
= (uint8_t)result
; break;
1185 case 16: res
.bits
.u16
= (uint16_t)result
; break;
1186 case 32: res
.bits
.u32
= (uint32_t)result
; break;
1187 case 64: res
.bits
.u64
= (uint64_t)result
; break;
1188 default: panic(__func__
);