/* -*- mode: C; c-basic-offset: 3; -*- */

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2012-2017  Florian Krohm

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/

#include <stdio.h>    // fprintf
#include <assert.h>   // assert
#if defined(__APPLE__)
#include <machine/endian.h>
#define __BYTE_ORDER    BYTE_ORDER
#define __LITTLE_ENDIAN LITTLE_ENDIAN
#elif defined(__sun)
#define __LITTLE_ENDIAN 1234
#define __BIG_ENDIAN    4321
#  if defined(_LITTLE_ENDIAN)
#  define __BYTE_ORDER    __LITTLE_ENDIAN
#  else
#  define __BYTE_ORDER    __BIG_ENDIAN
#  endif
#else
#include <endian.h>
#endif
#include <inttypes.h>
#include "vbits.h"
#include "vtest.h"

#include "memcheck.h"  // VALGRIND_MAKE_MEM_DEFINED

/* Return the bits of V if they fit into 64-bit. If V has fewer than
   64 bits, the bit pattern is zero-extended to the left. */
static uint64_t
get_bits64(vbits_t v)
{
   switch (v.num_bits) {
   case 1:  return v.bits.u32;
   case 8:  return v.bits.u8;
   case 16: return v.bits.u16;
   case 32: return v.bits.u32;
   case 64: return v.bits.u64;
   case 128:
   case 256:
      /* fall through */
   default:
      panic(__func__);
   }
}
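
/* Print the bits of V to FP in hex, most significant 64-bit word first,
   independent of the host's endianness. */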
void
print_vbits(FILE *fp, vbits_t v)
{
   switch (v.num_bits) {
   case 1:  fprintf(fp, "%08x", v.bits.u32); break;
   case 8:  fprintf(fp, "%02x", v.bits.u8);  break;
   case 16: fprintf(fp, "%04x", v.bits.u16); break;
   case 32: fprintf(fp, "%08x", v.bits.u32); break;
   case 64: fprintf(fp, "%016" PRIx64, v.bits.u64); break;
   case 128:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         fprintf(fp, "%016" PRIx64, v.bits.u128[1]);
         fprintf(fp, "%016" PRIx64, v.bits.u128[0]);
      } else {
         fprintf(fp, "%016" PRIx64, v.bits.u128[0]);
         fprintf(fp, "%016" PRIx64, v.bits.u128[1]);
      }
      break;
   case 256:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         fprintf(fp, "%016" PRIx64, v.bits.u256[3]);
         fprintf(fp, "%016" PRIx64, v.bits.u256[2]);
         fprintf(fp, "%016" PRIx64, v.bits.u256[1]);
         fprintf(fp, "%016" PRIx64, v.bits.u256[0]);
      } else {
         fprintf(fp, "%016" PRIx64, v.bits.u256[0]);
         fprintf(fp, "%016" PRIx64, v.bits.u256[1]);
         fprintf(fp, "%016" PRIx64, v.bits.u256[2]);
         fprintf(fp, "%016" PRIx64, v.bits.u256[3]);
      }
      break;
   default:
      panic(__func__);
   }
}

/* Return a value where all bits are set to undefined. */
vbits_t
undefined_vbits(unsigned num_bits)
{
   vbits_t new = { .num_bits = num_bits };

   switch (num_bits) {
   case   1: new.bits.u32 = 0x01;   break;
   case   8: new.bits.u8  = 0xff;   break;
   case  16: new.bits.u16 = 0xffff; break;
   case  32: new.bits.u32 = ~0;     break;
   case  64: new.bits.u64 = ~0ull;  break;
   case 128: new.bits.u128[0] = ~0ull;
             new.bits.u128[1] = ~0ull;
             break;
   case 256: new.bits.u256[0] = ~0ull;
             new.bits.u256[1] = ~0ull;
             new.bits.u256[2] = ~0ull;
             new.bits.u256[3] = ~0ull;
             break;
   default:
      panic(__func__);
   }
   return new;
}

/* The following routines named undefined_vbits_BxE() return a 128-bit
 * vector with E elements, each of size BITS. If any of the bits in an
 * element is undefined, then return a value where all bits in that
 * element are undefined.
 */
vbits_t
undefined_vbits_BxE(unsigned int bits, unsigned int elements, vbits_t v)
{
   vbits_t new = { .num_bits = v.num_bits };
   uint64_t mask = ~0ull >> (64 - bits);
   int i, j;

   assert((elements % 2) == 0);
   assert(bits <= 64);

   for (i = 0; i < 2; i++) {
      new.bits.u128[i] = 0ull;

      for (j = 0; j < elements/2; j++) {
         if ((v.bits.u128[i] & (mask << (j*bits))) != 0)
            new.bits.u128[i] |= (mask << (j*bits));
      }
   }
   return new;
}
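
/* For instance (a worked example, not from the original source): with
   bits == 16 and elements == 8, a vbit pattern of 0x0100 in v.bits.u128[0]
   lies inside element 0, so the result's u128[0] becomes 0xffff: a single
   undefined bit poisons its entire 16-bit element. */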

/* The following routines named undefined_vbits_BxE_rotate() return a 128-bit
 * vector with E elements, each of size BITS. The bits in v are rotated
 * left by the amounts in the corresponding element of val. The rotate
 * amount field is assumed to be at most 8 bits wide.
 */
vbits_t
undefined_vbits_BxE_rotate(unsigned int bits, unsigned int elements,
                           vbits_t v, value_t val)
{
   vbits_t new = { .num_bits = v.num_bits };
   uint64_t mask = ~0ull >> (64 - bits);
   uint64_t const shift_mask = 0xFF;
   uint64_t element;
   int i, j;
   signed char shift;

   assert((elements % 2) == 0);
   assert(bits <= 64);

   for (i = 0; i < 2; i++) {
      new.bits.u128[i] = 0ull;

      for (j = 0; j < elements/2; j++) {
         element = (v.bits.u128[i] >> (j*bits)) & mask;
         shift = (int)((val.u128[i] >> (j*bits)) & shift_mask);

         if (shift < 0) {
            /* right shift */
            new.bits.u128[i] = element >> -shift;

            /* OR in the bits shifted out into the top of the element */
            new.bits.u128[i] |= element << (bits + shift);
         } else {
            /* left shift */
            /* upper bits from shift */
            new.bits.u128[i] = element << shift;

            /* OR in the bits shifted out into the bottom of the element */
            new.bits.u128[i] |= element >> (bits - shift);
         }
      }
   }
   return new;
}

/* Only the even-numbered elements of the input are used by the Iop. */
vbits_t
undefined_vbits_128_even_element(unsigned int bits, unsigned int elements,
                                 vbits_t v)
{
   int i;
   uint64_t mask;
   unsigned int const element_width = 128/elements;
   vbits_t new = { .num_bits = v.num_bits };

   assert((elements % 2) == 0);
   assert(bits <= 64);

   /* Create a 128-bit mask in which the bits in the even-numbered
    * elements are all ones.
    */
   mask = ~0ull >> (64 - bits);

   for (i = 2; i < elements/2; i = i + 2) {
      mask |= mask << (i * element_width);
   }

   new.bits.u128[0] = mask & v.bits.u128[0];
   new.bits.u128[1] = mask & v.bits.u128[1];

   return new;
}
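
/* For instance (a worked example, not from the original source): with
   bits == 16 and elements == 8, element_width is 16 and the loop builds
   mask == 0x0000ffff0000ffff, which keeps elements 0 and 2 of each
   64-bit half and clears the odd-numbered ones. */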

/* Concatenate bit i from each byte j. Place the concatenated 8-bit value
 * into byte i of the result. Do this for all i from 0 to 7 and j from 0
 * to 7 of each 64-bit element.
 */
vbits_t
undefined_vbits_64x2_transpose(vbits_t v)
{
   vbits_t new = { .num_bits = v.num_bits };
   unsigned int bit, byte, element;
   uint64_t value, new_value, select_bit;

   for (element = 0; element < 2; element++) {
      value = v.bits.u128[element];
      new_value = 0;
      for (byte = 0; byte < 8; byte++) {
         for (bit = 0; bit < 8; bit++) {
            select_bit = 1ULL & (value >> (bit + 8*byte));
            new_value |= select_bit << (bit*8 + byte);
         }
      }
      new.bits.u128[element] = new_value;
   }
   return new;
}

/* The routine takes a 256-bit vector value stored across the two 128-bit
 * source operands src1 and src2. The size of each element in the input is
 * src_num_bits. The elements are narrowed to result_num_bits and packed
 * into the result. If saturate is True, then all the result bits are
 * set to 1 if the source element cannot be represented in result_num_bits.
 */
vbits_t
undefined_vbits_Narrow256_AtoB(unsigned int src_num_bits,
                               unsigned int result_num_bits,
                               vbits_t src1_v, value_t src1_value,
                               vbits_t src2_v, value_t src2_value,
                               bool saturate)
{
   vbits_t new = { .num_bits = src1_v.num_bits };
   unsigned int i;
   uint64_t vbits, new_value;
   uint64_t const src_mask = ~0x0ULL >> (64 - src_num_bits);
   uint64_t const result_mask = ~0x0ULL >> (64 - result_num_bits);
   unsigned int num_elements_per_64_bits = src_num_bits/64;
   unsigned int shift;

   /*
    * NOTE: POWER PPC
    * The saturated value is 0xFFFF if the vbit is in one of the lower
    * 32 bits of the source. The saturated result is 0xFFFF0000 if the
    * vbit is in the upper 32 bits of the source. Not sure what
    * the saturated result is in general for a B-bit result.
    *
    * ONLY TESTED FOR 64 bit input, 32 bit result
    */
   uint64_t const saturated_result = 0xFFFFULL;

   /* Source elements are split between the two source operands */

   assert(src_num_bits <= 64);
   assert(result_num_bits < 64);
   assert(result_num_bits < src_num_bits);

   /* Narrow the elements from src1 to the upper 64-bits of result.
    * Do each of the 64 bit values that make up a u128.
    */
   new_value = 0;
   for (i = 0; i < num_elements_per_64_bits; i++) {
      vbits = src1_v.bits.u128[0] >> (i * src_num_bits);
      vbits &= src_mask;

      shift = result_num_bits * i;
      if (vbits) {
         if (saturate) {
            /* Value will not fit in B-bits, saturate the result as needed. */
            if (vbits >> (src_num_bits/2))
               /* vbit is in the upper half of the source */
               new_value |= saturated_result << (shift + result_num_bits/2);
            else
               new_value |= saturated_result << shift;
         } else {
            new_value |= (vbits & result_mask) << shift;
         }
      }
   }

   for (i = 0; i < num_elements_per_64_bits; i++) {
      vbits = src1_v.bits.u128[1] >> (i * src_num_bits);
      vbits &= src_mask;

      shift = result_num_bits * i + (num_elements_per_64_bits
                                     * result_num_bits);
      if (vbits) {
         if (saturate) {
            /* Value will not fit in result_num_bits, saturate the result
             * as needed.
             */
            if (vbits >> (src_num_bits/2))
               /* vbit is in the upper half of the source */
               new_value |= saturated_result << (shift + result_num_bits/2);
            else
               new_value |= saturated_result << shift;
         } else {
            new_value |= (vbits & result_mask) << shift;
         }
      }
   }

   if (__BYTE_ORDER == __LITTLE_ENDIAN)
      new.bits.u128[1] = new_value;
   else
      /* Big endian, swap the upper and lower 32-bits of new_value */
      new.bits.u128[0] = (new_value << 32) | (new_value >> 32);

   new_value = 0;
   /* Narrow the elements from src2 to the lower 64-bits of result.
    * Do each of the 64 bit values that make up a u128.
    */
   for (i = 0; i < num_elements_per_64_bits; i++) {
      vbits = src2_v.bits.u128[0] >> (i * src_num_bits);
      vbits &= src_mask;

      shift = result_num_bits * i;
      if (vbits) {
         if (saturate) {
            /* Value will not fit in result, saturate the result as needed. */
            if (vbits >> (src_num_bits/2))
               /* vbit is in the upper half of the source */
               new_value |= saturated_result << (shift + result_num_bits/2);
            else
               new_value |= saturated_result << shift;
         } else {
            new_value |= (vbits & result_mask) << shift;
         }
      }
   }

   for (i = 0; i < num_elements_per_64_bits; i++) {
      vbits = src2_v.bits.u128[1] >> (i * src_num_bits);
      vbits &= src_mask;

      if (vbits) {
         if (saturate) {
            /* Value will not fit in result_num_bits, saturate the result
             * as needed.
             */
            if (vbits >> (src_num_bits/2))
               /* vbit is in the upper half of the source */
               new_value |= saturated_result << (result_num_bits * i
                                                 + result_num_bits/2
                                                 + (num_elements_per_64_bits
                                                    * result_num_bits));
            else
               new_value |= saturated_result << (result_num_bits * i
                                                 + (num_elements_per_64_bits
                                                    * result_num_bits));
         } else {
            new_value |= (vbits & result_mask) << (result_num_bits * i
                                                   + (num_elements_per_64_bits
                                                      * result_num_bits));
         }
      }
   }

   if (__BYTE_ORDER == __LITTLE_ENDIAN)
      new.bits.u128[0] = new_value;
   else
      /* Big endian, swap the upper and lower 32-bits of new_value */
      new.bits.u128[1] = (new_value << 32) | (new_value >> 32);

   return new;
}

/* Return a value where all bits are set to defined. */
vbits_t
defined_vbits(unsigned num_bits)
{
   vbits_t new = { .num_bits = num_bits };

   switch (num_bits) {
   case   1: new.bits.u32 = 0x0; break;
   case   8: new.bits.u8  = 0x0; break;
   case  16: new.bits.u16 = 0x0; break;
   case  32: new.bits.u32 = 0x0; break;
   case  64: new.bits.u64 = 0x0; break;
   case 128: new.bits.u128[0] = 0x0;
             new.bits.u128[1] = 0x0;
             break;
   case 256: new.bits.u256[0] = 0x0;
             new.bits.u256[1] = 0x0;
             new.bits.u256[2] = 0x0;
             new.bits.u256[3] = 0x0;
             break;
   default:
      panic(__func__);
   }
   return new;
}

/* Return 1, if equal. */
int
equal_vbits(vbits_t v1, vbits_t v2)
{
   assert(v1.num_bits == v2.num_bits);

   switch (v1.num_bits) {
   case 1:   return v1.bits.u32 == v2.bits.u32;
   case 8:   return v1.bits.u8  == v2.bits.u8;
   case 16:  return v1.bits.u16 == v2.bits.u16;
   case 32:  return v1.bits.u32 == v2.bits.u32;
   case 64:  return v1.bits.u64 == v2.bits.u64;
   case 128: return v1.bits.u128[0] == v2.bits.u128[0] &&
                    v1.bits.u128[1] == v2.bits.u128[1];
   case 256: return v1.bits.u256[0] == v2.bits.u256[0] &&
                    v1.bits.u256[1] == v2.bits.u256[1] &&
                    v1.bits.u256[2] == v2.bits.u256[2] &&
                    v1.bits.u256[3] == v2.bits.u256[3];
   default:
      panic(__func__);
   }
}

/* Truncate the bit pattern in V to NUM_BITS bits */
vbits_t
truncate_vbits(vbits_t v, unsigned num_bits)
{
   assert(num_bits <= v.num_bits);

   if (num_bits == v.num_bits) return v;

   vbits_t new = { .num_bits = num_bits };

   if (num_bits <= 64) {
      uint64_t bits;

      if (v.num_bits <= 64)
         bits = get_bits64(v);
      else if (v.num_bits == 128) {
         if (__BYTE_ORDER == __LITTLE_ENDIAN)
            bits = v.bits.u128[0];
         else
            bits = v.bits.u128[1];
      } else if (v.num_bits == 256) {
         if (__BYTE_ORDER == __LITTLE_ENDIAN)
            bits = v.bits.u256[0];
         else
            bits = v.bits.u256[3];
      } else
         panic(__func__);

      switch (num_bits) {
      case 1:  new.bits.u32 = bits & 0x01;   break;
      case 8:  new.bits.u8  = bits & 0xff;   break;
      case 16: new.bits.u16 = bits & 0xffff; break;
      case 32: new.bits.u32 = bits & ~0u;    break;
      case 64: new.bits.u64 = bits & ~0ll;   break;
      default:
         panic(__func__);
      }
      return new;
   }

   if (num_bits == 128) {
      assert(v.num_bits == 256);
      /* From 256 bits to 128 */
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         new.bits.u128[0] = v.bits.u256[0];
         new.bits.u128[1] = v.bits.u256[1];
      } else {
         new.bits.u128[0] = v.bits.u256[2];
         new.bits.u128[1] = v.bits.u256[3];
      }
      return new;
   }

   /* Cannot truncate to 256 bits from something larger */
   panic(__func__);
}

/* Helper function to compute left_vbits */
static uint64_t
left64(uint64_t x)
{
   // left(x) = x | -x
   return x | (~x + 1);
}
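
/* For instance: left64(0x10) == 0xfffffffffffffff0 and left64(0) == 0;
   every bit at or above the lowest set bit becomes 1. */

/* Extend the vbit pattern in V to NUM_BITS bits, making every bit at or
   above the lowest undefined bit undefined (cf. mkLeft in
   mc_translate.c). */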
vbits_t
left_vbits(vbits_t v, unsigned num_bits)
{
   assert(num_bits >= v.num_bits);

   vbits_t new = { .num_bits = num_bits };

   if (v.num_bits <= 64) {
      uint64_t bits = left64(get_bits64(v));

      switch (num_bits) {
      case 8:  new.bits.u8  = bits & 0xff;   break;
      case 16: new.bits.u16 = bits & 0xffff; break;
      case 32: new.bits.u32 = bits & ~0u;    break;
      case 64: new.bits.u64 = bits & ~0ll;   break;
      case 128:
         if (__BYTE_ORDER == __LITTLE_ENDIAN) {
            new.bits.u128[0] = bits;
            if (bits & (1ull << 63)) {  // MSB is set
               new.bits.u128[1] = ~0ull;
            } else {
               new.bits.u128[1] = 0;
            }
         } else {
            new.bits.u128[1] = bits;
            if (bits & (1ull << 63)) {  // MSB is set
               new.bits.u128[0] = ~0ull;
            } else {
               new.bits.u128[0] = 0;
            }
         }
         break;
      case 256:
         if (__BYTE_ORDER == __LITTLE_ENDIAN) {
            new.bits.u256[0] = bits;
            if (bits & (1ull << 63)) {  // MSB is set
               new.bits.u256[1] = ~0ull;
               new.bits.u256[2] = ~0ull;
               new.bits.u256[3] = ~0ull;
            } else {
               new.bits.u256[1] = 0;
               new.bits.u256[2] = 0;
               new.bits.u256[3] = 0;
            }
         } else {
            new.bits.u256[3] = bits;
            if (bits & (1ull << 63)) {  // MSB is set
               new.bits.u256[0] = ~0ull;
               new.bits.u256[1] = ~0ull;
               new.bits.u256[2] = ~0ull;
            } else {
               new.bits.u256[0] = 0;
               new.bits.u256[1] = 0;
               new.bits.u256[2] = 0;
            }
         }
         break;
      default:
         panic(__func__);
      }
      return new;
   }

   if (v.num_bits == 128) {
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         if (v.bits.u128[1] != 0) {
            new.bits.u128[0] = v.bits.u128[0];
            new.bits.u128[1] = left64(v.bits.u128[1]);
         } else {
            new.bits.u128[0] = left64(v.bits.u128[0]);
            if (new.bits.u128[0] & (1ull << 63)) {  // MSB is set
               new.bits.u128[1] = ~0ull;
            } else {
               new.bits.u128[1] = 0;
            }
         }
      } else {
         if (v.bits.u128[0] != 0) {
            new.bits.u128[0] = left64(v.bits.u128[0]);
            new.bits.u128[1] = v.bits.u128[1];
         } else {
            new.bits.u128[1] = left64(v.bits.u128[1]);
            if (new.bits.u128[1] & (1ull << 63)) {  // MSB is set
               new.bits.u128[0] = ~0ull;
            } else {
               new.bits.u128[0] = 0;
            }
         }
      }

      if (num_bits == 128) return new;

      assert(num_bits == 256);

      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         uint64_t b1 = new.bits.u128[1];
         uint64_t b0 = new.bits.u128[0];

         new.bits.u256[0] = b0;
         new.bits.u256[1] = b1;

         if (new.bits.u256[1] & (1ull << 63)) {  // MSB is set
            new.bits.u256[2] = ~0ull;
            new.bits.u256[3] = ~0ull;
         } else {
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         }
      } else {
         uint64_t b1 = new.bits.u128[0];
         uint64_t b0 = new.bits.u128[1];

         new.bits.u256[2] = b0;
         new.bits.u256[3] = b1;

         if (new.bits.u256[2] & (1ull << 63)) {  // MSB is set
            new.bits.u256[0] = ~0ull;
            new.bits.u256[1] = ~0ull;
         } else {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
         }
      }
      return new;
   }

   panic(__func__);
}
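
/* Bitwise union of two vbit vectors: a result bit is undefined if it is
   undefined in either operand (the UifU combinator). */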
vbits_t
or_vbits(vbits_t v1, vbits_t v2)
{
   assert(v1.num_bits == v2.num_bits);

   vbits_t new = { .num_bits = v1.num_bits };

   switch (v1.num_bits) {
   case 1:   new.bits.u1  = (v1.bits.u1 | v2.bits.u1) & 1; break;
   case 8:   new.bits.u8  = v1.bits.u8  | v2.bits.u8;      break;
   case 16:  new.bits.u16 = v1.bits.u16 | v2.bits.u16;     break;
   case 32:  new.bits.u32 = v1.bits.u32 | v2.bits.u32;     break;
   case 64:  new.bits.u64 = v1.bits.u64 | v2.bits.u64;     break;
   case 128: new.bits.u128[0] = v1.bits.u128[0] | v2.bits.u128[0];
             new.bits.u128[1] = v1.bits.u128[1] | v2.bits.u128[1];
             break;
   case 256: new.bits.u256[0] = v1.bits.u256[0] | v2.bits.u256[0];
             new.bits.u256[1] = v1.bits.u256[1] | v2.bits.u256[1];
             new.bits.u256[2] = v1.bits.u256[2] | v2.bits.u256[2];
             new.bits.u256[3] = v1.bits.u256[3] | v2.bits.u256[3];
             break;
   default:
      panic(__func__);
   }

   return new;
}
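
/* Bitwise intersection of two vbit vectors: a result bit is undefined
   only if it is undefined in both operands. */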
vbits_t
and_vbits(vbits_t v1, vbits_t v2)
{
   assert(v1.num_bits == v2.num_bits);

   vbits_t new = { .num_bits = v1.num_bits };

   switch (v1.num_bits) {
   case 1:   new.bits.u1  = (v1.bits.u1 & v2.bits.u1) & 1; break;
   case 8:   new.bits.u8  = v1.bits.u8  & v2.bits.u8;      break;
   case 16:  new.bits.u16 = v1.bits.u16 & v2.bits.u16;     break;
   case 32:  new.bits.u32 = v1.bits.u32 & v2.bits.u32;     break;
   case 64:  new.bits.u64 = v1.bits.u64 & v2.bits.u64;     break;
   case 128: new.bits.u128[0] = v1.bits.u128[0] & v2.bits.u128[0];
             new.bits.u128[1] = v1.bits.u128[1] & v2.bits.u128[1];
             break;
   case 256: new.bits.u256[0] = v1.bits.u256[0] & v2.bits.u256[0];
             new.bits.u256[1] = v1.bits.u256[1] & v2.bits.u256[1];
             new.bits.u256[2] = v1.bits.u256[2] & v2.bits.u256[2];
             new.bits.u256[3] = v1.bits.u256[3] & v2.bits.u256[3];
             break;
   default:
      panic(__func__);
   }

   return new;
}
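
/* Concatenate two equally sized vbit vectors; V1 becomes the upper half
   of the result and V2 the lower half. */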
vbits_t
concat_vbits(vbits_t v1, vbits_t v2)
{
   assert(v1.num_bits == v2.num_bits);

   vbits_t new = { .num_bits = v1.num_bits * 2 };

   switch (v1.num_bits) {
   case 8:  new.bits.u16 = v1.bits.u8;
            new.bits.u16 = (new.bits.u16 << 8)  | v2.bits.u8;  break;
   case 16: new.bits.u32 = v1.bits.u16;
            new.bits.u32 = (new.bits.u32 << 16) | v2.bits.u16; break;
   case 32: new.bits.u64 = v1.bits.u32;
            new.bits.u64 = (new.bits.u64 << 32) | v2.bits.u32; break;
   case 64:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         new.bits.u128[0] = v2.bits.u64;
         new.bits.u128[1] = v1.bits.u64;
      } else {
         new.bits.u128[0] = v1.bits.u64;
         new.bits.u128[1] = v2.bits.u64;
      }
      break;
   case 128:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         new.bits.u256[0] = v2.bits.u128[0];
         new.bits.u256[1] = v2.bits.u128[1];
         new.bits.u256[2] = v1.bits.u128[0];
         new.bits.u256[3] = v1.bits.u128[1];
      } else {
         new.bits.u256[0] = v1.bits.u128[0];
         new.bits.u256[1] = v1.bits.u128[1];
         new.bits.u256[2] = v2.bits.u128[0];
         new.bits.u256[3] = v2.bits.u128[1];
      }
      break;
   case 256: /* Fall through */
   default:
      panic(__func__);
   }

   return new;
}
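
/* Return the upper half of the vbit pattern in V. */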
vbits_t
upper_vbits(vbits_t v)
{
   vbits_t new = { .num_bits = v.num_bits / 2 };

   switch (v.num_bits) {
   case 16: new.bits.u8  = v.bits.u16 >> 8;  break;
   case 32: new.bits.u16 = v.bits.u32 >> 16; break;
   case 64: new.bits.u32 = v.bits.u64 >> 32; break;
   case 128:
      if (__BYTE_ORDER == __LITTLE_ENDIAN)
         new.bits.u64 = v.bits.u128[1];
      else
         new.bits.u64 = v.bits.u128[0];
      break;
   case 256:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         new.bits.u128[0] = v.bits.u256[2];
         new.bits.u128[1] = v.bits.u256[3];
      } else {
         new.bits.u128[0] = v.bits.u256[0];
         new.bits.u128[1] = v.bits.u256[1];
      }
      break;
   case 8:
   default:
      panic(__func__);
   }

   return new;
}

vbits_t
zextend_vbits(vbits_t v, unsigned num_bits)
{
   assert(num_bits >= v.num_bits);

   if (num_bits == v.num_bits) return v;

   vbits_t new = { .num_bits = num_bits };

   if (v.num_bits <= 64) {
      uint64_t bits = get_bits64(v);

      switch (num_bits) {
      case 8:  new.bits.u8  = bits; break;
      case 16: new.bits.u16 = bits; break;
      case 32: new.bits.u32 = bits; break;
      case 64: new.bits.u64 = bits; break;
      case 128:
         if (__BYTE_ORDER == __LITTLE_ENDIAN) {
            new.bits.u128[0] = bits;
            new.bits.u128[1] = 0;
         } else {
            new.bits.u128[0] = 0;
            new.bits.u128[1] = bits;
         }
         break;
      case 256:
         if (__BYTE_ORDER == __LITTLE_ENDIAN) {
            new.bits.u256[0] = bits;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         } else {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = bits;
         }
         break;
      default:
         panic(__func__);
      }
      return new;
   }

   if (v.num_bits == 128) {
      assert(num_bits == 256);

      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         new.bits.u256[0] = v.bits.u128[0];
         new.bits.u256[1] = v.bits.u128[1];
         new.bits.u256[2] = 0;
         new.bits.u256[3] = 0;
      } else {
         new.bits.u256[0] = 0;
         new.bits.u256[1] = 0;
         new.bits.u256[2] = v.bits.u128[1];
         new.bits.u256[3] = v.bits.u128[0];
      }
      return new;
   }

   /* Cannot zero-extend a 256-bit value to something larger */
   panic(__func__);
}
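
/* Sign-extend the vbit pattern in V to NUM_BITS bits: left-extend when
   exactly the most significant bit is undefined, zero-extend otherwise.
   (The equality tests below presumably suffice because the harness only
   generates one-hot vbit patterns; see onehot_vbits.) */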
vbits_t
sextend_vbits(vbits_t v, unsigned num_bits)
{
   assert(num_bits >= v.num_bits);

   int sextend = 0;

   switch (v.num_bits) {
   case 8:   if (v.bits.u8  == 0x80)             sextend = 1; break;
   case 16:  if (v.bits.u16 == 0x8000)           sextend = 1; break;
   case 32:  if (v.bits.u32 == 0x80000000)       sextend = 1; break;
   case 64:  if (v.bits.u64 == (1ull << 63))     sextend = 1; break;
   case 128: if (v.bits.u128[1] == (1ull << 63)) sextend = 1; break;
   case 256: if (v.bits.u256[3] == (1ull << 63)) sextend = 1; break;

   default:
      panic(__func__);
   }

   return sextend ? left_vbits(v, num_bits) : zextend_vbits(v, num_bits);
}
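
/* Return a NUM_BITS-wide vbit pattern in which only bit BITNO is
   undefined. */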
vbits_t
onehot_vbits(unsigned bitno, unsigned num_bits)
{
   assert(bitno < num_bits);

   vbits_t new = { .num_bits = num_bits };

   switch (num_bits) {
   case 1:  new.bits.u32 = 1    << bitno; break;
   case 8:  new.bits.u8  = 1    << bitno; break;
   case 16: new.bits.u16 = 1    << bitno; break;
   case 32: new.bits.u32 = 1u   << bitno; break;
   case 64: new.bits.u64 = 1ull << bitno; break;
   case 128:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         if (bitno < 64) {
            new.bits.u128[0] = 1ull << bitno;
            new.bits.u128[1] = 0;
         } else {
            new.bits.u128[0] = 0;
            new.bits.u128[1] = 1ull << (bitno - 64);
         }
      } else {
         if (bitno < 64) {
            new.bits.u128[0] = 0;
            new.bits.u128[1] = 1ull << bitno;
         } else {
            new.bits.u128[0] = 1ull << (bitno - 64);
            new.bits.u128[1] = 0;
         }
      }
      break;
   case 256:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         if (bitno < 64) {
            new.bits.u256[0] = 1ull << bitno;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         } else if (bitno < 128) {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 1ull << (bitno - 64);
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         } else if (bitno < 192) {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 1ull << (bitno - 128);
            new.bits.u256[3] = 0;
         } else {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 1ull << (bitno - 192);
         }
      } else {
         if (bitno < 64) {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 1ull << bitno;
         } else if (bitno < 128) {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 1ull << (bitno - 64);
            new.bits.u256[3] = 0;
         } else if (bitno < 192) {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 1ull << (bitno - 128);
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         } else {
            new.bits.u256[0] = 1ull << (bitno - 192);
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         }
      }
      break;
   default:
      panic(__func__);
   }
   return new;
}

int
completely_defined_vbits(vbits_t v)
{
   return equal_vbits(v, defined_vbits(v.num_bits));
}

vbits_t
shl_vbits(vbits_t v, unsigned shift_amount)
{
   assert(shift_amount < v.num_bits);

   vbits_t new = v;

   switch (v.num_bits) {
   case 8:  new.bits.u8  <<= shift_amount; break;
   case 16: new.bits.u16 <<= shift_amount; break;
   case 32: new.bits.u32 <<= shift_amount; break;
   case 64: new.bits.u64 <<= shift_amount; break;
   case 128: /* fall through */
   case 256: /* fall through */
   default:
      panic(__func__);
   }

   return new;
}

vbits_t
shr_vbits(vbits_t v, unsigned shift_amount)
{
   assert(shift_amount < v.num_bits);

   vbits_t new = v;

   switch (v.num_bits) {
   case 8:  new.bits.u8  >>= shift_amount; break;
   case 16: new.bits.u16 >>= shift_amount; break;
   case 32: new.bits.u32 >>= shift_amount; break;
   case 64: new.bits.u64 >>= shift_amount; break;
   case 128: /* fall through */
   case 256: /* fall through */
   default:
      panic(__func__);
   }

   return new;
}
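
/* Arithmetic shift right of a vbit pattern: if the most significant
   (sign) vbit is undefined, the undefinedness is smeared across the
   result via left_vbits. */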
vbits_t
sar_vbits(vbits_t v, unsigned shift_amount)
{
   assert(shift_amount < v.num_bits);

   vbits_t new = v;
   int msb;

   switch (v.num_bits) {
   case 8:
      new.bits.u8 >>= shift_amount;
      msb = (v.bits.u8 & 0x80) != 0;
      break;
   case 16:
      new.bits.u16 >>= shift_amount;
      msb = (v.bits.u16 & 0x8000) != 0;
      break;
   case 32:
      new.bits.u32 >>= shift_amount;
      msb = (v.bits.u32 & (1u << 31)) != 0;
      break;
   case 64:
      new.bits.u64 >>= shift_amount;
      msb = (v.bits.u64 & (1ull << 63)) != 0;
      break;
   case 128: /* fall through */
   case 256: /* fall through */
   default:
      panic(__func__);
   }

   if (msb)
      new = left_vbits(new, new.num_bits);
   return new;
}

/* Return a value for the POWER Iop_CmpORD class iops */
vbits_t
cmpord_vbits(unsigned v1_num_bits, unsigned v2_num_bits)
{
   vbits_t new = { .num_bits = v1_num_bits };

   /* Size of values being compared must be the same */
   assert(v1_num_bits == v2_num_bits);

   /* The comparison only produces a 32-bit or 64-bit value, where
    * the lower 3 bits are set to indicate less than, equal or greater than.
    */
   switch (v1_num_bits) {
   case 32:
      new.bits.u32 = 0xE;
      break;

   case 64:
      new.bits.u64 = 0xE;
      break;

   default:
      panic(__func__);
   }

   return new;
}

/* Deal with precise integer EQ and NE. Needs some helpers. The helpers
   compute the result for 64-bit inputs, but can also be used for the
   32/16/8 bit cases, because we can zero extend both the vbits and values
   out to 64 bits and still get the correct result. */

/* Get both vbits and values for a binary operation that has args of the
   same size, namely 8, 16, 32 or 64 bit. Unused bits are set to zero in
   both the vbit_ and val_ cases. */
static
void get_binary_vbits_and_vals64 ( /*OUT*/uint64_t* varg1,
                                   /*OUT*/uint64_t* arg1,
                                   /*OUT*/uint64_t* varg2,
                                   /*OUT*/uint64_t* arg2,
                                   vbits_t vbits1, vbits_t vbits2,
                                   value_t val1, value_t val2)
{
   assert(vbits1.num_bits == vbits2.num_bits);

   *varg1 = *arg1 = *varg2 = *arg2 = 0;

   switch (vbits1.num_bits) {
   case 8:  *arg1 = (uint64_t)val1.u8;  *arg2 = (uint64_t)val2.u8;  break;
   case 16: *arg1 = (uint64_t)val1.u16; *arg2 = (uint64_t)val2.u16; break;
   case 32: *arg1 = (uint64_t)val1.u32; *arg2 = (uint64_t)val2.u32; break;
   case 64: *arg1 = val1.u64;           *arg2 = val2.u64;           break;
   default: panic(__func__);
   }

   *varg1 = get_bits64(vbits1);
   *varg2 = get_bits64(vbits2);
}

static uint64_t uifu64 ( uint64_t x, uint64_t y ) { return x | y; }

/* Returns 0 (defined) or 1 (undefined). */
static uint32_t ref_CmpEQ64_with_vbits ( uint64_t arg1, uint64_t varg1,
                                         uint64_t arg2, uint64_t varg2 )
{
   uint64_t naive = uifu64(varg1, varg2);
   if (naive == 0) {
      return 0; /* defined */
   }

   // Mark the two actual arguments as fully defined, else Memcheck will
   // complain about undefinedness in them, which is correct but confusing
   // (and pollutes the output of this test program.)
   VALGRIND_MAKE_MEM_DEFINED(&arg1, sizeof(arg1));
   VALGRIND_MAKE_MEM_DEFINED(&arg2, sizeof(arg2));

   // If any bit in naive is 1, then the result is undefined. Except,
   // if we can find two corresponding bits in arg1 and arg2 such that they
   // are different but both defined, then the overall result is defined
   // (because those two bits guarantee that the bit vectors arg1 and arg2
   // are different.)
   unsigned i;
   for (i = 0; i < 64; i++) {
      if ((arg1 & 1) != (arg2 & 1) && (varg1 & 1) == 0 && (varg2 & 1) == 0) {
         return 0; /* defined */
      }
      arg1 >>= 1; arg2 >>= 1; varg1 >>= 1; varg2 >>= 1;
   }

   return 1; /* undefined */
}
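
/* Definedness of a precise integer EQ/NE comparison of two
   (value, vbits) argument pairs; the result is a single vbit. */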
vbits_t
cmp_eq_ne_vbits(vbits_t vbits1, vbits_t vbits2, value_t val1, value_t val2)
{
   uint64_t varg1 = 0, arg1 = 0, varg2 = 0, arg2 = 0;
   get_binary_vbits_and_vals64(&varg1, &arg1, &varg2, &arg2,
                               vbits1, vbits2, val1, val2);

   vbits_t res = { .num_bits = 1 };
   res.bits.u32 = ref_CmpEQ64_with_vbits(arg1, varg1, arg2, varg2);

   return res;
}

/* Deal with precise integer ADD and SUB. */
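/* A sketch of the interval trick used below: a_min/b_min are each
   argument's value with its undefined bits forced to 0, and a_max/b_max
   with them forced to 1. Any result bit on which the two extreme sums
   (or differences) disagree is undefined, in addition to bits that are
   undefined in either input. */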
vbits_t
int_add_or_sub_vbits(int isAdd,
                     vbits_t vbits1, vbits_t vbits2, value_t val1, value_t val2)
{
   uint64_t vaa = 0, aa = 0, vbb = 0, bb = 0;
   get_binary_vbits_and_vals64(&vaa, &aa, &vbb, &bb,
                               vbits1, vbits2, val1, val2);

   // This is derived from expensiveAddSub() in mc_translate.c.
   uint64_t a_min = aa & ~vaa;
   uint64_t b_min = bb & ~vbb;
   uint64_t a_max = aa | vaa;
   uint64_t b_max = bb | vbb;

   uint64_t result;
   if (isAdd) {
      result = (vaa | vbb) | ((a_min + b_min) ^ (a_max + b_max));
   } else {
      result = (vaa | vbb) | ((a_min - b_max) ^ (a_max - b_min));
   }

   vbits_t res = { .num_bits = vbits1.num_bits };
   switch (res.num_bits) {
   case 8:  res.bits.u8  = (uint8_t)result;  break;
   case 16: res.bits.u16 = (uint16_t)result; break;
   case 32: res.bits.u32 = (uint32_t)result; break;
   case 64: res.bits.u64 = (uint64_t)result; break;
   default: panic(__func__);
   }

   return res;
}