1 #ifndef INTERNAL_BITS2_H /*-*-C-*-vi:se ft=c:*/
2 #define INTERNAL_BITS2_H
4 * @author Ruby developers <ruby-core@ruby-lang.org>
5 * @copyright This file is a part of the programming language Ruby.
6 * Permission is hereby granted, to either redistribute and/or
7 * modify this file, provided that the conditions mentioned in the
8 * file COPYING are met. Consult the file for details.
9 * @brief Internal header for bitwise integer algorithms.
10 * @see Henry S. Warren Jr., "Hacker's Delight" (2nd ed.), 2013.
11 * @see SEI CERT C Coding Standard INT32-C. "Ensure that operations on
12 * signed integers do not result in overflow"
13 * @see https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html
14 * @see https://clang.llvm.org/docs/LanguageExtensions.html#builtin-rotateleft
15 * @see https://clang.llvm.org/docs/LanguageExtensions.html#builtin-rotateright
16 * @see https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/byteswap-uint64-byteswap-ulong-byteswap-ushort
17 * @see https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/rotl-rotl64-rotr-rotr64
18 * @see https://docs.microsoft.com/en-us/cpp/intrinsics/bitscanforward-bitscanforward64
19 * @see https://docs.microsoft.com/en-us/cpp/intrinsics/bitscanreverse-bitscanreverse64
20 * @see https://docs.microsoft.com/en-us/cpp/intrinsics/lzcnt16-lzcnt-lzcnt64
21 * @see https://docs.microsoft.com/en-us/cpp/intrinsics/popcnt16-popcnt-popcnt64
22 * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_lzcnt_u32
23 * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tzcnt_u32
24 * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rotl64
25 * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rotr64
26 * @see https://stackoverflow.com/a/776523
28 #include "ruby/internal/config.h"
29 #include <limits.h> /* for CHAR_BITS */
30 #include <stdint.h> /* for uintptr_t */
31 #include "internal/compilers.h" /* for MSC_VERSION_SINCE */
33 #if MSC_VERSION_SINCE(1310)
34 # include <stdlib.h> /* for _byteswap_uint64 */
37 #if defined(HAVE_X86INTRIN_H)
38 # include <x86intrin.h> /* for _lzcnt_u64 */
39 #elif MSC_VERSION_SINCE(1310)
40 # include <intrin.h> /* for the following intrinsics */
43 #if defined(_MSC_VER) && defined(__AVX__)
44 # pragma intrinsic(__popcnt)
45 # pragma intrinsic(__popcnt64)
48 #if defined(_MSC_VER) && defined(__AVX2__)
49 # pragma intrinsic(__lzcnt)
50 # pragma intrinsic(__lzcnt64)
53 #if MSC_VERSION_SINCE(1310)
54 # pragma intrinsic(_rotl)
55 # pragma intrinsic(_rotr)
57 # pragma intrinsic(_rotl64)
58 # pragma intrinsic(_rotr64)
62 #if MSC_VERSION_SINCE(1400)
63 # pragma intrinsic(_BitScanForward)
64 # pragma intrinsic(_BitScanReverse)
66 # pragma intrinsic(_BitScanForward64)
67 # pragma intrinsic(_BitScanReverse64)
71 #include "parser_value.h" /* for VALUE */
72 #include "internal/static_assert.h" /* for STATIC_ASSERT */
74 /* The most significant bit of the lower part of half-long integer.
75 * If sizeof(long) == 4, this is 0x8000.
76 * If sizeof(long) == 8, this is 0x80000000.
78 #define HALF_LONG_MSB ((SIGNED_VALUE)1<<((SIZEOF_LONG*CHAR_BIT-1)/2))
80 #define SIGNED_INTEGER_TYPE_P(T) (0 > ((T)0)-1)
82 #define SIGNED_INTEGER_MIN(T) \
83 ((sizeof(T) == sizeof(int8_t)) ? ((T)INT8_MIN) : \
84 ((sizeof(T) == sizeof(int16_t)) ? ((T)INT16_MIN) : \
85 ((sizeof(T) == sizeof(int32_t)) ? ((T)INT32_MIN) : \
86 ((sizeof(T) == sizeof(int64_t)) ? ((T)INT64_MIN) : \
89 #define SIGNED_INTEGER_MAX(T) ((T)(SIGNED_INTEGER_MIN(T) ^ ((T)~(T)0)))
91 #define UNSIGNED_INTEGER_MAX(T) ((T)~(T)0)
93 #if __has_builtin(__builtin_mul_overflow_p)
94 # define MUL_OVERFLOW_P(a, b) \
95 __builtin_mul_overflow_p((a), (b), (__typeof__(a * b))0)
96 #elif __has_builtin(__builtin_mul_overflow)
97 # define MUL_OVERFLOW_P(a, b) \
98 __extension__ ({ __typeof__(a) c; __builtin_mul_overflow((a), (b), &c); })
101 #define MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, min, max) ( \
103 (a) == -1 ? (b) < -(max) : \
105 ((b) > 0 ? (max) / (a) < (b) : (min) / (a) > (b)) : \
106 ((b) > 0 ? (min) / (a) < (b) : (max) / (a) > (b)))
108 #if __has_builtin(__builtin_mul_overflow_p)
109 /* __builtin_mul_overflow_p can take bitfield */
110 /* and GCC permits bitfields for integers other than int */
111 # define MUL_OVERFLOW_FIXNUM_P(a, b) \
113 struct { long fixnum : sizeof(long) * CHAR_BIT - 1; } c = { 0 }; \
114 __builtin_mul_overflow_p((a), (b), c.fixnum); \
117 # define MUL_OVERFLOW_FIXNUM_P(a, b) \
118 MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, FIXNUM_MIN, FIXNUM_MAX)
121 #ifdef MUL_OVERFLOW_P
122 # define MUL_OVERFLOW_LONG_LONG_P(a, b) MUL_OVERFLOW_P(a, b)
123 # define MUL_OVERFLOW_LONG_P(a, b) MUL_OVERFLOW_P(a, b)
124 # define MUL_OVERFLOW_INT_P(a, b) MUL_OVERFLOW_P(a, b)
126 # define MUL_OVERFLOW_LONG_LONG_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, LLONG_MIN, LLONG_MAX)
127 # define MUL_OVERFLOW_LONG_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, LONG_MIN, LONG_MAX)
128 # define MUL_OVERFLOW_INT_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, INT_MIN, INT_MAX)
131 #ifdef HAVE_UINT128_T
132 # define bit_length(x) \
134 (sizeof(x) <= sizeof(int32_t) ? 32 - nlz_int32((uint32_t)(x)) : \
135 sizeof(x) <= sizeof(int64_t) ? 64 - nlz_int64((uint64_t)(x)) : \
136 128 - nlz_int128((uint128_t)(x)))
138 # define bit_length(x) \
140 (sizeof(x) <= sizeof(int32_t) ? 32 - nlz_int32((uint32_t)(x)) : \
141 64 - nlz_int64((uint64_t)(x)))
145 # define swap16 ruby_swap16
149 # define swap32 ruby_swap32
153 # define swap64 ruby_swap64
156 static inline uint16_t ruby_swap16(uint16_t);
157 static inline uint32_t ruby_swap32(uint32_t);
158 static inline uint64_t ruby_swap64(uint64_t);
159 static inline unsigned nlz_int(unsigned x
);
160 static inline unsigned nlz_long(unsigned long x
);
161 static inline unsigned nlz_long_long(unsigned long long x
);
162 static inline unsigned nlz_intptr(uintptr_t x
);
163 static inline unsigned nlz_int32(uint32_t x
);
164 static inline unsigned nlz_int64(uint64_t x
);
165 #ifdef HAVE_UINT128_T
166 static inline unsigned nlz_int128(uint128_t x
);
168 static inline unsigned rb_popcount32(uint32_t x
);
169 static inline unsigned rb_popcount64(uint64_t x
);
170 static inline unsigned rb_popcount_intptr(uintptr_t x
);
171 static inline int ntz_int32(uint32_t x
);
172 static inline int ntz_int64(uint64_t x
);
173 static inline int ntz_intptr(uintptr_t x
);
174 static inline VALUE
RUBY_BIT_ROTL(VALUE
, int);
175 static inline VALUE
RUBY_BIT_ROTR(VALUE
, int);
177 static inline uint16_t
178 ruby_swap16(uint16_t x
)
180 #if __has_builtin(__builtin_bswap16)
181 return __builtin_bswap16(x
);
183 #elif MSC_VERSION_SINCE(1310)
184 return _byteswap_ushort(x
);
187 return (x
<< 8) | (x
>> 8);
192 static inline uint32_t
193 ruby_swap32(uint32_t x
)
195 #if __has_builtin(__builtin_bswap32)
196 return __builtin_bswap32(x
);
198 #elif MSC_VERSION_SINCE(1310)
199 return _byteswap_ulong(x
);
202 x
= ((x
& 0x0000FFFF) << 16) | ((x
& 0xFFFF0000) >> 16);
203 x
= ((x
& 0x00FF00FF) << 8) | ((x
& 0xFF00FF00) >> 8);
209 static inline uint64_t
210 ruby_swap64(uint64_t x
)
212 #if __has_builtin(__builtin_bswap64)
213 return __builtin_bswap64(x
);
215 #elif MSC_VERSION_SINCE(1310)
216 return _byteswap_uint64(x
);
219 x
= ((x
& 0x00000000FFFFFFFFULL
) << 32) | ((x
& 0xFFFFFFFF00000000ULL
) >> 32);
220 x
= ((x
& 0x0000FFFF0000FFFFULL
) << 16) | ((x
& 0xFFFF0000FFFF0000ULL
) >> 16);
221 x
= ((x
& 0x00FF00FF00FF00FFULL
) << 8) | ((x
& 0xFF00FF00FF00FF00ULL
) >> 8);
227 static inline unsigned int
228 nlz_int32(uint32_t x
)
230 #if defined(_MSC_VER) && defined(__AVX2__)
231 /* Note: It seems there is no such thing like __LZCNT__ predefined in MSVC.
232 * AMD CPUs have had this instruction for decades (since K10) but for
233 * Intel, Haswell is the oldest one. We need to use __AVX2__ for maximum
235 return (unsigned int)__lzcnt(x
);
237 #elif defined(__x86_64__) && defined(__LZCNT__)
238 return (unsigned int)_lzcnt_u32(x
);
240 #elif MSC_VERSION_SINCE(1400) /* &&! defined(__AVX2__) */
242 return _BitScanReverse(&r
, x
) ? (31 - (int)r
) : 32;
244 #elif __has_builtin(__builtin_clz)
245 STATIC_ASSERT(sizeof_int
, sizeof(int) * CHAR_BIT
== 32);
246 return x
? (unsigned int)__builtin_clz(x
) : 32;
251 y
= x
>> 16; if (y
) {n
-= 16; x
= y
;}
252 y
= x
>> 8; if (y
) {n
-= 8; x
= y
;}
253 y
= x
>> 4; if (y
) {n
-= 4; x
= y
;}
254 y
= x
>> 2; if (y
) {n
-= 2; x
= y
;}
255 y
= x
>> 1; if (y
) {return n
- 2;}
256 return (unsigned int)(n
- x
);
260 static inline unsigned int
261 nlz_int64(uint64_t x
)
263 #if defined(_MSC_VER) && defined(__AVX2__)
264 return (unsigned int)__lzcnt64(x
);
266 #elif defined(__x86_64__) && defined(__LZCNT__)
267 return (unsigned int)_lzcnt_u64(x
);
269 #elif defined(_WIN64) && MSC_VERSION_SINCE(1400) /* &&! defined(__AVX2__) */
271 return _BitScanReverse64(&r
, x
) ? (63u - (unsigned int)r
) : 64;
273 #elif __has_builtin(__builtin_clzl)
277 else if (sizeof(long) * CHAR_BIT
== 64) {
278 return (unsigned int)__builtin_clzl((unsigned long)x
);
280 else if (sizeof(long long) * CHAR_BIT
== 64) {
281 return (unsigned int)__builtin_clzll((unsigned long long)x
);
284 /* :FIXME: Is there a way to make this branch a compile-time error? */
285 UNREACHABLE_RETURN(~0);
291 y
= x
>> 32; if (y
) {n
-= 32; x
= y
;}
292 y
= x
>> 16; if (y
) {n
-= 16; x
= y
;}
293 y
= x
>> 8; if (y
) {n
-= 8; x
= y
;}
294 y
= x
>> 4; if (y
) {n
-= 4; x
= y
;}
295 y
= x
>> 2; if (y
) {n
-= 2; x
= y
;}
296 y
= x
>> 1; if (y
) {return n
- 2;}
297 return (unsigned int)(n
- x
);
302 #ifdef HAVE_UINT128_T
303 static inline unsigned int
304 nlz_int128(uint128_t x
)
306 uint64_t y
= (uint64_t)(x
>> 64);
312 return (unsigned int)nlz_int64(x
) + 64;
315 return (unsigned int)nlz_int64(y
);
320 static inline unsigned int
321 nlz_int(unsigned int x
)
323 if (sizeof(unsigned int) * CHAR_BIT
== 32) {
324 return nlz_int32((uint32_t)x
);
326 else if (sizeof(unsigned int) * CHAR_BIT
== 64) {
327 return nlz_int64((uint64_t)x
);
330 UNREACHABLE_RETURN(~0);
334 static inline unsigned int
335 nlz_long(unsigned long x
)
337 if (sizeof(unsigned long) * CHAR_BIT
== 32) {
338 return nlz_int32((uint32_t)x
);
340 else if (sizeof(unsigned long) * CHAR_BIT
== 64) {
341 return nlz_int64((uint64_t)x
);
344 UNREACHABLE_RETURN(~0);
348 static inline unsigned int
349 nlz_long_long(unsigned long long x
)
351 if (sizeof(unsigned long long) * CHAR_BIT
== 64) {
352 return nlz_int64((uint64_t)x
);
354 #ifdef HAVE_UINT128_T
355 else if (sizeof(unsigned long long) * CHAR_BIT
== 128) {
356 return nlz_int128((uint128_t
)x
);
360 UNREACHABLE_RETURN(~0);
364 static inline unsigned int
365 nlz_intptr(uintptr_t x
)
367 if (sizeof(uintptr_t) == sizeof(unsigned int)) {
368 return nlz_int((unsigned int)x
);
370 if (sizeof(uintptr_t) == sizeof(unsigned long)) {
371 return nlz_long((unsigned long)x
);
373 if (sizeof(uintptr_t) == sizeof(unsigned long long)) {
374 return nlz_long_long((unsigned long long)x
);
377 UNREACHABLE_RETURN(~0);
381 static inline unsigned int
382 rb_popcount32(uint32_t x
)
384 #if defined(_MSC_VER) && defined(__AVX__)
385 /* Note: CPUs since Nehalem and Barcelona have had this instruction so SSE
386 * 4.2 should suffice, but it seems there is no such thing like __SSE_4_2__
387 * predefined macro in MSVC. They do have __AVX__ so use it instead. */
388 return (unsigned int)__popcnt(x
);
390 #elif __has_builtin(__builtin_popcount)
391 STATIC_ASSERT(sizeof_int
, sizeof(int) * CHAR_BIT
>= 32);
392 return (unsigned int)__builtin_popcount(x
);
395 x
= (x
& 0x55555555) + (x
>> 1 & 0x55555555);
396 x
= (x
& 0x33333333) + (x
>> 2 & 0x33333333);
397 x
= (x
& 0x0f0f0f0f) + (x
>> 4 & 0x0f0f0f0f);
398 x
= (x
& 0x001f001f) + (x
>> 8 & 0x001f001f);
399 x
= (x
& 0x0000003f) + (x
>>16 & 0x0000003f);
400 return (unsigned int)x
;
405 static inline unsigned int
406 rb_popcount64(uint64_t x
)
408 #if defined(_MSC_VER) && defined(__AVX__)
409 return (unsigned int)__popcnt64(x
);
411 #elif __has_builtin(__builtin_popcount)
412 if (sizeof(long) * CHAR_BIT
== 64) {
413 return (unsigned int)__builtin_popcountl((unsigned long)x
);
415 else if (sizeof(long long) * CHAR_BIT
== 64) {
416 return (unsigned int)__builtin_popcountll((unsigned long long)x
);
419 /* :FIXME: Is there a way to make this branch a compile-time error? */
420 UNREACHABLE_RETURN(~0);
424 x
= (x
& 0x5555555555555555) + (x
>> 1 & 0x5555555555555555);
425 x
= (x
& 0x3333333333333333) + (x
>> 2 & 0x3333333333333333);
426 x
= (x
& 0x0707070707070707) + (x
>> 4 & 0x0707070707070707);
427 x
= (x
& 0x001f001f001f001f) + (x
>> 8 & 0x001f001f001f001f);
428 x
= (x
& 0x0000003f0000003f) + (x
>>16 & 0x0000003f0000003f);
429 x
= (x
& 0x000000000000007f) + (x
>>32 & 0x000000000000007f);
430 return (unsigned int)x
;
435 static inline unsigned int
436 rb_popcount_intptr(uintptr_t x
)
438 if (sizeof(uintptr_t) * CHAR_BIT
== 64) {
439 return rb_popcount64((uint64_t)x
);
441 else if (sizeof(uintptr_t) * CHAR_BIT
== 32) {
442 return rb_popcount32((uint32_t)x
);
445 UNREACHABLE_RETURN(~0);
450 ntz_int32(uint32_t x
)
452 #if defined(__x86_64__) && defined(__BMI__)
453 return (unsigned)_tzcnt_u32(x
);
455 #elif MSC_VERSION_SINCE(1400)
456 /* :FIXME: Is there any way to issue TZCNT instead of BSF, apart from using
457 * assembly? Because issuing LZCNT seems possible (see nlz.h). */
459 return _BitScanForward(&r
, x
) ? (int)r
: 32;
461 #elif __has_builtin(__builtin_ctz)
462 STATIC_ASSERT(sizeof_int
, sizeof(int) * CHAR_BIT
== 32);
463 return x
? (unsigned)__builtin_ctz(x
) : 32;
466 return rb_popcount32((~x
) & (x
-1));
472 ntz_int64(uint64_t x
)
474 #if defined(__x86_64__) && defined(__BMI__)
475 return (unsigned)_tzcnt_u64(x
);
477 #elif defined(_WIN64) && MSC_VERSION_SINCE(1400)
479 return _BitScanForward64(&r
, x
) ? (int)r
: 64;
481 #elif __has_builtin(__builtin_ctzl)
485 else if (sizeof(long) * CHAR_BIT
== 64) {
486 return (unsigned)__builtin_ctzl((unsigned long)x
);
488 else if (sizeof(long long) * CHAR_BIT
== 64) {
489 return (unsigned)__builtin_ctzll((unsigned long long)x
);
492 /* :FIXME: Is there a way to make this branch a compile-time error? */
493 UNREACHABLE_RETURN(~0);
497 return rb_popcount64((~x
) & (x
-1));
503 ntz_intptr(uintptr_t x
)
505 if (sizeof(uintptr_t) * CHAR_BIT
== 64) {
506 return ntz_int64((uint64_t)x
);
508 else if (sizeof(uintptr_t) * CHAR_BIT
== 32) {
509 return ntz_int32((uint32_t)x
);
512 UNREACHABLE_RETURN(~0);
517 RUBY_BIT_ROTL(VALUE v
, int n
)
519 #if __has_builtin(__builtin_rotateleft32) && (SIZEOF_VALUE * CHAR_BIT == 32)
520 return __builtin_rotateleft32(v
, n
);
522 #elif __has_builtin(__builtin_rotateleft64) && (SIZEOF_VALUE * CHAR_BIT == 64)
523 return __builtin_rotateleft64(v
, n
);
525 #elif MSC_VERSION_SINCE(1310) && (SIZEOF_VALUE * CHAR_BIT == 32)
528 #elif MSC_VERSION_SINCE(1310) && (SIZEOF_VALUE * CHAR_BIT == 64)
529 return _rotl64(v
, n
);
531 #elif defined(_lrotl) && (SIZEOF_VALUE == SIZEOF_LONG)
535 const int m
= (sizeof(VALUE
) * CHAR_BIT
) - 1;
536 return (v
<< (n
& m
)) | (v
>> (-n
& m
));
541 RUBY_BIT_ROTR(VALUE v
, int n
)
543 #if __has_builtin(__builtin_rotateright32) && (SIZEOF_VALUE * CHAR_BIT == 32)
544 return __builtin_rotateright32(v
, n
);
546 #elif __has_builtin(__builtin_rotateright64) && (SIZEOF_VALUE * CHAR_BIT == 64)
547 return __builtin_rotateright64(v
, n
);
549 #elif MSC_VERSION_SINCE(1310) && (SIZEOF_VALUE * CHAR_BIT == 32)
552 #elif MSC_VERSION_SINCE(1310) && (SIZEOF_VALUE * CHAR_BIT == 64)
553 return _rotr64(v
, n
);
555 #elif defined(_lrotr) && (SIZEOF_VALUE == SIZEOF_LONG)
559 const int m
= (sizeof(VALUE
) * CHAR_BIT
) - 1;
560 return (v
<< (-n
& m
)) | (v
>> (n
& m
));
564 #endif /* INTERNAL_BITS2_H */