2 * Routines common to user and system emulation of load/store.
4 * Copyright (c) 2022 Linaro, Ltd.
6 * SPDX-License-Identifier: GPL-2.0-or-later
8 * This work is licensed under the terms of the GNU GPL, version 2 or later.
9 * See the COPYING file in the top-level directory.
12 #include "host/load-extract-al16-al8.h.inc"
13 #include "host/store-insert-al16.h.inc"
/*
 * HAVE_al8 is true exactly when CONFIG_ATOMIC64 is defined.
 * HAVE_al8_fast is true when ATOMIC_REG_SIZE is at least 8.
 *
 * The #else/#endif pair is required: without it HAVE_al8 is defined twice
 * and the conditional is never closed.
 */
#ifdef CONFIG_ATOMIC64
# define HAVE_al8 true
#else
# define HAVE_al8 false
#endif
#define HAVE_al8_fast (ATOMIC_REG_SIZE >= 8)
/*
 * required_atomicity: derive, from the MO_ATOM_* policy and the MO_SIZE
 * field of @memop plus the host address @p, the log2 byte count that must
 * be accessed atomically.
 *
 * NOTE(review): this listing is lossy.  The braces, the declarations of
 * tmp and atmax, the switch head, some case labels with the assignments
 * under them, and every return statement are not visible here.  Restore
 * them from the pristine file before editing the logic.
 */
25 * Return the lg2 bytes of atomicity required by @memop for @p.
26 * If the operation must be split into two operations to be
27 * examined separately for atomicity, return -lg2.
29 static int required_atomicity(CPUState *cpu, uintptr_t p, MemOp memop)
31 MemOp atom = memop & MO_ATOM_MASK;
32 MemOp size = memop & MO_SIZE;
/* One size step below @size, floored at zero; used for the pair cases. */
33 MemOp half = size ? size - 1 : 0;
42 case MO_ATOM_IFALIGN_PAIR:
/* Any address bit set under the (1 << size) - 1 mask drops the need to MO_8. */
47 tmp = (1 << size) - 1;
48 atmax = p & tmp ? MO_8 : size;
51 case MO_ATOM_WITHIN16:
/* NOTE(review): tmp is presumably p % 16 here; its assignment is not visible. */
53 atmax = (tmp + (1 << size) <= 16 ? size : MO_8);
56 case MO_ATOM_WITHIN16_PAIR:
58 if (tmp + (1 << size) <= 16) {
60 } else if (tmp + (1 << half) == 16) {
62 * The pair exactly straddles the boundary.
63 * Both halves are naturally aligned and atomic.
68 * One of the pair crosses the boundary, and is non-atomic.
69 * The other of the pair does not cross, and is atomic.
75 case MO_ATOM_SUBALIGN:
77 * Examine the alignment of p to determine if there are subobjects
78 * that must be aligned. Note that we only really need ctz4() --
79 * any more significant bits are discarded by the immediately
80 * following comparison.
83 atmax = MIN(size, tmp);
/* Presumably the default arm: an unknown policy is a programming error. */
87 g_assert_not_reached();
91 * Here we have the architectural atomicity of the operation.
92 * However, when executing in a serial context, we need no extra
93 * host atomicity in order to avoid racing. This reduction
94 * avoids looping with cpu_loop_exit_atomic.
/* NOTE(review): the value returned in the serial case is not visible here. */
96 if (cpu_in_serial_context(cpu)) {
/**
 * load_atomic2:
 * @pv: host address, assumed to be 2-byte aligned
 *
 * Atomically load 2 aligned bytes from @pv.
 */
static inline uint16_t load_atomic2(void *pv)
{
    uint16_t *p = __builtin_assume_aligned(pv, 2);
    return qatomic_read(p);
}
/**
 * load_atomic4:
 * @pv: host address, assumed to be 4-byte aligned
 *
 * Atomically load 4 aligned bytes from @pv.
 */
static inline uint32_t load_atomic4(void *pv)
{
    uint32_t *p = __builtin_assume_aligned(pv, 4);
    return qatomic_read(p);
}
130 * Atomically load 8 aligned bytes from @pv.
132 static inline uint64_t load_atomic8(void *pv)
134 uint64_t *p = __builtin_assume_aligned(pv, 8);
136 qemu_build_assert(HAVE_al8);
137 return qatomic_read__nocheck(p);
/*
 * NOTE(review): lossy listing.  The function braces, the guard in front of
 * the first return, the statement inside the page_check_range() branch and
 * the #endif matching CONFIG_USER_ONLY are not visible here.
 */
141 * load_atomic8_or_exit:
142 * @cpu: generic cpu state
143 * @ra: host unwind address
146 * Atomically load 8 aligned bytes from @pv.
147 * If this is not possible, longjmp out to restart serially.
149 static uint64_t load_atomic8_or_exit(CPUState *cpu, uintptr_t ra, void *pv)
/* Preferred path: a real 8-byte atomic load (presumably guarded by HAVE_al8). */
152 return load_atomic8(pv);
/* User-mode builds only: query the page flags inside the mmap-lock guard. */
155 #ifdef CONFIG_USER_ONLY
157 * If the page is not writable, then assume the value is immutable
158 * and requires no locking. This ignores the case of MAP_SHARED with
159 * another process, because the fallback start_exclusive solution
160 * provides no protection across processes.
162 WITH_MMAP_LOCK_GUARD() {
163 if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) {
/* NOTE(review): what is done with p in this branch is not visible here. */
164 uint64_t *p = __builtin_assume_aligned(pv, 8);
170 /* Ultimate fallback: re-execute in serial context. */
171 cpu_loop_exit_atomic(cpu, ra);
/*
 * NOTE(review): lossy listing.  The function braces, the statement inside
 * the page_check_range() branch and the #endif matching CONFIG_USER_ONLY
 * are not visible here.
 */
175 * load_atomic16_or_exit:
176 * @cpu: generic cpu state
177 * @ra: host unwind address
180 * Atomically load 16 aligned bytes from @pv.
181 * If this is not possible, longjmp out to restart serially.
183 static Int128 load_atomic16_or_exit(CPUState *cpu, uintptr_t ra, void *pv)
185 Int128 *p = __builtin_assume_aligned(pv, 16);
/* First choice: the read-only 16-byte atomic load, when the host has one. */
187 if (HAVE_ATOMIC128_RO) {
188 return atomic16_read_ro(p);
192 * We can only use cmpxchg to emulate a load if the page is writable.
193 * If the page is not writable, then assume the value is immutable
194 * and requires no locking. This ignores the case of MAP_SHARED with
195 * another process, because the fallback start_exclusive solution
196 * provides no protection across processes.
198 * In system mode all guest pages are writable. For user mode,
199 * we must take mmap_lock so that the query remains valid until
200 * the write is complete -- tests/tcg/multiarch/munmap-pthread.c
201 * is an example that can race.
203 WITH_MMAP_LOCK_GUARD() {
204 #ifdef CONFIG_USER_ONLY
/* NOTE(review): the action taken for a non-writable page is not visible here. */
205 if (!page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) {
/* Second choice: the read-write variant, used only inside the lock guard. */
209 if (HAVE_ATOMIC128_RW) {
210 return atomic16_read_rw(p);
214 /* Ultimate fallback: re-execute in serial context. */
215 cpu_loop_exit_atomic(cpu, ra);
219 * load_atom_extract_al4x2:
222 * Load 4 bytes from @p, from two sequential atomic 4-byte loads.
224 static uint32_t load_atom_extract_al4x2(void *pv)
226 uintptr_t pi = (uintptr_t)pv;
227 int sh = (pi & 3) * 8;
230 pv = (void *)(pi & ~3);
231 a = load_atomic4(pv);
232 b = load_atomic4(pv + 4);
234 if (HOST_BIG_ENDIAN) {
235 return (a << sh) | (b >> (-sh & 31));
237 return (a >> sh) | (b << (-sh & 31));
242 * load_atom_extract_al8x2:
245 * Load 8 bytes from @p, from two sequential atomic 8-byte loads.
247 static uint64_t load_atom_extract_al8x2(void *pv)
249 uintptr_t pi = (uintptr_t)pv;
250 int sh = (pi & 7) * 8;
253 pv = (void *)(pi & ~7);
254 a = load_atomic8(pv);
255 b = load_atomic8(pv + 8);
257 if (HOST_BIG_ENDIAN) {
258 return (a << sh) | (b >> (-sh & 63));
260 return (a >> sh) | (b << (-sh & 63));
/*
 * NOTE(review): lossy listing.  The second line of the parameter list
 * (which must supply pv and s), the braces and the definition of the
 * offset `o` are not visible here.
 */
265 * load_atom_extract_al8_or_exit:
266 * @cpu: generic cpu state
267 * @ra: host unwind address
269 * @s: object size in bytes, @s <= 4.
271 * Atomically load @s bytes from @p, when p % s != 0, and [p, p+s-1] does
272 * not cross an 8-byte boundary. This means that we can perform an atomic
273 * 8-byte load and extract.
274 * The value is returned in the low bits of a uint32_t.
276 static uint32_t load_atom_extract_al8_or_exit(CPUState *cpu, uintptr_t ra,
279 uintptr_t pi = (uintptr_t)pv;
/*
 * Right-shift that brings the object to bit 0: counted from the far end of
 * the 8-byte word on big-endian hosts, from the near end otherwise.
 * `o` is presumably the byte offset of pv within that word.
 */
281 int shr = (HOST_BIG_ENDIAN ? 8 - s - o : o) * 8;
/* Load the whole containing 8-byte word; bits above the object remain. */
283 pv = (void *)(pi & ~7);
284 return load_atomic8_or_exit(cpu, ra, pv) >> shr;
/*
 * NOTE(review): lossy listing.  The second line of the parameter list
 * (which must supply pv and s), the braces, the definition of the offset
 * `o` and the declaration of `r` are not visible here.
 */
288 * load_atom_extract_al16_or_exit:
289 * @cpu: generic cpu state
290 * @ra: host unwind address
292 * @s: object size in bytes, @s <= 8.
294 * Atomically load @s bytes from @p, when p % 16 < 8
295 * and p % 16 + s > 8. I.e. does not cross a 16-byte
296 * boundary, but *does* cross an 8-byte boundary.
297 * This is the slow version, so we must have eliminated
298 * any faster load_atom_extract_al8_or_exit case.
300 * If this is not possible, longjmp out to restart serially.
302 static uint64_t load_atom_extract_al16_or_exit(CPUState *cpu, uintptr_t ra,
305 uintptr_t pi = (uintptr_t)pv;
/* Right-shift that brings the object to bit 0 of the 16-byte value. */
307 int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
311 * Note constraints above: p & 8 must be clear.
312 * Provoke SIGBUS if possible otherwise.
/* Only the low three bits are cleared, so bit 3 of the address is kept. */
314 pv = (void *)(pi & ~7);
315 r = load_atomic16_or_exit(cpu, ra, pv);
317 r = int128_urshift(r, shr);
318 return int128_getlo(r);
325 * Load 4 bytes from @pv, with two 2-byte atomic loads.
327 static inline uint32_t load_atom_4_by_2(void *pv)
329 uint32_t a = load_atomic2(pv);
330 uint32_t b = load_atomic2(pv + 2);
332 if (HOST_BIG_ENDIAN) {
333 return (a << 16) | b;
335 return (b << 16) | a;
343 * Load 8 bytes from @pv, with four 2-byte atomic loads.
345 static inline uint64_t load_atom_8_by_2(void *pv)
347 uint32_t a = load_atom_4_by_2(pv);
348 uint32_t b = load_atom_4_by_2(pv + 4);
350 if (HOST_BIG_ENDIAN) {
351 return ((uint64_t)a << 32) | b;
353 return ((uint64_t)b << 32) | a;
361 * Load 8 bytes from @pv, with two 4-byte atomic loads.
363 static inline uint64_t load_atom_8_by_4(void *pv)
365 uint32_t a = load_atomic4(pv);
366 uint32_t b = load_atomic4(pv + 4);
368 if (HOST_BIG_ENDIAN) {
369 return ((uint64_t)a << 32) | b;
371 return ((uint64_t)b << 32) | a;
/*
 * NOTE(review): lossy listing.  The braces and the condition that chooses
 * between the two returns below are not visible here; presumably a
 * host-capability test selects the single 8-byte load when available.
 */
376 * load_atom_8_by_8_or_4:
379 * Load 8 bytes from aligned @pv, with at least 4-byte atomicity.
381 static inline uint64_t load_atom_8_by_8_or_4(void *pv)
/* One 8-byte atomic load ... */
384 return load_atomic8(pv);
/* ... or two 4-byte atomic loads combined. */
386 return load_atom_8_by_4(pv);
/*
 * NOTE(review): lossy listing.  The braces, the declaration of atmax, the
 * switch on atmax and its case labels are not visible here, so which
 * atomicity level selects each return below cannot be read from this text.
 */
393 * @memop: the full memory op
395 * Load 2 bytes from @p, honoring the atomicity of @memop.
397 static uint16_t load_atom_2(CPUState *cpu, uintptr_t ra,
398 void *pv, MemOp memop)
400 uintptr_t pi = (uintptr_t)pv;
/* Fast path: an aligned halfword is a single atomic load. */
403 if (likely((pi & 1) == 0)) {
404 return load_atomic2(pv);
/*
 * With read-only 16-byte atomics, use the al16/al8 extract helper, but
 * only when more than 8 bytes remain before the end of the target page.
 */
406 if (HAVE_ATOMIC128_RO) {
407 intptr_t left_in_page = -(pi | TARGET_PAGE_MASK);
408 if (likely(left_in_page > 8)) {
409 return load_atom_extract_al16_or_al8(pv, 2);
413 atmax = required_atomicity(cpu, pi, memop);
/* Plain host-endian halfword load (presumably the no-atomicity case). */
416 return lduw_he_p(pv);
418 /* The only case remaining is MO_ATOM_WITHIN16. */
419 if (!HAVE_al8_fast && (pi & 3) == 1) {
420 /* Big or little endian, we want the middle two bytes. */
421 return load_atomic4(pv - 1) >> 8;
/* Offset 7 in a 16-byte block is the only one that crosses an 8-byte boundary. */
423 if ((pi & 15) != 7) {
424 return load_atom_extract_al8_or_exit(cpu, ra, pv, 2);
426 return load_atom_extract_al16_or_exit(cpu, ra, pv, 2);
428 g_assert_not_reached();
/*
 * NOTE(review): lossy listing.  The braces, the declaration of atmax, the
 * switch on atmax with its case labels, and the condition choosing between
 * the al8 and al16 extract helpers are not visible here.
 */
435 * @memop: the full memory op
437 * Load 4 bytes from @p, honoring the atomicity of @memop.
439 static uint32_t load_atom_4(CPUState *cpu, uintptr_t ra,
440 void *pv, MemOp memop)
442 uintptr_t pi = (uintptr_t)pv;
/* Fast path: an aligned word is a single atomic load. */
445 if (likely((pi & 3) == 0)) {
446 return load_atomic4(pv);
/*
 * With read-only 16-byte atomics, use the al16/al8 extract helper, but
 * only when more than 8 bytes remain before the end of the target page.
 */
448 if (HAVE_ATOMIC128_RO) {
449 intptr_t left_in_page = -(pi | TARGET_PAGE_MASK);
450 if (likely(left_in_page > 8)) {
451 return load_atom_extract_al16_or_al8(pv, 4);
455 atmax = required_atomicity(cpu, pi, memop);
461 * For MO_ATOM_IFALIGN, this is more atomicity than required,
462 * but it's trivially supported on all hosts, better than 4
463 * individual byte loads (when the host requires alignment),
464 * and overlaps with the MO_ATOM_SUBALIGN case of p % 2 == 0.
466 return load_atom_extract_al4x2(pv);
/* Whole-word atomicity: extract from one aligned 8-byte or 16-byte load. */
469 return load_atom_extract_al8_or_exit(cpu, ra, pv, 4);
471 return load_atom_extract_al16_or_exit(cpu, ra, pv, 4);
473 g_assert_not_reached();
/*
 * NOTE(review): lossy listing.  The braces, the declaration of atmax, the
 * switch on atmax with its case labels, and the guards in front of the
 * load_atom_extract_al8x2() calls are not visible here.
 */
480 * @memop: the full memory op
482 * Load 8 bytes from @p, honoring the atomicity of @memop.
484 static uint64_t load_atom_8(CPUState *cpu, uintptr_t ra,
485 void *pv, MemOp memop)
487 uintptr_t pi = (uintptr_t)pv;
491 * If the host does not support 8-byte atomics, wait until we have
492 * examined the atomicity parameters below.
494 if (HAVE_al8 && likely((pi & 7) == 0)) {
495 return load_atomic8(pv);
/* With read-only 16-byte atomics every remaining case uses this helper. */
497 if (HAVE_ATOMIC128_RO) {
498 return load_atom_extract_al16_or_al8(pv, 8);
501 atmax = required_atomicity(cpu, pi, memop);
502 if (atmax == MO_64) {
503 if (!HAVE_al8 && (pi & 7) == 0) {
/*
 * NOTE(review): the value returned by load_atomic8_or_exit() is discarded
 * and control continues to the al16 extract below.  Confirm whether a
 * `return` was intended on this line.
 */
504 load_atomic8_or_exit(cpu, ra, pv);
506 return load_atom_extract_al16_or_exit(cpu, ra, pv, 8);
509 return load_atom_extract_al8x2(pv);
515 return load_atom_8_by_2(pv);
517 return load_atom_8_by_4(pv);
520 return load_atom_extract_al8x2(pv);
/* No host primitive fits: restart the access in a serial context. */
522 cpu_loop_exit_atomic(cpu, ra);
524 g_assert_not_reached();
/*
 * NOTE(review): lossy listing.  The braces, the declarations of atmax, a
 * and b, the switch on atmax with its case labels, the byte-wise case and
 * the guards in front of the cpu_loop_exit_atomic() calls are not visible.
 */
531 * @memop: the full memory op
533 * Load 16 bytes from @p, honoring the atomicity of @memop.
535 static Int128 load_atom_16(CPUState *cpu, uintptr_t ra,
536 void *pv, MemOp memop)
538 uintptr_t pi = (uintptr_t)pv;
544 * If the host does not support 16-byte atomics, wait until we have
545 * examined the atomicity parameters below.
547 if (HAVE_ATOMIC128_RO && likely((pi & 15) == 0)) {
548 return atomic16_read_ro(pv);
551 atmax = required_atomicity(cpu, pi, memop);
/* Each arm below fills a (bytes 0-7) and b (bytes 8-15) in address order. */
557 a = load_atom_8_by_2(pv);
558 b = load_atom_8_by_2(pv + 8);
561 a = load_atom_8_by_4(pv);
562 b = load_atom_8_by_4(pv + 8);
566 cpu_loop_exit_atomic(cpu, ra);
568 a = load_atomic8(pv);
569 b = load_atomic8(pv + 8);
573 cpu_loop_exit_atomic(cpu, ra);
575 a = load_atom_extract_al8x2(pv);
576 b = load_atom_extract_al8x2(pv + 8);
579 return load_atomic16_or_exit(cpu, ra, pv);
581 g_assert_not_reached();
/* First argument is the low half: the lower-address half on little-endian hosts. */
583 return int128_make128(HOST_BIG_ENDIAN ? b : a, HOST_BIG_ENDIAN ? a : b);
/*
 * NOTE(review): lossy listing.  The braces and the statement that actually
 * stores @val through p are not visible here; only the alignment
 * assumption on the pointer is shown.
 */
589 * @val: value to store
591 * Atomically store 2 aligned bytes to @pv.
593 static inline void store_atomic2(void *pv, uint16_t val)
595 uint16_t *p = __builtin_assume_aligned(pv, 2);
/*
 * NOTE(review): lossy listing.  The braces and the statement that actually
 * stores @val through p are not visible here; only the alignment
 * assumption on the pointer is shown.
 */
602 * @val: value to store
604 * Atomically store 4 aligned bytes to @pv.
606 static inline void store_atomic4(void *pv, uint32_t val)
608 uint32_t *p = __builtin_assume_aligned(pv, 4);
615 * @val: value to store
617 * Atomically store 8 aligned bytes to @pv.
619 static inline void store_atomic8(void *pv, uint64_t val)
621 uint64_t *p = __builtin_assume_aligned(pv, 8);
623 qemu_build_assert(HAVE_al8);
624 qatomic_set__nocheck(p, val);
630 static inline void store_atom_4_by_2(void *pv, uint32_t val)
632 store_atomic2(pv, val >> (HOST_BIG_ENDIAN ? 16 : 0));
633 store_atomic2(pv + 2, val >> (HOST_BIG_ENDIAN ? 0 : 16));
639 static inline void store_atom_8_by_2(void *pv, uint64_t val)
641 store_atom_4_by_2(pv, val >> (HOST_BIG_ENDIAN ? 32 : 0));
642 store_atom_4_by_2(pv + 4, val >> (HOST_BIG_ENDIAN ? 0 : 32));
648 static inline void store_atom_8_by_4(void *pv, uint64_t val)
650 store_atomic4(pv, val >> (HOST_BIG_ENDIAN ? 32 : 0));
651 store_atomic4(pv + 4, val >> (HOST_BIG_ENDIAN ? 0 : 32));
655 * store_atom_insert_al4:
657 * @val: shifted value to store
658 * @msk: mask for value to store
660 * Atomically store @val to @p, masked by @msk.
662 static void store_atom_insert_al4(uint32_t *p, uint32_t val, uint32_t msk)
666 p = __builtin_assume_aligned(p, 4);
667 old = qatomic_read(p);
669 new = (old & ~msk) | val;
670 } while (!__atomic_compare_exchange_n(p, &old, new, true,
671 __ATOMIC_RELAXED, __ATOMIC_RELAXED));
675 * store_atom_insert_al8:
677 * @val: shifted value to store
678 * @msk: mask for value to store
680 * Atomically store @val to @p masked by @msk.
682 static void store_atom_insert_al8(uint64_t *p, uint64_t val, uint64_t msk)
686 qemu_build_assert(HAVE_al8);
687 p = __builtin_assume_aligned(p, 8);
688 old = qatomic_read__nocheck(p);
690 new = (old & ~msk) | val;
691 } while (!__atomic_compare_exchange_n(p, &old, new, true,
692 __ATOMIC_RELAXED, __ATOMIC_RELAXED));
/*
 * NOTE(review): lossy listing.  The braces, the loop body that writes each
 * byte and the return statement are not visible here.
 */
698 * @size: number of bytes to store
699 * @val_le: data to store
701 * Store @size bytes at @p. The bytes to store are extracted in little-endian order
702 * from @val_le; return the bytes of @val_le beyond @size that have not been stored.
704 static uint64_t store_bytes_leN(void *pv, int size, uint64_t val_le)
/* One iteration per byte; val_le is shifted down 8 bits after each one. */
707 for (int i = 0; i < size; i++, val_le >>= 8) {
/*
 * NOTE(review): lossy listing.  The braces, the enclosing loop, the case
 * labels, the per-case shifts of val_le, the pointer/size bookkeeping and
 * the return statement are not visible here.
 */
716 * @size: number of bytes to store
717 * @val_le: data to store
719 * As store_bytes_leN, but atomically on each aligned part.
722 static uint64_t store_parts_leN(void *pv, int size, uint64_t val_le)
727 /* Find minimum of alignment and size */
/* OR-ing address and size: the lowest set bit bounds the largest aligned piece. */
728 switch (((uintptr_t)pv | size) & 7) {
/* 4-byte piece: low 32 bits of the little-endian value, in host order. */
730 store_atomic4(pv, le32_to_cpu(val_le));
/* 2-byte piece. */
736 store_atomic2(pv, le16_to_cpu(val_le));
/* Single byte. */
741 *(uint8_t *)pv = val_le;
746 g_assert_not_reached();
/*
 * NOTE(review): lossy listing.  The braces, the definitions of sz and sh,
 * the declaration of v, the little-endian branch and the return statement
 * are not visible here.  sz is presumably the store width in bits and sh
 * the bit offset matching the byte offset o.
 */
758 * @size: number of bytes to store
759 * @val_le: data to store
761 * As store_bytes_leN, but atomically as a whole.
762 * Four aligned bytes are guaranteed to cover the store.
764 static uint64_t store_whole_le4(void *pv, int size, uint64_t val_le)
/* Byte offset of the store within its aligned 4-byte word. */
767 int o = (uintptr_t)pv & 3;
768 uint32_t m = MAKE_64BIT_MASK(0, sz);
772 if (HOST_BIG_ENDIAN) {
/* Big-endian host: byte-swap value and mask, then shift right by sh. */
773 v = bswap32(val_le) >> sh;
774 m = bswap32(m) >> sh;
/* Merge into the containing aligned word with a masked atomic insert. */
779 store_atom_insert_al4(pv - o, v, m);
/*
 * NOTE(review): lossy listing.  The braces, the definitions of sz and sh,
 * the declaration of v, the little-endian branch and the return statement
 * are not visible here.  sz is presumably the store width in bits and sh
 * the bit offset matching the byte offset o.
 */
786 * @size: number of bytes to store
787 * @val_le: data to store
789 * As store_bytes_leN, but atomically as a whole.
790 * Eight aligned bytes are guaranteed to cover the store.
792 static uint64_t store_whole_le8(void *pv, int size, uint64_t val_le)
/* Byte offset of the store within its aligned 8-byte word. */
795 int o = (uintptr_t)pv & 7;
797 uint64_t m = MAKE_64BIT_MASK(0, sz);
800 qemu_build_assert(HAVE_al8);
801 if (HOST_BIG_ENDIAN) {
/* Big-endian host: byte-swap value and mask, then shift right by sh. */
802 v = bswap64(val_le) >> sh;
803 m = bswap64(m) >> sh;
/* Merge into the containing aligned doubleword with a masked atomic insert. */
808 store_atom_insert_al8(pv - o, v, m);
/*
 * NOTE(review): lossy listing.  The braces, the definitions of sz and sh,
 * the declarations of m and v, the conditions selecting between the two
 * mask constructions, the else keyword and any early return before the
 * final one are not visible here.
 */
815 * @size: number of bytes to store
816 * @val_le: data to store
818 * As store_bytes_leN, but atomically as a whole.
819 * 16 aligned bytes are guaranteed to cover the store.
821 static uint64_t store_whole_le16(void *pv, int size, Int128 val_le)
/* Byte offset of the store within its aligned 16-byte block. */
824 int o = (uintptr_t)pv & 15;
828 qemu_build_assert(HAVE_CMPXCHG128);
830 /* Like MAKE_64BIT_MASK(0, sz), but larger. */
/* Mask that fits entirely in the low 64 bits. */
832 m = int128_make64(MAKE_64BIT_MASK(0, sz));
/* Mask wider than 64 bits: low half all ones, remainder in the high half. */
834 m = int128_make128(-1, MAKE_64BIT_MASK(0, sz - 64));
837 if (HOST_BIG_ENDIAN) {
838 v = int128_urshift(bswap128(val_le), sh);
839 m = int128_urshift(bswap128(m), sh);
841 v = int128_lshift(val_le, sh);
842 m = int128_lshift(m, sh);
844 store_atom_insert_al16(pv - o, v, m);
/*
 * Leftover bytes of the high half.  The shift count is only valid when
 * sz > 64, so a guard for smaller sizes presumably precedes this line.
 */
849 return int128_gethi(val_le) >> (sz - 64);
/*
 * NOTE(review): lossy listing.  The braces, the declaration of atmax, the
 * early returns, the handling of the no-atomicity case, the first branch
 * condition of the offset chain and the host-capability guard around the
 * 8-byte insert are not visible here.
 */
855 * @val: the value to store
856 * @memop: the full memory op
858 * Store 2 bytes to @p, honoring the atomicity of @memop.
860 static void store_atom_2(CPUState *cpu, uintptr_t ra,
861 void *pv, MemOp memop, uint16_t val)
863 uintptr_t pi = (uintptr_t)pv;
/* Fast path: an aligned halfword is a single atomic store. */
866 if (likely((pi & 1) == 0)) {
867 store_atomic2(pv, val);
871 atmax = required_atomicity(cpu, pi, memop);
878 * The only case remaining is MO_ATOM_WITHIN16.
879 * Big or little endian, we want the middle two bytes in each test.
/* Bytes 1-2 of the aligned 4-byte word starting at pv - 1. */
882 store_atom_insert_al4(pv - 1, (uint32_t)val << 8, MAKE_64BIT_MASK(8, 16));
884 } else if ((pi & 7) == 3) {
/* Bytes 3-4 of the aligned 8-byte word starting at pv - 3. */
886 store_atom_insert_al8(pv - 3, (uint64_t)val << 24, MAKE_64BIT_MASK(24, 16));
889 } else if ((pi & 15) == 7) {
890 if (HAVE_CMPXCHG128) {
/* Bytes 7-8 of the aligned 16-byte block starting at pv - 7. */
891 Int128 v = int128_lshift(int128_make64(val), 56);
892 Int128 m = int128_lshift(int128_make64(0xffff), 56);
893 store_atom_insert_al16(pv - 7, v, m);
897 g_assert_not_reached();
/* No suitable host primitive: restart the access in a serial context. */
900 cpu_loop_exit_atomic(cpu, ra);
/*
 * NOTE(review): lossy listing.  The braces, the declaration of atmax, the
 * outer switch with its case labels, the definitions of s1 and s2, the
 * inner switch head with its non-asserting case labels, and the guards
 * around the le8/le16 stores are not visible here.
 */
906 * @val: the value to store
907 * @memop: the full memory op
909 * Store 4 bytes to @p, honoring the atomicity of @memop.
911 static void store_atom_4(CPUState *cpu, uintptr_t ra,
912 void *pv, MemOp memop, uint32_t val)
914 uintptr_t pi = (uintptr_t)pv;
/* Fast path: an aligned word is a single atomic store. */
917 if (likely((pi & 3) == 0)) {
918 store_atomic4(pv, val);
922 atmax = required_atomicity(cpu, pi, memop);
928 store_atom_4_by_2(pv, val);
/* Split store: work on the little-endian image so bytes are in address order. */
932 uint32_t val_le = cpu_to_le32(val);
/* Leading s1 bytes stored as a whole, then the final byte on its own. */
938 val_le = store_whole_le4(pv, s1, val_le);
939 *(uint8_t *)(pv + 3) = val_le;
/* First byte on its own, then the trailing s2 bytes stored as a whole. */
942 *(uint8_t *)pv = val_le;
943 store_whole_le4(pv + 1, s2, val_le >> 8);
945 case 0: /* aligned */
946 case 2: /* atmax MO_16 */
948 g_assert_not_reached();
/* Whole-word atomicity: insert into the covering 8-byte or 16-byte block. */
955 store_whole_le8(pv, 4, cpu_to_le32(val));
959 if (HAVE_CMPXCHG128) {
960 store_whole_le16(pv, 4, int128_make64(cpu_to_le32(val)));
/* No suitable host primitive: restart the access in a serial context. */
964 cpu_loop_exit_atomic(cpu, ra);
966 g_assert_not_reached();
/*
 * NOTE(review): lossy listing.  The braces, the declaration of atmax, the
 * outer switch with its case labels, the definitions of s1 and s2, the
 * inner switch head with its non-asserting case labels, and the early
 * returns are not visible here.
 */
973 * @val: the value to store
974 * @memop: the full memory op
976 * Store 8 bytes to @p, honoring the atomicity of @memop.
978 static void store_atom_8(CPUState *cpu, uintptr_t ra,
979 void *pv, MemOp memop, uint64_t val)
981 uintptr_t pi = (uintptr_t)pv;
/* Fast path: aligned doubleword on a host with 8-byte atomics. */
984 if (HAVE_al8 && likely((pi & 7) == 0)) {
985 store_atomic8(pv, val);
989 atmax = required_atomicity(cpu, pi, memop);
995 store_atom_8_by_2(pv, val);
998 store_atom_8_by_4(pv, val);
/* Split store: work on the little-endian image so bytes are in address order. */
1002 uint64_t val_le = cpu_to_le64(val);
/* Leading s1 bytes stored as a whole, remaining s2 bytes one at a time. */
1008 val_le = store_whole_le8(pv, s1, val_le);
1009 store_bytes_leN(pv + s1, s2, val_le);
/* Leading s1 bytes one at a time, remaining s2 bytes stored as a whole. */
1012 val_le = store_bytes_leN(pv, s1, val_le);
1013 store_whole_le8(pv + s1, s2, val_le);
1015 case 0: /* aligned */
1016 case 4: /* atmax MO_32 */
1018 g_assert_not_reached();
/* Whole-doubleword atomicity on a misaligned address needs the 16-byte insert. */
1024 if (HAVE_CMPXCHG128) {
1025 store_whole_le16(pv, 8, int128_make64(cpu_to_le64(val)));
1030 g_assert_not_reached();
/* No suitable host primitive: restart the access in a serial context. */
1032 cpu_loop_exit_atomic(cpu, ra);
/*
 * NOTE(review): lossy listing.  The braces, the declarations of atmax, a,
 * b and val_le, the outer switch with its case labels, the definitions of
 * s1 and s2, the inner switch head with its non-asserting case labels,
 * the guard around the two 8-byte stores and the early returns are not
 * visible here.
 */
1038 * @val: the value to store
1039 * @memop: the full memory op
1041 * Store 16 bytes to @p, honoring the atomicity of @memop.
1043 static void store_atom_16(CPUState *cpu, uintptr_t ra,
1044 void *pv, MemOp memop, Int128 val)
1046 uintptr_t pi = (uintptr_t)pv;
/* Fast path: aligned 16 bytes on a host with read-write 16-byte atomics. */
1050 if (HAVE_ATOMIC128_RW && likely((pi & 15) == 0)) {
1051 atomic16_set(pv, val);
1055 atmax = required_atomicity(cpu, pi, memop);
/* a is the half that belongs at pv, b the half at pv + 8, per host endianness. */
1057 a = HOST_BIG_ENDIAN ? int128_gethi(val) : int128_getlo(val);
1058 b = HOST_BIG_ENDIAN ? int128_getlo(val) : int128_gethi(val);
/* Plain copy (presumably the no-atomicity case). */
1061 memcpy(pv, &val, 16);
1064 store_atom_8_by_2(pv, a);
1065 store_atom_8_by_2(pv + 8, b);
1068 store_atom_8_by_4(pv, a);
1069 store_atom_8_by_4(pv + 8, b);
1073 store_atomic8(pv, a);
1074 store_atomic8(pv + 8, b);
1079 if (HAVE_CMPXCHG128) {
/* Convert to the little-endian image so bytes are in address order. */
1084 if (HOST_BIG_ENDIAN) {
1085 val = bswap128(val);
/* Leading s1 bytes stored as a whole, remaining s2 bytes one at a time. */
1089 val_le = store_whole_le16(pv, s1, val);
1090 store_bytes_leN(pv + s1, s2, val_le);
/* Leading s1 bytes one at a time, remaining s2 bytes stored as a whole. */
1093 store_bytes_leN(pv, s1, int128_getlo(val));
1094 val = int128_urshift(val, s1 * 8);
1095 store_whole_le16(pv + s1, s2, val);
1097 case 0: /* aligned */
1098 case 8: /* atmax MO_64 */
1100 g_assert_not_reached();
1108 g_assert_not_reached();
/* No suitable host primitive: restart the access in a serial context. */
1110 cpu_loop_exit_atomic(cpu, ra);