
 * SPDX-License-Identifier: GPL-2.0-or-later
 * See the COPYING file in the top-level directory.

#include "host/load-extract-al16-al8.h.inc"
#include "host/store-insert-al16.h.inc"

#define HAVE_al8_fast (ATOMIC_REG_SIZE >= 8)
 * examined separately for atomicity, return -lg2.
static int required_atomicity(CPUState *cpu, uintptr_t p, MemOp memop)
    MemOp half = size ? size - 1 : 0;

    tmp = (1 << size) - 1;

     * One of the pair crosses the boundary, and is non-atomic.
    atmax = -half;

     * that must be aligned. Note that we only really need ctz4() --

    if (cpu_in_serial_context(cpu)) {
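
/*
 * Hedged illustration (not part of the original file): one way a caller
 * could interpret the signed convention documented above, where a
 * positive value is the lg2 of the bytes that must be atomic as a unit
 * and a negative value means the access is examined as two halves of
 * (1 << -lg2) bytes each.  The helper name is hypothetical.
 */
static inline int example_atomic_chunk_bytes(int atmax)
{
    /* For a negative -lg2 result, each half is (1 << lg2) bytes. */
    return 1 << (atmax < 0 ? -atmax : atmax);
}
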
 * Atomically load 8 aligned bytes from @pv.
    uint64_t *p = __builtin_assume_aligned(pv, 8);
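
/*
 * Hedged sketch (standalone, not the QEMU helper itself): on hosts where
 * an 8-byte access is natively single-copy atomic (ATOMIC_REG_SIZE >= 8),
 * the aligned load above can be expressed with the compiler's __atomic
 * builtins.  __builtin_assume_aligned tells the compiler that @pv really
 * is 8-aligned.  <stdint.h> is included here for the types used by this
 * and the later sketches.
 */
#include <stdint.h>

static inline uint64_t example_load_aligned8(const void *pv)
{
    const uint64_t *p = __builtin_assume_aligned(pv, 8);
    /* A relaxed atomic load is enough for single-copy atomicity. */
    return __atomic_load_n(p, __ATOMIC_RELAXED);
}
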
 * @cpu: generic cpu state
 * Atomically load 8 aligned bytes from @pv.
static uint64_t load_atomic8_or_exit(CPUState *cpu, uintptr_t ra, void *pv)

    if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) {
        uint64_t *p = __builtin_assume_aligned(pv, 8);

    /* Ultimate fallback: re-execute in serial context. */
    cpu_loop_exit_atomic(cpu, ra);
 * @cpu: generic cpu state
static Int128 load_atomic16_or_exit(CPUState *cpu, uintptr_t ra, void *pv)

     * the write is complete -- tests/tcg/multiarch/munmap-pthread.c

    /* Ultimate fallback: re-execute in serial context. */
    cpu_loop_exit_atomic(cpu, ra);
 * Load 4 bytes from @p, from two sequential atomic 4-byte loads.
    int sh = (pi & 3) * 8;

        return (a << sh) | (b >> (-sh & 31));
        return (a >> sh) | (b << (-sh & 31));
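
/*
 * Hedged sketch (standalone): reading a misaligned 4-byte value with two
 * aligned atomic 4-byte loads, as in the fragment above.  The caller must
 * guarantee p % 4 != 0 (otherwise the shift-combine would double-count)
 * and that both aligned words are readable.  Little-endian host shown;
 * a big-endian host swaps the shift directions.
 */
static inline uint32_t example_load_4_from_al4x2(const void *pv)
{
    uintptr_t pi = (uintptr_t)pv;
    const uint32_t *al = (const uint32_t *)(pi & ~(uintptr_t)3);
    int sh = (pi & 3) * 8;                 /* bits below the wanted value */
    uint32_t a = __atomic_load_n(al, __ATOMIC_RELAXED);
    uint32_t b = __atomic_load_n(al + 1, __ATOMIC_RELAXED);

    /* Drop the low bytes of a, splice in the low bytes of b. */
    return (a >> sh) | (b << (-sh & 31));
}
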
 * Load 8 bytes from @p, from two sequential atomic 8-byte loads.
    int sh = (pi & 7) * 8;
    b = load_atomic8(pv + 8);

        return (a << sh) | (b >> (-sh & 63));
        return (a >> sh) | (b << (-sh & 63));
 * @cpu: generic cpu state
 * Atomically load @s bytes from @p, when p % s != 0, and [p, p+s-1] does
 * not cross an 8-byte boundary. This means that we can perform an atomic
 * 8-byte load and extract.
static uint32_t load_atom_extract_al8_or_exit(CPUState *cpu, uintptr_t ra,
    int shr = (HOST_BIG_ENDIAN ? 8 - s - o : o) * 8;

    return load_atomic8_or_exit(cpu, ra, pv) >> shr;
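
/*
 * Hedged sketch (standalone): extracting an @s-byte value that lies
 * entirely within one aligned 8-byte word, using a single atomic 8-byte
 * load plus a shift, which is the idea described above.  Little-endian
 * host shown; a big-endian host would shift by (8 - s - o) * 8 instead.
 * Assumes 1 <= s <= 4 and that [p, p+s-1] does not cross the 8-byte
 * boundary.
 */
static inline uint32_t example_extract_from_al8(const void *pv, int s)
{
    uintptr_t pi = (uintptr_t)pv;
    int o = pi & 7;                        /* byte offset within the word */
    const uint64_t *al = (const uint64_t *)(pi - o);
    uint64_t word = __atomic_load_n(al, __ATOMIC_RELAXED);

    /* Shift the wanted bytes down, then mask to @s bytes. */
    return (word >> (o * 8)) & (s == 4 ? UINT32_MAX : (1u << (s * 8)) - 1);
}
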
 * @cpu: generic cpu state
 * @s: object size in bytes, @s <= 8.
 * Atomically load @s bytes from @p, when p % 16 < 8
 * and p % 16 + s > 8. I.e. does not cross a 16-byte
 * boundary, but *does* cross an 8-byte boundary.
static uint64_t load_atom_extract_al16_or_exit(CPUState *cpu, uintptr_t ra,
    int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;

     * Note constraints above: p & 8 must be clear.
    r = load_atomic16_or_exit(cpu, ra, pv);
 * Load 4 bytes from @pv, with two 2-byte atomic loads.

 * Load 8 bytes from @pv, with four 2-byte atomic loads.

 * Load 8 bytes from @pv, with two 4-byte atomic loads.

 * Load 8 bytes from aligned @pv, with at least 4-byte atomicity.
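
/*
 * Hedged sketch (standalone): when only 2-byte atomicity is required, a
 * 4-byte value can be assembled from two individually atomic 2-byte
 * loads, which is what the one-line descriptions above refer to.  The
 * combine order depends on host endianness; little-endian shown.  @pv
 * must be 2-aligned.
 */
static inline uint32_t example_load_4_by_2(const void *pv)
{
    const uint16_t *p = __builtin_assume_aligned(pv, 2);
    uint32_t lo = __atomic_load_n(p, __ATOMIC_RELAXED);
    uint32_t hi = __atomic_load_n(p + 1, __ATOMIC_RELAXED);

    /* Each half is atomic on its own; the pair as a whole is not. */
    return lo | (hi << 16);
}
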
static uint16_t load_atom_2(CPUState *cpu, uintptr_t ra,

    intptr_t left_in_page = -(pi | TARGET_PAGE_MASK);
    if (likely(left_in_page > 8)) {

    atmax = required_atomicity(cpu, pi, memop);

        return load_atomic4(pv - 1) >> 8;
        return load_atom_extract_al8_or_exit(cpu, ra, pv, 2);
        return load_atom_extract_al16_or_exit(cpu, ra, pv, 2);
static uint32_t load_atom_4(CPUState *cpu, uintptr_t ra,

    intptr_t left_in_page = -(pi | TARGET_PAGE_MASK);
    if (likely(left_in_page > 8)) {

    atmax = required_atomicity(cpu, pi, memop);

    case -MO_16:
        return load_atom_extract_al8_or_exit(cpu, ra, pv, 4);
        return load_atom_extract_al16_or_exit(cpu, ra, pv, 4);
 * Load 8 bytes from @p, honoring the atomicity of @memop.
static uint64_t load_atom_8(CPUState *cpu, uintptr_t ra,

     * If the host does not support 8-byte atomics, wait until we have
        return load_atom_extract_al16_or_al8(pv, 8);

    atmax = required_atomicity(cpu, pi, memop);
        load_atomic8_or_exit(cpu, ra, pv);
        return load_atom_extract_al16_or_exit(cpu, ra, pv, 8);
    case -MO_32:
        cpu_loop_exit_atomic(cpu, ra);
static Int128 load_atom_16(CPUState *cpu, uintptr_t ra,

     * If the host does not support 16-byte atomics, wait until we have

    atmax = required_atomicity(cpu, pi, memop);
        b = load_atom_8_by_2(pv + 8);
        b = load_atom_8_by_4(pv + 8);
        cpu_loop_exit_atomic(cpu, ra);
        b = load_atomic8(pv + 8);
    case -MO_64:
        cpu_loop_exit_atomic(cpu, ra);
        b = load_atom_extract_al8x2(pv + 8);
        return load_atomic16_or_exit(cpu, ra, pv);
 * Atomically store 8 aligned bytes to @pv.
    uint64_t *p = __builtin_assume_aligned(pv, 8);

    p = __builtin_assume_aligned(p, 8);
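
/*
 * Hedged sketch (standalone): the store counterpart of the aligned load
 * sketch earlier, again assuming the host can store 8 bytes with
 * single-copy atomicity.
 */
static inline void example_store_aligned8(void *pv, uint64_t val)
{
    uint64_t *p = __builtin_assume_aligned(pv, 8);
    __atomic_store_n(p, val, __ATOMIC_RELAXED);
}
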
 * Store @size bytes at @p. The bytes to store are extracted in little-endian order
    for (int i = 0; i < size; i++, val_le >>= 8) {

        val_le >>= 8;
        size -= n;
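
/*
 * Hedged sketch (standalone): the byte-at-a-time fallback described
 * above -- peel bytes off the little-endian value from the bottom and
 * return whatever has not been stored yet, so the caller can continue
 * with the remainder.  No atomicity is provided beyond single bytes.
 */
static inline uint64_t example_store_bytes_le(void *pv, int size, uint64_t val_le)
{
    uint8_t *p = pv;

    for (int i = 0; i < size; i++, val_le >>= 8) {
        /* Lowest remaining byte goes to the lowest remaining address. */
        p[i] = val_le;
    }
    return val_le;
}
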
    int sz = size * 8;
    int sh = o * 8;
    store_atom_insert_al4(pv - o, v, m);

    int sz = size * 8;
    int sh = o * 8;
    store_atom_insert_al8(pv - o, v, m);

    int sz = size * 8;
    int sh = o * 8;
        m = int128_make128(-1, MAKE_64BIT_MASK(0, sz - 64));
    store_atom_insert_al16(pv - o, v, m);
    return int128_gethi(val_le) >> (sz - 64);
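
/*
 * Hedged sketch (standalone): the kind of masked-insert primitive that
 * the store_whole_le* fragments above rely on.  Bytes selected by @msk
 * are replaced with the corresponding bytes of @val within one aligned
 * 4-byte word, using a compare-and-swap loop so the whole word updates
 * atomically.  The real QEMU helpers use host-specific code; this is
 * only an illustration with the compiler's __atomic builtins.
 */
static inline void example_insert_al4(void *pv, uint32_t val, uint32_t msk)
{
    uint32_t *p = __builtin_assume_aligned(pv, 4);
    uint32_t old = __atomic_load_n(p, __ATOMIC_RELAXED);
    uint32_t upd;

    do {
        /* Keep bytes outside the mask, substitute those inside it. */
        upd = (old & ~msk) | (val & msk);
    } while (!__atomic_compare_exchange_n(p, &old, upd, 1,
                                          __ATOMIC_RELAXED, __ATOMIC_RELAXED));
}
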
static void store_atom_2(CPUState *cpu, uintptr_t ra,

    atmax = required_atomicity(cpu, pi, memop);

        store_atom_insert_al4(pv - 1, (uint32_t)val << 8, MAKE_64BIT_MASK(8, 16));
        store_atom_insert_al8(pv - 3, (uint64_t)val << 24, MAKE_64BIT_MASK(24, 16));
        store_atom_insert_al16(pv - 7, v, m);

    cpu_loop_exit_atomic(cpu, ra);
static void store_atom_4(CPUState *cpu, uintptr_t ra,

    atmax = required_atomicity(cpu, pi, memop);

    case -MO_16:
        int s1 = 4 - s2;
        store_whole_le4(pv + 1, s2, val_le >> 8);

    cpu_loop_exit_atomic(cpu, ra);
 * Store 8 bytes to @p, honoring the atomicity of @memop.
static void store_atom_8(CPUState *cpu, uintptr_t ra,

    atmax = required_atomicity(cpu, pi, memop);

    case -MO_32:
        int s1 = 8 - s2;
        store_whole_le16(pv, 8, int128_make64(cpu_to_le64(val)));

    cpu_loop_exit_atomic(cpu, ra);
static void store_atom_16(CPUState *cpu, uintptr_t ra,

    atmax = required_atomicity(cpu, pi, memop);

        store_atom_8_by_2(pv + 8, b);
        store_atom_8_by_4(pv + 8, b);
        store_atomic8(pv + 8, b);
    case -MO_64:
        int s1 = 16 - s2;
        val = int128_urshift(val, s1 * 8);
        case 8: /* atmax MO_64 */

    cpu_loop_exit_atomic(cpu, ra);