/*
 * Routines common to user and system emulation of load/store.
 *
 * Copyright (c) 2022 Linaro, Ltd.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "host/load-extract-al16-al8.h.inc"
#include "host/store-insert-al16.h.inc"

#ifdef CONFIG_ATOMIC64
# define HAVE_al8          true
#else
# define HAVE_al8          false
#endif
#define HAVE_al8_fast      (ATOMIC_REG_SIZE >= 8)

/**
 * required_atomicity:
 *
 * Return the lg2 bytes of atomicity required by @memop for @p.
 * If the operation must be split into two operations to be
 * examined separately for atomicity, return -lg2.
 */
static int required_atomicity(CPUState *cpu, uintptr_t p, MemOp memop)
{
    MemOp atom = memop & MO_ATOM_MASK;
    MemOp size = memop & MO_SIZE;
    MemOp half = size ? size - 1 : 0;
    unsigned tmp;
    int atmax;

    switch (atom) {
    case MO_ATOM_NONE:
        atmax = MO_8;
        break;

    case MO_ATOM_IFALIGN_PAIR:
        size = half;
        /* fall through */

    case MO_ATOM_IFALIGN:
        tmp = (1 << size) - 1;
        atmax = p & tmp ? MO_8 : size;
        break;

    case MO_ATOM_WITHIN16:
        tmp = p & 15;
        atmax = (tmp + (1 << size) <= 16 ? size : MO_8);
        break;

    case MO_ATOM_WITHIN16_PAIR:
        tmp = p & 15;
        if (tmp + (1 << size) <= 16) {
            atmax = size;
        } else if (tmp + (1 << half) == 16) {
            /*
             * The pair exactly straddles the boundary.
             * Both halves are naturally aligned and atomic.
             */
            atmax = half;
        } else {
            /*
             * One of the pair crosses the boundary, and is non-atomic.
             * The other of the pair does not cross, and is atomic.
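             * Returning -half tells the caller to split the operation
             * and check the atomicity of each half separately.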
             */
            atmax = -half;
        }
        break;

    case MO_ATOM_SUBALIGN:
        /*
         * Examine the alignment of p to determine if there are subobjects
         * that must be aligned.  Note that we only really need ctz4() --
         * any more significant bits are discarded by the immediately
         * following comparison.
         */
        tmp = ctz32(p);
        atmax = MIN(size, tmp);
        break;

    default:
        g_assert_not_reached();
    }

    /*
     * Here we have the architectural atomicity of the operation.
     * However, when executing in a serial context, we need no extra
     * host atomicity in order to avoid racing.  This reduction
     * avoids looping with cpu_loop_exit_atomic.
     */
    if (cpu_in_serial_context(cpu)) {
        return MO_8;
    }
    return atmax;
}

/**
 * load_atomic2:
 * @pv: host address
 *
 * Atomically load 2 aligned bytes from @pv.
 */
static inline uint16_t load_atomic2(void *pv)
{
    uint16_t *p = __builtin_assume_aligned(pv, 2);
    return qatomic_read(p);
}

/**
 * load_atomic4:
 * @pv: host address
 *
 * Atomically load 4 aligned bytes from @pv.
 */
static inline uint32_t load_atomic4(void *pv)
{
    uint32_t *p = __builtin_assume_aligned(pv, 4);
    return qatomic_read(p);
}

/**
 * load_atomic8:
 * @pv: host address
 *
 * Atomically load 8 aligned bytes from @pv.
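 * The host must provide 8-byte atomics (HAVE_al8); this is
 * asserted at build time.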
 */
static inline uint64_t load_atomic8(void *pv)
{
    uint64_t *p = __builtin_assume_aligned(pv, 8);

    qemu_build_assert(HAVE_al8);
    return qatomic_read__nocheck(p);
}

/**
 * load_atomic8_or_exit:
 * @cpu: generic cpu state
 * @ra: host unwind address
 * @pv: host address
 *
 * Atomically load 8 aligned bytes from @pv.
 * If this is not possible, longjmp out to restart serially.
 */
static uint64_t load_atomic8_or_exit(CPUState *cpu, uintptr_t ra, void *pv)
{
    if (HAVE_al8) {
        return load_atomic8(pv);
    }

#ifdef CONFIG_USER_ONLY
    /*
     * If the page is not writable, then assume the value is immutable
     * and requires no locking.  This ignores the case of MAP_SHARED with
     * another process, because the fallback start_exclusive solution
     * provides no protection across processes.
     */
    WITH_MMAP_LOCK_GUARD() {
        if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) {
            uint64_t *p = __builtin_assume_aligned(pv, 8);
            return *p;
        }
    }
#endif

    /* Ultimate fallback: re-execute in serial context. */
    trace_load_atom8_or_exit_fallback(ra);
    cpu_loop_exit_atomic(cpu, ra);
}

/**
 * load_atomic16_or_exit:
 * @cpu: generic cpu state
 * @ra: host unwind address
 * @pv: host address
 *
 * Atomically load 16 aligned bytes from @pv.
 * If this is not possible, longjmp out to restart serially.
 */
static Int128 load_atomic16_or_exit(CPUState *cpu, uintptr_t ra, void *pv)
{
    Int128 *p = __builtin_assume_aligned(pv, 16);

    if (HAVE_ATOMIC128_RO) {
        return atomic16_read_ro(p);
    }

    /*
     * We can only use cmpxchg to emulate a load if the page is writable.
     * If the page is not writable, then assume the value is immutable
     * and requires no locking.
     * This ignores the case of MAP_SHARED with
     * another process, because the fallback start_exclusive solution
     * provides no protection across processes.
     *
     * In system mode all guest pages are writable.  For user mode,
     * we must take mmap_lock so that the query remains valid until
     * the write is complete -- tests/tcg/multiarch/munmap-pthread.c
     * is an example that can race.
     */
    WITH_MMAP_LOCK_GUARD() {
#ifdef CONFIG_USER_ONLY
        if (!page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) {
            return *p;
        }
#endif
        if (HAVE_ATOMIC128_RW) {
            return atomic16_read_rw(p);
        }
    }

    /* Ultimate fallback: re-execute in serial context. */
    trace_load_atom16_or_exit_fallback(ra);
    cpu_loop_exit_atomic(cpu, ra);
}

/**
 * load_atom_extract_al4x2:
 * @pv: host address
 *
 * Load 4 bytes from @p, from two sequential atomic 4-byte loads.
 */
static uint32_t load_atom_extract_al4x2(void *pv)
{
    uintptr_t pi = (uintptr_t)pv;
    int sh = (pi & 3) * 8;
    uint32_t a, b;

    pv = (void *)(pi & ~3);
    a = load_atomic4(pv);
    b = load_atomic4(pv + 4);

    if (HOST_BIG_ENDIAN) {
        return (a << sh) | (b >> (-sh & 31));
    } else {
        return (a >> sh) | (b << (-sh & 31));
    }
}

/**
 * load_atom_extract_al8x2:
 * @pv: host address
 *
 * Load 8 bytes from @p, from two sequential atomic 8-byte loads.
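 * The unaligned value is assembled by shifting and combining the
 * two aligned words that contain it.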
 */
static uint64_t load_atom_extract_al8x2(void *pv)
{
    uintptr_t pi = (uintptr_t)pv;
    int sh = (pi & 7) * 8;
    uint64_t a, b;

    pv = (void *)(pi & ~7);
    a = load_atomic8(pv);
    b = load_atomic8(pv + 8);

    if (HOST_BIG_ENDIAN) {
        return (a << sh) | (b >> (-sh & 63));
    } else {
        return (a >> sh) | (b << (-sh & 63));
    }
}

/**
 * load_atom_extract_al8_or_exit:
 * @cpu: generic cpu state
 * @ra: host unwind address
 * @pv: host address
 * @s: object size in bytes, @s <= 4.
 *
 * Atomically load @s bytes from @p, when p % s != 0, and [p, p+s-1] does
 * not cross an 8-byte boundary.  This means that we can perform an atomic
 * 8-byte load and extract.
 * The value is returned in the low bits of a uint32_t.
 */
static uint32_t load_atom_extract_al8_or_exit(CPUState *cpu, uintptr_t ra,
                                              void *pv, int s)
{
    uintptr_t pi = (uintptr_t)pv;
    int o = pi & 7;
    int shr = (HOST_BIG_ENDIAN ? 8 - s - o : o) * 8;

    pv = (void *)(pi & ~7);
    return load_atomic8_or_exit(cpu, ra, pv) >> shr;
}

/**
 * load_atom_extract_al16_or_exit:
 * @cpu: generic cpu state
 * @ra: host unwind address
 * @p: host address
 * @s: object size in bytes, @s <= 8.
 *
 * Atomically load @s bytes from @p, when p % 16 < 8
 * and p % 16 + s > 8.  I.e. does not cross a 16-byte
 * boundary, but *does* cross an 8-byte boundary.
 * This is the slow version, so we must have eliminated
 * any faster load_atom_extract_al8_or_exit case.
 *
 * If this is not possible, longjmp out to restart serially.
 */
static uint64_t load_atom_extract_al16_or_exit(CPUState *cpu, uintptr_t ra,
                                               void *pv, int s)
{
    uintptr_t pi = (uintptr_t)pv;
    int o = pi & 7;
    int shr = (HOST_BIG_ENDIAN ?
               16 - s - o : o) * 8;
    Int128 r;

    /*
     * Note constraints above: p & 8 must be clear.
     * Provoke SIGBUS if possible otherwise.
     */
    pv = (void *)(pi & ~7);
    r = load_atomic16_or_exit(cpu, ra, pv);

    r = int128_urshift(r, shr);
    return int128_getlo(r);
}

/**
 * load_atom_4_by_2:
 * @pv: host address
 *
 * Load 4 bytes from @pv, with two 2-byte atomic loads.
 */
static inline uint32_t load_atom_4_by_2(void *pv)
{
    uint32_t a = load_atomic2(pv);
    uint32_t b = load_atomic2(pv + 2);

    if (HOST_BIG_ENDIAN) {
        return (a << 16) | b;
    } else {
        return (b << 16) | a;
    }
}

/**
 * load_atom_8_by_2:
 * @pv: host address
 *
 * Load 8 bytes from @pv, with four 2-byte atomic loads.
 */
static inline uint64_t load_atom_8_by_2(void *pv)
{
    uint32_t a = load_atom_4_by_2(pv);
    uint32_t b = load_atom_4_by_2(pv + 4);

    if (HOST_BIG_ENDIAN) {
        return ((uint64_t)a << 32) | b;
    } else {
        return ((uint64_t)b << 32) | a;
    }
}

/**
 * load_atom_8_by_4:
 * @pv: host address
 *
 * Load 8 bytes from @pv, with two 4-byte atomic loads.
 */
static inline uint64_t load_atom_8_by_4(void *pv)
{
    uint32_t a = load_atomic4(pv);
    uint32_t b = load_atomic4(pv + 4);

    if (HOST_BIG_ENDIAN) {
        return ((uint64_t)a << 32) | b;
    } else {
        return ((uint64_t)b << 32) | a;
    }
}

/**
 * load_atom_8_by_8_or_4:
 * @pv: host address
 *
 * Load 8 bytes from aligned @pv, with at least 4-byte atomicity.
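 * Use a single atomic 8-byte load when the host can do so cheaply
 * (HAVE_al8_fast), otherwise two atomic 4-byte loads.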
 */
static inline uint64_t load_atom_8_by_8_or_4(void *pv)
{
    if (HAVE_al8_fast) {
        return load_atomic8(pv);
    } else {
        return load_atom_8_by_4(pv);
    }
}

/**
 * load_atom_2:
 * @p: host address
 * @memop: the full memory op
 *
 * Load 2 bytes from @p, honoring the atomicity of @memop.
 */
static uint16_t load_atom_2(CPUState *cpu, uintptr_t ra,
                            void *pv, MemOp memop)
{
    uintptr_t pi = (uintptr_t)pv;
    int atmax;

    if (likely((pi & 1) == 0)) {
        return load_atomic2(pv);
    }
    if (HAVE_ATOMIC128_RO) {
        intptr_t left_in_page = -(pi | TARGET_PAGE_MASK);
        if (likely(left_in_page > 8)) {
            return load_atom_extract_al16_or_al8(pv, 2);
        }
    }

    atmax = required_atomicity(cpu, pi, memop);
    switch (atmax) {
    case MO_8:
        return lduw_he_p(pv);
    case MO_16:
        /* The only case remaining is MO_ATOM_WITHIN16. */
        if (!HAVE_al8_fast && (pi & 3) == 1) {
            /* Big or little endian, we want the middle two bytes. */
            return load_atomic4(pv - 1) >> 8;
        }
        if ((pi & 15) != 7) {
            return load_atom_extract_al8_or_exit(cpu, ra, pv, 2);
        }
        return load_atom_extract_al16_or_exit(cpu, ra, pv, 2);
    default:
        g_assert_not_reached();
    }
}

/**
 * load_atom_4:
 * @p: host address
 * @memop: the full memory op
 *
 * Load 4 bytes from @p, honoring the atomicity of @memop.
 */
static uint32_t load_atom_4(CPUState *cpu, uintptr_t ra,
                            void *pv, MemOp memop)
{
    uintptr_t pi = (uintptr_t)pv;
    int atmax;

    if (likely((pi & 3) == 0)) {
        return load_atomic4(pv);
    }
    if (HAVE_ATOMIC128_RO) {
        intptr_t left_in_page = -(pi | TARGET_PAGE_MASK);
        if (likely(left_in_page > 8)) {
            return load_atom_extract_al16_or_al8(pv, 4);
        }
    }

    atmax = required_atomicity(cpu, pi, memop);
    switch (atmax) {
    case MO_8:
    case MO_16:
    case -MO_16:
        /*
         * For MO_ATOM_IFALIGN, this is more atomicity than required,
         * but it's trivially supported on all hosts, better than 4
         * individual byte loads (when the host requires alignment),
         * and overlaps with the MO_ATOM_SUBALIGN case of p % 2 == 0.
         */
        return load_atom_extract_al4x2(pv);
    case MO_32:
        if (!(pi & 4)) {
            return load_atom_extract_al8_or_exit(cpu, ra, pv, 4);
        }
        return load_atom_extract_al16_or_exit(cpu, ra, pv, 4);
    default:
        g_assert_not_reached();
    }
}

/**
 * load_atom_8:
 * @p: host address
 * @memop: the full memory op
 *
 * Load 8 bytes from @p, honoring the atomicity of @memop.
 */
static uint64_t load_atom_8(CPUState *cpu, uintptr_t ra,
                            void *pv, MemOp memop)
{
    uintptr_t pi = (uintptr_t)pv;
    int atmax;

    /*
     * If the host does not support 8-byte atomics, wait until we have
     * examined the atomicity parameters below.
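     * The required atomicity may turn out to be smaller and thus
     * satisfiable without 8-byte host atomics.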
     */
    if (HAVE_al8 && likely((pi & 7) == 0)) {
        return load_atomic8(pv);
    }
    if (HAVE_ATOMIC128_RO) {
        return load_atom_extract_al16_or_al8(pv, 8);
    }

    atmax = required_atomicity(cpu, pi, memop);
    if (atmax == MO_64) {
        if (!HAVE_al8 && (pi & 7) == 0) {
            load_atomic8_or_exit(cpu, ra, pv);
        }
        return load_atom_extract_al16_or_exit(cpu, ra, pv, 8);
    }
    if (HAVE_al8_fast) {
        return load_atom_extract_al8x2(pv);
    }
    switch (atmax) {
    case MO_8:
        return ldq_he_p(pv);
    case MO_16:
        return load_atom_8_by_2(pv);
    case MO_32:
        return load_atom_8_by_4(pv);
    case -MO_32:
        if (HAVE_al8) {
            return load_atom_extract_al8x2(pv);
        }
        trace_load_atom8_fallback(memop, ra);
        cpu_loop_exit_atomic(cpu, ra);
    default:
        g_assert_not_reached();
    }
}

/**
 * load_atom_16:
 * @p: host address
 * @memop: the full memory op
 *
 * Load 16 bytes from @p, honoring the atomicity of @memop.
 */
static Int128 load_atom_16(CPUState *cpu, uintptr_t ra,
                           void *pv, MemOp memop)
{
    uintptr_t pi = (uintptr_t)pv;
    int atmax;
    Int128 r;
    uint64_t a, b;

    /*
     * If the host does not support 16-byte atomics, wait until we have
     * examined the atomicity parameters below.
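     * The required atomicity may turn out to be smaller and thus
     * satisfiable without 16-byte host atomics.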
     */
    if (HAVE_ATOMIC128_RO && likely((pi & 15) == 0)) {
        return atomic16_read_ro(pv);
    }

    atmax = required_atomicity(cpu, pi, memop);
    switch (atmax) {
    case MO_8:
        memcpy(&r, pv, 16);
        return r;
    case MO_16:
        a = load_atom_8_by_2(pv);
        b = load_atom_8_by_2(pv + 8);
        break;
    case MO_32:
        a = load_atom_8_by_4(pv);
        b = load_atom_8_by_4(pv + 8);
        break;
    case MO_64:
        if (!HAVE_al8) {
            trace_load_atom16_fallback(memop, ra);
            cpu_loop_exit_atomic(cpu, ra);
        }
        a = load_atomic8(pv);
        b = load_atomic8(pv + 8);
        break;
    case -MO_64:
        if (!HAVE_al8) {
            trace_load_atom16_fallback(memop, ra);
            cpu_loop_exit_atomic(cpu, ra);
        }
        a = load_atom_extract_al8x2(pv);
        b = load_atom_extract_al8x2(pv + 8);
        break;
    case MO_128:
        return load_atomic16_or_exit(cpu, ra, pv);
    default:
        g_assert_not_reached();
    }
    return int128_make128(HOST_BIG_ENDIAN ? b : a, HOST_BIG_ENDIAN ? a : b);
}

/**
 * store_atomic2:
 * @pv: host address
 * @val: value to store
 *
 * Atomically store 2 aligned bytes to @pv.
 */
static inline void store_atomic2(void *pv, uint16_t val)
{
    uint16_t *p = __builtin_assume_aligned(pv, 2);
    qatomic_set(p, val);
}

/**
 * store_atomic4:
 * @pv: host address
 * @val: value to store
 *
 * Atomically store 4 aligned bytes to @pv.
 */
static inline void store_atomic4(void *pv, uint32_t val)
{
    uint32_t *p = __builtin_assume_aligned(pv, 4);
    qatomic_set(p, val);
}

/**
 * store_atomic8:
 * @pv: host address
 * @val: value to store
 *
 * Atomically store 8 aligned bytes to @pv.
 */
static inline void store_atomic8(void *pv, uint64_t val)
{
    uint64_t *p = __builtin_assume_aligned(pv, 8);

    qemu_build_assert(HAVE_al8);
    qatomic_set__nocheck(p, val);
}

/**
 * store_atom_4_by_2
 */
static inline void store_atom_4_by_2(void *pv, uint32_t val)
{
    store_atomic2(pv, val >> (HOST_BIG_ENDIAN ? 16 : 0));
    store_atomic2(pv + 2, val >> (HOST_BIG_ENDIAN ? 0 : 16));
}

/**
 * store_atom_8_by_2
 */
static inline void store_atom_8_by_2(void *pv, uint64_t val)
{
    store_atom_4_by_2(pv, val >> (HOST_BIG_ENDIAN ? 32 : 0));
    store_atom_4_by_2(pv + 4, val >> (HOST_BIG_ENDIAN ? 0 : 32));
}

/**
 * store_atom_8_by_4
 */
static inline void store_atom_8_by_4(void *pv, uint64_t val)
{
    store_atomic4(pv, val >> (HOST_BIG_ENDIAN ? 32 : 0));
    store_atomic4(pv + 4, val >> (HOST_BIG_ENDIAN ? 0 : 32));
}

/**
 * store_atom_insert_al4:
 * @p: host address
 * @val: shifted value to store
 * @msk: mask for value to store
 *
 * Atomically store @val to @p, masked by @msk.
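 * Only the bits selected by @msk are changed; the other bits of *@p
 * are preserved via a compare-and-swap loop.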
 */
static void store_atom_insert_al4(uint32_t *p, uint32_t val, uint32_t msk)
{
    uint32_t old, new;

    p = __builtin_assume_aligned(p, 4);
    old = qatomic_read(p);
    do {
        new = (old & ~msk) | val;
    } while (!__atomic_compare_exchange_n(p, &old, new, true,
                                          __ATOMIC_RELAXED, __ATOMIC_RELAXED));
}

/**
 * store_atom_insert_al8:
 * @p: host address
 * @val: shifted value to store
 * @msk: mask for value to store
 *
 * Atomically store @val to @p masked by @msk.
 */
static void store_atom_insert_al8(uint64_t *p, uint64_t val, uint64_t msk)
{
    uint64_t old, new;

    qemu_build_assert(HAVE_al8);
    p = __builtin_assume_aligned(p, 8);
    old = qatomic_read__nocheck(p);
    do {
        new = (old & ~msk) | val;
    } while (!__atomic_compare_exchange_n(p, &old, new, true,
                                          __ATOMIC_RELAXED, __ATOMIC_RELAXED));
}

/**
 * store_bytes_leN:
 * @pv: host address
 * @size: number of bytes to store
 * @val_le: data to store
 *
 * Store @size bytes at @p.  The bytes to store are extracted in little-endian order
 * from @val_le; return the bytes of @val_le beyond @size that have not been stored.
 */
static uint64_t store_bytes_leN(void *pv, int size, uint64_t val_le)
{
    uint8_t *p = pv;
    for (int i = 0; i < size; i++, val_le >>= 8) {
        p[i] = val_le;
    }
    return val_le;
}

/**
 * store_parts_leN
 * @pv: host address
 * @size: number of bytes to store
 * @val_le: data to store
 *
 * As store_bytes_leN, but atomically on each aligned part.
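 * The store is split into the largest naturally aligned pieces
 * (4, 2 or 1 bytes) permitted by the current address and size.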
 */
G_GNUC_UNUSED
static uint64_t store_parts_leN(void *pv, int size, uint64_t val_le)
{
    do {
        int n;

        /* Find minimum of alignment and size */
        switch (((uintptr_t)pv | size) & 7) {
        case 4:
            store_atomic4(pv, le32_to_cpu(val_le));
            val_le >>= 32;
            n = 4;
            break;
        case 2:
        case 6:
            store_atomic2(pv, le16_to_cpu(val_le));
            val_le >>= 16;
            n = 2;
            break;
        default:
            *(uint8_t *)pv = val_le;
            val_le >>= 8;
            n = 1;
            break;
        case 0:
            g_assert_not_reached();
        }
        pv += n;
        size -= n;
    } while (size != 0);

    return val_le;
}

/**
 * store_whole_le4
 * @pv: host address
 * @size: number of bytes to store
 * @val_le: data to store
 *
 * As store_bytes_leN, but atomically as a whole.
 * Four aligned bytes are guaranteed to cover the store.
 */
static uint64_t store_whole_le4(void *pv, int size, uint64_t val_le)
{
    int sz = size * 8;
    int o = (uintptr_t)pv & 3;
    int sh = o * 8;
    uint32_t m = MAKE_64BIT_MASK(0, sz);
    uint32_t v;

    if (HOST_BIG_ENDIAN) {
        v = bswap32(val_le) >> sh;
        m = bswap32(m) >> sh;
    } else {
        v = val_le << sh;
        m <<= sh;
    }
    store_atom_insert_al4(pv - o, v, m);
    return val_le >> sz;
}

/**
 * store_whole_le8
 * @pv: host address
 * @size: number of bytes to store
 * @val_le: data to store
 *
 * As store_bytes_leN, but atomically as a whole.
 * Eight aligned bytes are guaranteed to cover the store.
 */
static uint64_t store_whole_le8(void *pv, int size, uint64_t val_le)
{
    int sz = size * 8;
    int o = (uintptr_t)pv & 7;
    int sh = o * 8;
    uint64_t m = MAKE_64BIT_MASK(0, sz);
    uint64_t v;

    qemu_build_assert(HAVE_al8);
    if (HOST_BIG_ENDIAN) {
        v = bswap64(val_le) >> sh;
        m = bswap64(m) >> sh;
    } else {
        v = val_le << sh;
        m <<= sh;
    }
    store_atom_insert_al8(pv - o, v, m);
    return val_le >> sz;
}

/**
 * store_whole_le16
 * @pv: host address
 * @size: number of bytes to store
 * @val_le: data to store
 *
 * As store_bytes_leN, but atomically as a whole.
 * 16 aligned bytes are guaranteed to cover the store.
 */
static uint64_t store_whole_le16(void *pv, int size, Int128 val_le)
{
    int sz = size * 8;
    int o = (uintptr_t)pv & 15;
    int sh = o * 8;
    Int128 m, v;

    qemu_build_assert(HAVE_CMPXCHG128);

    /* Like MAKE_64BIT_MASK(0, sz), but larger. */
    if (sz <= 64) {
        m = int128_make64(MAKE_64BIT_MASK(0, sz));
    } else {
        m = int128_make128(-1, MAKE_64BIT_MASK(0, sz - 64));
    }

    if (HOST_BIG_ENDIAN) {
        v = int128_urshift(bswap128(val_le), sh);
        m = int128_urshift(bswap128(m), sh);
    } else {
        v = int128_lshift(val_le, sh);
        m = int128_lshift(m, sh);
    }
    store_atom_insert_al16(pv - o, v, m);

    if (sz <= 64) {
        return 0;
    }
    return int128_gethi(val_le) >> (sz - 64);
}

/**
 * store_atom_2:
 * @p: host address
 * @val: the value to store
 * @memop: the full memory op
 *
 * Store 2 bytes to @p, honoring the atomicity of @memop.
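 * An unaligned store that must be atomic is performed as a masked
 * compare-and-swap within the enclosing aligned word, when the host
 * provides a suitable primitive.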
 */
static void store_atom_2(CPUState *cpu, uintptr_t ra,
                         void *pv, MemOp memop, uint16_t val)
{
    uintptr_t pi = (uintptr_t)pv;
    int atmax;

    if (likely((pi & 1) == 0)) {
        store_atomic2(pv, val);
        return;
    }

    atmax = required_atomicity(cpu, pi, memop);
    if (atmax == MO_8) {
        stw_he_p(pv, val);
        return;
    }

    /*
     * The only case remaining is MO_ATOM_WITHIN16.
     * Big or little endian, we want the middle two bytes in each test.
     */
    if ((pi & 3) == 1) {
        store_atom_insert_al4(pv - 1, (uint32_t)val << 8, MAKE_64BIT_MASK(8, 16));
        return;
    } else if ((pi & 7) == 3) {
        if (HAVE_al8) {
            store_atom_insert_al8(pv - 3, (uint64_t)val << 24, MAKE_64BIT_MASK(24, 16));
            return;
        }
    } else if ((pi & 15) == 7) {
        if (HAVE_CMPXCHG128) {
            Int128 v = int128_lshift(int128_make64(val), 56);
            Int128 m = int128_lshift(int128_make64(0xffff), 56);
            store_atom_insert_al16(pv - 7, v, m);
            return;
        }
    } else {
        g_assert_not_reached();
    }

    trace_store_atom2_fallback(memop, ra);
    cpu_loop_exit_atomic(cpu, ra);
}

/**
 * store_atom_4:
 * @p: host address
 * @val: the value to store
 * @memop: the full memory op
 *
 * Store 4 bytes to @p, honoring the atomicity of @memop.
 */
static void store_atom_4(CPUState *cpu, uintptr_t ra,
                         void *pv, MemOp memop, uint32_t val)
{
    uintptr_t pi = (uintptr_t)pv;
    int atmax;

    if (likely((pi & 3) == 0)) {
        store_atomic4(pv, val);
        return;
    }

    atmax = required_atomicity(cpu, pi, memop);
    switch (atmax) {
    case MO_8:
        stl_he_p(pv, val);
        return;
    case MO_16:
        store_atom_4_by_2(pv, val);
        return;
    case -MO_16:
        {
            uint32_t val_le = cpu_to_le32(val);
            int s2 = pi & 3;
            int s1 = 4 - s2;

            switch (s2) {
            case 1:
                val_le = store_whole_le4(pv, s1, val_le);
                *(uint8_t *)(pv + 3) = val_le;
                break;
            case 3:
                *(uint8_t *)pv = val_le;
                store_whole_le4(pv + 1, s2, val_le >> 8);
                break;
            case 0: /* aligned */
            case 2: /* atmax MO_16 */
            default:
                g_assert_not_reached();
            }
        }
        return;
    case MO_32:
        if ((pi & 7) < 4) {
            if (HAVE_al8) {
                store_whole_le8(pv, 4, cpu_to_le32(val));
                return;
            }
        } else {
            if (HAVE_CMPXCHG128) {
                store_whole_le16(pv, 4, int128_make64(cpu_to_le32(val)));
                return;
            }
        }
        trace_store_atom4_fallback(memop, ra);
        cpu_loop_exit_atomic(cpu, ra);
    default:
        g_assert_not_reached();
    }
}

/**
 * store_atom_8:
 * @p: host address
 * @val: the value to store
 * @memop: the full memory op
 *
 * Store 8 bytes to @p, honoring the atomicity of @memop.
 */
static void store_atom_8(CPUState *cpu, uintptr_t ra,
                         void *pv, MemOp memop, uint64_t val)
{
    uintptr_t pi = (uintptr_t)pv;
    int atmax;

    if (HAVE_al8 && likely((pi & 7) == 0)) {
        store_atomic8(pv, val);
        return;
    }

    atmax = required_atomicity(cpu, pi, memop);
    switch (atmax) {
    case MO_8:
        stq_he_p(pv, val);
        return;
    case MO_16:
        store_atom_8_by_2(pv, val);
        return;
    case MO_32:
        store_atom_8_by_4(pv, val);
        return;
    case -MO_32:
        if (HAVE_al8) {
            uint64_t val_le = cpu_to_le64(val);
            int s2 = pi & 7;
            int s1 = 8 - s2;

            switch (s2) {
            case 1 ... 3:
                val_le = store_whole_le8(pv, s1, val_le);
                store_bytes_leN(pv + s1, s2, val_le);
                break;
            case 5 ... 7:
                val_le = store_bytes_leN(pv, s1, val_le);
                store_whole_le8(pv + s1, s2, val_le);
                break;
            case 0: /* aligned */
            case 4: /* atmax MO_32 */
            default:
                g_assert_not_reached();
            }
            return;
        }
        break;
    case MO_64:
        if (HAVE_CMPXCHG128) {
            store_whole_le16(pv, 8, int128_make64(cpu_to_le64(val)));
            return;
        }
        break;
    default:
        g_assert_not_reached();
    }
    trace_store_atom8_fallback(memop, ra);
    cpu_loop_exit_atomic(cpu, ra);
}

/**
 * store_atom_16:
 * @p: host address
 * @val: the value to store
 * @memop: the full memory op
 *
 * Store 16 bytes to @p, honoring the atomicity of @memop.
 */
static void store_atom_16(CPUState *cpu, uintptr_t ra,
                          void *pv, MemOp memop, Int128 val)
{
    uintptr_t pi = (uintptr_t)pv;
    uint64_t a, b;
    int atmax;

    if (HAVE_ATOMIC128_RW && likely((pi & 15) == 0)) {
        atomic16_set(pv, val);
        return;
    }

    atmax = required_atomicity(cpu, pi, memop);

    a = HOST_BIG_ENDIAN ? int128_gethi(val) : int128_getlo(val);
    b = HOST_BIG_ENDIAN ? int128_getlo(val) : int128_gethi(val);
    switch (atmax) {
    case MO_8:
        memcpy(pv, &val, 16);
        return;
    case MO_16:
        store_atom_8_by_2(pv, a);
        store_atom_8_by_2(pv + 8, b);
        return;
    case MO_32:
        store_atom_8_by_4(pv, a);
        store_atom_8_by_4(pv + 8, b);
        return;
    case MO_64:
        if (HAVE_al8) {
            store_atomic8(pv, a);
            store_atomic8(pv + 8, b);
            return;
        }
        break;
    case -MO_64:
        if (HAVE_CMPXCHG128) {
            uint64_t val_le;
            int s2 = pi & 15;
            int s1 = 16 - s2;

            if (HOST_BIG_ENDIAN) {
                val = bswap128(val);
            }
            switch (s2) {
            case 1 ... 7:
                val_le = store_whole_le16(pv, s1, val);
                store_bytes_leN(pv + s1, s2, val_le);
                break;
            case 9 ... 15:
                store_bytes_leN(pv, s1, int128_getlo(val));
                val = int128_urshift(val, s1 * 8);
                store_whole_le16(pv + s1, s2, val);
                break;
            case 0: /* aligned */
            case 8: /* atmax MO_64 */
            default:
                g_assert_not_reached();
            }
            return;
        }
        break;
    case MO_128:
        break;
    default:
        g_assert_not_reached();
    }
    trace_store_atom16_fallback(memop, ra);
    cpu_loop_exit_atomic(cpu, ra);
}