xref: /openbmc/qemu/accel/tcg/ldst_atomicity.c.inc (revision fdf250e5a37830615e324017cb3a503e84b3712c)
/*
 * Routines common to user and system emulation of load/store.
 *
 *  Copyright (c) 2022 Linaro, Ltd.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
11cdfac37bSRichard Henderson
12e4751d34SPhilippe Mathieu-Daudé#include "host/load-extract-al16-al8.h.inc"
13e4751d34SPhilippe Mathieu-Daudé#include "host/store-insert-al16.h.inc"
14af844a11SRichard Henderson
/*
 * HAVE_al8 mirrors CONFIG_ATOMIC64; load_atomic8() build-asserts on it.
 * HAVE_al8_fast is true when ATOMIC_REG_SIZE is at least 8 bytes.
 */
#ifndef CONFIG_ATOMIC64
# define HAVE_al8          false
#else
# define HAVE_al8          true
#endif
#define HAVE_al8_fast      (ATOMIC_REG_SIZE >= 8)
21cdfac37bSRichard Henderson
22cdfac37bSRichard Henderson/**
23cdfac37bSRichard Henderson * required_atomicity:
24cdfac37bSRichard Henderson *
25cdfac37bSRichard Henderson * Return the lg2 bytes of atomicity required by @memop for @p.
26cdfac37bSRichard Henderson * If the operation must be split into two operations to be
27cdfac37bSRichard Henderson * examined separately for atomicity, return -lg2.
28cdfac37bSRichard Henderson */
2973fda56fSAnton Johanssonstatic int required_atomicity(CPUState *cpu, uintptr_t p, MemOp memop)
30cdfac37bSRichard Henderson{
31cdfac37bSRichard Henderson    MemOp atom = memop & MO_ATOM_MASK;
32cdfac37bSRichard Henderson    MemOp size = memop & MO_SIZE;
33cdfac37bSRichard Henderson    MemOp half = size ? size - 1 : 0;
34cdfac37bSRichard Henderson    unsigned tmp;
35cdfac37bSRichard Henderson    int atmax;
36cdfac37bSRichard Henderson
37cdfac37bSRichard Henderson    switch (atom) {
38cdfac37bSRichard Henderson    case MO_ATOM_NONE:
39cdfac37bSRichard Henderson        atmax = MO_8;
40cdfac37bSRichard Henderson        break;
41cdfac37bSRichard Henderson
42cdfac37bSRichard Henderson    case MO_ATOM_IFALIGN_PAIR:
43cdfac37bSRichard Henderson        size = half;
44cdfac37bSRichard Henderson        /* fall through */
45cdfac37bSRichard Henderson
46cdfac37bSRichard Henderson    case MO_ATOM_IFALIGN:
47cdfac37bSRichard Henderson        tmp = (1 << size) - 1;
48cdfac37bSRichard Henderson        atmax = p & tmp ? MO_8 : size;
49cdfac37bSRichard Henderson        break;
50cdfac37bSRichard Henderson
51cdfac37bSRichard Henderson    case MO_ATOM_WITHIN16:
52cdfac37bSRichard Henderson        tmp = p & 15;
53cdfac37bSRichard Henderson        atmax = (tmp + (1 << size) <= 16 ? size : MO_8);
54cdfac37bSRichard Henderson        break;
55cdfac37bSRichard Henderson
56cdfac37bSRichard Henderson    case MO_ATOM_WITHIN16_PAIR:
57cdfac37bSRichard Henderson        tmp = p & 15;
58cdfac37bSRichard Henderson        if (tmp + (1 << size) <= 16) {
59cdfac37bSRichard Henderson            atmax = size;
60cdfac37bSRichard Henderson        } else if (tmp + (1 << half) == 16) {
61cdfac37bSRichard Henderson            /*
62cdfac37bSRichard Henderson             * The pair exactly straddles the boundary.
63cdfac37bSRichard Henderson             * Both halves are naturally aligned and atomic.
64cdfac37bSRichard Henderson             */
65cdfac37bSRichard Henderson            atmax = half;
66cdfac37bSRichard Henderson        } else {
67cdfac37bSRichard Henderson            /*
68cdfac37bSRichard Henderson             * One of the pair crosses the boundary, and is non-atomic.
69cdfac37bSRichard Henderson             * The other of the pair does not cross, and is atomic.
70cdfac37bSRichard Henderson             */
71cdfac37bSRichard Henderson            atmax = -half;
72cdfac37bSRichard Henderson        }
73cdfac37bSRichard Henderson        break;
74cdfac37bSRichard Henderson
75cdfac37bSRichard Henderson    case MO_ATOM_SUBALIGN:
76cdfac37bSRichard Henderson        /*
77cdfac37bSRichard Henderson         * Examine the alignment of p to determine if there are subobjects
78cdfac37bSRichard Henderson         * that must be aligned.  Note that we only really need ctz4() --
79431eddb6SManos Pitsidianakis         * any more significant bits are discarded by the immediately
80cdfac37bSRichard Henderson         * following comparison.
81cdfac37bSRichard Henderson         */
82cdfac37bSRichard Henderson        tmp = ctz32(p);
83cdfac37bSRichard Henderson        atmax = MIN(size, tmp);
84cdfac37bSRichard Henderson        break;
85cdfac37bSRichard Henderson
86cdfac37bSRichard Henderson    default:
87cdfac37bSRichard Henderson        g_assert_not_reached();
88cdfac37bSRichard Henderson    }
89cdfac37bSRichard Henderson
90cdfac37bSRichard Henderson    /*
91cdfac37bSRichard Henderson     * Here we have the architectural atomicity of the operation.
92cdfac37bSRichard Henderson     * However, when executing in a serial context, we need no extra
93cdfac37bSRichard Henderson     * host atomicity in order to avoid racing.  This reduction
94cdfac37bSRichard Henderson     * avoids looping with cpu_loop_exit_atomic.
95cdfac37bSRichard Henderson     */
9673fda56fSAnton Johansson    if (cpu_in_serial_context(cpu)) {
97cdfac37bSRichard Henderson        return MO_8;
98cdfac37bSRichard Henderson    }
99cdfac37bSRichard Henderson    return atmax;
100cdfac37bSRichard Henderson}
101cdfac37bSRichard Henderson
/**
 * load_atomic2:
 * @pv: host address, assumed 2-byte aligned
 *
 * Atomically load 2 aligned bytes from @pv.
 */
static inline uint16_t load_atomic2(void *pv)
{
    uint16_t *src = __builtin_assume_aligned(pv, 2);
    uint16_t val = qatomic_read(src);

    return val;
}
113cdfac37bSRichard Henderson
/**
 * load_atomic4:
 * @pv: host address, assumed 4-byte aligned
 *
 * Atomically load 4 aligned bytes from @pv.
 */
static inline uint32_t load_atomic4(void *pv)
{
    uint32_t *src = __builtin_assume_aligned(pv, 4);
    uint32_t val = qatomic_read(src);

    return val;
}
125cdfac37bSRichard Henderson
126cdfac37bSRichard Henderson/**
127cdfac37bSRichard Henderson * load_atomic8:
128cdfac37bSRichard Henderson * @pv: host address
129cdfac37bSRichard Henderson *
130cdfac37bSRichard Henderson * Atomically load 8 aligned bytes from @pv.
131cdfac37bSRichard Henderson */
132cdfac37bSRichard Hendersonstatic inline uint64_t load_atomic8(void *pv)
133cdfac37bSRichard Henderson{
134cdfac37bSRichard Henderson    uint64_t *p = __builtin_assume_aligned(pv, 8);
135cdfac37bSRichard Henderson
136cdfac37bSRichard Henderson    qemu_build_assert(HAVE_al8);
137cdfac37bSRichard Henderson    return qatomic_read__nocheck(p);
138cdfac37bSRichard Henderson}
139cdfac37bSRichard Henderson
140cdfac37bSRichard Henderson/**
141cdfac37bSRichard Henderson * load_atomic8_or_exit:
14273fda56fSAnton Johansson * @cpu: generic cpu state
143cdfac37bSRichard Henderson * @ra: host unwind address
144cdfac37bSRichard Henderson * @pv: host address
145cdfac37bSRichard Henderson *
146cdfac37bSRichard Henderson * Atomically load 8 aligned bytes from @pv.
147cdfac37bSRichard Henderson * If this is not possible, longjmp out to restart serially.
148cdfac37bSRichard Henderson */
14973fda56fSAnton Johanssonstatic uint64_t load_atomic8_or_exit(CPUState *cpu, uintptr_t ra, void *pv)
150cdfac37bSRichard Henderson{
151cdfac37bSRichard Henderson    if (HAVE_al8) {
152cdfac37bSRichard Henderson        return load_atomic8(pv);
153cdfac37bSRichard Henderson    }
154cdfac37bSRichard Henderson
155cdfac37bSRichard Henderson#ifdef CONFIG_USER_ONLY
156cdfac37bSRichard Henderson    /*
157cdfac37bSRichard Henderson     * If the page is not writable, then assume the value is immutable
158cdfac37bSRichard Henderson     * and requires no locking.  This ignores the case of MAP_SHARED with
159cdfac37bSRichard Henderson     * another process, because the fallback start_exclusive solution
160cdfac37bSRichard Henderson     * provides no protection across processes.
161cdfac37bSRichard Henderson     */
1622c8412d4SRichard Henderson    WITH_MMAP_LOCK_GUARD() {
163f1ce0b80SRichard Henderson        if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) {
164cdfac37bSRichard Henderson            uint64_t *p = __builtin_assume_aligned(pv, 8);
165cdfac37bSRichard Henderson            return *p;
166cdfac37bSRichard Henderson        }
1672c8412d4SRichard Henderson    }
168cdfac37bSRichard Henderson#endif
169cdfac37bSRichard Henderson
170cdfac37bSRichard Henderson    /* Ultimate fallback: re-execute in serial context. */
171*b24bad34SAlex Bennée    trace_load_atom8_or_exit_fallback(ra);
17273fda56fSAnton Johansson    cpu_loop_exit_atomic(cpu, ra);
173cdfac37bSRichard Henderson}
174cdfac37bSRichard Henderson
175cdfac37bSRichard Henderson/**
176cdfac37bSRichard Henderson * load_atomic16_or_exit:
17773fda56fSAnton Johansson * @cpu: generic cpu state
178cdfac37bSRichard Henderson * @ra: host unwind address
179cdfac37bSRichard Henderson * @pv: host address
180cdfac37bSRichard Henderson *
181cdfac37bSRichard Henderson * Atomically load 16 aligned bytes from @pv.
182cdfac37bSRichard Henderson * If this is not possible, longjmp out to restart serially.
183cdfac37bSRichard Henderson */
18473fda56fSAnton Johanssonstatic Int128 load_atomic16_or_exit(CPUState *cpu, uintptr_t ra, void *pv)
185cdfac37bSRichard Henderson{
186cdfac37bSRichard Henderson    Int128 *p = __builtin_assume_aligned(pv, 16);
187cdfac37bSRichard Henderson
1888dc24ff4SRichard Henderson    if (HAVE_ATOMIC128_RO) {
1898dc24ff4SRichard Henderson        return atomic16_read_ro(p);
190cdfac37bSRichard Henderson    }
191cdfac37bSRichard Henderson
192cdfac37bSRichard Henderson    /*
193cdfac37bSRichard Henderson     * We can only use cmpxchg to emulate a load if the page is writable.
194cdfac37bSRichard Henderson     * If the page is not writable, then assume the value is immutable
195cdfac37bSRichard Henderson     * and requires no locking.  This ignores the case of MAP_SHARED with
196cdfac37bSRichard Henderson     * another process, because the fallback start_exclusive solution
197cdfac37bSRichard Henderson     * provides no protection across processes.
1982c8412d4SRichard Henderson     *
1992c8412d4SRichard Henderson     * In system mode all guest pages are writable.  For user mode,
2002c8412d4SRichard Henderson     * we must take mmap_lock so that the query remains valid until
2012c8412d4SRichard Henderson     * the write is complete -- tests/tcg/multiarch/munmap-pthread.c
2022c8412d4SRichard Henderson     * is an example that can race.
203cdfac37bSRichard Henderson     */
2042c8412d4SRichard Henderson    WITH_MMAP_LOCK_GUARD() {
2052c8412d4SRichard Henderson#ifdef CONFIG_USER_ONLY
206f1ce0b80SRichard Henderson        if (!page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) {
207cdfac37bSRichard Henderson            return *p;
208cdfac37bSRichard Henderson        }
209cdfac37bSRichard Henderson#endif
2108dc24ff4SRichard Henderson        if (HAVE_ATOMIC128_RW) {
2118dc24ff4SRichard Henderson            return atomic16_read_rw(p);
212cdfac37bSRichard Henderson        }
2132c8412d4SRichard Henderson    }
214cdfac37bSRichard Henderson
215cdfac37bSRichard Henderson    /* Ultimate fallback: re-execute in serial context. */
216*b24bad34SAlex Bennée    trace_load_atom16_or_exit_fallback(ra);
21773fda56fSAnton Johansson    cpu_loop_exit_atomic(cpu, ra);
218cdfac37bSRichard Henderson}
219cdfac37bSRichard Henderson
220cdfac37bSRichard Henderson/**
221cdfac37bSRichard Henderson * load_atom_extract_al4x2:
222cdfac37bSRichard Henderson * @pv: host address
223cdfac37bSRichard Henderson *
224cdfac37bSRichard Henderson * Load 4 bytes from @p, from two sequential atomic 4-byte loads.
225cdfac37bSRichard Henderson */
226cdfac37bSRichard Hendersonstatic uint32_t load_atom_extract_al4x2(void *pv)
227cdfac37bSRichard Henderson{
228cdfac37bSRichard Henderson    uintptr_t pi = (uintptr_t)pv;
229cdfac37bSRichard Henderson    int sh = (pi & 3) * 8;
230cdfac37bSRichard Henderson    uint32_t a, b;
231cdfac37bSRichard Henderson
232cdfac37bSRichard Henderson    pv = (void *)(pi & ~3);
233cdfac37bSRichard Henderson    a = load_atomic4(pv);
234cdfac37bSRichard Henderson    b = load_atomic4(pv + 4);
235cdfac37bSRichard Henderson
236cdfac37bSRichard Henderson    if (HOST_BIG_ENDIAN) {
237cdfac37bSRichard Henderson        return (a << sh) | (b >> (-sh & 31));
238cdfac37bSRichard Henderson    } else {
239cdfac37bSRichard Henderson        return (a >> sh) | (b << (-sh & 31));
240cdfac37bSRichard Henderson    }
241cdfac37bSRichard Henderson}
242cdfac37bSRichard Henderson
243cdfac37bSRichard Henderson/**
244cdfac37bSRichard Henderson * load_atom_extract_al8x2:
245cdfac37bSRichard Henderson * @pv: host address
246cdfac37bSRichard Henderson *
247cdfac37bSRichard Henderson * Load 8 bytes from @p, from two sequential atomic 8-byte loads.
248cdfac37bSRichard Henderson */
249cdfac37bSRichard Hendersonstatic uint64_t load_atom_extract_al8x2(void *pv)
250cdfac37bSRichard Henderson{
251cdfac37bSRichard Henderson    uintptr_t pi = (uintptr_t)pv;
252cdfac37bSRichard Henderson    int sh = (pi & 7) * 8;
253cdfac37bSRichard Henderson    uint64_t a, b;
254cdfac37bSRichard Henderson
255cdfac37bSRichard Henderson    pv = (void *)(pi & ~7);
256cdfac37bSRichard Henderson    a = load_atomic8(pv);
257cdfac37bSRichard Henderson    b = load_atomic8(pv + 8);
258cdfac37bSRichard Henderson
259cdfac37bSRichard Henderson    if (HOST_BIG_ENDIAN) {
260cdfac37bSRichard Henderson        return (a << sh) | (b >> (-sh & 63));
261cdfac37bSRichard Henderson    } else {
262cdfac37bSRichard Henderson        return (a >> sh) | (b << (-sh & 63));
263cdfac37bSRichard Henderson    }
264cdfac37bSRichard Henderson}
265cdfac37bSRichard Henderson
266cdfac37bSRichard Henderson/**
267cdfac37bSRichard Henderson * load_atom_extract_al8_or_exit:
26873fda56fSAnton Johansson * @cpu: generic cpu state
269cdfac37bSRichard Henderson * @ra: host unwind address
270cdfac37bSRichard Henderson * @pv: host address
271cdfac37bSRichard Henderson * @s: object size in bytes, @s <= 4.
272cdfac37bSRichard Henderson *
273cdfac37bSRichard Henderson * Atomically load @s bytes from @p, when p % s != 0, and [p, p+s-1] does
274cdfac37bSRichard Henderson * not cross an 8-byte boundary.  This means that we can perform an atomic
275cdfac37bSRichard Henderson * 8-byte load and extract.
276cdfac37bSRichard Henderson * The value is returned in the low bits of a uint32_t.
277cdfac37bSRichard Henderson */
27873fda56fSAnton Johanssonstatic uint32_t load_atom_extract_al8_or_exit(CPUState *cpu, uintptr_t ra,
279cdfac37bSRichard Henderson                                              void *pv, int s)
280cdfac37bSRichard Henderson{
281cdfac37bSRichard Henderson    uintptr_t pi = (uintptr_t)pv;
282cdfac37bSRichard Henderson    int o = pi & 7;
283cdfac37bSRichard Henderson    int shr = (HOST_BIG_ENDIAN ? 8 - s - o : o) * 8;
284cdfac37bSRichard Henderson
285cdfac37bSRichard Henderson    pv = (void *)(pi & ~7);
28673fda56fSAnton Johansson    return load_atomic8_or_exit(cpu, ra, pv) >> shr;
287cdfac37bSRichard Henderson}
288cdfac37bSRichard Henderson
289cdfac37bSRichard Henderson/**
290cdfac37bSRichard Henderson * load_atom_extract_al16_or_exit:
29173fda56fSAnton Johansson * @cpu: generic cpu state
292cdfac37bSRichard Henderson * @ra: host unwind address
293cdfac37bSRichard Henderson * @p: host address
294cdfac37bSRichard Henderson * @s: object size in bytes, @s <= 8.
295cdfac37bSRichard Henderson *
296cdfac37bSRichard Henderson * Atomically load @s bytes from @p, when p % 16 < 8
297cdfac37bSRichard Henderson * and p % 16 + s > 8.  I.e. does not cross a 16-byte
298cdfac37bSRichard Henderson * boundary, but *does* cross an 8-byte boundary.
299cdfac37bSRichard Henderson * This is the slow version, so we must have eliminated
300cdfac37bSRichard Henderson * any faster load_atom_extract_al8_or_exit case.
301cdfac37bSRichard Henderson *
302cdfac37bSRichard Henderson * If this is not possible, longjmp out to restart serially.
303cdfac37bSRichard Henderson */
30473fda56fSAnton Johanssonstatic uint64_t load_atom_extract_al16_or_exit(CPUState *cpu, uintptr_t ra,
305cdfac37bSRichard Henderson                                               void *pv, int s)
306cdfac37bSRichard Henderson{
307cdfac37bSRichard Henderson    uintptr_t pi = (uintptr_t)pv;
308cdfac37bSRichard Henderson    int o = pi & 7;
309cdfac37bSRichard Henderson    int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
310cdfac37bSRichard Henderson    Int128 r;
311cdfac37bSRichard Henderson
312cdfac37bSRichard Henderson    /*
313cdfac37bSRichard Henderson     * Note constraints above: p & 8 must be clear.
314cdfac37bSRichard Henderson     * Provoke SIGBUS if possible otherwise.
315cdfac37bSRichard Henderson     */
316cdfac37bSRichard Henderson    pv = (void *)(pi & ~7);
31773fda56fSAnton Johansson    r = load_atomic16_or_exit(cpu, ra, pv);
318cdfac37bSRichard Henderson
319cdfac37bSRichard Henderson    r = int128_urshift(r, shr);
320cdfac37bSRichard Henderson    return int128_getlo(r);
321cdfac37bSRichard Henderson}
322cdfac37bSRichard Henderson
323cdfac37bSRichard Henderson/**
324cdfac37bSRichard Henderson * load_atom_4_by_2:
325cdfac37bSRichard Henderson * @pv: host address
326cdfac37bSRichard Henderson *
327cdfac37bSRichard Henderson * Load 4 bytes from @pv, with two 2-byte atomic loads.
328cdfac37bSRichard Henderson */
329cdfac37bSRichard Hendersonstatic inline uint32_t load_atom_4_by_2(void *pv)
330cdfac37bSRichard Henderson{
331cdfac37bSRichard Henderson    uint32_t a = load_atomic2(pv);
332cdfac37bSRichard Henderson    uint32_t b = load_atomic2(pv + 2);
333cdfac37bSRichard Henderson
334cdfac37bSRichard Henderson    if (HOST_BIG_ENDIAN) {
335cdfac37bSRichard Henderson        return (a << 16) | b;
336cdfac37bSRichard Henderson    } else {
337cdfac37bSRichard Henderson        return (b << 16) | a;
338cdfac37bSRichard Henderson    }
339cdfac37bSRichard Henderson}
340cdfac37bSRichard Henderson
341cdfac37bSRichard Henderson/**
342cdfac37bSRichard Henderson * load_atom_8_by_2:
343cdfac37bSRichard Henderson * @pv: host address
344cdfac37bSRichard Henderson *
345cdfac37bSRichard Henderson * Load 8 bytes from @pv, with four 2-byte atomic loads.
346cdfac37bSRichard Henderson */
347cdfac37bSRichard Hendersonstatic inline uint64_t load_atom_8_by_2(void *pv)
348cdfac37bSRichard Henderson{
349cdfac37bSRichard Henderson    uint32_t a = load_atom_4_by_2(pv);
350cdfac37bSRichard Henderson    uint32_t b = load_atom_4_by_2(pv + 4);
351cdfac37bSRichard Henderson
352cdfac37bSRichard Henderson    if (HOST_BIG_ENDIAN) {
353cdfac37bSRichard Henderson        return ((uint64_t)a << 32) | b;
354cdfac37bSRichard Henderson    } else {
355cdfac37bSRichard Henderson        return ((uint64_t)b << 32) | a;
356cdfac37bSRichard Henderson    }
357cdfac37bSRichard Henderson}
358cdfac37bSRichard Henderson
359cdfac37bSRichard Henderson/**
360cdfac37bSRichard Henderson * load_atom_8_by_4:
361cdfac37bSRichard Henderson * @pv: host address
362cdfac37bSRichard Henderson *
363cdfac37bSRichard Henderson * Load 8 bytes from @pv, with two 4-byte atomic loads.
364cdfac37bSRichard Henderson */
365cdfac37bSRichard Hendersonstatic inline uint64_t load_atom_8_by_4(void *pv)
366cdfac37bSRichard Henderson{
367cdfac37bSRichard Henderson    uint32_t a = load_atomic4(pv);
368cdfac37bSRichard Henderson    uint32_t b = load_atomic4(pv + 4);
369cdfac37bSRichard Henderson
370cdfac37bSRichard Henderson    if (HOST_BIG_ENDIAN) {
371cdfac37bSRichard Henderson        return ((uint64_t)a << 32) | b;
372cdfac37bSRichard Henderson    } else {
373cdfac37bSRichard Henderson        return ((uint64_t)b << 32) | a;
374cdfac37bSRichard Henderson    }
375cdfac37bSRichard Henderson}
376cdfac37bSRichard Henderson
377cdfac37bSRichard Henderson/**
37835c653c4SRichard Henderson * load_atom_8_by_8_or_4:
37935c653c4SRichard Henderson * @pv: host address
38035c653c4SRichard Henderson *
38135c653c4SRichard Henderson * Load 8 bytes from aligned @pv, with at least 4-byte atomicity.
38235c653c4SRichard Henderson */
38335c653c4SRichard Hendersonstatic inline uint64_t load_atom_8_by_8_or_4(void *pv)
38435c653c4SRichard Henderson{
38535c653c4SRichard Henderson    if (HAVE_al8_fast) {
38635c653c4SRichard Henderson        return load_atomic8(pv);
38735c653c4SRichard Henderson    } else {
38835c653c4SRichard Henderson        return load_atom_8_by_4(pv);
38935c653c4SRichard Henderson    }
39035c653c4SRichard Henderson}
39135c653c4SRichard Henderson
39235c653c4SRichard Henderson/**
393cdfac37bSRichard Henderson * load_atom_2:
394cdfac37bSRichard Henderson * @p: host address
395cdfac37bSRichard Henderson * @memop: the full memory op
396cdfac37bSRichard Henderson *
397cdfac37bSRichard Henderson * Load 2 bytes from @p, honoring the atomicity of @memop.
398cdfac37bSRichard Henderson */
39973fda56fSAnton Johanssonstatic uint16_t load_atom_2(CPUState *cpu, uintptr_t ra,
400cdfac37bSRichard Henderson                            void *pv, MemOp memop)
401cdfac37bSRichard Henderson{
402cdfac37bSRichard Henderson    uintptr_t pi = (uintptr_t)pv;
403cdfac37bSRichard Henderson    int atmax;
404cdfac37bSRichard Henderson
405cdfac37bSRichard Henderson    if (likely((pi & 1) == 0)) {
406cdfac37bSRichard Henderson        return load_atomic2(pv);
407cdfac37bSRichard Henderson    }
4088dc24ff4SRichard Henderson    if (HAVE_ATOMIC128_RO) {
4096a2c23ddSRichard Henderson        intptr_t left_in_page = -(pi | TARGET_PAGE_MASK);
4106a2c23ddSRichard Henderson        if (likely(left_in_page > 8)) {
411cdfac37bSRichard Henderson            return load_atom_extract_al16_or_al8(pv, 2);
412cdfac37bSRichard Henderson        }
4136a2c23ddSRichard Henderson    }
414cdfac37bSRichard Henderson
41573fda56fSAnton Johansson    atmax = required_atomicity(cpu, pi, memop);
416cdfac37bSRichard Henderson    switch (atmax) {
417cdfac37bSRichard Henderson    case MO_8:
418cdfac37bSRichard Henderson        return lduw_he_p(pv);
419cdfac37bSRichard Henderson    case MO_16:
420cdfac37bSRichard Henderson        /* The only case remaining is MO_ATOM_WITHIN16. */
421cdfac37bSRichard Henderson        if (!HAVE_al8_fast && (pi & 3) == 1) {
422cdfac37bSRichard Henderson            /* Big or little endian, we want the middle two bytes. */
423cdfac37bSRichard Henderson            return load_atomic4(pv - 1) >> 8;
424cdfac37bSRichard Henderson        }
425cdfac37bSRichard Henderson        if ((pi & 15) != 7) {
42673fda56fSAnton Johansson            return load_atom_extract_al8_or_exit(cpu, ra, pv, 2);
427cdfac37bSRichard Henderson        }
42873fda56fSAnton Johansson        return load_atom_extract_al16_or_exit(cpu, ra, pv, 2);
429cdfac37bSRichard Henderson    default:
430cdfac37bSRichard Henderson        g_assert_not_reached();
431cdfac37bSRichard Henderson    }
432cdfac37bSRichard Henderson}
433cdfac37bSRichard Henderson
434cdfac37bSRichard Henderson/**
435cdfac37bSRichard Henderson * load_atom_4:
436cdfac37bSRichard Henderson * @p: host address
437cdfac37bSRichard Henderson * @memop: the full memory op
438cdfac37bSRichard Henderson *
439cdfac37bSRichard Henderson * Load 4 bytes from @p, honoring the atomicity of @memop.
440cdfac37bSRichard Henderson */
44173fda56fSAnton Johanssonstatic uint32_t load_atom_4(CPUState *cpu, uintptr_t ra,
442cdfac37bSRichard Henderson                            void *pv, MemOp memop)
443cdfac37bSRichard Henderson{
444cdfac37bSRichard Henderson    uintptr_t pi = (uintptr_t)pv;
445cdfac37bSRichard Henderson    int atmax;
446cdfac37bSRichard Henderson
447cdfac37bSRichard Henderson    if (likely((pi & 3) == 0)) {
448cdfac37bSRichard Henderson        return load_atomic4(pv);
449cdfac37bSRichard Henderson    }
4508dc24ff4SRichard Henderson    if (HAVE_ATOMIC128_RO) {
4516a2c23ddSRichard Henderson        intptr_t left_in_page = -(pi | TARGET_PAGE_MASK);
4526a2c23ddSRichard Henderson        if (likely(left_in_page > 8)) {
453cdfac37bSRichard Henderson            return load_atom_extract_al16_or_al8(pv, 4);
454cdfac37bSRichard Henderson        }
4556a2c23ddSRichard Henderson    }
456cdfac37bSRichard Henderson
45773fda56fSAnton Johansson    atmax = required_atomicity(cpu, pi, memop);
458cdfac37bSRichard Henderson    switch (atmax) {
459cdfac37bSRichard Henderson    case MO_8:
460cdfac37bSRichard Henderson    case MO_16:
461cdfac37bSRichard Henderson    case -MO_16:
462cdfac37bSRichard Henderson        /*
463cdfac37bSRichard Henderson         * For MO_ATOM_IFALIGN, this is more atomicity than required,
464cdfac37bSRichard Henderson         * but it's trivially supported on all hosts, better than 4
465cdfac37bSRichard Henderson         * individual byte loads (when the host requires alignment),
466cdfac37bSRichard Henderson         * and overlaps with the MO_ATOM_SUBALIGN case of p % 2 == 0.
467cdfac37bSRichard Henderson         */
468cdfac37bSRichard Henderson        return load_atom_extract_al4x2(pv);
469cdfac37bSRichard Henderson    case MO_32:
470cdfac37bSRichard Henderson        if (!(pi & 4)) {
47173fda56fSAnton Johansson            return load_atom_extract_al8_or_exit(cpu, ra, pv, 4);
472cdfac37bSRichard Henderson        }
47373fda56fSAnton Johansson        return load_atom_extract_al16_or_exit(cpu, ra, pv, 4);
474cdfac37bSRichard Henderson    default:
475cdfac37bSRichard Henderson        g_assert_not_reached();
476cdfac37bSRichard Henderson    }
477cdfac37bSRichard Henderson}
478cdfac37bSRichard Henderson
479cdfac37bSRichard Henderson/**
480cdfac37bSRichard Henderson * load_atom_8:
481cdfac37bSRichard Henderson * @p: host address
482cdfac37bSRichard Henderson * @memop: the full memory op
483cdfac37bSRichard Henderson *
484cdfac37bSRichard Henderson * Load 8 bytes from @p, honoring the atomicity of @memop.
485cdfac37bSRichard Henderson */
48673fda56fSAnton Johanssonstatic uint64_t load_atom_8(CPUState *cpu, uintptr_t ra,
487cdfac37bSRichard Henderson                            void *pv, MemOp memop)
488cdfac37bSRichard Henderson{
489cdfac37bSRichard Henderson    uintptr_t pi = (uintptr_t)pv;
490cdfac37bSRichard Henderson    int atmax;
491cdfac37bSRichard Henderson
492cdfac37bSRichard Henderson    /*
493cdfac37bSRichard Henderson     * If the host does not support 8-byte atomics, wait until we have
494cdfac37bSRichard Henderson     * examined the atomicity parameters below.
495cdfac37bSRichard Henderson     */
496cdfac37bSRichard Henderson    if (HAVE_al8 && likely((pi & 7) == 0)) {
497cdfac37bSRichard Henderson        return load_atomic8(pv);
498cdfac37bSRichard Henderson    }
4998dc24ff4SRichard Henderson    if (HAVE_ATOMIC128_RO) {
500cdfac37bSRichard Henderson        return load_atom_extract_al16_or_al8(pv, 8);
501cdfac37bSRichard Henderson    }
502cdfac37bSRichard Henderson
50373fda56fSAnton Johansson    atmax = required_atomicity(cpu, pi, memop);
504cdfac37bSRichard Henderson    if (atmax == MO_64) {
505cdfac37bSRichard Henderson        if (!HAVE_al8 && (pi & 7) == 0) {
50673fda56fSAnton Johansson            load_atomic8_or_exit(cpu, ra, pv);
507cdfac37bSRichard Henderson        }
50873fda56fSAnton Johansson        return load_atom_extract_al16_or_exit(cpu, ra, pv, 8);
509cdfac37bSRichard Henderson    }
510cdfac37bSRichard Henderson    if (HAVE_al8_fast) {
511cdfac37bSRichard Henderson        return load_atom_extract_al8x2(pv);
512cdfac37bSRichard Henderson    }
513cdfac37bSRichard Henderson    switch (atmax) {
514cdfac37bSRichard Henderson    case MO_8:
515cdfac37bSRichard Henderson        return ldq_he_p(pv);
516cdfac37bSRichard Henderson    case MO_16:
517cdfac37bSRichard Henderson        return load_atom_8_by_2(pv);
518cdfac37bSRichard Henderson    case MO_32:
519cdfac37bSRichard Henderson        return load_atom_8_by_4(pv);
520cdfac37bSRichard Henderson    case -MO_32:
521cdfac37bSRichard Henderson        if (HAVE_al8) {
522cdfac37bSRichard Henderson            return load_atom_extract_al8x2(pv);
523cdfac37bSRichard Henderson        }
524*b24bad34SAlex Bennée        trace_load_atom8_fallback(memop, ra);
52573fda56fSAnton Johansson        cpu_loop_exit_atomic(cpu, ra);
526cdfac37bSRichard Henderson    default:
527cdfac37bSRichard Henderson        g_assert_not_reached();
528cdfac37bSRichard Henderson    }
529cdfac37bSRichard Henderson}
5305b36f268SRichard Henderson
5315b36f268SRichard Henderson/**
53235c653c4SRichard Henderson * load_atom_16:
53335c653c4SRichard Henderson * @p: host address
53435c653c4SRichard Henderson * @memop: the full memory op
53535c653c4SRichard Henderson *
53635c653c4SRichard Henderson * Load 16 bytes from @p, honoring the atomicity of @memop.
53735c653c4SRichard Henderson */
53873fda56fSAnton Johanssonstatic Int128 load_atom_16(CPUState *cpu, uintptr_t ra,
53935c653c4SRichard Henderson                           void *pv, MemOp memop)
54035c653c4SRichard Henderson{
54135c653c4SRichard Henderson    uintptr_t pi = (uintptr_t)pv;
54235c653c4SRichard Henderson    int atmax;
54335c653c4SRichard Henderson    Int128 r;
54435c653c4SRichard Henderson    uint64_t a, b;
54535c653c4SRichard Henderson
54635c653c4SRichard Henderson    /*
54735c653c4SRichard Henderson     * If the host does not support 16-byte atomics, wait until we have
54835c653c4SRichard Henderson     * examined the atomicity parameters below.
54935c653c4SRichard Henderson     */
5508dc24ff4SRichard Henderson    if (HAVE_ATOMIC128_RO && likely((pi & 15) == 0)) {
5518dc24ff4SRichard Henderson        return atomic16_read_ro(pv);
55235c653c4SRichard Henderson    }
55335c653c4SRichard Henderson
55473fda56fSAnton Johansson    atmax = required_atomicity(cpu, pi, memop);
55535c653c4SRichard Henderson    switch (atmax) {
55635c653c4SRichard Henderson    case MO_8:
55735c653c4SRichard Henderson        memcpy(&r, pv, 16);
55835c653c4SRichard Henderson        return r;
55935c653c4SRichard Henderson    case MO_16:
56035c653c4SRichard Henderson        a = load_atom_8_by_2(pv);
56135c653c4SRichard Henderson        b = load_atom_8_by_2(pv + 8);
56235c653c4SRichard Henderson        break;
56335c653c4SRichard Henderson    case MO_32:
56435c653c4SRichard Henderson        a = load_atom_8_by_4(pv);
56535c653c4SRichard Henderson        b = load_atom_8_by_4(pv + 8);
56635c653c4SRichard Henderson        break;
56735c653c4SRichard Henderson    case MO_64:
56835c653c4SRichard Henderson        if (!HAVE_al8) {
569*b24bad34SAlex Bennée            trace_load_atom16_fallback(memop, ra);
57073fda56fSAnton Johansson            cpu_loop_exit_atomic(cpu, ra);
57135c653c4SRichard Henderson        }
57235c653c4SRichard Henderson        a = load_atomic8(pv);
57335c653c4SRichard Henderson        b = load_atomic8(pv + 8);
57435c653c4SRichard Henderson        break;
57535c653c4SRichard Henderson    case -MO_64:
57635c653c4SRichard Henderson        if (!HAVE_al8) {
577*b24bad34SAlex Bennée            trace_load_atom16_fallback(memop, ra);
57873fda56fSAnton Johansson            cpu_loop_exit_atomic(cpu, ra);
57935c653c4SRichard Henderson        }
58035c653c4SRichard Henderson        a = load_atom_extract_al8x2(pv);
58135c653c4SRichard Henderson        b = load_atom_extract_al8x2(pv + 8);
58235c653c4SRichard Henderson        break;
58335c653c4SRichard Henderson    case MO_128:
58473fda56fSAnton Johansson        return load_atomic16_or_exit(cpu, ra, pv);
58535c653c4SRichard Henderson    default:
58635c653c4SRichard Henderson        g_assert_not_reached();
58735c653c4SRichard Henderson    }
58835c653c4SRichard Henderson    return int128_make128(HOST_BIG_ENDIAN ? b : a, HOST_BIG_ENDIAN ? a : b);
58935c653c4SRichard Henderson}
59035c653c4SRichard Henderson
/**
 * store_atomic2:
 * @pv: host address; the compiler is told it is 2-byte aligned
 * @val: value to store
 *
 * Atomically store the two bytes of @val at @pv.
 */
static inline void store_atomic2(void *pv, uint16_t val)
{
    uint16_t *dst = __builtin_assume_aligned(pv, 2);

    qatomic_set(dst, val);
}
6035b36f268SRichard Henderson
/**
 * store_atomic4:
 * @pv: host address; the compiler is told it is 4-byte aligned
 * @val: value to store
 *
 * Atomically store the four bytes of @val at @pv.
 */
static inline void store_atomic4(void *pv, uint32_t val)
{
    uint32_t *dst = __builtin_assume_aligned(pv, 4);

    qatomic_set(dst, val);
}
6165b36f268SRichard Henderson
6175b36f268SRichard Henderson/**
6185b36f268SRichard Henderson * store_atomic8:
6195b36f268SRichard Henderson * @pv: host address
6205b36f268SRichard Henderson * @val: value to store
6215b36f268SRichard Henderson *
6225b36f268SRichard Henderson * Atomically store 8 aligned bytes to @pv.
6235b36f268SRichard Henderson */
6245b36f268SRichard Hendersonstatic inline void store_atomic8(void *pv, uint64_t val)
6255b36f268SRichard Henderson{
6265b36f268SRichard Henderson    uint64_t *p = __builtin_assume_aligned(pv, 8);
6275b36f268SRichard Henderson
6285b36f268SRichard Henderson    qemu_build_assert(HAVE_al8);
6295b36f268SRichard Henderson    qatomic_set__nocheck(p, val);
6305b36f268SRichard Henderson}
6315b36f268SRichard Henderson
6325b36f268SRichard Henderson/**
6335b36f268SRichard Henderson * store_atom_4x2
6345b36f268SRichard Henderson */
6355b36f268SRichard Hendersonstatic inline void store_atom_4_by_2(void *pv, uint32_t val)
6365b36f268SRichard Henderson{
6375b36f268SRichard Henderson    store_atomic2(pv, val >> (HOST_BIG_ENDIAN ? 16 : 0));
6385b36f268SRichard Henderson    store_atomic2(pv + 2, val >> (HOST_BIG_ENDIAN ? 0 : 16));
6395b36f268SRichard Henderson}
6405b36f268SRichard Henderson
6415b36f268SRichard Henderson/**
6425b36f268SRichard Henderson * store_atom_8_by_2
6435b36f268SRichard Henderson */
6445b36f268SRichard Hendersonstatic inline void store_atom_8_by_2(void *pv, uint64_t val)
6455b36f268SRichard Henderson{
6465b36f268SRichard Henderson    store_atom_4_by_2(pv, val >> (HOST_BIG_ENDIAN ? 32 : 0));
6475b36f268SRichard Henderson    store_atom_4_by_2(pv + 4, val >> (HOST_BIG_ENDIAN ? 0 : 32));
6485b36f268SRichard Henderson}
6495b36f268SRichard Henderson
6505b36f268SRichard Henderson/**
6515b36f268SRichard Henderson * store_atom_8_by_4
6525b36f268SRichard Henderson */
6535b36f268SRichard Hendersonstatic inline void store_atom_8_by_4(void *pv, uint64_t val)
6545b36f268SRichard Henderson{
6555b36f268SRichard Henderson    store_atomic4(pv, val >> (HOST_BIG_ENDIAN ? 32 : 0));
6565b36f268SRichard Henderson    store_atomic4(pv + 4, val >> (HOST_BIG_ENDIAN ? 0 : 32));
6575b36f268SRichard Henderson}
6585b36f268SRichard Henderson
6595b36f268SRichard Henderson/**
6605b36f268SRichard Henderson * store_atom_insert_al4:
6615b36f268SRichard Henderson * @p: host address
6625b36f268SRichard Henderson * @val: shifted value to store
6635b36f268SRichard Henderson * @msk: mask for value to store
6645b36f268SRichard Henderson *
6655b36f268SRichard Henderson * Atomically store @val to @p, masked by @msk.
6665b36f268SRichard Henderson */
6675b36f268SRichard Hendersonstatic void store_atom_insert_al4(uint32_t *p, uint32_t val, uint32_t msk)
6685b36f268SRichard Henderson{
6695b36f268SRichard Henderson    uint32_t old, new;
6705b36f268SRichard Henderson
6715b36f268SRichard Henderson    p = __builtin_assume_aligned(p, 4);
6725b36f268SRichard Henderson    old = qatomic_read(p);
6735b36f268SRichard Henderson    do {
6745b36f268SRichard Henderson        new = (old & ~msk) | val;
6755b36f268SRichard Henderson    } while (!__atomic_compare_exchange_n(p, &old, new, true,
6765b36f268SRichard Henderson                                          __ATOMIC_RELAXED, __ATOMIC_RELAXED));
6775b36f268SRichard Henderson}
6785b36f268SRichard Henderson
6795b36f268SRichard Henderson/**
6805b36f268SRichard Henderson * store_atom_insert_al8:
6815b36f268SRichard Henderson * @p: host address
6825b36f268SRichard Henderson * @val: shifted value to store
6835b36f268SRichard Henderson * @msk: mask for value to store
6845b36f268SRichard Henderson *
6855b36f268SRichard Henderson * Atomically store @val to @p masked by @msk.
6865b36f268SRichard Henderson */
6875b36f268SRichard Hendersonstatic void store_atom_insert_al8(uint64_t *p, uint64_t val, uint64_t msk)
6885b36f268SRichard Henderson{
6895b36f268SRichard Henderson    uint64_t old, new;
6905b36f268SRichard Henderson
6915b36f268SRichard Henderson    qemu_build_assert(HAVE_al8);
6925b36f268SRichard Henderson    p = __builtin_assume_aligned(p, 8);
6935b36f268SRichard Henderson    old = qatomic_read__nocheck(p);
6945b36f268SRichard Henderson    do {
6955b36f268SRichard Henderson        new = (old & ~msk) | val;
6965b36f268SRichard Henderson    } while (!__atomic_compare_exchange_n(p, &old, new, true,
6975b36f268SRichard Henderson                                          __ATOMIC_RELAXED, __ATOMIC_RELAXED));
6985b36f268SRichard Henderson}
6995b36f268SRichard Henderson
/**
 * store_bytes_leN:
 * @pv: host address
 * @size: number of bytes to store
 * @val_le: data to store
 *
 * Store @size bytes at @pv, one byte at a time.  Bytes are taken from
 * @val_le least-significant first, so the lowest byte lands at the
 * lowest address.
 *
 * Returns @val_le shifted right by the bytes consumed, i.e. the bytes
 * of @val_le beyond @size that have not been stored.
 */
static uint64_t store_bytes_leN(void *pv, int size, uint64_t val_le)
{
    uint8_t *dst = pv;
    int remaining = size;

    while (remaining > 0) {
        *dst++ = val_le;        /* truncates to the low byte */
        val_le >>= 8;
        remaining--;
    }
    return val_le;
}
7175b36f268SRichard Henderson
7185b36f268SRichard Henderson/**
7195b36f268SRichard Henderson * store_parts_leN
7205b36f268SRichard Henderson * @pv: host address
7215b36f268SRichard Henderson * @size: number of bytes to store
7225b36f268SRichard Henderson * @val_le: data to store
7235b36f268SRichard Henderson *
7245b36f268SRichard Henderson * As store_bytes_leN, but atomically on each aligned part.
7255b36f268SRichard Henderson */
7265b36f268SRichard HendersonG_GNUC_UNUSED
7275b36f268SRichard Hendersonstatic uint64_t store_parts_leN(void *pv, int size, uint64_t val_le)
7285b36f268SRichard Henderson{
7295b36f268SRichard Henderson    do {
7305b36f268SRichard Henderson        int n;
7315b36f268SRichard Henderson
7325b36f268SRichard Henderson        /* Find minimum of alignment and size */
7335b36f268SRichard Henderson        switch (((uintptr_t)pv | size) & 7) {
7345b36f268SRichard Henderson        case 4:
7355b36f268SRichard Henderson            store_atomic4(pv, le32_to_cpu(val_le));
7365b36f268SRichard Henderson            val_le >>= 32;
7375b36f268SRichard Henderson            n = 4;
7385b36f268SRichard Henderson            break;
7395b36f268SRichard Henderson        case 2:
7405b36f268SRichard Henderson        case 6:
7415b36f268SRichard Henderson            store_atomic2(pv, le16_to_cpu(val_le));
7425b36f268SRichard Henderson            val_le >>= 16;
7435b36f268SRichard Henderson            n = 2;
7445b36f268SRichard Henderson            break;
7455b36f268SRichard Henderson        default:
7465b36f268SRichard Henderson            *(uint8_t *)pv = val_le;
7475b36f268SRichard Henderson            val_le >>= 8;
7485b36f268SRichard Henderson            n = 1;
7495b36f268SRichard Henderson            break;
7505b36f268SRichard Henderson        case 0:
7515b36f268SRichard Henderson            g_assert_not_reached();
7525b36f268SRichard Henderson        }
7535b36f268SRichard Henderson        pv += n;
7545b36f268SRichard Henderson        size -= n;
7555b36f268SRichard Henderson    } while (size != 0);
7565b36f268SRichard Henderson
7575b36f268SRichard Henderson    return val_le;
7585b36f268SRichard Henderson}
7595b36f268SRichard Henderson
7605b36f268SRichard Henderson/**
7615b36f268SRichard Henderson * store_whole_le4
7625b36f268SRichard Henderson * @pv: host address
7635b36f268SRichard Henderson * @size: number of bytes to store
7645b36f268SRichard Henderson * @val_le: data to store
7655b36f268SRichard Henderson *
7665b36f268SRichard Henderson * As store_bytes_leN, but atomically as a whole.
7675b36f268SRichard Henderson * Four aligned bytes are guaranteed to cover the store.
7685b36f268SRichard Henderson */
7695b36f268SRichard Hendersonstatic uint64_t store_whole_le4(void *pv, int size, uint64_t val_le)
7705b36f268SRichard Henderson{
7715b36f268SRichard Henderson    int sz = size * 8;
7725b36f268SRichard Henderson    int o = (uintptr_t)pv & 3;
7735b36f268SRichard Henderson    int sh = o * 8;
7745b36f268SRichard Henderson    uint32_t m = MAKE_64BIT_MASK(0, sz);
7755b36f268SRichard Henderson    uint32_t v;
7765b36f268SRichard Henderson
7775b36f268SRichard Henderson    if (HOST_BIG_ENDIAN) {
7785b36f268SRichard Henderson        v = bswap32(val_le) >> sh;
7795b36f268SRichard Henderson        m = bswap32(m) >> sh;
7805b36f268SRichard Henderson    } else {
7815b36f268SRichard Henderson        v = val_le << sh;
7825b36f268SRichard Henderson        m <<= sh;
7835b36f268SRichard Henderson    }
7845b36f268SRichard Henderson    store_atom_insert_al4(pv - o, v, m);
7855b36f268SRichard Henderson    return val_le >> sz;
7865b36f268SRichard Henderson}
7875b36f268SRichard Henderson
7885b36f268SRichard Henderson/**
7895b36f268SRichard Henderson * store_whole_le8
7905b36f268SRichard Henderson * @pv: host address
7915b36f268SRichard Henderson * @size: number of bytes to store
7925b36f268SRichard Henderson * @val_le: data to store
7935b36f268SRichard Henderson *
7945b36f268SRichard Henderson * As store_bytes_leN, but atomically as a whole.
7955b36f268SRichard Henderson * Eight aligned bytes are guaranteed to cover the store.
7965b36f268SRichard Henderson */
7975b36f268SRichard Hendersonstatic uint64_t store_whole_le8(void *pv, int size, uint64_t val_le)
7985b36f268SRichard Henderson{
7995b36f268SRichard Henderson    int sz = size * 8;
8005b36f268SRichard Henderson    int o = (uintptr_t)pv & 7;
8015b36f268SRichard Henderson    int sh = o * 8;
8025b36f268SRichard Henderson    uint64_t m = MAKE_64BIT_MASK(0, sz);
8035b36f268SRichard Henderson    uint64_t v;
8045b36f268SRichard Henderson
8055b36f268SRichard Henderson    qemu_build_assert(HAVE_al8);
8065b36f268SRichard Henderson    if (HOST_BIG_ENDIAN) {
8075b36f268SRichard Henderson        v = bswap64(val_le) >> sh;
8085b36f268SRichard Henderson        m = bswap64(m) >> sh;
8095b36f268SRichard Henderson    } else {
8105b36f268SRichard Henderson        v = val_le << sh;
8115b36f268SRichard Henderson        m <<= sh;
8125b36f268SRichard Henderson    }
8135b36f268SRichard Henderson    store_atom_insert_al8(pv - o, v, m);
8145b36f268SRichard Henderson    return val_le >> sz;
8155b36f268SRichard Henderson}
8165b36f268SRichard Henderson
8175b36f268SRichard Henderson/**
8185b36f268SRichard Henderson * store_whole_le16
8195b36f268SRichard Henderson * @pv: host address
8205b36f268SRichard Henderson * @size: number of bytes to store
8215b36f268SRichard Henderson * @val_le: data to store
8225b36f268SRichard Henderson *
8235b36f268SRichard Henderson * As store_bytes_leN, but atomically as a whole.
8245b36f268SRichard Henderson * 16 aligned bytes are guaranteed to cover the store.
8255b36f268SRichard Henderson */
8265b36f268SRichard Hendersonstatic uint64_t store_whole_le16(void *pv, int size, Int128 val_le)
8275b36f268SRichard Henderson{
8285b36f268SRichard Henderson    int sz = size * 8;
8295b36f268SRichard Henderson    int o = (uintptr_t)pv & 15;
8305b36f268SRichard Henderson    int sh = o * 8;
8315b36f268SRichard Henderson    Int128 m, v;
8325b36f268SRichard Henderson
8336046f6e9SRichard Henderson    qemu_build_assert(HAVE_CMPXCHG128);
8345b36f268SRichard Henderson
8355b36f268SRichard Henderson    /* Like MAKE_64BIT_MASK(0, sz), but larger. */
8365b36f268SRichard Henderson    if (sz <= 64) {
8375b36f268SRichard Henderson        m = int128_make64(MAKE_64BIT_MASK(0, sz));
8385b36f268SRichard Henderson    } else {
8395b36f268SRichard Henderson        m = int128_make128(-1, MAKE_64BIT_MASK(0, sz - 64));
8405b36f268SRichard Henderson    }
8415b36f268SRichard Henderson
8425b36f268SRichard Henderson    if (HOST_BIG_ENDIAN) {
8435b36f268SRichard Henderson        v = int128_urshift(bswap128(val_le), sh);
8445b36f268SRichard Henderson        m = int128_urshift(bswap128(m), sh);
8455b36f268SRichard Henderson    } else {
8465b36f268SRichard Henderson        v = int128_lshift(val_le, sh);
8475b36f268SRichard Henderson        m = int128_lshift(m, sh);
8485b36f268SRichard Henderson    }
8495b36f268SRichard Henderson    store_atom_insert_al16(pv - o, v, m);
8505b36f268SRichard Henderson
851c0dde5fcSRichard Henderson    if (sz <= 64) {
852c0dde5fcSRichard Henderson        return 0;
853c0dde5fcSRichard Henderson    }
8545b36f268SRichard Henderson    return int128_gethi(val_le) >> (sz - 64);
8555b36f268SRichard Henderson}
8565b36f268SRichard Henderson
8575b36f268SRichard Henderson/**
8585b36f268SRichard Henderson * store_atom_2:
8595b36f268SRichard Henderson * @p: host address
8605b36f268SRichard Henderson * @val: the value to store
8615b36f268SRichard Henderson * @memop: the full memory op
8625b36f268SRichard Henderson *
8635b36f268SRichard Henderson * Store 2 bytes to @p, honoring the atomicity of @memop.
8645b36f268SRichard Henderson */
86573fda56fSAnton Johanssonstatic void store_atom_2(CPUState *cpu, uintptr_t ra,
8665b36f268SRichard Henderson                         void *pv, MemOp memop, uint16_t val)
8675b36f268SRichard Henderson{
8685b36f268SRichard Henderson    uintptr_t pi = (uintptr_t)pv;
8695b36f268SRichard Henderson    int atmax;
8705b36f268SRichard Henderson
8715b36f268SRichard Henderson    if (likely((pi & 1) == 0)) {
8725b36f268SRichard Henderson        store_atomic2(pv, val);
8735b36f268SRichard Henderson        return;
8745b36f268SRichard Henderson    }
8755b36f268SRichard Henderson
87673fda56fSAnton Johansson    atmax = required_atomicity(cpu, pi, memop);
8775b36f268SRichard Henderson    if (atmax == MO_8) {
8785b36f268SRichard Henderson        stw_he_p(pv, val);
8795b36f268SRichard Henderson        return;
8805b36f268SRichard Henderson    }
8815b36f268SRichard Henderson
8825b36f268SRichard Henderson    /*
8835b36f268SRichard Henderson     * The only case remaining is MO_ATOM_WITHIN16.
8845b36f268SRichard Henderson     * Big or little endian, we want the middle two bytes in each test.
8855b36f268SRichard Henderson     */
8865b36f268SRichard Henderson    if ((pi & 3) == 1) {
8875b36f268SRichard Henderson        store_atom_insert_al4(pv - 1, (uint32_t)val << 8, MAKE_64BIT_MASK(8, 16));
8885b36f268SRichard Henderson        return;
8895b36f268SRichard Henderson    } else if ((pi & 7) == 3) {
8905b36f268SRichard Henderson        if (HAVE_al8) {
8915b36f268SRichard Henderson            store_atom_insert_al8(pv - 3, (uint64_t)val << 24, MAKE_64BIT_MASK(24, 16));
8925b36f268SRichard Henderson            return;
8935b36f268SRichard Henderson        }
8945b36f268SRichard Henderson    } else if ((pi & 15) == 7) {
8956046f6e9SRichard Henderson        if (HAVE_CMPXCHG128) {
8965b36f268SRichard Henderson            Int128 v = int128_lshift(int128_make64(val), 56);
8975b36f268SRichard Henderson            Int128 m = int128_lshift(int128_make64(0xffff), 56);
8985b36f268SRichard Henderson            store_atom_insert_al16(pv - 7, v, m);
8995b36f268SRichard Henderson            return;
9005b36f268SRichard Henderson        }
9015b36f268SRichard Henderson    } else {
9025b36f268SRichard Henderson        g_assert_not_reached();
9035b36f268SRichard Henderson    }
9045b36f268SRichard Henderson
905*b24bad34SAlex Bennée    trace_store_atom2_fallback(memop, ra);
90673fda56fSAnton Johansson    cpu_loop_exit_atomic(cpu, ra);
9075b36f268SRichard Henderson}
9085b36f268SRichard Henderson
9095b36f268SRichard Henderson/**
9105b36f268SRichard Henderson * store_atom_4:
9115b36f268SRichard Henderson * @p: host address
9125b36f268SRichard Henderson * @val: the value to store
9135b36f268SRichard Henderson * @memop: the full memory op
9145b36f268SRichard Henderson *
9155b36f268SRichard Henderson * Store 4 bytes to @p, honoring the atomicity of @memop.
9165b36f268SRichard Henderson */
91773fda56fSAnton Johanssonstatic void store_atom_4(CPUState *cpu, uintptr_t ra,
9185b36f268SRichard Henderson                         void *pv, MemOp memop, uint32_t val)
9195b36f268SRichard Henderson{
9205b36f268SRichard Henderson    uintptr_t pi = (uintptr_t)pv;
9215b36f268SRichard Henderson    int atmax;
9225b36f268SRichard Henderson
9235b36f268SRichard Henderson    if (likely((pi & 3) == 0)) {
9245b36f268SRichard Henderson        store_atomic4(pv, val);
9255b36f268SRichard Henderson        return;
9265b36f268SRichard Henderson    }
9275b36f268SRichard Henderson
92873fda56fSAnton Johansson    atmax = required_atomicity(cpu, pi, memop);
9295b36f268SRichard Henderson    switch (atmax) {
9305b36f268SRichard Henderson    case MO_8:
9315b36f268SRichard Henderson        stl_he_p(pv, val);
9325b36f268SRichard Henderson        return;
9335b36f268SRichard Henderson    case MO_16:
9345b36f268SRichard Henderson        store_atom_4_by_2(pv, val);
9355b36f268SRichard Henderson        return;
9365b36f268SRichard Henderson    case -MO_16:
9375b36f268SRichard Henderson        {
9385b36f268SRichard Henderson            uint32_t val_le = cpu_to_le32(val);
9395b36f268SRichard Henderson            int s2 = pi & 3;
9405b36f268SRichard Henderson            int s1 = 4 - s2;
9415b36f268SRichard Henderson
9425b36f268SRichard Henderson            switch (s2) {
9435b36f268SRichard Henderson            case 1:
9445b36f268SRichard Henderson                val_le = store_whole_le4(pv, s1, val_le);
9455b36f268SRichard Henderson                *(uint8_t *)(pv + 3) = val_le;
9465b36f268SRichard Henderson                break;
9475b36f268SRichard Henderson            case 3:
9485b36f268SRichard Henderson                *(uint8_t *)pv = val_le;
9495b36f268SRichard Henderson                store_whole_le4(pv + 1, s2, val_le >> 8);
9505b36f268SRichard Henderson                break;
9515b36f268SRichard Henderson            case 0: /* aligned */
9525b36f268SRichard Henderson            case 2: /* atmax MO_16 */
9535b36f268SRichard Henderson            default:
9545b36f268SRichard Henderson                g_assert_not_reached();
9555b36f268SRichard Henderson            }
9565b36f268SRichard Henderson        }
9575b36f268SRichard Henderson        return;
9585b36f268SRichard Henderson    case MO_32:
9595b36f268SRichard Henderson        if ((pi & 7) < 4) {
9605b36f268SRichard Henderson            if (HAVE_al8) {
9615b36f268SRichard Henderson                store_whole_le8(pv, 4, cpu_to_le32(val));
9625b36f268SRichard Henderson                return;
9635b36f268SRichard Henderson            }
9645b36f268SRichard Henderson        } else {
9656046f6e9SRichard Henderson            if (HAVE_CMPXCHG128) {
9665b36f268SRichard Henderson                store_whole_le16(pv, 4, int128_make64(cpu_to_le32(val)));
9675b36f268SRichard Henderson                return;
9685b36f268SRichard Henderson            }
9695b36f268SRichard Henderson        }
970*b24bad34SAlex Bennée        trace_store_atom4_fallback(memop, ra);
97173fda56fSAnton Johansson        cpu_loop_exit_atomic(cpu, ra);
9725b36f268SRichard Henderson    default:
9735b36f268SRichard Henderson        g_assert_not_reached();
9745b36f268SRichard Henderson    }
9755b36f268SRichard Henderson}
9765b36f268SRichard Henderson
9775b36f268SRichard Henderson/**
9785b36f268SRichard Henderson * store_atom_8:
9795b36f268SRichard Henderson * @p: host address
9805b36f268SRichard Henderson * @val: the value to store
9815b36f268SRichard Henderson * @memop: the full memory op
9825b36f268SRichard Henderson *
9835b36f268SRichard Henderson * Store 8 bytes to @p, honoring the atomicity of @memop.
9845b36f268SRichard Henderson */
98573fda56fSAnton Johanssonstatic void store_atom_8(CPUState *cpu, uintptr_t ra,
9865b36f268SRichard Henderson                         void *pv, MemOp memop, uint64_t val)
9875b36f268SRichard Henderson{
9885b36f268SRichard Henderson    uintptr_t pi = (uintptr_t)pv;
9895b36f268SRichard Henderson    int atmax;
9905b36f268SRichard Henderson
9915b36f268SRichard Henderson    if (HAVE_al8 && likely((pi & 7) == 0)) {
9925b36f268SRichard Henderson        store_atomic8(pv, val);
9935b36f268SRichard Henderson        return;
9945b36f268SRichard Henderson    }
9955b36f268SRichard Henderson
99673fda56fSAnton Johansson    atmax = required_atomicity(cpu, pi, memop);
9975b36f268SRichard Henderson    switch (atmax) {
9985b36f268SRichard Henderson    case MO_8:
9995b36f268SRichard Henderson        stq_he_p(pv, val);
10005b36f268SRichard Henderson        return;
10015b36f268SRichard Henderson    case MO_16:
10025b36f268SRichard Henderson        store_atom_8_by_2(pv, val);
10035b36f268SRichard Henderson        return;
10045b36f268SRichard Henderson    case MO_32:
10055b36f268SRichard Henderson        store_atom_8_by_4(pv, val);
10065b36f268SRichard Henderson        return;
10075b36f268SRichard Henderson    case -MO_32:
10085b36f268SRichard Henderson        if (HAVE_al8) {
10095b36f268SRichard Henderson            uint64_t val_le = cpu_to_le64(val);
10105b36f268SRichard Henderson            int s2 = pi & 7;
10115b36f268SRichard Henderson            int s1 = 8 - s2;
10125b36f268SRichard Henderson
10135b36f268SRichard Henderson            switch (s2) {
10145b36f268SRichard Henderson            case 1 ... 3:
10155b36f268SRichard Henderson                val_le = store_whole_le8(pv, s1, val_le);
10165b36f268SRichard Henderson                store_bytes_leN(pv + s1, s2, val_le);
10175b36f268SRichard Henderson                break;
10185b36f268SRichard Henderson            case 5 ... 7:
10195b36f268SRichard Henderson                val_le = store_bytes_leN(pv, s1, val_le);
10205b36f268SRichard Henderson                store_whole_le8(pv + s1, s2, val_le);
10215b36f268SRichard Henderson                break;
10225b36f268SRichard Henderson            case 0: /* aligned */
10235b36f268SRichard Henderson            case 4: /* atmax MO_32 */
10245b36f268SRichard Henderson            default:
10255b36f268SRichard Henderson                g_assert_not_reached();
10265b36f268SRichard Henderson            }
10275b36f268SRichard Henderson            return;
10285b36f268SRichard Henderson        }
10295b36f268SRichard Henderson        break;
10305b36f268SRichard Henderson    case MO_64:
10316046f6e9SRichard Henderson        if (HAVE_CMPXCHG128) {
10325b36f268SRichard Henderson            store_whole_le16(pv, 8, int128_make64(cpu_to_le64(val)));
10335b36f268SRichard Henderson            return;
10345b36f268SRichard Henderson        }
10355b36f268SRichard Henderson        break;
10365b36f268SRichard Henderson    default:
10375b36f268SRichard Henderson        g_assert_not_reached();
10385b36f268SRichard Henderson    }
1039*b24bad34SAlex Bennée    trace_store_atom8_fallback(memop, ra);
104073fda56fSAnton Johansson    cpu_loop_exit_atomic(cpu, ra);
10415b36f268SRichard Henderson}
104235c653c4SRichard Henderson
104335c653c4SRichard Henderson/**
104435c653c4SRichard Henderson * store_atom_16:
104535c653c4SRichard Henderson * @p: host address
104635c653c4SRichard Henderson * @val: the value to store
104735c653c4SRichard Henderson * @memop: the full memory op
104835c653c4SRichard Henderson *
104935c653c4SRichard Henderson * Store 16 bytes to @p, honoring the atomicity of @memop.
105035c653c4SRichard Henderson */
105173fda56fSAnton Johanssonstatic void store_atom_16(CPUState *cpu, uintptr_t ra,
105235c653c4SRichard Henderson                          void *pv, MemOp memop, Int128 val)
105335c653c4SRichard Henderson{
105435c653c4SRichard Henderson    uintptr_t pi = (uintptr_t)pv;
105535c653c4SRichard Henderson    uint64_t a, b;
105635c653c4SRichard Henderson    int atmax;
105735c653c4SRichard Henderson
10588dc24ff4SRichard Henderson    if (HAVE_ATOMIC128_RW && likely((pi & 15) == 0)) {
10598dc24ff4SRichard Henderson        atomic16_set(pv, val);
106035c653c4SRichard Henderson        return;
106135c653c4SRichard Henderson    }
106235c653c4SRichard Henderson
106373fda56fSAnton Johansson    atmax = required_atomicity(cpu, pi, memop);
106435c653c4SRichard Henderson
106535c653c4SRichard Henderson    a = HOST_BIG_ENDIAN ? int128_gethi(val) : int128_getlo(val);
106635c653c4SRichard Henderson    b = HOST_BIG_ENDIAN ? int128_getlo(val) : int128_gethi(val);
106735c653c4SRichard Henderson    switch (atmax) {
106835c653c4SRichard Henderson    case MO_8:
106935c653c4SRichard Henderson        memcpy(pv, &val, 16);
107035c653c4SRichard Henderson        return;
107135c653c4SRichard Henderson    case MO_16:
107235c653c4SRichard Henderson        store_atom_8_by_2(pv, a);
107335c653c4SRichard Henderson        store_atom_8_by_2(pv + 8, b);
107435c653c4SRichard Henderson        return;
107535c653c4SRichard Henderson    case MO_32:
107635c653c4SRichard Henderson        store_atom_8_by_4(pv, a);
107735c653c4SRichard Henderson        store_atom_8_by_4(pv + 8, b);
107835c653c4SRichard Henderson        return;
107935c653c4SRichard Henderson    case MO_64:
108035c653c4SRichard Henderson        if (HAVE_al8) {
108135c653c4SRichard Henderson            store_atomic8(pv, a);
108235c653c4SRichard Henderson            store_atomic8(pv + 8, b);
108335c653c4SRichard Henderson            return;
108435c653c4SRichard Henderson        }
108535c653c4SRichard Henderson        break;
108635c653c4SRichard Henderson    case -MO_64:
10876046f6e9SRichard Henderson        if (HAVE_CMPXCHG128) {
108835c653c4SRichard Henderson            uint64_t val_le;
108935c653c4SRichard Henderson            int s2 = pi & 15;
109035c653c4SRichard Henderson            int s1 = 16 - s2;
109135c653c4SRichard Henderson
109235c653c4SRichard Henderson            if (HOST_BIG_ENDIAN) {
109335c653c4SRichard Henderson                val = bswap128(val);
109435c653c4SRichard Henderson            }
109535c653c4SRichard Henderson            switch (s2) {
109635c653c4SRichard Henderson            case 1 ... 7:
109735c653c4SRichard Henderson                val_le = store_whole_le16(pv, s1, val);
109835c653c4SRichard Henderson                store_bytes_leN(pv + s1, s2, val_le);
109935c653c4SRichard Henderson                break;
110035c653c4SRichard Henderson            case 9 ... 15:
110135c653c4SRichard Henderson                store_bytes_leN(pv, s1, int128_getlo(val));
110235c653c4SRichard Henderson                val = int128_urshift(val, s1 * 8);
110335c653c4SRichard Henderson                store_whole_le16(pv + s1, s2, val);
110435c653c4SRichard Henderson                break;
110535c653c4SRichard Henderson            case 0: /* aligned */
110635c653c4SRichard Henderson            case 8: /* atmax MO_64 */
110735c653c4SRichard Henderson            default:
110835c653c4SRichard Henderson                g_assert_not_reached();
110935c653c4SRichard Henderson            }
111035c653c4SRichard Henderson            return;
111135c653c4SRichard Henderson        }
111235c653c4SRichard Henderson        break;
111335c653c4SRichard Henderson    case MO_128:
111435c653c4SRichard Henderson        break;
111535c653c4SRichard Henderson    default:
111635c653c4SRichard Henderson        g_assert_not_reached();
111735c653c4SRichard Henderson    }
1118*b24bad34SAlex Bennée    trace_store_atom16_fallback(memop, ra);
111973fda56fSAnton Johansson    cpu_loop_exit_atomic(cpu, ra);
112035c653c4SRichard Henderson}
1121