1aead9dc9SPaolo Bonzini /*
2aead9dc9SPaolo Bonzini * Graph lock: rwlock to protect block layer graph manipulations (add/remove
3aead9dc9SPaolo Bonzini * edges and nodes)
4aead9dc9SPaolo Bonzini *
5aead9dc9SPaolo Bonzini * Copyright (c) 2022 Red Hat
6aead9dc9SPaolo Bonzini *
7aead9dc9SPaolo Bonzini * This library is free software; you can redistribute it and/or
8aead9dc9SPaolo Bonzini * modify it under the terms of the GNU Lesser General Public
9aead9dc9SPaolo Bonzini * License as published by the Free Software Foundation; either
10aead9dc9SPaolo Bonzini * version 2.1 of the License, or (at your option) any later version.
11aead9dc9SPaolo Bonzini *
12aead9dc9SPaolo Bonzini * This library is distributed in the hope that it will be useful,
13aead9dc9SPaolo Bonzini * but WITHOUT ANY WARRANTY; without even the implied warranty of
14aead9dc9SPaolo Bonzini * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15aead9dc9SPaolo Bonzini * Lesser General Public License for more details.
16aead9dc9SPaolo Bonzini *
17aead9dc9SPaolo Bonzini * You should have received a copy of the GNU Lesser General Public
18aead9dc9SPaolo Bonzini * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19aead9dc9SPaolo Bonzini */
20aead9dc9SPaolo Bonzini #ifndef GRAPH_LOCK_H
21aead9dc9SPaolo Bonzini #define GRAPH_LOCK_H
22aead9dc9SPaolo Bonzini
234002ffdcSKevin Wolf #include "qemu/clang-tsa.h"
24aead9dc9SPaolo Bonzini
/**
 * Graph Lock API
 * This API provides a rwlock used to protect block layer
 * graph modifications like edge (BdrvChild) and node (BlockDriverState)
 * addition and removal.
 * Currently we have 1 writer only, the Main loop, and many
 * readers, mostly coroutines running in other AioContext thus other threads.
 *
 * We distinguish between writer (main loop, under BQL) that modifies the
 * graph, and readers (all other coroutines running in various AioContext),
 * that go through the graph edges, reading
 * BlockDriverState->parents and ->children.
 *
 * The writer (main loop) has an "exclusive" access, so it first waits for
 * current reads to finish, and then prevents incoming ones from
 * entering while it has the exclusive access.
 *
 * The readers (coroutines in multiple AioContext) are free to
 * access the graph as long as the writer is not modifying the graph.
 * In case it is, they go in a CoQueue and sleep until the writer
 * is done.
 *
 * If a coroutine changes AioContext, the counters in the original and new
 * AioContext are left intact, since the writer does not care where the
 * reader is, but only whether there is one.
 * As a result, some AioContexts might have a negative reader count, to
 * balance the positive count of the AioContext that took the lock.
 * This also means that when an AioContext is deleted it may have a nonzero
 * reader count. In that case we transfer the count to a global shared counter
 * so that the writer is always aware of all readers.
 */
56aead9dc9SPaolo Bonzini typedef struct BdrvGraphRWlock BdrvGraphRWlock;
57aead9dc9SPaolo Bonzini
584002ffdcSKevin Wolf /* Dummy lock object to use for Thread Safety Analysis (TSA) */
594002ffdcSKevin Wolf typedef struct TSA_CAPABILITY("mutex") BdrvGraphLock {
604002ffdcSKevin Wolf } BdrvGraphLock;
614002ffdcSKevin Wolf
624002ffdcSKevin Wolf extern BdrvGraphLock graph_lock;
634002ffdcSKevin Wolf
/*
 * clang doesn't check consistency in locking annotations between forward
 * declarations and the function definition. Having the annotation on the
 * definition, but not the declaration in a header file, may give the reader
 * a false sense of security because the condition actually remains unchecked
 * for callers in other source files.
 *
 * Therefore, as a convention, for public functions, GRAPH_RDLOCK and
 * GRAPH_WRLOCK annotations should be present only in the header file.
 */
#define GRAPH_WRLOCK TSA_REQUIRES(graph_lock)
#define GRAPH_RDLOCK TSA_REQUIRES_SHARED(graph_lock)
#define GRAPH_UNLOCKED TSA_EXCLUDES(graph_lock)
774002ffdcSKevin Wolf
/*
 * TSA annotations are not part of function types, so checks are defeated when
 * using a function pointer. As a workaround, annotate function pointers with
 * this macro that will require that the lock is at least taken while reading
 * the pointer. In most cases this is equivalent to actually protecting the
 * function call.
 *
 * GRAPH_RDLOCK_PTR and GRAPH_WRLOCK_PTR expand to the same annotation;
 * GRAPH_UNLOCKED_PTR expands to nothing.
 */
#define GRAPH_RDLOCK_PTR TSA_GUARDED_BY(graph_lock)
#define GRAPH_WRLOCK_PTR TSA_GUARDED_BY(graph_lock)
#define GRAPH_UNLOCKED_PTR
884002ffdcSKevin Wolf
89aead9dc9SPaolo Bonzini /*
90aead9dc9SPaolo Bonzini * register_aiocontext:
91aead9dc9SPaolo Bonzini * Add AioContext @ctx to the list of AioContext.
92aead9dc9SPaolo Bonzini * This list is used to obtain the total number of readers
93aead9dc9SPaolo Bonzini * currently running the graph.
94aead9dc9SPaolo Bonzini */
95aead9dc9SPaolo Bonzini void register_aiocontext(AioContext *ctx);
96aead9dc9SPaolo Bonzini
97aead9dc9SPaolo Bonzini /*
98aead9dc9SPaolo Bonzini * unregister_aiocontext:
99aead9dc9SPaolo Bonzini * Removes AioContext @ctx to the list of AioContext.
100aead9dc9SPaolo Bonzini */
101aead9dc9SPaolo Bonzini void unregister_aiocontext(AioContext *ctx);
102aead9dc9SPaolo Bonzini
103aead9dc9SPaolo Bonzini /*
104aead9dc9SPaolo Bonzini * bdrv_graph_wrlock:
105aead9dc9SPaolo Bonzini * Start an exclusive write operation to modify the graph. This means we are
106aead9dc9SPaolo Bonzini * adding or removing an edge or a node in the block layer graph. Nobody else
107aead9dc9SPaolo Bonzini * is allowed to access the graph.
108aead9dc9SPaolo Bonzini *
109aead9dc9SPaolo Bonzini * Must only be called from outside bdrv_graph_co_rdlock.
110aead9dc9SPaolo Bonzini *
111aead9dc9SPaolo Bonzini * The wrlock can only be taken from the main loop, with BQL held, as only the
112aead9dc9SPaolo Bonzini * main loop is allowed to modify the graph.
113aead9dc9SPaolo Bonzini */
114e6e964b8SKevin Wolf void no_coroutine_fn TSA_ACQUIRE(graph_lock) TSA_NO_TSA
1156bc30f19SStefan Hajnoczi bdrv_graph_wrlock(void);
116aead9dc9SPaolo Bonzini
117aead9dc9SPaolo Bonzini /*
118aead9dc9SPaolo Bonzini * bdrv_graph_wrunlock:
119aead9dc9SPaolo Bonzini * Write finished, reset global has_writer to 0 and restart
120aead9dc9SPaolo Bonzini * all readers that are waiting.
121aead9dc9SPaolo Bonzini */
1226bc0bcc8SKevin Wolf void no_coroutine_fn TSA_RELEASE(graph_lock) TSA_NO_TSA
1236bc30f19SStefan Hajnoczi bdrv_graph_wrunlock(void);
124aead9dc9SPaolo Bonzini
125aead9dc9SPaolo Bonzini /*
126aead9dc9SPaolo Bonzini * bdrv_graph_co_rdlock:
127aead9dc9SPaolo Bonzini * Read the bs graph. This usually means traversing all nodes in
128aead9dc9SPaolo Bonzini * the graph, therefore it can't happen while another thread is
129aead9dc9SPaolo Bonzini * modifying it.
130aead9dc9SPaolo Bonzini * Increases the reader counter of the current aiocontext,
131aead9dc9SPaolo Bonzini * and if has_writer is set, it means that the writer is modifying
132aead9dc9SPaolo Bonzini * the graph, therefore wait in a coroutine queue.
133aead9dc9SPaolo Bonzini * The writer will then wake this coroutine once it is done.
134aead9dc9SPaolo Bonzini *
135aead9dc9SPaolo Bonzini * This lock should be taken from Iothreads (IO_CODE() class of functions)
136aead9dc9SPaolo Bonzini * because it signals the writer that there are some
137aead9dc9SPaolo Bonzini * readers currently running, or waits until the current
138aead9dc9SPaolo Bonzini * write is finished before continuing.
139aead9dc9SPaolo Bonzini * Calling this function from the Main Loop with BQL held
140aead9dc9SPaolo Bonzini * is not necessary, since the Main Loop itself is the only
141aead9dc9SPaolo Bonzini * writer, thus won't be able to read and write at the same time.
142aead9dc9SPaolo Bonzini * The only exception to that is when we can't take the lock in the
143aead9dc9SPaolo Bonzini * function/coroutine itself, and need to delegate the caller (usually main
144aead9dc9SPaolo Bonzini * loop) to take it and wait that the coroutine ends, so that
145aead9dc9SPaolo Bonzini * we always signal that a reader is running.
146aead9dc9SPaolo Bonzini */
1474002ffdcSKevin Wolf void coroutine_fn TSA_ACQUIRE_SHARED(graph_lock) TSA_NO_TSA
1484002ffdcSKevin Wolf bdrv_graph_co_rdlock(void);
149aead9dc9SPaolo Bonzini
150aead9dc9SPaolo Bonzini /*
151aead9dc9SPaolo Bonzini * bdrv_graph_rdunlock:
152aead9dc9SPaolo Bonzini * Read terminated, decrease the count of readers in the current aiocontext.
153aead9dc9SPaolo Bonzini * If the writer is waiting for reads to finish (has_writer == 1), signal
154aead9dc9SPaolo Bonzini * the writer that we are done via aio_wait_kick() to let it continue.
155aead9dc9SPaolo Bonzini */
1564002ffdcSKevin Wolf void coroutine_fn TSA_RELEASE_SHARED(graph_lock) TSA_NO_TSA
1574002ffdcSKevin Wolf bdrv_graph_co_rdunlock(void);
158aead9dc9SPaolo Bonzini
159aead9dc9SPaolo Bonzini /*
160aead9dc9SPaolo Bonzini * bdrv_graph_rd{un}lock_main_loop:
161aead9dc9SPaolo Bonzini * Just a placeholder to mark where the graph rdlock should be taken
162aead9dc9SPaolo Bonzini * in the main loop. It is just asserting that we are not
163aead9dc9SPaolo Bonzini * in a coroutine and in GLOBAL_STATE_CODE.
164aead9dc9SPaolo Bonzini */
1654002ffdcSKevin Wolf void TSA_ACQUIRE_SHARED(graph_lock) TSA_NO_TSA
1664002ffdcSKevin Wolf bdrv_graph_rdlock_main_loop(void);
1674002ffdcSKevin Wolf
1684002ffdcSKevin Wolf void TSA_RELEASE_SHARED(graph_lock) TSA_NO_TSA
1694002ffdcSKevin Wolf bdrv_graph_rdunlock_main_loop(void);
170aead9dc9SPaolo Bonzini
1713f35f82eSEmanuele Giuseppe Esposito /*
1723f35f82eSEmanuele Giuseppe Esposito * assert_bdrv_graph_readable:
1733f35f82eSEmanuele Giuseppe Esposito * Make sure that the reader is either the main loop,
1743f35f82eSEmanuele Giuseppe Esposito * or there is at least a reader helding the rdlock.
1753f35f82eSEmanuele Giuseppe Esposito * In this way an incoming writer is aware of the read and waits.
1763f35f82eSEmanuele Giuseppe Esposito */
177303de47bSKevin Wolf void GRAPH_RDLOCK assert_bdrv_graph_readable(void);
1783f35f82eSEmanuele Giuseppe Esposito
1793f35f82eSEmanuele Giuseppe Esposito /*
1803f35f82eSEmanuele Giuseppe Esposito * assert_bdrv_graph_writable:
1813f35f82eSEmanuele Giuseppe Esposito * Make sure that the writer is the main loop and has set @has_writer,
1823f35f82eSEmanuele Giuseppe Esposito * so that incoming readers will pause.
1833f35f82eSEmanuele Giuseppe Esposito */
184303de47bSKevin Wolf void GRAPH_WRLOCK assert_bdrv_graph_writable(void);
1853f35f82eSEmanuele Giuseppe Esposito
1864002ffdcSKevin Wolf /*
1874002ffdcSKevin Wolf * Calling this function tells TSA that we know that the lock is effectively
1884002ffdcSKevin Wolf * taken even though we cannot prove it (yet) with GRAPH_RDLOCK. This can be
1894002ffdcSKevin Wolf * useful in intermediate stages of a conversion to using the GRAPH_RDLOCK
1904002ffdcSKevin Wolf * macro.
1914002ffdcSKevin Wolf */
TSA_ASSERT_SHARED(graph_lock)1924002ffdcSKevin Wolf static inline void TSA_ASSERT_SHARED(graph_lock) TSA_NO_TSA
1934002ffdcSKevin Wolf assume_graph_lock(void)
1944002ffdcSKevin Wolf {
1954002ffdcSKevin Wolf }
1964002ffdcSKevin Wolf
/* Empty tag type; a pointer to it is the handle held by the rdlock guards. */
typedef struct GraphLockable { } GraphLockable;

/*
 * In C, compound literals have the lifetime of an automatic variable.
 * In C++ it would be different, but then C++ wouldn't need QemuLockable
 * either...
 */
#define GML_OBJ_() (&(GraphLockable) { })
2058aa77000SEmanuele Giuseppe Esposito
2064002ffdcSKevin Wolf /*
2074ee1f854SKevin Wolf * This is not marked as TSA_ACQUIRE_SHARED() because TSA doesn't understand the
2084002ffdcSKevin Wolf * cleanup attribute and would therefore complain that the graph is never
2094ee1f854SKevin Wolf * unlocked. TSA_ASSERT_SHARED() makes sure that the following calls know that
2104ee1f854SKevin Wolf * we hold the lock while unlocking is left unchecked.
2114002ffdcSKevin Wolf */
TSA_ACQUIRE_SHARED(graph_lock)212*7e171116SKevin Wolf static inline GraphLockable * TSA_ACQUIRE_SHARED(graph_lock) coroutine_fn
2134002ffdcSKevin Wolf graph_lockable_auto_lock(GraphLockable *x)
2148aa77000SEmanuele Giuseppe Esposito {
2158aa77000SEmanuele Giuseppe Esposito bdrv_graph_co_rdlock();
2168aa77000SEmanuele Giuseppe Esposito return x;
2178aa77000SEmanuele Giuseppe Esposito }
2188aa77000SEmanuele Giuseppe Esposito
TSA_RELEASE_SHARED(graph_lock)219*7e171116SKevin Wolf static inline void TSA_RELEASE_SHARED(graph_lock) coroutine_fn
220*7e171116SKevin Wolf graph_lockable_auto_unlock(GraphLockable **x)
2218aa77000SEmanuele Giuseppe Esposito {
2228aa77000SEmanuele Giuseppe Esposito bdrv_graph_co_rdunlock();
2238aa77000SEmanuele Giuseppe Esposito }
2248aa77000SEmanuele Giuseppe Esposito
/*
 * Variable attribute: run graph_lockable_auto_unlock() on the variable when
 * it goes out of scope.
 */
#define GRAPH_AUTO_UNLOCK __attribute__((cleanup(graph_lockable_auto_unlock)))
2268aa77000SEmanuele Giuseppe Esposito
/*
 * @var is only used to break the loop after the first iteration.
 * @unlock_var can't be unlocked and then set to NULL because TSA wants the lock
 * to be held at the start of every iteration of the loop.
 */
#define WITH_GRAPH_RDLOCK_GUARD_(var)                                         \
    for (GraphLockable *unlock_var GRAPH_AUTO_UNLOCK =                        \
            graph_lockable_auto_lock(GML_OBJ_()),                             \
            *var = unlock_var;                                                \
         var;                                                                 \
         var = NULL)

/*
 * Statement prefix: the statement or block that follows runs once with the
 * graph rdlock taken. __COUNTER__ gives the loop variable a unique name.
 */
#define WITH_GRAPH_RDLOCK_GUARD() \
    WITH_GRAPH_RDLOCK_GUARD_(glue(graph_lockable_auto, __COUNTER__))
2418aa77000SEmanuele Giuseppe Esposito
/*
 * Declare a uniquely named guard variable that takes the graph rdlock where
 * the macro is used and releases it, via GRAPH_AUTO_UNLOCK, when the
 * enclosing scope ends. The macro argument @x is not used by the expansion.
 */
#define GRAPH_RDLOCK_GUARD(x)                                       \
    GraphLockable * GRAPH_AUTO_UNLOCK                               \
    glue(graph_lockable_auto, __COUNTER__) G_GNUC_UNUSED =          \
            graph_lockable_auto_lock(GML_OBJ_())
2468aa77000SEmanuele Giuseppe Esposito
2478aa77000SEmanuele Giuseppe Esposito
/* Empty tag type; a pointer to it is the handle held by the main-loop guard. */
typedef struct GraphLockableMainloop { } GraphLockableMainloop;

/*
 * In C, compound literals have the lifetime of an automatic variable.
 * In C++ it would be different, but then C++ wouldn't need QemuLockable
 * either...
 */
#define GMLML_OBJ_() (&(GraphLockableMainloop) { })
2568aa77000SEmanuele Giuseppe Esposito
2574002ffdcSKevin Wolf /*
2584ee1f854SKevin Wolf * This is not marked as TSA_ACQUIRE_SHARED() because TSA doesn't understand the
2594002ffdcSKevin Wolf * cleanup attribute and would therefore complain that the graph is never
2604ee1f854SKevin Wolf * unlocked. TSA_ASSERT_SHARED() makes sure that the following calls know that
2614ee1f854SKevin Wolf * we hold the lock while unlocking is left unchecked.
2624002ffdcSKevin Wolf */
TSA_ASSERT_SHARED(graph_lock)2634ee1f854SKevin Wolf static inline GraphLockableMainloop * TSA_ASSERT_SHARED(graph_lock) TSA_NO_TSA
2648aa77000SEmanuele Giuseppe Esposito graph_lockable_auto_lock_mainloop(GraphLockableMainloop *x)
2658aa77000SEmanuele Giuseppe Esposito {
2668aa77000SEmanuele Giuseppe Esposito bdrv_graph_rdlock_main_loop();
2678aa77000SEmanuele Giuseppe Esposito return x;
2688aa77000SEmanuele Giuseppe Esposito }
2698aa77000SEmanuele Giuseppe Esposito
2704002ffdcSKevin Wolf static inline void TSA_NO_TSA
graph_lockable_auto_unlock_mainloop(GraphLockableMainloop * x)2718aa77000SEmanuele Giuseppe Esposito graph_lockable_auto_unlock_mainloop(GraphLockableMainloop *x)
2728aa77000SEmanuele Giuseppe Esposito {
2738aa77000SEmanuele Giuseppe Esposito bdrv_graph_rdunlock_main_loop();
2748aa77000SEmanuele Giuseppe Esposito }
2758aa77000SEmanuele Giuseppe Esposito
2768aa77000SEmanuele Giuseppe Esposito G_DEFINE_AUTOPTR_CLEANUP_FUNC(GraphLockableMainloop,
2778aa77000SEmanuele Giuseppe Esposito graph_lockable_auto_unlock_mainloop)
2788aa77000SEmanuele Giuseppe Esposito
/*
 * Declare a uniquely named g_autoptr guard that calls
 * graph_lockable_auto_lock_mainloop() where the macro is used and
 * graph_lockable_auto_unlock_mainloop() when the enclosing scope ends.
 * The macro argument @x is not used by the expansion.
 */
#define GRAPH_RDLOCK_GUARD_MAINLOOP(x)                              \
    g_autoptr(GraphLockableMainloop)                                \
    glue(graph_lockable_auto, __COUNTER__) G_GNUC_UNUSED =          \
            graph_lockable_auto_lock_mainloop(GMLML_OBJ_())
2838aa77000SEmanuele Giuseppe Esposito
284aead9dc9SPaolo Bonzini #endif /* GRAPH_LOCK_H */
285aead9dc9SPaolo Bonzini
286