/*
 * urcu-mb.c
 *
 * Userspace RCU library with explicit memory barriers
 *
 * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
 * Copyright 2015 Red Hat, Inc.
 *
 * Ported to QEMU by Paolo Bonzini  <pbonzini@redhat.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * IBM's contributions to this file may be relicensed under LGPLv2 or later.
 */

#include "qemu/osdep.h"
#include "qemu/rcu.h"
#include "qemu/atomic.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
#include "qemu/lockable.h"
#if defined(CONFIG_MALLOC_TRIM)
#include <malloc.h>
#endif

/*
 * Global grace period counter.  Bit 0 is always one in rcu_gp_ctr.
 * Bits 1 and above are defined in synchronize_rcu.
 */
#define RCU_GP_LOCKED           (1UL << 0)
#define RCU_GP_CTR              (1UL << 1)

unsigned long rcu_gp_ctr = RCU_GP_LOCKED;

QemuEvent rcu_gp_event;
static int in_drain_call_rcu;
static QemuMutex rcu_registry_lock;
static QemuMutex rcu_sync_lock;

/*
 * Check whether a quiescent state was crossed between the beginning of
 * update_counter_and_wait and now.
 */
static inline int rcu_gp_ongoing(unsigned long *ctr)
{
    unsigned long v;

    v = qatomic_read(ctr);
    return v && (v != rcu_gp_ctr);
}
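
/*
 * Illustrative summary (a sketch of the read-side protocol implemented in
 * "qemu/rcu.h"): rcu_read_lock() copies rcu_gp_ctr into the per-thread ctr
 * field and rcu_read_unlock() stores zero, so from the writer's point of
 * view the possible cases are:
 *
 *     ctr == 0              quiescent; nothing to wait for
 *     ctr == rcu_gp_ctr     reader started after the current grace period
 *                           began; it cannot hold a reference to the data
 *                           being reclaimed
 *     otherwise             reader predates the grace period and may still
 *                           hold references; rcu_gp_ongoing() returns true
 *                           and the writer keeps waiting
 */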

/* Written to only by each individual reader. Read by both the reader and the
 * writers.
 */
QEMU_DEFINE_CO_TLS(struct rcu_reader_data, rcu_reader)

/* Protected by rcu_registry_lock.  */
typedef QLIST_HEAD(, rcu_reader_data) ThreadList;
static ThreadList registry = QLIST_HEAD_INITIALIZER(registry);

/* Wait for previous parity/grace period to be empty of readers.  */
static void wait_for_readers(void)
{
    ThreadList qsreaders = QLIST_HEAD_INITIALIZER(qsreaders);
    struct rcu_reader_data *index, *tmp;

    for (;;) {
        /* We want to be notified of changes made to rcu_gp_ongoing
         * while we walk the list.
         */
        qemu_event_reset(&rcu_gp_event);

        QLIST_FOREACH(index, &registry, node) {
            qatomic_set(&index->waiting, true);
        }

        /* Here, order the stores to index->waiting before the loads of
         * index->ctr.  Pairs with smp_mb_placeholder() in rcu_read_unlock(),
         * ensuring that the loads of index->ctr are sequentially consistent.
         *
         * If this is the last iteration, this barrier also prevents
         * frees from seeping upwards, and orders the two wait phases
         * on architectures with 32-bit longs; see synchronize_rcu().
         */
        smp_mb_global();

        QLIST_FOREACH_SAFE(index, &registry, node, tmp) {
            if (!rcu_gp_ongoing(&index->ctr)) {
                QLIST_REMOVE(index, node);
                QLIST_INSERT_HEAD(&qsreaders, index, node);

                /* No need for memory barriers here; at worst we
                 * get some extra futex wakeups.
                 */
                qatomic_set(&index->waiting, false);
            } else if (qatomic_read(&in_drain_call_rcu)) {
                notifier_list_notify(&index->force_rcu, NULL);
            }
        }

        if (QLIST_EMPTY(&registry)) {
            break;
        }

        /* Wait for one thread to report a quiescent state and try again.
         * Release rcu_registry_lock, so rcu_(un)register_thread() does not
         * have to wait too long.
         *
         * rcu_register_thread() may add nodes to &registry; it will not
         * wake up synchronize_rcu, but that is okay because at least one
         * other thread must exit its RCU read-side critical section before
         * synchronize_rcu is done.  The next iteration of the loop will
         * move the new thread's rcu_reader from &registry to &qsreaders,
         * because rcu_gp_ongoing() will return false.
         *
         * rcu_unregister_thread() may remove nodes from &qsreaders instead
         * of &registry if it runs during qemu_event_wait.  That's okay;
         * the node then will not be added back to &registry by QLIST_SWAP
         * below.  The invariant is that the node is part of one list when
         * rcu_registry_lock is released.
         */
        qemu_mutex_unlock(&rcu_registry_lock);
        qemu_event_wait(&rcu_gp_event);
        qemu_mutex_lock(&rcu_registry_lock);
    }

    /* put back the reader list in the registry */
    QLIST_SWAP(&registry, &qsreaders, node);
}

void synchronize_rcu(void)
{
    QEMU_LOCK_GUARD(&rcu_sync_lock);

    /* Write RCU-protected pointers before reading p_rcu_reader->ctr.
     * Pairs with smp_mb_placeholder() in rcu_read_lock().
     *
     * Also orders write to RCU-protected pointers before
     * write to rcu_gp_ctr.
     */
    smp_mb_global();

    QEMU_LOCK_GUARD(&rcu_registry_lock);
    if (!QLIST_EMPTY(&registry)) {
        if (sizeof(rcu_gp_ctr) < 8) {
            /* For architectures with 32-bit longs, a two-subphase algorithm
             * ensures we do not encounter overflow bugs.
             *
             * Switch parity: 0 -> 1, 1 -> 0.
             */
            qatomic_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
            wait_for_readers();
            qatomic_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
        } else {
            /* Increment current grace period.  */
            qatomic_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
        }

        wait_for_readers();
    }
}
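
/*
 * Typical writer-side usage of synchronize_rcu() (illustrative sketch only;
 * "Foo", "global_ptr" and "new_foo" are made-up names, while
 * qatomic_rcu_read()/qatomic_rcu_set() come from "qemu/atomic.h"):
 *
 *     Foo *old = qatomic_rcu_read(&global_ptr);
 *     qatomic_rcu_set(&global_ptr, new_foo);
 *     synchronize_rcu();      // wait for all pre-existing readers to leave
 *     g_free(old);            // no reader can still hold a reference to old
 */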

#define RCU_CALL_MIN_SIZE        30

/* Multi-producer, single-consumer queue based on urcu/static/wfqueue.h
 * from liburcu.  Note that head is only used by the consumer.
 */
static struct rcu_head dummy;
static struct rcu_head *head = &dummy, **tail = &dummy.next;
static int rcu_call_count;
static QemuEvent rcu_call_ready_event;

static void enqueue(struct rcu_head *node)
{
    struct rcu_head **old_tail;

    node->next = NULL;

    /*
     * Make this node the tail of the list.  The node will be
     * used by further enqueue operations, but it will not
     * be dequeued yet...
     */
    old_tail = qatomic_xchg(&tail, &node->next);

    /*
     * ... until it is pointed to from another item in the list.
     * In the meantime, try_dequeue() will find a NULL next pointer
     * and loop.
     *
     * Synchronizes with qatomic_load_acquire() in try_dequeue().
     */
    qatomic_store_release(old_tail, node);
}

static struct rcu_head *try_dequeue(void)
{
    struct rcu_head *node, *next;

retry:
    /* Head is only written by this thread, so no need for barriers.  */
    node = head;

    /*
     * If the head node has NULL in its next pointer, the value is
     * wrong and we need to wait until its enqueuer finishes the update.
     */
    next = qatomic_load_acquire(&node->next);
    if (!next) {
        return NULL;
    }

    /*
     * Test for an empty list, which we do not expect.  Note that for
     * the consumer head and tail are always consistent.  The head
     * is consistent because only the consumer reads/writes it.
     * The tail is consistent because updating it is the first step
     * of enqueuing.  It is only the next pointers that might be
     * inconsistent.
     */
    if (head == &dummy && qatomic_read(&tail) == &dummy.next) {
        abort();
    }

    /*
     * Since we are the sole consumer, and we excluded the empty case
     * above, the queue will always have at least two nodes: the
     * dummy node, and the one being removed.  So we do not need to update
     * the tail pointer.
     */
    head = next;

    /* If we dequeued the dummy node, add it back at the end and retry.  */
    if (node == &dummy) {
        enqueue(node);
        goto retry;
    }

    return node;
}
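
/*
 * Illustrative trace of the dummy-based queue above (assumption: a single
 * producer enqueues one node "a" and the consumer then dequeues it):
 *
 *     initial state:  head == &dummy, dummy.next == NULL, tail == &dummy.next
 *     enqueue(&a):    tail = &a.next, then dummy.next = &a (store-release)
 *     try_dequeue():  pops &dummy, re-enqueues it after "a", then pops and
 *                     returns &a, leaving the queue in its initial shape.
 */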

static void *call_rcu_thread(void *opaque)
{
    struct rcu_head *node;

    rcu_register_thread();

    for (;;) {
        int tries = 0;
        int n = qatomic_read(&rcu_call_count);

        /* Heuristically wait for a decent number of callbacks to pile up.
         * Fetch rcu_call_count now; we must only process elements that were
         * added before synchronize_rcu() starts.
         */
        while (n == 0 || (n < RCU_CALL_MIN_SIZE && ++tries <= 5)) {
            g_usleep(10000);
            if (n == 0) {
                qemu_event_reset(&rcu_call_ready_event);
                n = qatomic_read(&rcu_call_count);
                if (n == 0) {
#if defined(CONFIG_MALLOC_TRIM)
                    malloc_trim(4 * 1024 * 1024);
#endif
                    qemu_event_wait(&rcu_call_ready_event);
                }
            }
            n = qatomic_read(&rcu_call_count);
        }

        qatomic_sub(&rcu_call_count, n);
        synchronize_rcu();
        qemu_mutex_lock_iothread();
        while (n > 0) {
            node = try_dequeue();
            while (!node) {
                qemu_mutex_unlock_iothread();
                qemu_event_reset(&rcu_call_ready_event);
                node = try_dequeue();
                if (!node) {
                    qemu_event_wait(&rcu_call_ready_event);
                    node = try_dequeue();
                }
                qemu_mutex_lock_iothread();
            }

            n--;
            node->func(node);
        }
        qemu_mutex_unlock_iothread();
    }
    abort();
}

void call_rcu1(struct rcu_head *node, void (*func)(struct rcu_head *node))
{
    node->func = func;
    enqueue(node);
    qatomic_inc(&rcu_call_count);
    qemu_event_set(&rcu_call_ready_event);
}
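
/*
 * Illustrative sketch of a typical caller (the struct, callback and field
 * names are made up for the example; callers usually go through the
 * call_rcu() macro in "qemu/rcu.h", which expands to call_rcu1()):
 *
 *     struct MyData {
 *         struct rcu_head rcu;
 *         char *buf;
 *     };
 *
 *     static void mydata_free(struct rcu_head *head)
 *     {
 *         struct MyData *d = container_of(head, struct MyData, rcu);
 *         g_free(d->buf);
 *         g_free(d);
 *     }
 *
 *     ...
 *     call_rcu1(&old->rcu, mydata_free);   // reclaimed after a grace period
 */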

struct rcu_drain {
    struct rcu_head rcu;
    QemuEvent drain_complete_event;
};

static void drain_rcu_callback(struct rcu_head *node)
{
    struct rcu_drain *event = (struct rcu_drain *)node;
    qemu_event_set(&event->drain_complete_event);
}

/*
 * This function ensures that all pending RCU callbacks registered on the
 * current thread have finished executing.
 *
 * It drops the big QEMU lock during the wait so that the RCU thread can
 * process the callbacks.
 */
void drain_call_rcu(void)
{
    struct rcu_drain rcu_drain;
    bool locked = qemu_mutex_iothread_locked();

    memset(&rcu_drain, 0, sizeof(struct rcu_drain));
    qemu_event_init(&rcu_drain.drain_complete_event, false);

    if (locked) {
        qemu_mutex_unlock_iothread();
    }

    /*
     * RCU callbacks are invoked in the same order in which they are
     * registered, thus we can be sure that when 'drain_rcu_callback'
     * is called, all RCU callbacks that were registered on this thread
     * prior to calling this function have completed.
     *
     * Note that since we have only one global queue of the RCU callbacks,
     * we also end up waiting for most of the RCU callbacks that were
     * registered on the other threads, but this is a side effect that
     * should not be relied upon.
     */

    qatomic_inc(&in_drain_call_rcu);
    call_rcu1(&rcu_drain.rcu, drain_rcu_callback);
    qemu_event_wait(&rcu_drain.drain_complete_event);
    qatomic_dec(&in_drain_call_rcu);

    if (locked) {
        qemu_mutex_lock_iothread();
    }
}
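
/*
 * Illustrative usage (a sketch; "dev", "state" and the helper names are
 * made up): drain_call_rcu() helps when an object must not be torn down
 * while callbacks queued by this thread might still reference it:
 *
 *     call_rcu1(&dev->state->rcu, free_state_cb);
 *     ...
 *     drain_call_rcu();           // free_state_cb has run by now
 *     finish_teardown(dev);       // nothing queued above references dev
 */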

void rcu_register_thread(void)
{
    assert(get_ptr_rcu_reader()->ctr == 0);
    qemu_mutex_lock(&rcu_registry_lock);
    QLIST_INSERT_HEAD(&registry, get_ptr_rcu_reader(), node);
    qemu_mutex_unlock(&rcu_registry_lock);
}

void rcu_unregister_thread(void)
{
    qemu_mutex_lock(&rcu_registry_lock);
    QLIST_REMOVE(get_ptr_rcu_reader(), node);
    qemu_mutex_unlock(&rcu_registry_lock);
}
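
/*
 * Illustrative sketch of a reader thread (made-up names; rcu_read_lock()
 * and rcu_read_unlock() are the inlines from "qemu/rcu.h"):
 *
 *     static void *reader_thread_fn(void *opaque)
 *     {
 *         rcu_register_thread();
 *         ...
 *         rcu_read_lock();
 *         Foo *p = qatomic_rcu_read(&global_ptr);
 *         use(p);                  // p stays valid until rcu_read_unlock()
 *         rcu_read_unlock();
 *         ...
 *         rcu_unregister_thread();
 *         return NULL;
 *     }
 */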

void rcu_add_force_rcu_notifier(Notifier *n)
{
    qemu_mutex_lock(&rcu_registry_lock);
    notifier_list_add(&get_ptr_rcu_reader()->force_rcu, n);
    qemu_mutex_unlock(&rcu_registry_lock);
}

void rcu_remove_force_rcu_notifier(Notifier *n)
{
    qemu_mutex_lock(&rcu_registry_lock);
    notifier_remove(n);
    qemu_mutex_unlock(&rcu_registry_lock);
}

static void rcu_init_complete(void)
{
    QemuThread thread;

    qemu_mutex_init(&rcu_registry_lock);
    qemu_mutex_init(&rcu_sync_lock);
    qemu_event_init(&rcu_gp_event, true);

    qemu_event_init(&rcu_call_ready_event, false);

    /* The caller is assumed to hold the iothread lock, so the call_rcu
     * thread must have been quiescent even after forking; just recreate it.
     */
    qemu_thread_create(&thread, "call_rcu", call_rcu_thread,
                       NULL, QEMU_THREAD_DETACHED);

    rcu_register_thread();
}

static int atfork_depth = 1;

void rcu_enable_atfork(void)
{
    atfork_depth++;
}

void rcu_disable_atfork(void)
{
    atfork_depth--;
}

#ifdef CONFIG_POSIX
static void rcu_init_lock(void)
{
    if (atfork_depth < 1) {
        return;
    }

    qemu_mutex_lock(&rcu_sync_lock);
    qemu_mutex_lock(&rcu_registry_lock);
}

static void rcu_init_unlock(void)
{
    if (atfork_depth < 1) {
        return;
    }

    qemu_mutex_unlock(&rcu_registry_lock);
    qemu_mutex_unlock(&rcu_sync_lock);
}

static void rcu_init_child(void)
{
    if (atfork_depth < 1) {
        return;
    }

    memset(&registry, 0, sizeof(registry));
    rcu_init_complete();
}
#endif

static void __attribute__((__constructor__)) rcu_init(void)
{
    smp_mb_global_init();
#ifdef CONFIG_POSIX
    pthread_atfork(rcu_init_lock, rcu_init_unlock, rcu_init_child);
#endif
    rcu_init_complete();
}