xref: /openbmc/linux/kernel/sched/membarrier.c (revision ca481398)
/*
 * Copyright (C) 2010-2017 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * membarrier system call
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <linux/syscalls.h>
#include <linux/membarrier.h>
#include <linux/tick.h>
#include <linux/cpumask.h>

#include "sched.h"	/* for cpu_rq(). */

/*
 * Bitmask made from an OR of all commands within enum membarrier_cmd,
 * except MEMBARRIER_CMD_QUERY.
 */
#define MEMBARRIER_CMD_BITMASK	\
	(MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED)

static void ipi_mb(void *info)
{
	smp_mb();	/* IPIs should be serializing, but be paranoid. */
}

static void membarrier_private_expedited(void)
{
	int cpu;
	bool fallback = false;
	cpumask_var_t tmpmask;

	if (num_online_cpus() == 1)
		return;

	/*
	 * Matches the memory barriers around rq->curr modification in
	 * the scheduler.
	 */
	smp_mb();	/* system call entry is not a mb. */

	/*
	 * Expedited membarrier commands guarantee that they won't
	 * block, hence the GFP_NOWAIT allocation flag and fallback
	 * implementation.
	 */
	if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
		/* Fallback for OOM: IPI each matching CPU one by one. */
		fallback = true;
	}

	cpus_read_lock();
	for_each_online_cpu(cpu) {
		struct task_struct *p;

		/*
		 * Skipping the current CPU is OK even though we can be
		 * migrated at any point. The current CPU, at the point
		 * where we read raw_smp_processor_id(), is ensured to
		 * be in program order with respect to the caller
		 * thread. Therefore, we can skip this CPU from the
		 * iteration.
		 */
		if (cpu == raw_smp_processor_id())
			continue;
		rcu_read_lock();
		p = task_rcu_dereference(&cpu_rq(cpu)->curr);
		if (p && p->mm == current->mm) {
			if (!fallback)
				__cpumask_set_cpu(cpu, tmpmask);
			else
				smp_call_function_single(cpu, ipi_mb, NULL, 1);
		}
		rcu_read_unlock();
	}
	if (!fallback) {
		smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
		free_cpumask_var(tmpmask);
	}
	cpus_read_unlock();

	/*
	 * Memory barrier on the caller thread _after_ we finished
	 * waiting for the last IPI. Matches the memory barriers around
	 * rq->curr modification in the scheduler.
	 */
	smp_mb();	/* exit from system call is not a mb */
}
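
/*
 * Illustrative userspace sketch (not part of this file; the helper
 * and variable names are hypothetical): MEMBARRIER_CMD_PRIVATE_EXPEDITED
 * enables the asymmetric barrier pattern from the ordering table
 * below. Fast paths in many threads pair a plain compiler barrier
 * with a slow path that issues the syscall, which then acts as if
 * every running thread of the process had executed smp_mb():
 *
 *	#define barrier()	asm volatile("" ::: "memory")
 *
 *	volatile int a, b;	// stand-ins for shared data
 *
 *	static void fast_path(void)	// called from many threads
 *	{
 *		a = 1;
 *		barrier();	// compiler barrier only, no fence
 *		b = 1;
 *	}
 *
 *	static void slow_path(void)	// called rarely
 *	{
 *		syscall(__NR_membarrier,
 *			MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0);
 *		// Every fast_path() thread running above is now
 *		// ordered with respect to this point.
 *	}
 */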

/**
 * sys_membarrier - issue memory barriers on a set of threads
 * @cmd:   Takes command values defined in enum membarrier_cmd.
 * @flags: Currently needs to be 0. For future extensions.
 *
 * If this system call is not implemented, -ENOSYS is returned. If the
 * command specified does not exist, is not available on the running
 * kernel, or if the command argument is invalid, this system call
 * returns -EINVAL. For a given command, with the flags argument set to
 * 0, this system call is guaranteed to always return the same value
 * until reboot.
 *
 * All memory accesses performed in program order from each targeted thread
 * are guaranteed to be ordered with respect to sys_membarrier(). If we use
 * the semantic "barrier()" to represent a compiler barrier forcing memory
 * accesses to be performed in program order across the barrier, and
 * smp_mb() to represent explicit memory barriers forcing full memory
 * ordering across the barrier, we have the following ordering table for
 * each pair of barrier(), sys_membarrier() and smp_mb():
 *
 * The pair ordering is detailed as (O: ordered, X: not ordered):
 *
 *                        barrier()   smp_mb() sys_membarrier()
 *        barrier()          X           X            O
 *        smp_mb()           X           O            O
 *        sys_membarrier()   O           O            O
 */
SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
{
	if (unlikely(flags))
		return -EINVAL;
	switch (cmd) {
	case MEMBARRIER_CMD_QUERY:
	{
		int cmd_mask = MEMBARRIER_CMD_BITMASK;

		if (tick_nohz_full_enabled())
			cmd_mask &= ~MEMBARRIER_CMD_SHARED;
		return cmd_mask;
	}
	case MEMBARRIER_CMD_SHARED:
		/* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */
		if (tick_nohz_full_enabled())
			return -EINVAL;
		if (num_online_cpus() > 1)
			synchronize_sched();
		return 0;
	case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
		membarrier_private_expedited();
		return 0;
	default:
		return -EINVAL;
	}
}
153