xref: /openbmc/linux/net/sunrpc/svc.c (revision 61182c796d74f54ba66d17bac6f516183ec09af2)
1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  * linux/net/sunrpc/svc.c
41da177e4SLinus Torvalds  *
51da177e4SLinus Torvalds  * High-level RPC service routines
61da177e4SLinus Torvalds  *
71da177e4SLinus Torvalds  * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
8bfd24160SGreg Banks  *
9bfd24160SGreg Banks  * Multiple thread pools and NUMAisation
10bfd24160SGreg Banks  * Copyright (c) 2006 Silicon Graphics, Inc.
11bfd24160SGreg Banks  * by Greg Banks <gnb@melbourne.sgi.com>
121da177e4SLinus Torvalds  */
131da177e4SLinus Torvalds 
141da177e4SLinus Torvalds #include <linux/linkage.h>
153f07c014SIngo Molnar #include <linux/sched/signal.h>
161da177e4SLinus Torvalds #include <linux/errno.h>
171da177e4SLinus Torvalds #include <linux/net.h>
181da177e4SLinus Torvalds #include <linux/in.h>
191da177e4SLinus Torvalds #include <linux/mm.h>
20a7455442SGreg Banks #include <linux/interrupt.h>
21a7455442SGreg Banks #include <linux/module.h>
229867d76cSJeff Layton #include <linux/kthread.h>
235a0e3ad6STejun Heo #include <linux/slab.h>
241da177e4SLinus Torvalds 
251da177e4SLinus Torvalds #include <linux/sunrpc/types.h>
261da177e4SLinus Torvalds #include <linux/sunrpc/xdr.h>
271da177e4SLinus Torvalds #include <linux/sunrpc/stats.h>
281da177e4SLinus Torvalds #include <linux/sunrpc/svcsock.h>
291da177e4SLinus Torvalds #include <linux/sunrpc/clnt.h>
304d6bbb62SRicardo Labiaga #include <linux/sunrpc/bc_xprt.h>
311da177e4SLinus Torvalds 
32860a0d9eSJeff Layton #include <trace/events/sunrpc.h>
33860a0d9eSJeff Layton 
343a126180SChuck Lever #include "fail.h"
353a126180SChuck Lever 
361da177e4SLinus Torvalds #define RPCDBG_FACILITY	RPCDBG_SVCDSP
371da177e4SLinus Torvalds 
385247fab5SStanislav Kinsbursky static void svc_unregister(const struct svc_serv *serv, struct net *net);
397252d575SChuck Lever 
4042a7fc4aSGreg Banks #define SVC_POOL_DEFAULT	SVC_POOL_GLOBAL
41bfd24160SGreg Banks 
42bfd24160SGreg Banks /*
43cf0e124eSNeilBrown  * Mode for mapping cpus to pools.
44cf0e124eSNeilBrown  */
45cf0e124eSNeilBrown enum {
46cf0e124eSNeilBrown 	SVC_POOL_AUTO = -1,	/* choose one of the others */
47cf0e124eSNeilBrown 	SVC_POOL_GLOBAL,	/* no mapping, just a single global pool
48cf0e124eSNeilBrown 				 * (legacy & UP mode) */
49cf0e124eSNeilBrown 	SVC_POOL_PERCPU,	/* one pool per cpu */
50cf0e124eSNeilBrown 	SVC_POOL_PERNODE	/* one pool per numa node */
51cf0e124eSNeilBrown };
52cf0e124eSNeilBrown 
53cf0e124eSNeilBrown /*
54bfd24160SGreg Banks  * Structure for mapping cpus to pools and vice versa.
55bfd24160SGreg Banks  * Set up once during sunrpc initialisation.
56bfd24160SGreg Banks  */
57cf0e124eSNeilBrown 
58cf0e124eSNeilBrown struct svc_pool_map {
59cf0e124eSNeilBrown 	int count;			/* How many svc_servs use us */
60cf0e124eSNeilBrown 	int mode;			/* Note: int not enum to avoid
61cf0e124eSNeilBrown 					 * warnings about "enumeration value
62cf0e124eSNeilBrown 					 * not handled in switch" */
63cf0e124eSNeilBrown 	unsigned int npools;
64cf0e124eSNeilBrown 	unsigned int *pool_to;		/* maps pool id to cpu or node */
65cf0e124eSNeilBrown 	unsigned int *to_pool;		/* maps cpu or node to pool id */
66cf0e124eSNeilBrown };
67cf0e124eSNeilBrown 
68cf0e124eSNeilBrown static struct svc_pool_map svc_pool_map = {
6942a7fc4aSGreg Banks 	.mode = SVC_POOL_DEFAULT
70bfd24160SGreg Banks };
71d70bc0c6SJeff Layton 
7242a7fc4aSGreg Banks static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
73bfd24160SGreg Banks 
7442a7fc4aSGreg Banks static int
75e4dca7b7SKees Cook param_set_pool_mode(const char *val, const struct kernel_param *kp)
7642a7fc4aSGreg Banks {
7742a7fc4aSGreg Banks 	int *ip = (int *)kp->arg;
7842a7fc4aSGreg Banks 	struct svc_pool_map *m = &svc_pool_map;
7942a7fc4aSGreg Banks 	int err;
8042a7fc4aSGreg Banks 
8142a7fc4aSGreg Banks 	mutex_lock(&svc_pool_map_mutex);
8242a7fc4aSGreg Banks 
8342a7fc4aSGreg Banks 	err = -EBUSY;
8442a7fc4aSGreg Banks 	if (m->count)
8542a7fc4aSGreg Banks 		goto out;
8642a7fc4aSGreg Banks 
8742a7fc4aSGreg Banks 	err = 0;
8842a7fc4aSGreg Banks 	if (!strncmp(val, "auto", 4))
8942a7fc4aSGreg Banks 		*ip = SVC_POOL_AUTO;
9042a7fc4aSGreg Banks 	else if (!strncmp(val, "global", 6))
9142a7fc4aSGreg Banks 		*ip = SVC_POOL_GLOBAL;
9242a7fc4aSGreg Banks 	else if (!strncmp(val, "percpu", 6))
9342a7fc4aSGreg Banks 		*ip = SVC_POOL_PERCPU;
9442a7fc4aSGreg Banks 	else if (!strncmp(val, "pernode", 7))
9542a7fc4aSGreg Banks 		*ip = SVC_POOL_PERNODE;
9642a7fc4aSGreg Banks 	else
9742a7fc4aSGreg Banks 		err = -EINVAL;
9842a7fc4aSGreg Banks 
9942a7fc4aSGreg Banks out:
10042a7fc4aSGreg Banks 	mutex_unlock(&svc_pool_map_mutex);
10142a7fc4aSGreg Banks 	return err;
10242a7fc4aSGreg Banks }
10342a7fc4aSGreg Banks 
10442a7fc4aSGreg Banks static int
105e4dca7b7SKees Cook param_get_pool_mode(char *buf, const struct kernel_param *kp)
10642a7fc4aSGreg Banks {
10742a7fc4aSGreg Banks 	int *ip = (int *)kp->arg;
10842a7fc4aSGreg Banks 
10942a7fc4aSGreg Banks 	switch (*ip)
11042a7fc4aSGreg Banks 	{
11142a7fc4aSGreg Banks 	case SVC_POOL_AUTO:
112a9156d7eSAzeem Shaikh 		return sysfs_emit(buf, "auto\n");
11342a7fc4aSGreg Banks 	case SVC_POOL_GLOBAL:
114a9156d7eSAzeem Shaikh 		return sysfs_emit(buf, "global\n");
11542a7fc4aSGreg Banks 	case SVC_POOL_PERCPU:
116a9156d7eSAzeem Shaikh 		return sysfs_emit(buf, "percpu\n");
11742a7fc4aSGreg Banks 	case SVC_POOL_PERNODE:
118a9156d7eSAzeem Shaikh 		return sysfs_emit(buf, "pernode\n");
11942a7fc4aSGreg Banks 	default:
120a9156d7eSAzeem Shaikh 		return sysfs_emit(buf, "%d\n", *ip);
12142a7fc4aSGreg Banks 	}
12242a7fc4aSGreg Banks }
12342a7fc4aSGreg Banks 
12442a7fc4aSGreg Banks module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode,
12542a7fc4aSGreg Banks 		 &svc_pool_map.mode, 0644);
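
/*
 * Illustrative sketch (hypothetical usage, not taken from the kernel
 * sources): given the 0644 permission above, an administrator can pick
 * the mapping mode at module load time or, while no pooled service
 * holds the map (otherwise param_set_pool_mode() returns -EBUSY),
 * through sysfs:
 *
 *	modprobe sunrpc pool_mode=pernode
 *	echo auto > /sys/module/sunrpc/parameters/pool_mode
 *
 * The accepted strings are "auto", "global", "percpu" and "pernode",
 * exactly as parsed by param_set_pool_mode() above.
 */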
126bfd24160SGreg Banks 
127bfd24160SGreg Banks /*
128bfd24160SGreg Banks  * Detect best pool mapping mode heuristically,
129bfd24160SGreg Banks  * according to the machine's topology.
130bfd24160SGreg Banks  */
131bfd24160SGreg Banks static int
132bfd24160SGreg Banks svc_pool_map_choose_mode(void)
133bfd24160SGreg Banks {
134bfd24160SGreg Banks 	unsigned int node;
135bfd24160SGreg Banks 
13662bc62a8SChristoph Lameter 	if (nr_online_nodes > 1) {
137bfd24160SGreg Banks 		/*
138bfd24160SGreg Banks 		 * Actually have multiple NUMA nodes,
139bfd24160SGreg Banks 		 * so split pools on NUMA node boundaries
140bfd24160SGreg Banks 		 */
141bfd24160SGreg Banks 		return SVC_POOL_PERNODE;
142bfd24160SGreg Banks 	}
143bfd24160SGreg Banks 
14472c33688SH Hartley Sweeten 	node = first_online_node;
145bfd24160SGreg Banks 	if (nr_cpus_node(node) > 2) {
146bfd24160SGreg Banks 		/*
147bfd24160SGreg Banks 		 * Non-trivial SMP, or CONFIG_NUMA on
148bfd24160SGreg Banks 		 * non-NUMA hardware, e.g. with a generic
149bfd24160SGreg Banks 		 * x86_64 kernel on Xeons.  In this case we
150bfd24160SGreg Banks 		 * want to divide the pools on cpu boundaries.
151bfd24160SGreg Banks 		 */
152bfd24160SGreg Banks 		return SVC_POOL_PERCPU;
153bfd24160SGreg Banks 	}
154bfd24160SGreg Banks 
155bfd24160SGreg Banks 	/* default: one global pool */
156bfd24160SGreg Banks 	return SVC_POOL_GLOBAL;
157bfd24160SGreg Banks }
158bfd24160SGreg Banks 
159bfd24160SGreg Banks /*
160bfd24160SGreg Banks  * Allocate the to_pool[] and pool_to[] arrays.
161bfd24160SGreg Banks  * Returns 0 on success or an errno.
162bfd24160SGreg Banks  */
163bfd24160SGreg Banks static int
164bfd24160SGreg Banks svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools)
165bfd24160SGreg Banks {
166bfd24160SGreg Banks 	m->to_pool = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL);
167bfd24160SGreg Banks 	if (!m->to_pool)
168bfd24160SGreg Banks 		goto fail;
169bfd24160SGreg Banks 	m->pool_to = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL);
170bfd24160SGreg Banks 	if (!m->pool_to)
171bfd24160SGreg Banks 		goto fail_free;
172bfd24160SGreg Banks 
173bfd24160SGreg Banks 	return 0;
174bfd24160SGreg Banks 
175bfd24160SGreg Banks fail_free:
176bfd24160SGreg Banks 	kfree(m->to_pool);
17761c8504cSJ. Bruce Fields 	m->to_pool = NULL;
178bfd24160SGreg Banks fail:
179bfd24160SGreg Banks 	return -ENOMEM;
180bfd24160SGreg Banks }
181bfd24160SGreg Banks 
182bfd24160SGreg Banks /*
183bfd24160SGreg Banks  * Initialise the pool map for SVC_POOL_PERCPU mode.
184bfd24160SGreg Banks  * Returns number of pools or <0 on error.
185bfd24160SGreg Banks  */
186bfd24160SGreg Banks static int
187bfd24160SGreg Banks svc_pool_map_init_percpu(struct svc_pool_map *m)
188bfd24160SGreg Banks {
18953b8a315SChristoph Lameter 	unsigned int maxpools = nr_cpu_ids;
190bfd24160SGreg Banks 	unsigned int pidx = 0;
191bfd24160SGreg Banks 	unsigned int cpu;
192bfd24160SGreg Banks 	int err;
193bfd24160SGreg Banks 
194bfd24160SGreg Banks 	err = svc_pool_map_alloc_arrays(m, maxpools);
195bfd24160SGreg Banks 	if (err)
196bfd24160SGreg Banks 		return err;
197bfd24160SGreg Banks 
198bfd24160SGreg Banks 	for_each_online_cpu(cpu) {
199eb63192bSDan Carpenter 		BUG_ON(pidx >= maxpools);
200bfd24160SGreg Banks 		m->to_pool[cpu] = pidx;
201bfd24160SGreg Banks 		m->pool_to[pidx] = cpu;
202bfd24160SGreg Banks 		pidx++;
203bfd24160SGreg Banks 	}
204bfd24160SGreg Banks 	/* cpus brought online later all get mapped to pool0, sorry */
205bfd24160SGreg Banks 
206bfd24160SGreg Banks 	return pidx;
207bfd24160SGreg Banks };
208bfd24160SGreg Banks 
209bfd24160SGreg Banks 
210bfd24160SGreg Banks /*
211bfd24160SGreg Banks  * Initialise the pool map for SVC_POOL_PERNODE mode.
212bfd24160SGreg Banks  * Returns number of pools or <0 on error.
213bfd24160SGreg Banks  */
214bfd24160SGreg Banks static int
215bfd24160SGreg Banks svc_pool_map_init_pernode(struct svc_pool_map *m)
216bfd24160SGreg Banks {
21774c7aa8bSChristoph Lameter 	unsigned int maxpools = nr_node_ids;
218bfd24160SGreg Banks 	unsigned int pidx = 0;
219bfd24160SGreg Banks 	unsigned int node;
220bfd24160SGreg Banks 	int err;
221bfd24160SGreg Banks 
222bfd24160SGreg Banks 	err = svc_pool_map_alloc_arrays(m, maxpools);
223bfd24160SGreg Banks 	if (err)
224bfd24160SGreg Banks 		return err;
225bfd24160SGreg Banks 
226bfd24160SGreg Banks 	for_each_node_with_cpus(node) {
227bfd24160SGreg Banks 		/* some architectures (e.g. SN2) have cpuless nodes */
228bfd24160SGreg Banks 		BUG_ON(pidx > maxpools);
229bfd24160SGreg Banks 		m->to_pool[node] = pidx;
230bfd24160SGreg Banks 		m->pool_to[pidx] = node;
231bfd24160SGreg Banks 		pidx++;
232bfd24160SGreg Banks 	}
233bfd24160SGreg Banks 	/* nodes brought online later all get mapped to pool0, sorry */
234bfd24160SGreg Banks 
235bfd24160SGreg Banks 	return pidx;
236bfd24160SGreg Banks }
237bfd24160SGreg Banks 
238bfd24160SGreg Banks 
239bfd24160SGreg Banks /*
24042a7fc4aSGreg Banks  * Add a reference to the global map of cpus to pools (and
24193aa619eSNeilBrown  * vice versa) if pools are in use.
24293aa619eSNeilBrown  * Initialise the map if we're the first user.
24393aa619eSNeilBrown  * Returns the number of pools. If this is '1', no reference
24493aa619eSNeilBrown  * was taken.
245bfd24160SGreg Banks  */
246cf0e124eSNeilBrown static unsigned int
24742a7fc4aSGreg Banks svc_pool_map_get(void)
248bfd24160SGreg Banks {
249bfd24160SGreg Banks 	struct svc_pool_map *m = &svc_pool_map;
250bfd24160SGreg Banks 	int npools = -1;
251bfd24160SGreg Banks 
25242a7fc4aSGreg Banks 	mutex_lock(&svc_pool_map_mutex);
253bfd24160SGreg Banks 
25442a7fc4aSGreg Banks 	if (m->count++) {
25542a7fc4aSGreg Banks 		mutex_unlock(&svc_pool_map_mutex);
25693aa619eSNeilBrown 		WARN_ON_ONCE(m->npools <= 1);
25742a7fc4aSGreg Banks 		return m->npools;
25842a7fc4aSGreg Banks 	}
25942a7fc4aSGreg Banks 
26042a7fc4aSGreg Banks 	if (m->mode == SVC_POOL_AUTO)
261bfd24160SGreg Banks 		m->mode = svc_pool_map_choose_mode();
262bfd24160SGreg Banks 
263bfd24160SGreg Banks 	switch (m->mode) {
264bfd24160SGreg Banks 	case SVC_POOL_PERCPU:
265bfd24160SGreg Banks 		npools = svc_pool_map_init_percpu(m);
266bfd24160SGreg Banks 		break;
267bfd24160SGreg Banks 	case SVC_POOL_PERNODE:
268bfd24160SGreg Banks 		npools = svc_pool_map_init_pernode(m);
269bfd24160SGreg Banks 		break;
270bfd24160SGreg Banks 	}
271bfd24160SGreg Banks 
27293aa619eSNeilBrown 	if (npools <= 0) {
273bfd24160SGreg Banks 		/* default, or memory allocation failure */
274bfd24160SGreg Banks 		npools = 1;
275bfd24160SGreg Banks 		m->mode = SVC_POOL_GLOBAL;
276bfd24160SGreg Banks 	}
277bfd24160SGreg Banks 	m->npools = npools;
278bfd24160SGreg Banks 
27993aa619eSNeilBrown 	if (npools == 1)
28093aa619eSNeilBrown 		/* service is unpooled, so doesn't hold a reference */
28193aa619eSNeilBrown 		m->count--;
28293aa619eSNeilBrown 
28342a7fc4aSGreg Banks 	mutex_unlock(&svc_pool_map_mutex);
28493aa619eSNeilBrown 	return npools;
285bfd24160SGreg Banks }
28642a7fc4aSGreg Banks 
28742a7fc4aSGreg Banks /*
28893aa619eSNeilBrown  * Drop a reference to the global map of cpus to pools, if
28993aa619eSNeilBrown  * pools were in use, i.e. if npools > 1.
29042a7fc4aSGreg Banks  * When the last reference is dropped, the map data is
29142a7fc4aSGreg Banks  * freed; this allows the sysadmin to change the pool
29242a7fc4aSGreg Banks  * mode using the pool_mode module option without
29342a7fc4aSGreg Banks  * rebooting or re-loading sunrpc.ko.
29442a7fc4aSGreg Banks  */
295cf0e124eSNeilBrown static void
29693aa619eSNeilBrown svc_pool_map_put(int npools)
29742a7fc4aSGreg Banks {
29842a7fc4aSGreg Banks 	struct svc_pool_map *m = &svc_pool_map;
29942a7fc4aSGreg Banks 
30093aa619eSNeilBrown 	if (npools <= 1)
30193aa619eSNeilBrown 		return;
30242a7fc4aSGreg Banks 	mutex_lock(&svc_pool_map_mutex);
30342a7fc4aSGreg Banks 
30442a7fc4aSGreg Banks 	if (!--m->count) {
30542a7fc4aSGreg Banks 		kfree(m->to_pool);
30661c8504cSJ. Bruce Fields 		m->to_pool = NULL;
30742a7fc4aSGreg Banks 		kfree(m->pool_to);
30861c8504cSJ. Bruce Fields 		m->pool_to = NULL;
30942a7fc4aSGreg Banks 		m->npools = 0;
31042a7fc4aSGreg Banks 	}
31142a7fc4aSGreg Banks 
31242a7fc4aSGreg Banks 	mutex_unlock(&svc_pool_map_mutex);
31342a7fc4aSGreg Banks }
31442a7fc4aSGreg Banks 
31511fd165cSEric Dumazet static int svc_pool_map_get_node(unsigned int pidx)
31611fd165cSEric Dumazet {
31711fd165cSEric Dumazet 	const struct svc_pool_map *m = &svc_pool_map;
31811fd165cSEric Dumazet 
31911fd165cSEric Dumazet 	if (m->count) {
32011fd165cSEric Dumazet 		if (m->mode == SVC_POOL_PERCPU)
32111fd165cSEric Dumazet 			return cpu_to_node(m->pool_to[pidx]);
32211fd165cSEric Dumazet 		if (m->mode == SVC_POOL_PERNODE)
32311fd165cSEric Dumazet 			return m->pool_to[pidx];
32411fd165cSEric Dumazet 	}
32511fd165cSEric Dumazet 	return NUMA_NO_NODE;
32611fd165cSEric Dumazet }
327bfd24160SGreg Banks /*
3289867d76cSJeff Layton  * Set the given thread's cpus_allowed mask so that it
329bfd24160SGreg Banks  * will only run on cpus in the given pool.
330bfd24160SGreg Banks  */
3319867d76cSJeff Layton static inline void
3329867d76cSJeff Layton svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
333bfd24160SGreg Banks {
334bfd24160SGreg Banks 	struct svc_pool_map *m = &svc_pool_map;
3359867d76cSJeff Layton 	unsigned int node = m->pool_to[pidx];
336bfd24160SGreg Banks 
337bfd24160SGreg Banks 	/*
338bfd24160SGreg Banks 	 * The caller checks for sv_nrpools > 1, which
33942a7fc4aSGreg Banks 	 * implies that we've been initialized.
340bfd24160SGreg Banks 	 */
3411bd58aafSWeston Andros Adamson 	WARN_ON_ONCE(m->count == 0);
3421bd58aafSWeston Andros Adamson 	if (m->count == 0)
3431bd58aafSWeston Andros Adamson 		return;
344bfd24160SGreg Banks 
3459867d76cSJeff Layton 	switch (m->mode) {
346bfd24160SGreg Banks 	case SVC_POOL_PERCPU:
347c5f59f08SMike Travis 	{
348aa85ea5bSRusty Russell 		set_cpus_allowed_ptr(task, cpumask_of(node));
3499867d76cSJeff Layton 		break;
350c5f59f08SMike Travis 	}
351bfd24160SGreg Banks 	case SVC_POOL_PERNODE:
352c5f59f08SMike Travis 	{
353a70f7302SRusty Russell 		set_cpus_allowed_ptr(task, cpumask_of_node(node));
3549867d76cSJeff Layton 		break;
355bfd24160SGreg Banks 	}
356bfd24160SGreg Banks 	}
357c5f59f08SMike Travis }
358bfd24160SGreg Banks 
3592059b698SChuck Lever /**
3602059b698SChuck Lever  * svc_pool_for_cpu - Select pool to run a thread on this cpu
3612059b698SChuck Lever  * @serv: An RPC service
3622059b698SChuck Lever  *
3632059b698SChuck Lever  * Use the active CPU and the svc_pool_map's mode setting to
3642059b698SChuck Lever  * select the svc thread pool to use. Once initialized, the
3652059b698SChuck Lever  * svc_pool_map does not change.
3662059b698SChuck Lever  *
3672059b698SChuck Lever  * Return value:
3682059b698SChuck Lever  *   A pointer to an svc_pool
369bfd24160SGreg Banks  */
3702059b698SChuck Lever struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv)
371bfd24160SGreg Banks {
372bfd24160SGreg Banks 	struct svc_pool_map *m = &svc_pool_map;
3732059b698SChuck Lever 	int cpu = raw_smp_processor_id();
374bfd24160SGreg Banks 	unsigned int pidx = 0;
375bfd24160SGreg Banks 
37693aa619eSNeilBrown 	if (serv->sv_nrpools <= 1)
37793aa619eSNeilBrown 		return serv->sv_pools;
37893aa619eSNeilBrown 
379bfd24160SGreg Banks 	switch (m->mode) {
380bfd24160SGreg Banks 	case SVC_POOL_PERCPU:
381bfd24160SGreg Banks 		pidx = m->to_pool[cpu];
382bfd24160SGreg Banks 		break;
383bfd24160SGreg Banks 	case SVC_POOL_PERNODE:
384bfd24160SGreg Banks 		pidx = m->to_pool[cpu_to_node(cpu)];
385bfd24160SGreg Banks 		break;
386bfd24160SGreg Banks 	}
38793aa619eSNeilBrown 
388bfd24160SGreg Banks 	return &serv->sv_pools[pidx % serv->sv_nrpools];
389bfd24160SGreg Banks }
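
/*
 * Illustrative sketch of a caller (simplified; the real enqueue logic
 * lives in the transport code): pick the pool for the current CPU and
 * queue a ready transport on that pool's socket list.  "xprt" and its
 * xpt_server/xpt_ready members are assumed from svc_xprt.h.
 *
 *	struct svc_pool *pool = svc_pool_for_cpu(xprt->xpt_server);
 *
 *	spin_lock_bh(&pool->sp_lock);
 *	list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
 *	spin_unlock_bh(&pool->sp_lock);
 */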
390bfd24160SGreg Banks 
391bb2224dfSStanislav Kinsbursky int svc_rpcb_setup(struct svc_serv *serv, struct net *net)
392d9908560SStanislav Kinsbursky {
393d9908560SStanislav Kinsbursky 	int err;
394d9908560SStanislav Kinsbursky 
395bee42f68SStanislav Kinsbursky 	err = rpcb_create_local(net);
396d9908560SStanislav Kinsbursky 	if (err)
397d9908560SStanislav Kinsbursky 		return err;
398d9908560SStanislav Kinsbursky 
399d9908560SStanislav Kinsbursky 	/* Remove any stale portmap registrations */
400bee42f68SStanislav Kinsbursky 	svc_unregister(serv, net);
401d9908560SStanislav Kinsbursky 	return 0;
402d9908560SStanislav Kinsbursky }
403bb2224dfSStanislav Kinsbursky EXPORT_SYMBOL_GPL(svc_rpcb_setup);
404d9908560SStanislav Kinsbursky 
4055ecebb7cSStanislav Kinsbursky void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net)
406d9908560SStanislav Kinsbursky {
4075ecebb7cSStanislav Kinsbursky 	svc_unregister(serv, net);
4085ecebb7cSStanislav Kinsbursky 	rpcb_put_local(net);
409d9908560SStanislav Kinsbursky }
41016d05870SStanislav Kinsbursky EXPORT_SYMBOL_GPL(svc_rpcb_cleanup);
411d9908560SStanislav Kinsbursky 
412d9908560SStanislav Kinsbursky static int svc_uses_rpcbind(struct svc_serv *serv)
413d9908560SStanislav Kinsbursky {
414d9908560SStanislav Kinsbursky 	struct svc_program	*progp;
415d9908560SStanislav Kinsbursky 	unsigned int		i;
416d9908560SStanislav Kinsbursky 
417d9908560SStanislav Kinsbursky 	for (progp = serv->sv_program; progp; progp = progp->pg_next) {
418d9908560SStanislav Kinsbursky 		for (i = 0; i < progp->pg_nvers; i++) {
419d9908560SStanislav Kinsbursky 			if (progp->pg_vers[i] == NULL)
420d9908560SStanislav Kinsbursky 				continue;
42105a45a2dSJeff Layton 			if (!progp->pg_vers[i]->vs_hidden)
422d9908560SStanislav Kinsbursky 				return 1;
423d9908560SStanislav Kinsbursky 		}
424d9908560SStanislav Kinsbursky 	}
425d9908560SStanislav Kinsbursky 
426d9908560SStanislav Kinsbursky 	return 0;
427d9908560SStanislav Kinsbursky }
428bfd24160SGreg Banks 
4299793f7c8SStanislav Kinsbursky int svc_bind(struct svc_serv *serv, struct net *net)
4309793f7c8SStanislav Kinsbursky {
4319793f7c8SStanislav Kinsbursky 	if (!svc_uses_rpcbind(serv))
4329793f7c8SStanislav Kinsbursky 		return 0;
4339793f7c8SStanislav Kinsbursky 	return svc_rpcb_setup(serv, net);
4349793f7c8SStanislav Kinsbursky }
4359793f7c8SStanislav Kinsbursky EXPORT_SYMBOL_GPL(svc_bind);
4369793f7c8SStanislav Kinsbursky 
437d0025268STrond Myklebust #if defined(CONFIG_SUNRPC_BACKCHANNEL)
438d0025268STrond Myklebust static void
439d0025268STrond Myklebust __svc_init_bc(struct svc_serv *serv)
440d0025268STrond Myklebust {
441d0025268STrond Myklebust 	INIT_LIST_HEAD(&serv->sv_cb_list);
442d0025268STrond Myklebust 	spin_lock_init(&serv->sv_cb_lock);
443d0025268STrond Myklebust 	init_waitqueue_head(&serv->sv_cb_waitq);
444d0025268STrond Myklebust }
445d0025268STrond Myklebust #else
446d0025268STrond Myklebust static void
447d0025268STrond Myklebust __svc_init_bc(struct svc_serv *serv)
448d0025268STrond Myklebust {
449d0025268STrond Myklebust }
450d0025268STrond Myklebust #endif
451d0025268STrond Myklebust 
452bfd24160SGreg Banks /*
4531da177e4SLinus Torvalds  * Create an RPC service
4541da177e4SLinus Torvalds  */
455a7455442SGreg Banks static struct svc_serv *
456a7455442SGreg Banks __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
45737902c63SChuck Lever 	     int (*threadfn)(void *data))
4581da177e4SLinus Torvalds {
4591da177e4SLinus Torvalds 	struct svc_serv	*serv;
460ea339d46SChuck Lever 	unsigned int vers;
4611da177e4SLinus Torvalds 	unsigned int xdrsize;
4623262c816SGreg Banks 	unsigned int i;
4631da177e4SLinus Torvalds 
4640da974f4SPanagiotis Issaris 	if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
4651da177e4SLinus Torvalds 		return NULL;
4669ba02638SAndreas Gruenbacher 	serv->sv_name      = prog->pg_name;
4671da177e4SLinus Torvalds 	serv->sv_program   = prog;
468ec52361dSNeilBrown 	kref_init(&serv->sv_refcnt);
4691da177e4SLinus Torvalds 	serv->sv_stats     = prog->pg_stats;
470c6b0a9f8SNeilBrown 	if (bufsize > RPCSVC_MAXPAYLOAD)
471c6b0a9f8SNeilBrown 		bufsize = RPCSVC_MAXPAYLOAD;
472c6b0a9f8SNeilBrown 	serv->sv_max_payload = bufsize? bufsize : 4096;
473c6b0a9f8SNeilBrown 	serv->sv_max_mesg  = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE);
47437902c63SChuck Lever 	serv->sv_threadfn = threadfn;
4751da177e4SLinus Torvalds 	xdrsize = 0;
4769ba02638SAndreas Gruenbacher 	while (prog) {
4779ba02638SAndreas Gruenbacher 		prog->pg_lovers = prog->pg_nvers-1;
4781da177e4SLinus Torvalds 		for (vers=0; vers<prog->pg_nvers ; vers++)
4791da177e4SLinus Torvalds 			if (prog->pg_vers[vers]) {
4801da177e4SLinus Torvalds 				prog->pg_hivers = vers;
4811da177e4SLinus Torvalds 				if (prog->pg_lovers > vers)
4821da177e4SLinus Torvalds 					prog->pg_lovers = vers;
4831da177e4SLinus Torvalds 				if (prog->pg_vers[vers]->vs_xdrsize > xdrsize)
4841da177e4SLinus Torvalds 					xdrsize = prog->pg_vers[vers]->vs_xdrsize;
4851da177e4SLinus Torvalds 			}
4869ba02638SAndreas Gruenbacher 		prog = prog->pg_next;
4879ba02638SAndreas Gruenbacher 	}
4881da177e4SLinus Torvalds 	serv->sv_xdrsize   = xdrsize;
4891da177e4SLinus Torvalds 	INIT_LIST_HEAD(&serv->sv_tempsocks);
4901da177e4SLinus Torvalds 	INIT_LIST_HEAD(&serv->sv_permsocks);
491ff861c4dSKees Cook 	timer_setup(&serv->sv_temptimer, NULL, 0);
4921da177e4SLinus Torvalds 	spin_lock_init(&serv->sv_lock);
4931da177e4SLinus Torvalds 
494d0025268STrond Myklebust 	__svc_init_bc(serv);
495d0025268STrond Myklebust 
496a7455442SGreg Banks 	serv->sv_nrpools = npools;
4973262c816SGreg Banks 	serv->sv_pools =
498cd861280SRobert P. J. Day 		kcalloc(serv->sv_nrpools, sizeof(struct svc_pool),
4993262c816SGreg Banks 			GFP_KERNEL);
5003262c816SGreg Banks 	if (!serv->sv_pools) {
5013262c816SGreg Banks 		kfree(serv);
5023262c816SGreg Banks 		return NULL;
5033262c816SGreg Banks 	}
5043262c816SGreg Banks 
5053262c816SGreg Banks 	for (i = 0; i < serv->sv_nrpools; i++) {
5063262c816SGreg Banks 		struct svc_pool *pool = &serv->sv_pools[i];
5073262c816SGreg Banks 
50846121cf7SChuck Lever 		dprintk("svc: initialising pool %u for %s\n",
5093262c816SGreg Banks 				i, serv->sv_name);
5103262c816SGreg Banks 
5113262c816SGreg Banks 		pool->sp_id = i;
5123262c816SGreg Banks 		INIT_LIST_HEAD(&pool->sp_sockets);
513a7455442SGreg Banks 		INIT_LIST_HEAD(&pool->sp_all_threads);
5143262c816SGreg Banks 		spin_lock_init(&pool->sp_lock);
515ccf08bedSChuck Lever 
516ccf08bedSChuck Lever 		percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL);
517ccf08bedSChuck Lever 		percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL);
518ccf08bedSChuck Lever 		percpu_counter_init(&pool->sp_threads_timedout, 0, GFP_KERNEL);
5193262c816SGreg Banks 	}
5203262c816SGreg Banks 
5211da177e4SLinus Torvalds 	return serv;
5221da177e4SLinus Torvalds }
5231da177e4SLinus Torvalds 
52437902c63SChuck Lever /**
52537902c63SChuck Lever  * svc_create - Create an RPC service
52637902c63SChuck Lever  * @prog: the RPC program the new service will handle
52737902c63SChuck Lever  * @bufsize: maximum message size for @prog
52837902c63SChuck Lever  * @threadfn: a function to service RPC requests for @prog
52937902c63SChuck Lever  *
53037902c63SChuck Lever  * Returns an instantiated struct svc_serv object or NULL.
53137902c63SChuck Lever  */
53237902c63SChuck Lever struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize,
53337902c63SChuck Lever 			    int (*threadfn)(void *data))
534a7455442SGreg Banks {
53537902c63SChuck Lever 	return __svc_create(prog, bufsize, 1, threadfn);
536a7455442SGreg Banks }
53724c3767eSTrond Myklebust EXPORT_SYMBOL_GPL(svc_create);
538a7455442SGreg Banks 
53937902c63SChuck Lever /**
54037902c63SChuck Lever  * svc_create_pooled - Create an RPC service with pooled threads
54137902c63SChuck Lever  * @prog: the RPC program the new service will handle
54237902c63SChuck Lever  * @bufsize: maximum message size for @prog
54337902c63SChuck Lever  * @threadfn: a function to service RPC requests for @prog
54437902c63SChuck Lever  *
54537902c63SChuck Lever  * Returns an instantiated struct svc_serv object or NULL.
54637902c63SChuck Lever  */
54737902c63SChuck Lever struct svc_serv *svc_create_pooled(struct svc_program *prog,
54837902c63SChuck Lever 				   unsigned int bufsize,
54937902c63SChuck Lever 				   int (*threadfn)(void *data))
550a7455442SGreg Banks {
551a7455442SGreg Banks 	struct svc_serv *serv;
55242a7fc4aSGreg Banks 	unsigned int npools = svc_pool_map_get();
553a7455442SGreg Banks 
55437902c63SChuck Lever 	serv = __svc_create(prog, bufsize, npools, threadfn);
555067f96efSJeff Layton 	if (!serv)
556067f96efSJeff Layton 		goto out_err;
557a7455442SGreg Banks 	return serv;
558067f96efSJeff Layton out_err:
55993aa619eSNeilBrown 	svc_pool_map_put(npools);
560067f96efSJeff Layton 	return NULL;
561a7455442SGreg Banks }
56224c3767eSTrond Myklebust EXPORT_SYMBOL_GPL(svc_create_pooled);
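
/*
 * Illustrative sketch (hypothetical caller): a pooled service for a
 * made-up "frobd" program could be created as follows, with
 * frobd_program and frobd_threadfn supplied by that caller:
 *
 *	struct svc_serv *serv;
 *
 *	serv = svc_create_pooled(&frobd_program, 64 * 1024, frobd_threadfn);
 *	if (!serv)
 *		return -ENOMEM;
 */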
563a7455442SGreg Banks 
5641da177e4SLinus Torvalds /*
565bedbdd8bSNeil Brown  * Destroy an RPC service. Should be called with appropriate locking to
5662a36395fSNeilBrown  * protect sv_permsocks and sv_tempsocks.
5671da177e4SLinus Torvalds  */
5681da177e4SLinus Torvalds void
569ec52361dSNeilBrown svc_destroy(struct kref *ref)
5701da177e4SLinus Torvalds {
571ec52361dSNeilBrown 	struct svc_serv *serv = container_of(ref, struct svc_serv, sv_refcnt);
572ccf08bedSChuck Lever 	unsigned int i;
5731da177e4SLinus Torvalds 
574ec52361dSNeilBrown 	dprintk("svc: svc_destroy(%s)\n", serv->sv_program->pg_name);
575292a089dSSteven Rostedt (Google) 	timer_shutdown_sync(&serv->sv_temptimer);
576074d0f67SStanislav Kinsbursky 
5777b147f1fSStanislav Kinsbursky 	/*
5787b147f1fSStanislav Kinsbursky 	 * The last user is gone, so all sockets must have been destroyed
5797b147f1fSStanislav Kinsbursky 	 * by this point. Check that this is the case.
5807b147f1fSStanislav Kinsbursky 	 */
5817b147f1fSStanislav Kinsbursky 	BUG_ON(!list_empty(&serv->sv_permsocks));
5827b147f1fSStanislav Kinsbursky 	BUG_ON(!list_empty(&serv->sv_tempsocks));
583cda1fd4aSNeilBrown 
5841da177e4SLinus Torvalds 	cache_clean_deferred(serv);
5851da177e4SLinus Torvalds 
58693aa619eSNeilBrown 	svc_pool_map_put(serv->sv_nrpools);
58742a7fc4aSGreg Banks 
588ccf08bedSChuck Lever 	for (i = 0; i < serv->sv_nrpools; i++) {
589ccf08bedSChuck Lever 		struct svc_pool *pool = &serv->sv_pools[i];
590ccf08bedSChuck Lever 
591ccf08bedSChuck Lever 		percpu_counter_destroy(&pool->sp_sockets_queued);
592ccf08bedSChuck Lever 		percpu_counter_destroy(&pool->sp_threads_woken);
593ccf08bedSChuck Lever 		percpu_counter_destroy(&pool->sp_threads_timedout);
594ccf08bedSChuck Lever 	}
5953262c816SGreg Banks 	kfree(serv->sv_pools);
5961da177e4SLinus Torvalds 	kfree(serv);
5971da177e4SLinus Torvalds }
59824c3767eSTrond Myklebust EXPORT_SYMBOL_GPL(svc_destroy);
5991da177e4SLinus Torvalds 
60088e4d41aSChuck Lever static bool
60111fd165cSEric Dumazet svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node)
6021da177e4SLinus Torvalds {
60388e4d41aSChuck Lever 	unsigned long pages, ret;
6041da177e4SLinus Torvalds 
605ba17686fSAndy Adamson 	/* bc_xprt uses fore channel allocated buffers */
606ba17686fSAndy Adamson 	if (svc_is_backchannel(rqstp))
60788e4d41aSChuck Lever 		return true;
608ba17686fSAndy Adamson 
609c6b0a9f8SNeilBrown 	pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply;
610c6b0a9f8SNeilBrown 				       * each is assumed to fit in at most one page
611c6b0a9f8SNeilBrown 				       */
612b25cd058SWeston Andros Adamson 	WARN_ON_ONCE(pages > RPCSVC_MAXPAGES);
613b25cd058SWeston Andros Adamson 	if (pages > RPCSVC_MAXPAGES)
614b25cd058SWeston Andros Adamson 		pages = RPCSVC_MAXPAGES;
61588e4d41aSChuck Lever 
61688e4d41aSChuck Lever 	ret = alloc_pages_bulk_array_node(GFP_KERNEL, node, pages,
61788e4d41aSChuck Lever 					  rqstp->rq_pages);
61888e4d41aSChuck Lever 	return ret == pages;
6191da177e4SLinus Torvalds }
6201da177e4SLinus Torvalds 
6211da177e4SLinus Torvalds /*
6221da177e4SLinus Torvalds  * Release an RPC server buffer
6231da177e4SLinus Torvalds  */
6241da177e4SLinus Torvalds static void
6251da177e4SLinus Torvalds svc_release_buffer(struct svc_rqst *rqstp)
6261da177e4SLinus Torvalds {
62750c8bb13SChuck Lever 	unsigned int i;
62850c8bb13SChuck Lever 
62944524359SNeilBrown 	for (i = 0; i < ARRAY_SIZE(rqstp->rq_pages); i++)
63044524359SNeilBrown 		if (rqstp->rq_pages[i])
63144524359SNeilBrown 			put_page(rqstp->rq_pages[i]);
6321da177e4SLinus Torvalds }
6331da177e4SLinus Torvalds 
6340113ab34SJeff Layton struct svc_rqst *
6351b6dc1dfSJeff Layton svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node)
6360113ab34SJeff Layton {
6370113ab34SJeff Layton 	struct svc_rqst	*rqstp;
6380113ab34SJeff Layton 
63911fd165cSEric Dumazet 	rqstp = kzalloc_node(sizeof(*rqstp), GFP_KERNEL, node);
6400113ab34SJeff Layton 	if (!rqstp)
6411b6dc1dfSJeff Layton 		return rqstp;
6420113ab34SJeff Layton 
64376fa8842SMatthew Wilcox (Oracle) 	folio_batch_init(&rqstp->rq_fbatch);
6446a0cdf56SChuck Lever 
645b1691bc0SJeff Layton 	__set_bit(RQ_BUSY, &rqstp->rq_flags);
646b1691bc0SJeff Layton 	rqstp->rq_server = serv;
647b1691bc0SJeff Layton 	rqstp->rq_pool = pool;
6481b6dc1dfSJeff Layton 
6495191955dSChuck Lever 	rqstp->rq_scratch_page = alloc_pages_node(node, GFP_KERNEL, 0);
6505191955dSChuck Lever 	if (!rqstp->rq_scratch_page)
6515191955dSChuck Lever 		goto out_enomem;
6525191955dSChuck Lever 
6531b6dc1dfSJeff Layton 	rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node);
6541b6dc1dfSJeff Layton 	if (!rqstp->rq_argp)
6551b6dc1dfSJeff Layton 		goto out_enomem;
6561b6dc1dfSJeff Layton 
6571b6dc1dfSJeff Layton 	rqstp->rq_resp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node);
6581b6dc1dfSJeff Layton 	if (!rqstp->rq_resp)
6591b6dc1dfSJeff Layton 		goto out_enomem;
6601b6dc1dfSJeff Layton 
6611b6dc1dfSJeff Layton 	if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node))
6621b6dc1dfSJeff Layton 		goto out_enomem;
6631b6dc1dfSJeff Layton 
6641b6dc1dfSJeff Layton 	return rqstp;
6651b6dc1dfSJeff Layton out_enomem:
6661b6dc1dfSJeff Layton 	svc_rqst_free(rqstp);
6671b6dc1dfSJeff Layton 	return NULL;
6681b6dc1dfSJeff Layton }
6691b6dc1dfSJeff Layton EXPORT_SYMBOL_GPL(svc_rqst_alloc);
6701b6dc1dfSJeff Layton 
6716b044fbaSNeilBrown static struct svc_rqst *
6721b6dc1dfSJeff Layton svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
6731b6dc1dfSJeff Layton {
6741b6dc1dfSJeff Layton 	struct svc_rqst	*rqstp;
6751b6dc1dfSJeff Layton 
6761b6dc1dfSJeff Layton 	rqstp = svc_rqst_alloc(serv, pool, node);
6771b6dc1dfSJeff Layton 	if (!rqstp)
6781b6dc1dfSJeff Layton 		return ERR_PTR(-ENOMEM);
6791b6dc1dfSJeff Layton 
680ec52361dSNeilBrown 	svc_get(serv);
6812a36395fSNeilBrown 	spin_lock_bh(&serv->sv_lock);
6822a36395fSNeilBrown 	serv->sv_nrthreads += 1;
6832a36395fSNeilBrown 	spin_unlock_bh(&serv->sv_lock);
6842a36395fSNeilBrown 
6850113ab34SJeff Layton 	spin_lock_bh(&pool->sp_lock);
6860113ab34SJeff Layton 	pool->sp_nrthreads++;
68781244386SJeff Layton 	list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads);
6880113ab34SJeff Layton 	spin_unlock_bh(&pool->sp_lock);
6890113ab34SJeff Layton 	return rqstp;
6900113ab34SJeff Layton }
6910113ab34SJeff Layton 
6921da177e4SLinus Torvalds /*
693a7455442SGreg Banks  * Choose a pool in which to create a new thread, for svc_set_num_threads
694a7455442SGreg Banks  */
695a7455442SGreg Banks static inline struct svc_pool *
696a7455442SGreg Banks choose_pool(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
697a7455442SGreg Banks {
698a7455442SGreg Banks 	if (pool != NULL)
699a7455442SGreg Banks 		return pool;
700a7455442SGreg Banks 
701a7455442SGreg Banks 	return &serv->sv_pools[(*state)++ % serv->sv_nrpools];
702a7455442SGreg Banks }
703a7455442SGreg Banks 
704a7455442SGreg Banks /*
705a7455442SGreg Banks  * Choose a thread to kill, for svc_set_num_threads
706a7455442SGreg Banks  */
707a7455442SGreg Banks static inline struct task_struct *
708a7455442SGreg Banks choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
709a7455442SGreg Banks {
710a7455442SGreg Banks 	unsigned int i;
711a7455442SGreg Banks 	struct task_struct *task = NULL;
712a7455442SGreg Banks 
713a7455442SGreg Banks 	if (pool != NULL) {
714a7455442SGreg Banks 		spin_lock_bh(&pool->sp_lock);
715a7455442SGreg Banks 	} else {
716a7455442SGreg Banks 		/* choose a pool in round-robin fashion */
717a7455442SGreg Banks 		for (i = 0; i < serv->sv_nrpools; i++) {
718a7455442SGreg Banks 			pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
719a7455442SGreg Banks 			spin_lock_bh(&pool->sp_lock);
720a7455442SGreg Banks 			if (!list_empty(&pool->sp_all_threads))
721a7455442SGreg Banks 				goto found_pool;
722a7455442SGreg Banks 			spin_unlock_bh(&pool->sp_lock);
723a7455442SGreg Banks 		}
724a7455442SGreg Banks 		return NULL;
725a7455442SGreg Banks 	}
726a7455442SGreg Banks 
727a7455442SGreg Banks found_pool:
728a7455442SGreg Banks 	if (!list_empty(&pool->sp_all_threads)) {
729a7455442SGreg Banks 		struct svc_rqst *rqstp;
730a7455442SGreg Banks 
731a7455442SGreg Banks 		/*
732a7455442SGreg Banks 		 * Remove from the pool->sp_all_threads list
733a7455442SGreg Banks 		 * so we don't try to kill it again.
734a7455442SGreg Banks 		 */
735a7455442SGreg Banks 		rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
73681244386SJeff Layton 		set_bit(RQ_VICTIM, &rqstp->rq_flags);
73781244386SJeff Layton 		list_del_rcu(&rqstp->rq_all);
738a7455442SGreg Banks 		task = rqstp->rq_task;
739a7455442SGreg Banks 	}
740a7455442SGreg Banks 	spin_unlock_bh(&pool->sp_lock);
741a7455442SGreg Banks 
742a7455442SGreg Banks 	return task;
743a7455442SGreg Banks }
744a7455442SGreg Banks 
7459e0d8768STrond Myklebust /* create new threads */
7469e0d8768STrond Myklebust static int
7479e0d8768STrond Myklebust svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
7489e0d8768STrond Myklebust {
7499e0d8768STrond Myklebust 	struct svc_rqst	*rqstp;
7509e0d8768STrond Myklebust 	struct task_struct *task;
7519e0d8768STrond Myklebust 	struct svc_pool *chosen_pool;
7529e0d8768STrond Myklebust 	unsigned int state = serv->sv_nrthreads-1;
7539e0d8768STrond Myklebust 	int node;
7549e0d8768STrond Myklebust 
7559e0d8768STrond Myklebust 	do {
7569e0d8768STrond Myklebust 		nrservs--;
7579e0d8768STrond Myklebust 		chosen_pool = choose_pool(serv, pool, &state);
7589e0d8768STrond Myklebust 
7599e0d8768STrond Myklebust 		node = svc_pool_map_get_node(chosen_pool->sp_id);
7609e0d8768STrond Myklebust 		rqstp = svc_prepare_thread(serv, chosen_pool, node);
7619e0d8768STrond Myklebust 		if (IS_ERR(rqstp))
7629e0d8768STrond Myklebust 			return PTR_ERR(rqstp);
7639e0d8768STrond Myklebust 
76437902c63SChuck Lever 		task = kthread_create_on_node(serv->sv_threadfn, rqstp,
7659e0d8768STrond Myklebust 					      node, "%s", serv->sv_name);
7669e0d8768STrond Myklebust 		if (IS_ERR(task)) {
7679e0d8768STrond Myklebust 			svc_exit_thread(rqstp);
7689e0d8768STrond Myklebust 			return PTR_ERR(task);
7699e0d8768STrond Myklebust 		}
7709e0d8768STrond Myklebust 
7719e0d8768STrond Myklebust 		rqstp->rq_task = task;
7729e0d8768STrond Myklebust 		if (serv->sv_nrpools > 1)
7739e0d8768STrond Myklebust 			svc_pool_map_set_cpumask(task, chosen_pool->sp_id);
7749e0d8768STrond Myklebust 
7759e0d8768STrond Myklebust 		svc_sock_update_bufs(serv);
7769e0d8768STrond Myklebust 		wake_up_process(task);
7779e0d8768STrond Myklebust 	} while (nrservs > 0);
7789e0d8768STrond Myklebust 
7799e0d8768STrond Myklebust 	return 0;
7809e0d8768STrond Myklebust }
7819e0d8768STrond Myklebust 
782a7455442SGreg Banks /*
783a7455442SGreg Banks  * Create or destroy threads so that the total number of threads
784a7455442SGreg Banks  * becomes the given number.  If `pool' is non-NULL, this applies only
785a7455442SGreg Banks  * to threads in that pool; otherwise it round-robins between all
78694cf3179SJ. Bruce Fields  * pools.  The caller must ensure mutual exclusion between this and
78794cf3179SJ. Bruce Fields  * server startup or shutdown.
788a7455442SGreg Banks  */
789a7455442SGreg Banks 
790ed6473ddSTrond Myklebust /* destroy old threads */
791ed6473ddSTrond Myklebust static int
792ed6473ddSTrond Myklebust svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
793ed6473ddSTrond Myklebust {
7949ca6705dSBenjamin Coddington 	struct svc_rqst	*rqstp;
795ed6473ddSTrond Myklebust 	struct task_struct *task;
796ed6473ddSTrond Myklebust 	unsigned int state = serv->sv_nrthreads-1;
797ed6473ddSTrond Myklebust 
798ed6473ddSTrond Myklebust 	/* destroy old threads */
799ed6473ddSTrond Myklebust 	do {
800ed6473ddSTrond Myklebust 		task = choose_victim(serv, pool, &state);
801ed6473ddSTrond Myklebust 		if (task == NULL)
802ed6473ddSTrond Myklebust 			break;
8039ca6705dSBenjamin Coddington 		rqstp = kthread_data(task);
8049ca6705dSBenjamin Coddington 		/* Did we lose a race to the sv_threadfn thread? */
8059ca6705dSBenjamin Coddington 		if (kthread_stop(task) == -EINTR)
8069ca6705dSBenjamin Coddington 			svc_exit_thread(rqstp);
807ed6473ddSTrond Myklebust 		nrservs++;
808ed6473ddSTrond Myklebust 	} while (nrservs < 0);
809ed6473ddSTrond Myklebust 	return 0;
810ed6473ddSTrond Myklebust }
811ed6473ddSTrond Myklebust 
812ed6473ddSTrond Myklebust int
8133ebdbe52SNeilBrown svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
814ed6473ddSTrond Myklebust {
815ed6473ddSTrond Myklebust 	if (pool == NULL) {
816ec52361dSNeilBrown 		nrservs -= serv->sv_nrthreads;
817ed6473ddSTrond Myklebust 	} else {
818ed6473ddSTrond Myklebust 		spin_lock_bh(&pool->sp_lock);
819ed6473ddSTrond Myklebust 		nrservs -= pool->sp_nrthreads;
820ed6473ddSTrond Myklebust 		spin_unlock_bh(&pool->sp_lock);
821ed6473ddSTrond Myklebust 	}
822ed6473ddSTrond Myklebust 
823ed6473ddSTrond Myklebust 	if (nrservs > 0)
824ed6473ddSTrond Myklebust 		return svc_start_kthreads(serv, pool, nrservs);
825ed6473ddSTrond Myklebust 	if (nrservs < 0)
826ed6473ddSTrond Myklebust 		return svc_stop_kthreads(serv, pool, nrservs);
827ed6473ddSTrond Myklebust 	return 0;
828ed6473ddSTrond Myklebust }
8293ebdbe52SNeilBrown EXPORT_SYMBOL_GPL(svc_set_num_threads);
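
/*
 * Illustrative sketch: a service owner typically sizes the thread
 * population right after creating the serv, for example eight threads
 * spread round-robin over all pools (pool == NULL).  The caller is
 * assumed to already serialise this against its own startup/shutdown
 * path:
 *
 *	error = svc_set_num_threads(serv, NULL, 8);
 *	if (error)
 *		return error;
 */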
830ed6473ddSTrond Myklebust 
8312f0f88f4SChuck Lever /**
8322f0f88f4SChuck Lever  * svc_rqst_replace_page - Replace one page in rq_pages[]
8332f0f88f4SChuck Lever  * @rqstp: svc_rqst with pages to replace
8342f0f88f4SChuck Lever  * @page: replacement page
8352f0f88f4SChuck Lever  *
8362f0f88f4SChuck Lever  * When replacing a page in rq_pages, batch the release of the
8372f0f88f4SChuck Lever  * replaced pages to avoid hammering the page allocator.
8380f516248SChuck Lever  *
8390f516248SChuck Lever  * Return values:
8400f516248SChuck Lever  *   %true: page replaced
8410f516248SChuck Lever  *   %false: array bounds checking failed
8422f0f88f4SChuck Lever  */
8430f516248SChuck Lever bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page)
8442f0f88f4SChuck Lever {
8450f516248SChuck Lever 	struct page **begin = rqstp->rq_pages;
8460f516248SChuck Lever 	struct page **end = &rqstp->rq_pages[RPCSVC_MAXPAGES];
8470f516248SChuck Lever 
8480f516248SChuck Lever 	if (unlikely(rqstp->rq_next_page < begin || rqstp->rq_next_page > end)) {
8490f516248SChuck Lever 		trace_svc_replace_page_err(rqstp);
8500f516248SChuck Lever 		return false;
8510f516248SChuck Lever 	}
8520f516248SChuck Lever 
8532f0f88f4SChuck Lever 	if (*rqstp->rq_next_page) {
85476fa8842SMatthew Wilcox (Oracle) 		if (!folio_batch_add(&rqstp->rq_fbatch,
85576fa8842SMatthew Wilcox (Oracle) 				page_folio(*rqstp->rq_next_page)))
85676fa8842SMatthew Wilcox (Oracle) 			__folio_batch_release(&rqstp->rq_fbatch);
8572f0f88f4SChuck Lever 	}
8582f0f88f4SChuck Lever 
8592f0f88f4SChuck Lever 	get_page(page);
8602f0f88f4SChuck Lever 	*(rqstp->rq_next_page++) = page;
8610f516248SChuck Lever 	return true;
8622f0f88f4SChuck Lever }
8632f0f88f4SChuck Lever EXPORT_SYMBOL_GPL(svc_rqst_replace_page);
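
/*
 * Illustrative sketch: a read path that has just filled "page" with
 * payload can splice it into the reply like this.  On a bounds failure
 * the caller chooses its own error (-EIO here is only an example); the
 * helper has already emitted a trace event:
 *
 *	if (!svc_rqst_replace_page(rqstp, page))
 *		return -EIO;
 */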
8642f0f88f4SChuck Lever 
865b20cb39dSChuck Lever /**
866b20cb39dSChuck Lever  * svc_rqst_release_pages - Release Reply buffer pages
867b20cb39dSChuck Lever  * @rqstp: RPC transaction context
868b20cb39dSChuck Lever  *
869b20cb39dSChuck Lever  * Release response pages that might still be in flight after
870b20cb39dSChuck Lever  * svc_send, and any spliced filesystem-owned pages.
871b20cb39dSChuck Lever  */
872b20cb39dSChuck Lever void svc_rqst_release_pages(struct svc_rqst *rqstp)
873b20cb39dSChuck Lever {
874647a2a64SChuck Lever 	int i, count = rqstp->rq_next_page - rqstp->rq_respages;
875b20cb39dSChuck Lever 
876647a2a64SChuck Lever 	if (count) {
877647a2a64SChuck Lever 		release_pages(rqstp->rq_respages, count);
878647a2a64SChuck Lever 		for (i = 0; i < count; i++)
879647a2a64SChuck Lever 			rqstp->rq_respages[i] = NULL;
880b20cb39dSChuck Lever 	}
881b20cb39dSChuck Lever }
882b20cb39dSChuck Lever 
883a7455442SGreg Banks /*
8843c519914SJeff Layton  * Called from a server thread as it's exiting. Caller must hold the "service
8853c519914SJeff Layton  * mutex" for the service.
8861da177e4SLinus Torvalds  */
8871da177e4SLinus Torvalds void
8881b6dc1dfSJeff Layton svc_rqst_free(struct svc_rqst *rqstp)
8891da177e4SLinus Torvalds {
89076fa8842SMatthew Wilcox (Oracle) 	folio_batch_release(&rqstp->rq_fbatch);
8911da177e4SLinus Torvalds 	svc_release_buffer(rqstp);
892b9f83ffaSYunjian Wang 	if (rqstp->rq_scratch_page)
8935191955dSChuck Lever 		put_page(rqstp->rq_scratch_page);
8941da177e4SLinus Torvalds 	kfree(rqstp->rq_resp);
8951da177e4SLinus Torvalds 	kfree(rqstp->rq_argp);
8961da177e4SLinus Torvalds 	kfree(rqstp->rq_auth_data);
8971b6dc1dfSJeff Layton 	kfree_rcu(rqstp, rq_rcu_head);
8981b6dc1dfSJeff Layton }
8991b6dc1dfSJeff Layton EXPORT_SYMBOL_GPL(svc_rqst_free);
9001b6dc1dfSJeff Layton 
9011b6dc1dfSJeff Layton void
9021b6dc1dfSJeff Layton svc_exit_thread(struct svc_rqst *rqstp)
9031b6dc1dfSJeff Layton {
9041b6dc1dfSJeff Layton 	struct svc_serv	*serv = rqstp->rq_server;
9051b6dc1dfSJeff Layton 	struct svc_pool	*pool = rqstp->rq_pool;
9063262c816SGreg Banks 
9073262c816SGreg Banks 	spin_lock_bh(&pool->sp_lock);
9083262c816SGreg Banks 	pool->sp_nrthreads--;
90981244386SJeff Layton 	if (!test_and_set_bit(RQ_VICTIM, &rqstp->rq_flags))
91081244386SJeff Layton 		list_del_rcu(&rqstp->rq_all);
9113262c816SGreg Banks 	spin_unlock_bh(&pool->sp_lock);
9123262c816SGreg Banks 
9132a36395fSNeilBrown 	spin_lock_bh(&serv->sv_lock);
914ec52361dSNeilBrown 	serv->sv_nrthreads -= 1;
9152a36395fSNeilBrown 	spin_unlock_bh(&serv->sv_lock);
916ec52361dSNeilBrown 	svc_sock_update_bufs(serv);
917ec52361dSNeilBrown 
9181b6dc1dfSJeff Layton 	svc_rqst_free(rqstp);
9191da177e4SLinus Torvalds 
920ec52361dSNeilBrown 	svc_put(serv);
9211da177e4SLinus Torvalds }
92224c3767eSTrond Myklebust EXPORT_SYMBOL_GPL(svc_exit_thread);
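
/*
 * Illustrative sketch of the thread side of the lifecycle: the threadfn
 * passed to svc_create_pooled() is started by svc_start_kthreads() with
 * its svc_rqst as the argument, and is expected to call
 * svc_exit_thread() on that rqstp before returning.  Schematically,
 * with a hypothetical frobd_handle_one_request() standing in for the
 * service's receive-and-dispatch loop body:
 *
 *	static int frobd_threadfn(void *data)
 *	{
 *		struct svc_rqst *rqstp = data;
 *
 *		while (!kthread_should_stop())
 *			frobd_handle_one_request(rqstp);
 *		svc_exit_thread(rqstp);
 *		return 0;
 *	}
 */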
9231da177e4SLinus Torvalds 
9241da177e4SLinus Torvalds /*
9252c7eb0b2SChuck Lever  * Register an "inet" protocol family netid with the local
9262c7eb0b2SChuck Lever  * rpcbind daemon via an rpcbind v4 SET request.
927a26cfad6SChuck Lever  *
9282c7eb0b2SChuck Lever  * No netconfig infrastructure is available in the kernel, so
9292c7eb0b2SChuck Lever  * we map IP_ protocol numbers to netids by hand.
930a26cfad6SChuck Lever  *
9312c7eb0b2SChuck Lever  * Returns zero on success; a negative errno value is returned
9322c7eb0b2SChuck Lever  * if any error occurs.
9331da177e4SLinus Torvalds  */
9345247fab5SStanislav Kinsbursky static int __svc_rpcb_register4(struct net *net, const u32 program,
9355247fab5SStanislav Kinsbursky 				const u32 version,
936a26cfad6SChuck Lever 				const unsigned short protocol,
937a26cfad6SChuck Lever 				const unsigned short port)
938a26cfad6SChuck Lever {
939cadc0fa5SChuck Lever 	const struct sockaddr_in sin = {
940a26cfad6SChuck Lever 		.sin_family		= AF_INET,
941a26cfad6SChuck Lever 		.sin_addr.s_addr	= htonl(INADDR_ANY),
942a26cfad6SChuck Lever 		.sin_port		= htons(port),
943a26cfad6SChuck Lever 	};
944cadc0fa5SChuck Lever 	const char *netid;
945cadc0fa5SChuck Lever 	int error;
9462c7eb0b2SChuck Lever 
9472c7eb0b2SChuck Lever 	switch (protocol) {
9482c7eb0b2SChuck Lever 	case IPPROTO_UDP:
9492c7eb0b2SChuck Lever 		netid = RPCBIND_NETID_UDP;
9502c7eb0b2SChuck Lever 		break;
9512c7eb0b2SChuck Lever 	case IPPROTO_TCP:
9522c7eb0b2SChuck Lever 		netid = RPCBIND_NETID_TCP;
9532c7eb0b2SChuck Lever 		break;
9542c7eb0b2SChuck Lever 	default:
955ba5c35e0SChuck Lever 		return -ENOPROTOOPT;
9562c7eb0b2SChuck Lever 	}
9572c7eb0b2SChuck Lever 
9585247fab5SStanislav Kinsbursky 	error = rpcb_v4_register(net, program, version,
959cadc0fa5SChuck Lever 					(const struct sockaddr *)&sin, netid);
960cadc0fa5SChuck Lever 
961cadc0fa5SChuck Lever 	/*
962cadc0fa5SChuck Lever 	 * User space didn't support rpcbind v4, so retry this
963cadc0fa5SChuck Lever 	 * registration request with the legacy rpcbind v2 protocol.
964cadc0fa5SChuck Lever 	 */
965cadc0fa5SChuck Lever 	if (error == -EPROTONOSUPPORT)
9665247fab5SStanislav Kinsbursky 		error = rpcb_register(net, program, version, protocol, port);
967cadc0fa5SChuck Lever 
968cadc0fa5SChuck Lever 	return error;
9692c7eb0b2SChuck Lever }
9702c7eb0b2SChuck Lever 
971dfd56b8bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
9722c7eb0b2SChuck Lever /*
9732c7eb0b2SChuck Lever  * Register an "inet6" protocol family netid with the local
9742c7eb0b2SChuck Lever  * rpcbind daemon via an rpcbind v4 SET request.
9752c7eb0b2SChuck Lever  *
9762c7eb0b2SChuck Lever  * No netconfig infrastructure is available in the kernel, so
9772c7eb0b2SChuck Lever  * we map IP_ protocol numbers to netids by hand.
9782c7eb0b2SChuck Lever  *
9792c7eb0b2SChuck Lever  * Returns zero on success; a negative errno value is returned
9802c7eb0b2SChuck Lever  * if any error occurs.
9812c7eb0b2SChuck Lever  */
9825247fab5SStanislav Kinsbursky static int __svc_rpcb_register6(struct net *net, const u32 program,
9835247fab5SStanislav Kinsbursky 				const u32 version,
9842c7eb0b2SChuck Lever 				const unsigned short protocol,
9852c7eb0b2SChuck Lever 				const unsigned short port)
9862c7eb0b2SChuck Lever {
987cadc0fa5SChuck Lever 	const struct sockaddr_in6 sin6 = {
988a26cfad6SChuck Lever 		.sin6_family		= AF_INET6,
989a26cfad6SChuck Lever 		.sin6_addr		= IN6ADDR_ANY_INIT,
990a26cfad6SChuck Lever 		.sin6_port		= htons(port),
991a26cfad6SChuck Lever 	};
992cadc0fa5SChuck Lever 	const char *netid;
993cadc0fa5SChuck Lever 	int error;
994a26cfad6SChuck Lever 
9952c7eb0b2SChuck Lever 	switch (protocol) {
9962c7eb0b2SChuck Lever 	case IPPROTO_UDP:
9972c7eb0b2SChuck Lever 		netid = RPCBIND_NETID_UDP6;
9982c7eb0b2SChuck Lever 		break;
9992c7eb0b2SChuck Lever 	case IPPROTO_TCP:
10002c7eb0b2SChuck Lever 		netid = RPCBIND_NETID_TCP6;
10012c7eb0b2SChuck Lever 		break;
10022c7eb0b2SChuck Lever 	default:
1003ba5c35e0SChuck Lever 		return -ENOPROTOOPT;
10042c7eb0b2SChuck Lever 	}
10052c7eb0b2SChuck Lever 
10065247fab5SStanislav Kinsbursky 	error = rpcb_v4_register(net, program, version,
1007cadc0fa5SChuck Lever 					(const struct sockaddr *)&sin6, netid);
1008cadc0fa5SChuck Lever 
1009cadc0fa5SChuck Lever 	/*
1010cadc0fa5SChuck Lever 	 * User space didn't support rpcbind version 4, so we won't
1011cadc0fa5SChuck Lever 	 * use a PF_INET6 listener.
1012cadc0fa5SChuck Lever 	 */
1013cadc0fa5SChuck Lever 	if (error == -EPROTONOSUPPORT)
1014cadc0fa5SChuck Lever 		error = -EAFNOSUPPORT;
1015cadc0fa5SChuck Lever 
1016cadc0fa5SChuck Lever 	return error;
10172c7eb0b2SChuck Lever }
1018dfd56b8bSEric Dumazet #endif	/* IS_ENABLED(CONFIG_IPV6) */
10192c7eb0b2SChuck Lever 
10202c7eb0b2SChuck Lever /*
10212c7eb0b2SChuck Lever  * Register a kernel RPC service via rpcbind version 4.
10222c7eb0b2SChuck Lever  *
10232c7eb0b2SChuck Lever  * Returns zero on success; a negative errno value is returned
10242c7eb0b2SChuck Lever  * if any error occurs.
10252c7eb0b2SChuck Lever  */
10265247fab5SStanislav Kinsbursky static int __svc_register(struct net *net, const char *progname,
1027363f724cSChuck Lever 			  const u32 program, const u32 version,
10284b62e58cSChuck Lever 			  const int family,
10292c7eb0b2SChuck Lever 			  const unsigned short protocol,
10302c7eb0b2SChuck Lever 			  const unsigned short port)
10312c7eb0b2SChuck Lever {
1032363f724cSChuck Lever 	int error = -EAFNOSUPPORT;
10332c7eb0b2SChuck Lever 
1034a26cfad6SChuck Lever 	switch (family) {
10354b62e58cSChuck Lever 	case PF_INET:
10365247fab5SStanislav Kinsbursky 		error = __svc_rpcb_register4(net, program, version,
10372c7eb0b2SChuck Lever 						protocol, port);
1038cadc0fa5SChuck Lever 		break;
1039dfd56b8bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
10404b62e58cSChuck Lever 	case PF_INET6:
10415247fab5SStanislav Kinsbursky 		error = __svc_rpcb_register6(net, program, version,
10422c7eb0b2SChuck Lever 						protocol, port);
1043dfd56b8bSEric Dumazet #endif
10442c7eb0b2SChuck Lever 	}
10452c7eb0b2SChuck Lever 
104607a27305SChuck Lever 	trace_svc_register(progname, version, family, protocol, port, error);
1047a26cfad6SChuck Lever 	return error;
1048a26cfad6SChuck Lever }
1049a26cfad6SChuck Lever 
1050642ee6b2STrond Myklebust int svc_rpcbind_set_version(struct net *net,
1051642ee6b2STrond Myklebust 			    const struct svc_program *progp,
1052642ee6b2STrond Myklebust 			    u32 version, int family,
1053642ee6b2STrond Myklebust 			    unsigned short proto,
1054642ee6b2STrond Myklebust 			    unsigned short port)
1055642ee6b2STrond Myklebust {
1056642ee6b2STrond Myklebust 	return __svc_register(net, progp->pg_name, progp->pg_prog,
1057642ee6b2STrond Myklebust 				version, family, proto, port);
1058642ee6b2STrond Myklebust 
1059642ee6b2STrond Myklebust }
1060642ee6b2STrond Myklebust EXPORT_SYMBOL_GPL(svc_rpcbind_set_version);
1061642ee6b2STrond Myklebust 
1062642ee6b2STrond Myklebust int svc_generic_rpcbind_set(struct net *net,
1063642ee6b2STrond Myklebust 			    const struct svc_program *progp,
1064642ee6b2STrond Myklebust 			    u32 version, int family,
1065642ee6b2STrond Myklebust 			    unsigned short proto,
1066642ee6b2STrond Myklebust 			    unsigned short port)
1067642ee6b2STrond Myklebust {
1068642ee6b2STrond Myklebust 	const struct svc_version *vers = progp->pg_vers[version];
1069642ee6b2STrond Myklebust 	int error;
1070642ee6b2STrond Myklebust 
1071642ee6b2STrond Myklebust 	if (vers == NULL)
1072642ee6b2STrond Myklebust 		return 0;
1073642ee6b2STrond Myklebust 
1074642ee6b2STrond Myklebust 	if (vers->vs_hidden) {
1075b4af5932SChuck Lever 		trace_svc_noregister(progp->pg_name, version, proto,
1076b4af5932SChuck Lever 				     port, family, 0);
1077642ee6b2STrond Myklebust 		return 0;
1078642ee6b2STrond Myklebust 	}
1079642ee6b2STrond Myklebust 
1080642ee6b2STrond Myklebust 	/*
1081642ee6b2STrond Myklebust 	 * Don't register a UDP port if we need congestion
1082642ee6b2STrond Myklebust 	 * control.
1083642ee6b2STrond Myklebust 	 */
1084642ee6b2STrond Myklebust 	if (vers->vs_need_cong_ctrl && proto == IPPROTO_UDP)
1085642ee6b2STrond Myklebust 		return 0;
1086642ee6b2STrond Myklebust 
1087642ee6b2STrond Myklebust 	error = svc_rpcbind_set_version(net, progp, version,
1088642ee6b2STrond Myklebust 					family, proto, port);
1089642ee6b2STrond Myklebust 
1090642ee6b2STrond Myklebust 	return (vers->vs_rpcb_optnl) ? 0 : error;
1091642ee6b2STrond Myklebust }
1092642ee6b2STrond Myklebust EXPORT_SYMBOL_GPL(svc_generic_rpcbind_set);
1093642ee6b2STrond Myklebust 
1094a26cfad6SChuck Lever /**
1095a26cfad6SChuck Lever  * svc_register - register an RPC service with the local portmapper
1096a26cfad6SChuck Lever  * @serv: svc_serv struct for the service to register
10975247fab5SStanislav Kinsbursky  * @net: net namespace for the service to register
10984b62e58cSChuck Lever  * @family: protocol family of service's listener socket
1099a26cfad6SChuck Lever  * @proto: transport protocol number to advertise
1100a26cfad6SChuck Lever  * @port: port to advertise
1101a26cfad6SChuck Lever  *
11024b62e58cSChuck Lever  * Service is registered for any address in the passed-in protocol family
1103a26cfad6SChuck Lever  */
11045247fab5SStanislav Kinsbursky int svc_register(const struct svc_serv *serv, struct net *net,
11055247fab5SStanislav Kinsbursky 		 const int family, const unsigned short proto,
11065247fab5SStanislav Kinsbursky 		 const unsigned short port)
11071da177e4SLinus Torvalds {
11081da177e4SLinus Torvalds 	struct svc_program	*progp;
1109ea339d46SChuck Lever 	unsigned int		i;
111014aeb211SChuck Lever 	int			error = 0;
11111da177e4SLinus Torvalds 
11120af39507SWeston Andros Adamson 	WARN_ON_ONCE(proto == 0 && port == 0);
11130af39507SWeston Andros Adamson 	if (proto == 0 && port == 0)
11140af39507SWeston Andros Adamson 		return -EINVAL;
11151da177e4SLinus Torvalds 
1116bc5fea42SOlaf Kirch 	for (progp = serv->sv_program; progp; progp = progp->pg_next) {
11171da177e4SLinus Torvalds 		for (i = 0; i < progp->pg_nvers; i++) {
1118bc5fea42SOlaf Kirch 
1119642ee6b2STrond Myklebust 			error = progp->pg_rpcbind_set(net, progp, i,
1120642ee6b2STrond Myklebust 					family, proto, port);
11217e55b59bSKinglong Mee 			if (error < 0) {
11227e55b59bSKinglong Mee 				printk(KERN_WARNING "svc: failed to register "
11237e55b59bSKinglong Mee 					"%sv%u RPC service (errno %d).\n",
11247e55b59bSKinglong Mee 					progp->pg_name, i, -error);
11251da177e4SLinus Torvalds 				break;
11261da177e4SLinus Torvalds 			}
1127bc5fea42SOlaf Kirch 		}
11287e55b59bSKinglong Mee 	}
11291da177e4SLinus Torvalds 
11307252d575SChuck Lever 	return error;
11317252d575SChuck Lever }
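
/*
 * Illustrative sketch: a caller advertising a newly bound IPv4 TCP
 * listener might invoke this as follows, with "net" and "port" taken
 * from that caller's context:
 *
 *	error = svc_register(serv, net, PF_INET, IPPROTO_TCP, port);
 *	if (error < 0)
 *		return error;
 */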
11327252d575SChuck Lever 
1133d5a8620fSChuck Lever /*
1134d5a8620fSChuck Lever  * If user space is running rpcbind, it should take the v4 UNSET
1135d5a8620fSChuck Lever  * and clear everything for this [program, version].  If user space
1136d5a8620fSChuck Lever  * is running portmap, it will reject the v4 UNSET, but won't have
1137d5a8620fSChuck Lever  * any "inet6" entries anyway.  So a PMAP_UNSET should be sufficient
1138d5a8620fSChuck Lever  * in this case to clear all existing entries for [program, version].
1139d5a8620fSChuck Lever  */
11405247fab5SStanislav Kinsbursky static void __svc_unregister(struct net *net, const u32 program, const u32 version,
1141f6fb3f6fSChuck Lever 			     const char *progname)
1142f6fb3f6fSChuck Lever {
1143f6fb3f6fSChuck Lever 	int error;
1144f6fb3f6fSChuck Lever 
11455247fab5SStanislav Kinsbursky 	error = rpcb_v4_register(net, program, version, NULL, "");
1146d5a8620fSChuck Lever 
1147d5a8620fSChuck Lever 	/*
1148d5a8620fSChuck Lever 	 * User space didn't support rpcbind v4, so retry this
1149d5a8620fSChuck Lever 	 * request with the legacy rpcbind v2 protocol.
1150d5a8620fSChuck Lever 	 */
1151d5a8620fSChuck Lever 	if (error == -EPROTONOSUPPORT)
11525247fab5SStanislav Kinsbursky 		error = rpcb_register(net, program, version, 0, 0);
1153d5a8620fSChuck Lever 
1154b4af5932SChuck Lever 	trace_svc_unregister(progname, version, error);
1155f6fb3f6fSChuck Lever }
1156f6fb3f6fSChuck Lever 
11577252d575SChuck Lever /*
1158f6fb3f6fSChuck Lever  * All netids, bind addresses and ports registered for [program, version]
1159f6fb3f6fSChuck Lever  * are removed from the local rpcbind database (if the service is not
1160f6fb3f6fSChuck Lever  * hidden) to make way for a new instance of the service.
11617252d575SChuck Lever  *
1162f6fb3f6fSChuck Lever  * The result of unregistration is reported via the svc_unregister tracepoint
1163f6fb3f6fSChuck Lever  * for those who want to verify it, but is otherwise not important.
11647252d575SChuck Lever  */
11655247fab5SStanislav Kinsbursky static void svc_unregister(const struct svc_serv *serv, struct net *net)
11667252d575SChuck Lever {
116700a87e5dSChuck Lever 	struct sighand_struct *sighand;
11687252d575SChuck Lever 	struct svc_program *progp;
11697252d575SChuck Lever 	unsigned long flags;
11707252d575SChuck Lever 	unsigned int i;
11717252d575SChuck Lever 
11727252d575SChuck Lever 	clear_thread_flag(TIF_SIGPENDING);
11737252d575SChuck Lever 
11747252d575SChuck Lever 	for (progp = serv->sv_program; progp; progp = progp->pg_next) {
11757252d575SChuck Lever 		for (i = 0; i < progp->pg_nvers; i++) {
11767252d575SChuck Lever 			if (progp->pg_vers[i] == NULL)
11777252d575SChuck Lever 				continue;
11787252d575SChuck Lever 			if (progp->pg_vers[i]->vs_hidden)
11797252d575SChuck Lever 				continue;
11805247fab5SStanislav Kinsbursky 			__svc_unregister(net, progp->pg_prog, i, progp->pg_name);
11817252d575SChuck Lever 		}
11827252d575SChuck Lever 	}
11837252d575SChuck Lever 
118400a87e5dSChuck Lever 	rcu_read_lock();
118500a87e5dSChuck Lever 	sighand = rcu_dereference(current->sighand);
118600a87e5dSChuck Lever 	spin_lock_irqsave(&sighand->siglock, flags);
11871da177e4SLinus Torvalds 	recalc_sigpending();
118800a87e5dSChuck Lever 	spin_unlock_irqrestore(&sighand->siglock, flags);
118900a87e5dSChuck Lever 	rcu_read_unlock();
11901da177e4SLinus Torvalds }
11911da177e4SLinus Torvalds 
11921da177e4SLinus Torvalds /*
11937032a3ddSJ. Bruce Fields  * dprintk the given message, prefixed with the address of the client that caused it.
1194354ecbb9SDr. David Alan Gilbert  */
1195f895b252SJeff Layton #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
1196b9075fa9SJoe Perches static __printf(2, 3)
1197e87cc472SJoe Perches void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
1198354ecbb9SDr. David Alan Gilbert {
1199e87cc472SJoe Perches 	struct va_format vaf;
1200354ecbb9SDr. David Alan Gilbert 	va_list args;
1201354ecbb9SDr. David Alan Gilbert 	char 	buf[RPC_MAX_ADDRBUFLEN];
1202354ecbb9SDr. David Alan Gilbert 
1203354ecbb9SDr. David Alan Gilbert 	va_start(args, fmt);
1204354ecbb9SDr. David Alan Gilbert 
1205e87cc472SJoe Perches 	vaf.fmt = fmt;
1206e87cc472SJoe Perches 	vaf.va = &args;
1207e87cc472SJoe Perches 
12087032a3ddSJ. Bruce Fields 	dprintk("svc: %s: %pV", svc_print_addr(rqstp, buf, sizeof(buf)), &vaf);
1209e87cc472SJoe Perches 
1210e87cc472SJoe Perches 	va_end(args);
1211354ecbb9SDr. David Alan Gilbert }
1212624ab464SJ. Bruce Fields #else
1213624ab464SJ. Bruce Fields static __printf(2, 3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {}
1214624ab464SJ. Bruce Fields #endif
1215354ecbb9SDr. David Alan Gilbert 
12168e5b6773STrond Myklebust __be32
12178e5b6773STrond Myklebust svc_generic_init_request(struct svc_rqst *rqstp,
12188e5b6773STrond Myklebust 		const struct svc_program *progp,
12198e5b6773STrond Myklebust 		struct svc_process_info *ret)
12208e5b6773STrond Myklebust {
12218e5b6773STrond Myklebust 	const struct svc_version *versp = NULL;	/* compiler food */
12228e5b6773STrond Myklebust 	const struct svc_procedure *procp = NULL;
12238e5b6773STrond Myklebust 
12248e5b6773STrond Myklebust 	if (rqstp->rq_vers >= progp->pg_nvers)
12258e5b6773STrond Myklebust 		goto err_bad_vers;
12268e5b6773STrond Myklebust 	versp = progp->pg_vers[rqstp->rq_vers];
12278e5b6773STrond Myklebust 	if (!versp)
12288e5b6773STrond Myklebust 		goto err_bad_vers;
12298e5b6773STrond Myklebust 
12308e5b6773STrond Myklebust 	/*
12318e5b6773STrond Myklebust 	 * Some protocol versions (namely NFSv4) require some form of
12328e5b6773STrond Myklebust 	 * congestion control.  (See RFC 7530 section 3.1 paragraph 2)
12338e5b6773STrond Myklebust 	 * In other words, UDP is not allowed. We mark those when setting
12348e5b6773STrond Myklebust 	 * up the svc_xprt, and verify that here.
12358e5b6773STrond Myklebust 	 *
12368e5b6773STrond Myklebust 	 * The spec is not very clear about what error should be returned
12378e5b6773STrond Myklebust 	 * when someone tries to access a server that is listening on UDP
12388e5b6773STrond Myklebust 	 * for lower versions. RPC_PROG_MISMATCH seems to be the closest
12398e5b6773STrond Myklebust 	 * fit.
12408e5b6773STrond Myklebust 	 */
12418e5b6773STrond Myklebust 	if (versp->vs_need_cong_ctrl && rqstp->rq_xprt &&
12428e5b6773STrond Myklebust 	    !test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags))
12438e5b6773STrond Myklebust 		goto err_bad_vers;
12448e5b6773STrond Myklebust 
12458e5b6773STrond Myklebust 	if (rqstp->rq_proc >= versp->vs_nproc)
12468e5b6773STrond Myklebust 		goto err_bad_proc;
12478e5b6773STrond Myklebust 	rqstp->rq_procinfo = procp = &versp->vs_proc[rqstp->rq_proc];
12488e5b6773STrond Myklebust 	if (!procp)
12498e5b6773STrond Myklebust 		goto err_bad_proc;
12508e5b6773STrond Myklebust 
12518e5b6773STrond Myklebust 	/* Initialize storage for argp and resp */
1252103cc1faSChuck Lever 	memset(rqstp->rq_argp, 0, procp->pc_argzero);
12538e5b6773STrond Myklebust 	memset(rqstp->rq_resp, 0, procp->pc_ressize);
12548e5b6773STrond Myklebust 
12558e5b6773STrond Myklebust 	/* Bump per-procedure stats counter */
125665ba3d24SChuck Lever 	this_cpu_inc(versp->vs_count[rqstp->rq_proc]);
12578e5b6773STrond Myklebust 
12588e5b6773STrond Myklebust 	ret->dispatch = versp->vs_dispatch;
12598e5b6773STrond Myklebust 	return rpc_success;
12608e5b6773STrond Myklebust err_bad_vers:
12618e5b6773STrond Myklebust 	ret->mismatch.lovers = progp->pg_lovers;
12628e5b6773STrond Myklebust 	ret->mismatch.hivers = progp->pg_hivers;
12638e5b6773STrond Myklebust 	return rpc_prog_mismatch;
12648e5b6773STrond Myklebust err_bad_proc:
12658e5b6773STrond Myklebust 	return rpc_proc_unavail;
12668e5b6773STrond Myklebust }
12678e5b6773STrond Myklebust EXPORT_SYMBOL_GPL(svc_generic_init_request);
12688e5b6773STrond Myklebust 
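/*
 * Illustrative sketch, not part of the original file: the congestion
 * control check in svc_generic_init_request() relies on the transport
 * having marked itself when it was set up.  A hypothetical
 * connection-oriented transport would do roughly the following; the
 * helper name is an assumption made for this example.
 */
static void __maybe_unused example_mark_cong_ctrl(struct svc_xprt *xprt)
{
	/* Allow versions that require congestion control (e.g. NFSv4)
	 * to be served over this transport instance. */
	set_bit(XPT_CONG_CTRL, &xprt->xpt_flags);
}
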
1269354ecbb9SDr. David Alan Gilbert /*
12701cad7ea6SRicardo Labiaga  * Common routine for processing the RPC request.
12711da177e4SLinus Torvalds  */
12721cad7ea6SRicardo Labiaga static int
12735f69d5f6SChuck Lever svc_process_common(struct svc_rqst *rqstp)
12741da177e4SLinus Torvalds {
1275649a692eSChuck Lever 	struct xdr_stream	*xdr = &rqstp->rq_res_stream;
12761da177e4SLinus Torvalds 	struct svc_program	*progp;
1277860bda29SChristoph Hellwig 	const struct svc_procedure *procp = NULL;
12786fb2b47fSNeilBrown 	struct svc_serv		*serv = rqstp->rq_server;
12798e5b6773STrond Myklebust 	struct svc_process_info process;
12805b747a59SChuck Lever 	int			auth_res, rc;
1281649a692eSChuck Lever 	unsigned int		aoffset;
1282cee4db19SChuck Lever 	__be32			*p;
12831da177e4SLinus Torvalds 
128406eb8a56SChuck Lever 	/* Will be turned off by GSS integrity and privacy services */
12857827c81fSChuck Lever 	set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
12862f425878SAndy Adamson 	/* Will be turned off only when NFSv4 Sessions are used */
12877827c81fSChuck Lever 	set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
12887827c81fSChuck Lever 	clear_bit(RQ_DROPME, &rqstp->rq_flags);
1289e831fe65STom Tucker 
12902009e329SChuck Lever 	/* Construct the first words of the reply: */
1291649a692eSChuck Lever 	svcxdr_init_encode(rqstp);
1292649a692eSChuck Lever 	xdr_stream_encode_be32(xdr, rqstp->rq_xid);
1293649a692eSChuck Lever 	xdr_stream_encode_be32(xdr, rpc_reply);
12941da177e4SLinus Torvalds 
1295163cdfcaSChuck Lever 	p = xdr_inline_decode(&rqstp->rq_arg_stream, XDR_UNIT * 4);
1296163cdfcaSChuck Lever 	if (unlikely(!p))
1297163cdfcaSChuck Lever 		goto err_short_len;
1298163cdfcaSChuck Lever 	if (*p++ != cpu_to_be32(RPC_VERSION))
12991da177e4SLinus Torvalds 		goto err_bad_rpc;
13001da177e4SLinus Torvalds 
1301649a692eSChuck Lever 	xdr_stream_encode_be32(xdr, rpc_msg_accepted);
13021da177e4SLinus Torvalds 
1303163cdfcaSChuck Lever 	rqstp->rq_prog = be32_to_cpup(p++);
1304163cdfcaSChuck Lever 	rqstp->rq_vers = be32_to_cpup(p++);
1305163cdfcaSChuck Lever 	rqstp->rq_proc = be32_to_cpup(p);
13061da177e4SLinus Torvalds 
130780d188a6SNeilBrown 	for (progp = serv->sv_program; progp; progp = progp->pg_next)
13084119bd03SChuck Lever 		if (rqstp->rq_prog == progp->pg_prog)
130980d188a6SNeilBrown 			break;
131080d188a6SNeilBrown 
13111da177e4SLinus Torvalds 	/*
13121da177e4SLinus Torvalds 	 * Decode auth data, and add verifier to reply buffer.
13131da177e4SLinus Torvalds 	 * We do this before anything else in order to get a decent
13141da177e4SLinus Torvalds 	 * auth verifier.
13151da177e4SLinus Torvalds 	 */
1316438623a0SChuck Lever 	auth_res = svc_authenticate(rqstp);
13171da177e4SLinus Torvalds 	/* Also give the program a chance to reject this call: */
13185c2465dfSChuck Lever 	if (auth_res == SVC_OK && progp)
13191da177e4SLinus Torvalds 		auth_res = progp->pg_authenticate(rqstp);
1320438623a0SChuck Lever 	trace_svc_authenticate(rqstp, auth_res);
13211da177e4SLinus Torvalds 	switch (auth_res) {
13221da177e4SLinus Torvalds 	case SVC_OK:
13231da177e4SLinus Torvalds 		break;
13241da177e4SLinus Torvalds 	case SVC_GARBAGE:
13256d037b15SChuck Lever 		goto err_garbage_args;
13261da177e4SLinus Torvalds 	case SVC_SYSERR:
13276d037b15SChuck Lever 		goto err_system_err;
13281da177e4SLinus Torvalds 	case SVC_DENIED:
13291da177e4SLinus Torvalds 		goto err_bad_auth;
13301ebede86SNeilBrown 	case SVC_CLOSE:
13314d712ef1SChuck Lever 		goto close;
13321da177e4SLinus Torvalds 	case SVC_DROP:
13331da177e4SLinus Torvalds 		goto dropit;
13341da177e4SLinus Torvalds 	case SVC_COMPLETE:
13351da177e4SLinus Torvalds 		goto sendit;
13361da177e4SLinus Torvalds 	}
13371da177e4SLinus Torvalds 
13389ba02638SAndreas Gruenbacher 	if (progp == NULL)
13391da177e4SLinus Torvalds 		goto err_bad_prog;
13401da177e4SLinus Torvalds 
13416d037b15SChuck Lever 	switch (progp->pg_init_request(rqstp, progp, &process)) {
13428e5b6773STrond Myklebust 	case rpc_success:
13438e5b6773STrond Myklebust 		break;
13448e5b6773STrond Myklebust 	case rpc_prog_unavail:
13458e5b6773STrond Myklebust 		goto err_bad_prog;
13468e5b6773STrond Myklebust 	case rpc_prog_mismatch:
13471da177e4SLinus Torvalds 		goto err_bad_vers;
13488e5b6773STrond Myklebust 	case rpc_proc_unavail:
13491da177e4SLinus Torvalds 		goto err_bad_proc;
13508e5b6773STrond Myklebust 	}
13518e5b6773STrond Myklebust 
13528e5b6773STrond Myklebust 	procp = rqstp->rq_procinfo;
13538e5b6773STrond Myklebust 	/* Should this check go into the dispatcher? */
13548e5b6773STrond Myklebust 	if (!procp || !procp->pc_func)
13558e5b6773STrond Myklebust 		goto err_bad_proc;
13561da177e4SLinus Torvalds 
13571da177e4SLinus Torvalds 	/* Syntactic check complete */
13581da177e4SLinus Torvalds 	serv->sv_stats->rpccnt++;
13590b9547bfSChuck Lever 	trace_svc_process(rqstp, progp->pg_name);
13601da177e4SLinus Torvalds 
1361649a692eSChuck Lever 	aoffset = xdr_stream_pos(xdr);
13621da177e4SLinus Torvalds 
13631da177e4SLinus Torvalds 	/* un-reserve some of the out-queue now that we have a
13641da177e4SLinus Torvalds 	 * better idea of reply size
13651da177e4SLinus Torvalds 	 */
13661da177e4SLinus Torvalds 	if (procp->pc_xdrressize)
1367cd123012SJeff Layton 		svc_reserve_auth(rqstp, procp->pc_xdrressize<<2);
13681da177e4SLinus Torvalds 
13691da177e4SLinus Torvalds 	/* Call the function that processes the request. */
1370cee4db19SChuck Lever 	rc = process.dispatch(rqstp);
13715b747a59SChuck Lever 	if (procp->pc_release)
13725b747a59SChuck Lever 		procp->pc_release(rqstp);
1373*61182c79SAnna Schumaker 	xdr_finish_decode(xdr);
1374*61182c79SAnna Schumaker 
13755b747a59SChuck Lever 	if (!rc)
13765b747a59SChuck Lever 		goto dropit;
13779082e1d9SChuck Lever 	if (rqstp->rq_auth_stat != rpc_auth_ok)
13785b747a59SChuck Lever 		goto err_bad_auth;
13799082e1d9SChuck Lever 
1380cee4db19SChuck Lever 	if (*rqstp->rq_accept_statp != rpc_success)
1381649a692eSChuck Lever 		xdr_truncate_encode(xdr, aoffset);
13821da177e4SLinus Torvalds 
13831da177e4SLinus Torvalds 	if (procp->pc_encode == NULL)
13841da177e4SLinus Torvalds 		goto dropit;
13851da177e4SLinus Torvalds 
13861da177e4SLinus Torvalds  sendit:
13871da177e4SLinus Torvalds 	if (svc_authorise(rqstp))
1388f1442d63SDaniel Kobras 		goto close_xprt;
13891cad7ea6SRicardo Labiaga 	return 1;		/* Caller can now send it */
13901da177e4SLinus Torvalds 
13911da177e4SLinus Torvalds  dropit:
13921da177e4SLinus Torvalds 	svc_authorise(rqstp);	/* doesn't hurt to call this twice */
13931da177e4SLinus Torvalds 	dprintk("svc: svc_process dropit\n");
13941da177e4SLinus Torvalds 	return 0;
13951da177e4SLinus Torvalds 
13964d712ef1SChuck Lever  close:
1397f1442d63SDaniel Kobras 	svc_authorise(rqstp);
1398f1442d63SDaniel Kobras close_xprt:
1399d4b09acfSVasily Averin 	if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
14004355d767SChuck Lever 		svc_xprt_close(rqstp->rq_xprt);
14014d712ef1SChuck Lever 	dprintk("svc: svc_process close\n");
14024d712ef1SChuck Lever 	return 0;
14034d712ef1SChuck Lever 
14041da177e4SLinus Torvalds err_short_len:
14051c59a532SChuck Lever 	svc_printk(rqstp, "short len %u, dropping request\n",
14061c59a532SChuck Lever 		   rqstp->rq_arg.len);
1407f1442d63SDaniel Kobras 	goto close_xprt;
14081da177e4SLinus Torvalds 
14091da177e4SLinus Torvalds err_bad_rpc:
14101da177e4SLinus Torvalds 	serv->sv_stats->rpcbadfmt++;
1411649a692eSChuck Lever 	xdr_stream_encode_u32(xdr, RPC_MSG_DENIED);
1412649a692eSChuck Lever 	xdr_stream_encode_u32(xdr, RPC_MISMATCH);
1413649a692eSChuck Lever 	/* Only RPCv2 supported */
1414649a692eSChuck Lever 	xdr_stream_encode_u32(xdr, RPC_VERSION);
1415649a692eSChuck Lever 	xdr_stream_encode_u32(xdr, RPC_VERSION);
141629cd2927SChuck Lever 	return 1;	/* don't wrap */
14171da177e4SLinus Torvalds 
14181da177e4SLinus Torvalds err_bad_auth:
1419438623a0SChuck Lever 	dprintk("svc: authentication failed (%d)\n",
1420438623a0SChuck Lever 		be32_to_cpu(rqstp->rq_auth_stat));
14211da177e4SLinus Torvalds 	serv->sv_stats->rpcbadauth++;
1422649a692eSChuck Lever 	/* Restore write pointer to location of reply status: */
1423649a692eSChuck Lever 	xdr_truncate_encode(xdr, XDR_UNIT * 2);
1424649a692eSChuck Lever 	xdr_stream_encode_u32(xdr, RPC_MSG_DENIED);
1425649a692eSChuck Lever 	xdr_stream_encode_u32(xdr, RPC_AUTH_ERROR);
1426649a692eSChuck Lever 	xdr_stream_encode_be32(xdr, rqstp->rq_auth_stat);
14271da177e4SLinus Torvalds 	goto sendit;
14281da177e4SLinus Torvalds 
14291da177e4SLinus Torvalds err_bad_prog:
14304119bd03SChuck Lever 	dprintk("svc: unknown program %d\n", rqstp->rq_prog);
14311da177e4SLinus Torvalds 	serv->sv_stats->rpcbadfmt++;
143229cd2927SChuck Lever 	*rqstp->rq_accept_statp = rpc_prog_unavail;
14331da177e4SLinus Torvalds 	goto sendit;
14341da177e4SLinus Torvalds 
14351da177e4SLinus Torvalds err_bad_vers:
1436354ecbb9SDr. David Alan Gilbert 	svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n",
14378e5b6773STrond Myklebust 		       rqstp->rq_vers, rqstp->rq_prog, progp->pg_name);
143834e9a63bSNeilBrown 
14391da177e4SLinus Torvalds 	serv->sv_stats->rpcbadfmt++;
144029cd2927SChuck Lever 	*rqstp->rq_accept_statp = rpc_prog_mismatch;
144129cd2927SChuck Lever 
144229cd2927SChuck Lever 	/*
144329cd2927SChuck Lever 	 * svc_authenticate() has already added the verifier and
144429cd2927SChuck Lever 	 * advanced the stream just past rq_accept_statp.
144529cd2927SChuck Lever 	 */
1446649a692eSChuck Lever 	xdr_stream_encode_u32(xdr, process.mismatch.lovers);
1447649a692eSChuck Lever 	xdr_stream_encode_u32(xdr, process.mismatch.hivers);
14481da177e4SLinus Torvalds 	goto sendit;
14491da177e4SLinus Torvalds 
14501da177e4SLinus Torvalds err_bad_proc:
14518e5b6773STrond Myklebust 	svc_printk(rqstp, "unknown procedure (%d)\n", rqstp->rq_proc);
145234e9a63bSNeilBrown 
14531da177e4SLinus Torvalds 	serv->sv_stats->rpcbadfmt++;
145429cd2927SChuck Lever 	*rqstp->rq_accept_statp = rpc_proc_unavail;
14551da177e4SLinus Torvalds 	goto sendit;
14561da177e4SLinus Torvalds 
14576d037b15SChuck Lever err_garbage_args:
14586d037b15SChuck Lever 	svc_printk(rqstp, "failed to decode RPC header\n");
145934e9a63bSNeilBrown 
14601da177e4SLinus Torvalds 	serv->sv_stats->rpcbadfmt++;
146129cd2927SChuck Lever 	*rqstp->rq_accept_statp = rpc_garbage_args;
14626d037b15SChuck Lever 	goto sendit;
14636d037b15SChuck Lever 
14646d037b15SChuck Lever err_system_err:
14656d037b15SChuck Lever 	serv->sv_stats->rpcbadfmt++;
146629cd2927SChuck Lever 	*rqstp->rq_accept_statp = rpc_system_err;
14671da177e4SLinus Torvalds 	goto sendit;
14681da177e4SLinus Torvalds }
14697adae489SGreg Banks 
147055fcc7d9SChuck Lever /**
147155fcc7d9SChuck Lever  * svc_process - Execute one RPC transaction
147255fcc7d9SChuck Lever  * @rqstp: RPC transaction context
147355fcc7d9SChuck Lever  *
14741cad7ea6SRicardo Labiaga  */
147555fcc7d9SChuck Lever void svc_process(struct svc_rqst *rqstp)
14761cad7ea6SRicardo Labiaga {
14771cad7ea6SRicardo Labiaga 	struct kvec		*resv = &rqstp->rq_res.head[0];
1478f4afc8feSChuck Lever 	__be32 *p;
14791cad7ea6SRicardo Labiaga 
14803a126180SChuck Lever #if IS_ENABLED(CONFIG_FAIL_SUNRPC)
14813a126180SChuck Lever 	if (!fail_sunrpc.ignore_server_disconnect &&
14823a126180SChuck Lever 	    should_fail(&fail_sunrpc.attr, 1))
14833a126180SChuck Lever 		svc_xprt_deferred_close(rqstp->rq_xprt);
14843a126180SChuck Lever #endif
14853a126180SChuck Lever 
14861cad7ea6SRicardo Labiaga 	/*
14871cad7ea6SRicardo Labiaga 	 * Set up the response xdr_buf.
14881cad7ea6SRicardo Labiaga 	 * Initially it has just one page.
14891cad7ea6SRicardo Labiaga 	 */
1490afc59400SJ. Bruce Fields 	rqstp->rq_next_page = &rqstp->rq_respages[1];
14911cad7ea6SRicardo Labiaga 	resv->iov_base = page_address(rqstp->rq_respages[0]);
14921cad7ea6SRicardo Labiaga 	resv->iov_len = 0;
149381593c4dSChuck Lever 	rqstp->rq_res.pages = rqstp->rq_next_page;
14941cad7ea6SRicardo Labiaga 	rqstp->rq_res.len = 0;
14951cad7ea6SRicardo Labiaga 	rqstp->rq_res.page_base = 0;
14961cad7ea6SRicardo Labiaga 	rqstp->rq_res.page_len = 0;
14971cad7ea6SRicardo Labiaga 	rqstp->rq_res.buflen = PAGE_SIZE;
14981cad7ea6SRicardo Labiaga 	rqstp->rq_res.tail[0].iov_base = NULL;
14991cad7ea6SRicardo Labiaga 	rqstp->rq_res.tail[0].iov_len = 0;
15001cad7ea6SRicardo Labiaga 
1501f4afc8feSChuck Lever 	svcxdr_init_decode(rqstp);
1502f4afc8feSChuck Lever 	p = xdr_inline_decode(&rqstp->rq_arg_stream, XDR_UNIT * 2);
1503f4afc8feSChuck Lever 	if (unlikely(!p))
1504f4afc8feSChuck Lever 		goto out_drop;
1505f4afc8feSChuck Lever 	rqstp->rq_xid = *p++;
1506f4afc8feSChuck Lever 	if (unlikely(*p != rpc_call))
150781593c4dSChuck Lever 		goto out_baddir;
1508f4afc8feSChuck Lever 
15095f69d5f6SChuck Lever 	if (!svc_process_common(rqstp))
1510860a0d9eSJeff Layton 		goto out_drop;
151155fcc7d9SChuck Lever 	svc_send(rqstp);
151255fcc7d9SChuck Lever 	return;
1513860a0d9eSJeff Layton 
151481593c4dSChuck Lever out_baddir:
151581593c4dSChuck Lever 	svc_printk(rqstp, "bad direction 0x%08x, dropping request\n",
1516f4afc8feSChuck Lever 		   be32_to_cpu(*p));
151781593c4dSChuck Lever 	rqstp->rq_server->sv_stats->rpcbadfmt++;
1518860a0d9eSJeff Layton out_drop:
15194b5b3ba1SAndy Adamson 	svc_drop(rqstp);
15204b5b3ba1SAndy Adamson }
15213f87d5d6SChuck Lever EXPORT_SYMBOL_GPL(svc_process);
15221cad7ea6SRicardo Labiaga 
15239e00abc3STrond Myklebust #if defined(CONFIG_SUNRPC_BACKCHANNEL)
15244d6bbb62SRicardo Labiaga /*
15254d6bbb62SRicardo Labiaga  * Process a backchannel RPC request that arrived over an existing
15264d6bbb62SRicardo Labiaga  * outbound connection
15274d6bbb62SRicardo Labiaga  */
15284d6bbb62SRicardo Labiaga int
15294d6bbb62SRicardo Labiaga bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
15304d6bbb62SRicardo Labiaga 	       struct svc_rqst *rqstp)
15314d6bbb62SRicardo Labiaga {
1532632dda83SChuck Lever 	struct rpc_task *task;
15330d2a970dSTrond Myklebust 	int proc_error;
1534632dda83SChuck Lever 	int error;
1535632dda83SChuck Lever 
1536632dda83SChuck Lever 	dprintk("svc: %s(%p)\n", __func__, req);
15374d6bbb62SRicardo Labiaga 
15384d6bbb62SRicardo Labiaga 	/* Build the svc_rqst used by the common processing routine */
15394d6bbb62SRicardo Labiaga 	rqstp->rq_xid = req->rq_xid;
15404d6bbb62SRicardo Labiaga 	rqstp->rq_prot = req->rq_xprt->prot;
15414d6bbb62SRicardo Labiaga 	rqstp->rq_server = serv;
1542d4b09acfSVasily Averin 	rqstp->rq_bc_net = req->rq_xprt->xprt_net;
15434d6bbb62SRicardo Labiaga 
15444d6bbb62SRicardo Labiaga 	rqstp->rq_addrlen = sizeof(req->rq_xprt->addr);
15454d6bbb62SRicardo Labiaga 	memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen);
15464d6bbb62SRicardo Labiaga 	memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg));
15474d6bbb62SRicardo Labiaga 	memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res));
1548756b9b37STrond Myklebust 
1549756b9b37STrond Myklebust 	/* Adjust the argument buffer length */
155038b7631fSBenjamin Coddington 	rqstp->rq_arg.len = req->rq_private_buf.len;
1551756b9b37STrond Myklebust 	if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
1552756b9b37STrond Myklebust 		rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;
1553756b9b37STrond Myklebust 		rqstp->rq_arg.page_len = 0;
1554756b9b37STrond Myklebust 	} else if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len +
1555756b9b37STrond Myklebust 			rqstp->rq_arg.page_len)
1556756b9b37STrond Myklebust 		rqstp->rq_arg.page_len = rqstp->rq_arg.len -
1557756b9b37STrond Myklebust 			rqstp->rq_arg.head[0].iov_len;
1558756b9b37STrond Myklebust 	else
1559756b9b37STrond Myklebust 		rqstp->rq_arg.len = rqstp->rq_arg.head[0].iov_len +
1560756b9b37STrond Myklebust 			rqstp->rq_arg.page_len;
15614d6bbb62SRicardo Labiaga 
15625f69d5f6SChuck Lever 	/* Reset the response buffer */
15635f69d5f6SChuck Lever 	rqstp->rq_res.head[0].iov_len = 0;
1564f4afc8feSChuck Lever 
15654d6bbb62SRicardo Labiaga 	/*
1566f4afc8feSChuck Lever 	 * Skip the XID and calldir fields because they've already
1567f4afc8feSChuck Lever 	 * been processed by the caller.
15684d6bbb62SRicardo Labiaga 	 */
15695f69d5f6SChuck Lever 	svcxdr_init_decode(rqstp);
1570f4afc8feSChuck Lever 	if (!xdr_inline_decode(&rqstp->rq_arg_stream, XDR_UNIT * 2)) {
1571f4afc8feSChuck Lever 		error = -EINVAL;
1572f4afc8feSChuck Lever 		goto out;
1573f4afc8feSChuck Lever 	}
15744d6bbb62SRicardo Labiaga 
1575632dda83SChuck Lever 	/* Parse and execute the bc call */
15765f69d5f6SChuck Lever 	proc_error = svc_process_common(rqstp);
15770d2a970dSTrond Myklebust 
15787402a4feSTrond Myklebust 	atomic_dec(&req->rq_xprt->bc_slot_count);
15790d2a970dSTrond Myklebust 	if (!proc_error) {
1580632dda83SChuck Lever 		/* Processing error: drop the request */
1581b3b02ae5STrond Myklebust 		xprt_free_bc_request(req);
15828f7766c8SVasily Averin 		error = -EINVAL;
15838f7766c8SVasily Averin 		goto out;
15844b5b3ba1SAndy Adamson 	}
1585632dda83SChuck Lever 	/* Finally, send the reply synchronously */
1586632dda83SChuck Lever 	memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
15870f419791STrond Myklebust 	task = rpc_run_bc_task(req);
1588632dda83SChuck Lever 	if (IS_ERR(task)) {
1589632dda83SChuck Lever 		error = PTR_ERR(task);
1590632dda83SChuck Lever 		goto out;
1591632dda83SChuck Lever 	}
1592632dda83SChuck Lever 
1593632dda83SChuck Lever 	WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
1594632dda83SChuck Lever 	error = task->tk_status;
1595632dda83SChuck Lever 	rpc_put_task(task);
1596632dda83SChuck Lever 
1597632dda83SChuck Lever out:
1598632dda83SChuck Lever 	dprintk("svc: %s(), error=%d\n", __func__, error);
1599632dda83SChuck Lever 	return error;
16004d6bbb62SRicardo Labiaga }
16010d961aa9STrond Myklebust EXPORT_SYMBOL_GPL(bc_svc_process);
16029e00abc3STrond Myklebust #endif /* CONFIG_SUNRPC_BACKCHANNEL */
16034d6bbb62SRicardo Labiaga 
1604f18d8afbSChuck Lever /**
1605f18d8afbSChuck Lever  * svc_max_payload - Return transport-specific limit on the RPC payload
1606f18d8afbSChuck Lever  * @rqstp: RPC transaction context
1607f18d8afbSChuck Lever  *
1608f18d8afbSChuck Lever  * Returns the maximum number of payload bytes the current transport
1609f18d8afbSChuck Lever  * allows.
16107adae489SGreg Banks  */
16117adae489SGreg Banks u32 svc_max_payload(const struct svc_rqst *rqstp)
16127adae489SGreg Banks {
161349023155STom Tucker 	u32 max = rqstp->rq_xprt->xpt_class->xcl_max_payload;
16147adae489SGreg Banks 
1615c6b0a9f8SNeilBrown 	if (rqstp->rq_server->sv_max_payload < max)
1616c6b0a9f8SNeilBrown 		max = rqstp->rq_server->sv_max_payload;
16177adae489SGreg Banks 	return max;
16187adae489SGreg Banks }
16197adae489SGreg Banks EXPORT_SYMBOL_GPL(svc_max_payload);
16208154ef27SChuck Lever 
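/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * READ-style procedure clamping the count requested by the client to what
 * the transport and server instance can actually carry.  The helper name
 * is an assumption made for this example.
 */
static u32 __maybe_unused example_clamp_read_count(const struct svc_rqst *rqstp,
						   u32 wanted)
{
	/* svc_max_payload() already accounts for both the transport class
	 * limit and the per-server sv_max_payload setting. */
	return min_t(u32, wanted, svc_max_payload(rqstp));
}
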
16218154ef27SChuck Lever /**
16225c117207SChuck Lever  * svc_proc_name - Return RPC procedure name in string form
16235c117207SChuck Lever  * @rqstp: svc_rqst to operate on
16245c117207SChuck Lever  *
16255c117207SChuck Lever  * Return value:
16265c117207SChuck Lever  *   Pointer to a NUL-terminated string
16275c117207SChuck Lever  */
16285c117207SChuck Lever const char *svc_proc_name(const struct svc_rqst *rqstp)
16295c117207SChuck Lever {
16305c117207SChuck Lever 	if (rqstp && rqstp->rq_procinfo)
16315c117207SChuck Lever 		return rqstp->rq_procinfo->pc_name;
16325c117207SChuck Lever 	return "unknown";
16335c117207SChuck Lever }
16345c117207SChuck Lever 
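/*
 * Illustrative sketch, not part of the original file: svc_proc_name() is
 * mainly useful for diagnostics.  A hypothetical debug helper (the name is
 * an assumption) might emit something like:
 */
static void __maybe_unused example_log_procedure(const struct svc_rqst *rqstp)
{
	dprintk("svc: handling %s (program %d, version %d)\n",
		svc_proc_name(rqstp), rqstp->rq_prog, rqstp->rq_vers);
}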
16355c117207SChuck Lever 
16365c117207SChuck Lever /**
163703493bcaSChuck Lever  * svc_encode_result_payload - mark a range of bytes as a result payload
163841205539SChuck Lever  * @rqstp: svc_rqst to operate on
163941205539SChuck Lever  * @offset: payload's byte offset in rqstp->rq_res
164041205539SChuck Lever  * @length: size of payload, in bytes
164141205539SChuck Lever  *
164241205539SChuck Lever  * Returns zero on success, or a negative errno if a permanent
164341205539SChuck Lever  * error occurred.
164441205539SChuck Lever  */
164503493bcaSChuck Lever int svc_encode_result_payload(struct svc_rqst *rqstp, unsigned int offset,
164641205539SChuck Lever 			      unsigned int length)
164741205539SChuck Lever {
164803493bcaSChuck Lever 	return rqstp->rq_xprt->xpt_ops->xpo_result_payload(rqstp, offset,
164903493bcaSChuck Lever 							   length);
165041205539SChuck Lever }
165103493bcaSChuck Lever EXPORT_SYMBOL_GPL(svc_encode_result_payload);
165241205539SChuck Lever 
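/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * result encoder marking the region it is about to emit so that transports
 * with direct data placement (such as RPC/RDMA) can handle it specially.
 * The helper name and the use of the current encode position as the offset
 * are assumptions made for this example.
 */
static int __maybe_unused example_mark_read_payload(struct svc_rqst *rqstp,
						    unsigned int count)
{
	/* The payload begins at the current encode position in rq_res. */
	unsigned int offset = xdr_stream_pos(&rqstp->rq_res_stream);

	return svc_encode_result_payload(rqstp, offset, count);
}
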
165341205539SChuck Lever /**
16548154ef27SChuck Lever  * svc_fill_write_vector - Construct data argument for VFS write call
16558154ef27SChuck Lever  * @rqstp: svc_rqst to operate on
1656dae9a6caSChuck Lever  * @payload: xdr_buf containing only the write data payload
16578154ef27SChuck Lever  *
16583fd9557aSChuck Lever  * Fills in rqstp::rq_vec, and returns the number of elements.
16598154ef27SChuck Lever  */
1660dae9a6caSChuck Lever unsigned int svc_fill_write_vector(struct svc_rqst *rqstp,
1661dae9a6caSChuck Lever 				   struct xdr_buf *payload)
16628154ef27SChuck Lever {
1663dae9a6caSChuck Lever 	struct page **pages = payload->pages;
1664dae9a6caSChuck Lever 	struct kvec *first = payload->head;
16658154ef27SChuck Lever 	struct kvec *vec = rqstp->rq_vec;
1666dae9a6caSChuck Lever 	size_t total = payload->len;
16678154ef27SChuck Lever 	unsigned int i;
16688154ef27SChuck Lever 
16698154ef27SChuck Lever 	/* Some types of transport can present the write payload
16708154ef27SChuck Lever 	 * entirely in rq_arg.pages. In this case, @first is empty.
16718154ef27SChuck Lever 	 */
16728154ef27SChuck Lever 	i = 0;
16738154ef27SChuck Lever 	if (first->iov_len) {
16748154ef27SChuck Lever 		vec[i].iov_base = first->iov_base;
16758154ef27SChuck Lever 		vec[i].iov_len = min_t(size_t, total, first->iov_len);
16768154ef27SChuck Lever 		total -= vec[i].iov_len;
16778154ef27SChuck Lever 		++i;
16788154ef27SChuck Lever 	}
16798154ef27SChuck Lever 
16808154ef27SChuck Lever 	while (total) {
16818154ef27SChuck Lever 		vec[i].iov_base = page_address(*pages);
16828154ef27SChuck Lever 		vec[i].iov_len = min_t(size_t, total, PAGE_SIZE);
16838154ef27SChuck Lever 		total -= vec[i].iov_len;
16848154ef27SChuck Lever 		++i;
16858154ef27SChuck Lever 		++pages;
16868154ef27SChuck Lever 	}
16878154ef27SChuck Lever 
16888154ef27SChuck Lever 	WARN_ON_ONCE(i > ARRAY_SIZE(rqstp->rq_vec));
16898154ef27SChuck Lever 	return i;
16908154ef27SChuck Lever }
16918154ef27SChuck Lever EXPORT_SYMBOL_GPL(svc_fill_write_vector);
169238a70315SChuck Lever 
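/*
 * Illustrative sketch, not part of the original file: how a hypothetical
 * WRITE handler might turn the decoded payload into a kernel I/O vector.
 * The follow-on call into the VFS (for instance vfs_iter_write()) is left
 * out, and the helper name is an assumption made for this example.
 */
static void __maybe_unused example_payload_to_iter(struct svc_rqst *rqstp,
						   struct xdr_buf *payload,
						   struct iov_iter *iter)
{
	unsigned int nvecs = svc_fill_write_vector(rqstp, payload);

	/* rq_vec now covers exactly payload->len bytes of write data. */
	iov_iter_kvec(iter, ITER_SOURCE, rqstp->rq_vec, nvecs, payload->len);
}
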
169338a70315SChuck Lever /**
169438a70315SChuck Lever  * svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call
169538a70315SChuck Lever  * @rqstp: svc_rqst to operate on
169638a70315SChuck Lever  * @first: buffer containing first section of pathname
169711b4d66eSChuck Lever  * @p: buffer containing remaining section of pathname
169838a70315SChuck Lever  * @total: total length of the pathname argument
169938a70315SChuck Lever  *
170011b4d66eSChuck Lever  * The VFS symlink API demands a NUL-terminated pathname in mapped memory.
170111b4d66eSChuck Lever  * Returns pointer to a NUL-terminated string, or an ERR_PTR. Caller must free
170211b4d66eSChuck Lever  * the returned string.
170338a70315SChuck Lever  */
170438a70315SChuck Lever char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first,
170511b4d66eSChuck Lever 				void *p, size_t total)
170638a70315SChuck Lever {
170738a70315SChuck Lever 	size_t len, remaining;
170811b4d66eSChuck Lever 	char *result, *dst;
170938a70315SChuck Lever 
171011b4d66eSChuck Lever 	result = kmalloc(total + 1, GFP_KERNEL);
171111b4d66eSChuck Lever 	if (!result)
171211b4d66eSChuck Lever 		return ERR_PTR(-ESERVERFAULT);
171311b4d66eSChuck Lever 
171438a70315SChuck Lever 	dst = result;
171538a70315SChuck Lever 	remaining = total;
171638a70315SChuck Lever 
171738a70315SChuck Lever 	len = min_t(size_t, total, first->iov_len);
171811b4d66eSChuck Lever 	if (len) {
171938a70315SChuck Lever 		memcpy(dst, first->iov_base, len);
172038a70315SChuck Lever 		dst += len;
172138a70315SChuck Lever 		remaining -= len;
172211b4d66eSChuck Lever 	}
172338a70315SChuck Lever 
172438a70315SChuck Lever 	if (remaining) {
172538a70315SChuck Lever 		len = min_t(size_t, remaining, PAGE_SIZE);
172611b4d66eSChuck Lever 		memcpy(dst, p, len);
172738a70315SChuck Lever 		dst += len;
172838a70315SChuck Lever 	}
172938a70315SChuck Lever 
173038a70315SChuck Lever 	*dst = '\0';
173138a70315SChuck Lever 
173211b4d66eSChuck Lever 	/* Sanity check: Linux doesn't allow the pathname argument to
173338a70315SChuck Lever 	 * contain a NUL byte.
173438a70315SChuck Lever 	 */
173511b4d66eSChuck Lever 	if (strlen(result) != total) {
173611b4d66eSChuck Lever 		kfree(result);
173738a70315SChuck Lever 		return ERR_PTR(-EINVAL);
173811b4d66eSChuck Lever 	}
173938a70315SChuck Lever 	return result;
174038a70315SChuck Lever }
174138a70315SChuck Lever EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname);
1742
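/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * SYMLINK decoder assembling and validating the pathname argument.  The
 * way @first and the continuation buffer are taken from rq_arg, and the
 * helper name, are assumptions made only for this example.
 */
static int __maybe_unused example_get_symlink_target(struct svc_rqst *rqstp,
						     size_t total, char **pathp)
{
	char *path;

	path = svc_fill_symlink_pathname(rqstp, &rqstp->rq_arg.head[0],
					 page_address(rqstp->rq_arg.pages[0]),
					 total);
	if (IS_ERR(path))
		return PTR_ERR(path);

	/* The caller consumes the NUL-terminated string and must kfree() it. */
	*pathp = path;
	return 0;
}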