xref: /openbmc/linux/fs/nfsd/nfssvc.c (revision 786baecf)
1 /*
2  * Central processing for nfsd.
3  *
4  * Authors:	Olaf Kirch (okir@monad.swb.de)
5  *
6  * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
7  */
8 
9 #include <linux/sched.h>
10 #include <linux/freezer.h>
11 #include <linux/module.h>
12 #include <linux/fs_struct.h>
13 #include <linux/swap.h>
14 #include <linux/nsproxy.h>
15 
16 #include <linux/sunrpc/stats.h>
17 #include <linux/sunrpc/svcsock.h>
18 #include <linux/lockd/bind.h>
19 #include <linux/nfsacl.h>
20 #include <linux/seq_file.h>
21 #include <net/net_namespace.h>
22 #include "nfsd.h"
23 #include "cache.h"
24 #include "vfs.h"
25 
26 #define NFSDDBG_FACILITY	NFSDDBG_SVC
27 
28 extern struct svc_program	nfsd_program;
29 static int			nfsd(void *vrqstp);
30 struct timeval			nfssvc_boot;
31 
/*
 * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members
 * of the svc_serv struct. In particular, ->sv_nrthreads but also to some
 * extent ->sv_temp_socks and ->sv_permsocks. It also protects
 * nfsdstats.th_cnt.
 *
 * If (outside the lock) nfsd_serv is non-NULL, then it must point to a
 * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number
 * of nfsd threads must exist and each must be listed in ->sp_all_threads in
 * each entry of ->sv_pools[].
 *
 * Transitions of the thread count between zero and non-zero are of particular
 * interest since the svc_serv needs to be created and initialized at that
 * point, or freed.
 *
 * Finally, the nfsd_mutex also protects some of the global variables that are
 * accessed when nfsd starts and that are settable via the write_* routines in
 * nfsctl.c. In particular:
 *
 *	user_recovery_dirname
 *	user_lease_time
 *	nfsd_versions
 */
/* Mutex guarding nfsd_serv and the thread-count bookkeeping in this file. */
DEFINE_MUTEX(nfsd_mutex);

/* The nfsd RPC service; NULL while no service exists. */
struct svc_serv *nfsd_serv;
56 
57 /*
58  * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used.
59  * nfsd_drc_max_pages limits the total amount of memory available for
60  * version 4.1 DRC caches.
61  * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage.
62  */
63 spinlock_t	nfsd_drc_lock;
64 unsigned int	nfsd_drc_max_mem;
65 unsigned int	nfsd_drc_mem_used;
66 
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
/*
 * Tentative definition: the program descriptor and its statistics block
 * point at each other, so one of them has to be announced first.
 */
static struct svc_stat nfsd_acl_svcstats;

/* NFSACL versions built into this module, indexed by version number. */
static struct svc_version *nfsd_acl_version[] = {
	[2] = &nfsd_acl_version2,
	[3] = &nfsd_acl_version3,
};

#define NFSD_ACL_MINVERS	2
#define NFSD_ACL_NRVERS		ARRAY_SIZE(nfsd_acl_version)

/*
 * NFSACL versions currently switched on; entries are filled in or cleared
 * by nfsd_vers() and nfsd_reset_versions().
 */
static struct svc_version *nfsd_acl_versions[NFSD_ACL_NRVERS];

/* RPC program descriptor for NFSACL, chained after nfsd_program. */
static struct svc_program nfsd_acl_program = {
	.pg_prog	 = NFS_ACL_PROGRAM,
	.pg_name	 = "nfsacl",
	.pg_class	 = "nfsd",
	.pg_nvers	 = NFSD_ACL_NRVERS,
	.pg_vers	 = nfsd_acl_versions,
	.pg_stats	 = &nfsd_acl_svcstats,
	.pg_authenticate = &svc_set_client,
};

static struct svc_stat nfsd_acl_svcstats = {
	.program = &nfsd_acl_program,
};
#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
92 
93 static struct svc_version *	nfsd_version[] = {
94 	[2] = &nfsd_version2,
95 #if defined(CONFIG_NFSD_V3)
96 	[3] = &nfsd_version3,
97 #endif
98 #if defined(CONFIG_NFSD_V4)
99 	[4] = &nfsd_version4,
100 #endif
101 };
102 
103 #define NFSD_MINVERS    	2
104 #define NFSD_NRVERS		ARRAY_SIZE(nfsd_version)
105 static struct svc_version *nfsd_versions[NFSD_NRVERS];
106 
107 struct svc_program		nfsd_program = {
108 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
109 	.pg_next		= &nfsd_acl_program,
110 #endif
111 	.pg_prog		= NFS_PROGRAM,		/* program number */
112 	.pg_nvers		= NFSD_NRVERS,		/* nr of entries in nfsd_version */
113 	.pg_vers		= nfsd_versions,	/* version table */
114 	.pg_name		= "nfsd",		/* program name */
115 	.pg_class		= "nfsd",		/* authentication class */
116 	.pg_stats		= &nfsd_svcstats,	/* version table */
117 	.pg_authenticate	= &svc_set_client,	/* export authentication */
118 
119 };
120 
121 u32 nfsd_supported_minorversion;
122 
123 int nfsd_vers(int vers, enum vers_op change)
124 {
125 	if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS)
126 		return 0;
127 	switch(change) {
128 	case NFSD_SET:
129 		nfsd_versions[vers] = nfsd_version[vers];
130 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
131 		if (vers < NFSD_ACL_NRVERS)
132 			nfsd_acl_versions[vers] = nfsd_acl_version[vers];
133 #endif
134 		break;
135 	case NFSD_CLEAR:
136 		nfsd_versions[vers] = NULL;
137 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
138 		if (vers < NFSD_ACL_NRVERS)
139 			nfsd_acl_versions[vers] = NULL;
140 #endif
141 		break;
142 	case NFSD_TEST:
143 		return nfsd_versions[vers] != NULL;
144 	case NFSD_AVAIL:
145 		return nfsd_version[vers] != NULL;
146 	}
147 	return 0;
148 }
149 
150 int nfsd_minorversion(u32 minorversion, enum vers_op change)
151 {
152 	if (minorversion > NFSD_SUPPORTED_MINOR_VERSION)
153 		return -1;
154 	switch(change) {
155 	case NFSD_SET:
156 		nfsd_supported_minorversion = minorversion;
157 		break;
158 	case NFSD_CLEAR:
159 		if (minorversion == 0)
160 			return -1;
161 		nfsd_supported_minorversion = minorversion - 1;
162 		break;
163 	case NFSD_TEST:
164 		return minorversion <= nfsd_supported_minorversion;
165 	case NFSD_AVAIL:
166 		return minorversion <= NFSD_SUPPORTED_MINOR_VERSION;
167 	}
168 	return 0;
169 }
170 
/*
 * Upper bound on the number of nfsd threads; requests above it are
 * clamped by nfsd_svc() and nfsd_set_nrthreads().
 */
#define NFSD_MAXSERVS	8192
175 
176 int nfsd_nrthreads(void)
177 {
178 	int rv = 0;
179 	mutex_lock(&nfsd_mutex);
180 	if (nfsd_serv)
181 		rv = nfsd_serv->sv_nrthreads;
182 	mutex_unlock(&nfsd_mutex);
183 	return rv;
184 }
185 
186 static int nfsd_init_socks(int port)
187 {
188 	int error;
189 	if (!list_empty(&nfsd_serv->sv_permsocks))
190 		return 0;
191 
192 	error = svc_create_xprt(nfsd_serv, "udp", &init_net, PF_INET, port,
193 					SVC_SOCK_DEFAULTS);
194 	if (error < 0)
195 		return error;
196 
197 	error = svc_create_xprt(nfsd_serv, "tcp", &init_net, PF_INET, port,
198 					SVC_SOCK_DEFAULTS);
199 	if (error < 0)
200 		return error;
201 
202 	return 0;
203 }
204 
205 static bool nfsd_up = false;
206 
207 static int nfsd_startup(unsigned short port, int nrservs)
208 {
209 	int ret;
210 
211 	if (nfsd_up)
212 		return 0;
213 	/*
214 	 * Readahead param cache - will no-op if it already exists.
215 	 * (Note therefore results will be suboptimal if number of
216 	 * threads is modified after nfsd start.)
217 	 */
218 	ret = nfsd_racache_init(2*nrservs);
219 	if (ret)
220 		return ret;
221 	ret = nfsd_init_socks(port);
222 	if (ret)
223 		goto out_racache;
224 	ret = lockd_up(&init_net);
225 	if (ret)
226 		goto out_racache;
227 	ret = nfs4_state_start();
228 	if (ret)
229 		goto out_lockd;
230 	nfsd_up = true;
231 	return 0;
232 out_lockd:
233 	lockd_down(&init_net);
234 out_racache:
235 	nfsd_racache_shutdown();
236 	return ret;
237 }
238 
239 static void nfsd_shutdown(void)
240 {
241 	/*
242 	 * write_ports can create the server without actually starting
243 	 * any threads--if we get shut down before any threads are
244 	 * started, then nfsd_last_thread will be run before any of this
245 	 * other initialization has been done.
246 	 */
247 	if (!nfsd_up)
248 		return;
249 	nfs4_state_shutdown();
250 	lockd_down(&init_net);
251 	nfsd_racache_shutdown();
252 	nfsd_up = false;
253 }
254 
255 static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
256 {
257 	nfsd_shutdown();
258 
259 	svc_rpcb_cleanup(serv, net);
260 
261 	printk(KERN_WARNING "nfsd: last server has exited, flushing export "
262 			    "cache\n");
263 	nfsd_export_flush(net);
264 }
265 
266 void nfsd_reset_versions(void)
267 {
268 	int found_one = 0;
269 	int i;
270 
271 	for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) {
272 		if (nfsd_program.pg_vers[i])
273 			found_one = 1;
274 	}
275 
276 	if (!found_one) {
277 		for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++)
278 			nfsd_program.pg_vers[i] = nfsd_version[i];
279 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
280 		for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++)
281 			nfsd_acl_program.pg_vers[i] =
282 				nfsd_acl_version[i];
283 #endif
284 	}
285 }
286 
287 /*
288  * Each session guarantees a negotiated per slot memory cache for replies
289  * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated
290  * NFSv4.1 server might want to use more memory for a DRC than a machine
291  * with mutiple services.
292  *
293  * Impose a hard limit on the number of pages for the DRC which varies
294  * according to the machines free pages. This is of course only a default.
295  *
296  * For now this is a #defined shift which could be under admin control
297  * in the future.
298  */
299 static void set_max_drc(void)
300 {
301 	#define NFSD_DRC_SIZE_SHIFT	10
302 	nfsd_drc_max_mem = (nr_free_buffer_pages()
303 					>> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
304 	nfsd_drc_mem_used = 0;
305 	spin_lock_init(&nfsd_drc_lock);
306 	dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem);
307 }
308 
309 static int nfsd_get_default_max_blksize(void)
310 {
311 	struct sysinfo i;
312 	unsigned long long target;
313 	unsigned long ret;
314 
315 	si_meminfo(&i);
316 	target = (i.totalram - i.totalhigh) << PAGE_SHIFT;
317 	/*
318 	 * Aim for 1/4096 of memory per thread This gives 1MB on 4Gig
319 	 * machines, but only uses 32K on 128M machines.  Bottom out at
320 	 * 8K on 32M and smaller.  Of course, this is only a default.
321 	 */
322 	target >>= 12;
323 
324 	ret = NFSSVC_MAXBLKSIZE;
325 	while (ret > target && ret >= 8*1024*2)
326 		ret /= 2;
327 	return ret;
328 }
329 
330 int nfsd_create_serv(void)
331 {
332 	int error;
333 	struct net *net = current->nsproxy->net_ns;
334 
335 	WARN_ON(!mutex_is_locked(&nfsd_mutex));
336 	if (nfsd_serv) {
337 		svc_get(nfsd_serv);
338 		return 0;
339 	}
340 	if (nfsd_max_blksize == 0)
341 		nfsd_max_blksize = nfsd_get_default_max_blksize();
342 	nfsd_reset_versions();
343 	nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
344 				      nfsd_last_thread, nfsd, THIS_MODULE);
345 	if (nfsd_serv == NULL)
346 		return -ENOMEM;
347 
348 	error = svc_bind(nfsd_serv, net);
349 	if (error < 0) {
350 		svc_destroy(nfsd_serv);
351 		return error;
352 	}
353 
354 	set_max_drc();
355 	do_gettimeofday(&nfssvc_boot);		/* record boot time */
356 	return 0;
357 }
358 
359 int nfsd_nrpools(void)
360 {
361 	if (nfsd_serv == NULL)
362 		return 0;
363 	else
364 		return nfsd_serv->sv_nrpools;
365 }
366 
367 int nfsd_get_nrthreads(int n, int *nthreads)
368 {
369 	int i = 0;
370 
371 	if (nfsd_serv != NULL) {
372 		for (i = 0; i < nfsd_serv->sv_nrpools && i < n; i++)
373 			nthreads[i] = nfsd_serv->sv_pools[i].sp_nrthreads;
374 	}
375 
376 	return 0;
377 }
378 
379 int nfsd_set_nrthreads(int n, int *nthreads)
380 {
381 	int i = 0;
382 	int tot = 0;
383 	int err = 0;
384 	struct net *net = &init_net;
385 
386 	WARN_ON(!mutex_is_locked(&nfsd_mutex));
387 
388 	if (nfsd_serv == NULL || n <= 0)
389 		return 0;
390 
391 	if (n > nfsd_serv->sv_nrpools)
392 		n = nfsd_serv->sv_nrpools;
393 
394 	/* enforce a global maximum number of threads */
395 	tot = 0;
396 	for (i = 0; i < n; i++) {
397 		if (nthreads[i] > NFSD_MAXSERVS)
398 			nthreads[i] = NFSD_MAXSERVS;
399 		tot += nthreads[i];
400 	}
401 	if (tot > NFSD_MAXSERVS) {
402 		/* total too large: scale down requested numbers */
403 		for (i = 0; i < n && tot > 0; i++) {
404 		    	int new = nthreads[i] * NFSD_MAXSERVS / tot;
405 			tot -= (nthreads[i] - new);
406 			nthreads[i] = new;
407 		}
408 		for (i = 0; i < n && tot > 0; i++) {
409 			nthreads[i]--;
410 			tot--;
411 		}
412 	}
413 
414 	/*
415 	 * There must always be a thread in pool 0; the admin
416 	 * can't shut down NFS completely using pool_threads.
417 	 */
418 	if (nthreads[0] == 0)
419 		nthreads[0] = 1;
420 
421 	/* apply the new numbers */
422 	svc_get(nfsd_serv);
423 	for (i = 0; i < n; i++) {
424 		err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i],
425 				    	  nthreads[i]);
426 		if (err)
427 			break;
428 	}
429 	nfsd_destroy(net);
430 	return err;
431 }
432 
433 /*
434  * Adjust the number of threads and return the new number of threads.
435  * This is also the function that starts the server if necessary, if
436  * this is the first time nrservs is nonzero.
437  */
438 int
439 nfsd_svc(unsigned short port, int nrservs)
440 {
441 	int	error;
442 	bool	nfsd_up_before;
443 	struct net *net = &init_net;
444 
445 	mutex_lock(&nfsd_mutex);
446 	dprintk("nfsd: creating service\n");
447 	if (nrservs <= 0)
448 		nrservs = 0;
449 	if (nrservs > NFSD_MAXSERVS)
450 		nrservs = NFSD_MAXSERVS;
451 	error = 0;
452 	if (nrservs == 0 && nfsd_serv == NULL)
453 		goto out;
454 
455 	error = nfsd_create_serv();
456 	if (error)
457 		goto out;
458 
459 	nfsd_up_before = nfsd_up;
460 
461 	error = nfsd_startup(port, nrservs);
462 	if (error)
463 		goto out_destroy;
464 	error = svc_set_num_threads(nfsd_serv, NULL, nrservs);
465 	if (error)
466 		goto out_shutdown;
467 	/* We are holding a reference to nfsd_serv which
468 	 * we don't want to count in the return value,
469 	 * so subtract 1
470 	 */
471 	error = nfsd_serv->sv_nrthreads - 1;
472 out_shutdown:
473 	if (error < 0 && !nfsd_up_before)
474 		nfsd_shutdown();
475 out_destroy:
476 	nfsd_destroy(net);		/* Release server */
477 out:
478 	mutex_unlock(&nfsd_mutex);
479 	return error;
480 }
481 
482 
483 /*
484  * This is the NFS server kernel thread
485  */
486 static int
487 nfsd(void *vrqstp)
488 {
489 	struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
490 	int err, preverr = 0;
491 
492 	/* Lock module and set up kernel thread */
493 	mutex_lock(&nfsd_mutex);
494 
495 	/* At this point, the thread shares current->fs
496 	 * with the init process. We need to create files with a
497 	 * umask of 0 instead of init's umask. */
498 	if (unshare_fs_struct() < 0) {
499 		printk("Unable to start nfsd thread: out of memory\n");
500 		goto out;
501 	}
502 
503 	current->fs->umask = 0;
504 
505 	/*
506 	 * thread is spawned with all signals set to SIG_IGN, re-enable
507 	 * the ones that will bring down the thread
508 	 */
509 	allow_signal(SIGKILL);
510 	allow_signal(SIGHUP);
511 	allow_signal(SIGINT);
512 	allow_signal(SIGQUIT);
513 
514 	nfsdstats.th_cnt++;
515 	mutex_unlock(&nfsd_mutex);
516 
517 	/*
518 	 * We want less throttling in balance_dirty_pages() so that nfs to
519 	 * localhost doesn't cause nfsd to lock up due to all the client's
520 	 * dirty pages.
521 	 */
522 	current->flags |= PF_LESS_THROTTLE;
523 	set_freezable();
524 
525 	/*
526 	 * The main request loop
527 	 */
528 	for (;;) {
529 		/*
530 		 * Find a socket with data available and call its
531 		 * recvfrom routine.
532 		 */
533 		while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
534 			;
535 		if (err == -EINTR)
536 			break;
537 		else if (err < 0) {
538 			if (err != preverr) {
539 				printk(KERN_WARNING "%s: unexpected error "
540 					"from svc_recv (%d)\n", __func__, -err);
541 				preverr = err;
542 			}
543 			schedule_timeout_uninterruptible(HZ);
544 			continue;
545 		}
546 
547 		validate_process_creds();
548 		svc_process(rqstp);
549 		validate_process_creds();
550 	}
551 
552 	/* Clear signals before calling svc_exit_thread() */
553 	flush_signals(current);
554 
555 	mutex_lock(&nfsd_mutex);
556 	nfsdstats.th_cnt --;
557 
558 out:
559 	rqstp->rq_server = NULL;
560 
561 	/* Release the thread */
562 	svc_exit_thread(rqstp);
563 
564 	nfsd_destroy(&init_net);
565 
566 	/* Release module */
567 	mutex_unlock(&nfsd_mutex);
568 	module_put_and_exit(0);
569 	return 0;
570 }
571 
572 static __be32 map_new_errors(u32 vers, __be32 nfserr)
573 {
574 	if (nfserr == nfserr_jukebox && vers == 2)
575 		return nfserr_dropit;
576 	if (nfserr == nfserr_wrongsec && vers < 4)
577 		return nfserr_acces;
578 	return nfserr;
579 }
580 
581 int
582 nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
583 {
584 	struct svc_procedure	*proc;
585 	kxdrproc_t		xdr;
586 	__be32			nfserr;
587 	__be32			*nfserrp;
588 
589 	dprintk("nfsd_dispatch: vers %d proc %d\n",
590 				rqstp->rq_vers, rqstp->rq_proc);
591 	proc = rqstp->rq_procinfo;
592 
593 	/*
594 	 * Give the xdr decoder a chance to change this if it wants
595 	 * (necessary in the NFSv4.0 compound case)
596 	 */
597 	rqstp->rq_cachetype = proc->pc_cachetype;
598 	/* Decode arguments */
599 	xdr = proc->pc_decode;
600 	if (xdr && !xdr(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base,
601 			rqstp->rq_argp)) {
602 		dprintk("nfsd: failed to decode arguments!\n");
603 		*statp = rpc_garbage_args;
604 		return 1;
605 	}
606 
607 	/* Check whether we have this call in the cache. */
608 	switch (nfsd_cache_lookup(rqstp)) {
609 	case RC_INTR:
610 	case RC_DROPIT:
611 		return 0;
612 	case RC_REPLY:
613 		return 1;
614 	case RC_DOIT:;
615 		/* do it */
616 	}
617 
618 	/* need to grab the location to store the status, as
619 	 * nfsv4 does some encoding while processing
620 	 */
621 	nfserrp = rqstp->rq_res.head[0].iov_base
622 		+ rqstp->rq_res.head[0].iov_len;
623 	rqstp->rq_res.head[0].iov_len += sizeof(__be32);
624 
625 	/* Now call the procedure handler, and encode NFS status. */
626 	nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
627 	nfserr = map_new_errors(rqstp->rq_vers, nfserr);
628 	if (nfserr == nfserr_dropit || rqstp->rq_dropme) {
629 		dprintk("nfsd: Dropping request; may be revisited later\n");
630 		nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
631 		return 0;
632 	}
633 
634 	if (rqstp->rq_proc != 0)
635 		*nfserrp++ = nfserr;
636 
637 	/* Encode result.
638 	 * For NFSv2, additional info is never returned in case of an error.
639 	 */
640 	if (!(nfserr && rqstp->rq_vers == 2)) {
641 		xdr = proc->pc_encode;
642 		if (xdr && !xdr(rqstp, nfserrp,
643 				rqstp->rq_resp)) {
644 			/* Failed to encode result. Release cache entry */
645 			dprintk("nfsd: failed to encode result!\n");
646 			nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
647 			*statp = rpc_system_err;
648 			return 1;
649 		}
650 	}
651 
652 	/* Store reply in cache. */
653 	nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1);
654 	return 1;
655 }
656 
657 int nfsd_pool_stats_open(struct inode *inode, struct file *file)
658 {
659 	int ret;
660 	mutex_lock(&nfsd_mutex);
661 	if (nfsd_serv == NULL) {
662 		mutex_unlock(&nfsd_mutex);
663 		return -ENODEV;
664 	}
665 	/* bump up the psudo refcount while traversing */
666 	svc_get(nfsd_serv);
667 	ret = svc_pool_stats_open(nfsd_serv, file);
668 	mutex_unlock(&nfsd_mutex);
669 	return ret;
670 }
671 
672 int nfsd_pool_stats_release(struct inode *inode, struct file *file)
673 {
674 	int ret = seq_release(inode, file);
675 	struct net *net = &init_net;
676 
677 	mutex_lock(&nfsd_mutex);
678 	/* this function really, really should have been called svc_put() */
679 	nfsd_destroy(net);
680 	mutex_unlock(&nfsd_mutex);
681 	return ret;
682 }
683