1 /* 2 * linux/net/sunrpc/svc.c 3 * 4 * High-level RPC service routines 5 * 6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 7 * 8 * Multiple threads pools and NUMAisation 9 * Copyright (c) 2006 Silicon Graphics, Inc. 10 * by Greg Banks <gnb@melbourne.sgi.com> 11 */ 12 13 #include <linux/linkage.h> 14 #include <linux/sched.h> 15 #include <linux/errno.h> 16 #include <linux/net.h> 17 #include <linux/in.h> 18 #include <linux/mm.h> 19 #include <linux/interrupt.h> 20 #include <linux/module.h> 21 #include <linux/kthread.h> 22 #include <linux/slab.h> 23 24 #include <linux/sunrpc/types.h> 25 #include <linux/sunrpc/xdr.h> 26 #include <linux/sunrpc/stats.h> 27 #include <linux/sunrpc/svcsock.h> 28 #include <linux/sunrpc/clnt.h> 29 #include <linux/sunrpc/bc_xprt.h> 30 31 #include <trace/events/sunrpc.h> 32 33 #define RPCDBG_FACILITY RPCDBG_SVCDSP 34 35 static void svc_unregister(const struct svc_serv *serv, struct net *net); 36 37 #define svc_serv_is_pooled(serv) ((serv)->sv_ops->svo_function) 38 39 #define SVC_POOL_DEFAULT SVC_POOL_GLOBAL 40 41 /* 42 * Structure for mapping cpus to pools and vice versa. 43 * Setup once during sunrpc initialisation. 44 */ 45 struct svc_pool_map svc_pool_map = { 46 .mode = SVC_POOL_DEFAULT 47 }; 48 EXPORT_SYMBOL_GPL(svc_pool_map); 49 50 static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */ 51 52 static int 53 param_set_pool_mode(const char *val, struct kernel_param *kp) 54 { 55 int *ip = (int *)kp->arg; 56 struct svc_pool_map *m = &svc_pool_map; 57 int err; 58 59 mutex_lock(&svc_pool_map_mutex); 60 61 err = -EBUSY; 62 if (m->count) 63 goto out; 64 65 err = 0; 66 if (!strncmp(val, "auto", 4)) 67 *ip = SVC_POOL_AUTO; 68 else if (!strncmp(val, "global", 6)) 69 *ip = SVC_POOL_GLOBAL; 70 else if (!strncmp(val, "percpu", 6)) 71 *ip = SVC_POOL_PERCPU; 72 else if (!strncmp(val, "pernode", 7)) 73 *ip = SVC_POOL_PERNODE; 74 else 75 err = -EINVAL; 76 77 out: 78 mutex_unlock(&svc_pool_map_mutex); 79 return err; 80 } 81 82 static int 83 param_get_pool_mode(char *buf, struct kernel_param *kp) 84 { 85 int *ip = (int *)kp->arg; 86 87 switch (*ip) 88 { 89 case SVC_POOL_AUTO: 90 return strlcpy(buf, "auto", 20); 91 case SVC_POOL_GLOBAL: 92 return strlcpy(buf, "global", 20); 93 case SVC_POOL_PERCPU: 94 return strlcpy(buf, "percpu", 20); 95 case SVC_POOL_PERNODE: 96 return strlcpy(buf, "pernode", 20); 97 default: 98 return sprintf(buf, "%d", *ip); 99 } 100 } 101 102 module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode, 103 &svc_pool_map.mode, 0644); 104 105 /* 106 * Detect best pool mapping mode heuristically, 107 * according to the machine's topology. 108 */ 109 static int 110 svc_pool_map_choose_mode(void) 111 { 112 unsigned int node; 113 114 if (nr_online_nodes > 1) { 115 /* 116 * Actually have multiple NUMA nodes, 117 * so split pools on NUMA node boundaries 118 */ 119 return SVC_POOL_PERNODE; 120 } 121 122 node = first_online_node; 123 if (nr_cpus_node(node) > 2) { 124 /* 125 * Non-trivial SMP, or CONFIG_NUMA on 126 * non-NUMA hardware, e.g. with a generic 127 * x86_64 kernel on Xeons. In this case we 128 * want to divide the pools on cpu boundaries. 129 */ 130 return SVC_POOL_PERCPU; 131 } 132 133 /* default: one global pool */ 134 return SVC_POOL_GLOBAL; 135 } 136 137 /* 138 * Allocate the to_pool[] and pool_to[] arrays. 139 * Returns 0 on success or an errno. 140 */ 141 static int 142 svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools) 143 { 144 m->to_pool = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL); 145 if (!m->to_pool) 146 goto fail; 147 m->pool_to = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL); 148 if (!m->pool_to) 149 goto fail_free; 150 151 return 0; 152 153 fail_free: 154 kfree(m->to_pool); 155 m->to_pool = NULL; 156 fail: 157 return -ENOMEM; 158 } 159 160 /* 161 * Initialise the pool map for SVC_POOL_PERCPU mode. 162 * Returns number of pools or <0 on error. 163 */ 164 static int 165 svc_pool_map_init_percpu(struct svc_pool_map *m) 166 { 167 unsigned int maxpools = nr_cpu_ids; 168 unsigned int pidx = 0; 169 unsigned int cpu; 170 int err; 171 172 err = svc_pool_map_alloc_arrays(m, maxpools); 173 if (err) 174 return err; 175 176 for_each_online_cpu(cpu) { 177 BUG_ON(pidx >= maxpools); 178 m->to_pool[cpu] = pidx; 179 m->pool_to[pidx] = cpu; 180 pidx++; 181 } 182 /* cpus brought online later all get mapped to pool0, sorry */ 183 184 return pidx; 185 }; 186 187 188 /* 189 * Initialise the pool map for SVC_POOL_PERNODE mode. 190 * Returns number of pools or <0 on error. 191 */ 192 static int 193 svc_pool_map_init_pernode(struct svc_pool_map *m) 194 { 195 unsigned int maxpools = nr_node_ids; 196 unsigned int pidx = 0; 197 unsigned int node; 198 int err; 199 200 err = svc_pool_map_alloc_arrays(m, maxpools); 201 if (err) 202 return err; 203 204 for_each_node_with_cpus(node) { 205 /* some architectures (e.g. SN2) have cpuless nodes */ 206 BUG_ON(pidx > maxpools); 207 m->to_pool[node] = pidx; 208 m->pool_to[pidx] = node; 209 pidx++; 210 } 211 /* nodes brought online later all get mapped to pool0, sorry */ 212 213 return pidx; 214 } 215 216 217 /* 218 * Add a reference to the global map of cpus to pools (and 219 * vice versa). Initialise the map if we're the first user. 220 * Returns the number of pools. 221 */ 222 unsigned int 223 svc_pool_map_get(void) 224 { 225 struct svc_pool_map *m = &svc_pool_map; 226 int npools = -1; 227 228 mutex_lock(&svc_pool_map_mutex); 229 230 if (m->count++) { 231 mutex_unlock(&svc_pool_map_mutex); 232 return m->npools; 233 } 234 235 if (m->mode == SVC_POOL_AUTO) 236 m->mode = svc_pool_map_choose_mode(); 237 238 switch (m->mode) { 239 case SVC_POOL_PERCPU: 240 npools = svc_pool_map_init_percpu(m); 241 break; 242 case SVC_POOL_PERNODE: 243 npools = svc_pool_map_init_pernode(m); 244 break; 245 } 246 247 if (npools < 0) { 248 /* default, or memory allocation failure */ 249 npools = 1; 250 m->mode = SVC_POOL_GLOBAL; 251 } 252 m->npools = npools; 253 254 mutex_unlock(&svc_pool_map_mutex); 255 return m->npools; 256 } 257 EXPORT_SYMBOL_GPL(svc_pool_map_get); 258 259 /* 260 * Drop a reference to the global map of cpus to pools. 261 * When the last reference is dropped, the map data is 262 * freed; this allows the sysadmin to change the pool 263 * mode using the pool_mode module option without 264 * rebooting or re-loading sunrpc.ko. 265 */ 266 void 267 svc_pool_map_put(void) 268 { 269 struct svc_pool_map *m = &svc_pool_map; 270 271 mutex_lock(&svc_pool_map_mutex); 272 273 if (!--m->count) { 274 kfree(m->to_pool); 275 m->to_pool = NULL; 276 kfree(m->pool_to); 277 m->pool_to = NULL; 278 m->npools = 0; 279 } 280 281 mutex_unlock(&svc_pool_map_mutex); 282 } 283 EXPORT_SYMBOL_GPL(svc_pool_map_put); 284 285 static int svc_pool_map_get_node(unsigned int pidx) 286 { 287 const struct svc_pool_map *m = &svc_pool_map; 288 289 if (m->count) { 290 if (m->mode == SVC_POOL_PERCPU) 291 return cpu_to_node(m->pool_to[pidx]); 292 if (m->mode == SVC_POOL_PERNODE) 293 return m->pool_to[pidx]; 294 } 295 return NUMA_NO_NODE; 296 } 297 /* 298 * Set the given thread's cpus_allowed mask so that it 299 * will only run on cpus in the given pool. 300 */ 301 static inline void 302 svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx) 303 { 304 struct svc_pool_map *m = &svc_pool_map; 305 unsigned int node = m->pool_to[pidx]; 306 307 /* 308 * The caller checks for sv_nrpools > 1, which 309 * implies that we've been initialized. 310 */ 311 WARN_ON_ONCE(m->count == 0); 312 if (m->count == 0) 313 return; 314 315 switch (m->mode) { 316 case SVC_POOL_PERCPU: 317 { 318 set_cpus_allowed_ptr(task, cpumask_of(node)); 319 break; 320 } 321 case SVC_POOL_PERNODE: 322 { 323 set_cpus_allowed_ptr(task, cpumask_of_node(node)); 324 break; 325 } 326 } 327 } 328 329 /* 330 * Use the mapping mode to choose a pool for a given CPU. 331 * Used when enqueueing an incoming RPC. Always returns 332 * a non-NULL pool pointer. 333 */ 334 struct svc_pool * 335 svc_pool_for_cpu(struct svc_serv *serv, int cpu) 336 { 337 struct svc_pool_map *m = &svc_pool_map; 338 unsigned int pidx = 0; 339 340 /* 341 * An uninitialised map happens in a pure client when 342 * lockd is brought up, so silently treat it the 343 * same as SVC_POOL_GLOBAL. 344 */ 345 if (svc_serv_is_pooled(serv)) { 346 switch (m->mode) { 347 case SVC_POOL_PERCPU: 348 pidx = m->to_pool[cpu]; 349 break; 350 case SVC_POOL_PERNODE: 351 pidx = m->to_pool[cpu_to_node(cpu)]; 352 break; 353 } 354 } 355 return &serv->sv_pools[pidx % serv->sv_nrpools]; 356 } 357 358 int svc_rpcb_setup(struct svc_serv *serv, struct net *net) 359 { 360 int err; 361 362 err = rpcb_create_local(net); 363 if (err) 364 return err; 365 366 /* Remove any stale portmap registrations */ 367 svc_unregister(serv, net); 368 return 0; 369 } 370 EXPORT_SYMBOL_GPL(svc_rpcb_setup); 371 372 void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net) 373 { 374 svc_unregister(serv, net); 375 rpcb_put_local(net); 376 } 377 EXPORT_SYMBOL_GPL(svc_rpcb_cleanup); 378 379 static int svc_uses_rpcbind(struct svc_serv *serv) 380 { 381 struct svc_program *progp; 382 unsigned int i; 383 384 for (progp = serv->sv_program; progp; progp = progp->pg_next) { 385 for (i = 0; i < progp->pg_nvers; i++) { 386 if (progp->pg_vers[i] == NULL) 387 continue; 388 if (progp->pg_vers[i]->vs_hidden == 0) 389 return 1; 390 } 391 } 392 393 return 0; 394 } 395 396 int svc_bind(struct svc_serv *serv, struct net *net) 397 { 398 if (!svc_uses_rpcbind(serv)) 399 return 0; 400 return svc_rpcb_setup(serv, net); 401 } 402 EXPORT_SYMBOL_GPL(svc_bind); 403 404 /* 405 * Create an RPC service 406 */ 407 static struct svc_serv * 408 __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, 409 struct svc_serv_ops *ops) 410 { 411 struct svc_serv *serv; 412 unsigned int vers; 413 unsigned int xdrsize; 414 unsigned int i; 415 416 if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) 417 return NULL; 418 serv->sv_name = prog->pg_name; 419 serv->sv_program = prog; 420 serv->sv_nrthreads = 1; 421 serv->sv_stats = prog->pg_stats; 422 if (bufsize > RPCSVC_MAXPAYLOAD) 423 bufsize = RPCSVC_MAXPAYLOAD; 424 serv->sv_max_payload = bufsize? bufsize : 4096; 425 serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE); 426 serv->sv_ops = ops; 427 xdrsize = 0; 428 while (prog) { 429 prog->pg_lovers = prog->pg_nvers-1; 430 for (vers=0; vers<prog->pg_nvers ; vers++) 431 if (prog->pg_vers[vers]) { 432 prog->pg_hivers = vers; 433 if (prog->pg_lovers > vers) 434 prog->pg_lovers = vers; 435 if (prog->pg_vers[vers]->vs_xdrsize > xdrsize) 436 xdrsize = prog->pg_vers[vers]->vs_xdrsize; 437 } 438 prog = prog->pg_next; 439 } 440 serv->sv_xdrsize = xdrsize; 441 INIT_LIST_HEAD(&serv->sv_tempsocks); 442 INIT_LIST_HEAD(&serv->sv_permsocks); 443 init_timer(&serv->sv_temptimer); 444 spin_lock_init(&serv->sv_lock); 445 446 serv->sv_nrpools = npools; 447 serv->sv_pools = 448 kcalloc(serv->sv_nrpools, sizeof(struct svc_pool), 449 GFP_KERNEL); 450 if (!serv->sv_pools) { 451 kfree(serv); 452 return NULL; 453 } 454 455 for (i = 0; i < serv->sv_nrpools; i++) { 456 struct svc_pool *pool = &serv->sv_pools[i]; 457 458 dprintk("svc: initialising pool %u for %s\n", 459 i, serv->sv_name); 460 461 pool->sp_id = i; 462 INIT_LIST_HEAD(&pool->sp_sockets); 463 INIT_LIST_HEAD(&pool->sp_all_threads); 464 spin_lock_init(&pool->sp_lock); 465 } 466 467 return serv; 468 } 469 470 struct svc_serv * 471 svc_create(struct svc_program *prog, unsigned int bufsize, 472 struct svc_serv_ops *ops) 473 { 474 return __svc_create(prog, bufsize, /*npools*/1, ops); 475 } 476 EXPORT_SYMBOL_GPL(svc_create); 477 478 struct svc_serv * 479 svc_create_pooled(struct svc_program *prog, unsigned int bufsize, 480 struct svc_serv_ops *ops) 481 { 482 struct svc_serv *serv; 483 unsigned int npools = svc_pool_map_get(); 484 485 serv = __svc_create(prog, bufsize, npools, ops); 486 if (!serv) 487 goto out_err; 488 return serv; 489 out_err: 490 svc_pool_map_put(); 491 return NULL; 492 } 493 EXPORT_SYMBOL_GPL(svc_create_pooled); 494 495 void svc_shutdown_net(struct svc_serv *serv, struct net *net) 496 { 497 svc_close_net(serv, net); 498 499 if (serv->sv_ops->svo_shutdown) 500 serv->sv_ops->svo_shutdown(serv, net); 501 } 502 EXPORT_SYMBOL_GPL(svc_shutdown_net); 503 504 /* 505 * Destroy an RPC service. Should be called with appropriate locking to 506 * protect the sv_nrthreads, sv_permsocks and sv_tempsocks. 507 */ 508 void 509 svc_destroy(struct svc_serv *serv) 510 { 511 dprintk("svc: svc_destroy(%s, %d)\n", 512 serv->sv_program->pg_name, 513 serv->sv_nrthreads); 514 515 if (serv->sv_nrthreads) { 516 if (--(serv->sv_nrthreads) != 0) { 517 svc_sock_update_bufs(serv); 518 return; 519 } 520 } else 521 printk("svc_destroy: no threads for serv=%p!\n", serv); 522 523 del_timer_sync(&serv->sv_temptimer); 524 525 /* 526 * The last user is gone and thus all sockets have to be destroyed to 527 * the point. Check this. 528 */ 529 BUG_ON(!list_empty(&serv->sv_permsocks)); 530 BUG_ON(!list_empty(&serv->sv_tempsocks)); 531 532 cache_clean_deferred(serv); 533 534 if (svc_serv_is_pooled(serv)) 535 svc_pool_map_put(); 536 537 kfree(serv->sv_pools); 538 kfree(serv); 539 } 540 EXPORT_SYMBOL_GPL(svc_destroy); 541 542 /* 543 * Allocate an RPC server's buffer space. 544 * We allocate pages and place them in rq_argpages. 545 */ 546 static int 547 svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node) 548 { 549 unsigned int pages, arghi; 550 551 /* bc_xprt uses fore channel allocated buffers */ 552 if (svc_is_backchannel(rqstp)) 553 return 1; 554 555 pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply. 556 * We assume one is at most one page 557 */ 558 arghi = 0; 559 WARN_ON_ONCE(pages > RPCSVC_MAXPAGES); 560 if (pages > RPCSVC_MAXPAGES) 561 pages = RPCSVC_MAXPAGES; 562 while (pages) { 563 struct page *p = alloc_pages_node(node, GFP_KERNEL, 0); 564 if (!p) 565 break; 566 rqstp->rq_pages[arghi++] = p; 567 pages--; 568 } 569 return pages == 0; 570 } 571 572 /* 573 * Release an RPC server buffer 574 */ 575 static void 576 svc_release_buffer(struct svc_rqst *rqstp) 577 { 578 unsigned int i; 579 580 for (i = 0; i < ARRAY_SIZE(rqstp->rq_pages); i++) 581 if (rqstp->rq_pages[i]) 582 put_page(rqstp->rq_pages[i]); 583 } 584 585 struct svc_rqst * 586 svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) 587 { 588 struct svc_rqst *rqstp; 589 590 rqstp = kzalloc_node(sizeof(*rqstp), GFP_KERNEL, node); 591 if (!rqstp) 592 return rqstp; 593 594 __set_bit(RQ_BUSY, &rqstp->rq_flags); 595 spin_lock_init(&rqstp->rq_lock); 596 rqstp->rq_server = serv; 597 rqstp->rq_pool = pool; 598 599 rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); 600 if (!rqstp->rq_argp) 601 goto out_enomem; 602 603 rqstp->rq_resp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); 604 if (!rqstp->rq_resp) 605 goto out_enomem; 606 607 if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node)) 608 goto out_enomem; 609 610 return rqstp; 611 out_enomem: 612 svc_rqst_free(rqstp); 613 return NULL; 614 } 615 EXPORT_SYMBOL_GPL(svc_rqst_alloc); 616 617 struct svc_rqst * 618 svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) 619 { 620 struct svc_rqst *rqstp; 621 622 rqstp = svc_rqst_alloc(serv, pool, node); 623 if (!rqstp) 624 return ERR_PTR(-ENOMEM); 625 626 serv->sv_nrthreads++; 627 spin_lock_bh(&pool->sp_lock); 628 pool->sp_nrthreads++; 629 list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads); 630 spin_unlock_bh(&pool->sp_lock); 631 return rqstp; 632 } 633 EXPORT_SYMBOL_GPL(svc_prepare_thread); 634 635 /* 636 * Choose a pool in which to create a new thread, for svc_set_num_threads 637 */ 638 static inline struct svc_pool * 639 choose_pool(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) 640 { 641 if (pool != NULL) 642 return pool; 643 644 return &serv->sv_pools[(*state)++ % serv->sv_nrpools]; 645 } 646 647 /* 648 * Choose a thread to kill, for svc_set_num_threads 649 */ 650 static inline struct task_struct * 651 choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) 652 { 653 unsigned int i; 654 struct task_struct *task = NULL; 655 656 if (pool != NULL) { 657 spin_lock_bh(&pool->sp_lock); 658 } else { 659 /* choose a pool in round-robin fashion */ 660 for (i = 0; i < serv->sv_nrpools; i++) { 661 pool = &serv->sv_pools[--(*state) % serv->sv_nrpools]; 662 spin_lock_bh(&pool->sp_lock); 663 if (!list_empty(&pool->sp_all_threads)) 664 goto found_pool; 665 spin_unlock_bh(&pool->sp_lock); 666 } 667 return NULL; 668 } 669 670 found_pool: 671 if (!list_empty(&pool->sp_all_threads)) { 672 struct svc_rqst *rqstp; 673 674 /* 675 * Remove from the pool->sp_all_threads list 676 * so we don't try to kill it again. 677 */ 678 rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all); 679 set_bit(RQ_VICTIM, &rqstp->rq_flags); 680 list_del_rcu(&rqstp->rq_all); 681 task = rqstp->rq_task; 682 } 683 spin_unlock_bh(&pool->sp_lock); 684 685 return task; 686 } 687 688 /* 689 * Create or destroy enough new threads to make the number 690 * of threads the given number. If `pool' is non-NULL, applies 691 * only to threads in that pool, otherwise round-robins between 692 * all pools. Caller must ensure that mutual exclusion between this and 693 * server startup or shutdown. 694 * 695 * Destroying threads relies on the service threads filling in 696 * rqstp->rq_task, which only the nfs ones do. Assumes the serv 697 * has been created using svc_create_pooled(). 698 * 699 * Based on code that used to be in nfsd_svc() but tweaked 700 * to be pool-aware. 701 */ 702 int 703 svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) 704 { 705 struct svc_rqst *rqstp; 706 struct task_struct *task; 707 struct svc_pool *chosen_pool; 708 int error = 0; 709 unsigned int state = serv->sv_nrthreads-1; 710 int node; 711 712 if (pool == NULL) { 713 /* The -1 assumes caller has done a svc_get() */ 714 nrservs -= (serv->sv_nrthreads-1); 715 } else { 716 spin_lock_bh(&pool->sp_lock); 717 nrservs -= pool->sp_nrthreads; 718 spin_unlock_bh(&pool->sp_lock); 719 } 720 721 /* create new threads */ 722 while (nrservs > 0) { 723 nrservs--; 724 chosen_pool = choose_pool(serv, pool, &state); 725 726 node = svc_pool_map_get_node(chosen_pool->sp_id); 727 rqstp = svc_prepare_thread(serv, chosen_pool, node); 728 if (IS_ERR(rqstp)) { 729 error = PTR_ERR(rqstp); 730 break; 731 } 732 733 __module_get(serv->sv_ops->svo_module); 734 task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp, 735 node, "%s", serv->sv_name); 736 if (IS_ERR(task)) { 737 error = PTR_ERR(task); 738 module_put(serv->sv_ops->svo_module); 739 svc_exit_thread(rqstp); 740 break; 741 } 742 743 rqstp->rq_task = task; 744 if (serv->sv_nrpools > 1) 745 svc_pool_map_set_cpumask(task, chosen_pool->sp_id); 746 747 svc_sock_update_bufs(serv); 748 wake_up_process(task); 749 } 750 /* destroy old threads */ 751 while (nrservs < 0 && 752 (task = choose_victim(serv, pool, &state)) != NULL) { 753 send_sig(SIGINT, task, 1); 754 nrservs++; 755 } 756 757 return error; 758 } 759 EXPORT_SYMBOL_GPL(svc_set_num_threads); 760 761 /* 762 * Called from a server thread as it's exiting. Caller must hold the "service 763 * mutex" for the service. 764 */ 765 void 766 svc_rqst_free(struct svc_rqst *rqstp) 767 { 768 svc_release_buffer(rqstp); 769 kfree(rqstp->rq_resp); 770 kfree(rqstp->rq_argp); 771 kfree(rqstp->rq_auth_data); 772 kfree_rcu(rqstp, rq_rcu_head); 773 } 774 EXPORT_SYMBOL_GPL(svc_rqst_free); 775 776 void 777 svc_exit_thread(struct svc_rqst *rqstp) 778 { 779 struct svc_serv *serv = rqstp->rq_server; 780 struct svc_pool *pool = rqstp->rq_pool; 781 782 spin_lock_bh(&pool->sp_lock); 783 pool->sp_nrthreads--; 784 if (!test_and_set_bit(RQ_VICTIM, &rqstp->rq_flags)) 785 list_del_rcu(&rqstp->rq_all); 786 spin_unlock_bh(&pool->sp_lock); 787 788 svc_rqst_free(rqstp); 789 790 /* Release the server */ 791 if (serv) 792 svc_destroy(serv); 793 } 794 EXPORT_SYMBOL_GPL(svc_exit_thread); 795 796 /* 797 * Register an "inet" protocol family netid with the local 798 * rpcbind daemon via an rpcbind v4 SET request. 799 * 800 * No netconfig infrastructure is available in the kernel, so 801 * we map IP_ protocol numbers to netids by hand. 802 * 803 * Returns zero on success; a negative errno value is returned 804 * if any error occurs. 805 */ 806 static int __svc_rpcb_register4(struct net *net, const u32 program, 807 const u32 version, 808 const unsigned short protocol, 809 const unsigned short port) 810 { 811 const struct sockaddr_in sin = { 812 .sin_family = AF_INET, 813 .sin_addr.s_addr = htonl(INADDR_ANY), 814 .sin_port = htons(port), 815 }; 816 const char *netid; 817 int error; 818 819 switch (protocol) { 820 case IPPROTO_UDP: 821 netid = RPCBIND_NETID_UDP; 822 break; 823 case IPPROTO_TCP: 824 netid = RPCBIND_NETID_TCP; 825 break; 826 default: 827 return -ENOPROTOOPT; 828 } 829 830 error = rpcb_v4_register(net, program, version, 831 (const struct sockaddr *)&sin, netid); 832 833 /* 834 * User space didn't support rpcbind v4, so retry this 835 * registration request with the legacy rpcbind v2 protocol. 836 */ 837 if (error == -EPROTONOSUPPORT) 838 error = rpcb_register(net, program, version, protocol, port); 839 840 return error; 841 } 842 843 #if IS_ENABLED(CONFIG_IPV6) 844 /* 845 * Register an "inet6" protocol family netid with the local 846 * rpcbind daemon via an rpcbind v4 SET request. 847 * 848 * No netconfig infrastructure is available in the kernel, so 849 * we map IP_ protocol numbers to netids by hand. 850 * 851 * Returns zero on success; a negative errno value is returned 852 * if any error occurs. 853 */ 854 static int __svc_rpcb_register6(struct net *net, const u32 program, 855 const u32 version, 856 const unsigned short protocol, 857 const unsigned short port) 858 { 859 const struct sockaddr_in6 sin6 = { 860 .sin6_family = AF_INET6, 861 .sin6_addr = IN6ADDR_ANY_INIT, 862 .sin6_port = htons(port), 863 }; 864 const char *netid; 865 int error; 866 867 switch (protocol) { 868 case IPPROTO_UDP: 869 netid = RPCBIND_NETID_UDP6; 870 break; 871 case IPPROTO_TCP: 872 netid = RPCBIND_NETID_TCP6; 873 break; 874 default: 875 return -ENOPROTOOPT; 876 } 877 878 error = rpcb_v4_register(net, program, version, 879 (const struct sockaddr *)&sin6, netid); 880 881 /* 882 * User space didn't support rpcbind version 4, so we won't 883 * use a PF_INET6 listener. 884 */ 885 if (error == -EPROTONOSUPPORT) 886 error = -EAFNOSUPPORT; 887 888 return error; 889 } 890 #endif /* IS_ENABLED(CONFIG_IPV6) */ 891 892 /* 893 * Register a kernel RPC service via rpcbind version 4. 894 * 895 * Returns zero on success; a negative errno value is returned 896 * if any error occurs. 897 */ 898 static int __svc_register(struct net *net, const char *progname, 899 const u32 program, const u32 version, 900 const int family, 901 const unsigned short protocol, 902 const unsigned short port) 903 { 904 int error = -EAFNOSUPPORT; 905 906 switch (family) { 907 case PF_INET: 908 error = __svc_rpcb_register4(net, program, version, 909 protocol, port); 910 break; 911 #if IS_ENABLED(CONFIG_IPV6) 912 case PF_INET6: 913 error = __svc_rpcb_register6(net, program, version, 914 protocol, port); 915 #endif 916 } 917 918 return error; 919 } 920 921 /** 922 * svc_register - register an RPC service with the local portmapper 923 * @serv: svc_serv struct for the service to register 924 * @net: net namespace for the service to register 925 * @family: protocol family of service's listener socket 926 * @proto: transport protocol number to advertise 927 * @port: port to advertise 928 * 929 * Service is registered for any address in the passed-in protocol family 930 */ 931 int svc_register(const struct svc_serv *serv, struct net *net, 932 const int family, const unsigned short proto, 933 const unsigned short port) 934 { 935 struct svc_program *progp; 936 struct svc_version *vers; 937 unsigned int i; 938 int error = 0; 939 940 WARN_ON_ONCE(proto == 0 && port == 0); 941 if (proto == 0 && port == 0) 942 return -EINVAL; 943 944 for (progp = serv->sv_program; progp; progp = progp->pg_next) { 945 for (i = 0; i < progp->pg_nvers; i++) { 946 vers = progp->pg_vers[i]; 947 if (vers == NULL) 948 continue; 949 950 dprintk("svc: svc_register(%sv%d, %s, %u, %u)%s\n", 951 progp->pg_name, 952 i, 953 proto == IPPROTO_UDP? "udp" : "tcp", 954 port, 955 family, 956 vers->vs_hidden ? 957 " (but not telling portmap)" : ""); 958 959 if (vers->vs_hidden) 960 continue; 961 962 error = __svc_register(net, progp->pg_name, progp->pg_prog, 963 i, family, proto, port); 964 965 if (vers->vs_rpcb_optnl) { 966 error = 0; 967 continue; 968 } 969 970 if (error < 0) { 971 printk(KERN_WARNING "svc: failed to register " 972 "%sv%u RPC service (errno %d).\n", 973 progp->pg_name, i, -error); 974 break; 975 } 976 } 977 } 978 979 return error; 980 } 981 982 /* 983 * If user space is running rpcbind, it should take the v4 UNSET 984 * and clear everything for this [program, version]. If user space 985 * is running portmap, it will reject the v4 UNSET, but won't have 986 * any "inet6" entries anyway. So a PMAP_UNSET should be sufficient 987 * in this case to clear all existing entries for [program, version]. 988 */ 989 static void __svc_unregister(struct net *net, const u32 program, const u32 version, 990 const char *progname) 991 { 992 int error; 993 994 error = rpcb_v4_register(net, program, version, NULL, ""); 995 996 /* 997 * User space didn't support rpcbind v4, so retry this 998 * request with the legacy rpcbind v2 protocol. 999 */ 1000 if (error == -EPROTONOSUPPORT) 1001 error = rpcb_register(net, program, version, 0, 0); 1002 1003 dprintk("svc: %s(%sv%u), error %d\n", 1004 __func__, progname, version, error); 1005 } 1006 1007 /* 1008 * All netids, bind addresses and ports registered for [program, version] 1009 * are removed from the local rpcbind database (if the service is not 1010 * hidden) to make way for a new instance of the service. 1011 * 1012 * The result of unregistration is reported via dprintk for those who want 1013 * verification of the result, but is otherwise not important. 1014 */ 1015 static void svc_unregister(const struct svc_serv *serv, struct net *net) 1016 { 1017 struct svc_program *progp; 1018 unsigned long flags; 1019 unsigned int i; 1020 1021 clear_thread_flag(TIF_SIGPENDING); 1022 1023 for (progp = serv->sv_program; progp; progp = progp->pg_next) { 1024 for (i = 0; i < progp->pg_nvers; i++) { 1025 if (progp->pg_vers[i] == NULL) 1026 continue; 1027 if (progp->pg_vers[i]->vs_hidden) 1028 continue; 1029 1030 dprintk("svc: attempting to unregister %sv%u\n", 1031 progp->pg_name, i); 1032 __svc_unregister(net, progp->pg_prog, i, progp->pg_name); 1033 } 1034 } 1035 1036 spin_lock_irqsave(¤t->sighand->siglock, flags); 1037 recalc_sigpending(); 1038 spin_unlock_irqrestore(¤t->sighand->siglock, flags); 1039 } 1040 1041 /* 1042 * dprintk the given error with the address of the client that caused it. 1043 */ 1044 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 1045 static __printf(2, 3) 1046 void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) 1047 { 1048 struct va_format vaf; 1049 va_list args; 1050 char buf[RPC_MAX_ADDRBUFLEN]; 1051 1052 va_start(args, fmt); 1053 1054 vaf.fmt = fmt; 1055 vaf.va = &args; 1056 1057 dprintk("svc: %s: %pV", svc_print_addr(rqstp, buf, sizeof(buf)), &vaf); 1058 1059 va_end(args); 1060 } 1061 #else 1062 static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {} 1063 #endif 1064 1065 /* 1066 * Common routine for processing the RPC request. 1067 */ 1068 static int 1069 svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) 1070 { 1071 struct svc_program *progp; 1072 struct svc_version *versp = NULL; /* compiler food */ 1073 struct svc_procedure *procp = NULL; 1074 struct svc_serv *serv = rqstp->rq_server; 1075 kxdrproc_t xdr; 1076 __be32 *statp; 1077 u32 prog, vers, proc; 1078 __be32 auth_stat, rpc_stat; 1079 int auth_res; 1080 __be32 *reply_statp; 1081 1082 rpc_stat = rpc_success; 1083 1084 if (argv->iov_len < 6*4) 1085 goto err_short_len; 1086 1087 /* Will be turned off only in gss privacy case: */ 1088 set_bit(RQ_SPLICE_OK, &rqstp->rq_flags); 1089 /* Will be turned off only when NFSv4 Sessions are used */ 1090 set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); 1091 clear_bit(RQ_DROPME, &rqstp->rq_flags); 1092 1093 /* Setup reply header */ 1094 rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); 1095 1096 svc_putu32(resv, rqstp->rq_xid); 1097 1098 vers = svc_getnl(argv); 1099 1100 /* First words of reply: */ 1101 svc_putnl(resv, 1); /* REPLY */ 1102 1103 if (vers != 2) /* RPC version number */ 1104 goto err_bad_rpc; 1105 1106 /* Save position in case we later decide to reject: */ 1107 reply_statp = resv->iov_base + resv->iov_len; 1108 1109 svc_putnl(resv, 0); /* ACCEPT */ 1110 1111 rqstp->rq_prog = prog = svc_getnl(argv); /* program number */ 1112 rqstp->rq_vers = vers = svc_getnl(argv); /* version number */ 1113 rqstp->rq_proc = proc = svc_getnl(argv); /* procedure number */ 1114 1115 for (progp = serv->sv_program; progp; progp = progp->pg_next) 1116 if (prog == progp->pg_prog) 1117 break; 1118 1119 /* 1120 * Decode auth data, and add verifier to reply buffer. 1121 * We do this before anything else in order to get a decent 1122 * auth verifier. 1123 */ 1124 auth_res = svc_authenticate(rqstp, &auth_stat); 1125 /* Also give the program a chance to reject this call: */ 1126 if (auth_res == SVC_OK && progp) { 1127 auth_stat = rpc_autherr_badcred; 1128 auth_res = progp->pg_authenticate(rqstp); 1129 } 1130 switch (auth_res) { 1131 case SVC_OK: 1132 break; 1133 case SVC_GARBAGE: 1134 goto err_garbage; 1135 case SVC_SYSERR: 1136 rpc_stat = rpc_system_err; 1137 goto err_bad; 1138 case SVC_DENIED: 1139 goto err_bad_auth; 1140 case SVC_CLOSE: 1141 if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags)) 1142 svc_close_xprt(rqstp->rq_xprt); 1143 case SVC_DROP: 1144 goto dropit; 1145 case SVC_COMPLETE: 1146 goto sendit; 1147 } 1148 1149 if (progp == NULL) 1150 goto err_bad_prog; 1151 1152 if (vers >= progp->pg_nvers || 1153 !(versp = progp->pg_vers[vers])) 1154 goto err_bad_vers; 1155 1156 procp = versp->vs_proc + proc; 1157 if (proc >= versp->vs_nproc || !procp->pc_func) 1158 goto err_bad_proc; 1159 rqstp->rq_procinfo = procp; 1160 1161 /* Syntactic check complete */ 1162 serv->sv_stats->rpccnt++; 1163 1164 /* Build the reply header. */ 1165 statp = resv->iov_base +resv->iov_len; 1166 svc_putnl(resv, RPC_SUCCESS); 1167 1168 /* Bump per-procedure stats counter */ 1169 procp->pc_count++; 1170 1171 /* Initialize storage for argp and resp */ 1172 memset(rqstp->rq_argp, 0, procp->pc_argsize); 1173 memset(rqstp->rq_resp, 0, procp->pc_ressize); 1174 1175 /* un-reserve some of the out-queue now that we have a 1176 * better idea of reply size 1177 */ 1178 if (procp->pc_xdrressize) 1179 svc_reserve_auth(rqstp, procp->pc_xdrressize<<2); 1180 1181 /* Call the function that processes the request. */ 1182 if (!versp->vs_dispatch) { 1183 /* Decode arguments */ 1184 xdr = procp->pc_decode; 1185 if (xdr && !xdr(rqstp, argv->iov_base, rqstp->rq_argp)) 1186 goto err_garbage; 1187 1188 *statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); 1189 1190 /* Encode reply */ 1191 if (test_bit(RQ_DROPME, &rqstp->rq_flags)) { 1192 if (procp->pc_release) 1193 procp->pc_release(rqstp, NULL, rqstp->rq_resp); 1194 goto dropit; 1195 } 1196 if (*statp == rpc_success && 1197 (xdr = procp->pc_encode) && 1198 !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) { 1199 dprintk("svc: failed to encode reply\n"); 1200 /* serv->sv_stats->rpcsystemerr++; */ 1201 *statp = rpc_system_err; 1202 } 1203 } else { 1204 dprintk("svc: calling dispatcher\n"); 1205 if (!versp->vs_dispatch(rqstp, statp)) { 1206 /* Release reply info */ 1207 if (procp->pc_release) 1208 procp->pc_release(rqstp, NULL, rqstp->rq_resp); 1209 goto dropit; 1210 } 1211 } 1212 1213 /* Check RPC status result */ 1214 if (*statp != rpc_success) 1215 resv->iov_len = ((void*)statp) - resv->iov_base + 4; 1216 1217 /* Release reply info */ 1218 if (procp->pc_release) 1219 procp->pc_release(rqstp, NULL, rqstp->rq_resp); 1220 1221 if (procp->pc_encode == NULL) 1222 goto dropit; 1223 1224 sendit: 1225 if (svc_authorise(rqstp)) 1226 goto dropit; 1227 return 1; /* Caller can now send it */ 1228 1229 dropit: 1230 svc_authorise(rqstp); /* doesn't hurt to call this twice */ 1231 dprintk("svc: svc_process dropit\n"); 1232 return 0; 1233 1234 err_short_len: 1235 svc_printk(rqstp, "short len %Zd, dropping request\n", 1236 argv->iov_len); 1237 1238 goto dropit; /* drop request */ 1239 1240 err_bad_rpc: 1241 serv->sv_stats->rpcbadfmt++; 1242 svc_putnl(resv, 1); /* REJECT */ 1243 svc_putnl(resv, 0); /* RPC_MISMATCH */ 1244 svc_putnl(resv, 2); /* Only RPCv2 supported */ 1245 svc_putnl(resv, 2); 1246 goto sendit; 1247 1248 err_bad_auth: 1249 dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat)); 1250 serv->sv_stats->rpcbadauth++; 1251 /* Restore write pointer to location of accept status: */ 1252 xdr_ressize_check(rqstp, reply_statp); 1253 svc_putnl(resv, 1); /* REJECT */ 1254 svc_putnl(resv, 1); /* AUTH_ERROR */ 1255 svc_putnl(resv, ntohl(auth_stat)); /* status */ 1256 goto sendit; 1257 1258 err_bad_prog: 1259 dprintk("svc: unknown program %d\n", prog); 1260 serv->sv_stats->rpcbadfmt++; 1261 svc_putnl(resv, RPC_PROG_UNAVAIL); 1262 goto sendit; 1263 1264 err_bad_vers: 1265 svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n", 1266 vers, prog, progp->pg_name); 1267 1268 serv->sv_stats->rpcbadfmt++; 1269 svc_putnl(resv, RPC_PROG_MISMATCH); 1270 svc_putnl(resv, progp->pg_lovers); 1271 svc_putnl(resv, progp->pg_hivers); 1272 goto sendit; 1273 1274 err_bad_proc: 1275 svc_printk(rqstp, "unknown procedure (%d)\n", proc); 1276 1277 serv->sv_stats->rpcbadfmt++; 1278 svc_putnl(resv, RPC_PROC_UNAVAIL); 1279 goto sendit; 1280 1281 err_garbage: 1282 svc_printk(rqstp, "failed to decode args\n"); 1283 1284 rpc_stat = rpc_garbage_args; 1285 err_bad: 1286 serv->sv_stats->rpcbadfmt++; 1287 svc_putnl(resv, ntohl(rpc_stat)); 1288 goto sendit; 1289 } 1290 1291 /* 1292 * Process the RPC request. 1293 */ 1294 int 1295 svc_process(struct svc_rqst *rqstp) 1296 { 1297 struct kvec *argv = &rqstp->rq_arg.head[0]; 1298 struct kvec *resv = &rqstp->rq_res.head[0]; 1299 struct svc_serv *serv = rqstp->rq_server; 1300 u32 dir; 1301 1302 /* 1303 * Setup response xdr_buf. 1304 * Initially it has just one page 1305 */ 1306 rqstp->rq_next_page = &rqstp->rq_respages[1]; 1307 resv->iov_base = page_address(rqstp->rq_respages[0]); 1308 resv->iov_len = 0; 1309 rqstp->rq_res.pages = rqstp->rq_respages + 1; 1310 rqstp->rq_res.len = 0; 1311 rqstp->rq_res.page_base = 0; 1312 rqstp->rq_res.page_len = 0; 1313 rqstp->rq_res.buflen = PAGE_SIZE; 1314 rqstp->rq_res.tail[0].iov_base = NULL; 1315 rqstp->rq_res.tail[0].iov_len = 0; 1316 1317 dir = svc_getnl(argv); 1318 if (dir != 0) { 1319 /* direction != CALL */ 1320 svc_printk(rqstp, "bad direction %d, dropping request\n", dir); 1321 serv->sv_stats->rpcbadfmt++; 1322 goto out_drop; 1323 } 1324 1325 /* Returns 1 for send, 0 for drop */ 1326 if (likely(svc_process_common(rqstp, argv, resv))) { 1327 int ret = svc_send(rqstp); 1328 1329 trace_svc_process(rqstp, ret); 1330 return ret; 1331 } 1332 out_drop: 1333 trace_svc_process(rqstp, 0); 1334 svc_drop(rqstp); 1335 return 0; 1336 } 1337 EXPORT_SYMBOL_GPL(svc_process); 1338 1339 #if defined(CONFIG_SUNRPC_BACKCHANNEL) 1340 /* 1341 * Process a backchannel RPC request that arrived over an existing 1342 * outbound connection 1343 */ 1344 int 1345 bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, 1346 struct svc_rqst *rqstp) 1347 { 1348 struct kvec *argv = &rqstp->rq_arg.head[0]; 1349 struct kvec *resv = &rqstp->rq_res.head[0]; 1350 struct rpc_task *task; 1351 int proc_error; 1352 int error; 1353 1354 dprintk("svc: %s(%p)\n", __func__, req); 1355 1356 /* Build the svc_rqst used by the common processing routine */ 1357 rqstp->rq_xprt = serv->sv_bc_xprt; 1358 rqstp->rq_xid = req->rq_xid; 1359 rqstp->rq_prot = req->rq_xprt->prot; 1360 rqstp->rq_server = serv; 1361 1362 rqstp->rq_addrlen = sizeof(req->rq_xprt->addr); 1363 memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen); 1364 memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg)); 1365 memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res)); 1366 1367 /* reset result send buffer "put" position */ 1368 resv->iov_len = 0; 1369 1370 /* 1371 * Skip the next two words because they've already been 1372 * processed in the transport 1373 */ 1374 svc_getu32(argv); /* XID */ 1375 svc_getnl(argv); /* CALLDIR */ 1376 1377 /* Parse and execute the bc call */ 1378 proc_error = svc_process_common(rqstp, argv, resv); 1379 1380 atomic_inc(&req->rq_xprt->bc_free_slots); 1381 if (!proc_error) { 1382 /* Processing error: drop the request */ 1383 xprt_free_bc_request(req); 1384 return 0; 1385 } 1386 1387 /* Finally, send the reply synchronously */ 1388 memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); 1389 task = rpc_run_bc_task(req); 1390 if (IS_ERR(task)) { 1391 error = PTR_ERR(task); 1392 goto out; 1393 } 1394 1395 WARN_ON_ONCE(atomic_read(&task->tk_count) != 1); 1396 error = task->tk_status; 1397 rpc_put_task(task); 1398 1399 out: 1400 dprintk("svc: %s(), error=%d\n", __func__, error); 1401 return error; 1402 } 1403 EXPORT_SYMBOL_GPL(bc_svc_process); 1404 #endif /* CONFIG_SUNRPC_BACKCHANNEL */ 1405 1406 /* 1407 * Return (transport-specific) limit on the rpc payload. 1408 */ 1409 u32 svc_max_payload(const struct svc_rqst *rqstp) 1410 { 1411 u32 max = rqstp->rq_xprt->xpt_class->xcl_max_payload; 1412 1413 if (rqstp->rq_server->sv_max_payload < max) 1414 max = rqstp->rq_server->sv_max_payload; 1415 return max; 1416 } 1417 EXPORT_SYMBOL_GPL(svc_max_payload); 1418