// SPDX-License-Identifier: GPL-2.0
/* -*- linux-c -*-
 * sysctl_net_core.c: sysctl interface to net core subsystem.
 *
 * Begun April 1, 1996, Mike Shaver.
 * Added /proc/sys/net/core directory entry (empty =) ). [MS]
 */

#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/slab.h>

#include <net/ip.h>
#include <net/sock.h>
#include <net/net_ratelimit.h>
#include <net/busy_poll.h>
#include <net/pkt_sched.h>

/* Shared bounds for the proc_dointvec_minmax() handlers below. */
static int zero = 0;
static int one = 1;
static int two __maybe_unused = 2;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
static int max_skb_frags = MAX_SKB_FRAGS;

static int net_msg_warn;	/* Unused, but still a sysctl */

int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);

#ifdef CONFIG_RPS
/*
 * Handler for /proc/sys/net/core/rps_sock_flow_entries.
 *
 * Reads report the current table size (mask + 1, or 0 if no table is
 * installed).  Writes resize the global RPS socket flow table: the
 * requested size is rounded up to a power of two, a replacement table is
 * vmalloc'ed and published with rcu_assign_pointer(), and the previous
 * table is vfree'd only after synchronize_rcu().  The static
 * sock_flow_mutex serializes concurrent writers.
 */
static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int orig_size, size;
	int ret, i;
	/* Proxy ctl_table so proc_dointvec() operates on the local copy. */
	struct ctl_table tmp = {
		.data = &size,
		.maxlen = sizeof(size),
		.mode = table->mode
	};
	struct rps_sock_flow_table *orig_sock_table, *sock_table;
	static DEFINE_MUTEX(sock_flow_mutex);

	mutex_lock(&sock_flow_mutex);

	orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
					lockdep_is_held(&sock_flow_mutex));
	size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;

	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);

	if (write) {
		if (size) {
			if (size > 1<<29) {
				/* Enforce limit to prevent overflow */
				mutex_unlock(&sock_flow_mutex);
				return -EINVAL;
			}
			/* Table size must be a power of two: mask = size - 1. */
			size = roundup_pow_of_two(size);
			if (size != orig_size) {
				sock_table =
				    vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
				if (!sock_table) {
					mutex_unlock(&sock_flow_mutex);
					return -ENOMEM;
				}
				rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1;
				sock_table->mask = size - 1;
			} else
				sock_table = orig_sock_table;

			/* (Re)initialize every entry to "no CPU recorded". */
			for (i = 0; i < size; i++)
				sock_table->ents[i] = RPS_NO_CPU;
		} else
			sock_table = NULL;

		if (sock_table != orig_sock_table) {
			rcu_assign_pointer(rps_sock_flow_table, sock_table);
			if (sock_table) {
				static_key_slow_inc(&rps_needed);
				static_key_slow_inc(&rfs_needed);
			}
			if (orig_sock_table) {
				static_key_slow_dec(&rps_needed);
				static_key_slow_dec(&rfs_needed);
				/* Wait out RCU readers before freeing. */
				synchronize_rcu();
				vfree(orig_sock_table);
			}
		}
	}

	mutex_unlock(&sock_flow_mutex);

	return ret;
}
#endif /* CONFIG_RPS */

#ifdef CONFIG_NET_FLOW_LIMIT
static DEFINE_MUTEX(flow_limit_update_mutex);

/*
 * Handler for /proc/sys/net/core/flow_limit_cpu_bitmap.
 *
 * A write supplies a cpumask: each newly-set CPU gets a per-cpu
 * sd_flow_limit table allocated on its NUMA node, and each newly-cleared
 * CPU has its table unpublished and kfree'd after synchronize_rcu().
 * A read prints the mask of CPUs that currently have a table installed.
 */
static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
				 void __user *buffer, size_t *lenp,
				 loff_t *ppos)
{
	struct sd_flow_limit *cur;
	struct softnet_data *sd;
	cpumask_var_t mask;
	int i, len, ret = 0;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	if (write) {
		ret = cpumask_parse_user(buffer, *lenp, mask);
		if (ret)
			goto done;

		mutex_lock(&flow_limit_update_mutex);
		/* Allocation covers the struct plus its trailing buckets. */
		len = sizeof(*cur) + netdev_flow_limit_table_len;
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			cur = rcu_dereference_protected(sd->flow_limit,
				     lockdep_is_held(&flow_limit_update_mutex));
			if (cur && !cpumask_test_cpu(i, mask)) {
				RCU_INIT_POINTER(sd->flow_limit, NULL);
				synchronize_rcu();
				kfree(cur);
			} else if (!cur && cpumask_test_cpu(i, mask)) {
				cur = kzalloc_node(len, GFP_KERNEL,
						   cpu_to_node(i));
				if (!cur) {
					/* not unwinding previous changes */
					ret = -ENOMEM;
					goto write_unlock;
				}
				cur->num_buckets = netdev_flow_limit_table_len;
				rcu_assign_pointer(sd->flow_limit, cur);
			}
		}
write_unlock:
		mutex_unlock(&flow_limit_update_mutex);
	} else {
		char kbuf[128];

		/* Single-shot read: anything past offset 0 returns EOF. */
		if (*ppos || !*lenp) {
			*lenp = 0;
			goto done;
		}

		cpumask_clear(mask);
		rcu_read_lock();
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			if (rcu_dereference(sd->flow_limit))
				cpumask_set_cpu(i, mask);
		}
		rcu_read_unlock();

		len = min(sizeof(kbuf) - 1, *lenp);
		len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask));
		if (!len) {
			*lenp = 0;
			goto done;
		}
		if (len < *lenp)
			kbuf[len++] = '\n';
		if (copy_to_user(buffer, kbuf, len)) {
			ret = -EFAULT;
			goto done;
		}
		*lenp = len;
		*ppos += len;
	}

done:
	free_cpumask_var(mask);
	return ret;
}

/*
 * Handler for /proc/sys/net/core/flow_limit_table_len.
 *
 * Rejects writes that are not a power of two, restoring the previous
 * value.  The mutex keeps the length stable relative to the allocations
 * performed in flow_limit_cpu_sysctl() above.
 */
static int flow_limit_table_len_sysctl(struct ctl_table *table, int write,
				       void __user *buffer, size_t *lenp,
				       loff_t *ppos)
{
	unsigned int old, *ptr;
	int ret;

	mutex_lock(&flow_limit_update_mutex);

	ptr = table->data;
	old = *ptr;
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (!ret && write && !is_power_of_2(*ptr)) {
		*ptr = old;
		ret = -EINVAL;
	}

	mutex_unlock(&flow_limit_update_mutex);
	return ret;
}
#endif /* CONFIG_NET_FLOW_LIMIT */

#ifdef CONFIG_NET_SCHED
/*
 * Handler for /proc/sys/net/core/default_qdisc: reads show the current
 * default qdisc name; a successful write installs the named qdisc as
 * the default via qdisc_set_default().
 */
static int set_default_qdisc(struct ctl_table *table, int write,
			     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	char id[IFNAMSIZ];
	struct ctl_table tbl = {
		.data = id,
		.maxlen = IFNAMSIZ,
	};
	int ret;

	qdisc_get_default(id, IFNAMSIZ);

	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
	if (write && ret == 0)
		ret = qdisc_set_default(id);
	return ret;
}
#endif

/*
 * Shared handler for dev_weight, dev_weight_rx_bias and
 * dev_weight_tx_bias: after any of the three changes, recompute the
 * derived dev_rx_weight/dev_tx_weight products.
 */
static int proc_do_dev_weight(struct ctl_table *table, int write,
			      void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (ret != 0)
		return ret;

	dev_rx_weight = weight_p * dev_weight_rx_bias;
	dev_tx_weight = weight_p * dev_weight_tx_bias;

	return ret;
}

/*
 * Handler for /proc/sys/net/core/netdev_rss_key: formats the RSS key as
 * a hex string into a stack buffer and hands that to proc_dostring().
 * The entry is registered mode 0444, so only reads reach this handler
 * in practice.  Note table->maxlen is not used here; the buffer is
 * sized locally from NETDEV_RSS_KEY_LEN.
 */
static int proc_do_rss_key(struct ctl_table *table, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table fake_table;
	char buf[NETDEV_RSS_KEY_LEN * 3];

	snprintf(buf, sizeof(buf), "%*phC", NETDEV_RSS_KEY_LEN, netdev_rss_key);
	fake_table.data = buf;
	fake_table.maxlen = sizeof(buf);
	return proc_dostring(&fake_table, write, buffer, lenp, ppos);
}

#ifdef CONFIG_BPF_JIT
/*
 * Handler for /proc/sys/net/core/bpf_jit_enable.  Writes require
 * CAP_SYS_ADMIN; the debugging value 2 is only accepted when
 * bpf_dump_raw_ok() allows it, and is loudly warned about.
 */
static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
					   void __user *buffer, size_t *lenp,
					   loff_t *ppos)
{
	int ret, jit_enable = *(int *)table->data;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* Validate into a local copy; commit only on success. */
	tmp.data = &jit_enable;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (jit_enable < 2 ||
		    (jit_enable == 2 && bpf_dump_raw_ok())) {
			*(int *)table->data = jit_enable;
			if (jit_enable == 2)
				pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n");
		} else {
			ret = -EPERM;
		}
	}
	return ret;
}

# ifdef CONFIG_HAVE_EBPF_JIT
/* CAP_SYS_ADMIN-gated wrapper for the bpf_jit_harden/kallsyms knobs. */
static int
proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
				    void __user *buffer, size_t *lenp,
				    loff_t *ppos)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
# endif
#endif

/* Global (init_net) /proc/sys/net/core/ entries. */
static struct ctl_table net_core_table[] = {
#ifdef CONFIG_NET
	{
		.procname	= "wmem_max",
		.data		= &sysctl_wmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_sndbuf,
	},
	{
		.procname	= "rmem_max",
		.data		= &sysctl_rmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_rcvbuf,
	},
	{
		.procname	= "wmem_default",
		.data		= &sysctl_wmem_default,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_sndbuf,
	},
	{
		.procname	= "rmem_default",
		.data		= &sysctl_rmem_default,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_rcvbuf,
	},
	{
		.procname	= "dev_weight",
		.data		= &weight_p,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_dev_weight,
	},
	{
		.procname	= "dev_weight_rx_bias",
		.data		= &dev_weight_rx_bias,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_dev_weight,
	},
	{
		.procname	= "dev_weight_tx_bias",
		.data		= &dev_weight_tx_bias,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_dev_weight,
	},
	{
		.procname	= "netdev_max_backlog",
		.data		= &netdev_max_backlog,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "netdev_rss_key",
		.data		= &netdev_rss_key,
		.maxlen		= sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_do_rss_key,
	},
#ifdef CONFIG_BPF_JIT
	{
		.procname	= "bpf_jit_enable",
		.data		= &bpf_jit_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax_bpf_enable,
# ifdef CONFIG_BPF_JIT_ALWAYS_ON
		/* JIT always on: the only permitted value is 1. */
		.extra1		= &one,
		.extra2		= &one,
# else
		.extra1		= &zero,
		.extra2		= &two,
# endif
	},
# ifdef CONFIG_HAVE_EBPF_JIT
	{
		.procname	= "bpf_jit_harden",
		.data		= &bpf_jit_harden,
		.maxlen		= sizeof(int),
		.mode		= 0600,
		.proc_handler	= proc_dointvec_minmax_bpf_restricted,
		.extra1		= &zero,
		.extra2		= &two,
	},
	{
		.procname	= "bpf_jit_kallsyms",
		.data		= &bpf_jit_kallsyms,
		.maxlen		= sizeof(int),
		.mode		= 0600,
		.proc_handler	= proc_dointvec_minmax_bpf_restricted,
		.extra1		= &zero,
		.extra2		= &one,
	},
# endif
#endif
	{
		.procname	= "netdev_tstamp_prequeue",
		.data		= &netdev_tstamp_prequeue,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "message_cost",
		.data		= &net_ratelimit_state.interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "message_burst",
		.data		= &net_ratelimit_state.burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "optmem_max",
		.data		= &sysctl_optmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tstamp_allow_data",
		.data		= &sysctl_tstamp_allow_data,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &one
	},
#ifdef CONFIG_RPS
	{
		/* No .data: the handler manages the table itself. */
		.procname	= "rps_sock_flow_entries",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= rps_sock_flow_sysctl
	},
#endif
#ifdef CONFIG_NET_FLOW_LIMIT
	{
		.procname	= "flow_limit_cpu_bitmap",
		.mode		= 0644,
		.proc_handler	= flow_limit_cpu_sysctl
	},
	{
		.procname	= "flow_limit_table_len",
		.data		= &netdev_flow_limit_table_len,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= flow_limit_table_len_sysctl
	},
#endif /* CONFIG_NET_FLOW_LIMIT */
#ifdef CONFIG_NET_RX_BUSY_POLL
	{
		.procname	= "busy_poll",
		.data		= &sysctl_net_busy_poll,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
	},
	{
		.procname	= "busy_read",
		.data		= &sysctl_net_busy_read,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
	},
#endif
#ifdef CONFIG_NET_SCHED
	{
		.procname	= "default_qdisc",
		.mode		= 0644,
		.maxlen		= IFNAMSIZ,
		.proc_handler	= set_default_qdisc
	},
#endif
#endif /* CONFIG_NET */
	{
		.procname	= "netdev_budget",
		.data		= &netdev_budget,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "warnings",
		.data		= &net_msg_warn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "max_skb_frags",
		.data		= &sysctl_max_skb_frags,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one,
		.extra2		= &max_skb_frags,
	},
	{
		.procname	= "netdev_budget_usecs",
		.data		= &netdev_budget_usecs,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
	},
	{
		.procname	= "fb_tunnels_only_for_init_net",
		.data		= &sysctl_fb_tunnels_only_for_init_net,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &one,
	},
	{ }
};

/* Per-network-namespace /proc/sys/net/core/ entries. */
static struct ctl_table netns_core_table[] = {
	{
		.procname	= "somaxconn",
		.data		= &init_net.core.sysctl_somaxconn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.extra1		= &zero,
		.proc_handler	= proc_dointvec_minmax
	},
	{ }
};

/*
 * Register the per-netns table.  For non-init namespaces the template is
 * duplicated so .data can point at that namespace's own somaxconn.
 */
static __net_init int sysctl_core_net_init(struct net *net)
{
	struct ctl_table *tbl;

	tbl = netns_core_table;
	if (!net_eq(net, &init_net)) {
		tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
		if (tbl == NULL)
			goto err_dup;

		tbl[0].data = &net->core.sysctl_somaxconn;

		/* Don't export any sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns) {
			tbl[0].procname = NULL;
		}
	}

	net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl);
	if (net->core.sysctl_hdr == NULL)
		goto err_reg;

	return 0;

err_reg:
	if (tbl != netns_core_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
}

/* Unregister and free the duplicated per-netns table (never the template). */
static __net_exit void sysctl_core_net_exit(struct net *net)
{
	struct ctl_table *tbl;

	tbl = net->core.sysctl_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->core.sysctl_hdr);
	BUG_ON(tbl == netns_core_table);
	kfree(tbl);
}

static __net_initdata struct pernet_operations sysctl_core_ops = {
	.init = sysctl_core_net_init,
	.exit = sysctl_core_net_exit,
};

/* Register the global table once, then hook up per-netns registration. */
static __init int sysctl_core_init(void)
{
	register_net_sysctl(&init_net, "net/core", net_core_table);
	return register_pernet_subsys(&sysctl_core_ops);
}

fs_initcall(sysctl_core_init);