1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem. 4 * 5 * Begun April 1, 1996, Mike Shaver. 6 * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS] 7 */ 8 9 #include <linux/mm.h> 10 #include <linux/module.h> 11 #include <linux/sysctl.h> 12 #include <linux/igmp.h> 13 #include <linux/inetdevice.h> 14 #include <linux/seqlock.h> 15 #include <linux/init.h> 16 #include <linux/slab.h> 17 #include <linux/nsproxy.h> 18 #include <linux/swap.h> 19 #include <net/snmp.h> 20 #include <net/icmp.h> 21 #include <net/ip.h> 22 #include <net/route.h> 23 #include <net/tcp.h> 24 #include <net/udp.h> 25 #include <net/cipso_ipv4.h> 26 #include <net/inet_frag.h> 27 #include <net/ping.h> 28 #include <net/protocol.h> 29 #include <net/netevent.h> 30 31 static int zero; 32 static int one = 1; 33 static int two = 2; 34 static int four = 4; 35 static int thousand = 1000; 36 static int gso_max_segs = GSO_MAX_SEGS; 37 static int tcp_retr1_max = 255; 38 static int ip_local_port_range_min[] = { 1, 1 }; 39 static int ip_local_port_range_max[] = { 65535, 65535 }; 40 static int tcp_adv_win_scale_min = -31; 41 static int tcp_adv_win_scale_max = 31; 42 static int ip_privileged_port_min; 43 static int ip_privileged_port_max = 65535; 44 static int ip_ttl_min = 1; 45 static int ip_ttl_max = 255; 46 static int tcp_syn_retries_min = 1; 47 static int tcp_syn_retries_max = MAX_TCP_SYNCNT; 48 static int ip_ping_group_range_min[] = { 0, 0 }; 49 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; 50 static int comp_sack_nr_max = 255; 51 static u32 u32_max_div_HZ = UINT_MAX / HZ; 52 53 /* obsolete */ 54 static int sysctl_tcp_low_latency __read_mostly; 55 56 /* Update system visible IP port range */ 57 static void set_local_port_range(struct net *net, int range[2]) 58 { 59 bool same_parity = !((range[0] ^ range[1]) & 1); 60 61 write_seqlock_bh(&net->ipv4.ip_local_ports.lock); 62 if (same_parity && !net->ipv4.ip_local_ports.warned) { 63 net->ipv4.ip_local_ports.warned = true; 64 pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n"); 65 } 66 net->ipv4.ip_local_ports.range[0] = range[0]; 67 net->ipv4.ip_local_ports.range[1] = range[1]; 68 write_sequnlock_bh(&net->ipv4.ip_local_ports.lock); 69 } 70 71 /* Validate changes from /proc interface. */ 72 static int ipv4_local_port_range(struct ctl_table *table, int write, 73 void __user *buffer, 74 size_t *lenp, loff_t *ppos) 75 { 76 struct net *net = 77 container_of(table->data, struct net, ipv4.ip_local_ports.range); 78 int ret; 79 int range[2]; 80 struct ctl_table tmp = { 81 .data = &range, 82 .maxlen = sizeof(range), 83 .mode = table->mode, 84 .extra1 = &ip_local_port_range_min, 85 .extra2 = &ip_local_port_range_max, 86 }; 87 88 inet_get_local_port_range(net, &range[0], &range[1]); 89 90 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 91 92 if (write && ret == 0) { 93 /* Ensure that the upper limit is not smaller than the lower, 94 * and that the lower does not encroach upon the privileged 95 * port limit. 96 */ 97 if ((range[1] < range[0]) || 98 (range[0] < net->ipv4.sysctl_ip_prot_sock)) 99 ret = -EINVAL; 100 else 101 set_local_port_range(net, range); 102 } 103 104 return ret; 105 } 106 107 /* Validate changes from /proc interface. */ 108 static int ipv4_privileged_ports(struct ctl_table *table, int write, 109 void __user *buffer, size_t *lenp, loff_t *ppos) 110 { 111 struct net *net = container_of(table->data, struct net, 112 ipv4.sysctl_ip_prot_sock); 113 int ret; 114 int pports; 115 int range[2]; 116 struct ctl_table tmp = { 117 .data = &pports, 118 .maxlen = sizeof(pports), 119 .mode = table->mode, 120 .extra1 = &ip_privileged_port_min, 121 .extra2 = &ip_privileged_port_max, 122 }; 123 124 pports = net->ipv4.sysctl_ip_prot_sock; 125 126 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 127 128 if (write && ret == 0) { 129 inet_get_local_port_range(net, &range[0], &range[1]); 130 /* Ensure that the local port range doesn't overlap with the 131 * privileged port range. 132 */ 133 if (range[0] < pports) 134 ret = -EINVAL; 135 else 136 net->ipv4.sysctl_ip_prot_sock = pports; 137 } 138 139 return ret; 140 } 141 142 static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high) 143 { 144 kgid_t *data = table->data; 145 struct net *net = 146 container_of(table->data, struct net, ipv4.ping_group_range.range); 147 unsigned int seq; 148 do { 149 seq = read_seqbegin(&net->ipv4.ping_group_range.lock); 150 151 *low = data[0]; 152 *high = data[1]; 153 } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq)); 154 } 155 156 /* Update system visible IP port range */ 157 static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high) 158 { 159 kgid_t *data = table->data; 160 struct net *net = 161 container_of(table->data, struct net, ipv4.ping_group_range.range); 162 write_seqlock(&net->ipv4.ping_group_range.lock); 163 data[0] = low; 164 data[1] = high; 165 write_sequnlock(&net->ipv4.ping_group_range.lock); 166 } 167 168 /* Validate changes from /proc interface. */ 169 static int ipv4_ping_group_range(struct ctl_table *table, int write, 170 void __user *buffer, 171 size_t *lenp, loff_t *ppos) 172 { 173 struct user_namespace *user_ns = current_user_ns(); 174 int ret; 175 gid_t urange[2]; 176 kgid_t low, high; 177 struct ctl_table tmp = { 178 .data = &urange, 179 .maxlen = sizeof(urange), 180 .mode = table->mode, 181 .extra1 = &ip_ping_group_range_min, 182 .extra2 = &ip_ping_group_range_max, 183 }; 184 185 inet_get_ping_group_range_table(table, &low, &high); 186 urange[0] = from_kgid_munged(user_ns, low); 187 urange[1] = from_kgid_munged(user_ns, high); 188 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 189 190 if (write && ret == 0) { 191 low = make_kgid(user_ns, urange[0]); 192 high = make_kgid(user_ns, urange[1]); 193 if (!gid_valid(low) || !gid_valid(high)) 194 return -EINVAL; 195 if (urange[1] < urange[0] || gid_lt(high, low)) { 196 low = make_kgid(&init_user_ns, 1); 197 high = make_kgid(&init_user_ns, 0); 198 } 199 set_ping_group_range(table, low, high); 200 } 201 202 return ret; 203 } 204 205 static int ipv4_fwd_update_priority(struct ctl_table *table, int write, 206 void __user *buffer, 207 size_t *lenp, loff_t *ppos) 208 { 209 struct net *net; 210 int ret; 211 212 net = container_of(table->data, struct net, 213 ipv4.sysctl_ip_fwd_update_priority); 214 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 215 if (write && ret == 0) 216 call_netevent_notifiers(NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE, 217 net); 218 219 return ret; 220 } 221 222 static int proc_tcp_congestion_control(struct ctl_table *ctl, int write, 223 void __user *buffer, size_t *lenp, loff_t *ppos) 224 { 225 struct net *net = container_of(ctl->data, struct net, 226 ipv4.tcp_congestion_control); 227 char val[TCP_CA_NAME_MAX]; 228 struct ctl_table tbl = { 229 .data = val, 230 .maxlen = TCP_CA_NAME_MAX, 231 }; 232 int ret; 233 234 tcp_get_default_congestion_control(net, val); 235 236 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 237 if (write && ret == 0) 238 ret = tcp_set_default_congestion_control(net, val); 239 return ret; 240 } 241 242 static int proc_tcp_available_congestion_control(struct ctl_table *ctl, 243 int write, 244 void __user *buffer, size_t *lenp, 245 loff_t *ppos) 246 { 247 struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, }; 248 int ret; 249 250 tbl.data = kmalloc(tbl.maxlen, GFP_USER); 251 if (!tbl.data) 252 return -ENOMEM; 253 tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX); 254 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 255 kfree(tbl.data); 256 return ret; 257 } 258 259 static int proc_allowed_congestion_control(struct ctl_table *ctl, 260 int write, 261 void __user *buffer, size_t *lenp, 262 loff_t *ppos) 263 { 264 struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX }; 265 int ret; 266 267 tbl.data = kmalloc(tbl.maxlen, GFP_USER); 268 if (!tbl.data) 269 return -ENOMEM; 270 271 tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen); 272 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 273 if (write && ret == 0) 274 ret = tcp_set_allowed_congestion_control(tbl.data); 275 kfree(tbl.data); 276 return ret; 277 } 278 279 static int proc_tcp_fastopen_key(struct ctl_table *table, int write, 280 void __user *buffer, size_t *lenp, 281 loff_t *ppos) 282 { 283 struct net *net = container_of(table->data, struct net, 284 ipv4.sysctl_tcp_fastopen); 285 struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; 286 struct tcp_fastopen_context *ctxt; 287 u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */ 288 __le32 key[4]; 289 int ret, i; 290 291 tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL); 292 if (!tbl.data) 293 return -ENOMEM; 294 295 rcu_read_lock(); 296 ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx); 297 if (ctxt) 298 memcpy(key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); 299 else 300 memset(key, 0, sizeof(key)); 301 rcu_read_unlock(); 302 303 for (i = 0; i < ARRAY_SIZE(key); i++) 304 user_key[i] = le32_to_cpu(key[i]); 305 306 snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x", 307 user_key[0], user_key[1], user_key[2], user_key[3]); 308 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 309 310 if (write && ret == 0) { 311 if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1, 312 user_key + 2, user_key + 3) != 4) { 313 ret = -EINVAL; 314 goto bad_key; 315 } 316 317 for (i = 0; i < ARRAY_SIZE(user_key); i++) 318 key[i] = cpu_to_le32(user_key[i]); 319 320 tcp_fastopen_reset_cipher(net, NULL, key, 321 TCP_FASTOPEN_KEY_LENGTH); 322 } 323 324 bad_key: 325 pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n", 326 user_key[0], user_key[1], user_key[2], user_key[3], 327 (char *)tbl.data, ret); 328 kfree(tbl.data); 329 return ret; 330 } 331 332 static void proc_configure_early_demux(int enabled, int protocol) 333 { 334 struct net_protocol *ipprot; 335 #if IS_ENABLED(CONFIG_IPV6) 336 struct inet6_protocol *ip6prot; 337 #endif 338 339 rcu_read_lock(); 340 341 ipprot = rcu_dereference(inet_protos[protocol]); 342 if (ipprot) 343 ipprot->early_demux = enabled ? ipprot->early_demux_handler : 344 NULL; 345 346 #if IS_ENABLED(CONFIG_IPV6) 347 ip6prot = rcu_dereference(inet6_protos[protocol]); 348 if (ip6prot) 349 ip6prot->early_demux = enabled ? ip6prot->early_demux_handler : 350 NULL; 351 #endif 352 rcu_read_unlock(); 353 } 354 355 static int proc_tcp_early_demux(struct ctl_table *table, int write, 356 void __user *buffer, size_t *lenp, loff_t *ppos) 357 { 358 int ret = 0; 359 360 ret = proc_dointvec(table, write, buffer, lenp, ppos); 361 362 if (write && !ret) { 363 int enabled = init_net.ipv4.sysctl_tcp_early_demux; 364 365 proc_configure_early_demux(enabled, IPPROTO_TCP); 366 } 367 368 return ret; 369 } 370 371 static int proc_udp_early_demux(struct ctl_table *table, int write, 372 void __user *buffer, size_t *lenp, loff_t *ppos) 373 { 374 int ret = 0; 375 376 ret = proc_dointvec(table, write, buffer, lenp, ppos); 377 378 if (write && !ret) { 379 int enabled = init_net.ipv4.sysctl_udp_early_demux; 380 381 proc_configure_early_demux(enabled, IPPROTO_UDP); 382 } 383 384 return ret; 385 } 386 387 static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table, 388 int write, 389 void __user *buffer, 390 size_t *lenp, loff_t *ppos) 391 { 392 struct net *net = container_of(table->data, struct net, 393 ipv4.sysctl_tcp_fastopen_blackhole_timeout); 394 int ret; 395 396 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 397 if (write && ret == 0) 398 atomic_set(&net->ipv4.tfo_active_disable_times, 0); 399 400 return ret; 401 } 402 403 static int proc_tcp_available_ulp(struct ctl_table *ctl, 404 int write, 405 void __user *buffer, size_t *lenp, 406 loff_t *ppos) 407 { 408 struct ctl_table tbl = { .maxlen = TCP_ULP_BUF_MAX, }; 409 int ret; 410 411 tbl.data = kmalloc(tbl.maxlen, GFP_USER); 412 if (!tbl.data) 413 return -ENOMEM; 414 tcp_get_available_ulp(tbl.data, TCP_ULP_BUF_MAX); 415 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 416 kfree(tbl.data); 417 418 return ret; 419 } 420 421 #ifdef CONFIG_IP_ROUTE_MULTIPATH 422 static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write, 423 void __user *buffer, size_t *lenp, 424 loff_t *ppos) 425 { 426 struct net *net = container_of(table->data, struct net, 427 ipv4.sysctl_fib_multipath_hash_policy); 428 int ret; 429 430 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 431 if (write && ret == 0) 432 call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net); 433 434 return ret; 435 } 436 #endif 437 438 static struct ctl_table ipv4_table[] = { 439 { 440 .procname = "tcp_max_orphans", 441 .data = &sysctl_tcp_max_orphans, 442 .maxlen = sizeof(int), 443 .mode = 0644, 444 .proc_handler = proc_dointvec 445 }, 446 { 447 .procname = "inet_peer_threshold", 448 .data = &inet_peer_threshold, 449 .maxlen = sizeof(int), 450 .mode = 0644, 451 .proc_handler = proc_dointvec 452 }, 453 { 454 .procname = "inet_peer_minttl", 455 .data = &inet_peer_minttl, 456 .maxlen = sizeof(int), 457 .mode = 0644, 458 .proc_handler = proc_dointvec_jiffies, 459 }, 460 { 461 .procname = "inet_peer_maxttl", 462 .data = &inet_peer_maxttl, 463 .maxlen = sizeof(int), 464 .mode = 0644, 465 .proc_handler = proc_dointvec_jiffies, 466 }, 467 { 468 .procname = "tcp_mem", 469 .maxlen = sizeof(sysctl_tcp_mem), 470 .data = &sysctl_tcp_mem, 471 .mode = 0644, 472 .proc_handler = proc_doulongvec_minmax, 473 }, 474 { 475 .procname = "tcp_low_latency", 476 .data = &sysctl_tcp_low_latency, 477 .maxlen = sizeof(int), 478 .mode = 0644, 479 .proc_handler = proc_dointvec 480 }, 481 #ifdef CONFIG_NETLABEL 482 { 483 .procname = "cipso_cache_enable", 484 .data = &cipso_v4_cache_enabled, 485 .maxlen = sizeof(int), 486 .mode = 0644, 487 .proc_handler = proc_dointvec, 488 }, 489 { 490 .procname = "cipso_cache_bucket_size", 491 .data = &cipso_v4_cache_bucketsize, 492 .maxlen = sizeof(int), 493 .mode = 0644, 494 .proc_handler = proc_dointvec, 495 }, 496 { 497 .procname = "cipso_rbm_optfmt", 498 .data = &cipso_v4_rbm_optfmt, 499 .maxlen = sizeof(int), 500 .mode = 0644, 501 .proc_handler = proc_dointvec, 502 }, 503 { 504 .procname = "cipso_rbm_strictvalid", 505 .data = &cipso_v4_rbm_strictvalid, 506 .maxlen = sizeof(int), 507 .mode = 0644, 508 .proc_handler = proc_dointvec, 509 }, 510 #endif /* CONFIG_NETLABEL */ 511 { 512 .procname = "tcp_available_congestion_control", 513 .maxlen = TCP_CA_BUF_MAX, 514 .mode = 0444, 515 .proc_handler = proc_tcp_available_congestion_control, 516 }, 517 { 518 .procname = "tcp_allowed_congestion_control", 519 .maxlen = TCP_CA_BUF_MAX, 520 .mode = 0644, 521 .proc_handler = proc_allowed_congestion_control, 522 }, 523 { 524 .procname = "tcp_available_ulp", 525 .maxlen = TCP_ULP_BUF_MAX, 526 .mode = 0444, 527 .proc_handler = proc_tcp_available_ulp, 528 }, 529 { 530 .procname = "icmp_msgs_per_sec", 531 .data = &sysctl_icmp_msgs_per_sec, 532 .maxlen = sizeof(int), 533 .mode = 0644, 534 .proc_handler = proc_dointvec_minmax, 535 .extra1 = &zero, 536 }, 537 { 538 .procname = "icmp_msgs_burst", 539 .data = &sysctl_icmp_msgs_burst, 540 .maxlen = sizeof(int), 541 .mode = 0644, 542 .proc_handler = proc_dointvec_minmax, 543 .extra1 = &zero, 544 }, 545 { 546 .procname = "udp_mem", 547 .data = &sysctl_udp_mem, 548 .maxlen = sizeof(sysctl_udp_mem), 549 .mode = 0644, 550 .proc_handler = proc_doulongvec_minmax, 551 }, 552 { } 553 }; 554 555 static struct ctl_table ipv4_net_table[] = { 556 { 557 .procname = "icmp_echo_ignore_all", 558 .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all, 559 .maxlen = sizeof(int), 560 .mode = 0644, 561 .proc_handler = proc_dointvec 562 }, 563 { 564 .procname = "icmp_echo_ignore_broadcasts", 565 .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts, 566 .maxlen = sizeof(int), 567 .mode = 0644, 568 .proc_handler = proc_dointvec 569 }, 570 { 571 .procname = "icmp_ignore_bogus_error_responses", 572 .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses, 573 .maxlen = sizeof(int), 574 .mode = 0644, 575 .proc_handler = proc_dointvec 576 }, 577 { 578 .procname = "icmp_errors_use_inbound_ifaddr", 579 .data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr, 580 .maxlen = sizeof(int), 581 .mode = 0644, 582 .proc_handler = proc_dointvec 583 }, 584 { 585 .procname = "icmp_ratelimit", 586 .data = &init_net.ipv4.sysctl_icmp_ratelimit, 587 .maxlen = sizeof(int), 588 .mode = 0644, 589 .proc_handler = proc_dointvec_ms_jiffies, 590 }, 591 { 592 .procname = "icmp_ratemask", 593 .data = &init_net.ipv4.sysctl_icmp_ratemask, 594 .maxlen = sizeof(int), 595 .mode = 0644, 596 .proc_handler = proc_dointvec 597 }, 598 { 599 .procname = "ping_group_range", 600 .data = &init_net.ipv4.ping_group_range.range, 601 .maxlen = sizeof(gid_t)*2, 602 .mode = 0644, 603 .proc_handler = ipv4_ping_group_range, 604 }, 605 #ifdef CONFIG_NET_L3_MASTER_DEV 606 { 607 .procname = "raw_l3mdev_accept", 608 .data = &init_net.ipv4.sysctl_raw_l3mdev_accept, 609 .maxlen = sizeof(int), 610 .mode = 0644, 611 .proc_handler = proc_dointvec_minmax, 612 .extra1 = &zero, 613 .extra2 = &one, 614 }, 615 #endif 616 { 617 .procname = "tcp_ecn", 618 .data = &init_net.ipv4.sysctl_tcp_ecn, 619 .maxlen = sizeof(int), 620 .mode = 0644, 621 .proc_handler = proc_dointvec 622 }, 623 { 624 .procname = "tcp_ecn_fallback", 625 .data = &init_net.ipv4.sysctl_tcp_ecn_fallback, 626 .maxlen = sizeof(int), 627 .mode = 0644, 628 .proc_handler = proc_dointvec 629 }, 630 { 631 .procname = "ip_dynaddr", 632 .data = &init_net.ipv4.sysctl_ip_dynaddr, 633 .maxlen = sizeof(int), 634 .mode = 0644, 635 .proc_handler = proc_dointvec 636 }, 637 { 638 .procname = "ip_early_demux", 639 .data = &init_net.ipv4.sysctl_ip_early_demux, 640 .maxlen = sizeof(int), 641 .mode = 0644, 642 .proc_handler = proc_dointvec 643 }, 644 { 645 .procname = "udp_early_demux", 646 .data = &init_net.ipv4.sysctl_udp_early_demux, 647 .maxlen = sizeof(int), 648 .mode = 0644, 649 .proc_handler = proc_udp_early_demux 650 }, 651 { 652 .procname = "tcp_early_demux", 653 .data = &init_net.ipv4.sysctl_tcp_early_demux, 654 .maxlen = sizeof(int), 655 .mode = 0644, 656 .proc_handler = proc_tcp_early_demux 657 }, 658 { 659 .procname = "ip_default_ttl", 660 .data = &init_net.ipv4.sysctl_ip_default_ttl, 661 .maxlen = sizeof(int), 662 .mode = 0644, 663 .proc_handler = proc_dointvec_minmax, 664 .extra1 = &ip_ttl_min, 665 .extra2 = &ip_ttl_max, 666 }, 667 { 668 .procname = "ip_local_port_range", 669 .maxlen = sizeof(init_net.ipv4.ip_local_ports.range), 670 .data = &init_net.ipv4.ip_local_ports.range, 671 .mode = 0644, 672 .proc_handler = ipv4_local_port_range, 673 }, 674 { 675 .procname = "ip_local_reserved_ports", 676 .data = &init_net.ipv4.sysctl_local_reserved_ports, 677 .maxlen = 65536, 678 .mode = 0644, 679 .proc_handler = proc_do_large_bitmap, 680 }, 681 { 682 .procname = "ip_no_pmtu_disc", 683 .data = &init_net.ipv4.sysctl_ip_no_pmtu_disc, 684 .maxlen = sizeof(int), 685 .mode = 0644, 686 .proc_handler = proc_dointvec 687 }, 688 { 689 .procname = "ip_forward_use_pmtu", 690 .data = &init_net.ipv4.sysctl_ip_fwd_use_pmtu, 691 .maxlen = sizeof(int), 692 .mode = 0644, 693 .proc_handler = proc_dointvec, 694 }, 695 { 696 .procname = "ip_forward_update_priority", 697 .data = &init_net.ipv4.sysctl_ip_fwd_update_priority, 698 .maxlen = sizeof(int), 699 .mode = 0644, 700 .proc_handler = ipv4_fwd_update_priority, 701 .extra1 = &zero, 702 .extra2 = &one, 703 }, 704 { 705 .procname = "ip_nonlocal_bind", 706 .data = &init_net.ipv4.sysctl_ip_nonlocal_bind, 707 .maxlen = sizeof(int), 708 .mode = 0644, 709 .proc_handler = proc_dointvec 710 }, 711 { 712 .procname = "fwmark_reflect", 713 .data = &init_net.ipv4.sysctl_fwmark_reflect, 714 .maxlen = sizeof(int), 715 .mode = 0644, 716 .proc_handler = proc_dointvec, 717 }, 718 { 719 .procname = "tcp_fwmark_accept", 720 .data = &init_net.ipv4.sysctl_tcp_fwmark_accept, 721 .maxlen = sizeof(int), 722 .mode = 0644, 723 .proc_handler = proc_dointvec, 724 }, 725 #ifdef CONFIG_NET_L3_MASTER_DEV 726 { 727 .procname = "tcp_l3mdev_accept", 728 .data = &init_net.ipv4.sysctl_tcp_l3mdev_accept, 729 .maxlen = sizeof(int), 730 .mode = 0644, 731 .proc_handler = proc_dointvec_minmax, 732 .extra1 = &zero, 733 .extra2 = &one, 734 }, 735 #endif 736 { 737 .procname = "tcp_mtu_probing", 738 .data = &init_net.ipv4.sysctl_tcp_mtu_probing, 739 .maxlen = sizeof(int), 740 .mode = 0644, 741 .proc_handler = proc_dointvec, 742 }, 743 { 744 .procname = "tcp_base_mss", 745 .data = &init_net.ipv4.sysctl_tcp_base_mss, 746 .maxlen = sizeof(int), 747 .mode = 0644, 748 .proc_handler = proc_dointvec, 749 }, 750 { 751 .procname = "tcp_probe_threshold", 752 .data = &init_net.ipv4.sysctl_tcp_probe_threshold, 753 .maxlen = sizeof(int), 754 .mode = 0644, 755 .proc_handler = proc_dointvec, 756 }, 757 { 758 .procname = "tcp_probe_interval", 759 .data = &init_net.ipv4.sysctl_tcp_probe_interval, 760 .maxlen = sizeof(u32), 761 .mode = 0644, 762 .proc_handler = proc_douintvec_minmax, 763 .extra2 = &u32_max_div_HZ, 764 }, 765 { 766 .procname = "igmp_link_local_mcast_reports", 767 .data = &init_net.ipv4.sysctl_igmp_llm_reports, 768 .maxlen = sizeof(int), 769 .mode = 0644, 770 .proc_handler = proc_dointvec 771 }, 772 { 773 .procname = "igmp_max_memberships", 774 .data = &init_net.ipv4.sysctl_igmp_max_memberships, 775 .maxlen = sizeof(int), 776 .mode = 0644, 777 .proc_handler = proc_dointvec 778 }, 779 { 780 .procname = "igmp_max_msf", 781 .data = &init_net.ipv4.sysctl_igmp_max_msf, 782 .maxlen = sizeof(int), 783 .mode = 0644, 784 .proc_handler = proc_dointvec 785 }, 786 #ifdef CONFIG_IP_MULTICAST 787 { 788 .procname = "igmp_qrv", 789 .data = &init_net.ipv4.sysctl_igmp_qrv, 790 .maxlen = sizeof(int), 791 .mode = 0644, 792 .proc_handler = proc_dointvec_minmax, 793 .extra1 = &one 794 }, 795 #endif 796 { 797 .procname = "tcp_congestion_control", 798 .data = &init_net.ipv4.tcp_congestion_control, 799 .mode = 0644, 800 .maxlen = TCP_CA_NAME_MAX, 801 .proc_handler = proc_tcp_congestion_control, 802 }, 803 { 804 .procname = "tcp_keepalive_time", 805 .data = &init_net.ipv4.sysctl_tcp_keepalive_time, 806 .maxlen = sizeof(int), 807 .mode = 0644, 808 .proc_handler = proc_dointvec_jiffies, 809 }, 810 { 811 .procname = "tcp_keepalive_probes", 812 .data = &init_net.ipv4.sysctl_tcp_keepalive_probes, 813 .maxlen = sizeof(int), 814 .mode = 0644, 815 .proc_handler = proc_dointvec 816 }, 817 { 818 .procname = "tcp_keepalive_intvl", 819 .data = &init_net.ipv4.sysctl_tcp_keepalive_intvl, 820 .maxlen = sizeof(int), 821 .mode = 0644, 822 .proc_handler = proc_dointvec_jiffies, 823 }, 824 { 825 .procname = "tcp_syn_retries", 826 .data = &init_net.ipv4.sysctl_tcp_syn_retries, 827 .maxlen = sizeof(int), 828 .mode = 0644, 829 .proc_handler = proc_dointvec_minmax, 830 .extra1 = &tcp_syn_retries_min, 831 .extra2 = &tcp_syn_retries_max 832 }, 833 { 834 .procname = "tcp_synack_retries", 835 .data = &init_net.ipv4.sysctl_tcp_synack_retries, 836 .maxlen = sizeof(int), 837 .mode = 0644, 838 .proc_handler = proc_dointvec 839 }, 840 #ifdef CONFIG_SYN_COOKIES 841 { 842 .procname = "tcp_syncookies", 843 .data = &init_net.ipv4.sysctl_tcp_syncookies, 844 .maxlen = sizeof(int), 845 .mode = 0644, 846 .proc_handler = proc_dointvec 847 }, 848 #endif 849 { 850 .procname = "tcp_reordering", 851 .data = &init_net.ipv4.sysctl_tcp_reordering, 852 .maxlen = sizeof(int), 853 .mode = 0644, 854 .proc_handler = proc_dointvec 855 }, 856 { 857 .procname = "tcp_retries1", 858 .data = &init_net.ipv4.sysctl_tcp_retries1, 859 .maxlen = sizeof(int), 860 .mode = 0644, 861 .proc_handler = proc_dointvec_minmax, 862 .extra2 = &tcp_retr1_max 863 }, 864 { 865 .procname = "tcp_retries2", 866 .data = &init_net.ipv4.sysctl_tcp_retries2, 867 .maxlen = sizeof(int), 868 .mode = 0644, 869 .proc_handler = proc_dointvec 870 }, 871 { 872 .procname = "tcp_orphan_retries", 873 .data = &init_net.ipv4.sysctl_tcp_orphan_retries, 874 .maxlen = sizeof(int), 875 .mode = 0644, 876 .proc_handler = proc_dointvec 877 }, 878 { 879 .procname = "tcp_fin_timeout", 880 .data = &init_net.ipv4.sysctl_tcp_fin_timeout, 881 .maxlen = sizeof(int), 882 .mode = 0644, 883 .proc_handler = proc_dointvec_jiffies, 884 }, 885 { 886 .procname = "tcp_notsent_lowat", 887 .data = &init_net.ipv4.sysctl_tcp_notsent_lowat, 888 .maxlen = sizeof(unsigned int), 889 .mode = 0644, 890 .proc_handler = proc_douintvec, 891 }, 892 { 893 .procname = "tcp_tw_reuse", 894 .data = &init_net.ipv4.sysctl_tcp_tw_reuse, 895 .maxlen = sizeof(int), 896 .mode = 0644, 897 .proc_handler = proc_dointvec_minmax, 898 .extra1 = &zero, 899 .extra2 = &two, 900 }, 901 { 902 .procname = "tcp_max_tw_buckets", 903 .data = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets, 904 .maxlen = sizeof(int), 905 .mode = 0644, 906 .proc_handler = proc_dointvec 907 }, 908 { 909 .procname = "tcp_max_syn_backlog", 910 .data = &init_net.ipv4.sysctl_max_syn_backlog, 911 .maxlen = sizeof(int), 912 .mode = 0644, 913 .proc_handler = proc_dointvec 914 }, 915 { 916 .procname = "tcp_fastopen", 917 .data = &init_net.ipv4.sysctl_tcp_fastopen, 918 .maxlen = sizeof(int), 919 .mode = 0644, 920 .proc_handler = proc_dointvec, 921 }, 922 { 923 .procname = "tcp_fastopen_key", 924 .mode = 0600, 925 .data = &init_net.ipv4.sysctl_tcp_fastopen, 926 .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), 927 .proc_handler = proc_tcp_fastopen_key, 928 }, 929 { 930 .procname = "tcp_fastopen_blackhole_timeout_sec", 931 .data = &init_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout, 932 .maxlen = sizeof(int), 933 .mode = 0644, 934 .proc_handler = proc_tfo_blackhole_detect_timeout, 935 .extra1 = &zero, 936 }, 937 #ifdef CONFIG_IP_ROUTE_MULTIPATH 938 { 939 .procname = "fib_multipath_use_neigh", 940 .data = &init_net.ipv4.sysctl_fib_multipath_use_neigh, 941 .maxlen = sizeof(int), 942 .mode = 0644, 943 .proc_handler = proc_dointvec_minmax, 944 .extra1 = &zero, 945 .extra2 = &one, 946 }, 947 { 948 .procname = "fib_multipath_hash_policy", 949 .data = &init_net.ipv4.sysctl_fib_multipath_hash_policy, 950 .maxlen = sizeof(int), 951 .mode = 0644, 952 .proc_handler = proc_fib_multipath_hash_policy, 953 .extra1 = &zero, 954 .extra2 = &one, 955 }, 956 #endif 957 { 958 .procname = "ip_unprivileged_port_start", 959 .maxlen = sizeof(int), 960 .data = &init_net.ipv4.sysctl_ip_prot_sock, 961 .mode = 0644, 962 .proc_handler = ipv4_privileged_ports, 963 }, 964 #ifdef CONFIG_NET_L3_MASTER_DEV 965 { 966 .procname = "udp_l3mdev_accept", 967 .data = &init_net.ipv4.sysctl_udp_l3mdev_accept, 968 .maxlen = sizeof(int), 969 .mode = 0644, 970 .proc_handler = proc_dointvec_minmax, 971 .extra1 = &zero, 972 .extra2 = &one, 973 }, 974 #endif 975 { 976 .procname = "tcp_sack", 977 .data = &init_net.ipv4.sysctl_tcp_sack, 978 .maxlen = sizeof(int), 979 .mode = 0644, 980 .proc_handler = proc_dointvec 981 }, 982 { 983 .procname = "tcp_window_scaling", 984 .data = &init_net.ipv4.sysctl_tcp_window_scaling, 985 .maxlen = sizeof(int), 986 .mode = 0644, 987 .proc_handler = proc_dointvec 988 }, 989 { 990 .procname = "tcp_timestamps", 991 .data = &init_net.ipv4.sysctl_tcp_timestamps, 992 .maxlen = sizeof(int), 993 .mode = 0644, 994 .proc_handler = proc_dointvec 995 }, 996 { 997 .procname = "tcp_early_retrans", 998 .data = &init_net.ipv4.sysctl_tcp_early_retrans, 999 .maxlen = sizeof(int), 1000 .mode = 0644, 1001 .proc_handler = proc_dointvec_minmax, 1002 .extra1 = &zero, 1003 .extra2 = &four, 1004 }, 1005 { 1006 .procname = "tcp_recovery", 1007 .data = &init_net.ipv4.sysctl_tcp_recovery, 1008 .maxlen = sizeof(int), 1009 .mode = 0644, 1010 .proc_handler = proc_dointvec, 1011 }, 1012 { 1013 .procname = "tcp_thin_linear_timeouts", 1014 .data = &init_net.ipv4.sysctl_tcp_thin_linear_timeouts, 1015 .maxlen = sizeof(int), 1016 .mode = 0644, 1017 .proc_handler = proc_dointvec 1018 }, 1019 { 1020 .procname = "tcp_slow_start_after_idle", 1021 .data = &init_net.ipv4.sysctl_tcp_slow_start_after_idle, 1022 .maxlen = sizeof(int), 1023 .mode = 0644, 1024 .proc_handler = proc_dointvec 1025 }, 1026 { 1027 .procname = "tcp_retrans_collapse", 1028 .data = &init_net.ipv4.sysctl_tcp_retrans_collapse, 1029 .maxlen = sizeof(int), 1030 .mode = 0644, 1031 .proc_handler = proc_dointvec 1032 }, 1033 { 1034 .procname = "tcp_stdurg", 1035 .data = &init_net.ipv4.sysctl_tcp_stdurg, 1036 .maxlen = sizeof(int), 1037 .mode = 0644, 1038 .proc_handler = proc_dointvec 1039 }, 1040 { 1041 .procname = "tcp_rfc1337", 1042 .data = &init_net.ipv4.sysctl_tcp_rfc1337, 1043 .maxlen = sizeof(int), 1044 .mode = 0644, 1045 .proc_handler = proc_dointvec 1046 }, 1047 { 1048 .procname = "tcp_abort_on_overflow", 1049 .data = &init_net.ipv4.sysctl_tcp_abort_on_overflow, 1050 .maxlen = sizeof(int), 1051 .mode = 0644, 1052 .proc_handler = proc_dointvec 1053 }, 1054 { 1055 .procname = "tcp_fack", 1056 .data = &init_net.ipv4.sysctl_tcp_fack, 1057 .maxlen = sizeof(int), 1058 .mode = 0644, 1059 .proc_handler = proc_dointvec 1060 }, 1061 { 1062 .procname = "tcp_max_reordering", 1063 .data = &init_net.ipv4.sysctl_tcp_max_reordering, 1064 .maxlen = sizeof(int), 1065 .mode = 0644, 1066 .proc_handler = proc_dointvec 1067 }, 1068 { 1069 .procname = "tcp_dsack", 1070 .data = &init_net.ipv4.sysctl_tcp_dsack, 1071 .maxlen = sizeof(int), 1072 .mode = 0644, 1073 .proc_handler = proc_dointvec 1074 }, 1075 { 1076 .procname = "tcp_app_win", 1077 .data = &init_net.ipv4.sysctl_tcp_app_win, 1078 .maxlen = sizeof(int), 1079 .mode = 0644, 1080 .proc_handler = proc_dointvec 1081 }, 1082 { 1083 .procname = "tcp_adv_win_scale", 1084 .data = &init_net.ipv4.sysctl_tcp_adv_win_scale, 1085 .maxlen = sizeof(int), 1086 .mode = 0644, 1087 .proc_handler = proc_dointvec_minmax, 1088 .extra1 = &tcp_adv_win_scale_min, 1089 .extra2 = &tcp_adv_win_scale_max, 1090 }, 1091 { 1092 .procname = "tcp_frto", 1093 .data = &init_net.ipv4.sysctl_tcp_frto, 1094 .maxlen = sizeof(int), 1095 .mode = 0644, 1096 .proc_handler = proc_dointvec 1097 }, 1098 { 1099 .procname = "tcp_no_metrics_save", 1100 .data = &init_net.ipv4.sysctl_tcp_nometrics_save, 1101 .maxlen = sizeof(int), 1102 .mode = 0644, 1103 .proc_handler = proc_dointvec, 1104 }, 1105 { 1106 .procname = "tcp_moderate_rcvbuf", 1107 .data = &init_net.ipv4.sysctl_tcp_moderate_rcvbuf, 1108 .maxlen = sizeof(int), 1109 .mode = 0644, 1110 .proc_handler = proc_dointvec, 1111 }, 1112 { 1113 .procname = "tcp_tso_win_divisor", 1114 .data = &init_net.ipv4.sysctl_tcp_tso_win_divisor, 1115 .maxlen = sizeof(int), 1116 .mode = 0644, 1117 .proc_handler = proc_dointvec, 1118 }, 1119 { 1120 .procname = "tcp_workaround_signed_windows", 1121 .data = &init_net.ipv4.sysctl_tcp_workaround_signed_windows, 1122 .maxlen = sizeof(int), 1123 .mode = 0644, 1124 .proc_handler = proc_dointvec 1125 }, 1126 { 1127 .procname = "tcp_limit_output_bytes", 1128 .data = &init_net.ipv4.sysctl_tcp_limit_output_bytes, 1129 .maxlen = sizeof(int), 1130 .mode = 0644, 1131 .proc_handler = proc_dointvec 1132 }, 1133 { 1134 .procname = "tcp_challenge_ack_limit", 1135 .data = &init_net.ipv4.sysctl_tcp_challenge_ack_limit, 1136 .maxlen = sizeof(int), 1137 .mode = 0644, 1138 .proc_handler = proc_dointvec 1139 }, 1140 { 1141 .procname = "tcp_min_tso_segs", 1142 .data = &init_net.ipv4.sysctl_tcp_min_tso_segs, 1143 .maxlen = sizeof(int), 1144 .mode = 0644, 1145 .proc_handler = proc_dointvec_minmax, 1146 .extra1 = &one, 1147 .extra2 = &gso_max_segs, 1148 }, 1149 { 1150 .procname = "tcp_min_rtt_wlen", 1151 .data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen, 1152 .maxlen = sizeof(int), 1153 .mode = 0644, 1154 .proc_handler = proc_dointvec 1155 }, 1156 { 1157 .procname = "tcp_autocorking", 1158 .data = &init_net.ipv4.sysctl_tcp_autocorking, 1159 .maxlen = sizeof(int), 1160 .mode = 0644, 1161 .proc_handler = proc_dointvec_minmax, 1162 .extra1 = &zero, 1163 .extra2 = &one, 1164 }, 1165 { 1166 .procname = "tcp_invalid_ratelimit", 1167 .data = &init_net.ipv4.sysctl_tcp_invalid_ratelimit, 1168 .maxlen = sizeof(int), 1169 .mode = 0644, 1170 .proc_handler = proc_dointvec_ms_jiffies, 1171 }, 1172 { 1173 .procname = "tcp_pacing_ss_ratio", 1174 .data = &init_net.ipv4.sysctl_tcp_pacing_ss_ratio, 1175 .maxlen = sizeof(int), 1176 .mode = 0644, 1177 .proc_handler = proc_dointvec_minmax, 1178 .extra1 = &zero, 1179 .extra2 = &thousand, 1180 }, 1181 { 1182 .procname = "tcp_pacing_ca_ratio", 1183 .data = &init_net.ipv4.sysctl_tcp_pacing_ca_ratio, 1184 .maxlen = sizeof(int), 1185 .mode = 0644, 1186 .proc_handler = proc_dointvec_minmax, 1187 .extra1 = &zero, 1188 .extra2 = &thousand, 1189 }, 1190 { 1191 .procname = "tcp_wmem", 1192 .data = &init_net.ipv4.sysctl_tcp_wmem, 1193 .maxlen = sizeof(init_net.ipv4.sysctl_tcp_wmem), 1194 .mode = 0644, 1195 .proc_handler = proc_dointvec_minmax, 1196 .extra1 = &one, 1197 }, 1198 { 1199 .procname = "tcp_rmem", 1200 .data = &init_net.ipv4.sysctl_tcp_rmem, 1201 .maxlen = sizeof(init_net.ipv4.sysctl_tcp_rmem), 1202 .mode = 0644, 1203 .proc_handler = proc_dointvec_minmax, 1204 .extra1 = &one, 1205 }, 1206 { 1207 .procname = "tcp_comp_sack_delay_ns", 1208 .data = &init_net.ipv4.sysctl_tcp_comp_sack_delay_ns, 1209 .maxlen = sizeof(unsigned long), 1210 .mode = 0644, 1211 .proc_handler = proc_doulongvec_minmax, 1212 }, 1213 { 1214 .procname = "tcp_comp_sack_nr", 1215 .data = &init_net.ipv4.sysctl_tcp_comp_sack_nr, 1216 .maxlen = sizeof(int), 1217 .mode = 0644, 1218 .proc_handler = proc_dointvec_minmax, 1219 .extra1 = &zero, 1220 .extra2 = &comp_sack_nr_max, 1221 }, 1222 { 1223 .procname = "udp_rmem_min", 1224 .data = &init_net.ipv4.sysctl_udp_rmem_min, 1225 .maxlen = sizeof(init_net.ipv4.sysctl_udp_rmem_min), 1226 .mode = 0644, 1227 .proc_handler = proc_dointvec_minmax, 1228 .extra1 = &one 1229 }, 1230 { 1231 .procname = "udp_wmem_min", 1232 .data = &init_net.ipv4.sysctl_udp_wmem_min, 1233 .maxlen = sizeof(init_net.ipv4.sysctl_udp_wmem_min), 1234 .mode = 0644, 1235 .proc_handler = proc_dointvec_minmax, 1236 .extra1 = &one 1237 }, 1238 { } 1239 }; 1240 1241 static __net_init int ipv4_sysctl_init_net(struct net *net) 1242 { 1243 struct ctl_table *table; 1244 1245 table = ipv4_net_table; 1246 if (!net_eq(net, &init_net)) { 1247 int i; 1248 1249 table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL); 1250 if (!table) 1251 goto err_alloc; 1252 1253 /* Update the variables to point into the current struct net */ 1254 for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) 1255 table[i].data += (void *)net - (void *)&init_net; 1256 } 1257 1258 net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); 1259 if (!net->ipv4.ipv4_hdr) 1260 goto err_reg; 1261 1262 net->ipv4.sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); 1263 if (!net->ipv4.sysctl_local_reserved_ports) 1264 goto err_ports; 1265 1266 return 0; 1267 1268 err_ports: 1269 unregister_net_sysctl_table(net->ipv4.ipv4_hdr); 1270 err_reg: 1271 if (!net_eq(net, &init_net)) 1272 kfree(table); 1273 err_alloc: 1274 return -ENOMEM; 1275 } 1276 1277 static __net_exit void ipv4_sysctl_exit_net(struct net *net) 1278 { 1279 struct ctl_table *table; 1280 1281 kfree(net->ipv4.sysctl_local_reserved_ports); 1282 table = net->ipv4.ipv4_hdr->ctl_table_arg; 1283 unregister_net_sysctl_table(net->ipv4.ipv4_hdr); 1284 kfree(table); 1285 } 1286 1287 static __net_initdata struct pernet_operations ipv4_sysctl_ops = { 1288 .init = ipv4_sysctl_init_net, 1289 .exit = ipv4_sysctl_exit_net, 1290 }; 1291 1292 static __init int sysctl_ipv4_init(void) 1293 { 1294 struct ctl_table_header *hdr; 1295 1296 hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table); 1297 if (!hdr) 1298 return -ENOMEM; 1299 1300 if (register_pernet_subsys(&ipv4_sysctl_ops)) { 1301 unregister_net_sysctl_table(hdr); 1302 return -ENOMEM; 1303 } 1304 1305 return 0; 1306 } 1307 1308 __initcall(sysctl_ipv4_init); 1309