1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem. 4 * 5 * Begun April 1, 1996, Mike Shaver. 6 * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS] 7 */ 8 9 #include <linux/mm.h> 10 #include <linux/module.h> 11 #include <linux/sysctl.h> 12 #include <linux/igmp.h> 13 #include <linux/inetdevice.h> 14 #include <linux/seqlock.h> 15 #include <linux/init.h> 16 #include <linux/slab.h> 17 #include <linux/nsproxy.h> 18 #include <linux/swap.h> 19 #include <net/snmp.h> 20 #include <net/icmp.h> 21 #include <net/ip.h> 22 #include <net/route.h> 23 #include <net/tcp.h> 24 #include <net/udp.h> 25 #include <net/cipso_ipv4.h> 26 #include <net/inet_frag.h> 27 #include <net/ping.h> 28 #include <net/protocol.h> 29 #include <net/netevent.h> 30 31 static int zero; 32 static int one = 1; 33 static int two = 2; 34 static int four = 4; 35 static int thousand = 1000; 36 static int gso_max_segs = GSO_MAX_SEGS; 37 static int tcp_retr1_max = 255; 38 static int ip_local_port_range_min[] = { 1, 1 }; 39 static int ip_local_port_range_max[] = { 65535, 65535 }; 40 static int tcp_adv_win_scale_min = -31; 41 static int tcp_adv_win_scale_max = 31; 42 static int ip_privileged_port_min; 43 static int ip_privileged_port_max = 65535; 44 static int ip_ttl_min = 1; 45 static int ip_ttl_max = 255; 46 static int tcp_syn_retries_min = 1; 47 static int tcp_syn_retries_max = MAX_TCP_SYNCNT; 48 static int ip_ping_group_range_min[] = { 0, 0 }; 49 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; 50 static int comp_sack_nr_max = 255; 51 static u32 u32_max_div_HZ = UINT_MAX / HZ; 52 53 /* obsolete */ 54 static int sysctl_tcp_low_latency __read_mostly; 55 56 /* Update system visible IP port range */ 57 static void set_local_port_range(struct net *net, int range[2]) 58 { 59 bool same_parity = !((range[0] ^ range[1]) & 1); 60 61 write_seqlock_bh(&net->ipv4.ip_local_ports.lock); 62 if (same_parity && !net->ipv4.ip_local_ports.warned) { 63 net->ipv4.ip_local_ports.warned = true; 64 pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n"); 65 } 66 net->ipv4.ip_local_ports.range[0] = range[0]; 67 net->ipv4.ip_local_ports.range[1] = range[1]; 68 write_sequnlock_bh(&net->ipv4.ip_local_ports.lock); 69 } 70 71 /* Validate changes from /proc interface. */ 72 static int ipv4_local_port_range(struct ctl_table *table, int write, 73 void __user *buffer, 74 size_t *lenp, loff_t *ppos) 75 { 76 struct net *net = 77 container_of(table->data, struct net, ipv4.ip_local_ports.range); 78 int ret; 79 int range[2]; 80 struct ctl_table tmp = { 81 .data = &range, 82 .maxlen = sizeof(range), 83 .mode = table->mode, 84 .extra1 = &ip_local_port_range_min, 85 .extra2 = &ip_local_port_range_max, 86 }; 87 88 inet_get_local_port_range(net, &range[0], &range[1]); 89 90 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 91 92 if (write && ret == 0) { 93 /* Ensure that the upper limit is not smaller than the lower, 94 * and that the lower does not encroach upon the privileged 95 * port limit. 96 */ 97 if ((range[1] < range[0]) || 98 (range[0] < net->ipv4.sysctl_ip_prot_sock)) 99 ret = -EINVAL; 100 else 101 set_local_port_range(net, range); 102 } 103 104 return ret; 105 } 106 107 /* Validate changes from /proc interface. */ 108 static int ipv4_privileged_ports(struct ctl_table *table, int write, 109 void __user *buffer, size_t *lenp, loff_t *ppos) 110 { 111 struct net *net = container_of(table->data, struct net, 112 ipv4.sysctl_ip_prot_sock); 113 int ret; 114 int pports; 115 int range[2]; 116 struct ctl_table tmp = { 117 .data = &pports, 118 .maxlen = sizeof(pports), 119 .mode = table->mode, 120 .extra1 = &ip_privileged_port_min, 121 .extra2 = &ip_privileged_port_max, 122 }; 123 124 pports = net->ipv4.sysctl_ip_prot_sock; 125 126 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 127 128 if (write && ret == 0) { 129 inet_get_local_port_range(net, &range[0], &range[1]); 130 /* Ensure that the local port range doesn't overlap with the 131 * privileged port range. 132 */ 133 if (range[0] < pports) 134 ret = -EINVAL; 135 else 136 net->ipv4.sysctl_ip_prot_sock = pports; 137 } 138 139 return ret; 140 } 141 142 static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high) 143 { 144 kgid_t *data = table->data; 145 struct net *net = 146 container_of(table->data, struct net, ipv4.ping_group_range.range); 147 unsigned int seq; 148 do { 149 seq = read_seqbegin(&net->ipv4.ping_group_range.lock); 150 151 *low = data[0]; 152 *high = data[1]; 153 } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq)); 154 } 155 156 /* Update system visible IP port range */ 157 static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high) 158 { 159 kgid_t *data = table->data; 160 struct net *net = 161 container_of(table->data, struct net, ipv4.ping_group_range.range); 162 write_seqlock(&net->ipv4.ping_group_range.lock); 163 data[0] = low; 164 data[1] = high; 165 write_sequnlock(&net->ipv4.ping_group_range.lock); 166 } 167 168 /* Validate changes from /proc interface. */ 169 static int ipv4_ping_group_range(struct ctl_table *table, int write, 170 void __user *buffer, 171 size_t *lenp, loff_t *ppos) 172 { 173 struct user_namespace *user_ns = current_user_ns(); 174 int ret; 175 gid_t urange[2]; 176 kgid_t low, high; 177 struct ctl_table tmp = { 178 .data = &urange, 179 .maxlen = sizeof(urange), 180 .mode = table->mode, 181 .extra1 = &ip_ping_group_range_min, 182 .extra2 = &ip_ping_group_range_max, 183 }; 184 185 inet_get_ping_group_range_table(table, &low, &high); 186 urange[0] = from_kgid_munged(user_ns, low); 187 urange[1] = from_kgid_munged(user_ns, high); 188 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 189 190 if (write && ret == 0) { 191 low = make_kgid(user_ns, urange[0]); 192 high = make_kgid(user_ns, urange[1]); 193 if (!gid_valid(low) || !gid_valid(high)) 194 return -EINVAL; 195 if (urange[1] < urange[0] || gid_lt(high, low)) { 196 low = make_kgid(&init_user_ns, 1); 197 high = make_kgid(&init_user_ns, 0); 198 } 199 set_ping_group_range(table, low, high); 200 } 201 202 return ret; 203 } 204 205 static int ipv4_fwd_update_priority(struct ctl_table *table, int write, 206 void __user *buffer, 207 size_t *lenp, loff_t *ppos) 208 { 209 struct net *net; 210 int ret; 211 212 net = container_of(table->data, struct net, 213 ipv4.sysctl_ip_fwd_update_priority); 214 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 215 if (write && ret == 0) 216 call_netevent_notifiers(NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE, 217 net); 218 219 return ret; 220 } 221 222 static int proc_tcp_congestion_control(struct ctl_table *ctl, int write, 223 void __user *buffer, size_t *lenp, loff_t *ppos) 224 { 225 struct net *net = container_of(ctl->data, struct net, 226 ipv4.tcp_congestion_control); 227 char val[TCP_CA_NAME_MAX]; 228 struct ctl_table tbl = { 229 .data = val, 230 .maxlen = TCP_CA_NAME_MAX, 231 }; 232 int ret; 233 234 tcp_get_default_congestion_control(net, val); 235 236 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 237 if (write && ret == 0) 238 ret = tcp_set_default_congestion_control(net, val); 239 return ret; 240 } 241 242 static int proc_tcp_available_congestion_control(struct ctl_table *ctl, 243 int write, 244 void __user *buffer, size_t *lenp, 245 loff_t *ppos) 246 { 247 struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, }; 248 int ret; 249 250 tbl.data = kmalloc(tbl.maxlen, GFP_USER); 251 if (!tbl.data) 252 return -ENOMEM; 253 tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX); 254 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 255 kfree(tbl.data); 256 return ret; 257 } 258 259 static int proc_allowed_congestion_control(struct ctl_table *ctl, 260 int write, 261 void __user *buffer, size_t *lenp, 262 loff_t *ppos) 263 { 264 struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX }; 265 int ret; 266 267 tbl.data = kmalloc(tbl.maxlen, GFP_USER); 268 if (!tbl.data) 269 return -ENOMEM; 270 271 tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen); 272 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 273 if (write && ret == 0) 274 ret = tcp_set_allowed_congestion_control(tbl.data); 275 kfree(tbl.data); 276 return ret; 277 } 278 279 static int proc_tcp_fastopen_key(struct ctl_table *table, int write, 280 void __user *buffer, size_t *lenp, 281 loff_t *ppos) 282 { 283 struct net *net = container_of(table->data, struct net, 284 ipv4.sysctl_tcp_fastopen); 285 struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; 286 struct tcp_fastopen_context *ctxt; 287 u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */ 288 __le32 key[4]; 289 int ret, i; 290 291 tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL); 292 if (!tbl.data) 293 return -ENOMEM; 294 295 rcu_read_lock(); 296 ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx); 297 if (ctxt) 298 memcpy(key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); 299 else 300 memset(key, 0, sizeof(key)); 301 rcu_read_unlock(); 302 303 for (i = 0; i < ARRAY_SIZE(key); i++) 304 user_key[i] = le32_to_cpu(key[i]); 305 306 snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x", 307 user_key[0], user_key[1], user_key[2], user_key[3]); 308 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 309 310 if (write && ret == 0) { 311 if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1, 312 user_key + 2, user_key + 3) != 4) { 313 ret = -EINVAL; 314 goto bad_key; 315 } 316 317 for (i = 0; i < ARRAY_SIZE(user_key); i++) 318 key[i] = cpu_to_le32(user_key[i]); 319 320 tcp_fastopen_reset_cipher(net, NULL, key, 321 TCP_FASTOPEN_KEY_LENGTH); 322 } 323 324 bad_key: 325 pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n", 326 user_key[0], user_key[1], user_key[2], user_key[3], 327 (char *)tbl.data, ret); 328 kfree(tbl.data); 329 return ret; 330 } 331 332 static void proc_configure_early_demux(int enabled, int protocol) 333 { 334 struct net_protocol *ipprot; 335 #if IS_ENABLED(CONFIG_IPV6) 336 struct inet6_protocol *ip6prot; 337 #endif 338 339 rcu_read_lock(); 340 341 ipprot = rcu_dereference(inet_protos[protocol]); 342 if (ipprot) 343 ipprot->early_demux = enabled ? ipprot->early_demux_handler : 344 NULL; 345 346 #if IS_ENABLED(CONFIG_IPV6) 347 ip6prot = rcu_dereference(inet6_protos[protocol]); 348 if (ip6prot) 349 ip6prot->early_demux = enabled ? ip6prot->early_demux_handler : 350 NULL; 351 #endif 352 rcu_read_unlock(); 353 } 354 355 static int proc_tcp_early_demux(struct ctl_table *table, int write, 356 void __user *buffer, size_t *lenp, loff_t *ppos) 357 { 358 int ret = 0; 359 360 ret = proc_dointvec(table, write, buffer, lenp, ppos); 361 362 if (write && !ret) { 363 int enabled = init_net.ipv4.sysctl_tcp_early_demux; 364 365 proc_configure_early_demux(enabled, IPPROTO_TCP); 366 } 367 368 return ret; 369 } 370 371 static int proc_udp_early_demux(struct ctl_table *table, int write, 372 void __user *buffer, size_t *lenp, loff_t *ppos) 373 { 374 int ret = 0; 375 376 ret = proc_dointvec(table, write, buffer, lenp, ppos); 377 378 if (write && !ret) { 379 int enabled = init_net.ipv4.sysctl_udp_early_demux; 380 381 proc_configure_early_demux(enabled, IPPROTO_UDP); 382 } 383 384 return ret; 385 } 386 387 static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table, 388 int write, 389 void __user *buffer, 390 size_t *lenp, loff_t *ppos) 391 { 392 struct net *net = container_of(table->data, struct net, 393 ipv4.sysctl_tcp_fastopen_blackhole_timeout); 394 int ret; 395 396 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 397 if (write && ret == 0) 398 atomic_set(&net->ipv4.tfo_active_disable_times, 0); 399 400 return ret; 401 } 402 403 static int proc_tcp_available_ulp(struct ctl_table *ctl, 404 int write, 405 void __user *buffer, size_t *lenp, 406 loff_t *ppos) 407 { 408 struct ctl_table tbl = { .maxlen = TCP_ULP_BUF_MAX, }; 409 int ret; 410 411 tbl.data = kmalloc(tbl.maxlen, GFP_USER); 412 if (!tbl.data) 413 return -ENOMEM; 414 tcp_get_available_ulp(tbl.data, TCP_ULP_BUF_MAX); 415 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 416 kfree(tbl.data); 417 418 return ret; 419 } 420 421 #ifdef CONFIG_IP_ROUTE_MULTIPATH 422 static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write, 423 void __user *buffer, size_t *lenp, 424 loff_t *ppos) 425 { 426 struct net *net = container_of(table->data, struct net, 427 ipv4.sysctl_fib_multipath_hash_policy); 428 int ret; 429 430 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 431 if (write && ret == 0) 432 call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net); 433 434 return ret; 435 } 436 #endif 437 438 static struct ctl_table ipv4_table[] = { 439 { 440 .procname = "tcp_max_orphans", 441 .data = &sysctl_tcp_max_orphans, 442 .maxlen = sizeof(int), 443 .mode = 0644, 444 .proc_handler = proc_dointvec 445 }, 446 { 447 .procname = "inet_peer_threshold", 448 .data = &inet_peer_threshold, 449 .maxlen = sizeof(int), 450 .mode = 0644, 451 .proc_handler = proc_dointvec 452 }, 453 { 454 .procname = "inet_peer_minttl", 455 .data = &inet_peer_minttl, 456 .maxlen = sizeof(int), 457 .mode = 0644, 458 .proc_handler = proc_dointvec_jiffies, 459 }, 460 { 461 .procname = "inet_peer_maxttl", 462 .data = &inet_peer_maxttl, 463 .maxlen = sizeof(int), 464 .mode = 0644, 465 .proc_handler = proc_dointvec_jiffies, 466 }, 467 { 468 .procname = "tcp_mem", 469 .maxlen = sizeof(sysctl_tcp_mem), 470 .data = &sysctl_tcp_mem, 471 .mode = 0644, 472 .proc_handler = proc_doulongvec_minmax, 473 }, 474 { 475 .procname = "tcp_low_latency", 476 .data = &sysctl_tcp_low_latency, 477 .maxlen = sizeof(int), 478 .mode = 0644, 479 .proc_handler = proc_dointvec 480 }, 481 #ifdef CONFIG_NETLABEL 482 { 483 .procname = "cipso_cache_enable", 484 .data = &cipso_v4_cache_enabled, 485 .maxlen = sizeof(int), 486 .mode = 0644, 487 .proc_handler = proc_dointvec, 488 }, 489 { 490 .procname = "cipso_cache_bucket_size", 491 .data = &cipso_v4_cache_bucketsize, 492 .maxlen = sizeof(int), 493 .mode = 0644, 494 .proc_handler = proc_dointvec, 495 }, 496 { 497 .procname = "cipso_rbm_optfmt", 498 .data = &cipso_v4_rbm_optfmt, 499 .maxlen = sizeof(int), 500 .mode = 0644, 501 .proc_handler = proc_dointvec, 502 }, 503 { 504 .procname = "cipso_rbm_strictvalid", 505 .data = &cipso_v4_rbm_strictvalid, 506 .maxlen = sizeof(int), 507 .mode = 0644, 508 .proc_handler = proc_dointvec, 509 }, 510 #endif /* CONFIG_NETLABEL */ 511 { 512 .procname = "tcp_available_congestion_control", 513 .maxlen = TCP_CA_BUF_MAX, 514 .mode = 0444, 515 .proc_handler = proc_tcp_available_congestion_control, 516 }, 517 { 518 .procname = "tcp_allowed_congestion_control", 519 .maxlen = TCP_CA_BUF_MAX, 520 .mode = 0644, 521 .proc_handler = proc_allowed_congestion_control, 522 }, 523 { 524 .procname = "tcp_available_ulp", 525 .maxlen = TCP_ULP_BUF_MAX, 526 .mode = 0444, 527 .proc_handler = proc_tcp_available_ulp, 528 }, 529 { 530 .procname = "icmp_msgs_per_sec", 531 .data = &sysctl_icmp_msgs_per_sec, 532 .maxlen = sizeof(int), 533 .mode = 0644, 534 .proc_handler = proc_dointvec_minmax, 535 .extra1 = &zero, 536 }, 537 { 538 .procname = "icmp_msgs_burst", 539 .data = &sysctl_icmp_msgs_burst, 540 .maxlen = sizeof(int), 541 .mode = 0644, 542 .proc_handler = proc_dointvec_minmax, 543 .extra1 = &zero, 544 }, 545 { 546 .procname = "udp_mem", 547 .data = &sysctl_udp_mem, 548 .maxlen = sizeof(sysctl_udp_mem), 549 .mode = 0644, 550 .proc_handler = proc_doulongvec_minmax, 551 }, 552 { } 553 }; 554 555 static struct ctl_table ipv4_net_table[] = { 556 { 557 .procname = "icmp_echo_ignore_all", 558 .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all, 559 .maxlen = sizeof(int), 560 .mode = 0644, 561 .proc_handler = proc_dointvec 562 }, 563 { 564 .procname = "icmp_echo_ignore_broadcasts", 565 .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts, 566 .maxlen = sizeof(int), 567 .mode = 0644, 568 .proc_handler = proc_dointvec 569 }, 570 { 571 .procname = "icmp_ignore_bogus_error_responses", 572 .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses, 573 .maxlen = sizeof(int), 574 .mode = 0644, 575 .proc_handler = proc_dointvec 576 }, 577 { 578 .procname = "icmp_errors_use_inbound_ifaddr", 579 .data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr, 580 .maxlen = sizeof(int), 581 .mode = 0644, 582 .proc_handler = proc_dointvec 583 }, 584 { 585 .procname = "icmp_ratelimit", 586 .data = &init_net.ipv4.sysctl_icmp_ratelimit, 587 .maxlen = sizeof(int), 588 .mode = 0644, 589 .proc_handler = proc_dointvec_ms_jiffies, 590 }, 591 { 592 .procname = "icmp_ratemask", 593 .data = &init_net.ipv4.sysctl_icmp_ratemask, 594 .maxlen = sizeof(int), 595 .mode = 0644, 596 .proc_handler = proc_dointvec 597 }, 598 { 599 .procname = "ping_group_range", 600 .data = &init_net.ipv4.ping_group_range.range, 601 .maxlen = sizeof(gid_t)*2, 602 .mode = 0644, 603 .proc_handler = ipv4_ping_group_range, 604 }, 605 { 606 .procname = "tcp_ecn", 607 .data = &init_net.ipv4.sysctl_tcp_ecn, 608 .maxlen = sizeof(int), 609 .mode = 0644, 610 .proc_handler = proc_dointvec 611 }, 612 { 613 .procname = "tcp_ecn_fallback", 614 .data = &init_net.ipv4.sysctl_tcp_ecn_fallback, 615 .maxlen = sizeof(int), 616 .mode = 0644, 617 .proc_handler = proc_dointvec 618 }, 619 { 620 .procname = "ip_dynaddr", 621 .data = &init_net.ipv4.sysctl_ip_dynaddr, 622 .maxlen = sizeof(int), 623 .mode = 0644, 624 .proc_handler = proc_dointvec 625 }, 626 { 627 .procname = "ip_early_demux", 628 .data = &init_net.ipv4.sysctl_ip_early_demux, 629 .maxlen = sizeof(int), 630 .mode = 0644, 631 .proc_handler = proc_dointvec 632 }, 633 { 634 .procname = "udp_early_demux", 635 .data = &init_net.ipv4.sysctl_udp_early_demux, 636 .maxlen = sizeof(int), 637 .mode = 0644, 638 .proc_handler = proc_udp_early_demux 639 }, 640 { 641 .procname = "tcp_early_demux", 642 .data = &init_net.ipv4.sysctl_tcp_early_demux, 643 .maxlen = sizeof(int), 644 .mode = 0644, 645 .proc_handler = proc_tcp_early_demux 646 }, 647 { 648 .procname = "ip_default_ttl", 649 .data = &init_net.ipv4.sysctl_ip_default_ttl, 650 .maxlen = sizeof(int), 651 .mode = 0644, 652 .proc_handler = proc_dointvec_minmax, 653 .extra1 = &ip_ttl_min, 654 .extra2 = &ip_ttl_max, 655 }, 656 { 657 .procname = "ip_local_port_range", 658 .maxlen = sizeof(init_net.ipv4.ip_local_ports.range), 659 .data = &init_net.ipv4.ip_local_ports.range, 660 .mode = 0644, 661 .proc_handler = ipv4_local_port_range, 662 }, 663 { 664 .procname = "ip_local_reserved_ports", 665 .data = &init_net.ipv4.sysctl_local_reserved_ports, 666 .maxlen = 65536, 667 .mode = 0644, 668 .proc_handler = proc_do_large_bitmap, 669 }, 670 { 671 .procname = "ip_no_pmtu_disc", 672 .data = &init_net.ipv4.sysctl_ip_no_pmtu_disc, 673 .maxlen = sizeof(int), 674 .mode = 0644, 675 .proc_handler = proc_dointvec 676 }, 677 { 678 .procname = "ip_forward_use_pmtu", 679 .data = &init_net.ipv4.sysctl_ip_fwd_use_pmtu, 680 .maxlen = sizeof(int), 681 .mode = 0644, 682 .proc_handler = proc_dointvec, 683 }, 684 { 685 .procname = "ip_forward_update_priority", 686 .data = &init_net.ipv4.sysctl_ip_fwd_update_priority, 687 .maxlen = sizeof(int), 688 .mode = 0644, 689 .proc_handler = ipv4_fwd_update_priority, 690 .extra1 = &zero, 691 .extra2 = &one, 692 }, 693 { 694 .procname = "ip_nonlocal_bind", 695 .data = &init_net.ipv4.sysctl_ip_nonlocal_bind, 696 .maxlen = sizeof(int), 697 .mode = 0644, 698 .proc_handler = proc_dointvec 699 }, 700 { 701 .procname = "fwmark_reflect", 702 .data = &init_net.ipv4.sysctl_fwmark_reflect, 703 .maxlen = sizeof(int), 704 .mode = 0644, 705 .proc_handler = proc_dointvec, 706 }, 707 { 708 .procname = "tcp_fwmark_accept", 709 .data = &init_net.ipv4.sysctl_tcp_fwmark_accept, 710 .maxlen = sizeof(int), 711 .mode = 0644, 712 .proc_handler = proc_dointvec, 713 }, 714 #ifdef CONFIG_NET_L3_MASTER_DEV 715 { 716 .procname = "tcp_l3mdev_accept", 717 .data = &init_net.ipv4.sysctl_tcp_l3mdev_accept, 718 .maxlen = sizeof(int), 719 .mode = 0644, 720 .proc_handler = proc_dointvec_minmax, 721 .extra1 = &zero, 722 .extra2 = &one, 723 }, 724 #endif 725 { 726 .procname = "tcp_mtu_probing", 727 .data = &init_net.ipv4.sysctl_tcp_mtu_probing, 728 .maxlen = sizeof(int), 729 .mode = 0644, 730 .proc_handler = proc_dointvec, 731 }, 732 { 733 .procname = "tcp_base_mss", 734 .data = &init_net.ipv4.sysctl_tcp_base_mss, 735 .maxlen = sizeof(int), 736 .mode = 0644, 737 .proc_handler = proc_dointvec, 738 }, 739 { 740 .procname = "tcp_probe_threshold", 741 .data = &init_net.ipv4.sysctl_tcp_probe_threshold, 742 .maxlen = sizeof(int), 743 .mode = 0644, 744 .proc_handler = proc_dointvec, 745 }, 746 { 747 .procname = "tcp_probe_interval", 748 .data = &init_net.ipv4.sysctl_tcp_probe_interval, 749 .maxlen = sizeof(u32), 750 .mode = 0644, 751 .proc_handler = proc_douintvec_minmax, 752 .extra2 = &u32_max_div_HZ, 753 }, 754 { 755 .procname = "igmp_link_local_mcast_reports", 756 .data = &init_net.ipv4.sysctl_igmp_llm_reports, 757 .maxlen = sizeof(int), 758 .mode = 0644, 759 .proc_handler = proc_dointvec 760 }, 761 { 762 .procname = "igmp_max_memberships", 763 .data = &init_net.ipv4.sysctl_igmp_max_memberships, 764 .maxlen = sizeof(int), 765 .mode = 0644, 766 .proc_handler = proc_dointvec 767 }, 768 { 769 .procname = "igmp_max_msf", 770 .data = &init_net.ipv4.sysctl_igmp_max_msf, 771 .maxlen = sizeof(int), 772 .mode = 0644, 773 .proc_handler = proc_dointvec 774 }, 775 #ifdef CONFIG_IP_MULTICAST 776 { 777 .procname = "igmp_qrv", 778 .data = &init_net.ipv4.sysctl_igmp_qrv, 779 .maxlen = sizeof(int), 780 .mode = 0644, 781 .proc_handler = proc_dointvec_minmax, 782 .extra1 = &one 783 }, 784 #endif 785 { 786 .procname = "tcp_congestion_control", 787 .data = &init_net.ipv4.tcp_congestion_control, 788 .mode = 0644, 789 .maxlen = TCP_CA_NAME_MAX, 790 .proc_handler = proc_tcp_congestion_control, 791 }, 792 { 793 .procname = "tcp_keepalive_time", 794 .data = &init_net.ipv4.sysctl_tcp_keepalive_time, 795 .maxlen = sizeof(int), 796 .mode = 0644, 797 .proc_handler = proc_dointvec_jiffies, 798 }, 799 { 800 .procname = "tcp_keepalive_probes", 801 .data = &init_net.ipv4.sysctl_tcp_keepalive_probes, 802 .maxlen = sizeof(int), 803 .mode = 0644, 804 .proc_handler = proc_dointvec 805 }, 806 { 807 .procname = "tcp_keepalive_intvl", 808 .data = &init_net.ipv4.sysctl_tcp_keepalive_intvl, 809 .maxlen = sizeof(int), 810 .mode = 0644, 811 .proc_handler = proc_dointvec_jiffies, 812 }, 813 { 814 .procname = "tcp_syn_retries", 815 .data = &init_net.ipv4.sysctl_tcp_syn_retries, 816 .maxlen = sizeof(int), 817 .mode = 0644, 818 .proc_handler = proc_dointvec_minmax, 819 .extra1 = &tcp_syn_retries_min, 820 .extra2 = &tcp_syn_retries_max 821 }, 822 { 823 .procname = "tcp_synack_retries", 824 .data = &init_net.ipv4.sysctl_tcp_synack_retries, 825 .maxlen = sizeof(int), 826 .mode = 0644, 827 .proc_handler = proc_dointvec 828 }, 829 #ifdef CONFIG_SYN_COOKIES 830 { 831 .procname = "tcp_syncookies", 832 .data = &init_net.ipv4.sysctl_tcp_syncookies, 833 .maxlen = sizeof(int), 834 .mode = 0644, 835 .proc_handler = proc_dointvec 836 }, 837 #endif 838 { 839 .procname = "tcp_reordering", 840 .data = &init_net.ipv4.sysctl_tcp_reordering, 841 .maxlen = sizeof(int), 842 .mode = 0644, 843 .proc_handler = proc_dointvec 844 }, 845 { 846 .procname = "tcp_retries1", 847 .data = &init_net.ipv4.sysctl_tcp_retries1, 848 .maxlen = sizeof(int), 849 .mode = 0644, 850 .proc_handler = proc_dointvec_minmax, 851 .extra2 = &tcp_retr1_max 852 }, 853 { 854 .procname = "tcp_retries2", 855 .data = &init_net.ipv4.sysctl_tcp_retries2, 856 .maxlen = sizeof(int), 857 .mode = 0644, 858 .proc_handler = proc_dointvec 859 }, 860 { 861 .procname = "tcp_orphan_retries", 862 .data = &init_net.ipv4.sysctl_tcp_orphan_retries, 863 .maxlen = sizeof(int), 864 .mode = 0644, 865 .proc_handler = proc_dointvec 866 }, 867 { 868 .procname = "tcp_fin_timeout", 869 .data = &init_net.ipv4.sysctl_tcp_fin_timeout, 870 .maxlen = sizeof(int), 871 .mode = 0644, 872 .proc_handler = proc_dointvec_jiffies, 873 }, 874 { 875 .procname = "tcp_notsent_lowat", 876 .data = &init_net.ipv4.sysctl_tcp_notsent_lowat, 877 .maxlen = sizeof(unsigned int), 878 .mode = 0644, 879 .proc_handler = proc_douintvec, 880 }, 881 { 882 .procname = "tcp_tw_reuse", 883 .data = &init_net.ipv4.sysctl_tcp_tw_reuse, 884 .maxlen = sizeof(int), 885 .mode = 0644, 886 .proc_handler = proc_dointvec_minmax, 887 .extra1 = &zero, 888 .extra2 = &two, 889 }, 890 { 891 .procname = "tcp_max_tw_buckets", 892 .data = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets, 893 .maxlen = sizeof(int), 894 .mode = 0644, 895 .proc_handler = proc_dointvec 896 }, 897 { 898 .procname = "tcp_max_syn_backlog", 899 .data = &init_net.ipv4.sysctl_max_syn_backlog, 900 .maxlen = sizeof(int), 901 .mode = 0644, 902 .proc_handler = proc_dointvec 903 }, 904 { 905 .procname = "tcp_fastopen", 906 .data = &init_net.ipv4.sysctl_tcp_fastopen, 907 .maxlen = sizeof(int), 908 .mode = 0644, 909 .proc_handler = proc_dointvec, 910 }, 911 { 912 .procname = "tcp_fastopen_key", 913 .mode = 0600, 914 .data = &init_net.ipv4.sysctl_tcp_fastopen, 915 .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), 916 .proc_handler = proc_tcp_fastopen_key, 917 }, 918 { 919 .procname = "tcp_fastopen_blackhole_timeout_sec", 920 .data = &init_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout, 921 .maxlen = sizeof(int), 922 .mode = 0644, 923 .proc_handler = proc_tfo_blackhole_detect_timeout, 924 .extra1 = &zero, 925 }, 926 #ifdef CONFIG_IP_ROUTE_MULTIPATH 927 { 928 .procname = "fib_multipath_use_neigh", 929 .data = &init_net.ipv4.sysctl_fib_multipath_use_neigh, 930 .maxlen = sizeof(int), 931 .mode = 0644, 932 .proc_handler = proc_dointvec_minmax, 933 .extra1 = &zero, 934 .extra2 = &one, 935 }, 936 { 937 .procname = "fib_multipath_hash_policy", 938 .data = &init_net.ipv4.sysctl_fib_multipath_hash_policy, 939 .maxlen = sizeof(int), 940 .mode = 0644, 941 .proc_handler = proc_fib_multipath_hash_policy, 942 .extra1 = &zero, 943 .extra2 = &one, 944 }, 945 #endif 946 { 947 .procname = "ip_unprivileged_port_start", 948 .maxlen = sizeof(int), 949 .data = &init_net.ipv4.sysctl_ip_prot_sock, 950 .mode = 0644, 951 .proc_handler = ipv4_privileged_ports, 952 }, 953 #ifdef CONFIG_NET_L3_MASTER_DEV 954 { 955 .procname = "udp_l3mdev_accept", 956 .data = &init_net.ipv4.sysctl_udp_l3mdev_accept, 957 .maxlen = sizeof(int), 958 .mode = 0644, 959 .proc_handler = proc_dointvec_minmax, 960 .extra1 = &zero, 961 .extra2 = &one, 962 }, 963 #endif 964 { 965 .procname = "tcp_sack", 966 .data = &init_net.ipv4.sysctl_tcp_sack, 967 .maxlen = sizeof(int), 968 .mode = 0644, 969 .proc_handler = proc_dointvec 970 }, 971 { 972 .procname = "tcp_window_scaling", 973 .data = &init_net.ipv4.sysctl_tcp_window_scaling, 974 .maxlen = sizeof(int), 975 .mode = 0644, 976 .proc_handler = proc_dointvec 977 }, 978 { 979 .procname = "tcp_timestamps", 980 .data = &init_net.ipv4.sysctl_tcp_timestamps, 981 .maxlen = sizeof(int), 982 .mode = 0644, 983 .proc_handler = proc_dointvec 984 }, 985 { 986 .procname = "tcp_early_retrans", 987 .data = &init_net.ipv4.sysctl_tcp_early_retrans, 988 .maxlen = sizeof(int), 989 .mode = 0644, 990 .proc_handler = proc_dointvec_minmax, 991 .extra1 = &zero, 992 .extra2 = &four, 993 }, 994 { 995 .procname = "tcp_recovery", 996 .data = &init_net.ipv4.sysctl_tcp_recovery, 997 .maxlen = sizeof(int), 998 .mode = 0644, 999 .proc_handler = proc_dointvec, 1000 }, 1001 { 1002 .procname = "tcp_thin_linear_timeouts", 1003 .data = &init_net.ipv4.sysctl_tcp_thin_linear_timeouts, 1004 .maxlen = sizeof(int), 1005 .mode = 0644, 1006 .proc_handler = proc_dointvec 1007 }, 1008 { 1009 .procname = "tcp_slow_start_after_idle", 1010 .data = &init_net.ipv4.sysctl_tcp_slow_start_after_idle, 1011 .maxlen = sizeof(int), 1012 .mode = 0644, 1013 .proc_handler = proc_dointvec 1014 }, 1015 { 1016 .procname = "tcp_retrans_collapse", 1017 .data = &init_net.ipv4.sysctl_tcp_retrans_collapse, 1018 .maxlen = sizeof(int), 1019 .mode = 0644, 1020 .proc_handler = proc_dointvec 1021 }, 1022 { 1023 .procname = "tcp_stdurg", 1024 .data = &init_net.ipv4.sysctl_tcp_stdurg, 1025 .maxlen = sizeof(int), 1026 .mode = 0644, 1027 .proc_handler = proc_dointvec 1028 }, 1029 { 1030 .procname = "tcp_rfc1337", 1031 .data = &init_net.ipv4.sysctl_tcp_rfc1337, 1032 .maxlen = sizeof(int), 1033 .mode = 0644, 1034 .proc_handler = proc_dointvec 1035 }, 1036 { 1037 .procname = "tcp_abort_on_overflow", 1038 .data = &init_net.ipv4.sysctl_tcp_abort_on_overflow, 1039 .maxlen = sizeof(int), 1040 .mode = 0644, 1041 .proc_handler = proc_dointvec 1042 }, 1043 { 1044 .procname = "tcp_fack", 1045 .data = &init_net.ipv4.sysctl_tcp_fack, 1046 .maxlen = sizeof(int), 1047 .mode = 0644, 1048 .proc_handler = proc_dointvec 1049 }, 1050 { 1051 .procname = "tcp_max_reordering", 1052 .data = &init_net.ipv4.sysctl_tcp_max_reordering, 1053 .maxlen = sizeof(int), 1054 .mode = 0644, 1055 .proc_handler = proc_dointvec 1056 }, 1057 { 1058 .procname = "tcp_dsack", 1059 .data = &init_net.ipv4.sysctl_tcp_dsack, 1060 .maxlen = sizeof(int), 1061 .mode = 0644, 1062 .proc_handler = proc_dointvec 1063 }, 1064 { 1065 .procname = "tcp_app_win", 1066 .data = &init_net.ipv4.sysctl_tcp_app_win, 1067 .maxlen = sizeof(int), 1068 .mode = 0644, 1069 .proc_handler = proc_dointvec 1070 }, 1071 { 1072 .procname = "tcp_adv_win_scale", 1073 .data = &init_net.ipv4.sysctl_tcp_adv_win_scale, 1074 .maxlen = sizeof(int), 1075 .mode = 0644, 1076 .proc_handler = proc_dointvec_minmax, 1077 .extra1 = &tcp_adv_win_scale_min, 1078 .extra2 = &tcp_adv_win_scale_max, 1079 }, 1080 { 1081 .procname = "tcp_frto", 1082 .data = &init_net.ipv4.sysctl_tcp_frto, 1083 .maxlen = sizeof(int), 1084 .mode = 0644, 1085 .proc_handler = proc_dointvec 1086 }, 1087 { 1088 .procname = "tcp_no_metrics_save", 1089 .data = &init_net.ipv4.sysctl_tcp_nometrics_save, 1090 .maxlen = sizeof(int), 1091 .mode = 0644, 1092 .proc_handler = proc_dointvec, 1093 }, 1094 { 1095 .procname = "tcp_moderate_rcvbuf", 1096 .data = &init_net.ipv4.sysctl_tcp_moderate_rcvbuf, 1097 .maxlen = sizeof(int), 1098 .mode = 0644, 1099 .proc_handler = proc_dointvec, 1100 }, 1101 { 1102 .procname = "tcp_tso_win_divisor", 1103 .data = &init_net.ipv4.sysctl_tcp_tso_win_divisor, 1104 .maxlen = sizeof(int), 1105 .mode = 0644, 1106 .proc_handler = proc_dointvec, 1107 }, 1108 { 1109 .procname = "tcp_workaround_signed_windows", 1110 .data = &init_net.ipv4.sysctl_tcp_workaround_signed_windows, 1111 .maxlen = sizeof(int), 1112 .mode = 0644, 1113 .proc_handler = proc_dointvec 1114 }, 1115 { 1116 .procname = "tcp_limit_output_bytes", 1117 .data = &init_net.ipv4.sysctl_tcp_limit_output_bytes, 1118 .maxlen = sizeof(int), 1119 .mode = 0644, 1120 .proc_handler = proc_dointvec 1121 }, 1122 { 1123 .procname = "tcp_challenge_ack_limit", 1124 .data = &init_net.ipv4.sysctl_tcp_challenge_ack_limit, 1125 .maxlen = sizeof(int), 1126 .mode = 0644, 1127 .proc_handler = proc_dointvec 1128 }, 1129 { 1130 .procname = "tcp_min_tso_segs", 1131 .data = &init_net.ipv4.sysctl_tcp_min_tso_segs, 1132 .maxlen = sizeof(int), 1133 .mode = 0644, 1134 .proc_handler = proc_dointvec_minmax, 1135 .extra1 = &one, 1136 .extra2 = &gso_max_segs, 1137 }, 1138 { 1139 .procname = "tcp_min_rtt_wlen", 1140 .data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen, 1141 .maxlen = sizeof(int), 1142 .mode = 0644, 1143 .proc_handler = proc_dointvec 1144 }, 1145 { 1146 .procname = "tcp_autocorking", 1147 .data = &init_net.ipv4.sysctl_tcp_autocorking, 1148 .maxlen = sizeof(int), 1149 .mode = 0644, 1150 .proc_handler = proc_dointvec_minmax, 1151 .extra1 = &zero, 1152 .extra2 = &one, 1153 }, 1154 { 1155 .procname = "tcp_invalid_ratelimit", 1156 .data = &init_net.ipv4.sysctl_tcp_invalid_ratelimit, 1157 .maxlen = sizeof(int), 1158 .mode = 0644, 1159 .proc_handler = proc_dointvec_ms_jiffies, 1160 }, 1161 { 1162 .procname = "tcp_pacing_ss_ratio", 1163 .data = &init_net.ipv4.sysctl_tcp_pacing_ss_ratio, 1164 .maxlen = sizeof(int), 1165 .mode = 0644, 1166 .proc_handler = proc_dointvec_minmax, 1167 .extra1 = &zero, 1168 .extra2 = &thousand, 1169 }, 1170 { 1171 .procname = "tcp_pacing_ca_ratio", 1172 .data = &init_net.ipv4.sysctl_tcp_pacing_ca_ratio, 1173 .maxlen = sizeof(int), 1174 .mode = 0644, 1175 .proc_handler = proc_dointvec_minmax, 1176 .extra1 = &zero, 1177 .extra2 = &thousand, 1178 }, 1179 { 1180 .procname = "tcp_wmem", 1181 .data = &init_net.ipv4.sysctl_tcp_wmem, 1182 .maxlen = sizeof(init_net.ipv4.sysctl_tcp_wmem), 1183 .mode = 0644, 1184 .proc_handler = proc_dointvec_minmax, 1185 .extra1 = &one, 1186 }, 1187 { 1188 .procname = "tcp_rmem", 1189 .data = &init_net.ipv4.sysctl_tcp_rmem, 1190 .maxlen = sizeof(init_net.ipv4.sysctl_tcp_rmem), 1191 .mode = 0644, 1192 .proc_handler = proc_dointvec_minmax, 1193 .extra1 = &one, 1194 }, 1195 { 1196 .procname = "tcp_comp_sack_delay_ns", 1197 .data = &init_net.ipv4.sysctl_tcp_comp_sack_delay_ns, 1198 .maxlen = sizeof(unsigned long), 1199 .mode = 0644, 1200 .proc_handler = proc_doulongvec_minmax, 1201 }, 1202 { 1203 .procname = "tcp_comp_sack_nr", 1204 .data = &init_net.ipv4.sysctl_tcp_comp_sack_nr, 1205 .maxlen = sizeof(int), 1206 .mode = 0644, 1207 .proc_handler = proc_dointvec_minmax, 1208 .extra1 = &zero, 1209 .extra2 = &comp_sack_nr_max, 1210 }, 1211 { 1212 .procname = "udp_rmem_min", 1213 .data = &init_net.ipv4.sysctl_udp_rmem_min, 1214 .maxlen = sizeof(init_net.ipv4.sysctl_udp_rmem_min), 1215 .mode = 0644, 1216 .proc_handler = proc_dointvec_minmax, 1217 .extra1 = &one 1218 }, 1219 { 1220 .procname = "udp_wmem_min", 1221 .data = &init_net.ipv4.sysctl_udp_wmem_min, 1222 .maxlen = sizeof(init_net.ipv4.sysctl_udp_wmem_min), 1223 .mode = 0644, 1224 .proc_handler = proc_dointvec_minmax, 1225 .extra1 = &one 1226 }, 1227 { } 1228 }; 1229 1230 static __net_init int ipv4_sysctl_init_net(struct net *net) 1231 { 1232 struct ctl_table *table; 1233 1234 table = ipv4_net_table; 1235 if (!net_eq(net, &init_net)) { 1236 int i; 1237 1238 table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL); 1239 if (!table) 1240 goto err_alloc; 1241 1242 /* Update the variables to point into the current struct net */ 1243 for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) 1244 table[i].data += (void *)net - (void *)&init_net; 1245 } 1246 1247 net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); 1248 if (!net->ipv4.ipv4_hdr) 1249 goto err_reg; 1250 1251 net->ipv4.sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); 1252 if (!net->ipv4.sysctl_local_reserved_ports) 1253 goto err_ports; 1254 1255 return 0; 1256 1257 err_ports: 1258 unregister_net_sysctl_table(net->ipv4.ipv4_hdr); 1259 err_reg: 1260 if (!net_eq(net, &init_net)) 1261 kfree(table); 1262 err_alloc: 1263 return -ENOMEM; 1264 } 1265 1266 static __net_exit void ipv4_sysctl_exit_net(struct net *net) 1267 { 1268 struct ctl_table *table; 1269 1270 kfree(net->ipv4.sysctl_local_reserved_ports); 1271 table = net->ipv4.ipv4_hdr->ctl_table_arg; 1272 unregister_net_sysctl_table(net->ipv4.ipv4_hdr); 1273 kfree(table); 1274 } 1275 1276 static __net_initdata struct pernet_operations ipv4_sysctl_ops = { 1277 .init = ipv4_sysctl_init_net, 1278 .exit = ipv4_sysctl_exit_net, 1279 }; 1280 1281 static __init int sysctl_ipv4_init(void) 1282 { 1283 struct ctl_table_header *hdr; 1284 1285 hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table); 1286 if (!hdr) 1287 return -ENOMEM; 1288 1289 if (register_pernet_subsys(&ipv4_sysctl_ops)) { 1290 unregister_net_sysctl_table(hdr); 1291 return -ENOMEM; 1292 } 1293 1294 return 0; 1295 } 1296 1297 __initcall(sysctl_ipv4_init); 1298