xref: /openbmc/linux/net/ipv4/sysctl_net_ipv4.c (revision b24413180f5600bcb3bb70fbed5cf186b60864bd)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
4  *
5  * Begun April 1, 1996, Mike Shaver.
6  * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
7  */
8 
9 #include <linux/mm.h>
10 #include <linux/module.h>
11 #include <linux/sysctl.h>
12 #include <linux/igmp.h>
13 #include <linux/inetdevice.h>
14 #include <linux/seqlock.h>
15 #include <linux/init.h>
16 #include <linux/slab.h>
17 #include <linux/nsproxy.h>
18 #include <linux/swap.h>
19 #include <net/snmp.h>
20 #include <net/icmp.h>
21 #include <net/ip.h>
22 #include <net/route.h>
23 #include <net/tcp.h>
24 #include <net/udp.h>
25 #include <net/cipso_ipv4.h>
26 #include <net/inet_frag.h>
27 #include <net/ping.h>
28 #include <net/protocol.h>
29 
/* Lower/upper bounds shared by the sysctl tables and handlers in this file.
 * They are wired in through the .extra1 (minimum) and .extra2 (maximum)
 * members of struct ctl_table entries.
 */
static int zero;
static int one = 1;
static int four = 4;
static int thousand = 1000;
static int gso_max_segs = GSO_MAX_SEGS;
static int tcp_retr1_max = 255;
/* Bounds passed to proc_dointvec_minmax() by ipv4_local_port_range() */
static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
static int tcp_adv_win_scale_min = -31;
static int tcp_adv_win_scale_max = 31;
/* Bounds passed to proc_dointvec_minmax() by ipv4_privileged_ports();
 * the minimum has no initializer and is therefore 0.
 */
static int ip_privileged_port_min;
static int ip_privileged_port_max = 65535;
static int ip_ttl_min = 1;
static int ip_ttl_max = 255;
static int tcp_syn_retries_min = 1;
static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
/* Bounds passed to proc_dointvec_minmax() by ipv4_ping_group_range() */
static int ip_ping_group_range_min[] = { 0, 0 };
static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };

/* obsolete: only referenced as the backing store of the "tcp_low_latency"
 * entry in ipv4_table.
 */
static int sysctl_tcp_low_latency __read_mostly;
51 
52 /* Update system visible IP port range */
53 static void set_local_port_range(struct net *net, int range[2])
54 {
55 	bool same_parity = !((range[0] ^ range[1]) & 1);
56 
57 	write_seqlock_bh(&net->ipv4.ip_local_ports.lock);
58 	if (same_parity && !net->ipv4.ip_local_ports.warned) {
59 		net->ipv4.ip_local_ports.warned = true;
60 		pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n");
61 	}
62 	net->ipv4.ip_local_ports.range[0] = range[0];
63 	net->ipv4.ip_local_ports.range[1] = range[1];
64 	write_sequnlock_bh(&net->ipv4.ip_local_ports.lock);
65 }
66 
67 /* Validate changes from /proc interface. */
68 static int ipv4_local_port_range(struct ctl_table *table, int write,
69 				 void __user *buffer,
70 				 size_t *lenp, loff_t *ppos)
71 {
72 	struct net *net =
73 		container_of(table->data, struct net, ipv4.ip_local_ports.range);
74 	int ret;
75 	int range[2];
76 	struct ctl_table tmp = {
77 		.data = &range,
78 		.maxlen = sizeof(range),
79 		.mode = table->mode,
80 		.extra1 = &ip_local_port_range_min,
81 		.extra2 = &ip_local_port_range_max,
82 	};
83 
84 	inet_get_local_port_range(net, &range[0], &range[1]);
85 
86 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
87 
88 	if (write && ret == 0) {
89 		/* Ensure that the upper limit is not smaller than the lower,
90 		 * and that the lower does not encroach upon the privileged
91 		 * port limit.
92 		 */
93 		if ((range[1] < range[0]) ||
94 		    (range[0] < net->ipv4.sysctl_ip_prot_sock))
95 			ret = -EINVAL;
96 		else
97 			set_local_port_range(net, range);
98 	}
99 
100 	return ret;
101 }
102 
103 /* Validate changes from /proc interface. */
104 static int ipv4_privileged_ports(struct ctl_table *table, int write,
105 				void __user *buffer, size_t *lenp, loff_t *ppos)
106 {
107 	struct net *net = container_of(table->data, struct net,
108 	    ipv4.sysctl_ip_prot_sock);
109 	int ret;
110 	int pports;
111 	int range[2];
112 	struct ctl_table tmp = {
113 		.data = &pports,
114 		.maxlen = sizeof(pports),
115 		.mode = table->mode,
116 		.extra1 = &ip_privileged_port_min,
117 		.extra2 = &ip_privileged_port_max,
118 	};
119 
120 	pports = net->ipv4.sysctl_ip_prot_sock;
121 
122 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
123 
124 	if (write && ret == 0) {
125 		inet_get_local_port_range(net, &range[0], &range[1]);
126 		/* Ensure that the local port range doesn't overlap with the
127 		 * privileged port range.
128 		 */
129 		if (range[0] < pports)
130 			ret = -EINVAL;
131 		else
132 			net->ipv4.sysctl_ip_prot_sock = pports;
133 	}
134 
135 	return ret;
136 }
137 
138 static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high)
139 {
140 	kgid_t *data = table->data;
141 	struct net *net =
142 		container_of(table->data, struct net, ipv4.ping_group_range.range);
143 	unsigned int seq;
144 	do {
145 		seq = read_seqbegin(&net->ipv4.ping_group_range.lock);
146 
147 		*low = data[0];
148 		*high = data[1];
149 	} while (read_seqretry(&net->ipv4.ping_group_range.lock, seq));
150 }
151 
152 /* Update system visible IP port range */
153 static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high)
154 {
155 	kgid_t *data = table->data;
156 	struct net *net =
157 		container_of(table->data, struct net, ipv4.ping_group_range.range);
158 	write_seqlock(&net->ipv4.ping_group_range.lock);
159 	data[0] = low;
160 	data[1] = high;
161 	write_sequnlock(&net->ipv4.ping_group_range.lock);
162 }
163 
164 /* Validate changes from /proc interface. */
165 static int ipv4_ping_group_range(struct ctl_table *table, int write,
166 				 void __user *buffer,
167 				 size_t *lenp, loff_t *ppos)
168 {
169 	struct user_namespace *user_ns = current_user_ns();
170 	int ret;
171 	gid_t urange[2];
172 	kgid_t low, high;
173 	struct ctl_table tmp = {
174 		.data = &urange,
175 		.maxlen = sizeof(urange),
176 		.mode = table->mode,
177 		.extra1 = &ip_ping_group_range_min,
178 		.extra2 = &ip_ping_group_range_max,
179 	};
180 
181 	inet_get_ping_group_range_table(table, &low, &high);
182 	urange[0] = from_kgid_munged(user_ns, low);
183 	urange[1] = from_kgid_munged(user_ns, high);
184 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
185 
186 	if (write && ret == 0) {
187 		low = make_kgid(user_ns, urange[0]);
188 		high = make_kgid(user_ns, urange[1]);
189 		if (!gid_valid(low) || !gid_valid(high) ||
190 		    (urange[1] < urange[0]) || gid_lt(high, low)) {
191 			low = make_kgid(&init_user_ns, 1);
192 			high = make_kgid(&init_user_ns, 0);
193 		}
194 		set_ping_group_range(table, low, high);
195 	}
196 
197 	return ret;
198 }
199 
200 static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
201 				       void __user *buffer, size_t *lenp, loff_t *ppos)
202 {
203 	char val[TCP_CA_NAME_MAX];
204 	struct ctl_table tbl = {
205 		.data = val,
206 		.maxlen = TCP_CA_NAME_MAX,
207 	};
208 	int ret;
209 
210 	tcp_get_default_congestion_control(val);
211 
212 	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
213 	if (write && ret == 0)
214 		ret = tcp_set_default_congestion_control(val);
215 	return ret;
216 }
217 
218 static int proc_tcp_available_congestion_control(struct ctl_table *ctl,
219 						 int write,
220 						 void __user *buffer, size_t *lenp,
221 						 loff_t *ppos)
222 {
223 	struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, };
224 	int ret;
225 
226 	tbl.data = kmalloc(tbl.maxlen, GFP_USER);
227 	if (!tbl.data)
228 		return -ENOMEM;
229 	tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX);
230 	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
231 	kfree(tbl.data);
232 	return ret;
233 }
234 
235 static int proc_allowed_congestion_control(struct ctl_table *ctl,
236 					   int write,
237 					   void __user *buffer, size_t *lenp,
238 					   loff_t *ppos)
239 {
240 	struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
241 	int ret;
242 
243 	tbl.data = kmalloc(tbl.maxlen, GFP_USER);
244 	if (!tbl.data)
245 		return -ENOMEM;
246 
247 	tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
248 	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
249 	if (write && ret == 0)
250 		ret = tcp_set_allowed_congestion_control(tbl.data);
251 	kfree(tbl.data);
252 	return ret;
253 }
254 
255 static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
256 				 void __user *buffer, size_t *lenp,
257 				 loff_t *ppos)
258 {
259 	struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
260 	struct tcp_fastopen_context *ctxt;
261 	int ret;
262 	u32  user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
263 
264 	tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
265 	if (!tbl.data)
266 		return -ENOMEM;
267 
268 	rcu_read_lock();
269 	ctxt = rcu_dereference(tcp_fastopen_ctx);
270 	if (ctxt)
271 		memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
272 	else
273 		memset(user_key, 0, sizeof(user_key));
274 	rcu_read_unlock();
275 
276 	snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
277 		user_key[0], user_key[1], user_key[2], user_key[3]);
278 	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
279 
280 	if (write && ret == 0) {
281 		if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1,
282 			   user_key + 2, user_key + 3) != 4) {
283 			ret = -EINVAL;
284 			goto bad_key;
285 		}
286 		/* Generate a dummy secret but don't publish it. This
287 		 * is needed so we don't regenerate a new key on the
288 		 * first invocation of tcp_fastopen_cookie_gen
289 		 */
290 		tcp_fastopen_init_key_once(false);
291 		tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
292 	}
293 
294 bad_key:
295 	pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
296 	       user_key[0], user_key[1], user_key[2], user_key[3],
297 	       (char *)tbl.data, ret);
298 	kfree(tbl.data);
299 	return ret;
300 }
301 
302 static void proc_configure_early_demux(int enabled, int protocol)
303 {
304 	struct net_protocol *ipprot;
305 #if IS_ENABLED(CONFIG_IPV6)
306 	struct inet6_protocol *ip6prot;
307 #endif
308 
309 	rcu_read_lock();
310 
311 	ipprot = rcu_dereference(inet_protos[protocol]);
312 	if (ipprot)
313 		ipprot->early_demux = enabled ? ipprot->early_demux_handler :
314 						NULL;
315 
316 #if IS_ENABLED(CONFIG_IPV6)
317 	ip6prot = rcu_dereference(inet6_protos[protocol]);
318 	if (ip6prot)
319 		ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
320 						 NULL;
321 #endif
322 	rcu_read_unlock();
323 }
324 
325 static int proc_tcp_early_demux(struct ctl_table *table, int write,
326 				void __user *buffer, size_t *lenp, loff_t *ppos)
327 {
328 	int ret = 0;
329 
330 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
331 
332 	if (write && !ret) {
333 		int enabled = init_net.ipv4.sysctl_tcp_early_demux;
334 
335 		proc_configure_early_demux(enabled, IPPROTO_TCP);
336 	}
337 
338 	return ret;
339 }
340 
341 static int proc_udp_early_demux(struct ctl_table *table, int write,
342 				void __user *buffer, size_t *lenp, loff_t *ppos)
343 {
344 	int ret = 0;
345 
346 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
347 
348 	if (write && !ret) {
349 		int enabled = init_net.ipv4.sysctl_udp_early_demux;
350 
351 		proc_configure_early_demux(enabled, IPPROTO_UDP);
352 	}
353 
354 	return ret;
355 }
356 
357 static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
358 					     int write,
359 					     void __user *buffer,
360 					     size_t *lenp, loff_t *ppos)
361 {
362 	int ret;
363 
364 	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
365 	if (write && ret == 0)
366 		tcp_fastopen_active_timeout_reset();
367 
368 	return ret;
369 }
370 
371 static int proc_tcp_available_ulp(struct ctl_table *ctl,
372 				  int write,
373 				  void __user *buffer, size_t *lenp,
374 				  loff_t *ppos)
375 {
376 	struct ctl_table tbl = { .maxlen = TCP_ULP_BUF_MAX, };
377 	int ret;
378 
379 	tbl.data = kmalloc(tbl.maxlen, GFP_USER);
380 	if (!tbl.data)
381 		return -ENOMEM;
382 	tcp_get_available_ulp(tbl.data, TCP_ULP_BUF_MAX);
383 	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
384 	kfree(tbl.data);
385 
386 	return ret;
387 }
388 
/* Sysctl entries whose .data (where present) refers to standalone
 * variables rather than to fields of init_net. sysctl_ipv4_init()
 * registers this table once, for init_net, under "net/ipv4".
 *
 * Entries without .data are served entirely by their custom handler,
 * which builds its own temporary ctl_table.
 */
static struct ctl_table ipv4_table[] = {
	{
		.procname	= "tcp_retrans_collapse",
		.data		= &sysctl_tcp_retrans_collapse,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_max_orphans",
		.data		= &sysctl_tcp_max_orphans,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_fastopen",
		.data		= &sysctl_tcp_fastopen,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* no .data: proc_tcp_fastopen_key() formats/parses the key */
		.procname	= "tcp_fastopen_key",
		.mode		= 0600,
		.maxlen		= ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
		.proc_handler	= proc_tcp_fastopen_key,
	},
	{
		.procname	= "tcp_fastopen_blackhole_timeout_sec",
		.data		= &sysctl_tcp_fastopen_blackhole_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_tfo_blackhole_detect_timeout,
		.extra1		= &zero,
	},
	{
		.procname	= "tcp_abort_on_overflow",
		.data		= &sysctl_tcp_abort_on_overflow,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_stdurg",
		.data		= &sysctl_tcp_stdurg,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_rfc1337",
		.data		= &sysctl_tcp_rfc1337,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "inet_peer_threshold",
		.data		= &inet_peer_threshold,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "inet_peer_minttl",
		.data		= &inet_peer_minttl,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "inet_peer_maxttl",
		.data		= &inet_peer_maxttl,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "tcp_fack",
		.data		= &sysctl_tcp_fack,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_recovery",
		.data		= &sysctl_tcp_recovery,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tcp_max_reordering",
		.data		= &sysctl_tcp_max_reordering,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_dsack",
		.data		= &sysctl_tcp_dsack,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_mem",
		.maxlen		= sizeof(sysctl_tcp_mem),
		.data		= &sysctl_tcp_mem,
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "tcp_wmem",
		.data		= &sysctl_tcp_wmem,
		.maxlen		= sizeof(sysctl_tcp_wmem),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one,
	},
	{
		.procname	= "tcp_rmem",
		.data		= &sysctl_tcp_rmem,
		.maxlen		= sizeof(sysctl_tcp_rmem),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one,
	},
	{
		.procname	= "tcp_app_win",
		.data		= &sysctl_tcp_app_win,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_adv_win_scale",
		.data		= &sysctl_tcp_adv_win_scale,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &tcp_adv_win_scale_min,
		.extra2		= &tcp_adv_win_scale_max,
	},
	{
		.procname	= "tcp_frto",
		.data		= &sysctl_tcp_frto,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_min_rtt_wlen",
		.data		= &sysctl_tcp_min_rtt_wlen,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		/* backed by the file-local, obsolete sysctl_tcp_low_latency */
		.procname	= "tcp_low_latency",
		.data		= &sysctl_tcp_low_latency,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_no_metrics_save",
		.data		= &sysctl_tcp_nometrics_save,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tcp_moderate_rcvbuf",
		.data		= &sysctl_tcp_moderate_rcvbuf,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tcp_tso_win_divisor",
		.data		= &sysctl_tcp_tso_win_divisor,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* no .data: the handler works on a stack copy of the name */
		.procname	= "tcp_congestion_control",
		.mode		= 0644,
		.maxlen		= TCP_CA_NAME_MAX,
		.proc_handler	= proc_tcp_congestion_control,
	},
	{
		.procname	= "tcp_workaround_signed_windows",
		.data		= &sysctl_tcp_workaround_signed_windows,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_limit_output_bytes",
		.data		= &sysctl_tcp_limit_output_bytes,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_challenge_ack_limit",
		.data		= &sysctl_tcp_challenge_ack_limit,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_slow_start_after_idle",
		.data		= &sysctl_tcp_slow_start_after_idle,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
#ifdef CONFIG_NETLABEL
	{
		.procname	= "cipso_cache_enable",
		.data		= &cipso_v4_cache_enabled,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "cipso_cache_bucket_size",
		.data		= &cipso_v4_cache_bucketsize,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "cipso_rbm_optfmt",
		.data		= &cipso_v4_rbm_optfmt,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "cipso_rbm_strictvalid",
		.data		= &cipso_v4_rbm_strictvalid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif /* CONFIG_NETLABEL */
	{
		/* read-only (0444); the handler fills a scratch buffer */
		.procname	= "tcp_available_congestion_control",
		.maxlen		= TCP_CA_BUF_MAX,
		.mode		= 0444,
		.proc_handler   = proc_tcp_available_congestion_control,
	},
	{
		.procname	= "tcp_allowed_congestion_control",
		.maxlen		= TCP_CA_BUF_MAX,
		.mode		= 0644,
		.proc_handler   = proc_allowed_congestion_control,
	},
	{
		.procname       = "tcp_thin_linear_timeouts",
		.data           = &sysctl_tcp_thin_linear_timeouts,
		.maxlen         = sizeof(int),
		.mode           = 0644,
		.proc_handler   = proc_dointvec
	},
	{
		.procname	= "tcp_early_retrans",
		.data		= &sysctl_tcp_early_retrans,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &four,
	},
	{
		.procname	= "tcp_min_tso_segs",
		.data		= &sysctl_tcp_min_tso_segs,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one,
		.extra2		= &gso_max_segs,
	},
	{
		.procname	= "tcp_pacing_ss_ratio",
		.data		= &sysctl_tcp_pacing_ss_ratio,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &thousand,
	},
	{
		.procname	= "tcp_pacing_ca_ratio",
		.data		= &sysctl_tcp_pacing_ca_ratio,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &thousand,
	},
	{
		.procname	= "tcp_autocorking",
		.data		= &sysctl_tcp_autocorking,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &one,
	},
	{
		.procname	= "tcp_invalid_ratelimit",
		.data		= &sysctl_tcp_invalid_ratelimit,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		/* read-only (0444); the handler fills a scratch buffer */
		.procname	= "tcp_available_ulp",
		.maxlen		= TCP_ULP_BUF_MAX,
		.mode		= 0444,
		.proc_handler   = proc_tcp_available_ulp,
	},
	{
		.procname	= "icmp_msgs_per_sec",
		.data		= &sysctl_icmp_msgs_per_sec,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
	},
	{
		.procname	= "icmp_msgs_burst",
		.data		= &sysctl_icmp_msgs_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
	},
	{
		.procname	= "udp_mem",
		.data		= &sysctl_udp_mem,
		.maxlen		= sizeof(sysctl_udp_mem),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "udp_rmem_min",
		.data		= &sysctl_udp_rmem_min,
		.maxlen		= sizeof(sysctl_udp_rmem_min),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one
	},
	{
		.procname	= "udp_wmem_min",
		.data		= &sysctl_udp_wmem_min,
		.maxlen		= sizeof(sysctl_udp_wmem_min),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one
	},
	{ }	/* empty terminating entry */
};
758 
/* Per-network-namespace sysctl entries. Every .data pointer here refers
 * to a field of init_net; for any other namespace ipv4_sysctl_init_net()
 * duplicates the table and shifts each .data pointer by the distance
 * between that namespace's struct net and init_net.
 */
static struct ctl_table ipv4_net_table[] = {
	{
		.procname	= "icmp_echo_ignore_all",
		.data		= &init_net.ipv4.sysctl_icmp_echo_ignore_all,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "icmp_echo_ignore_broadcasts",
		.data		= &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "icmp_ignore_bogus_error_responses",
		.data		= &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "icmp_errors_use_inbound_ifaddr",
		.data		= &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "icmp_ratelimit",
		.data		= &init_net.ipv4.sysctl_icmp_ratelimit,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "icmp_ratemask",
		.data		= &init_net.ipv4.sysctl_icmp_ratemask,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		/* the handler recovers the owning struct net from .data */
		.procname	= "ping_group_range",
		.data		= &init_net.ipv4.ping_group_range.range,
		.maxlen		= sizeof(gid_t)*2,
		.mode		= 0644,
		.proc_handler	= ipv4_ping_group_range,
	},
	{
		.procname	= "tcp_ecn",
		.data		= &init_net.ipv4.sysctl_tcp_ecn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_ecn_fallback",
		.data		= &init_net.ipv4.sysctl_tcp_ecn_fallback,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "ip_dynaddr",
		.data		= &init_net.ipv4.sysctl_ip_dynaddr,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "ip_early_demux",
		.data		= &init_net.ipv4.sysctl_ip_early_demux,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname       = "udp_early_demux",
		.data           = &init_net.ipv4.sysctl_udp_early_demux,
		.maxlen         = sizeof(int),
		.mode           = 0644,
		.proc_handler   = proc_udp_early_demux
	},
	{
		.procname       = "tcp_early_demux",
		.data           = &init_net.ipv4.sysctl_tcp_early_demux,
		.maxlen         = sizeof(int),
		.mode           = 0644,
		.proc_handler   = proc_tcp_early_demux
	},
	{
		.procname	= "ip_default_ttl",
		.data		= &init_net.ipv4.sysctl_ip_default_ttl,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &ip_ttl_min,
		.extra2		= &ip_ttl_max,
	},
	{
		/* the handler recovers the owning struct net from .data */
		.procname	= "ip_local_port_range",
		.maxlen		= sizeof(init_net.ipv4.ip_local_ports.range),
		.data		= &init_net.ipv4.ip_local_ports.range,
		.mode		= 0644,
		.proc_handler	= ipv4_local_port_range,
	},
	{
		/* .data is the address of the bitmap pointer; the bitmap
		 * itself (65536 / 8 bytes) is allocated per namespace in
		 * ipv4_sysctl_init_net().
		 */
		.procname	= "ip_local_reserved_ports",
		.data		= &init_net.ipv4.sysctl_local_reserved_ports,
		.maxlen		= 65536,
		.mode		= 0644,
		.proc_handler	= proc_do_large_bitmap,
	},
	{
		.procname	= "ip_no_pmtu_disc",
		.data		= &init_net.ipv4.sysctl_ip_no_pmtu_disc,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "ip_forward_use_pmtu",
		.data		= &init_net.ipv4.sysctl_ip_fwd_use_pmtu,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "ip_nonlocal_bind",
		.data		= &init_net.ipv4.sysctl_ip_nonlocal_bind,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "fwmark_reflect",
		.data		= &init_net.ipv4.sysctl_fwmark_reflect,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tcp_fwmark_accept",
		.data		= &init_net.ipv4.sysctl_tcp_fwmark_accept,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#ifdef CONFIG_NET_L3_MASTER_DEV
	{
		.procname	= "tcp_l3mdev_accept",
		.data		= &init_net.ipv4.sysctl_tcp_l3mdev_accept,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &one,
	},
#endif
	{
		.procname	= "tcp_mtu_probing",
		.data		= &init_net.ipv4.sysctl_tcp_mtu_probing,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tcp_base_mss",
		.data		= &init_net.ipv4.sysctl_tcp_base_mss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tcp_probe_threshold",
		.data		= &init_net.ipv4.sysctl_tcp_probe_threshold,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tcp_probe_interval",
		.data		= &init_net.ipv4.sysctl_tcp_probe_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "igmp_link_local_mcast_reports",
		.data		= &init_net.ipv4.sysctl_igmp_llm_reports,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "igmp_max_memberships",
		.data		= &init_net.ipv4.sysctl_igmp_max_memberships,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "igmp_max_msf",
		.data		= &init_net.ipv4.sysctl_igmp_max_msf,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
#ifdef CONFIG_IP_MULTICAST
	{
		.procname	= "igmp_qrv",
		.data		= &init_net.ipv4.sysctl_igmp_qrv,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one
	},
#endif
	{
		.procname	= "tcp_keepalive_time",
		.data		= &init_net.ipv4.sysctl_tcp_keepalive_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "tcp_keepalive_probes",
		.data		= &init_net.ipv4.sysctl_tcp_keepalive_probes,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_keepalive_intvl",
		.data		= &init_net.ipv4.sysctl_tcp_keepalive_intvl,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "tcp_syn_retries",
		.data		= &init_net.ipv4.sysctl_tcp_syn_retries,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &tcp_syn_retries_min,
		.extra2		= &tcp_syn_retries_max
	},
	{
		.procname	= "tcp_synack_retries",
		.data		= &init_net.ipv4.sysctl_tcp_synack_retries,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
#ifdef CONFIG_SYN_COOKIES
	{
		.procname	= "tcp_syncookies",
		.data		= &init_net.ipv4.sysctl_tcp_syncookies,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
#endif
	{
		.procname	= "tcp_reordering",
		.data		= &init_net.ipv4.sysctl_tcp_reordering,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		/* only an upper bound (.extra2) is supplied for this entry */
		.procname	= "tcp_retries1",
		.data		= &init_net.ipv4.sysctl_tcp_retries1,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra2		= &tcp_retr1_max
	},
	{
		.procname	= "tcp_retries2",
		.data		= &init_net.ipv4.sysctl_tcp_retries2,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_orphan_retries",
		.data		= &init_net.ipv4.sysctl_tcp_orphan_retries,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_fin_timeout",
		.data		= &init_net.ipv4.sysctl_tcp_fin_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "tcp_notsent_lowat",
		.data		= &init_net.ipv4.sysctl_tcp_notsent_lowat,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_douintvec,
	},
	{
		.procname	= "tcp_tw_reuse",
		.data		= &init_net.ipv4.sysctl_tcp_tw_reuse,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_max_tw_buckets",
		.data		= &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_max_syn_backlog",
		.data		= &init_net.ipv4.sysctl_max_syn_backlog,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	{
		.procname	= "fib_multipath_use_neigh",
		.data		= &init_net.ipv4.sysctl_fib_multipath_use_neigh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &one,
	},
	{
		.procname	= "fib_multipath_hash_policy",
		.data		= &init_net.ipv4.sysctl_fib_multipath_hash_policy,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &one,
	},
#endif
	{
		/* the handler recovers the owning struct net from .data */
		.procname	= "ip_unprivileged_port_start",
		.maxlen		= sizeof(int),
		.data		= &init_net.ipv4.sysctl_ip_prot_sock,
		.mode		= 0644,
		.proc_handler	= ipv4_privileged_ports,
	},
#ifdef CONFIG_NET_L3_MASTER_DEV
	{
		.procname	= "udp_l3mdev_accept",
		.data		= &init_net.ipv4.sysctl_udp_l3mdev_accept,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &one,
	},
#endif
	{
		.procname	= "tcp_sack",
		.data		= &init_net.ipv4.sysctl_tcp_sack,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_window_scaling",
		.data		= &init_net.ipv4.sysctl_tcp_window_scaling,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "tcp_timestamps",
		.data		= &init_net.ipv4.sysctl_tcp_timestamps,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{ }	/* empty terminating entry; skipped when .data is rebased */
};
1150 
1151 static __net_init int ipv4_sysctl_init_net(struct net *net)
1152 {
1153 	struct ctl_table *table;
1154 
1155 	table = ipv4_net_table;
1156 	if (!net_eq(net, &init_net)) {
1157 		int i;
1158 
1159 		table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL);
1160 		if (!table)
1161 			goto err_alloc;
1162 
1163 		/* Update the variables to point into the current struct net */
1164 		for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++)
1165 			table[i].data += (void *)net - (void *)&init_net;
1166 	}
1167 
1168 	net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
1169 	if (!net->ipv4.ipv4_hdr)
1170 		goto err_reg;
1171 
1172 	net->ipv4.sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
1173 	if (!net->ipv4.sysctl_local_reserved_ports)
1174 		goto err_ports;
1175 
1176 	return 0;
1177 
1178 err_ports:
1179 	unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
1180 err_reg:
1181 	if (!net_eq(net, &init_net))
1182 		kfree(table);
1183 err_alloc:
1184 	return -ENOMEM;
1185 }
1186 
1187 static __net_exit void ipv4_sysctl_exit_net(struct net *net)
1188 {
1189 	struct ctl_table *table;
1190 
1191 	kfree(net->ipv4.sysctl_local_reserved_ports);
1192 	table = net->ipv4.ipv4_hdr->ctl_table_arg;
1193 	unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
1194 	kfree(table);
1195 }
1196 
/* Per-network-namespace init/exit callbacks for the IPv4 sysctl tree;
 * registered by sysctl_ipv4_init() through register_pernet_subsys().
 */
static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
	.init = ipv4_sysctl_init_net,
	.exit = ipv4_sysctl_exit_net,
};
1201 
1202 static __init int sysctl_ipv4_init(void)
1203 {
1204 	struct ctl_table_header *hdr;
1205 
1206 	hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table);
1207 	if (!hdr)
1208 		return -ENOMEM;
1209 
1210 	if (register_pernet_subsys(&ipv4_sysctl_ops)) {
1211 		unregister_net_sysctl_table(hdr);
1212 		return -ENOMEM;
1213 	}
1214 
1215 	return 0;
1216 }
1217 
1218 __initcall(sysctl_ipv4_init);
1219