xref: /openbmc/linux/net/ipv4/sysctl_net_ipv4.c (revision 4f6cce39)
1 /*
2  * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
3  *
4  * Begun April 1, 1996, Mike Shaver.
5  * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
6  */
7 
8 #include <linux/mm.h>
9 #include <linux/module.h>
10 #include <linux/sysctl.h>
11 #include <linux/igmp.h>
12 #include <linux/inetdevice.h>
13 #include <linux/seqlock.h>
14 #include <linux/init.h>
15 #include <linux/slab.h>
16 #include <linux/nsproxy.h>
17 #include <linux/swap.h>
18 #include <net/snmp.h>
19 #include <net/icmp.h>
20 #include <net/ip.h>
21 #include <net/route.h>
22 #include <net/tcp.h>
23 #include <net/udp.h>
24 #include <net/cipso_ipv4.h>
25 #include <net/inet_frag.h>
26 #include <net/ping.h>
27 
28 static int zero;
29 static int one = 1;
30 static int four = 4;
31 static int thousand = 1000;
32 static int gso_max_segs = GSO_MAX_SEGS;
33 static int tcp_retr1_max = 255;
34 static int ip_local_port_range_min[] = { 1, 1 };
35 static int ip_local_port_range_max[] = { 65535, 65535 };
36 static int tcp_adv_win_scale_min = -31;
37 static int tcp_adv_win_scale_max = 31;
38 static int ip_privileged_port_min;
39 static int ip_privileged_port_max = 65535;
40 static int ip_ttl_min = 1;
41 static int ip_ttl_max = 255;
42 static int tcp_syn_retries_min = 1;
43 static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
44 static int ip_ping_group_range_min[] = { 0, 0 };
45 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
46 
47 /* Update system visible IP port range */
48 static void set_local_port_range(struct net *net, int range[2])
49 {
50 	bool same_parity = !((range[0] ^ range[1]) & 1);
51 
52 	write_seqlock_bh(&net->ipv4.ip_local_ports.lock);
53 	if (same_parity && !net->ipv4.ip_local_ports.warned) {
54 		net->ipv4.ip_local_ports.warned = true;
55 		pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n");
56 	}
57 	net->ipv4.ip_local_ports.range[0] = range[0];
58 	net->ipv4.ip_local_ports.range[1] = range[1];
59 	write_sequnlock_bh(&net->ipv4.ip_local_ports.lock);
60 }
61 
62 /* Validate changes from /proc interface. */
63 static int ipv4_local_port_range(struct ctl_table *table, int write,
64 				 void __user *buffer,
65 				 size_t *lenp, loff_t *ppos)
66 {
67 	struct net *net =
68 		container_of(table->data, struct net, ipv4.ip_local_ports.range);
69 	int ret;
70 	int range[2];
71 	struct ctl_table tmp = {
72 		.data = &range,
73 		.maxlen = sizeof(range),
74 		.mode = table->mode,
75 		.extra1 = &ip_local_port_range_min,
76 		.extra2 = &ip_local_port_range_max,
77 	};
78 
79 	inet_get_local_port_range(net, &range[0], &range[1]);
80 
81 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
82 
83 	if (write && ret == 0) {
84 		/* Ensure that the upper limit is not smaller than the lower,
85 		 * and that the lower does not encroach upon the privileged
86 		 * port limit.
87 		 */
88 		if ((range[1] < range[0]) ||
89 		    (range[0] < net->ipv4.sysctl_ip_prot_sock))
90 			ret = -EINVAL;
91 		else
92 			set_local_port_range(net, range);
93 	}
94 
95 	return ret;
96 }
97 
98 /* Validate changes from /proc interface. */
99 static int ipv4_privileged_ports(struct ctl_table *table, int write,
100 				void __user *buffer, size_t *lenp, loff_t *ppos)
101 {
102 	struct net *net = container_of(table->data, struct net,
103 	    ipv4.sysctl_ip_prot_sock);
104 	int ret;
105 	int pports;
106 	int range[2];
107 	struct ctl_table tmp = {
108 		.data = &pports,
109 		.maxlen = sizeof(pports),
110 		.mode = table->mode,
111 		.extra1 = &ip_privileged_port_min,
112 		.extra2 = &ip_privileged_port_max,
113 	};
114 
115 	pports = net->ipv4.sysctl_ip_prot_sock;
116 
117 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
118 
119 	if (write && ret == 0) {
120 		inet_get_local_port_range(net, &range[0], &range[1]);
121 		/* Ensure that the local port range doesn't overlap with the
122 		 * privileged port range.
123 		 */
124 		if (range[0] < pports)
125 			ret = -EINVAL;
126 		else
127 			net->ipv4.sysctl_ip_prot_sock = pports;
128 	}
129 
130 	return ret;
131 }
132 
133 static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high)
134 {
135 	kgid_t *data = table->data;
136 	struct net *net =
137 		container_of(table->data, struct net, ipv4.ping_group_range.range);
138 	unsigned int seq;
139 	do {
140 		seq = read_seqbegin(&net->ipv4.ping_group_range.lock);
141 
142 		*low = data[0];
143 		*high = data[1];
144 	} while (read_seqretry(&net->ipv4.ping_group_range.lock, seq));
145 }
146 
147 /* Update system visible IP port range */
148 static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high)
149 {
150 	kgid_t *data = table->data;
151 	struct net *net =
152 		container_of(table->data, struct net, ipv4.ping_group_range.range);
153 	write_seqlock(&net->ipv4.ping_group_range.lock);
154 	data[0] = low;
155 	data[1] = high;
156 	write_sequnlock(&net->ipv4.ping_group_range.lock);
157 }
158 
159 /* Validate changes from /proc interface. */
160 static int ipv4_ping_group_range(struct ctl_table *table, int write,
161 				 void __user *buffer,
162 				 size_t *lenp, loff_t *ppos)
163 {
164 	struct user_namespace *user_ns = current_user_ns();
165 	int ret;
166 	gid_t urange[2];
167 	kgid_t low, high;
168 	struct ctl_table tmp = {
169 		.data = &urange,
170 		.maxlen = sizeof(urange),
171 		.mode = table->mode,
172 		.extra1 = &ip_ping_group_range_min,
173 		.extra2 = &ip_ping_group_range_max,
174 	};
175 
176 	inet_get_ping_group_range_table(table, &low, &high);
177 	urange[0] = from_kgid_munged(user_ns, low);
178 	urange[1] = from_kgid_munged(user_ns, high);
179 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
180 
181 	if (write && ret == 0) {
182 		low = make_kgid(user_ns, urange[0]);
183 		high = make_kgid(user_ns, urange[1]);
184 		if (!gid_valid(low) || !gid_valid(high) ||
185 		    (urange[1] < urange[0]) || gid_lt(high, low)) {
186 			low = make_kgid(&init_user_ns, 1);
187 			high = make_kgid(&init_user_ns, 0);
188 		}
189 		set_ping_group_range(table, low, high);
190 	}
191 
192 	return ret;
193 }
194 
195 static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
196 				       void __user *buffer, size_t *lenp, loff_t *ppos)
197 {
198 	char val[TCP_CA_NAME_MAX];
199 	struct ctl_table tbl = {
200 		.data = val,
201 		.maxlen = TCP_CA_NAME_MAX,
202 	};
203 	int ret;
204 
205 	tcp_get_default_congestion_control(val);
206 
207 	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
208 	if (write && ret == 0)
209 		ret = tcp_set_default_congestion_control(val);
210 	return ret;
211 }
212 
213 static int proc_tcp_available_congestion_control(struct ctl_table *ctl,
214 						 int write,
215 						 void __user *buffer, size_t *lenp,
216 						 loff_t *ppos)
217 {
218 	struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, };
219 	int ret;
220 
221 	tbl.data = kmalloc(tbl.maxlen, GFP_USER);
222 	if (!tbl.data)
223 		return -ENOMEM;
224 	tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX);
225 	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
226 	kfree(tbl.data);
227 	return ret;
228 }
229 
230 static int proc_allowed_congestion_control(struct ctl_table *ctl,
231 					   int write,
232 					   void __user *buffer, size_t *lenp,
233 					   loff_t *ppos)
234 {
235 	struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
236 	int ret;
237 
238 	tbl.data = kmalloc(tbl.maxlen, GFP_USER);
239 	if (!tbl.data)
240 		return -ENOMEM;
241 
242 	tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
243 	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
244 	if (write && ret == 0)
245 		ret = tcp_set_allowed_congestion_control(tbl.data);
246 	kfree(tbl.data);
247 	return ret;
248 }
249 
250 static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
251 				 void __user *buffer, size_t *lenp,
252 				 loff_t *ppos)
253 {
254 	struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
255 	struct tcp_fastopen_context *ctxt;
256 	int ret;
257 	u32  user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
258 
259 	tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
260 	if (!tbl.data)
261 		return -ENOMEM;
262 
263 	rcu_read_lock();
264 	ctxt = rcu_dereference(tcp_fastopen_ctx);
265 	if (ctxt)
266 		memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
267 	else
268 		memset(user_key, 0, sizeof(user_key));
269 	rcu_read_unlock();
270 
271 	snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
272 		user_key[0], user_key[1], user_key[2], user_key[3]);
273 	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
274 
275 	if (write && ret == 0) {
276 		if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1,
277 			   user_key + 2, user_key + 3) != 4) {
278 			ret = -EINVAL;
279 			goto bad_key;
280 		}
281 		/* Generate a dummy secret but don't publish it. This
282 		 * is needed so we don't regenerate a new key on the
283 		 * first invocation of tcp_fastopen_cookie_gen
284 		 */
285 		tcp_fastopen_init_key_once(false);
286 		tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
287 	}
288 
289 bad_key:
290 	pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
291 	       user_key[0], user_key[1], user_key[2], user_key[3],
292 	       (char *)tbl.data, ret);
293 	kfree(tbl.data);
294 	return ret;
295 }
296 
297 static struct ctl_table ipv4_table[] = {
298 	{
299 		.procname	= "tcp_timestamps",
300 		.data		= &sysctl_tcp_timestamps,
301 		.maxlen		= sizeof(int),
302 		.mode		= 0644,
303 		.proc_handler	= proc_dointvec
304 	},
305 	{
306 		.procname	= "tcp_window_scaling",
307 		.data		= &sysctl_tcp_window_scaling,
308 		.maxlen		= sizeof(int),
309 		.mode		= 0644,
310 		.proc_handler	= proc_dointvec
311 	},
312 	{
313 		.procname	= "tcp_sack",
314 		.data		= &sysctl_tcp_sack,
315 		.maxlen		= sizeof(int),
316 		.mode		= 0644,
317 		.proc_handler	= proc_dointvec
318 	},
319 	{
320 		.procname	= "tcp_retrans_collapse",
321 		.data		= &sysctl_tcp_retrans_collapse,
322 		.maxlen		= sizeof(int),
323 		.mode		= 0644,
324 		.proc_handler	= proc_dointvec
325 	},
326 	{
327 		.procname	= "tcp_max_orphans",
328 		.data		= &sysctl_tcp_max_orphans,
329 		.maxlen		= sizeof(int),
330 		.mode		= 0644,
331 		.proc_handler	= proc_dointvec
332 	},
333 	{
334 		.procname	= "tcp_fastopen",
335 		.data		= &sysctl_tcp_fastopen,
336 		.maxlen		= sizeof(int),
337 		.mode		= 0644,
338 		.proc_handler	= proc_dointvec,
339 	},
340 	{
341 		.procname	= "tcp_fastopen_key",
342 		.mode		= 0600,
343 		.maxlen		= ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
344 		.proc_handler	= proc_tcp_fastopen_key,
345 	},
346 	{
347 		.procname	= "tcp_abort_on_overflow",
348 		.data		= &sysctl_tcp_abort_on_overflow,
349 		.maxlen		= sizeof(int),
350 		.mode		= 0644,
351 		.proc_handler	= proc_dointvec
352 	},
353 	{
354 		.procname	= "tcp_stdurg",
355 		.data		= &sysctl_tcp_stdurg,
356 		.maxlen		= sizeof(int),
357 		.mode		= 0644,
358 		.proc_handler	= proc_dointvec
359 	},
360 	{
361 		.procname	= "tcp_rfc1337",
362 		.data		= &sysctl_tcp_rfc1337,
363 		.maxlen		= sizeof(int),
364 		.mode		= 0644,
365 		.proc_handler	= proc_dointvec
366 	},
367 	{
368 		.procname	= "inet_peer_threshold",
369 		.data		= &inet_peer_threshold,
370 		.maxlen		= sizeof(int),
371 		.mode		= 0644,
372 		.proc_handler	= proc_dointvec
373 	},
374 	{
375 		.procname	= "inet_peer_minttl",
376 		.data		= &inet_peer_minttl,
377 		.maxlen		= sizeof(int),
378 		.mode		= 0644,
379 		.proc_handler	= proc_dointvec_jiffies,
380 	},
381 	{
382 		.procname	= "inet_peer_maxttl",
383 		.data		= &inet_peer_maxttl,
384 		.maxlen		= sizeof(int),
385 		.mode		= 0644,
386 		.proc_handler	= proc_dointvec_jiffies,
387 	},
388 	{
389 		.procname	= "tcp_fack",
390 		.data		= &sysctl_tcp_fack,
391 		.maxlen		= sizeof(int),
392 		.mode		= 0644,
393 		.proc_handler	= proc_dointvec
394 	},
395 	{
396 		.procname	= "tcp_recovery",
397 		.data		= &sysctl_tcp_recovery,
398 		.maxlen		= sizeof(int),
399 		.mode		= 0644,
400 		.proc_handler	= proc_dointvec,
401 	},
402 	{
403 		.procname	= "tcp_max_reordering",
404 		.data		= &sysctl_tcp_max_reordering,
405 		.maxlen		= sizeof(int),
406 		.mode		= 0644,
407 		.proc_handler	= proc_dointvec
408 	},
409 	{
410 		.procname	= "tcp_dsack",
411 		.data		= &sysctl_tcp_dsack,
412 		.maxlen		= sizeof(int),
413 		.mode		= 0644,
414 		.proc_handler	= proc_dointvec
415 	},
416 	{
417 		.procname	= "tcp_mem",
418 		.maxlen		= sizeof(sysctl_tcp_mem),
419 		.data		= &sysctl_tcp_mem,
420 		.mode		= 0644,
421 		.proc_handler	= proc_doulongvec_minmax,
422 	},
423 	{
424 		.procname	= "tcp_wmem",
425 		.data		= &sysctl_tcp_wmem,
426 		.maxlen		= sizeof(sysctl_tcp_wmem),
427 		.mode		= 0644,
428 		.proc_handler	= proc_dointvec_minmax,
429 		.extra1		= &one,
430 	},
431 	{
432 		.procname	= "tcp_rmem",
433 		.data		= &sysctl_tcp_rmem,
434 		.maxlen		= sizeof(sysctl_tcp_rmem),
435 		.mode		= 0644,
436 		.proc_handler	= proc_dointvec_minmax,
437 		.extra1		= &one,
438 	},
439 	{
440 		.procname	= "tcp_app_win",
441 		.data		= &sysctl_tcp_app_win,
442 		.maxlen		= sizeof(int),
443 		.mode		= 0644,
444 		.proc_handler	= proc_dointvec
445 	},
446 	{
447 		.procname	= "tcp_adv_win_scale",
448 		.data		= &sysctl_tcp_adv_win_scale,
449 		.maxlen		= sizeof(int),
450 		.mode		= 0644,
451 		.proc_handler	= proc_dointvec_minmax,
452 		.extra1		= &tcp_adv_win_scale_min,
453 		.extra2		= &tcp_adv_win_scale_max,
454 	},
455 	{
456 		.procname	= "tcp_frto",
457 		.data		= &sysctl_tcp_frto,
458 		.maxlen		= sizeof(int),
459 		.mode		= 0644,
460 		.proc_handler	= proc_dointvec
461 	},
462 	{
463 		.procname	= "tcp_min_rtt_wlen",
464 		.data		= &sysctl_tcp_min_rtt_wlen,
465 		.maxlen		= sizeof(int),
466 		.mode		= 0644,
467 		.proc_handler	= proc_dointvec
468 	},
469 	{
470 		.procname	= "tcp_low_latency",
471 		.data		= &sysctl_tcp_low_latency,
472 		.maxlen		= sizeof(int),
473 		.mode		= 0644,
474 		.proc_handler	= proc_dointvec
475 	},
476 	{
477 		.procname	= "tcp_no_metrics_save",
478 		.data		= &sysctl_tcp_nometrics_save,
479 		.maxlen		= sizeof(int),
480 		.mode		= 0644,
481 		.proc_handler	= proc_dointvec,
482 	},
483 	{
484 		.procname	= "tcp_moderate_rcvbuf",
485 		.data		= &sysctl_tcp_moderate_rcvbuf,
486 		.maxlen		= sizeof(int),
487 		.mode		= 0644,
488 		.proc_handler	= proc_dointvec,
489 	},
490 	{
491 		.procname	= "tcp_tso_win_divisor",
492 		.data		= &sysctl_tcp_tso_win_divisor,
493 		.maxlen		= sizeof(int),
494 		.mode		= 0644,
495 		.proc_handler	= proc_dointvec,
496 	},
497 	{
498 		.procname	= "tcp_congestion_control",
499 		.mode		= 0644,
500 		.maxlen		= TCP_CA_NAME_MAX,
501 		.proc_handler	= proc_tcp_congestion_control,
502 	},
503 	{
504 		.procname	= "tcp_workaround_signed_windows",
505 		.data		= &sysctl_tcp_workaround_signed_windows,
506 		.maxlen		= sizeof(int),
507 		.mode		= 0644,
508 		.proc_handler	= proc_dointvec
509 	},
510 	{
511 		.procname	= "tcp_limit_output_bytes",
512 		.data		= &sysctl_tcp_limit_output_bytes,
513 		.maxlen		= sizeof(int),
514 		.mode		= 0644,
515 		.proc_handler	= proc_dointvec
516 	},
517 	{
518 		.procname	= "tcp_challenge_ack_limit",
519 		.data		= &sysctl_tcp_challenge_ack_limit,
520 		.maxlen		= sizeof(int),
521 		.mode		= 0644,
522 		.proc_handler	= proc_dointvec
523 	},
524 	{
525 		.procname	= "tcp_slow_start_after_idle",
526 		.data		= &sysctl_tcp_slow_start_after_idle,
527 		.maxlen		= sizeof(int),
528 		.mode		= 0644,
529 		.proc_handler	= proc_dointvec
530 	},
531 #ifdef CONFIG_NETLABEL
532 	{
533 		.procname	= "cipso_cache_enable",
534 		.data		= &cipso_v4_cache_enabled,
535 		.maxlen		= sizeof(int),
536 		.mode		= 0644,
537 		.proc_handler	= proc_dointvec,
538 	},
539 	{
540 		.procname	= "cipso_cache_bucket_size",
541 		.data		= &cipso_v4_cache_bucketsize,
542 		.maxlen		= sizeof(int),
543 		.mode		= 0644,
544 		.proc_handler	= proc_dointvec,
545 	},
546 	{
547 		.procname	= "cipso_rbm_optfmt",
548 		.data		= &cipso_v4_rbm_optfmt,
549 		.maxlen		= sizeof(int),
550 		.mode		= 0644,
551 		.proc_handler	= proc_dointvec,
552 	},
553 	{
554 		.procname	= "cipso_rbm_strictvalid",
555 		.data		= &cipso_v4_rbm_strictvalid,
556 		.maxlen		= sizeof(int),
557 		.mode		= 0644,
558 		.proc_handler	= proc_dointvec,
559 	},
560 #endif /* CONFIG_NETLABEL */
561 	{
562 		.procname	= "tcp_available_congestion_control",
563 		.maxlen		= TCP_CA_BUF_MAX,
564 		.mode		= 0444,
565 		.proc_handler   = proc_tcp_available_congestion_control,
566 	},
567 	{
568 		.procname	= "tcp_allowed_congestion_control",
569 		.maxlen		= TCP_CA_BUF_MAX,
570 		.mode		= 0644,
571 		.proc_handler   = proc_allowed_congestion_control,
572 	},
573 	{
574 		.procname       = "tcp_thin_linear_timeouts",
575 		.data           = &sysctl_tcp_thin_linear_timeouts,
576 		.maxlen         = sizeof(int),
577 		.mode           = 0644,
578 		.proc_handler   = proc_dointvec
579 	},
580 	{
581 		.procname	= "tcp_early_retrans",
582 		.data		= &sysctl_tcp_early_retrans,
583 		.maxlen		= sizeof(int),
584 		.mode		= 0644,
585 		.proc_handler	= proc_dointvec_minmax,
586 		.extra1		= &zero,
587 		.extra2		= &four,
588 	},
589 	{
590 		.procname	= "tcp_min_tso_segs",
591 		.data		= &sysctl_tcp_min_tso_segs,
592 		.maxlen		= sizeof(int),
593 		.mode		= 0644,
594 		.proc_handler	= proc_dointvec_minmax,
595 		.extra1		= &one,
596 		.extra2		= &gso_max_segs,
597 	},
598 	{
599 		.procname	= "tcp_pacing_ss_ratio",
600 		.data		= &sysctl_tcp_pacing_ss_ratio,
601 		.maxlen		= sizeof(int),
602 		.mode		= 0644,
603 		.proc_handler	= proc_dointvec_minmax,
604 		.extra1		= &zero,
605 		.extra2		= &thousand,
606 	},
607 	{
608 		.procname	= "tcp_pacing_ca_ratio",
609 		.data		= &sysctl_tcp_pacing_ca_ratio,
610 		.maxlen		= sizeof(int),
611 		.mode		= 0644,
612 		.proc_handler	= proc_dointvec_minmax,
613 		.extra1		= &zero,
614 		.extra2		= &thousand,
615 	},
616 	{
617 		.procname	= "tcp_autocorking",
618 		.data		= &sysctl_tcp_autocorking,
619 		.maxlen		= sizeof(int),
620 		.mode		= 0644,
621 		.proc_handler	= proc_dointvec_minmax,
622 		.extra1		= &zero,
623 		.extra2		= &one,
624 	},
625 	{
626 		.procname	= "tcp_invalid_ratelimit",
627 		.data		= &sysctl_tcp_invalid_ratelimit,
628 		.maxlen		= sizeof(int),
629 		.mode		= 0644,
630 		.proc_handler	= proc_dointvec_ms_jiffies,
631 	},
632 	{
633 		.procname	= "icmp_msgs_per_sec",
634 		.data		= &sysctl_icmp_msgs_per_sec,
635 		.maxlen		= sizeof(int),
636 		.mode		= 0644,
637 		.proc_handler	= proc_dointvec_minmax,
638 		.extra1		= &zero,
639 	},
640 	{
641 		.procname	= "icmp_msgs_burst",
642 		.data		= &sysctl_icmp_msgs_burst,
643 		.maxlen		= sizeof(int),
644 		.mode		= 0644,
645 		.proc_handler	= proc_dointvec_minmax,
646 		.extra1		= &zero,
647 	},
648 	{
649 		.procname	= "udp_mem",
650 		.data		= &sysctl_udp_mem,
651 		.maxlen		= sizeof(sysctl_udp_mem),
652 		.mode		= 0644,
653 		.proc_handler	= proc_doulongvec_minmax,
654 	},
655 	{
656 		.procname	= "udp_rmem_min",
657 		.data		= &sysctl_udp_rmem_min,
658 		.maxlen		= sizeof(sysctl_udp_rmem_min),
659 		.mode		= 0644,
660 		.proc_handler	= proc_dointvec_minmax,
661 		.extra1		= &one
662 	},
663 	{
664 		.procname	= "udp_wmem_min",
665 		.data		= &sysctl_udp_wmem_min,
666 		.maxlen		= sizeof(sysctl_udp_wmem_min),
667 		.mode		= 0644,
668 		.proc_handler	= proc_dointvec_minmax,
669 		.extra1		= &one
670 	},
671 	{ }
672 };
673 
674 static struct ctl_table ipv4_net_table[] = {
675 	{
676 		.procname	= "icmp_echo_ignore_all",
677 		.data		= &init_net.ipv4.sysctl_icmp_echo_ignore_all,
678 		.maxlen		= sizeof(int),
679 		.mode		= 0644,
680 		.proc_handler	= proc_dointvec
681 	},
682 	{
683 		.procname	= "icmp_echo_ignore_broadcasts",
684 		.data		= &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts,
685 		.maxlen		= sizeof(int),
686 		.mode		= 0644,
687 		.proc_handler	= proc_dointvec
688 	},
689 	{
690 		.procname	= "icmp_ignore_bogus_error_responses",
691 		.data		= &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
692 		.maxlen		= sizeof(int),
693 		.mode		= 0644,
694 		.proc_handler	= proc_dointvec
695 	},
696 	{
697 		.procname	= "icmp_errors_use_inbound_ifaddr",
698 		.data		= &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr,
699 		.maxlen		= sizeof(int),
700 		.mode		= 0644,
701 		.proc_handler	= proc_dointvec
702 	},
703 	{
704 		.procname	= "icmp_ratelimit",
705 		.data		= &init_net.ipv4.sysctl_icmp_ratelimit,
706 		.maxlen		= sizeof(int),
707 		.mode		= 0644,
708 		.proc_handler	= proc_dointvec_ms_jiffies,
709 	},
710 	{
711 		.procname	= "icmp_ratemask",
712 		.data		= &init_net.ipv4.sysctl_icmp_ratemask,
713 		.maxlen		= sizeof(int),
714 		.mode		= 0644,
715 		.proc_handler	= proc_dointvec
716 	},
717 	{
718 		.procname	= "ping_group_range",
719 		.data		= &init_net.ipv4.ping_group_range.range,
720 		.maxlen		= sizeof(gid_t)*2,
721 		.mode		= 0644,
722 		.proc_handler	= ipv4_ping_group_range,
723 	},
724 	{
725 		.procname	= "tcp_ecn",
726 		.data		= &init_net.ipv4.sysctl_tcp_ecn,
727 		.maxlen		= sizeof(int),
728 		.mode		= 0644,
729 		.proc_handler	= proc_dointvec
730 	},
731 	{
732 		.procname	= "tcp_ecn_fallback",
733 		.data		= &init_net.ipv4.sysctl_tcp_ecn_fallback,
734 		.maxlen		= sizeof(int),
735 		.mode		= 0644,
736 		.proc_handler	= proc_dointvec
737 	},
738 	{
739 		.procname	= "ip_dynaddr",
740 		.data		= &init_net.ipv4.sysctl_ip_dynaddr,
741 		.maxlen		= sizeof(int),
742 		.mode		= 0644,
743 		.proc_handler	= proc_dointvec
744 	},
745 	{
746 		.procname	= "ip_early_demux",
747 		.data		= &init_net.ipv4.sysctl_ip_early_demux,
748 		.maxlen		= sizeof(int),
749 		.mode		= 0644,
750 		.proc_handler	= proc_dointvec
751 	},
752 	{
753 		.procname	= "ip_default_ttl",
754 		.data		= &init_net.ipv4.sysctl_ip_default_ttl,
755 		.maxlen		= sizeof(int),
756 		.mode		= 0644,
757 		.proc_handler	= proc_dointvec_minmax,
758 		.extra1		= &ip_ttl_min,
759 		.extra2		= &ip_ttl_max,
760 	},
761 	{
762 		.procname	= "ip_local_port_range",
763 		.maxlen		= sizeof(init_net.ipv4.ip_local_ports.range),
764 		.data		= &init_net.ipv4.ip_local_ports.range,
765 		.mode		= 0644,
766 		.proc_handler	= ipv4_local_port_range,
767 	},
768 	{
769 		.procname	= "ip_local_reserved_ports",
770 		.data		= &init_net.ipv4.sysctl_local_reserved_ports,
771 		.maxlen		= 65536,
772 		.mode		= 0644,
773 		.proc_handler	= proc_do_large_bitmap,
774 	},
775 	{
776 		.procname	= "ip_no_pmtu_disc",
777 		.data		= &init_net.ipv4.sysctl_ip_no_pmtu_disc,
778 		.maxlen		= sizeof(int),
779 		.mode		= 0644,
780 		.proc_handler	= proc_dointvec
781 	},
782 	{
783 		.procname	= "ip_forward_use_pmtu",
784 		.data		= &init_net.ipv4.sysctl_ip_fwd_use_pmtu,
785 		.maxlen		= sizeof(int),
786 		.mode		= 0644,
787 		.proc_handler	= proc_dointvec,
788 	},
789 	{
790 		.procname	= "ip_nonlocal_bind",
791 		.data		= &init_net.ipv4.sysctl_ip_nonlocal_bind,
792 		.maxlen		= sizeof(int),
793 		.mode		= 0644,
794 		.proc_handler	= proc_dointvec
795 	},
796 	{
797 		.procname	= "fwmark_reflect",
798 		.data		= &init_net.ipv4.sysctl_fwmark_reflect,
799 		.maxlen		= sizeof(int),
800 		.mode		= 0644,
801 		.proc_handler	= proc_dointvec,
802 	},
803 	{
804 		.procname	= "tcp_fwmark_accept",
805 		.data		= &init_net.ipv4.sysctl_tcp_fwmark_accept,
806 		.maxlen		= sizeof(int),
807 		.mode		= 0644,
808 		.proc_handler	= proc_dointvec,
809 	},
810 #ifdef CONFIG_NET_L3_MASTER_DEV
811 	{
812 		.procname	= "tcp_l3mdev_accept",
813 		.data		= &init_net.ipv4.sysctl_tcp_l3mdev_accept,
814 		.maxlen		= sizeof(int),
815 		.mode		= 0644,
816 		.proc_handler	= proc_dointvec_minmax,
817 		.extra1		= &zero,
818 		.extra2		= &one,
819 	},
820 #endif
821 	{
822 		.procname	= "tcp_mtu_probing",
823 		.data		= &init_net.ipv4.sysctl_tcp_mtu_probing,
824 		.maxlen		= sizeof(int),
825 		.mode		= 0644,
826 		.proc_handler	= proc_dointvec,
827 	},
828 	{
829 		.procname	= "tcp_base_mss",
830 		.data		= &init_net.ipv4.sysctl_tcp_base_mss,
831 		.maxlen		= sizeof(int),
832 		.mode		= 0644,
833 		.proc_handler	= proc_dointvec,
834 	},
835 	{
836 		.procname	= "tcp_probe_threshold",
837 		.data		= &init_net.ipv4.sysctl_tcp_probe_threshold,
838 		.maxlen		= sizeof(int),
839 		.mode		= 0644,
840 		.proc_handler	= proc_dointvec,
841 	},
842 	{
843 		.procname	= "tcp_probe_interval",
844 		.data		= &init_net.ipv4.sysctl_tcp_probe_interval,
845 		.maxlen		= sizeof(int),
846 		.mode		= 0644,
847 		.proc_handler	= proc_dointvec,
848 	},
849 	{
850 		.procname	= "igmp_link_local_mcast_reports",
851 		.data		= &init_net.ipv4.sysctl_igmp_llm_reports,
852 		.maxlen		= sizeof(int),
853 		.mode		= 0644,
854 		.proc_handler	= proc_dointvec
855 	},
856 	{
857 		.procname	= "igmp_max_memberships",
858 		.data		= &init_net.ipv4.sysctl_igmp_max_memberships,
859 		.maxlen		= sizeof(int),
860 		.mode		= 0644,
861 		.proc_handler	= proc_dointvec
862 	},
863 	{
864 		.procname	= "igmp_max_msf",
865 		.data		= &init_net.ipv4.sysctl_igmp_max_msf,
866 		.maxlen		= sizeof(int),
867 		.mode		= 0644,
868 		.proc_handler	= proc_dointvec
869 	},
870 #ifdef CONFIG_IP_MULTICAST
871 	{
872 		.procname	= "igmp_qrv",
873 		.data		= &init_net.ipv4.sysctl_igmp_qrv,
874 		.maxlen		= sizeof(int),
875 		.mode		= 0644,
876 		.proc_handler	= proc_dointvec_minmax,
877 		.extra1		= &one
878 	},
879 #endif
880 	{
881 		.procname	= "tcp_keepalive_time",
882 		.data		= &init_net.ipv4.sysctl_tcp_keepalive_time,
883 		.maxlen		= sizeof(int),
884 		.mode		= 0644,
885 		.proc_handler	= proc_dointvec_jiffies,
886 	},
887 	{
888 		.procname	= "tcp_keepalive_probes",
889 		.data		= &init_net.ipv4.sysctl_tcp_keepalive_probes,
890 		.maxlen		= sizeof(int),
891 		.mode		= 0644,
892 		.proc_handler	= proc_dointvec
893 	},
894 	{
895 		.procname	= "tcp_keepalive_intvl",
896 		.data		= &init_net.ipv4.sysctl_tcp_keepalive_intvl,
897 		.maxlen		= sizeof(int),
898 		.mode		= 0644,
899 		.proc_handler	= proc_dointvec_jiffies,
900 	},
901 	{
902 		.procname	= "tcp_syn_retries",
903 		.data		= &init_net.ipv4.sysctl_tcp_syn_retries,
904 		.maxlen		= sizeof(int),
905 		.mode		= 0644,
906 		.proc_handler	= proc_dointvec_minmax,
907 		.extra1		= &tcp_syn_retries_min,
908 		.extra2		= &tcp_syn_retries_max
909 	},
910 	{
911 		.procname	= "tcp_synack_retries",
912 		.data		= &init_net.ipv4.sysctl_tcp_synack_retries,
913 		.maxlen		= sizeof(int),
914 		.mode		= 0644,
915 		.proc_handler	= proc_dointvec
916 	},
917 #ifdef CONFIG_SYN_COOKIES
918 	{
919 		.procname	= "tcp_syncookies",
920 		.data		= &init_net.ipv4.sysctl_tcp_syncookies,
921 		.maxlen		= sizeof(int),
922 		.mode		= 0644,
923 		.proc_handler	= proc_dointvec
924 	},
925 #endif
926 	{
927 		.procname	= "tcp_reordering",
928 		.data		= &init_net.ipv4.sysctl_tcp_reordering,
929 		.maxlen		= sizeof(int),
930 		.mode		= 0644,
931 		.proc_handler	= proc_dointvec
932 	},
933 	{
934 		.procname	= "tcp_retries1",
935 		.data		= &init_net.ipv4.sysctl_tcp_retries1,
936 		.maxlen		= sizeof(int),
937 		.mode		= 0644,
938 		.proc_handler	= proc_dointvec_minmax,
939 		.extra2		= &tcp_retr1_max
940 	},
941 	{
942 		.procname	= "tcp_retries2",
943 		.data		= &init_net.ipv4.sysctl_tcp_retries2,
944 		.maxlen		= sizeof(int),
945 		.mode		= 0644,
946 		.proc_handler	= proc_dointvec
947 	},
948 	{
949 		.procname	= "tcp_orphan_retries",
950 		.data		= &init_net.ipv4.sysctl_tcp_orphan_retries,
951 		.maxlen		= sizeof(int),
952 		.mode		= 0644,
953 		.proc_handler	= proc_dointvec
954 	},
955 	{
956 		.procname	= "tcp_fin_timeout",
957 		.data		= &init_net.ipv4.sysctl_tcp_fin_timeout,
958 		.maxlen		= sizeof(int),
959 		.mode		= 0644,
960 		.proc_handler	= proc_dointvec_jiffies,
961 	},
962 	{
963 		.procname	= "tcp_notsent_lowat",
964 		.data		= &init_net.ipv4.sysctl_tcp_notsent_lowat,
965 		.maxlen		= sizeof(unsigned int),
966 		.mode		= 0644,
967 		.proc_handler	= proc_douintvec,
968 	},
969 	{
970 		.procname	= "tcp_tw_reuse",
971 		.data		= &init_net.ipv4.sysctl_tcp_tw_reuse,
972 		.maxlen		= sizeof(int),
973 		.mode		= 0644,
974 		.proc_handler	= proc_dointvec
975 	},
976 	{
977 		.procname	= "tcp_max_tw_buckets",
978 		.data		= &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets,
979 		.maxlen		= sizeof(int),
980 		.mode		= 0644,
981 		.proc_handler	= proc_dointvec
982 	},
983 	{
984 		.procname	= "tcp_tw_recycle",
985 		.data		= &init_net.ipv4.tcp_death_row.sysctl_tw_recycle,
986 		.maxlen		= sizeof(int),
987 		.mode		= 0644,
988 		.proc_handler	= proc_dointvec
989 	},
990 	{
991 		.procname	= "tcp_max_syn_backlog",
992 		.data		= &init_net.ipv4.sysctl_max_syn_backlog,
993 		.maxlen		= sizeof(int),
994 		.mode		= 0644,
995 		.proc_handler	= proc_dointvec
996 	},
997 #ifdef CONFIG_IP_ROUTE_MULTIPATH
998 	{
999 		.procname	= "fib_multipath_use_neigh",
1000 		.data		= &init_net.ipv4.sysctl_fib_multipath_use_neigh,
1001 		.maxlen		= sizeof(int),
1002 		.mode		= 0644,
1003 		.proc_handler	= proc_dointvec_minmax,
1004 		.extra1		= &zero,
1005 		.extra2		= &one,
1006 	},
1007 #endif
1008 	{
1009 		.procname	= "ip_unprivileged_port_start",
1010 		.maxlen		= sizeof(int),
1011 		.data		= &init_net.ipv4.sysctl_ip_prot_sock,
1012 		.mode		= 0644,
1013 		.proc_handler	= ipv4_privileged_ports,
1014 	},
1015 #ifdef CONFIG_NET_L3_MASTER_DEV
1016 	{
1017 		.procname	= "udp_l3mdev_accept",
1018 		.data		= &init_net.ipv4.sysctl_udp_l3mdev_accept,
1019 		.maxlen		= sizeof(int),
1020 		.mode		= 0644,
1021 		.proc_handler	= proc_dointvec_minmax,
1022 		.extra1		= &zero,
1023 		.extra2		= &one,
1024 	},
1025 #endif
1026 	{ }
1027 };
1028 
1029 static __net_init int ipv4_sysctl_init_net(struct net *net)
1030 {
1031 	struct ctl_table *table;
1032 
1033 	table = ipv4_net_table;
1034 	if (!net_eq(net, &init_net)) {
1035 		int i;
1036 
1037 		table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL);
1038 		if (!table)
1039 			goto err_alloc;
1040 
1041 		/* Update the variables to point into the current struct net */
1042 		for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++)
1043 			table[i].data += (void *)net - (void *)&init_net;
1044 	}
1045 
1046 	net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
1047 	if (!net->ipv4.ipv4_hdr)
1048 		goto err_reg;
1049 
1050 	net->ipv4.sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
1051 	if (!net->ipv4.sysctl_local_reserved_ports)
1052 		goto err_ports;
1053 
1054 	return 0;
1055 
1056 err_ports:
1057 	unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
1058 err_reg:
1059 	if (!net_eq(net, &init_net))
1060 		kfree(table);
1061 err_alloc:
1062 	return -ENOMEM;
1063 }
1064 
1065 static __net_exit void ipv4_sysctl_exit_net(struct net *net)
1066 {
1067 	struct ctl_table *table;
1068 
1069 	kfree(net->ipv4.sysctl_local_reserved_ports);
1070 	table = net->ipv4.ipv4_hdr->ctl_table_arg;
1071 	unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
1072 	kfree(table);
1073 }
1074 
1075 static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
1076 	.init = ipv4_sysctl_init_net,
1077 	.exit = ipv4_sysctl_exit_net,
1078 };
1079 
1080 static __init int sysctl_ipv4_init(void)
1081 {
1082 	struct ctl_table_header *hdr;
1083 
1084 	hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table);
1085 	if (!hdr)
1086 		return -ENOMEM;
1087 
1088 	if (register_pernet_subsys(&ipv4_sysctl_ops)) {
1089 		unregister_net_sysctl_table(hdr);
1090 		return -ENOMEM;
1091 	}
1092 
1093 	return 0;
1094 }
1095 
1096 __initcall(sysctl_ipv4_init);
1097