xref: /openbmc/linux/kernel/sysctl.c (revision 64c70b1c)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/proc_fs.h>
27 #include <linux/capability.h>
28 #include <linux/ctype.h>
29 #include <linux/utsname.h>
30 #include <linux/capability.h>
31 #include <linux/smp_lock.h>
32 #include <linux/init.h>
33 #include <linux/kernel.h>
34 #include <linux/kobject.h>
35 #include <linux/net.h>
36 #include <linux/sysrq.h>
37 #include <linux/highuid.h>
38 #include <linux/writeback.h>
39 #include <linux/hugetlb.h>
40 #include <linux/security.h>
41 #include <linux/initrd.h>
42 #include <linux/times.h>
43 #include <linux/limits.h>
44 #include <linux/dcache.h>
45 #include <linux/syscalls.h>
46 #include <linux/nfs_fs.h>
47 #include <linux/acpi.h>
48 
49 #include <asm/uaccess.h>
50 #include <asm/processor.h>
51 
52 extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
53                      void __user *buffer, size_t *lenp, loff_t *ppos);
54 
55 #ifdef CONFIG_X86
56 #include <asm/nmi.h>
57 #include <asm/stacktrace.h>
58 #endif
59 
60 #if defined(CONFIG_SYSCTL)
61 
62 /* External variables not in a header file. */
63 extern int C_A_D;
64 extern int sysctl_overcommit_memory;
65 extern int sysctl_overcommit_ratio;
66 extern int sysctl_panic_on_oom;
67 extern int max_threads;
68 extern int core_uses_pid;
69 extern int suid_dumpable;
70 extern char core_pattern[];
71 extern int pid_max;
72 extern int min_free_kbytes;
73 extern int printk_ratelimit_jiffies;
74 extern int printk_ratelimit_burst;
75 extern int pid_max_min, pid_max_max;
76 extern int sysctl_drop_caches;
77 extern int percpu_pagelist_fraction;
78 extern int compat_log;
79 extern int maps_protect;
80 extern int sysctl_stat_interval;
81 
82 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
83 static int maxolduid = 65535;
84 static int minolduid;
85 static int min_percpu_pagelist_fract = 8;
86 
87 static int ngroups_max = NGROUPS_MAX;
88 
89 #ifdef CONFIG_KMOD
90 extern char modprobe_path[];
91 #endif
92 #ifdef CONFIG_CHR_DEV_SG
93 extern int sg_big_buff;
94 #endif
95 
96 #ifdef __sparc__
97 extern char reboot_command [];
98 extern int stop_a_enabled;
99 extern int scons_pwroff;
100 #endif
101 
102 #ifdef __hppa__
103 extern int pwrsw_enabled;
104 extern int unaligned_enabled;
105 #endif
106 
107 #ifdef CONFIG_S390
108 #ifdef CONFIG_MATHEMU
109 extern int sysctl_ieee_emulation_warnings;
110 #endif
111 extern int sysctl_userprocess_debug;
112 extern int spin_retry;
113 #endif
114 
115 extern int sysctl_hz_timer;
116 
117 #ifdef CONFIG_BSD_PROCESS_ACCT
118 extern int acct_parm[];
119 #endif
120 
121 #ifdef CONFIG_IA64
122 extern int no_unaligned_warning;
123 #endif
124 
125 #ifdef CONFIG_RT_MUTEXES
126 extern int max_lock_depth;
127 #endif
128 
129 #ifdef CONFIG_SYSCTL_SYSCALL
130 static int parse_table(int __user *, int, void __user *, size_t __user *,
131 		void __user *, size_t, ctl_table *);
132 #endif
133 
134 
135 #ifdef CONFIG_PROC_SYSCTL
136 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
137 		  void __user *buffer, size_t *lenp, loff_t *ppos);
138 static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
139 			       void __user *buffer, size_t *lenp, loff_t *ppos);
140 #endif
141 
142 static ctl_table root_table[];
143 static struct ctl_table_header root_table_header =
144 	{ root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
145 
146 static ctl_table kern_table[];
147 static ctl_table vm_table[];
148 static ctl_table fs_table[];
149 static ctl_table debug_table[];
150 static ctl_table dev_table[];
151 extern ctl_table random_table[];
152 #ifdef CONFIG_UNIX98_PTYS
153 extern ctl_table pty_table[];
154 #endif
155 #ifdef CONFIG_INOTIFY_USER
156 extern ctl_table inotify_table[];
157 #endif
158 
159 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
160 int sysctl_legacy_va_layout;
161 #endif
162 
163 
164 /* The default sysctl tables: */
165 
/*
 * Top level of the default sysctl tree.  Each entry is a directory
 * (mode 0555, no .data) whose .child points at the table holding its
 * entries.  The "net" directory is only present with CONFIG_NET.  The
 * table ends with an entry whose ctl_name is 0.
 */
166 static ctl_table root_table[] = {
167 	{
168 		.ctl_name	= CTL_KERN,
169 		.procname	= "kernel",
170 		.mode		= 0555,
171 		.child		= kern_table,
172 	},
173 	{
174 		.ctl_name	= CTL_VM,
175 		.procname	= "vm",
176 		.mode		= 0555,
177 		.child		= vm_table,
178 	},
179 #ifdef CONFIG_NET
180 	{
181 		.ctl_name	= CTL_NET,
182 		.procname	= "net",
183 		.mode		= 0555,
184 		.child		= net_table,
185 	},
186 #endif
187 	{
188 		.ctl_name	= CTL_FS,
189 		.procname	= "fs",
190 		.mode		= 0555,
191 		.child		= fs_table,
192 	},
193 	{
194 		.ctl_name	= CTL_DEBUG,
195 		.procname	= "debug",
196 		.mode		= 0555,
197 		.child		= debug_table,
198 	},
199 	{
200 		.ctl_name	= CTL_DEV,
201 		.procname	= "dev",
202 		.mode		= 0555,
203 		.child		= dev_table,
204 	},
205 
	/* terminating entry */
206 	{ .ctl_name = 0 }
207 };
208 
209 #ifdef CONFIG_SCHED_DEBUG
/*
 * Lower/upper bounds for the scheduler tunables in kern_table.
 *
 * They are handed to proc_dointvec_minmax()/sysctl_intvec() through
 * .extra1/.extra2.  Like every other bound given to those handlers in
 * this file (minolduid, maxolduid, pid_max_min, zero, one_hundred) they
 * must be plain ints: an int-sized read of an unsigned long sees only
 * half of the object, which is the wrong half on 64-bit big-endian
 * machines.  All four limits fit comfortably in an int.
 */
static int min_sched_granularity_ns = 100000;		/* 100 usecs */
static int max_sched_granularity_ns = 1000000000;	/* 1 second */
static int min_wakeup_granularity_ns;			/* 0 usecs */
static int max_wakeup_granularity_ns = 1000000000;	/* 1 second */
214 #endif
215 
/*
 * Entries of the "kernel" directory (the child of the CTL_KERN entry in
 * root_table).  A leaf entry names a variable through .data/.maxlen and
 * the handler used to read and write it; entries with .child are
 * sub-directories.  Many entries exist only for a particular
 * architecture or config option, hence the #ifdef blocks.  The table
 * ends with an entry whose ctl_name is 0.
 */
216 static ctl_table kern_table[] = {
217 #ifdef CONFIG_SCHED_DEBUG
	/* scheduler tunables; .extra1/.extra2 carry the min/max bounds */
218 	{
219 		.ctl_name	= CTL_UNNUMBERED,
220 		.procname	= "sched_granularity_ns",
221 		.data		= &sysctl_sched_granularity,
222 		.maxlen		= sizeof(unsigned int),
223 		.mode		= 0644,
224 		.proc_handler	= &proc_dointvec_minmax,
225 		.strategy	= &sysctl_intvec,
226 		.extra1		= &min_sched_granularity_ns,
227 		.extra2		= &max_sched_granularity_ns,
228 	},
229 	{
230 		.ctl_name	= CTL_UNNUMBERED,
231 		.procname	= "sched_wakeup_granularity_ns",
232 		.data		= &sysctl_sched_wakeup_granularity,
233 		.maxlen		= sizeof(unsigned int),
234 		.mode		= 0644,
235 		.proc_handler	= &proc_dointvec_minmax,
236 		.strategy	= &sysctl_intvec,
237 		.extra1		= &min_wakeup_granularity_ns,
238 		.extra2		= &max_wakeup_granularity_ns,
239 	},
240 	{
241 		.ctl_name	= CTL_UNNUMBERED,
242 		.procname	= "sched_batch_wakeup_granularity_ns",
243 		.data		= &sysctl_sched_batch_wakeup_granularity,
244 		.maxlen		= sizeof(unsigned int),
245 		.mode		= 0644,
246 		.proc_handler	= &proc_dointvec_minmax,
247 		.strategy	= &sysctl_intvec,
248 		.extra1		= &min_wakeup_granularity_ns,
249 		.extra2		= &max_wakeup_granularity_ns,
250 	},
251 	{
252 		.ctl_name	= CTL_UNNUMBERED,
253 		.procname	= "sched_stat_granularity_ns",
254 		.data		= &sysctl_sched_stat_granularity,
255 		.maxlen		= sizeof(unsigned int),
256 		.mode		= 0644,
257 		.proc_handler	= &proc_dointvec_minmax,
258 		.strategy	= &sysctl_intvec,
259 		.extra1		= &min_wakeup_granularity_ns,
260 		.extra2		= &max_wakeup_granularity_ns,
261 	},
262 	{
263 		.ctl_name	= CTL_UNNUMBERED,
264 		.procname	= "sched_runtime_limit_ns",
265 		.data		= &sysctl_sched_runtime_limit,
266 		.maxlen		= sizeof(unsigned int),
267 		.mode		= 0644,
268 		.proc_handler	= &proc_dointvec_minmax,
269 		.strategy	= &sysctl_intvec,
270 		.extra1		= &min_sched_granularity_ns,
271 		.extra2		= &max_sched_granularity_ns,
272 	},
273 	{
274 		.ctl_name	= CTL_UNNUMBERED,
275 		.procname	= "sched_child_runs_first",
276 		.data		= &sysctl_sched_child_runs_first,
277 		.maxlen		= sizeof(unsigned int),
278 		.mode		= 0644,
279 		.proc_handler	= &proc_dointvec,
280 	},
281 	{
282 		.ctl_name	= CTL_UNNUMBERED,
283 		.procname	= "sched_features",
284 		.data		= &sysctl_sched_features,
285 		.maxlen		= sizeof(unsigned int),
286 		.mode		= 0644,
287 		.proc_handler	= &proc_dointvec,
288 	},
289 #endif
290 	{
291 		.ctl_name	= KERN_PANIC,
292 		.procname	= "panic",
293 		.data		= &panic_timeout,
294 		.maxlen		= sizeof(int),
295 		.mode		= 0644,
296 		.proc_handler	= &proc_dointvec,
297 	},
298 	{
299 		.ctl_name	= KERN_CORE_USES_PID,
300 		.procname	= "core_uses_pid",
301 		.data		= &core_uses_pid,
302 		.maxlen		= sizeof(int),
303 		.mode		= 0644,
304 		.proc_handler	= &proc_dointvec,
305 	},
306 	{
307 		.ctl_name	= KERN_CORE_PATTERN,
308 		.procname	= "core_pattern",
309 		.data		= core_pattern,
310 		.maxlen		= CORENAME_MAX_SIZE,
311 		.mode		= 0644,
312 		.proc_handler	= &proc_dostring,
313 		.strategy	= &sysctl_string,
314 	},
315 #ifdef CONFIG_PROC_SYSCTL
316 	{
317 		.ctl_name	= KERN_TAINTED,
318 		.procname	= "tainted",
319 		.data		= &tainted,
320 		.maxlen		= sizeof(int),
321 		.mode		= 0644,
322 		.proc_handler	= &proc_dointvec_taint,
323 	},
324 #endif
325 	{
326 		.ctl_name	= KERN_CAP_BSET,
327 		.procname	= "cap-bound",
328 		.data		= &cap_bset,
329 		.maxlen		= sizeof(kernel_cap_t),
330 		.mode		= 0600,
331 		.proc_handler	= &proc_dointvec_bset,
332 	},
333 #ifdef CONFIG_BLK_DEV_INITRD
334 	{
335 		.ctl_name	= KERN_REALROOTDEV,
336 		.procname	= "real-root-dev",
337 		.data		= &real_root_dev,
338 		.maxlen		= sizeof(int),
339 		.mode		= 0644,
340 		.proc_handler	= &proc_dointvec,
341 	},
342 #endif
343 #ifdef __sparc__
344 	{
345 		.ctl_name	= KERN_SPARC_REBOOT,
346 		.procname	= "reboot-cmd",
347 		.data		= reboot_command,
348 		.maxlen		= 256,
349 		.mode		= 0644,
350 		.proc_handler	= &proc_dostring,
351 		.strategy	= &sysctl_string,
352 	},
353 	{
354 		.ctl_name	= KERN_SPARC_STOP_A,
355 		.procname	= "stop-a",
356 		.data		= &stop_a_enabled,
357 		.maxlen		= sizeof (int),
358 		.mode		= 0644,
359 		.proc_handler	= &proc_dointvec,
360 	},
361 	{
362 		.ctl_name	= KERN_SPARC_SCONS_PWROFF,
363 		.procname	= "scons-poweroff",
364 		.data		= &scons_pwroff,
365 		.maxlen		= sizeof (int),
366 		.mode		= 0644,
367 		.proc_handler	= &proc_dointvec,
368 	},
369 #endif
370 #ifdef __hppa__
371 	{
372 		.ctl_name	= KERN_HPPA_PWRSW,
373 		.procname	= "soft-power",
374 		.data		= &pwrsw_enabled,
375 		.maxlen		= sizeof (int),
376 	 	.mode		= 0644,
377 		.proc_handler	= &proc_dointvec,
378 	},
379 	{
380 		.ctl_name	= KERN_HPPA_UNALIGNED,
381 		.procname	= "unaligned-trap",
382 		.data		= &unaligned_enabled,
383 		.maxlen		= sizeof (int),
384 		.mode		= 0644,
385 		.proc_handler	= &proc_dointvec,
386 	},
387 #endif
388 	{
389 		.ctl_name	= KERN_CTLALTDEL,
390 		.procname	= "ctrl-alt-del",
391 		.data		= &C_A_D,
392 		.maxlen		= sizeof(int),
393 		.mode		= 0644,
394 		.proc_handler	= &proc_dointvec,
395 	},
	/* exposes 4*sizeof(int) bytes starting at console_loglevel */
396 	{
397 		.ctl_name	= KERN_PRINTK,
398 		.procname	= "printk",
399 		.data		= &console_loglevel,
400 		.maxlen		= 4*sizeof(int),
401 		.mode		= 0644,
402 		.proc_handler	= &proc_dointvec,
403 	},
404 #ifdef CONFIG_KMOD
405 	{
406 		.ctl_name	= KERN_MODPROBE,
407 		.procname	= "modprobe",
408 		.data		= &modprobe_path,
409 		.maxlen		= KMOD_PATH_LEN,
410 		.mode		= 0644,
411 		.proc_handler	= &proc_dostring,
412 		.strategy	= &sysctl_string,
413 	},
414 #endif
415 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
416 	{
417 		.ctl_name	= KERN_HOTPLUG,
418 		.procname	= "hotplug",
419 		.data		= &uevent_helper,
420 		.maxlen		= UEVENT_HELPER_PATH_LEN,
421 		.mode		= 0644,
422 		.proc_handler	= &proc_dostring,
423 		.strategy	= &sysctl_string,
424 	},
425 #endif
426 #ifdef CONFIG_CHR_DEV_SG
427 	{
428 		.ctl_name	= KERN_SG_BIG_BUFF,
429 		.procname	= "sg-big-buff",
430 		.data		= &sg_big_buff,
431 		.maxlen		= sizeof (int),
432 		.mode		= 0444,
433 		.proc_handler	= &proc_dointvec,
434 	},
435 #endif
436 #ifdef CONFIG_BSD_PROCESS_ACCT
437 	{
438 		.ctl_name	= KERN_ACCT,
439 		.procname	= "acct",
440 		.data		= &acct_parm,
441 		.maxlen		= 3*sizeof(int),
442 		.mode		= 0644,
443 		.proc_handler	= &proc_dointvec,
444 	},
445 #endif
446 #ifdef CONFIG_MAGIC_SYSRQ
447 	{
448 		.ctl_name	= KERN_SYSRQ,
449 		.procname	= "sysrq",
450 		.data		= &__sysrq_enabled,
451 		.maxlen		= sizeof (int),
452 		.mode		= 0644,
453 		.proc_handler	= &proc_dointvec,
454 	},
455 #endif
456 #ifdef CONFIG_PROC_SYSCTL
	/* no .data here: the entry relies on its proc_do_cad_pid handler */
457 	{
458 		.ctl_name	= KERN_CADPID,
459 		.procname	= "cad_pid",
460 		.data		= NULL,
461 		.maxlen		= sizeof (int),
462 		.mode		= 0600,
463 		.proc_handler	= &proc_do_cad_pid,
464 	},
465 #endif
466 	{
467 		.ctl_name	= KERN_MAX_THREADS,
468 		.procname	= "threads-max",
469 		.data		= &max_threads,
470 		.maxlen		= sizeof(int),
471 		.mode		= 0644,
472 		.proc_handler	= &proc_dointvec,
473 	},
	/* sub-directories whose tables are defined outside this file */
474 	{
475 		.ctl_name	= KERN_RANDOM,
476 		.procname	= "random",
477 		.mode		= 0555,
478 		.child		= random_table,
479 	},
480 #ifdef CONFIG_UNIX98_PTYS
481 	{
482 		.ctl_name	= KERN_PTY,
483 		.procname	= "pty",
484 		.mode		= 0555,
485 		.child		= pty_table,
486 	},
487 #endif
	/* overflow ids are range-checked against minolduid..maxolduid */
488 	{
489 		.ctl_name	= KERN_OVERFLOWUID,
490 		.procname	= "overflowuid",
491 		.data		= &overflowuid,
492 		.maxlen		= sizeof(int),
493 		.mode		= 0644,
494 		.proc_handler	= &proc_dointvec_minmax,
495 		.strategy	= &sysctl_intvec,
496 		.extra1		= &minolduid,
497 		.extra2		= &maxolduid,
498 	},
499 	{
500 		.ctl_name	= KERN_OVERFLOWGID,
501 		.procname	= "overflowgid",
502 		.data		= &overflowgid,
503 		.maxlen		= sizeof(int),
504 		.mode		= 0644,
505 		.proc_handler	= &proc_dointvec_minmax,
506 		.strategy	= &sysctl_intvec,
507 		.extra1		= &minolduid,
508 		.extra2		= &maxolduid,
509 	},
510 #ifdef CONFIG_S390
511 #ifdef CONFIG_MATHEMU
512 	{
513 		.ctl_name	= KERN_IEEE_EMULATION_WARNINGS,
514 		.procname	= "ieee_emulation_warnings",
515 		.data		= &sysctl_ieee_emulation_warnings,
516 		.maxlen		= sizeof(int),
517 		.mode		= 0644,
518 		.proc_handler	= &proc_dointvec,
519 	},
520 #endif
521 #ifdef CONFIG_NO_IDLE_HZ
522 	{
523 		.ctl_name       = KERN_HZ_TIMER,
524 		.procname       = "hz_timer",
525 		.data           = &sysctl_hz_timer,
526 		.maxlen         = sizeof(int),
527 		.mode           = 0644,
528 		.proc_handler   = &proc_dointvec,
529 	},
530 #endif
531 	{
532 		.ctl_name	= KERN_S390_USER_DEBUG_LOGGING,
533 		.procname	= "userprocess_debug",
534 		.data		= &sysctl_userprocess_debug,
535 		.maxlen		= sizeof(int),
536 		.mode		= 0644,
537 		.proc_handler	= &proc_dointvec,
538 	},
539 #endif
540 	{
541 		.ctl_name	= KERN_PIDMAX,
542 		.procname	= "pid_max",
543 		.data		= &pid_max,
544 		.maxlen		= sizeof (int),
545 		.mode		= 0644,
546 		.proc_handler	= &proc_dointvec_minmax,
547 		.strategy	= sysctl_intvec,
548 		.extra1		= &pid_max_min,
549 		.extra2		= &pid_max_max,
550 	},
551 	{
552 		.ctl_name	= KERN_PANIC_ON_OOPS,
553 		.procname	= "panic_on_oops",
554 		.data		= &panic_on_oops,
555 		.maxlen		= sizeof(int),
556 		.mode		= 0644,
557 		.proc_handler	= &proc_dointvec,
558 	},
559 	{
560 		.ctl_name	= KERN_PRINTK_RATELIMIT,
561 		.procname	= "printk_ratelimit",
562 		.data		= &printk_ratelimit_jiffies,
563 		.maxlen		= sizeof(int),
564 		.mode		= 0644,
565 		.proc_handler	= &proc_dointvec_jiffies,
566 		.strategy	= &sysctl_jiffies,
567 	},
568 	{
569 		.ctl_name	= KERN_PRINTK_RATELIMIT_BURST,
570 		.procname	= "printk_ratelimit_burst",
571 		.data		= &printk_ratelimit_burst,
572 		.maxlen		= sizeof(int),
573 		.mode		= 0644,
574 		.proc_handler	= &proc_dointvec,
575 	},
576 	{
577 		.ctl_name	= KERN_NGROUPS_MAX,
578 		.procname	= "ngroups_max",
579 		.data		= &ngroups_max,
580 		.maxlen		= sizeof (int),
581 		.mode		= 0444,
582 		.proc_handler	= &proc_dointvec,
583 	},
584 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
585 	{
586 		.ctl_name       = KERN_UNKNOWN_NMI_PANIC,
587 		.procname       = "unknown_nmi_panic",
588 		.data           = &unknown_nmi_panic,
589 		.maxlen         = sizeof (int),
590 		.mode           = 0644,
591 		.proc_handler   = &proc_dointvec,
592 	},
593 	{
594 		.ctl_name       = KERN_NMI_WATCHDOG,
595 		.procname       = "nmi_watchdog",
596 		.data           = &nmi_watchdog_enabled,
597 		.maxlen         = sizeof (int),
598 		.mode           = 0644,
599 		.proc_handler   = &proc_nmi_enabled,
600 	},
601 #endif
602 #if defined(CONFIG_X86)
603 	{
604 		.ctl_name	= KERN_PANIC_ON_NMI,
605 		.procname	= "panic_on_unrecovered_nmi",
606 		.data		= &panic_on_unrecovered_nmi,
607 		.maxlen		= sizeof(int),
608 		.mode		= 0644,
609 		.proc_handler	= &proc_dointvec,
610 	},
611 	{
612 		.ctl_name	= KERN_BOOTLOADER_TYPE,
613 		.procname	= "bootloader_type",
614 		.data		= &bootloader_type,
615 		.maxlen		= sizeof (int),
616 		.mode		= 0444,
617 		.proc_handler	= &proc_dointvec,
618 	},
619 	{
620 		.ctl_name	= CTL_UNNUMBERED,
621 		.procname	= "kstack_depth_to_print",
622 		.data		= &kstack_depth_to_print,
623 		.maxlen		= sizeof(int),
624 		.mode		= 0644,
625 		.proc_handler	= &proc_dointvec,
626 	},
627 #endif
628 #if defined(CONFIG_MMU)
629 	{
630 		.ctl_name	= KERN_RANDOMIZE,
631 		.procname	= "randomize_va_space",
632 		.data		= &randomize_va_space,
633 		.maxlen		= sizeof(int),
634 		.mode		= 0644,
635 		.proc_handler	= &proc_dointvec,
636 	},
637 #endif
638 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
639 	{
640 		.ctl_name	= KERN_SPIN_RETRY,
641 		.procname	= "spin_retry",
642 		.data		= &spin_retry,
643 		.maxlen		= sizeof (int),
644 		.mode		= 0644,
645 		.proc_handler	= &proc_dointvec,
646 	},
647 #endif
648 #ifdef CONFIG_ACPI_SLEEP
649 	{
650 		.ctl_name	= KERN_ACPI_VIDEO_FLAGS,
651 		.procname	= "acpi_video_flags",
652 		.data		= &acpi_video_flags,
653 		.maxlen		= sizeof (unsigned long),
654 		.mode		= 0644,
655 		.proc_handler	= &proc_doulongvec_minmax,
656 	},
657 #endif
658 #ifdef CONFIG_IA64
659 	{
660 		.ctl_name	= KERN_IA64_UNALIGNED,
661 		.procname	= "ignore-unaligned-usertrap",
662 		.data		= &no_unaligned_warning,
663 		.maxlen		= sizeof (int),
664 	 	.mode		= 0644,
665 		.proc_handler	= &proc_dointvec,
666 	},
667 #endif
668 #ifdef CONFIG_COMPAT
669 	{
670 		.ctl_name	= KERN_COMPAT_LOG,
671 		.procname	= "compat-log",
672 		.data		= &compat_log,
673 		.maxlen		= sizeof (int),
674 	 	.mode		= 0644,
675 		.proc_handler	= &proc_dointvec,
676 	},
677 #endif
678 #ifdef CONFIG_RT_MUTEXES
679 	{
680 		.ctl_name	= KERN_MAX_LOCK_DEPTH,
681 		.procname	= "max_lock_depth",
682 		.data		= &max_lock_depth,
683 		.maxlen		= sizeof(int),
684 		.mode		= 0644,
685 		.proc_handler	= &proc_dointvec,
686 	},
687 #endif
688 #ifdef CONFIG_PROC_FS
689 	{
690 		.ctl_name       = CTL_UNNUMBERED,
691 		.procname       = "maps_protect",
692 		.data           = &maps_protect,
693 		.maxlen         = sizeof(int),
694 		.mode           = 0644,
695 		.proc_handler   = &proc_dointvec,
696 	},
697 #endif
698 
	/* terminating entry */
699 	{ .ctl_name = 0 }
700 };
701 
702 /* Constants for minimum and maximum testing in vm_table.
703    We use these as one-element integer vectors. */
704 static int zero;
705 static int one_hundred = 100;
706 
707 
/*
 * Entries of the "vm" directory (the child of the CTL_VM entry in
 * root_table).  Range-checked entries take their bounds from the
 * file-scope ints zero and one_hundred through .extra1/.extra2.
 * Several entries use dedicated handlers declared outside this file.
 * The table ends with an entry whose ctl_name is 0.
 */
708 static ctl_table vm_table[] = {
709 	{
710 		.ctl_name	= VM_OVERCOMMIT_MEMORY,
711 		.procname	= "overcommit_memory",
712 		.data		= &sysctl_overcommit_memory,
713 		.maxlen		= sizeof(sysctl_overcommit_memory),
714 		.mode		= 0644,
715 		.proc_handler	= &proc_dointvec,
716 	},
717 	{
718 		.ctl_name	= VM_PANIC_ON_OOM,
719 		.procname	= "panic_on_oom",
720 		.data		= &sysctl_panic_on_oom,
721 		.maxlen		= sizeof(sysctl_panic_on_oom),
722 		.mode		= 0644,
723 		.proc_handler	= &proc_dointvec,
724 	},
725 	{
726 		.ctl_name	= VM_OVERCOMMIT_RATIO,
727 		.procname	= "overcommit_ratio",
728 		.data		= &sysctl_overcommit_ratio,
729 		.maxlen		= sizeof(sysctl_overcommit_ratio),
730 		.mode		= 0644,
731 		.proc_handler	= &proc_dointvec,
732 	},
733 	{
734 		.ctl_name	= VM_PAGE_CLUSTER,
735 		.procname	= "page-cluster",
736 		.data		= &page_cluster,
737 		.maxlen		= sizeof(int),
738 		.mode		= 0644,
739 		.proc_handler	= &proc_dointvec,
740 	},
	/* the two dirty ratios are bounded to 0..100 */
741 	{
742 		.ctl_name	= VM_DIRTY_BACKGROUND,
743 		.procname	= "dirty_background_ratio",
744 		.data		= &dirty_background_ratio,
745 		.maxlen		= sizeof(dirty_background_ratio),
746 		.mode		= 0644,
747 		.proc_handler	= &proc_dointvec_minmax,
748 		.strategy	= &sysctl_intvec,
749 		.extra1		= &zero,
750 		.extra2		= &one_hundred,
751 	},
752 	{
753 		.ctl_name	= VM_DIRTY_RATIO,
754 		.procname	= "dirty_ratio",
755 		.data		= &vm_dirty_ratio,
756 		.maxlen		= sizeof(vm_dirty_ratio),
757 		.mode		= 0644,
758 		.proc_handler	= &proc_dointvec_minmax,
759 		.strategy	= &sysctl_intvec,
760 		.extra1		= &zero,
761 		.extra2		= &one_hundred,
762 	},
763 	{
764 		.ctl_name	= VM_DIRTY_WB_CS,
765 		.procname	= "dirty_writeback_centisecs",
766 		.data		= &dirty_writeback_interval,
767 		.maxlen		= sizeof(dirty_writeback_interval),
768 		.mode		= 0644,
769 		.proc_handler	= &dirty_writeback_centisecs_handler,
770 	},
771 	{
772 		.ctl_name	= VM_DIRTY_EXPIRE_CS,
773 		.procname	= "dirty_expire_centisecs",
774 		.data		= &dirty_expire_interval,
775 		.maxlen		= sizeof(dirty_expire_interval),
776 		.mode		= 0644,
777 		.proc_handler	= &proc_dointvec_userhz_jiffies,
778 	},
779 	{
780 		.ctl_name	= VM_NR_PDFLUSH_THREADS,
781 		.procname	= "nr_pdflush_threads",
782 		.data		= &nr_pdflush_threads,
783 		.maxlen		= sizeof nr_pdflush_threads,
784 		.mode		= 0444 /* read-only*/,
785 		.proc_handler	= &proc_dointvec,
786 	},
787 	{
788 		.ctl_name	= VM_SWAPPINESS,
789 		.procname	= "swappiness",
790 		.data		= &vm_swappiness,
791 		.maxlen		= sizeof(vm_swappiness),
792 		.mode		= 0644,
793 		.proc_handler	= &proc_dointvec_minmax,
794 		.strategy	= &sysctl_intvec,
795 		.extra1		= &zero,
796 		.extra2		= &one_hundred,
797 	},
798 #ifdef CONFIG_HUGETLB_PAGE
799 	 {
800 		.ctl_name	= VM_HUGETLB_PAGES,
801 		.procname	= "nr_hugepages",
802 		.data		= &max_huge_pages,
803 		.maxlen		= sizeof(unsigned long),
804 		.mode		= 0644,
805 		.proc_handler	= &hugetlb_sysctl_handler,
806 		.extra1		= (void *)&hugetlb_zero,
807 		.extra2		= (void *)&hugetlb_infinity,
808 	 },
809 	 {
810 		.ctl_name	= VM_HUGETLB_GROUP,
811 		.procname	= "hugetlb_shm_group",
812 		.data		= &sysctl_hugetlb_shm_group,
813 		.maxlen		= sizeof(gid_t),
814 		.mode		= 0644,
815 		.proc_handler	= &proc_dointvec,
816 	 },
817 #endif
818 	{
819 		.ctl_name	= VM_LOWMEM_RESERVE_RATIO,
820 		.procname	= "lowmem_reserve_ratio",
821 		.data		= &sysctl_lowmem_reserve_ratio,
822 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
823 		.mode		= 0644,
824 		.proc_handler	= &lowmem_reserve_ratio_sysctl_handler,
825 		.strategy	= &sysctl_intvec,
826 	},
827 	{
828 		.ctl_name	= VM_DROP_PAGECACHE,
829 		.procname	= "drop_caches",
830 		.data		= &sysctl_drop_caches,
831 		.maxlen		= sizeof(int),
832 		.mode		= 0644,
833 		.proc_handler	= drop_caches_sysctl_handler,
834 		.strategy	= &sysctl_intvec,
835 	},
836 	{
837 		.ctl_name	= VM_MIN_FREE_KBYTES,
838 		.procname	= "min_free_kbytes",
839 		.data		= &min_free_kbytes,
840 		.maxlen		= sizeof(min_free_kbytes),
841 		.mode		= 0644,
842 		.proc_handler	= &min_free_kbytes_sysctl_handler,
843 		.strategy	= &sysctl_intvec,
844 		.extra1		= &zero,
845 	},
846 	{
847 		.ctl_name	= VM_PERCPU_PAGELIST_FRACTION,
848 		.procname	= "percpu_pagelist_fraction",
849 		.data		= &percpu_pagelist_fraction,
850 		.maxlen		= sizeof(percpu_pagelist_fraction),
851 		.mode		= 0644,
852 		.proc_handler	= &percpu_pagelist_fraction_sysctl_handler,
853 		.strategy	= &sysctl_intvec,
854 		.extra1		= &min_percpu_pagelist_fract,
855 	},
856 #ifdef CONFIG_MMU
857 	{
858 		.ctl_name	= VM_MAX_MAP_COUNT,
859 		.procname	= "max_map_count",
860 		.data		= &sysctl_max_map_count,
861 		.maxlen		= sizeof(sysctl_max_map_count),
862 		.mode		= 0644,
863 		.proc_handler	= &proc_dointvec
864 	},
865 #endif
866 	{
867 		.ctl_name	= VM_LAPTOP_MODE,
868 		.procname	= "laptop_mode",
869 		.data		= &laptop_mode,
870 		.maxlen		= sizeof(laptop_mode),
871 		.mode		= 0644,
872 		.proc_handler	= &proc_dointvec_jiffies,
873 		.strategy	= &sysctl_jiffies,
874 	},
	/*
	 * NOTE(review): this entry and several below (vfs_cache_pressure,
	 * legacy_va_layout, zone_reclaim_mode, vdso_enabled) set .extra1
	 * but use plain proc_dointvec rather than proc_dointvec_minmax --
	 * confirm whether the lower bound is enforced on the proc path.
	 */
875 	{
876 		.ctl_name	= VM_BLOCK_DUMP,
877 		.procname	= "block_dump",
878 		.data		= &block_dump,
879 		.maxlen		= sizeof(block_dump),
880 		.mode		= 0644,
881 		.proc_handler	= &proc_dointvec,
882 		.strategy	= &sysctl_intvec,
883 		.extra1		= &zero,
884 	},
885 	{
886 		.ctl_name	= VM_VFS_CACHE_PRESSURE,
887 		.procname	= "vfs_cache_pressure",
888 		.data		= &sysctl_vfs_cache_pressure,
889 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
890 		.mode		= 0644,
891 		.proc_handler	= &proc_dointvec,
892 		.strategy	= &sysctl_intvec,
893 		.extra1		= &zero,
894 	},
895 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
896 	{
897 		.ctl_name	= VM_LEGACY_VA_LAYOUT,
898 		.procname	= "legacy_va_layout",
899 		.data		= &sysctl_legacy_va_layout,
900 		.maxlen		= sizeof(sysctl_legacy_va_layout),
901 		.mode		= 0644,
902 		.proc_handler	= &proc_dointvec,
903 		.strategy	= &sysctl_intvec,
904 		.extra1		= &zero,
905 	},
906 #endif
907 #ifdef CONFIG_NUMA
908 	{
909 		.ctl_name	= VM_ZONE_RECLAIM_MODE,
910 		.procname	= "zone_reclaim_mode",
911 		.data		= &zone_reclaim_mode,
912 		.maxlen		= sizeof(zone_reclaim_mode),
913 		.mode		= 0644,
914 		.proc_handler	= &proc_dointvec,
915 		.strategy	= &sysctl_intvec,
916 		.extra1		= &zero,
917 	},
918 	{
919 		.ctl_name	= VM_MIN_UNMAPPED,
920 		.procname	= "min_unmapped_ratio",
921 		.data		= &sysctl_min_unmapped_ratio,
922 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
923 		.mode		= 0644,
924 		.proc_handler	= &sysctl_min_unmapped_ratio_sysctl_handler,
925 		.strategy	= &sysctl_intvec,
926 		.extra1		= &zero,
927 		.extra2		= &one_hundred,
928 	},
929 	{
930 		.ctl_name	= VM_MIN_SLAB,
931 		.procname	= "min_slab_ratio",
932 		.data		= &sysctl_min_slab_ratio,
933 		.maxlen		= sizeof(sysctl_min_slab_ratio),
934 		.mode		= 0644,
935 		.proc_handler	= &sysctl_min_slab_ratio_sysctl_handler,
936 		.strategy	= &sysctl_intvec,
937 		.extra1		= &zero,
938 		.extra2		= &one_hundred,
939 	},
940 #endif
941 #ifdef CONFIG_SMP
942 	{
943 		.ctl_name	= CTL_UNNUMBERED,
944 		.procname	= "stat_interval",
945 		.data		= &sysctl_stat_interval,
946 		.maxlen		= sizeof(sysctl_stat_interval),
947 		.mode		= 0644,
948 		.proc_handler	= &proc_dointvec_jiffies,
949 		.strategy	= &sysctl_jiffies,
950 	},
951 #endif
952 #if defined(CONFIG_X86_32) || \
953    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
954 	{
955 		.ctl_name	= VM_VDSO_ENABLED,
956 		.procname	= "vdso_enabled",
957 		.data		= &vdso_enabled,
958 		.maxlen		= sizeof(vdso_enabled),
959 		.mode		= 0644,
960 		.proc_handler	= &proc_dointvec,
961 		.strategy	= &sysctl_intvec,
962 		.extra1		= &zero,
963 	},
964 #endif
	/* terminating entry */
965 	{ .ctl_name = 0 }
966 };
967 
968 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
/*
 * Child table of the "binfmt_misc" directory entry in fs_table.  It
 * holds nothing but the terminating entry.
 */
969 static ctl_table binfmt_misc_table[] = {
970 	{ .ctl_name = 0 }
971 };
972 #endif
973 
/*
 * Entries of the "fs" directory (the child of the CTL_FS entry in
 * root_table).  Several read-only (0444) entries expose the leading
 * ints of a statistics structure by giving .maxlen as a multiple of
 * sizeof(int).  The table ends with an entry whose ctl_name is 0.
 */
974 static ctl_table fs_table[] = {
	/* "inode-nr" and "inode-state" are two views of inodes_stat */
975 	{
976 		.ctl_name	= FS_NRINODE,
977 		.procname	= "inode-nr",
978 		.data		= &inodes_stat,
979 		.maxlen		= 2*sizeof(int),
980 		.mode		= 0444,
981 		.proc_handler	= &proc_dointvec,
982 	},
983 	{
984 		.ctl_name	= FS_STATINODE,
985 		.procname	= "inode-state",
986 		.data		= &inodes_stat,
987 		.maxlen		= 7*sizeof(int),
988 		.mode		= 0444,
989 		.proc_handler	= &proc_dointvec,
990 	},
991 	{
992 		.ctl_name	= FS_NRFILE,
993 		.procname	= "file-nr",
994 		.data		= &files_stat,
995 		.maxlen		= 3*sizeof(int),
996 		.mode		= 0444,
997 		.proc_handler	= &proc_nr_files,
998 	},
999 	{
1000 		.ctl_name	= FS_MAXFILE,
1001 		.procname	= "file-max",
1002 		.data		= &files_stat.max_files,
1003 		.maxlen		= sizeof(int),
1004 		.mode		= 0644,
1005 		.proc_handler	= &proc_dointvec,
1006 	},
1007 	{
1008 		.ctl_name	= FS_DENTRY,
1009 		.procname	= "dentry-state",
1010 		.data		= &dentry_stat,
1011 		.maxlen		= 6*sizeof(int),
1012 		.mode		= 0444,
1013 		.proc_handler	= &proc_dointvec,
1014 	},
	/* overflow ids are range-checked against minolduid..maxolduid */
1015 	{
1016 		.ctl_name	= FS_OVERFLOWUID,
1017 		.procname	= "overflowuid",
1018 		.data		= &fs_overflowuid,
1019 		.maxlen		= sizeof(int),
1020 		.mode		= 0644,
1021 		.proc_handler	= &proc_dointvec_minmax,
1022 		.strategy	= &sysctl_intvec,
1023 		.extra1		= &minolduid,
1024 		.extra2		= &maxolduid,
1025 	},
1026 	{
1027 		.ctl_name	= FS_OVERFLOWGID,
1028 		.procname	= "overflowgid",
1029 		.data		= &fs_overflowgid,
1030 		.maxlen		= sizeof(int),
1031 		.mode		= 0644,
1032 		.proc_handler	= &proc_dointvec_minmax,
1033 		.strategy	= &sysctl_intvec,
1034 		.extra1		= &minolduid,
1035 		.extra2		= &maxolduid,
1036 	},
1037 	{
1038 		.ctl_name	= FS_LEASES,
1039 		.procname	= "leases-enable",
1040 		.data		= &leases_enable,
1041 		.maxlen		= sizeof(int),
1042 		.mode		= 0644,
1043 		.proc_handler	= &proc_dointvec,
1044 	},
1045 #ifdef CONFIG_DNOTIFY
1046 	{
1047 		.ctl_name	= FS_DIR_NOTIFY,
1048 		.procname	= "dir-notify-enable",
1049 		.data		= &dir_notify_enable,
1050 		.maxlen		= sizeof(int),
1051 		.mode		= 0644,
1052 		.proc_handler	= &proc_dointvec,
1053 	},
1054 #endif
1055 #ifdef CONFIG_MMU
1056 	{
1057 		.ctl_name	= FS_LEASE_TIME,
1058 		.procname	= "lease-break-time",
1059 		.data		= &lease_break_time,
1060 		.maxlen		= sizeof(int),
1061 		.mode		= 0644,
1062 		.proc_handler	= &proc_dointvec,
1063 	},
1064 	{
1065 		.ctl_name	= FS_AIO_NR,
1066 		.procname	= "aio-nr",
1067 		.data		= &aio_nr,
1068 		.maxlen		= sizeof(aio_nr),
1069 		.mode		= 0444,
1070 		.proc_handler	= &proc_doulongvec_minmax,
1071 	},
1072 	{
1073 		.ctl_name	= FS_AIO_MAX_NR,
1074 		.procname	= "aio-max-nr",
1075 		.data		= &aio_max_nr,
1076 		.maxlen		= sizeof(aio_max_nr),
1077 		.mode		= 0644,
1078 		.proc_handler	= &proc_doulongvec_minmax,
1079 	},
1080 #ifdef CONFIG_INOTIFY_USER
	/* sub-directory; only reachable when CONFIG_MMU is also set */
1081 	{
1082 		.ctl_name	= FS_INOTIFY,
1083 		.procname	= "inotify",
1084 		.mode		= 0555,
1085 		.child		= inotify_table,
1086 	},
1087 #endif
1088 #endif
	/* note: this entry carries a KERN_* ctl_name inside the fs table */
1089 	{
1090 		.ctl_name	= KERN_SETUID_DUMPABLE,
1091 		.procname	= "suid_dumpable",
1092 		.data		= &suid_dumpable,
1093 		.maxlen		= sizeof(int),
1094 		.mode		= 0644,
1095 		.proc_handler	= &proc_dointvec,
1096 	},
1097 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1098 	{
1099 		.ctl_name	= CTL_UNNUMBERED,
1100 		.procname	= "binfmt_misc",
1101 		.mode		= 0555,
1102 		.child		= binfmt_misc_table,
1103 	},
1104 #endif
	/* terminating entry */
1105 	{ .ctl_name = 0 }
1106 };
1107 
/*
 * Child table of the "debug" directory in root_table.  It holds
 * nothing but the terminating entry.
 */
1108 static ctl_table debug_table[] = {
1109 	{ .ctl_name = 0 }
1110 };
1111 
/*
 * Child table of the "dev" directory in root_table.  It holds nothing
 * but the terminating entry.
 */
1112 static ctl_table dev_table[] = {
1113 	{ .ctl_name = 0 }
1114 };
1115 
1116 static DEFINE_SPINLOCK(sysctl_lock);
1117 
1118 /* called under sysctl_lock */
1119 static int use_table(struct ctl_table_header *p)
1120 {
1121 	if (unlikely(p->unregistering))
1122 		return 0;
1123 	p->used++;
1124 	return 1;
1125 }
1126 
1127 /* called under sysctl_lock */
1128 static void unuse_table(struct ctl_table_header *p)
1129 {
1130 	if (!--p->used)
1131 		if (unlikely(p->unregistering))
1132 			complete(p->unregistering);
1133 }
1134 
/*
 * Detach a table header from the list of registered headers, first waiting
 * for any current users to drop it.
 *
 * Called under sysctl_lock; the lock is dropped and reacquired if the
 * function has to wait, and is held again on return.
 */
static void start_unregistering(struct ctl_table_header *p)
{
	/*
	 * if p->used is 0, nobody will ever touch that entry again;
	 * we'll eliminate all paths to it before dropping sysctl_lock
	 */
	if (unlikely(p->used)) {
		/* on-stack completion, signalled by unuse_table() on the last put */
		struct completion wait;
		init_completion(&wait);
		/* from here on use_table() refuses to hand out new references */
		p->unregistering = &wait;
		spin_unlock(&sysctl_lock);
		wait_for_completion(&wait);
		spin_lock(&sysctl_lock);
	}
	/*
	 * do not remove from the list until nobody holds it; walking the
	 * list in do_sysctl() relies on that.
	 */
	list_del_init(&p->ctl_entry);
}
1156 
1157 void sysctl_head_finish(struct ctl_table_header *head)
1158 {
1159 	if (!head)
1160 		return;
1161 	spin_lock(&sysctl_lock);
1162 	unuse_table(head);
1163 	spin_unlock(&sysctl_lock);
1164 }
1165 
/*
 * Iterate over the registered table headers.
 *
 * @prev: NULL to start from root_table_header, otherwise the header returned
 *        by the previous call; its use count is dropped here.
 *
 * Returns the next header that could be pinned with use_table(), or NULL
 * once the walk has wrapped around to root_table_header.  Headers that are
 * being unregistered are skipped.  The returned header stays pinned until
 * it is passed back to this function or to sysctl_head_finish().
 */
struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
{
	struct ctl_table_header *head;
	struct list_head *tmp;
	spin_lock(&sysctl_lock);
	if (prev) {
		tmp = &prev->ctl_entry;
		unuse_table(prev);
		/* resume after @prev: jump straight to the advance step */
		goto next;
	}
	tmp = &root_table_header.ctl_entry;
	for (;;) {
		head = list_entry(tmp, struct ctl_table_header, ctl_entry);

		/* skip headers that are in the middle of being unregistered */
		if (!use_table(head))
			goto next;
		spin_unlock(&sysctl_lock);
		return head;
	next:
		tmp = tmp->next;
		/* back at the root entry: every header has been visited */
		if (tmp == &root_table_header.ctl_entry)
			break;
	}
	spin_unlock(&sysctl_lock);
	return NULL;
}
1192 
1193 #ifdef CONFIG_SYSCTL_SYSCALL
1194 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1195 	       void __user *newval, size_t newlen)
1196 {
1197 	struct ctl_table_header *head;
1198 	int error = -ENOTDIR;
1199 
1200 	if (nlen <= 0 || nlen >= CTL_MAXNAME)
1201 		return -ENOTDIR;
1202 	if (oldval) {
1203 		int old_len;
1204 		if (!oldlenp || get_user(old_len, oldlenp))
1205 			return -EFAULT;
1206 	}
1207 
1208 	for (head = sysctl_head_next(NULL); head;
1209 			head = sysctl_head_next(head)) {
1210 		error = parse_table(name, nlen, oldval, oldlenp,
1211 					newval, newlen, head->ctl_table);
1212 		if (error != -ENOTDIR) {
1213 			sysctl_head_finish(head);
1214 			break;
1215 		}
1216 	}
1217 	return error;
1218 }
1219 
1220 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
1221 {
1222 	struct __sysctl_args tmp;
1223 	int error;
1224 
1225 	if (copy_from_user(&tmp, args, sizeof(tmp)))
1226 		return -EFAULT;
1227 
1228 	lock_kernel();
1229 	error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1230 			  tmp.newval, tmp.newlen);
1231 	unlock_kernel();
1232 	return error;
1233 }
1234 #endif /* CONFIG_SYSCTL_SYSCALL */
1235 
1236 /*
1237  * sysctl_perm does NOT grant the superuser all rights automatically, because
1238  * some sysctl variables are readonly even to root.
1239  */
1240 
1241 static int test_perm(int mode, int op)
1242 {
1243 	if (!current->euid)
1244 		mode >>= 6;
1245 	else if (in_egroup_p(0))
1246 		mode >>= 3;
1247 	if ((mode & op & 0007) == op)
1248 		return 0;
1249 	return -EACCES;
1250 }
1251 
1252 int sysctl_perm(ctl_table *table, int op)
1253 {
1254 	int error;
1255 	error = security_sysctl(table, op);
1256 	if (error)
1257 		return error;
1258 	return test_perm(table->mode, op);
1259 }
1260 
1261 #ifdef CONFIG_SYSCTL_SYSCALL
1262 static int parse_table(int __user *name, int nlen,
1263 		       void __user *oldval, size_t __user *oldlenp,
1264 		       void __user *newval, size_t newlen,
1265 		       ctl_table *table)
1266 {
1267 	int n;
1268 repeat:
1269 	if (!nlen)
1270 		return -ENOTDIR;
1271 	if (get_user(n, name))
1272 		return -EFAULT;
1273 	for ( ; table->ctl_name || table->procname; table++) {
1274 		if (!table->ctl_name)
1275 			continue;
1276 		if (n == table->ctl_name) {
1277 			int error;
1278 			if (table->child) {
1279 				if (sysctl_perm(table, 001))
1280 					return -EPERM;
1281 				name++;
1282 				nlen--;
1283 				table = table->child;
1284 				goto repeat;
1285 			}
1286 			error = do_sysctl_strategy(table, name, nlen,
1287 						   oldval, oldlenp,
1288 						   newval, newlen);
1289 			return error;
1290 		}
1291 	}
1292 	return -ENOTDIR;
1293 }
1294 
1295 /* Perform the actual read/write of a sysctl table entry. */
1296 int do_sysctl_strategy (ctl_table *table,
1297 			int __user *name, int nlen,
1298 			void __user *oldval, size_t __user *oldlenp,
1299 			void __user *newval, size_t newlen)
1300 {
1301 	int op = 0, rc;
1302 	size_t len;
1303 
1304 	if (oldval)
1305 		op |= 004;
1306 	if (newval)
1307 		op |= 002;
1308 	if (sysctl_perm(table, op))
1309 		return -EPERM;
1310 
1311 	if (table->strategy) {
1312 		rc = table->strategy(table, name, nlen, oldval, oldlenp,
1313 				     newval, newlen);
1314 		if (rc < 0)
1315 			return rc;
1316 		if (rc > 0)
1317 			return 0;
1318 	}
1319 
1320 	/* If there is no strategy routine, or if the strategy returns
1321 	 * zero, proceed with automatic r/w */
1322 	if (table->data && table->maxlen) {
1323 		if (oldval && oldlenp) {
1324 			if (get_user(len, oldlenp))
1325 				return -EFAULT;
1326 			if (len) {
1327 				if (len > table->maxlen)
1328 					len = table->maxlen;
1329 				if(copy_to_user(oldval, table->data, len))
1330 					return -EFAULT;
1331 				if(put_user(len, oldlenp))
1332 					return -EFAULT;
1333 			}
1334 		}
1335 		if (newval && newlen) {
1336 			len = newlen;
1337 			if (len > table->maxlen)
1338 				len = table->maxlen;
1339 			if(copy_from_user(table->data, newval, len))
1340 				return -EFAULT;
1341 		}
1342 	}
1343 	return 0;
1344 }
1345 #endif /* CONFIG_SYSCTL_SYSCALL */
1346 
1347 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1348 {
1349 	for (; table->ctl_name || table->procname; table++) {
1350 		table->parent = parent;
1351 		if (table->child)
1352 			sysctl_set_parent(table, table->child);
1353 	}
1354 }
1355 
/*
 * Boot-time setup: fill in the ->parent pointers throughout the root_table
 * hierarchy (its top-level entries get a NULL parent).  Always returns 0.
 */
static __init int sysctl_init(void)
{
	sysctl_set_parent(NULL, root_table);
	return 0;
}

/* Hook sysctl_init() in as a core initcall. */
core_initcall(sysctl_init);
1363 
1364 /**
1365  * register_sysctl_table - register a sysctl hierarchy
1366  * @table: the top-level table structure
1367  *
 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
 * array. An entry with a ctl_name of 0 and a %NULL procname terminates the table.
1370  *
1371  * The members of the &ctl_table structure are used as follows:
1372  *
1373  * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1374  *            must be unique within that level of sysctl
1375  *
1376  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1377  *            enter a sysctl file
1378  *
1379  * data - a pointer to data for use by proc_handler
1380  *
1381  * maxlen - the maximum size in bytes of the data
1382  *
1383  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1384  *
1385  * child - a pointer to the child sysctl table if this entry is a directory, or
1386  *         %NULL.
1387  *
1388  * proc_handler - the text handler routine (described below)
1389  *
1390  * strategy - the strategy routine (described below)
1391  *
1392  * de - for internal use by the sysctl routines
1393  *
1394  * extra1, extra2 - extra pointers usable by the proc handler routines
1395  *
1396  * Leaf nodes in the sysctl tree will be represented by a single file
1397  * under /proc; non-leaf nodes will be represented by directories.
1398  *
1399  * sysctl(2) can automatically manage read and write requests through
1400  * the sysctl table.  The data and maxlen fields of the ctl_table
1401  * struct enable minimal validation of the values being written to be
1402  * performed, and the mode field allows minimal authentication.
1403  *
1404  * More sophisticated management can be enabled by the provision of a
1405  * strategy routine with the table entry.  This will be called before
1406  * any automatic read or write of the data is performed.
1407  *
1408  * The strategy routine may return
1409  *
1410  * < 0 - Error occurred (error is passed to user process)
1411  *
1412  * 0   - OK - proceed with automatic read or write.
1413  *
1414  * > 0 - OK - read or write has been done by the strategy routine, so
1415  *       return immediately.
1416  *
1417  * There must be a proc_handler routine for any terminal nodes
1418  * mirrored under /proc/sys (non-terminals are handled by a built-in
1419  * directory handler).  Several default handlers are available to
1420  * cover common cases -
1421  *
1422  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1423  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1424  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1425  *
1426  * It is the handler's job to read the input buffer from user memory
1427  * and process it. The handler should return 0 on success.
1428  *
1429  * This routine returns %NULL on a failure to register, and a pointer
1430  * to the table header on success.
1431  */
1432 struct ctl_table_header *register_sysctl_table(ctl_table * table)
1433 {
1434 	struct ctl_table_header *tmp;
1435 	tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
1436 	if (!tmp)
1437 		return NULL;
1438 	tmp->ctl_table = table;
1439 	INIT_LIST_HEAD(&tmp->ctl_entry);
1440 	tmp->used = 0;
1441 	tmp->unregistering = NULL;
1442 	sysctl_set_parent(NULL, table);
1443 	spin_lock(&sysctl_lock);
1444 	list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
1445 	spin_unlock(&sysctl_lock);
1446 	return tmp;
1447 }
1448 
/**
 * unregister_sysctl_table - unregister a sysctl table hierarchy
 * @header: the header returned from register_sysctl_table
 *
 * Unregisters the sysctl table and all children. proc entries may not
 * actually be removed until they are no longer used by anyone.
 *
 * May sleep: the call waits until nobody is using @header any more, then
 * takes it off the list of registered headers and frees it.
 */
void unregister_sysctl_table(struct ctl_table_header * header)
{
	might_sleep();
	spin_lock(&sysctl_lock);
	/* may drop and retake sysctl_lock while waiting for users to go away */
	start_unregistering(header);
	spin_unlock(&sysctl_lock);
	kfree(header);
}
1464 
1465 #else /* !CONFIG_SYSCTL */
/* Stub for kernels built without CONFIG_SYSCTL: nothing is registered. */
struct ctl_table_header *register_sysctl_table(ctl_table * table)
{
	return NULL;
}

/* Stub for kernels built without CONFIG_SYSCTL: nothing to undo. */
void unregister_sysctl_table(struct ctl_table_header * table)
{
}
1474 
1475 #endif /* CONFIG_SYSCTL */
1476 
1477 /*
1478  * /proc/sys support
1479  */
1480 
1481 #ifdef CONFIG_PROC_SYSCTL
1482 
1483 static int _proc_do_string(void* data, int maxlen, int write,
1484 			   struct file *filp, void __user *buffer,
1485 			   size_t *lenp, loff_t *ppos)
1486 {
1487 	size_t len;
1488 	char __user *p;
1489 	char c;
1490 
1491 	if (!data || !maxlen || !*lenp) {
1492 		*lenp = 0;
1493 		return 0;
1494 	}
1495 
1496 	if (write) {
1497 		len = 0;
1498 		p = buffer;
1499 		while (len < *lenp) {
1500 			if (get_user(c, p++))
1501 				return -EFAULT;
1502 			if (c == 0 || c == '\n')
1503 				break;
1504 			len++;
1505 		}
1506 		if (len >= maxlen)
1507 			len = maxlen-1;
1508 		if(copy_from_user(data, buffer, len))
1509 			return -EFAULT;
1510 		((char *) data)[len] = 0;
1511 		*ppos += *lenp;
1512 	} else {
1513 		len = strlen(data);
1514 		if (len > maxlen)
1515 			len = maxlen;
1516 
1517 		if (*ppos > len) {
1518 			*lenp = 0;
1519 			return 0;
1520 		}
1521 
1522 		data += *ppos;
1523 		len  -= *ppos;
1524 
1525 		if (len > *lenp)
1526 			len = *lenp;
1527 		if (len)
1528 			if(copy_to_user(buffer, data, len))
1529 				return -EFAULT;
1530 		if (len < *lenp) {
1531 			if(put_user('\n', ((char __user *) buffer) + len))
1532 				return -EFAULT;
1533 			len++;
1534 		}
1535 		*lenp = len;
1536 		*ppos += len;
1537 	}
1538 	return 0;
1539 }
1540 
1541 /**
1542  * proc_dostring - read a string sysctl
1543  * @table: the sysctl table
1544  * @write: %TRUE if this is a write to the sysctl file
1545  * @filp: the file structure
1546  * @buffer: the user buffer
1547  * @lenp: the size of the user buffer
1548  * @ppos: file position
1549  *
1550  * Reads/writes a string from/to the user buffer. If the kernel
1551  * buffer provided is not large enough to hold the string, the
1552  * string is truncated. The copied string is %NULL-terminated.
1553  * If the string is being read by the user process, it is copied
1554  * and a newline '\n' is added. It is truncated if the buffer is
1555  * not large enough.
1556  *
1557  * Returns 0 on success.
1558  */
1559 int proc_dostring(ctl_table *table, int write, struct file *filp,
1560 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1561 {
1562 	return _proc_do_string(table->data, table->maxlen, write, filp,
1563 			       buffer, lenp, ppos);
1564 }
1565 
1566 
/*
 * Default conversion between the textual form handled by
 * __do_proc_dointvec() (a sign flag plus an unsigned magnitude) and an int.
 *
 * @negp:  sign flag; read when @write is set, set to -1/0 otherwise
 * @lvalp: magnitude; read when @write is set, filled in otherwise
 * @valp:  the integer being written to or read from
 * @write: nonzero to store the parsed value, zero to decompose *@valp
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		*valp = *negp ? -*lvalp : *lvalp;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows for
			 * INT_MIN and then sign-extends to a huge magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1585 
1586 static int __do_proc_dointvec(void *tbl_data, ctl_table *table,
1587 		  int write, struct file *filp, void __user *buffer,
1588 		  size_t *lenp, loff_t *ppos,
1589 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1590 			      int write, void *data),
1591 		  void *data)
1592 {
1593 #define TMPBUFLEN 21
1594 	int *i, vleft, first=1, neg, val;
1595 	unsigned long lval;
1596 	size_t left, len;
1597 
1598 	char buf[TMPBUFLEN], *p;
1599 	char __user *s = buffer;
1600 
1601 	if (!tbl_data || !table->maxlen || !*lenp ||
1602 	    (*ppos && !write)) {
1603 		*lenp = 0;
1604 		return 0;
1605 	}
1606 
1607 	i = (int *) tbl_data;
1608 	vleft = table->maxlen / sizeof(*i);
1609 	left = *lenp;
1610 
1611 	if (!conv)
1612 		conv = do_proc_dointvec_conv;
1613 
1614 	for (; left && vleft--; i++, first=0) {
1615 		if (write) {
1616 			while (left) {
1617 				char c;
1618 				if (get_user(c, s))
1619 					return -EFAULT;
1620 				if (!isspace(c))
1621 					break;
1622 				left--;
1623 				s++;
1624 			}
1625 			if (!left)
1626 				break;
1627 			neg = 0;
1628 			len = left;
1629 			if (len > sizeof(buf) - 1)
1630 				len = sizeof(buf) - 1;
1631 			if (copy_from_user(buf, s, len))
1632 				return -EFAULT;
1633 			buf[len] = 0;
1634 			p = buf;
1635 			if (*p == '-' && left > 1) {
1636 				neg = 1;
1637 				p++;
1638 			}
1639 			if (*p < '0' || *p > '9')
1640 				break;
1641 
1642 			lval = simple_strtoul(p, &p, 0);
1643 
1644 			len = p-buf;
1645 			if ((len < left) && *p && !isspace(*p))
1646 				break;
1647 			if (neg)
1648 				val = -val;
1649 			s += len;
1650 			left -= len;
1651 
1652 			if (conv(&neg, &lval, i, 1, data))
1653 				break;
1654 		} else {
1655 			p = buf;
1656 			if (!first)
1657 				*p++ = '\t';
1658 
1659 			if (conv(&neg, &lval, i, 0, data))
1660 				break;
1661 
1662 			sprintf(p, "%s%lu", neg ? "-" : "", lval);
1663 			len = strlen(buf);
1664 			if (len > left)
1665 				len = left;
1666 			if(copy_to_user(s, buf, len))
1667 				return -EFAULT;
1668 			left -= len;
1669 			s += len;
1670 		}
1671 	}
1672 
1673 	if (!write && !first && left) {
1674 		if(put_user('\n', s))
1675 			return -EFAULT;
1676 		left--, s++;
1677 	}
1678 	if (write) {
1679 		while (left) {
1680 			char c;
1681 			if (get_user(c, s++))
1682 				return -EFAULT;
1683 			if (!isspace(c))
1684 				break;
1685 			left--;
1686 		}
1687 	}
1688 	if (write && first)
1689 		return -EINVAL;
1690 	*lenp -= left;
1691 	*ppos += *lenp;
1692 	return 0;
1693 #undef TMPBUFLEN
1694 }
1695 
1696 static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
1697 		  void __user *buffer, size_t *lenp, loff_t *ppos,
1698 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1699 			      int write, void *data),
1700 		  void *data)
1701 {
1702 	return __do_proc_dointvec(table->data, table, write, filp,
1703 			buffer, lenp, ppos, conv, data);
1704 }
1705 
1706 /**
1707  * proc_dointvec - read a vector of integers
1708  * @table: the sysctl table
1709  * @write: %TRUE if this is a write to the sysctl file
1710  * @filp: the file structure
1711  * @buffer: the user buffer
1712  * @lenp: the size of the user buffer
1713  * @ppos: file position
1714  *
1715  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1716  * values from/to the user buffer, treated as an ASCII string.
1717  *
1718  * Returns 0 on success.
1719  */
1720 int proc_dointvec(ctl_table *table, int write, struct file *filp,
1721 		     void __user *buffer, size_t *lenp, loff_t *ppos)
1722 {
1723     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1724 		    	    NULL,NULL);
1725 }
1726 
/* How do_proc_dointvec_bset_conv() combines a written value with the old one */
#define OP_SET	0
#define OP_AND	1
#define OP_OR	2

/*
 * Conversion routine for bit-set style integers.
 *
 * @negp:  sign flag; read when @write is set, set to -1/0 otherwise
 * @lvalp: magnitude; read when @write is set, filled in otherwise
 * @valp:  the integer being written to or read from
 * @write: nonzero to store the parsed value, zero to decompose *@valp
 * @data:  points to an int holding OP_SET, OP_AND or OP_OR
 *
 * On a write the parsed value replaces *@valp (OP_SET), is ANDed into it
 * (OP_AND) or ORed into it (OP_OR); any other op leaves it untouched.
 * Always returns 0.
 */
static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
				      int *valp,
				      int write, void *data)
{
	int op = *(int *)data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		switch(op) {
		case OP_SET:	*valp = val; break;
		case OP_AND:	*valp &= val; break;
		case OP_OR:	*valp |= val; break;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows for
			 * INT_MIN and then sign-extends to a huge magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1755 
1756 /*
1757  *	init may raise the set.
1758  */
1759 
1760 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
1761 			void __user *buffer, size_t *lenp, loff_t *ppos)
1762 {
1763 	int op;
1764 
1765 	if (write && !capable(CAP_SYS_MODULE)) {
1766 		return -EPERM;
1767 	}
1768 
1769 	op = is_init(current) ? OP_SET : OP_AND;
1770 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1771 				do_proc_dointvec_bset_conv,&op);
1772 }
1773 
1774 /*
1775  *	Taint values can only be increased
1776  */
1777 static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
1778 			       void __user *buffer, size_t *lenp, loff_t *ppos)
1779 {
1780 	int op;
1781 
1782 	if (write && !capable(CAP_SYS_ADMIN))
1783 		return -EPERM;
1784 
1785 	op = OP_OR;
1786 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1787 				do_proc_dointvec_bset_conv,&op);
1788 }
1789 
/*
 * Bounds handed to do_proc_dointvec_minmax_conv(); a NULL pointer means
 * that side is not limited.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Conversion routine that range-checks written values.
 *
 * @negp:  sign flag; read when @write is set, set to -1/0 otherwise
 * @lvalp: magnitude; read when @write is set, filled in otherwise
 * @valp:  the integer being written to or read from
 * @write: nonzero to store the parsed value, zero to decompose *@valp
 * @data:  points to a struct do_proc_dointvec_minmax_conv_param
 *
 * Returns 0 on success, or -EINVAL (leaving *@valp untouched) when a
 * written value lies outside the configured bounds.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows for
			 * INT_MIN and then sign-extends to a huge magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1818 
1819 /**
1820  * proc_dointvec_minmax - read a vector of integers with min/max values
1821  * @table: the sysctl table
1822  * @write: %TRUE if this is a write to the sysctl file
1823  * @filp: the file structure
1824  * @buffer: the user buffer
1825  * @lenp: the size of the user buffer
1826  * @ppos: file position
1827  *
1828  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1829  * values from/to the user buffer, treated as an ASCII string.
1830  *
1831  * This routine will ensure the values are within the range specified by
1832  * table->extra1 (min) and table->extra2 (max).
1833  *
1834  * Returns 0 on success.
1835  */
1836 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
1837 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1838 {
1839 	struct do_proc_dointvec_minmax_conv_param param = {
1840 		.min = (int *) table->extra1,
1841 		.max = (int *) table->extra2,
1842 	};
1843 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
1844 				do_proc_dointvec_minmax_conv, &param);
1845 }
1846 
1847 static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write,
1848 				     struct file *filp,
1849 				     void __user *buffer,
1850 				     size_t *lenp, loff_t *ppos,
1851 				     unsigned long convmul,
1852 				     unsigned long convdiv)
1853 {
1854 #define TMPBUFLEN 21
1855 	unsigned long *i, *min, *max, val;
1856 	int vleft, first=1, neg;
1857 	size_t len, left;
1858 	char buf[TMPBUFLEN], *p;
1859 	char __user *s = buffer;
1860 
1861 	if (!data || !table->maxlen || !*lenp ||
1862 	    (*ppos && !write)) {
1863 		*lenp = 0;
1864 		return 0;
1865 	}
1866 
1867 	i = (unsigned long *) data;
1868 	min = (unsigned long *) table->extra1;
1869 	max = (unsigned long *) table->extra2;
1870 	vleft = table->maxlen / sizeof(unsigned long);
1871 	left = *lenp;
1872 
1873 	for (; left && vleft--; i++, min++, max++, first=0) {
1874 		if (write) {
1875 			while (left) {
1876 				char c;
1877 				if (get_user(c, s))
1878 					return -EFAULT;
1879 				if (!isspace(c))
1880 					break;
1881 				left--;
1882 				s++;
1883 			}
1884 			if (!left)
1885 				break;
1886 			neg = 0;
1887 			len = left;
1888 			if (len > TMPBUFLEN-1)
1889 				len = TMPBUFLEN-1;
1890 			if (copy_from_user(buf, s, len))
1891 				return -EFAULT;
1892 			buf[len] = 0;
1893 			p = buf;
1894 			if (*p == '-' && left > 1) {
1895 				neg = 1;
1896 				p++;
1897 			}
1898 			if (*p < '0' || *p > '9')
1899 				break;
1900 			val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
1901 			len = p-buf;
1902 			if ((len < left) && *p && !isspace(*p))
1903 				break;
1904 			if (neg)
1905 				val = -val;
1906 			s += len;
1907 			left -= len;
1908 
1909 			if(neg)
1910 				continue;
1911 			if ((min && val < *min) || (max && val > *max))
1912 				continue;
1913 			*i = val;
1914 		} else {
1915 			p = buf;
1916 			if (!first)
1917 				*p++ = '\t';
1918 			sprintf(p, "%lu", convdiv * (*i) / convmul);
1919 			len = strlen(buf);
1920 			if (len > left)
1921 				len = left;
1922 			if(copy_to_user(s, buf, len))
1923 				return -EFAULT;
1924 			left -= len;
1925 			s += len;
1926 		}
1927 	}
1928 
1929 	if (!write && !first && left) {
1930 		if(put_user('\n', s))
1931 			return -EFAULT;
1932 		left--, s++;
1933 	}
1934 	if (write) {
1935 		while (left) {
1936 			char c;
1937 			if (get_user(c, s++))
1938 				return -EFAULT;
1939 			if (!isspace(c))
1940 				break;
1941 			left--;
1942 		}
1943 	}
1944 	if (write && first)
1945 		return -EINVAL;
1946 	*lenp -= left;
1947 	*ppos += *lenp;
1948 	return 0;
1949 #undef TMPBUFLEN
1950 }
1951 
1952 static int do_proc_doulongvec_minmax(ctl_table *table, int write,
1953 				     struct file *filp,
1954 				     void __user *buffer,
1955 				     size_t *lenp, loff_t *ppos,
1956 				     unsigned long convmul,
1957 				     unsigned long convdiv)
1958 {
1959 	return __do_proc_doulongvec_minmax(table->data, table, write,
1960 			filp, buffer, lenp, ppos, convmul, convdiv);
1961 }
1962 
1963 /**
1964  * proc_doulongvec_minmax - read a vector of long integers with min/max values
1965  * @table: the sysctl table
1966  * @write: %TRUE if this is a write to the sysctl file
1967  * @filp: the file structure
1968  * @buffer: the user buffer
1969  * @lenp: the size of the user buffer
1970  * @ppos: file position
1971  *
1972  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1973  * values from/to the user buffer, treated as an ASCII string.
1974  *
1975  * This routine will ensure the values are within the range specified by
1976  * table->extra1 (min) and table->extra2 (max).
1977  *
1978  * Returns 0 on success.
1979  */
1980 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
1981 			   void __user *buffer, size_t *lenp, loff_t *ppos)
1982 {
1983     return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
1984 }
1985 
1986 /**
1987  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
1988  * @table: the sysctl table
1989  * @write: %TRUE if this is a write to the sysctl file
1990  * @filp: the file structure
1991  * @buffer: the user buffer
1992  * @lenp: the size of the user buffer
1993  * @ppos: file position
1994  *
1995  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1996  * values from/to the user buffer, treated as an ASCII string. The values
1997  * are treated as milliseconds, and converted to jiffies when they are stored.
1998  *
1999  * This routine will ensure the values are within the range specified by
2000  * table->extra1 (min) and table->extra2 (max).
2001  *
2002  * Returns 0 on success.
2003  */
2004 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2005 				      struct file *filp,
2006 				      void __user *buffer,
2007 				      size_t *lenp, loff_t *ppos)
2008 {
2009     return do_proc_doulongvec_minmax(table, write, filp, buffer,
2010 				     lenp, ppos, HZ, 1000l);
2011 }
2012 
2013 
2014 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2015 					 int *valp,
2016 					 int write, void *data)
2017 {
2018 	if (write) {
2019 		if (*lvalp > LONG_MAX / HZ)
2020 			return 1;
2021 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2022 	} else {
2023 		int val = *valp;
2024 		unsigned long lval;
2025 		if (val < 0) {
2026 			*negp = -1;
2027 			lval = (unsigned long)-val;
2028 		} else {
2029 			*negp = 0;
2030 			lval = (unsigned long)val;
2031 		}
2032 		*lvalp = lval / HZ;
2033 	}
2034 	return 0;
2035 }
2036 
2037 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2038 						int *valp,
2039 						int write, void *data)
2040 {
2041 	if (write) {
2042 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2043 			return 1;
2044 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2045 	} else {
2046 		int val = *valp;
2047 		unsigned long lval;
2048 		if (val < 0) {
2049 			*negp = -1;
2050 			lval = (unsigned long)-val;
2051 		} else {
2052 			*negp = 0;
2053 			lval = (unsigned long)val;
2054 		}
2055 		*lvalp = jiffies_to_clock_t(lval);
2056 	}
2057 	return 0;
2058 }
2059 
/*
 * Conversion routine for values shown to the user in milliseconds and
 * stored in jiffies.
 *
 * @negp:  sign flag; read when @write is set, set to -1/0 otherwise
 * @lvalp: magnitude in milliseconds; read when @write is set, filled in
 *         otherwise
 * @valp:  the stored jiffies value
 * @write: nonzero to store the parsed value, zero to decompose *@valp
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows for
			 * INT_MIN and then sign-extends to a huge magnitude.
			 */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2080 
2081 /**
2082  * proc_dointvec_jiffies - read a vector of integers as seconds
2083  * @table: the sysctl table
2084  * @write: %TRUE if this is a write to the sysctl file
2085  * @filp: the file structure
2086  * @buffer: the user buffer
2087  * @lenp: the size of the user buffer
2088  * @ppos: file position
2089  *
2090  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2091  * values from/to the user buffer, treated as an ASCII string.
2092  * The values read are assumed to be in seconds, and are converted into
2093  * jiffies.
2094  *
2095  * Returns 0 on success.
2096  */
2097 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2098 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2099 {
2100     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2101 		    	    do_proc_dointvec_jiffies_conv,NULL);
2102 }
2103 
2104 /**
2105  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2106  * @table: the sysctl table
2107  * @write: %TRUE if this is a write to the sysctl file
2108  * @filp: the file structure
2109  * @buffer: the user buffer
2110  * @lenp: the size of the user buffer
2111  * @ppos: pointer to the file position
2112  *
2113  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2114  * values from/to the user buffer, treated as an ASCII string.
2115  * The values read are assumed to be in 1/USER_HZ seconds, and
2116  * are converted into jiffies.
2117  *
2118  * Returns 0 on success.
2119  */
2120 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2121 				 void __user *buffer, size_t *lenp, loff_t *ppos)
2122 {
2123     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2124 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
2125 }
2126 
2127 /**
2128  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2129  * @table: the sysctl table
2130  * @write: %TRUE if this is a write to the sysctl file
2131  * @filp: the file structure
2132  * @buffer: the user buffer
2133  * @lenp: the size of the user buffer
2134  * @ppos: file position
2135  * @ppos: the current position in the file
2136  *
2137  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2138  * values from/to the user buffer, treated as an ASCII string.
2139  * The values read are assumed to be in 1/1000 seconds, and
2140  * are converted into jiffies.
2141  *
2142  * Returns 0 on success.
2143  */
2144 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2145 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2146 {
2147 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2148 				do_proc_dointvec_ms_jiffies_conv, NULL);
2149 }
2150 
2151 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
2152 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2153 {
2154 	struct pid *new_pid;
2155 	pid_t tmp;
2156 	int r;
2157 
2158 	tmp = pid_nr(cad_pid);
2159 
2160 	r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
2161 			       lenp, ppos, NULL, NULL);
2162 	if (r || !write)
2163 		return r;
2164 
2165 	new_pid = find_get_pid(tmp);
2166 	if (!new_pid)
2167 		return -ESRCH;
2168 
2169 	put_pid(xchg(&cad_pid, new_pid));
2170 	return 0;
2171 }
2172 
#else /* !CONFIG_PROC_SYSCTL */
2174 
/*
 * Fallbacks for the #else branch of CONFIG_PROC_SYSCTL: the handlers keep
 * their signatures so that callers still build, but each one simply fails
 * with -ENOSYS.
 */
int proc_dostring(ctl_table *table, int write, struct file *filp,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dointvec(ctl_table *table, int write, struct file *filp,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
			     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
				      struct file *filp,
				      void __user *buffer,
				      size_t *lenp, loff_t *ppos)
{
    return -ENOSYS;
}
2230 
2231 
#endif /* CONFIG_PROC_SYSCTL */
2233 
2234 
2235 #ifdef CONFIG_SYSCTL_SYSCALL
2236 /*
2237  * General sysctl support routines
2238  */
2239 
2240 /* The generic string strategy routine: */
2241 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2242 		  void __user *oldval, size_t __user *oldlenp,
2243 		  void __user *newval, size_t newlen)
2244 {
2245 	if (!table->data || !table->maxlen)
2246 		return -ENOTDIR;
2247 
2248 	if (oldval && oldlenp) {
2249 		size_t bufsize;
2250 		if (get_user(bufsize, oldlenp))
2251 			return -EFAULT;
2252 		if (bufsize) {
2253 			size_t len = strlen(table->data), copied;
2254 
2255 			/* This shouldn't trigger for a well-formed sysctl */
2256 			if (len > table->maxlen)
2257 				len = table->maxlen;
2258 
2259 			/* Copy up to a max of bufsize-1 bytes of the string */
2260 			copied = (len >= bufsize) ? bufsize - 1 : len;
2261 
2262 			if (copy_to_user(oldval, table->data, copied) ||
2263 			    put_user(0, (char __user *)(oldval + copied)))
2264 				return -EFAULT;
2265 			if (put_user(len, oldlenp))
2266 				return -EFAULT;
2267 		}
2268 	}
2269 	if (newval && newlen) {
2270 		size_t len = newlen;
2271 		if (len > table->maxlen)
2272 			len = table->maxlen;
2273 		if(copy_from_user(table->data, newval, len))
2274 			return -EFAULT;
2275 		if (len == table->maxlen)
2276 			len--;
2277 		((char *) table->data)[len] = 0;
2278 	}
2279 	return 1;
2280 }
2281 
2282 /*
2283  * This function makes sure that all of the integers in the vector
2284  * are between the minimum and maximum values given in the arrays
2285  * table->extra1 and table->extra2, respectively.
2286  */
2287 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2288 		void __user *oldval, size_t __user *oldlenp,
2289 		void __user *newval, size_t newlen)
2290 {
2291 
2292 	if (newval && newlen) {
2293 		int __user *vec = (int __user *) newval;
2294 		int *min = (int *) table->extra1;
2295 		int *max = (int *) table->extra2;
2296 		size_t length;
2297 		int i;
2298 
2299 		if (newlen % sizeof(int) != 0)
2300 			return -EINVAL;
2301 
2302 		if (!table->extra1 && !table->extra2)
2303 			return 0;
2304 
2305 		if (newlen > table->maxlen)
2306 			newlen = table->maxlen;
2307 		length = newlen / sizeof(int);
2308 
2309 		for (i = 0; i < length; i++) {
2310 			int value;
2311 			if (get_user(value, vec + i))
2312 				return -EFAULT;
2313 			if (min && value < min[i])
2314 				return -EINVAL;
2315 			if (max && value > max[i])
2316 				return -EINVAL;
2317 		}
2318 	}
2319 	return 0;
2320 }
2321 
2322 /* Strategy function to convert jiffies to seconds */
2323 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2324 		void __user *oldval, size_t __user *oldlenp,
2325 		void __user *newval, size_t newlen)
2326 {
2327 	if (oldval && oldlenp) {
2328 		size_t olen;
2329 
2330 		if (get_user(olen, oldlenp))
2331 			return -EFAULT;
2332 		if (olen) {
2333 			int val;
2334 
2335 			if (olen < sizeof(int))
2336 				return -EINVAL;
2337 
2338 			val = *(int *)(table->data) / HZ;
2339 			if (put_user(val, (int __user *)oldval))
2340 				return -EFAULT;
2341 			if (put_user(sizeof(int), oldlenp))
2342 				return -EFAULT;
2343 		}
2344 	}
2345 	if (newval && newlen) {
2346 		int new;
2347 		if (newlen != sizeof(int))
2348 			return -EINVAL;
2349 		if (get_user(new, (int __user *)newval))
2350 			return -EFAULT;
2351 		*(int *)(table->data) = new*HZ;
2352 	}
2353 	return 1;
2354 }
2355 
2356 /* Strategy function to convert jiffies to seconds */
2357 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2358 		void __user *oldval, size_t __user *oldlenp,
2359 		void __user *newval, size_t newlen)
2360 {
2361 	if (oldval && oldlenp) {
2362 		size_t olen;
2363 
2364 		if (get_user(olen, oldlenp))
2365 			return -EFAULT;
2366 		if (olen) {
2367 			int val;
2368 
2369 			if (olen < sizeof(int))
2370 				return -EINVAL;
2371 
2372 			val = jiffies_to_msecs(*(int *)(table->data));
2373 			if (put_user(val, (int __user *)oldval))
2374 				return -EFAULT;
2375 			if (put_user(sizeof(int), oldlenp))
2376 				return -EFAULT;
2377 		}
2378 	}
2379 	if (newval && newlen) {
2380 		int new;
2381 		if (newlen != sizeof(int))
2382 			return -EINVAL;
2383 		if (get_user(new, (int __user *)newval))
2384 			return -EFAULT;
2385 		*(int *)(table->data) = msecs_to_jiffies(new);
2386 	}
2387 	return 1;
2388 }
2389 
2390 
2391 
2392 #else /* CONFIG_SYSCTL_SYSCALL */
2393 
2394 
2395 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
2396 {
2397 	static int msg_count;
2398 	struct __sysctl_args tmp;
2399 	int name[CTL_MAXNAME];
2400 	int i;
2401 
2402 	/* Read in the sysctl name for better debug message logging */
2403 	if (copy_from_user(&tmp, args, sizeof(tmp)))
2404 		return -EFAULT;
2405 	if (tmp.nlen <= 0 || tmp.nlen >= CTL_MAXNAME)
2406 		return -ENOTDIR;
2407 	for (i = 0; i < tmp.nlen; i++)
2408 		if (get_user(name[i], tmp.name + i))
2409 			return -EFAULT;
2410 
2411 	/* Ignore accesses to kernel.version */
2412 	if ((tmp.nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
2413 		goto out;
2414 
2415 	if (msg_count < 5) {
2416 		msg_count++;
2417 		printk(KERN_INFO
2418 			"warning: process `%s' used the removed sysctl "
2419 			"system call with ", current->comm);
2420 		for (i = 0; i < tmp.nlen; i++)
2421 			printk("%d.", name[i]);
2422 		printk("\n");
2423 	}
2424 out:
2425 	return -ENOSYS;
2426 }
2427 
2428 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2429 		  void __user *oldval, size_t __user *oldlenp,
2430 		  void __user *newval, size_t newlen)
2431 {
2432 	return -ENOSYS;
2433 }
2434 
2435 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2436 		void __user *oldval, size_t __user *oldlenp,
2437 		void __user *newval, size_t newlen)
2438 {
2439 	return -ENOSYS;
2440 }
2441 
2442 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2443 		void __user *oldval, size_t __user *oldlenp,
2444 		void __user *newval, size_t newlen)
2445 {
2446 	return -ENOSYS;
2447 }
2448 
2449 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2450 		void __user *oldval, size_t __user *oldlenp,
2451 		void __user *newval, size_t newlen)
2452 {
2453 	return -ENOSYS;
2454 }
2455 
2456 #endif /* CONFIG_SYSCTL_SYSCALL */
2457 
/*
 * Symbol exports are gathered here instead of following each definition.
 * Routines such as the sysctl_* strategy helpers are defined twice (a
 * real version and an -ENOSYS stub chosen by the configuration), so a
 * per-definition export would have to be written out twice as well.
 */

/* Table registration */
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(unregister_sysctl_table);

/* /proc/sys handlers */
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);

/* Strategy routines */
EXPORT_SYMBOL(sysctl_string);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);
2476