1 /* -*- linux-c -*- 2 * sysctl_net_core.c: sysctl interface to net core subsystem. 3 * 4 * Begun April 1, 1996, Mike Shaver. 5 * Added /proc/sys/net/core directory entry (empty =) ). [MS] 6 */ 7 8 #include <linux/mm.h> 9 #include <linux/sysctl.h> 10 #include <linux/module.h> 11 #include <linux/socket.h> 12 #include <linux/netdevice.h> 13 #include <linux/ratelimit.h> 14 #include <linux/vmalloc.h> 15 #include <linux/init.h> 16 #include <linux/slab.h> 17 #include <linux/kmemleak.h> 18 19 #include <net/ip.h> 20 #include <net/sock.h> 21 #include <net/net_ratelimit.h> 22 #include <net/busy_poll.h> 23 #include <net/pkt_sched.h> 24 25 static int zero = 0; 26 static int one = 1; 27 static int min_sndbuf = SOCK_MIN_SNDBUF; 28 static int min_rcvbuf = SOCK_MIN_RCVBUF; 29 30 static int net_msg_warn; /* Unused, but still a sysctl */ 31 32 #ifdef CONFIG_RPS 33 static int rps_sock_flow_sysctl(struct ctl_table *table, int write, 34 void __user *buffer, size_t *lenp, loff_t *ppos) 35 { 36 unsigned int orig_size, size; 37 int ret, i; 38 struct ctl_table tmp = { 39 .data = &size, 40 .maxlen = sizeof(size), 41 .mode = table->mode 42 }; 43 struct rps_sock_flow_table *orig_sock_table, *sock_table; 44 static DEFINE_MUTEX(sock_flow_mutex); 45 46 mutex_lock(&sock_flow_mutex); 47 48 orig_sock_table = rcu_dereference_protected(rps_sock_flow_table, 49 lockdep_is_held(&sock_flow_mutex)); 50 size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0; 51 52 ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); 53 54 if (write) { 55 if (size) { 56 if (size > 1<<29) { 57 /* Enforce limit to prevent overflow */ 58 mutex_unlock(&sock_flow_mutex); 59 return -EINVAL; 60 } 61 size = roundup_pow_of_two(size); 62 if (size != orig_size) { 63 sock_table = 64 vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size)); 65 if (!sock_table) { 66 mutex_unlock(&sock_flow_mutex); 67 return -ENOMEM; 68 } 69 rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1; 70 sock_table->mask = size - 1; 71 } else 72 sock_table = orig_sock_table; 73 74 for (i = 0; i < size; i++) 75 sock_table->ents[i] = RPS_NO_CPU; 76 } else 77 sock_table = NULL; 78 79 if (sock_table != orig_sock_table) { 80 rcu_assign_pointer(rps_sock_flow_table, sock_table); 81 if (sock_table) 82 static_key_slow_inc(&rps_needed); 83 if (orig_sock_table) { 84 static_key_slow_dec(&rps_needed); 85 synchronize_rcu(); 86 vfree(orig_sock_table); 87 } 88 } 89 } 90 91 mutex_unlock(&sock_flow_mutex); 92 93 return ret; 94 } 95 #endif /* CONFIG_RPS */ 96 97 #ifdef CONFIG_NET_FLOW_LIMIT 98 static DEFINE_MUTEX(flow_limit_update_mutex); 99 100 static int flow_limit_cpu_sysctl(struct ctl_table *table, int write, 101 void __user *buffer, size_t *lenp, 102 loff_t *ppos) 103 { 104 struct sd_flow_limit *cur; 105 struct softnet_data *sd; 106 cpumask_var_t mask; 107 int i, len, ret = 0; 108 109 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 110 return -ENOMEM; 111 112 if (write) { 113 ret = cpumask_parse_user(buffer, *lenp, mask); 114 if (ret) 115 goto done; 116 117 mutex_lock(&flow_limit_update_mutex); 118 len = sizeof(*cur) + netdev_flow_limit_table_len; 119 for_each_possible_cpu(i) { 120 sd = &per_cpu(softnet_data, i); 121 cur = rcu_dereference_protected(sd->flow_limit, 122 lockdep_is_held(&flow_limit_update_mutex)); 123 if (cur && !cpumask_test_cpu(i, mask)) { 124 RCU_INIT_POINTER(sd->flow_limit, NULL); 125 synchronize_rcu(); 126 kfree(cur); 127 } else if (!cur && cpumask_test_cpu(i, mask)) { 128 cur = kzalloc_node(len, GFP_KERNEL, 129 cpu_to_node(i)); 130 if (!cur) { 131 /* not unwinding previous changes */ 132 ret = -ENOMEM; 133 goto write_unlock; 134 } 135 cur->num_buckets = netdev_flow_limit_table_len; 136 rcu_assign_pointer(sd->flow_limit, cur); 137 } 138 } 139 write_unlock: 140 mutex_unlock(&flow_limit_update_mutex); 141 } else { 142 char kbuf[128]; 143 144 if (*ppos || !*lenp) { 145 *lenp = 0; 146 goto done; 147 } 148 149 cpumask_clear(mask); 150 rcu_read_lock(); 151 for_each_possible_cpu(i) { 152 sd = &per_cpu(softnet_data, i); 153 if (rcu_dereference(sd->flow_limit)) 154 cpumask_set_cpu(i, mask); 155 } 156 rcu_read_unlock(); 157 158 len = min(sizeof(kbuf) - 1, *lenp); 159 len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask)); 160 if (!len) { 161 *lenp = 0; 162 goto done; 163 } 164 if (len < *lenp) 165 kbuf[len++] = '\n'; 166 if (copy_to_user(buffer, kbuf, len)) { 167 ret = -EFAULT; 168 goto done; 169 } 170 *lenp = len; 171 *ppos += len; 172 } 173 174 done: 175 free_cpumask_var(mask); 176 return ret; 177 } 178 179 static int flow_limit_table_len_sysctl(struct ctl_table *table, int write, 180 void __user *buffer, size_t *lenp, 181 loff_t *ppos) 182 { 183 unsigned int old, *ptr; 184 int ret; 185 186 mutex_lock(&flow_limit_update_mutex); 187 188 ptr = table->data; 189 old = *ptr; 190 ret = proc_dointvec(table, write, buffer, lenp, ppos); 191 if (!ret && write && !is_power_of_2(*ptr)) { 192 *ptr = old; 193 ret = -EINVAL; 194 } 195 196 mutex_unlock(&flow_limit_update_mutex); 197 return ret; 198 } 199 #endif /* CONFIG_NET_FLOW_LIMIT */ 200 201 #ifdef CONFIG_NET_SCHED 202 static int set_default_qdisc(struct ctl_table *table, int write, 203 void __user *buffer, size_t *lenp, loff_t *ppos) 204 { 205 char id[IFNAMSIZ]; 206 struct ctl_table tbl = { 207 .data = id, 208 .maxlen = IFNAMSIZ, 209 }; 210 int ret; 211 212 qdisc_get_default(id, IFNAMSIZ); 213 214 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 215 if (write && ret == 0) 216 ret = qdisc_set_default(id); 217 return ret; 218 } 219 #endif 220 221 static int proc_do_rss_key(struct ctl_table *table, int write, 222 void __user *buffer, size_t *lenp, loff_t *ppos) 223 { 224 struct ctl_table fake_table; 225 char buf[NETDEV_RSS_KEY_LEN * 3]; 226 227 snprintf(buf, sizeof(buf), "%*phC", NETDEV_RSS_KEY_LEN, netdev_rss_key); 228 fake_table.data = buf; 229 fake_table.maxlen = sizeof(buf); 230 return proc_dostring(&fake_table, write, buffer, lenp, ppos); 231 } 232 233 static struct ctl_table net_core_table[] = { 234 #ifdef CONFIG_NET 235 { 236 .procname = "wmem_max", 237 .data = &sysctl_wmem_max, 238 .maxlen = sizeof(int), 239 .mode = 0644, 240 .proc_handler = proc_dointvec_minmax, 241 .extra1 = &min_sndbuf, 242 }, 243 { 244 .procname = "rmem_max", 245 .data = &sysctl_rmem_max, 246 .maxlen = sizeof(int), 247 .mode = 0644, 248 .proc_handler = proc_dointvec_minmax, 249 .extra1 = &min_rcvbuf, 250 }, 251 { 252 .procname = "wmem_default", 253 .data = &sysctl_wmem_default, 254 .maxlen = sizeof(int), 255 .mode = 0644, 256 .proc_handler = proc_dointvec_minmax, 257 .extra1 = &min_sndbuf, 258 }, 259 { 260 .procname = "rmem_default", 261 .data = &sysctl_rmem_default, 262 .maxlen = sizeof(int), 263 .mode = 0644, 264 .proc_handler = proc_dointvec_minmax, 265 .extra1 = &min_rcvbuf, 266 }, 267 { 268 .procname = "dev_weight", 269 .data = &weight_p, 270 .maxlen = sizeof(int), 271 .mode = 0644, 272 .proc_handler = proc_dointvec 273 }, 274 { 275 .procname = "netdev_max_backlog", 276 .data = &netdev_max_backlog, 277 .maxlen = sizeof(int), 278 .mode = 0644, 279 .proc_handler = proc_dointvec 280 }, 281 { 282 .procname = "netdev_rss_key", 283 .data = &netdev_rss_key, 284 .maxlen = sizeof(int), 285 .mode = 0444, 286 .proc_handler = proc_do_rss_key, 287 }, 288 #ifdef CONFIG_BPF_JIT 289 { 290 .procname = "bpf_jit_enable", 291 .data = &bpf_jit_enable, 292 .maxlen = sizeof(int), 293 .mode = 0644, 294 .proc_handler = proc_dointvec 295 }, 296 #endif 297 { 298 .procname = "netdev_tstamp_prequeue", 299 .data = &netdev_tstamp_prequeue, 300 .maxlen = sizeof(int), 301 .mode = 0644, 302 .proc_handler = proc_dointvec 303 }, 304 { 305 .procname = "message_cost", 306 .data = &net_ratelimit_state.interval, 307 .maxlen = sizeof(int), 308 .mode = 0644, 309 .proc_handler = proc_dointvec_jiffies, 310 }, 311 { 312 .procname = "message_burst", 313 .data = &net_ratelimit_state.burst, 314 .maxlen = sizeof(int), 315 .mode = 0644, 316 .proc_handler = proc_dointvec, 317 }, 318 { 319 .procname = "optmem_max", 320 .data = &sysctl_optmem_max, 321 .maxlen = sizeof(int), 322 .mode = 0644, 323 .proc_handler = proc_dointvec 324 }, 325 { 326 .procname = "tstamp_allow_data", 327 .data = &sysctl_tstamp_allow_data, 328 .maxlen = sizeof(int), 329 .mode = 0644, 330 .proc_handler = proc_dointvec_minmax, 331 .extra1 = &zero, 332 .extra2 = &one 333 }, 334 #ifdef CONFIG_RPS 335 { 336 .procname = "rps_sock_flow_entries", 337 .maxlen = sizeof(int), 338 .mode = 0644, 339 .proc_handler = rps_sock_flow_sysctl 340 }, 341 #endif 342 #ifdef CONFIG_NET_FLOW_LIMIT 343 { 344 .procname = "flow_limit_cpu_bitmap", 345 .mode = 0644, 346 .proc_handler = flow_limit_cpu_sysctl 347 }, 348 { 349 .procname = "flow_limit_table_len", 350 .data = &netdev_flow_limit_table_len, 351 .maxlen = sizeof(int), 352 .mode = 0644, 353 .proc_handler = flow_limit_table_len_sysctl 354 }, 355 #endif /* CONFIG_NET_FLOW_LIMIT */ 356 #ifdef CONFIG_NET_RX_BUSY_POLL 357 { 358 .procname = "busy_poll", 359 .data = &sysctl_net_busy_poll, 360 .maxlen = sizeof(unsigned int), 361 .mode = 0644, 362 .proc_handler = proc_dointvec 363 }, 364 { 365 .procname = "busy_read", 366 .data = &sysctl_net_busy_read, 367 .maxlen = sizeof(unsigned int), 368 .mode = 0644, 369 .proc_handler = proc_dointvec 370 }, 371 #endif 372 #ifdef CONFIG_NET_SCHED 373 { 374 .procname = "default_qdisc", 375 .mode = 0644, 376 .maxlen = IFNAMSIZ, 377 .proc_handler = set_default_qdisc 378 }, 379 #endif 380 #endif /* CONFIG_NET */ 381 { 382 .procname = "netdev_budget", 383 .data = &netdev_budget, 384 .maxlen = sizeof(int), 385 .mode = 0644, 386 .proc_handler = proc_dointvec 387 }, 388 { 389 .procname = "warnings", 390 .data = &net_msg_warn, 391 .maxlen = sizeof(int), 392 .mode = 0644, 393 .proc_handler = proc_dointvec 394 }, 395 { } 396 }; 397 398 static struct ctl_table netns_core_table[] = { 399 { 400 .procname = "somaxconn", 401 .data = &init_net.core.sysctl_somaxconn, 402 .maxlen = sizeof(int), 403 .mode = 0644, 404 .extra1 = &zero, 405 .proc_handler = proc_dointvec_minmax 406 }, 407 { } 408 }; 409 410 static __net_init int sysctl_core_net_init(struct net *net) 411 { 412 struct ctl_table *tbl; 413 414 net->core.sysctl_somaxconn = SOMAXCONN; 415 416 tbl = netns_core_table; 417 if (!net_eq(net, &init_net)) { 418 tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); 419 if (tbl == NULL) 420 goto err_dup; 421 422 tbl[0].data = &net->core.sysctl_somaxconn; 423 424 /* Don't export any sysctls to unprivileged users */ 425 if (net->user_ns != &init_user_ns) { 426 tbl[0].procname = NULL; 427 } 428 } 429 430 net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl); 431 if (net->core.sysctl_hdr == NULL) 432 goto err_reg; 433 434 return 0; 435 436 err_reg: 437 if (tbl != netns_core_table) 438 kfree(tbl); 439 err_dup: 440 return -ENOMEM; 441 } 442 443 static __net_exit void sysctl_core_net_exit(struct net *net) 444 { 445 struct ctl_table *tbl; 446 447 tbl = net->core.sysctl_hdr->ctl_table_arg; 448 unregister_net_sysctl_table(net->core.sysctl_hdr); 449 BUG_ON(tbl == netns_core_table); 450 kfree(tbl); 451 } 452 453 static __net_initdata struct pernet_operations sysctl_core_ops = { 454 .init = sysctl_core_net_init, 455 .exit = sysctl_core_net_exit, 456 }; 457 458 static __init int sysctl_core_init(void) 459 { 460 register_net_sysctl(&init_net, "net/core", net_core_table); 461 return register_pernet_subsys(&sysctl_core_ops); 462 } 463 464 fs_initcall(sysctl_core_init); 465