/* -*- linux-c -*-
 * sysctl_net_core.c: sysctl interface to net core subsystem.
 *
 * Begun April 1, 1996, Mike Shaver.
 * Added /proc/sys/net/core directory entry (empty =) ). [MS]
 */

#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/kmemleak.h>

#include <net/ip.h>
#include <net/sock.h>
#include <net/net_ratelimit.h>
#include <net/busy_poll.h>
#include <net/pkt_sched.h>

/* Shared bounds for the proc_dointvec_minmax() entries below. */
static int zero = 0;
static int one = 1;
static int ushort_max = USHRT_MAX;

static int net_msg_warn;	/* Unused, but still a sysctl */

#ifdef CONFIG_RPS
/*
 * Handler for /proc/sys/net/core/rps_sock_flow_entries.
 *
 * Read reports the current RFS socket-flow table size (0 when no table
 * is installed).  Write resizes the global rps_sock_flow_table: the
 * requested entry count is rounded up to a power of two, a new table is
 * vmalloc()ed when the size actually changes, all entries are reset to
 * RPS_NO_CPU, and the table is published with rcu_assign_pointer().
 * Writing 0 removes the table.  The rps_needed static key is bumped for
 * a newly-installed table and dropped for a removed one, and an old
 * table is vfree()d only after synchronize_rcu() so concurrent RCU
 * readers never see freed memory.
 */
static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int orig_size, size;
	int ret, i;
	/* Proxy ctl_table: lets proc_dointvec() operate on the local
	 * 'size' variable (this sysctl entry carries no .data).
	 */
	struct ctl_table tmp = {
		.data = &size,
		.maxlen = sizeof(size),
		.mode = table->mode
	};
	struct rps_sock_flow_table *orig_sock_table, *sock_table;
	static DEFINE_MUTEX(sock_flow_mutex);

	/* Serialize all readers/writers of this sysctl against each other. */
	mutex_lock(&sock_flow_mutex);

	orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
					lockdep_is_held(&sock_flow_mutex));
	/* mask is (entries - 1), so the installed size is mask + 1. */
	size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;

	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);

	if (write) {
		if (size) {
			if (size > 1<<30) {
				/* Enforce limit to prevent overflow */
				mutex_unlock(&sock_flow_mutex);
				return -EINVAL;
			}
			size = roundup_pow_of_two(size);
			if (size != orig_size) {
				sock_table =
				    vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
				if (!sock_table) {
					mutex_unlock(&sock_flow_mutex);
					return -ENOMEM;
				}

				sock_table->mask = size - 1;
			} else
				sock_table = orig_sock_table;

			/* Reset every flow entry — also when the existing
			 * table is reused at an unchanged size.
			 */
			for (i = 0; i < size; i++)
				sock_table->ents[i] = RPS_NO_CPU;
		} else
			sock_table = NULL;	/* writing 0 disables the table */

		if (sock_table != orig_sock_table) {
			rcu_assign_pointer(rps_sock_flow_table, sock_table);
			if (sock_table)
				static_key_slow_inc(&rps_needed);
			if (orig_sock_table) {
				static_key_slow_dec(&rps_needed);
				/* Let in-flight RCU readers drain before
				 * freeing the old table.
				 */
				synchronize_rcu();
				vfree(orig_sock_table);
			}
		}
	}

	mutex_unlock(&sock_flow_mutex);

	return ret;
}
#endif /* CONFIG_RPS */

#ifdef CONFIG_NET_FLOW_LIMIT
/* Serializes installation/removal of per-cpu flow_limit tables and the
 * table-length sysctl below.
 */
static DEFINE_MUTEX(flow_limit_update_mutex);

/*
 * Handler for /proc/sys/net/core/flow_limit_cpu_bitmap.
 *
 * The value is a cpumask.  On write, each CPU newly present in the mask
 * gets a zeroed flow-limit table allocated on its home NUMA node and
 * published via rcu_assign_pointer(); each CPU newly absent has its
 * table retracted (RCU_INIT_POINTER to NULL) and kfree()d after
 * synchronize_rcu().  An allocation failure mid-loop returns -ENOMEM
 * without unwinding CPUs already changed (noted inline).  On read, the
 * mask of CPUs that currently have a table is reconstructed under
 * rcu_read_lock() and formatted into a small stack buffer.
 */
static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
				 void __user *buffer, size_t *lenp,
				 loff_t *ppos)
{
	struct sd_flow_limit *cur;
	struct softnet_data *sd;
	cpumask_var_t mask;
	int i, len, ret = 0;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	if (write) {
		ret = cpumask_parse_user(buffer, *lenp, mask);
		if (ret)
			goto done;

		mutex_lock(&flow_limit_update_mutex);
		/* Header plus netdev_flow_limit_table_len trailing bytes;
		 * presumably one byte per bucket — confirm against
		 * struct sd_flow_limit's flexible array.
		 */
		len = sizeof(*cur) + netdev_flow_limit_table_len;
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			cur = rcu_dereference_protected(sd->flow_limit,
				     lockdep_is_held(&flow_limit_update_mutex));
			if (cur && !cpumask_test_cpu(i, mask)) {
				/* CPU dropped from the mask: retract the
				 * table, then free it once readers drain.
				 */
				RCU_INIT_POINTER(sd->flow_limit, NULL);
				synchronize_rcu();
				kfree(cur);
			} else if (!cur && cpumask_test_cpu(i, mask)) {
				cur = kzalloc_node(len, GFP_KERNEL,
						   cpu_to_node(i));
				if (!cur) {
					/* not unwinding previous changes */
					ret = -ENOMEM;
					goto write_unlock;
				}
				cur->num_buckets = netdev_flow_limit_table_len;
				rcu_assign_pointer(sd->flow_limit, cur);
			}
		}
write_unlock:
		mutex_unlock(&flow_limit_update_mutex);
	} else {
		char kbuf[128];

		/* Single-shot read: any offset past 0 reads as EOF. */
		if (*ppos || !*lenp) {
			*lenp = 0;
			goto done;
		}

		/* Rebuild the mask from which CPUs have a table installed. */
		cpumask_clear(mask);
		rcu_read_lock();
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			if (rcu_dereference(sd->flow_limit))
				cpumask_set_cpu(i, mask);
		}
		rcu_read_unlock();

		/* Leave room for the trailing newline/NUL. */
		len = min(sizeof(kbuf) - 1, *lenp);
		len = cpumask_scnprintf(kbuf, len, mask);
		if (!len) {
			*lenp = 0;
			goto done;
		}
		if (len < *lenp)
			kbuf[len++] = '\n';
		if (copy_to_user(buffer, kbuf, len)) {
			ret = -EFAULT;
			goto done;
		}
		*lenp = len;
		*ppos += len;
	}

done:
	free_cpumask_var(mask);
	return ret;
}

/*
 * Handler for /proc/sys/net/core/flow_limit_table_len.
 *
 * Accepts only powers of two; any other value is rejected with -EINVAL
 * and the previous length restored.  The mutex makes the write +
 * validate + rollback sequence atomic w.r.t. concurrent writers and
 * w.r.t. flow_limit_cpu_sysctl(), which reads this length when sizing
 * new tables.
 */
static int flow_limit_table_len_sysctl(struct ctl_table *table, int write,
				       void __user *buffer, size_t *lenp,
				       loff_t *ppos)
{
	unsigned int old, *ptr;
	int ret;

	mutex_lock(&flow_limit_update_mutex);

	ptr = table->data;
	old = *ptr;
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (!ret && write && !is_power_of_2(*ptr)) {
		*ptr = old;	/* roll back the rejected value */
		ret = -EINVAL;
	}

	mutex_unlock(&flow_limit_update_mutex);
	return ret;
}
#endif /* CONFIG_NET_FLOW_LIMIT */

#ifdef CONFIG_NET_SCHED
/*
 * Handler for /proc/sys/net/core/default_qdisc.
 *
 * Read returns the name of the current default queueing discipline;
 * write passes the name to qdisc_set_default(), whose error (e.g. for
 * an unknown qdisc) is propagated to the caller.
 */
static int set_default_qdisc(struct ctl_table *table, int write,
			     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	char id[IFNAMSIZ];
	/* Proxy ctl_table so proc_dostring() works on the local buffer. */
	struct ctl_table tbl = {
		.data = id,
		.maxlen = IFNAMSIZ,
	};
	int ret;

	qdisc_get_default(id, IFNAMSIZ);

	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
	if (write && ret == 0)
		ret = qdisc_set_default(id);
	return ret;
}
#endif
/*
 * Handler for /proc/sys/net/core/netdev_rss_key (registered 0444, so
 * in practice read-only).
 *
 * Formats the boot-time random RSS key as colon-separated hex bytes
 * ("%*phC") through a temporary ctl_table, so the raw binary key in
 * table->data is never handed to proc_dostring() directly.  The buffer
 * holds 3 chars per byte ("xx:"), with the last byte's separator slot
 * reused for the terminating NUL.
 */
static int proc_do_rss_key(struct ctl_table *table, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table fake_table;
	char buf[NETDEV_RSS_KEY_LEN * 3];

	snprintf(buf, sizeof(buf), "%*phC", NETDEV_RSS_KEY_LEN, netdev_rss_key);
	fake_table.data = buf;
	fake_table.maxlen = sizeof(buf);
	return proc_dostring(&fake_table, write, buffer, lenp, ppos);
}

/* Global (shared across network namespaces) /proc/sys/net/core knobs.
 * The socket buffer limits use &one as a floor so they can never be
 * driven to zero.
 */
static struct ctl_table net_core_table[] = {
#ifdef CONFIG_NET
	{
		.procname	= "wmem_max",
		.data		= &sysctl_wmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one,
	},
	{
		.procname	= "rmem_max",
		.data		= &sysctl_rmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one,
	},
	{
		.procname	= "wmem_default",
		.data		= &sysctl_wmem_default,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one,
	},
	{
		.procname	= "rmem_default",
		.data		= &sysctl_rmem_default,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one,
	},
	{
		.procname	= "dev_weight",
		.data		= &weight_p,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "netdev_max_backlog",
		.data		= &netdev_max_backlog,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		/* .data/.maxlen unused by the custom handler, which
		 * formats netdev_rss_key itself; world-readable only.
		 */
		.procname	= "netdev_rss_key",
		.data		= &netdev_rss_key,
		.maxlen		= sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_do_rss_key,
	},
#ifdef CONFIG_BPF_JIT
	{
		.procname	= "bpf_jit_enable",
		.data		= &bpf_jit_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
#endif
	{
		.procname	= "netdev_tstamp_prequeue",
		.data		= &netdev_tstamp_prequeue,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		/* Interval half of the net_ratelimit() state, in jiffies. */
		.procname	= "message_cost",
		.data		= &net_ratelimit_state.interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "message_burst",
		.data		= &net_ratelimit_state.burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "optmem_max",
		.data		= &sysctl_optmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
#ifdef CONFIG_RPS
	{
		/* No .data: rps_sock_flow_sysctl() manages the value. */
		.procname	= "rps_sock_flow_entries",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= rps_sock_flow_sysctl
	},
#endif
#ifdef CONFIG_NET_FLOW_LIMIT
	{
		/* Cpumask-valued; fully handled by the custom handler. */
		.procname	= "flow_limit_cpu_bitmap",
		.mode		= 0644,
		.proc_handler	= flow_limit_cpu_sysctl
	},
	{
		.procname	= "flow_limit_table_len",
		.data		= &netdev_flow_limit_table_len,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= flow_limit_table_len_sysctl
	},
#endif /* CONFIG_NET_FLOW_LIMIT */
#ifdef CONFIG_NET_RX_BUSY_POLL
	{
		.procname	= "busy_poll",
		.data		= &sysctl_net_busy_poll,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "busy_read",
		.data		= &sysctl_net_busy_read,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
#endif
#ifdef CONFIG_NET_SCHED
	{
		.procname	= "default_qdisc",
		.mode		= 0644,
		.maxlen		= IFNAMSIZ,
		.proc_handler	= set_default_qdisc
	},
#endif
#endif /* CONFIG_NET */
	{
		.procname	= "netdev_budget",
		.data		= &netdev_budget,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "warnings",
		.data		= &net_msg_warn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{ }	/* sentinel */
};

/* Per-network-namespace knobs; .data is re-pointed at each netns's own
 * storage by sysctl_core_net_init() for non-init namespaces.
 */
static struct ctl_table netns_core_table[] = {
	{
		.procname	= "somaxconn",
		.data		= &init_net.core.sysctl_somaxconn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.extra1		= &zero,
		.extra2		= &ushort_max,
		.proc_handler	= proc_dointvec_minmax
	},
	{ }	/* sentinel */
};

/*
 * Per-netns init: seed somaxconn with its default and register the
 * net/core sysctl directory for this namespace.  init_net uses the
 * static table directly; every other namespace gets a kmemdup()ed copy
 * with .data re-pointed at its own sysctl_somaxconn.  For namespaces
 * owned by a non-initial user namespace, NULLing the first procname
 * hides the entries (the table then looks empty to the registration
 * code), so unprivileged users see no sysctls.  Returns 0 or -ENOMEM.
 */
static __net_init int sysctl_core_net_init(struct net *net)
{
	struct ctl_table *tbl;

	net->core.sysctl_somaxconn = SOMAXCONN;

	tbl = netns_core_table;
	if (!net_eq(net, &init_net)) {
		tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
		if (tbl == NULL)
			goto err_dup;

		tbl[0].data = &net->core.sysctl_somaxconn;

		/* Don't export any sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns) {
			tbl[0].procname = NULL;
		}
	}

	net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl);
	if (net->core.sysctl_hdr == NULL)
		goto err_reg;

	return 0;

err_reg:
	/* Only free a duplicated table, never the static one. */
	if (tbl != netns_core_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
}

/*
 * Per-netns exit: unregister and free the duplicated table.  init_net
 * is never torn down, so reaching here with the static table would be
 * a bug — hence the BUG_ON before the unconditional kfree().
 */
static __net_exit void sysctl_core_net_exit(struct net *net)
{
	struct ctl_table *tbl;

	tbl = net->core.sysctl_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->core.sysctl_hdr);
	BUG_ON(tbl == netns_core_table);
	kfree(tbl);
}

static __net_initdata struct pernet_operations sysctl_core_ops = {
	.init = sysctl_core_net_init,
	.exit = sysctl_core_net_exit,
};

/* Register the global table once for init_net, then hook the per-netns
 * init/exit callbacks for every namespace (including init_net).
 */
static __init int sysctl_core_init(void)
{
	register_net_sysctl(&init_net, "net/core", net_core_table);
	return register_pernet_subsys(&sysctl_core_ops);
}

fs_initcall(sysctl_core_init);