1 /* 2 * net-sysfs.c - network device class and attributes 3 * 4 * Copyright (c) 2003 Stephen Hemminger <shemminger@osdl.org> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12 #include <linux/capability.h> 13 #include <linux/kernel.h> 14 #include <linux/netdevice.h> 15 #include <linux/if_arp.h> 16 #include <linux/slab.h> 17 #include <linux/nsproxy.h> 18 #include <net/sock.h> 19 #include <net/net_namespace.h> 20 #include <linux/rtnetlink.h> 21 #include <linux/wireless.h> 22 #include <linux/vmalloc.h> 23 #include <linux/export.h> 24 #include <linux/jiffies.h> 25 #include <net/wext.h> 26 27 #include "net-sysfs.h" 28 29 #ifdef CONFIG_SYSFS 30 static const char fmt_hex[] = "%#x\n"; 31 static const char fmt_long_hex[] = "%#lx\n"; 32 static const char fmt_dec[] = "%d\n"; 33 static const char fmt_udec[] = "%u\n"; 34 static const char fmt_ulong[] = "%lu\n"; 35 static const char fmt_u64[] = "%llu\n"; 36 37 static inline int dev_isalive(const struct net_device *dev) 38 { 39 return dev->reg_state <= NETREG_REGISTERED; 40 } 41 42 /* use same locking rules as GIF* ioctl's */ 43 static ssize_t netdev_show(const struct device *dev, 44 struct device_attribute *attr, char *buf, 45 ssize_t (*format)(const struct net_device *, char *)) 46 { 47 struct net_device *net = to_net_dev(dev); 48 ssize_t ret = -EINVAL; 49 50 read_lock(&dev_base_lock); 51 if (dev_isalive(net)) 52 ret = (*format)(net, buf); 53 read_unlock(&dev_base_lock); 54 55 return ret; 56 } 57 58 /* generate a show function for simple field */ 59 #define NETDEVICE_SHOW(field, format_string) \ 60 static ssize_t format_##field(const struct net_device *net, char *buf) \ 61 { \ 62 return sprintf(buf, format_string, net->field); \ 63 } \ 64 static ssize_t show_##field(struct device *dev, \ 65 struct device_attribute *attr, char *buf) \ 66 { \ 67 return netdev_show(dev, attr, buf, format_##field); \ 68 } 69 70 71 /* use same locking and permission rules as SIF* ioctl's */ 72 static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, 73 const char *buf, size_t len, 74 int (*set)(struct net_device *, unsigned long)) 75 { 76 struct net_device *net = to_net_dev(dev); 77 unsigned long new; 78 int ret = -EINVAL; 79 80 if (!capable(CAP_NET_ADMIN)) 81 return -EPERM; 82 83 ret = kstrtoul(buf, 0, &new); 84 if (ret) 85 goto err; 86 87 if (!rtnl_trylock()) 88 return restart_syscall(); 89 90 if (dev_isalive(net)) { 91 if ((ret = (*set)(net, new)) == 0) 92 ret = len; 93 } 94 rtnl_unlock(); 95 err: 96 return ret; 97 } 98 99 NETDEVICE_SHOW(dev_id, fmt_hex); 100 NETDEVICE_SHOW(addr_assign_type, fmt_dec); 101 NETDEVICE_SHOW(addr_len, fmt_dec); 102 NETDEVICE_SHOW(iflink, fmt_dec); 103 NETDEVICE_SHOW(ifindex, fmt_dec); 104 NETDEVICE_SHOW(type, fmt_dec); 105 NETDEVICE_SHOW(link_mode, fmt_dec); 106 107 /* use same locking rules as GIFHWADDR ioctl's */ 108 static ssize_t show_address(struct device *dev, struct device_attribute *attr, 109 char *buf) 110 { 111 struct net_device *net = to_net_dev(dev); 112 ssize_t ret = -EINVAL; 113 114 read_lock(&dev_base_lock); 115 if (dev_isalive(net)) 116 ret = sysfs_format_mac(buf, net->dev_addr, net->addr_len); 117 read_unlock(&dev_base_lock); 118 return ret; 119 } 120 121 static ssize_t show_broadcast(struct device *dev, 122 struct device_attribute *attr, char *buf) 123 { 124 struct net_device *net = to_net_dev(dev); 125 if (dev_isalive(net)) 126 return sysfs_format_mac(buf, net->broadcast, net->addr_len); 127 return -EINVAL; 128 } 129 130 static ssize_t show_carrier(struct device *dev, 131 struct device_attribute *attr, char *buf) 132 { 133 struct net_device *netdev = to_net_dev(dev); 134 if (netif_running(netdev)) { 135 return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev)); 136 } 137 return -EINVAL; 138 } 139 140 static ssize_t show_speed(struct device *dev, 141 struct device_attribute *attr, char *buf) 142 { 143 struct net_device *netdev = to_net_dev(dev); 144 int ret = -EINVAL; 145 146 if (!rtnl_trylock()) 147 return restart_syscall(); 148 149 if (netif_running(netdev)) { 150 struct ethtool_cmd cmd; 151 if (!__ethtool_get_settings(netdev, &cmd)) 152 ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd)); 153 } 154 rtnl_unlock(); 155 return ret; 156 } 157 158 static ssize_t show_duplex(struct device *dev, 159 struct device_attribute *attr, char *buf) 160 { 161 struct net_device *netdev = to_net_dev(dev); 162 int ret = -EINVAL; 163 164 if (!rtnl_trylock()) 165 return restart_syscall(); 166 167 if (netif_running(netdev)) { 168 struct ethtool_cmd cmd; 169 if (!__ethtool_get_settings(netdev, &cmd)) 170 ret = sprintf(buf, "%s\n", 171 cmd.duplex ? "full" : "half"); 172 } 173 rtnl_unlock(); 174 return ret; 175 } 176 177 static ssize_t show_dormant(struct device *dev, 178 struct device_attribute *attr, char *buf) 179 { 180 struct net_device *netdev = to_net_dev(dev); 181 182 if (netif_running(netdev)) 183 return sprintf(buf, fmt_dec, !!netif_dormant(netdev)); 184 185 return -EINVAL; 186 } 187 188 static const char *const operstates[] = { 189 "unknown", 190 "notpresent", /* currently unused */ 191 "down", 192 "lowerlayerdown", 193 "testing", /* currently unused */ 194 "dormant", 195 "up" 196 }; 197 198 static ssize_t show_operstate(struct device *dev, 199 struct device_attribute *attr, char *buf) 200 { 201 const struct net_device *netdev = to_net_dev(dev); 202 unsigned char operstate; 203 204 read_lock(&dev_base_lock); 205 operstate = netdev->operstate; 206 if (!netif_running(netdev)) 207 operstate = IF_OPER_DOWN; 208 read_unlock(&dev_base_lock); 209 210 if (operstate >= ARRAY_SIZE(operstates)) 211 return -EINVAL; /* should not happen */ 212 213 return sprintf(buf, "%s\n", operstates[operstate]); 214 } 215 216 /* read-write attributes */ 217 NETDEVICE_SHOW(mtu, fmt_dec); 218 219 static int change_mtu(struct net_device *net, unsigned long new_mtu) 220 { 221 return dev_set_mtu(net, (int) new_mtu); 222 } 223 224 static ssize_t store_mtu(struct device *dev, struct device_attribute *attr, 225 const char *buf, size_t len) 226 { 227 return netdev_store(dev, attr, buf, len, change_mtu); 228 } 229 230 NETDEVICE_SHOW(flags, fmt_hex); 231 232 static int change_flags(struct net_device *net, unsigned long new_flags) 233 { 234 return dev_change_flags(net, (unsigned int) new_flags); 235 } 236 237 static ssize_t store_flags(struct device *dev, struct device_attribute *attr, 238 const char *buf, size_t len) 239 { 240 return netdev_store(dev, attr, buf, len, change_flags); 241 } 242 243 NETDEVICE_SHOW(tx_queue_len, fmt_ulong); 244 245 static int change_tx_queue_len(struct net_device *net, unsigned long new_len) 246 { 247 net->tx_queue_len = new_len; 248 return 0; 249 } 250 251 static ssize_t store_tx_queue_len(struct device *dev, 252 struct device_attribute *attr, 253 const char *buf, size_t len) 254 { 255 return netdev_store(dev, attr, buf, len, change_tx_queue_len); 256 } 257 258 static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr, 259 const char *buf, size_t len) 260 { 261 struct net_device *netdev = to_net_dev(dev); 262 size_t count = len; 263 ssize_t ret; 264 265 if (!capable(CAP_NET_ADMIN)) 266 return -EPERM; 267 268 /* ignore trailing newline */ 269 if (len > 0 && buf[len - 1] == '\n') 270 --count; 271 272 if (!rtnl_trylock()) 273 return restart_syscall(); 274 ret = dev_set_alias(netdev, buf, count); 275 rtnl_unlock(); 276 277 return ret < 0 ? ret : len; 278 } 279 280 static ssize_t show_ifalias(struct device *dev, 281 struct device_attribute *attr, char *buf) 282 { 283 const struct net_device *netdev = to_net_dev(dev); 284 ssize_t ret = 0; 285 286 if (!rtnl_trylock()) 287 return restart_syscall(); 288 if (netdev->ifalias) 289 ret = sprintf(buf, "%s\n", netdev->ifalias); 290 rtnl_unlock(); 291 return ret; 292 } 293 294 NETDEVICE_SHOW(group, fmt_dec); 295 296 static int change_group(struct net_device *net, unsigned long new_group) 297 { 298 dev_set_group(net, (int) new_group); 299 return 0; 300 } 301 302 static ssize_t store_group(struct device *dev, struct device_attribute *attr, 303 const char *buf, size_t len) 304 { 305 return netdev_store(dev, attr, buf, len, change_group); 306 } 307 308 static struct device_attribute net_class_attributes[] = { 309 __ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL), 310 __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), 311 __ATTR(dev_id, S_IRUGO, show_dev_id, NULL), 312 __ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias), 313 __ATTR(iflink, S_IRUGO, show_iflink, NULL), 314 __ATTR(ifindex, S_IRUGO, show_ifindex, NULL), 315 __ATTR(type, S_IRUGO, show_type, NULL), 316 __ATTR(link_mode, S_IRUGO, show_link_mode, NULL), 317 __ATTR(address, S_IRUGO, show_address, NULL), 318 __ATTR(broadcast, S_IRUGO, show_broadcast, NULL), 319 __ATTR(carrier, S_IRUGO, show_carrier, NULL), 320 __ATTR(speed, S_IRUGO, show_speed, NULL), 321 __ATTR(duplex, S_IRUGO, show_duplex, NULL), 322 __ATTR(dormant, S_IRUGO, show_dormant, NULL), 323 __ATTR(operstate, S_IRUGO, show_operstate, NULL), 324 __ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu), 325 __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), 326 __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, 327 store_tx_queue_len), 328 __ATTR(netdev_group, S_IRUGO | S_IWUSR, show_group, store_group), 329 {} 330 }; 331 332 /* Show a given an attribute in the statistics group */ 333 static ssize_t netstat_show(const struct device *d, 334 struct device_attribute *attr, char *buf, 335 unsigned long offset) 336 { 337 struct net_device *dev = to_net_dev(d); 338 ssize_t ret = -EINVAL; 339 340 WARN_ON(offset > sizeof(struct rtnl_link_stats64) || 341 offset % sizeof(u64) != 0); 342 343 read_lock(&dev_base_lock); 344 if (dev_isalive(dev)) { 345 struct rtnl_link_stats64 temp; 346 const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); 347 348 ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset)); 349 } 350 read_unlock(&dev_base_lock); 351 return ret; 352 } 353 354 /* generate a read-only statistics attribute */ 355 #define NETSTAT_ENTRY(name) \ 356 static ssize_t show_##name(struct device *d, \ 357 struct device_attribute *attr, char *buf) \ 358 { \ 359 return netstat_show(d, attr, buf, \ 360 offsetof(struct rtnl_link_stats64, name)); \ 361 } \ 362 static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) 363 364 NETSTAT_ENTRY(rx_packets); 365 NETSTAT_ENTRY(tx_packets); 366 NETSTAT_ENTRY(rx_bytes); 367 NETSTAT_ENTRY(tx_bytes); 368 NETSTAT_ENTRY(rx_errors); 369 NETSTAT_ENTRY(tx_errors); 370 NETSTAT_ENTRY(rx_dropped); 371 NETSTAT_ENTRY(tx_dropped); 372 NETSTAT_ENTRY(multicast); 373 NETSTAT_ENTRY(collisions); 374 NETSTAT_ENTRY(rx_length_errors); 375 NETSTAT_ENTRY(rx_over_errors); 376 NETSTAT_ENTRY(rx_crc_errors); 377 NETSTAT_ENTRY(rx_frame_errors); 378 NETSTAT_ENTRY(rx_fifo_errors); 379 NETSTAT_ENTRY(rx_missed_errors); 380 NETSTAT_ENTRY(tx_aborted_errors); 381 NETSTAT_ENTRY(tx_carrier_errors); 382 NETSTAT_ENTRY(tx_fifo_errors); 383 NETSTAT_ENTRY(tx_heartbeat_errors); 384 NETSTAT_ENTRY(tx_window_errors); 385 NETSTAT_ENTRY(rx_compressed); 386 NETSTAT_ENTRY(tx_compressed); 387 388 static struct attribute *netstat_attrs[] = { 389 &dev_attr_rx_packets.attr, 390 &dev_attr_tx_packets.attr, 391 &dev_attr_rx_bytes.attr, 392 &dev_attr_tx_bytes.attr, 393 &dev_attr_rx_errors.attr, 394 &dev_attr_tx_errors.attr, 395 &dev_attr_rx_dropped.attr, 396 &dev_attr_tx_dropped.attr, 397 &dev_attr_multicast.attr, 398 &dev_attr_collisions.attr, 399 &dev_attr_rx_length_errors.attr, 400 &dev_attr_rx_over_errors.attr, 401 &dev_attr_rx_crc_errors.attr, 402 &dev_attr_rx_frame_errors.attr, 403 &dev_attr_rx_fifo_errors.attr, 404 &dev_attr_rx_missed_errors.attr, 405 &dev_attr_tx_aborted_errors.attr, 406 &dev_attr_tx_carrier_errors.attr, 407 &dev_attr_tx_fifo_errors.attr, 408 &dev_attr_tx_heartbeat_errors.attr, 409 &dev_attr_tx_window_errors.attr, 410 &dev_attr_rx_compressed.attr, 411 &dev_attr_tx_compressed.attr, 412 NULL 413 }; 414 415 416 static struct attribute_group netstat_group = { 417 .name = "statistics", 418 .attrs = netstat_attrs, 419 }; 420 #endif /* CONFIG_SYSFS */ 421 422 #ifdef CONFIG_RPS 423 /* 424 * RX queue sysfs structures and functions. 425 */ 426 struct rx_queue_attribute { 427 struct attribute attr; 428 ssize_t (*show)(struct netdev_rx_queue *queue, 429 struct rx_queue_attribute *attr, char *buf); 430 ssize_t (*store)(struct netdev_rx_queue *queue, 431 struct rx_queue_attribute *attr, const char *buf, size_t len); 432 }; 433 #define to_rx_queue_attr(_attr) container_of(_attr, \ 434 struct rx_queue_attribute, attr) 435 436 #define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj) 437 438 static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr, 439 char *buf) 440 { 441 struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); 442 struct netdev_rx_queue *queue = to_rx_queue(kobj); 443 444 if (!attribute->show) 445 return -EIO; 446 447 return attribute->show(queue, attribute, buf); 448 } 449 450 static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr, 451 const char *buf, size_t count) 452 { 453 struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); 454 struct netdev_rx_queue *queue = to_rx_queue(kobj); 455 456 if (!attribute->store) 457 return -EIO; 458 459 return attribute->store(queue, attribute, buf, count); 460 } 461 462 static const struct sysfs_ops rx_queue_sysfs_ops = { 463 .show = rx_queue_attr_show, 464 .store = rx_queue_attr_store, 465 }; 466 467 static ssize_t show_rps_map(struct netdev_rx_queue *queue, 468 struct rx_queue_attribute *attribute, char *buf) 469 { 470 struct rps_map *map; 471 cpumask_var_t mask; 472 size_t len = 0; 473 int i; 474 475 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) 476 return -ENOMEM; 477 478 rcu_read_lock(); 479 map = rcu_dereference(queue->rps_map); 480 if (map) 481 for (i = 0; i < map->len; i++) 482 cpumask_set_cpu(map->cpus[i], mask); 483 484 len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask); 485 if (PAGE_SIZE - len < 3) { 486 rcu_read_unlock(); 487 free_cpumask_var(mask); 488 return -EINVAL; 489 } 490 rcu_read_unlock(); 491 492 free_cpumask_var(mask); 493 len += sprintf(buf + len, "\n"); 494 return len; 495 } 496 497 static ssize_t store_rps_map(struct netdev_rx_queue *queue, 498 struct rx_queue_attribute *attribute, 499 const char *buf, size_t len) 500 { 501 struct rps_map *old_map, *map; 502 cpumask_var_t mask; 503 int err, cpu, i; 504 static DEFINE_SPINLOCK(rps_map_lock); 505 506 if (!capable(CAP_NET_ADMIN)) 507 return -EPERM; 508 509 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 510 return -ENOMEM; 511 512 err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits); 513 if (err) { 514 free_cpumask_var(mask); 515 return err; 516 } 517 518 map = kzalloc(max_t(unsigned int, 519 RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES), 520 GFP_KERNEL); 521 if (!map) { 522 free_cpumask_var(mask); 523 return -ENOMEM; 524 } 525 526 i = 0; 527 for_each_cpu_and(cpu, mask, cpu_online_mask) 528 map->cpus[i++] = cpu; 529 530 if (i) 531 map->len = i; 532 else { 533 kfree(map); 534 map = NULL; 535 } 536 537 spin_lock(&rps_map_lock); 538 old_map = rcu_dereference_protected(queue->rps_map, 539 lockdep_is_held(&rps_map_lock)); 540 rcu_assign_pointer(queue->rps_map, map); 541 spin_unlock(&rps_map_lock); 542 543 if (map) 544 static_key_slow_inc(&rps_needed); 545 if (old_map) { 546 kfree_rcu(old_map, rcu); 547 static_key_slow_dec(&rps_needed); 548 } 549 free_cpumask_var(mask); 550 return len; 551 } 552 553 static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, 554 struct rx_queue_attribute *attr, 555 char *buf) 556 { 557 struct rps_dev_flow_table *flow_table; 558 unsigned long val = 0; 559 560 rcu_read_lock(); 561 flow_table = rcu_dereference(queue->rps_flow_table); 562 if (flow_table) 563 val = (unsigned long)flow_table->mask + 1; 564 rcu_read_unlock(); 565 566 return sprintf(buf, "%lu\n", val); 567 } 568 569 static void rps_dev_flow_table_release_work(struct work_struct *work) 570 { 571 struct rps_dev_flow_table *table = container_of(work, 572 struct rps_dev_flow_table, free_work); 573 574 vfree(table); 575 } 576 577 static void rps_dev_flow_table_release(struct rcu_head *rcu) 578 { 579 struct rps_dev_flow_table *table = container_of(rcu, 580 struct rps_dev_flow_table, rcu); 581 582 INIT_WORK(&table->free_work, rps_dev_flow_table_release_work); 583 schedule_work(&table->free_work); 584 } 585 586 static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, 587 struct rx_queue_attribute *attr, 588 const char *buf, size_t len) 589 { 590 unsigned long mask, count; 591 struct rps_dev_flow_table *table, *old_table; 592 static DEFINE_SPINLOCK(rps_dev_flow_lock); 593 int rc; 594 595 if (!capable(CAP_NET_ADMIN)) 596 return -EPERM; 597 598 rc = kstrtoul(buf, 0, &count); 599 if (rc < 0) 600 return rc; 601 602 if (count) { 603 mask = count - 1; 604 /* mask = roundup_pow_of_two(count) - 1; 605 * without overflows... 606 */ 607 while ((mask | (mask >> 1)) != mask) 608 mask |= (mask >> 1); 609 /* On 64 bit arches, must check mask fits in table->mask (u32), 610 * and on 32bit arches, must check RPS_DEV_FLOW_TABLE_SIZE(mask + 1) 611 * doesnt overflow. 612 */ 613 #if BITS_PER_LONG > 32 614 if (mask > (unsigned long)(u32)mask) 615 return -EINVAL; 616 #else 617 if (mask > (ULONG_MAX - RPS_DEV_FLOW_TABLE_SIZE(1)) 618 / sizeof(struct rps_dev_flow)) { 619 /* Enforce a limit to prevent overflow */ 620 return -EINVAL; 621 } 622 #endif 623 table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(mask + 1)); 624 if (!table) 625 return -ENOMEM; 626 627 table->mask = mask; 628 for (count = 0; count <= mask; count++) 629 table->flows[count].cpu = RPS_NO_CPU; 630 } else 631 table = NULL; 632 633 spin_lock(&rps_dev_flow_lock); 634 old_table = rcu_dereference_protected(queue->rps_flow_table, 635 lockdep_is_held(&rps_dev_flow_lock)); 636 rcu_assign_pointer(queue->rps_flow_table, table); 637 spin_unlock(&rps_dev_flow_lock); 638 639 if (old_table) 640 call_rcu(&old_table->rcu, rps_dev_flow_table_release); 641 642 return len; 643 } 644 645 static struct rx_queue_attribute rps_cpus_attribute = 646 __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map); 647 648 649 static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute = 650 __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR, 651 show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt); 652 653 static struct attribute *rx_queue_default_attrs[] = { 654 &rps_cpus_attribute.attr, 655 &rps_dev_flow_table_cnt_attribute.attr, 656 NULL 657 }; 658 659 static void rx_queue_release(struct kobject *kobj) 660 { 661 struct netdev_rx_queue *queue = to_rx_queue(kobj); 662 struct rps_map *map; 663 struct rps_dev_flow_table *flow_table; 664 665 666 map = rcu_dereference_protected(queue->rps_map, 1); 667 if (map) { 668 RCU_INIT_POINTER(queue->rps_map, NULL); 669 kfree_rcu(map, rcu); 670 } 671 672 flow_table = rcu_dereference_protected(queue->rps_flow_table, 1); 673 if (flow_table) { 674 RCU_INIT_POINTER(queue->rps_flow_table, NULL); 675 call_rcu(&flow_table->rcu, rps_dev_flow_table_release); 676 } 677 678 memset(kobj, 0, sizeof(*kobj)); 679 dev_put(queue->dev); 680 } 681 682 static struct kobj_type rx_queue_ktype = { 683 .sysfs_ops = &rx_queue_sysfs_ops, 684 .release = rx_queue_release, 685 .default_attrs = rx_queue_default_attrs, 686 }; 687 688 static int rx_queue_add_kobject(struct net_device *net, int index) 689 { 690 struct netdev_rx_queue *queue = net->_rx + index; 691 struct kobject *kobj = &queue->kobj; 692 int error = 0; 693 694 kobj->kset = net->queues_kset; 695 error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, 696 "rx-%u", index); 697 if (error) { 698 kobject_put(kobj); 699 return error; 700 } 701 702 kobject_uevent(kobj, KOBJ_ADD); 703 dev_hold(queue->dev); 704 705 return error; 706 } 707 #endif /* CONFIG_RPS */ 708 709 int 710 net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) 711 { 712 #ifdef CONFIG_RPS 713 int i; 714 int error = 0; 715 716 for (i = old_num; i < new_num; i++) { 717 error = rx_queue_add_kobject(net, i); 718 if (error) { 719 new_num = old_num; 720 break; 721 } 722 } 723 724 while (--i >= new_num) 725 kobject_put(&net->_rx[i].kobj); 726 727 return error; 728 #else 729 return 0; 730 #endif 731 } 732 733 #ifdef CONFIG_SYSFS 734 /* 735 * netdev_queue sysfs structures and functions. 736 */ 737 struct netdev_queue_attribute { 738 struct attribute attr; 739 ssize_t (*show)(struct netdev_queue *queue, 740 struct netdev_queue_attribute *attr, char *buf); 741 ssize_t (*store)(struct netdev_queue *queue, 742 struct netdev_queue_attribute *attr, const char *buf, size_t len); 743 }; 744 #define to_netdev_queue_attr(_attr) container_of(_attr, \ 745 struct netdev_queue_attribute, attr) 746 747 #define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj) 748 749 static ssize_t netdev_queue_attr_show(struct kobject *kobj, 750 struct attribute *attr, char *buf) 751 { 752 struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr); 753 struct netdev_queue *queue = to_netdev_queue(kobj); 754 755 if (!attribute->show) 756 return -EIO; 757 758 return attribute->show(queue, attribute, buf); 759 } 760 761 static ssize_t netdev_queue_attr_store(struct kobject *kobj, 762 struct attribute *attr, 763 const char *buf, size_t count) 764 { 765 struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr); 766 struct netdev_queue *queue = to_netdev_queue(kobj); 767 768 if (!attribute->store) 769 return -EIO; 770 771 return attribute->store(queue, attribute, buf, count); 772 } 773 774 static const struct sysfs_ops netdev_queue_sysfs_ops = { 775 .show = netdev_queue_attr_show, 776 .store = netdev_queue_attr_store, 777 }; 778 779 static ssize_t show_trans_timeout(struct netdev_queue *queue, 780 struct netdev_queue_attribute *attribute, 781 char *buf) 782 { 783 unsigned long trans_timeout; 784 785 spin_lock_irq(&queue->_xmit_lock); 786 trans_timeout = queue->trans_timeout; 787 spin_unlock_irq(&queue->_xmit_lock); 788 789 return sprintf(buf, "%lu", trans_timeout); 790 } 791 792 static struct netdev_queue_attribute queue_trans_timeout = 793 __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL); 794 795 #ifdef CONFIG_BQL 796 /* 797 * Byte queue limits sysfs structures and functions. 798 */ 799 static ssize_t bql_show(char *buf, unsigned int value) 800 { 801 return sprintf(buf, "%u\n", value); 802 } 803 804 static ssize_t bql_set(const char *buf, const size_t count, 805 unsigned int *pvalue) 806 { 807 unsigned int value; 808 int err; 809 810 if (!strcmp(buf, "max") || !strcmp(buf, "max\n")) 811 value = DQL_MAX_LIMIT; 812 else { 813 err = kstrtouint(buf, 10, &value); 814 if (err < 0) 815 return err; 816 if (value > DQL_MAX_LIMIT) 817 return -EINVAL; 818 } 819 820 *pvalue = value; 821 822 return count; 823 } 824 825 static ssize_t bql_show_hold_time(struct netdev_queue *queue, 826 struct netdev_queue_attribute *attr, 827 char *buf) 828 { 829 struct dql *dql = &queue->dql; 830 831 return sprintf(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time)); 832 } 833 834 static ssize_t bql_set_hold_time(struct netdev_queue *queue, 835 struct netdev_queue_attribute *attribute, 836 const char *buf, size_t len) 837 { 838 struct dql *dql = &queue->dql; 839 unsigned int value; 840 int err; 841 842 err = kstrtouint(buf, 10, &value); 843 if (err < 0) 844 return err; 845 846 dql->slack_hold_time = msecs_to_jiffies(value); 847 848 return len; 849 } 850 851 static struct netdev_queue_attribute bql_hold_time_attribute = 852 __ATTR(hold_time, S_IRUGO | S_IWUSR, bql_show_hold_time, 853 bql_set_hold_time); 854 855 static ssize_t bql_show_inflight(struct netdev_queue *queue, 856 struct netdev_queue_attribute *attr, 857 char *buf) 858 { 859 struct dql *dql = &queue->dql; 860 861 return sprintf(buf, "%u\n", dql->num_queued - dql->num_completed); 862 } 863 864 static struct netdev_queue_attribute bql_inflight_attribute = 865 __ATTR(inflight, S_IRUGO, bql_show_inflight, NULL); 866 867 #define BQL_ATTR(NAME, FIELD) \ 868 static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \ 869 struct netdev_queue_attribute *attr, \ 870 char *buf) \ 871 { \ 872 return bql_show(buf, queue->dql.FIELD); \ 873 } \ 874 \ 875 static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \ 876 struct netdev_queue_attribute *attr, \ 877 const char *buf, size_t len) \ 878 { \ 879 return bql_set(buf, len, &queue->dql.FIELD); \ 880 } \ 881 \ 882 static struct netdev_queue_attribute bql_ ## NAME ## _attribute = \ 883 __ATTR(NAME, S_IRUGO | S_IWUSR, bql_show_ ## NAME, \ 884 bql_set_ ## NAME); 885 886 BQL_ATTR(limit, limit) 887 BQL_ATTR(limit_max, max_limit) 888 BQL_ATTR(limit_min, min_limit) 889 890 static struct attribute *dql_attrs[] = { 891 &bql_limit_attribute.attr, 892 &bql_limit_max_attribute.attr, 893 &bql_limit_min_attribute.attr, 894 &bql_hold_time_attribute.attr, 895 &bql_inflight_attribute.attr, 896 NULL 897 }; 898 899 static struct attribute_group dql_group = { 900 .name = "byte_queue_limits", 901 .attrs = dql_attrs, 902 }; 903 #endif /* CONFIG_BQL */ 904 905 #ifdef CONFIG_XPS 906 static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) 907 { 908 struct net_device *dev = queue->dev; 909 int i; 910 911 for (i = 0; i < dev->num_tx_queues; i++) 912 if (queue == &dev->_tx[i]) 913 break; 914 915 BUG_ON(i >= dev->num_tx_queues); 916 917 return i; 918 } 919 920 921 static ssize_t show_xps_map(struct netdev_queue *queue, 922 struct netdev_queue_attribute *attribute, char *buf) 923 { 924 struct net_device *dev = queue->dev; 925 struct xps_dev_maps *dev_maps; 926 cpumask_var_t mask; 927 unsigned long index; 928 size_t len = 0; 929 int i; 930 931 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) 932 return -ENOMEM; 933 934 index = get_netdev_queue_index(queue); 935 936 rcu_read_lock(); 937 dev_maps = rcu_dereference(dev->xps_maps); 938 if (dev_maps) { 939 for_each_possible_cpu(i) { 940 struct xps_map *map = 941 rcu_dereference(dev_maps->cpu_map[i]); 942 if (map) { 943 int j; 944 for (j = 0; j < map->len; j++) { 945 if (map->queues[j] == index) { 946 cpumask_set_cpu(i, mask); 947 break; 948 } 949 } 950 } 951 } 952 } 953 rcu_read_unlock(); 954 955 len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask); 956 if (PAGE_SIZE - len < 3) { 957 free_cpumask_var(mask); 958 return -EINVAL; 959 } 960 961 free_cpumask_var(mask); 962 len += sprintf(buf + len, "\n"); 963 return len; 964 } 965 966 static DEFINE_MUTEX(xps_map_mutex); 967 #define xmap_dereference(P) \ 968 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) 969 970 static void xps_queue_release(struct netdev_queue *queue) 971 { 972 struct net_device *dev = queue->dev; 973 struct xps_dev_maps *dev_maps; 974 struct xps_map *map; 975 unsigned long index; 976 int i, pos, nonempty = 0; 977 978 index = get_netdev_queue_index(queue); 979 980 mutex_lock(&xps_map_mutex); 981 dev_maps = xmap_dereference(dev->xps_maps); 982 983 if (dev_maps) { 984 for_each_possible_cpu(i) { 985 map = xmap_dereference(dev_maps->cpu_map[i]); 986 if (!map) 987 continue; 988 989 for (pos = 0; pos < map->len; pos++) 990 if (map->queues[pos] == index) 991 break; 992 993 if (pos < map->len) { 994 if (map->len > 1) 995 map->queues[pos] = 996 map->queues[--map->len]; 997 else { 998 RCU_INIT_POINTER(dev_maps->cpu_map[i], 999 NULL); 1000 kfree_rcu(map, rcu); 1001 map = NULL; 1002 } 1003 } 1004 if (map) 1005 nonempty = 1; 1006 } 1007 1008 if (!nonempty) { 1009 RCU_INIT_POINTER(dev->xps_maps, NULL); 1010 kfree_rcu(dev_maps, rcu); 1011 } 1012 } 1013 mutex_unlock(&xps_map_mutex); 1014 } 1015 1016 static ssize_t store_xps_map(struct netdev_queue *queue, 1017 struct netdev_queue_attribute *attribute, 1018 const char *buf, size_t len) 1019 { 1020 struct net_device *dev = queue->dev; 1021 cpumask_var_t mask; 1022 int err, i, cpu, pos, map_len, alloc_len, need_set; 1023 unsigned long index; 1024 struct xps_map *map, *new_map; 1025 struct xps_dev_maps *dev_maps, *new_dev_maps; 1026 int nonempty = 0; 1027 int numa_node_id = -2; 1028 1029 if (!capable(CAP_NET_ADMIN)) 1030 return -EPERM; 1031 1032 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 1033 return -ENOMEM; 1034 1035 index = get_netdev_queue_index(queue); 1036 1037 err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits); 1038 if (err) { 1039 free_cpumask_var(mask); 1040 return err; 1041 } 1042 1043 new_dev_maps = kzalloc(max_t(unsigned int, 1044 XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL); 1045 if (!new_dev_maps) { 1046 free_cpumask_var(mask); 1047 return -ENOMEM; 1048 } 1049 1050 mutex_lock(&xps_map_mutex); 1051 1052 dev_maps = xmap_dereference(dev->xps_maps); 1053 1054 for_each_possible_cpu(cpu) { 1055 map = dev_maps ? 1056 xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; 1057 new_map = map; 1058 if (map) { 1059 for (pos = 0; pos < map->len; pos++) 1060 if (map->queues[pos] == index) 1061 break; 1062 map_len = map->len; 1063 alloc_len = map->alloc_len; 1064 } else 1065 pos = map_len = alloc_len = 0; 1066 1067 need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu); 1068 #ifdef CONFIG_NUMA 1069 if (need_set) { 1070 if (numa_node_id == -2) 1071 numa_node_id = cpu_to_node(cpu); 1072 else if (numa_node_id != cpu_to_node(cpu)) 1073 numa_node_id = -1; 1074 } 1075 #endif 1076 if (need_set && pos >= map_len) { 1077 /* Need to add queue to this CPU's map */ 1078 if (map_len >= alloc_len) { 1079 alloc_len = alloc_len ? 1080 2 * alloc_len : XPS_MIN_MAP_ALLOC; 1081 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), 1082 GFP_KERNEL, 1083 cpu_to_node(cpu)); 1084 if (!new_map) 1085 goto error; 1086 new_map->alloc_len = alloc_len; 1087 for (i = 0; i < map_len; i++) 1088 new_map->queues[i] = map->queues[i]; 1089 new_map->len = map_len; 1090 } 1091 new_map->queues[new_map->len++] = index; 1092 } else if (!need_set && pos < map_len) { 1093 /* Need to remove queue from this CPU's map */ 1094 if (map_len > 1) 1095 new_map->queues[pos] = 1096 new_map->queues[--new_map->len]; 1097 else 1098 new_map = NULL; 1099 } 1100 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map); 1101 } 1102 1103 /* Cleanup old maps */ 1104 for_each_possible_cpu(cpu) { 1105 map = dev_maps ? 1106 xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; 1107 if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map) 1108 kfree_rcu(map, rcu); 1109 if (new_dev_maps->cpu_map[cpu]) 1110 nonempty = 1; 1111 } 1112 1113 if (nonempty) { 1114 rcu_assign_pointer(dev->xps_maps, new_dev_maps); 1115 } else { 1116 kfree(new_dev_maps); 1117 RCU_INIT_POINTER(dev->xps_maps, NULL); 1118 } 1119 1120 if (dev_maps) 1121 kfree_rcu(dev_maps, rcu); 1122 1123 netdev_queue_numa_node_write(queue, (numa_node_id >= 0) ? numa_node_id : 1124 NUMA_NO_NODE); 1125 1126 mutex_unlock(&xps_map_mutex); 1127 1128 free_cpumask_var(mask); 1129 return len; 1130 1131 error: 1132 mutex_unlock(&xps_map_mutex); 1133 1134 if (new_dev_maps) 1135 for_each_possible_cpu(i) 1136 kfree(rcu_dereference_protected( 1137 new_dev_maps->cpu_map[i], 1138 1)); 1139 kfree(new_dev_maps); 1140 free_cpumask_var(mask); 1141 return -ENOMEM; 1142 } 1143 1144 static struct netdev_queue_attribute xps_cpus_attribute = 1145 __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map); 1146 #endif /* CONFIG_XPS */ 1147 1148 static struct attribute *netdev_queue_default_attrs[] = { 1149 &queue_trans_timeout.attr, 1150 #ifdef CONFIG_XPS 1151 &xps_cpus_attribute.attr, 1152 #endif 1153 NULL 1154 }; 1155 1156 static void netdev_queue_release(struct kobject *kobj) 1157 { 1158 struct netdev_queue *queue = to_netdev_queue(kobj); 1159 1160 #ifdef CONFIG_XPS 1161 xps_queue_release(queue); 1162 #endif 1163 1164 memset(kobj, 0, sizeof(*kobj)); 1165 dev_put(queue->dev); 1166 } 1167 1168 static struct kobj_type netdev_queue_ktype = { 1169 .sysfs_ops = &netdev_queue_sysfs_ops, 1170 .release = netdev_queue_release, 1171 .default_attrs = netdev_queue_default_attrs, 1172 }; 1173 1174 static int netdev_queue_add_kobject(struct net_device *net, int index) 1175 { 1176 struct netdev_queue *queue = net->_tx + index; 1177 struct kobject *kobj = &queue->kobj; 1178 int error = 0; 1179 1180 kobj->kset = net->queues_kset; 1181 error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, 1182 "tx-%u", index); 1183 if (error) 1184 goto exit; 1185 1186 #ifdef CONFIG_BQL 1187 error = sysfs_create_group(kobj, &dql_group); 1188 if (error) 1189 goto exit; 1190 #endif 1191 1192 kobject_uevent(kobj, KOBJ_ADD); 1193 dev_hold(queue->dev); 1194 1195 return 0; 1196 exit: 1197 kobject_put(kobj); 1198 return error; 1199 } 1200 #endif /* CONFIG_SYSFS */ 1201 1202 int 1203 netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) 1204 { 1205 #ifdef CONFIG_SYSFS 1206 int i; 1207 int error = 0; 1208 1209 for (i = old_num; i < new_num; i++) { 1210 error = netdev_queue_add_kobject(net, i); 1211 if (error) { 1212 new_num = old_num; 1213 break; 1214 } 1215 } 1216 1217 while (--i >= new_num) { 1218 struct netdev_queue *queue = net->_tx + i; 1219 1220 #ifdef CONFIG_BQL 1221 sysfs_remove_group(&queue->kobj, &dql_group); 1222 #endif 1223 kobject_put(&queue->kobj); 1224 } 1225 1226 return error; 1227 #else 1228 return 0; 1229 #endif /* CONFIG_SYSFS */ 1230 } 1231 1232 static int register_queue_kobjects(struct net_device *net) 1233 { 1234 int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0; 1235 1236 #ifdef CONFIG_SYSFS 1237 net->queues_kset = kset_create_and_add("queues", 1238 NULL, &net->dev.kobj); 1239 if (!net->queues_kset) 1240 return -ENOMEM; 1241 #endif 1242 1243 #ifdef CONFIG_RPS 1244 real_rx = net->real_num_rx_queues; 1245 #endif 1246 real_tx = net->real_num_tx_queues; 1247 1248 error = net_rx_queue_update_kobjects(net, 0, real_rx); 1249 if (error) 1250 goto error; 1251 rxq = real_rx; 1252 1253 error = netdev_queue_update_kobjects(net, 0, real_tx); 1254 if (error) 1255 goto error; 1256 txq = real_tx; 1257 1258 return 0; 1259 1260 error: 1261 netdev_queue_update_kobjects(net, txq, 0); 1262 net_rx_queue_update_kobjects(net, rxq, 0); 1263 return error; 1264 } 1265 1266 static void remove_queue_kobjects(struct net_device *net) 1267 { 1268 int real_rx = 0, real_tx = 0; 1269 1270 #ifdef CONFIG_RPS 1271 real_rx = net->real_num_rx_queues; 1272 #endif 1273 real_tx = net->real_num_tx_queues; 1274 1275 net_rx_queue_update_kobjects(net, real_rx, 0); 1276 netdev_queue_update_kobjects(net, real_tx, 0); 1277 #ifdef CONFIG_SYSFS 1278 kset_unregister(net->queues_kset); 1279 #endif 1280 } 1281 1282 static void *net_grab_current_ns(void) 1283 { 1284 struct net *ns = current->nsproxy->net_ns; 1285 #ifdef CONFIG_NET_NS 1286 if (ns) 1287 atomic_inc(&ns->passive); 1288 #endif 1289 return ns; 1290 } 1291 1292 static const void *net_initial_ns(void) 1293 { 1294 return &init_net; 1295 } 1296 1297 static const void *net_netlink_ns(struct sock *sk) 1298 { 1299 return sock_net(sk); 1300 } 1301 1302 struct kobj_ns_type_operations net_ns_type_operations = { 1303 .type = KOBJ_NS_TYPE_NET, 1304 .grab_current_ns = net_grab_current_ns, 1305 .netlink_ns = net_netlink_ns, 1306 .initial_ns = net_initial_ns, 1307 .drop_ns = net_drop_ns, 1308 }; 1309 EXPORT_SYMBOL_GPL(net_ns_type_operations); 1310 1311 #ifdef CONFIG_HOTPLUG 1312 static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) 1313 { 1314 struct net_device *dev = to_net_dev(d); 1315 int retval; 1316 1317 /* pass interface to uevent. */ 1318 retval = add_uevent_var(env, "INTERFACE=%s", dev->name); 1319 if (retval) 1320 goto exit; 1321 1322 /* pass ifindex to uevent. 1323 * ifindex is useful as it won't change (interface name may change) 1324 * and is what RtNetlink uses natively. */ 1325 retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex); 1326 1327 exit: 1328 return retval; 1329 } 1330 #endif 1331 1332 /* 1333 * netdev_release -- destroy and free a dead device. 1334 * Called when last reference to device kobject is gone. 1335 */ 1336 static void netdev_release(struct device *d) 1337 { 1338 struct net_device *dev = to_net_dev(d); 1339 1340 BUG_ON(dev->reg_state != NETREG_RELEASED); 1341 1342 kfree(dev->ifalias); 1343 kfree((char *)dev - dev->padded); 1344 } 1345 1346 static const void *net_namespace(struct device *d) 1347 { 1348 struct net_device *dev; 1349 dev = container_of(d, struct net_device, dev); 1350 return dev_net(dev); 1351 } 1352 1353 static struct class net_class = { 1354 .name = "net", 1355 .dev_release = netdev_release, 1356 #ifdef CONFIG_SYSFS 1357 .dev_attrs = net_class_attributes, 1358 #endif /* CONFIG_SYSFS */ 1359 #ifdef CONFIG_HOTPLUG 1360 .dev_uevent = netdev_uevent, 1361 #endif 1362 .ns_type = &net_ns_type_operations, 1363 .namespace = net_namespace, 1364 }; 1365 1366 /* Delete sysfs entries but hold kobject reference until after all 1367 * netdev references are gone. 1368 */ 1369 void netdev_unregister_kobject(struct net_device * net) 1370 { 1371 struct device *dev = &(net->dev); 1372 1373 kobject_get(&dev->kobj); 1374 1375 remove_queue_kobjects(net); 1376 1377 device_del(dev); 1378 } 1379 1380 /* Create sysfs entries for network device. */ 1381 int netdev_register_kobject(struct net_device *net) 1382 { 1383 struct device *dev = &(net->dev); 1384 const struct attribute_group **groups = net->sysfs_groups; 1385 int error = 0; 1386 1387 device_initialize(dev); 1388 dev->class = &net_class; 1389 dev->platform_data = net; 1390 dev->groups = groups; 1391 1392 dev_set_name(dev, "%s", net->name); 1393 1394 #ifdef CONFIG_SYSFS 1395 /* Allow for a device specific group */ 1396 if (*groups) 1397 groups++; 1398 1399 *groups++ = &netstat_group; 1400 #endif /* CONFIG_SYSFS */ 1401 1402 error = device_add(dev); 1403 if (error) 1404 return error; 1405 1406 error = register_queue_kobjects(net); 1407 if (error) { 1408 device_del(dev); 1409 return error; 1410 } 1411 1412 return error; 1413 } 1414 1415 int netdev_class_create_file(struct class_attribute *class_attr) 1416 { 1417 return class_create_file(&net_class, class_attr); 1418 } 1419 EXPORT_SYMBOL(netdev_class_create_file); 1420 1421 void netdev_class_remove_file(struct class_attribute *class_attr) 1422 { 1423 class_remove_file(&net_class, class_attr); 1424 } 1425 EXPORT_SYMBOL(netdev_class_remove_file); 1426 1427 int netdev_kobject_init(void) 1428 { 1429 kobj_ns_type_register(&net_ns_type_operations); 1430 return class_register(&net_class); 1431 } 1432