xref: /openbmc/linux/net/core/net-sysfs.c (revision 4800cd83)
1 /*
2  * net-sysfs.c - network device class and attributes
3  *
4  * Copyright (c) 2003 Stephen Hemminger <shemminger@osdl.org>
5  *
6  *	This program is free software; you can redistribute it and/or
7  *	modify it under the terms of the GNU General Public License
8  *	as published by the Free Software Foundation; either version
9  *	2 of the License, or (at your option) any later version.
10  */
11 
12 #include <linux/capability.h>
13 #include <linux/kernel.h>
14 #include <linux/netdevice.h>
15 #include <linux/if_arp.h>
16 #include <linux/slab.h>
17 #include <linux/nsproxy.h>
18 #include <net/sock.h>
19 #include <net/net_namespace.h>
20 #include <linux/rtnetlink.h>
21 #include <linux/wireless.h>
22 #include <linux/vmalloc.h>
23 #include <net/wext.h>
24 
25 #include "net-sysfs.h"
26 
27 #ifdef CONFIG_SYSFS
28 static const char fmt_hex[] = "%#x\n";
29 static const char fmt_long_hex[] = "%#lx\n";
30 static const char fmt_dec[] = "%d\n";
31 static const char fmt_ulong[] = "%lu\n";
32 static const char fmt_u64[] = "%llu\n";
33 
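/* A device is treated as "alive" here until unregistration has begun,
 * i.e. while reg_state is still NETREG_UNINITIALIZED or NETREG_REGISTERED.
 */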
34 static inline int dev_isalive(const struct net_device *dev)
35 {
36 	return dev->reg_state <= NETREG_REGISTERED;
37 }
38 
39 /* use the same locking rules as the GIF* ioctls */
40 static ssize_t netdev_show(const struct device *dev,
41 			   struct device_attribute *attr, char *buf,
42 			   ssize_t (*format)(const struct net_device *, char *))
43 {
44 	struct net_device *net = to_net_dev(dev);
45 	ssize_t ret = -EINVAL;
46 
47 	read_lock(&dev_base_lock);
48 	if (dev_isalive(net))
49 		ret = (*format)(net, buf);
50 	read_unlock(&dev_base_lock);
51 
52 	return ret;
53 }
54 
55 /* generate a show function for a simple field */
56 #define NETDEVICE_SHOW(field, format_string)				\
57 static ssize_t format_##field(const struct net_device *net, char *buf)	\
58 {									\
59 	return sprintf(buf, format_string, net->field);			\
60 }									\
61 static ssize_t show_##field(struct device *dev,				\
62 			    struct device_attribute *attr, char *buf)	\
63 {									\
64 	return netdev_show(dev, attr, buf, format_##field);		\
65 }
66 
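/* For example, NETDEVICE_SHOW(mtu, fmt_dec) below generates format_mtu()
 * and show_mtu(), so "cat /sys/class/net/<iface>/mtu" prints net->mtu in
 * decimal.
 */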
67 
68 /* use the same locking and permission rules as the SIF* ioctls */
69 static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
70 			    const char *buf, size_t len,
71 			    int (*set)(struct net_device *, unsigned long))
72 {
73 	struct net_device *net = to_net_dev(dev);
74 	char *endp;
75 	unsigned long new;
76 	int ret = -EINVAL;
77 
78 	if (!capable(CAP_NET_ADMIN))
79 		return -EPERM;
80 
81 	new = simple_strtoul(buf, &endp, 0);
82 	if (endp == buf)
83 		goto err;
84 
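	/* Take the RTNL without sleeping: if it is contended, bail out and
	 * let restart_syscall() retry the write rather than blocking here
	 * with the sysfs reference held.
	 */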
85 	if (!rtnl_trylock())
86 		return restart_syscall();
87 
88 	if (dev_isalive(net)) {
89 		if ((ret = (*set)(net, new)) == 0)
90 			ret = len;
91 	}
92 	rtnl_unlock();
93  err:
94 	return ret;
95 }
96 
97 NETDEVICE_SHOW(dev_id, fmt_hex);
98 NETDEVICE_SHOW(addr_assign_type, fmt_dec);
99 NETDEVICE_SHOW(addr_len, fmt_dec);
100 NETDEVICE_SHOW(iflink, fmt_dec);
101 NETDEVICE_SHOW(ifindex, fmt_dec);
102 NETDEVICE_SHOW(features, fmt_long_hex);
103 NETDEVICE_SHOW(type, fmt_dec);
104 NETDEVICE_SHOW(link_mode, fmt_dec);
105 
106 /* use the same locking rules as the GIFHWADDR ioctl */
107 static ssize_t show_address(struct device *dev, struct device_attribute *attr,
108 			    char *buf)
109 {
110 	struct net_device *net = to_net_dev(dev);
111 	ssize_t ret = -EINVAL;
112 
113 	read_lock(&dev_base_lock);
114 	if (dev_isalive(net))
115 		ret = sysfs_format_mac(buf, net->dev_addr, net->addr_len);
116 	read_unlock(&dev_base_lock);
117 	return ret;
118 }
119 
120 static ssize_t show_broadcast(struct device *dev,
121 			    struct device_attribute *attr, char *buf)
122 {
123 	struct net_device *net = to_net_dev(dev);
124 	if (dev_isalive(net))
125 		return sysfs_format_mac(buf, net->broadcast, net->addr_len);
126 	return -EINVAL;
127 }
128 
129 static ssize_t show_carrier(struct device *dev,
130 			    struct device_attribute *attr, char *buf)
131 {
132 	struct net_device *netdev = to_net_dev(dev);
133 	if (netif_running(netdev)) {
134 		return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev));
135 	}
136 	return -EINVAL;
137 }
138 
139 static ssize_t show_speed(struct device *dev,
140 			  struct device_attribute *attr, char *buf)
141 {
142 	struct net_device *netdev = to_net_dev(dev);
143 	int ret = -EINVAL;
144 
145 	if (!rtnl_trylock())
146 		return restart_syscall();
147 
148 	if (netif_running(netdev) &&
149 	    netdev->ethtool_ops &&
150 	    netdev->ethtool_ops->get_settings) {
151 		struct ethtool_cmd cmd = { ETHTOOL_GSET };
152 
153 		if (!netdev->ethtool_ops->get_settings(netdev, &cmd))
154 			ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd));
155 	}
156 	rtnl_unlock();
157 	return ret;
158 }
159 
160 static ssize_t show_duplex(struct device *dev,
161 			   struct device_attribute *attr, char *buf)
162 {
163 	struct net_device *netdev = to_net_dev(dev);
164 	int ret = -EINVAL;
165 
166 	if (!rtnl_trylock())
167 		return restart_syscall();
168 
169 	if (netif_running(netdev) &&
170 	    netdev->ethtool_ops &&
171 	    netdev->ethtool_ops->get_settings) {
172 		struct ethtool_cmd cmd = { ETHTOOL_GSET };
173 
174 		if (!netdev->ethtool_ops->get_settings(netdev, &cmd))
175 			ret = sprintf(buf, "%s\n", cmd.duplex ? "full" : "half");
176 	}
177 	rtnl_unlock();
178 	return ret;
179 }
180 
181 static ssize_t show_dormant(struct device *dev,
182 			    struct device_attribute *attr, char *buf)
183 {
184 	struct net_device *netdev = to_net_dev(dev);
185 
186 	if (netif_running(netdev))
187 		return sprintf(buf, fmt_dec, !!netif_dormant(netdev));
188 
189 	return -EINVAL;
190 }
191 
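/* Human-readable names for the RFC 2863 IF_OPER_* states, indexed by
 * netdev->operstate and reported by the "operstate" attribute below.
 */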
192 static const char *const operstates[] = {
193 	"unknown",
194 	"notpresent", /* currently unused */
195 	"down",
196 	"lowerlayerdown",
197 	"testing", /* currently unused */
198 	"dormant",
199 	"up"
200 };
201 
202 static ssize_t show_operstate(struct device *dev,
203 			      struct device_attribute *attr, char *buf)
204 {
205 	const struct net_device *netdev = to_net_dev(dev);
206 	unsigned char operstate;
207 
208 	read_lock(&dev_base_lock);
209 	operstate = netdev->operstate;
210 	if (!netif_running(netdev))
211 		operstate = IF_OPER_DOWN;
212 	read_unlock(&dev_base_lock);
213 
214 	if (operstate >= ARRAY_SIZE(operstates))
215 		return -EINVAL; /* should not happen */
216 
217 	return sprintf(buf, "%s\n", operstates[operstate]);
218 }
219 
220 /* read-write attributes */
221 NETDEVICE_SHOW(mtu, fmt_dec);
222 
223 static int change_mtu(struct net_device *net, unsigned long new_mtu)
224 {
225 	return dev_set_mtu(net, (int) new_mtu);
226 }
227 
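/* e.g. "echo 1492 > /sys/class/net/<iface>/mtu" reaches dev_set_mtu()
 * through netdev_store(), which also enforces CAP_NET_ADMIN.
 */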
228 static ssize_t store_mtu(struct device *dev, struct device_attribute *attr,
229 			 const char *buf, size_t len)
230 {
231 	return netdev_store(dev, attr, buf, len, change_mtu);
232 }
233 
234 NETDEVICE_SHOW(flags, fmt_hex);
235 
236 static int change_flags(struct net_device *net, unsigned long new_flags)
237 {
238 	return dev_change_flags(net, (unsigned) new_flags);
239 }
240 
241 static ssize_t store_flags(struct device *dev, struct device_attribute *attr,
242 			   const char *buf, size_t len)
243 {
244 	return netdev_store(dev, attr, buf, len, change_flags);
245 }
246 
247 NETDEVICE_SHOW(tx_queue_len, fmt_ulong);
248 
249 static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
250 {
251 	net->tx_queue_len = new_len;
252 	return 0;
253 }
254 
255 static ssize_t store_tx_queue_len(struct device *dev,
256 				  struct device_attribute *attr,
257 				  const char *buf, size_t len)
258 {
259 	return netdev_store(dev, attr, buf, len, change_tx_queue_len);
260 }
261 
262 static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr,
263 			     const char *buf, size_t len)
264 {
265 	struct net_device *netdev = to_net_dev(dev);
266 	size_t count = len;
267 	ssize_t ret;
268 
269 	if (!capable(CAP_NET_ADMIN))
270 		return -EPERM;
271 
272 	/* ignore trailing newline */
273 	if (len >  0 && buf[len - 1] == '\n')
274 		--count;
275 
276 	if (!rtnl_trylock())
277 		return restart_syscall();
278 	ret = dev_set_alias(netdev, buf, count);
279 	rtnl_unlock();
280 
281 	return ret < 0 ? ret : len;
282 }
283 
284 static ssize_t show_ifalias(struct device *dev,
285 			    struct device_attribute *attr, char *buf)
286 {
287 	const struct net_device *netdev = to_net_dev(dev);
288 	ssize_t ret = 0;
289 
290 	if (!rtnl_trylock())
291 		return restart_syscall();
292 	if (netdev->ifalias)
293 		ret = sprintf(buf, "%s\n", netdev->ifalias);
294 	rtnl_unlock();
295 	return ret;
296 }
297 
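/* Each entry below becomes a file directly under /sys/class/net/<iface>/
 * (e.g. "address", "mtu", "operstate").
 */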
298 static struct device_attribute net_class_attributes[] = {
299 	__ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL),
300 	__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
301 	__ATTR(dev_id, S_IRUGO, show_dev_id, NULL),
302 	__ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias),
303 	__ATTR(iflink, S_IRUGO, show_iflink, NULL),
304 	__ATTR(ifindex, S_IRUGO, show_ifindex, NULL),
305 	__ATTR(features, S_IRUGO, show_features, NULL),
306 	__ATTR(type, S_IRUGO, show_type, NULL),
307 	__ATTR(link_mode, S_IRUGO, show_link_mode, NULL),
308 	__ATTR(address, S_IRUGO, show_address, NULL),
309 	__ATTR(broadcast, S_IRUGO, show_broadcast, NULL),
310 	__ATTR(carrier, S_IRUGO, show_carrier, NULL),
311 	__ATTR(speed, S_IRUGO, show_speed, NULL),
312 	__ATTR(duplex, S_IRUGO, show_duplex, NULL),
313 	__ATTR(dormant, S_IRUGO, show_dormant, NULL),
314 	__ATTR(operstate, S_IRUGO, show_operstate, NULL),
315 	__ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu),
316 	__ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags),
317 	__ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
318 	       store_tx_queue_len),
319 	{}
320 };
321 
322 /* Show a given attribute in the statistics group */
323 static ssize_t netstat_show(const struct device *d,
324 			    struct device_attribute *attr, char *buf,
325 			    unsigned long offset)
326 {
327 	struct net_device *dev = to_net_dev(d);
328 	ssize_t ret = -EINVAL;
329 
330 	WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
331 			offset % sizeof(u64) != 0);
332 
333 	read_lock(&dev_base_lock);
334 	if (dev_isalive(dev)) {
335 		struct rtnl_link_stats64 temp;
336 		const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
337 
338 		ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset));
339 	}
340 	read_unlock(&dev_base_lock);
341 	return ret;
342 }
343 
344 /* generate a read-only statistics attribute */
345 #define NETSTAT_ENTRY(name)						\
346 static ssize_t show_##name(struct device *d,				\
347 			   struct device_attribute *attr, char *buf) 	\
348 {									\
349 	return netstat_show(d, attr, buf,				\
350 			    offsetof(struct rtnl_link_stats64, name));	\
351 }									\
352 static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
353 
354 NETSTAT_ENTRY(rx_packets);
355 NETSTAT_ENTRY(tx_packets);
356 NETSTAT_ENTRY(rx_bytes);
357 NETSTAT_ENTRY(tx_bytes);
358 NETSTAT_ENTRY(rx_errors);
359 NETSTAT_ENTRY(tx_errors);
360 NETSTAT_ENTRY(rx_dropped);
361 NETSTAT_ENTRY(tx_dropped);
362 NETSTAT_ENTRY(multicast);
363 NETSTAT_ENTRY(collisions);
364 NETSTAT_ENTRY(rx_length_errors);
365 NETSTAT_ENTRY(rx_over_errors);
366 NETSTAT_ENTRY(rx_crc_errors);
367 NETSTAT_ENTRY(rx_frame_errors);
368 NETSTAT_ENTRY(rx_fifo_errors);
369 NETSTAT_ENTRY(rx_missed_errors);
370 NETSTAT_ENTRY(tx_aborted_errors);
371 NETSTAT_ENTRY(tx_carrier_errors);
372 NETSTAT_ENTRY(tx_fifo_errors);
373 NETSTAT_ENTRY(tx_heartbeat_errors);
374 NETSTAT_ENTRY(tx_window_errors);
375 NETSTAT_ENTRY(rx_compressed);
376 NETSTAT_ENTRY(tx_compressed);
377 
378 static struct attribute *netstat_attrs[] = {
379 	&dev_attr_rx_packets.attr,
380 	&dev_attr_tx_packets.attr,
381 	&dev_attr_rx_bytes.attr,
382 	&dev_attr_tx_bytes.attr,
383 	&dev_attr_rx_errors.attr,
384 	&dev_attr_tx_errors.attr,
385 	&dev_attr_rx_dropped.attr,
386 	&dev_attr_tx_dropped.attr,
387 	&dev_attr_multicast.attr,
388 	&dev_attr_collisions.attr,
389 	&dev_attr_rx_length_errors.attr,
390 	&dev_attr_rx_over_errors.attr,
391 	&dev_attr_rx_crc_errors.attr,
392 	&dev_attr_rx_frame_errors.attr,
393 	&dev_attr_rx_fifo_errors.attr,
394 	&dev_attr_rx_missed_errors.attr,
395 	&dev_attr_tx_aborted_errors.attr,
396 	&dev_attr_tx_carrier_errors.attr,
397 	&dev_attr_tx_fifo_errors.attr,
398 	&dev_attr_tx_heartbeat_errors.attr,
399 	&dev_attr_tx_window_errors.attr,
400 	&dev_attr_rx_compressed.attr,
401 	&dev_attr_tx_compressed.attr,
402 	NULL
403 };
404 
405 
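/* Exposed as /sys/class/net/<iface>/statistics/, one read-only file per
 * rtnl_link_stats64 counter.
 */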
406 static struct attribute_group netstat_group = {
407 	.name  = "statistics",
408 	.attrs  = netstat_attrs,
409 };
410 
411 #ifdef CONFIG_WIRELESS_EXT_SYSFS
412 /* helper function that does all the locking etc. for wireless stats */
413 static ssize_t wireless_show(struct device *d, char *buf,
414 			     ssize_t (*format)(const struct iw_statistics *,
415 					       char *))
416 {
417 	struct net_device *dev = to_net_dev(d);
418 	const struct iw_statistics *iw;
419 	ssize_t ret = -EINVAL;
420 
421 	if (!rtnl_trylock())
422 		return restart_syscall();
423 	if (dev_isalive(dev)) {
424 		iw = get_wireless_stats(dev);
425 		if (iw)
426 			ret = (*format)(iw, buf);
427 	}
428 	rtnl_unlock();
429 
430 	return ret;
431 }
432 
433 /* show function template for wireless fields */
434 #define WIRELESS_SHOW(name, field, format_string)			\
435 static ssize_t format_iw_##name(const struct iw_statistics *iw, char *buf) \
436 {									\
437 	return sprintf(buf, format_string, iw->field);			\
438 }									\
439 static ssize_t show_iw_##name(struct device *d,				\
440 			      struct device_attribute *attr, char *buf)	\
441 {									\
442 	return wireless_show(d, buf, format_iw_##name);			\
443 }									\
444 static DEVICE_ATTR(name, S_IRUGO, show_iw_##name, NULL)
445 
446 WIRELESS_SHOW(status, status, fmt_hex);
447 WIRELESS_SHOW(link, qual.qual, fmt_dec);
448 WIRELESS_SHOW(level, qual.level, fmt_dec);
449 WIRELESS_SHOW(noise, qual.noise, fmt_dec);
450 WIRELESS_SHOW(nwid, discard.nwid, fmt_dec);
451 WIRELESS_SHOW(crypt, discard.code, fmt_dec);
452 WIRELESS_SHOW(fragment, discard.fragment, fmt_dec);
453 WIRELESS_SHOW(misc, discard.misc, fmt_dec);
454 WIRELESS_SHOW(retries, discard.retries, fmt_dec);
455 WIRELESS_SHOW(beacon, miss.beacon, fmt_dec);
456 
457 static struct attribute *wireless_attrs[] = {
458 	&dev_attr_status.attr,
459 	&dev_attr_link.attr,
460 	&dev_attr_level.attr,
461 	&dev_attr_noise.attr,
462 	&dev_attr_nwid.attr,
463 	&dev_attr_crypt.attr,
464 	&dev_attr_fragment.attr,
465 	&dev_attr_retries.attr,
466 	&dev_attr_misc.attr,
467 	&dev_attr_beacon.attr,
468 	NULL
469 };
470 
471 static struct attribute_group wireless_group = {
472 	.name = "wireless",
473 	.attrs = wireless_attrs,
474 };
475 #endif
476 #endif /* CONFIG_SYSFS */
477 
478 #ifdef CONFIG_RPS
479 /*
480  * RX queue sysfs structures and functions.
481  */
482 struct rx_queue_attribute {
483 	struct attribute attr;
484 	ssize_t (*show)(struct netdev_rx_queue *queue,
485 	    struct rx_queue_attribute *attr, char *buf);
486 	ssize_t (*store)(struct netdev_rx_queue *queue,
487 	    struct rx_queue_attribute *attr, const char *buf, size_t len);
488 };
489 #define to_rx_queue_attr(_attr) container_of(_attr,		\
490     struct rx_queue_attribute, attr)
491 
492 #define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)
493 
494 static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
495 				  char *buf)
496 {
497 	struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
498 	struct netdev_rx_queue *queue = to_rx_queue(kobj);
499 
500 	if (!attribute->show)
501 		return -EIO;
502 
503 	return attribute->show(queue, attribute, buf);
504 }
505 
506 static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
507 				   const char *buf, size_t count)
508 {
509 	struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
510 	struct netdev_rx_queue *queue = to_rx_queue(kobj);
511 
512 	if (!attribute->store)
513 		return -EIO;
514 
515 	return attribute->store(queue, attribute, buf, count);
516 }
517 
518 static const struct sysfs_ops rx_queue_sysfs_ops = {
519 	.show = rx_queue_attr_show,
520 	.store = rx_queue_attr_store,
521 };
522 
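/* "rps_cpus" reports the CPU mask used for Receive Packet Steering on this
 * RX queue; the file appears as
 * /sys/class/net/<iface>/queues/rx-<n>/rps_cpus.
 */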
523 static ssize_t show_rps_map(struct netdev_rx_queue *queue,
524 			    struct rx_queue_attribute *attribute, char *buf)
525 {
526 	struct rps_map *map;
527 	cpumask_var_t mask;
528 	size_t len = 0;
529 	int i;
530 
531 	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
532 		return -ENOMEM;
533 
534 	rcu_read_lock();
535 	map = rcu_dereference(queue->rps_map);
536 	if (map)
537 		for (i = 0; i < map->len; i++)
538 			cpumask_set_cpu(map->cpus[i], mask);
539 
540 	len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
541 	if (PAGE_SIZE - len < 3) {
542 		rcu_read_unlock();
543 		free_cpumask_var(mask);
544 		return -EINVAL;
545 	}
546 	rcu_read_unlock();
547 
548 	free_cpumask_var(mask);
549 	len += sprintf(buf + len, "\n");
550 	return len;
551 }
552 
553 static void rps_map_release(struct rcu_head *rcu)
554 {
555 	struct rps_map *map = container_of(rcu, struct rps_map, rcu);
556 
557 	kfree(map);
558 }
559 
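/* Writing "rps_cpus": parse the CPU mask, build a new rps_map restricted to
 * online CPUs, publish it with rcu_assign_pointer() and free the old map
 * after an RCU grace period.
 */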
560 static ssize_t store_rps_map(struct netdev_rx_queue *queue,
561 		      struct rx_queue_attribute *attribute,
562 		      const char *buf, size_t len)
563 {
564 	struct rps_map *old_map, *map;
565 	cpumask_var_t mask;
566 	int err, cpu, i;
567 	static DEFINE_SPINLOCK(rps_map_lock);
568 
569 	if (!capable(CAP_NET_ADMIN))
570 		return -EPERM;
571 
572 	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
573 		return -ENOMEM;
574 
575 	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
576 	if (err) {
577 		free_cpumask_var(mask);
578 		return err;
579 	}
580 
581 	map = kzalloc(max_t(unsigned,
582 	    RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
583 	    GFP_KERNEL);
584 	if (!map) {
585 		free_cpumask_var(mask);
586 		return -ENOMEM;
587 	}
588 
589 	i = 0;
590 	for_each_cpu_and(cpu, mask, cpu_online_mask)
591 		map->cpus[i++] = cpu;
592 
593 	if (i)
594 		map->len = i;
595 	else {
596 		kfree(map);
597 		map = NULL;
598 	}
599 
600 	spin_lock(&rps_map_lock);
601 	old_map = rcu_dereference_protected(queue->rps_map,
602 					    lockdep_is_held(&rps_map_lock));
603 	rcu_assign_pointer(queue->rps_map, map);
604 	spin_unlock(&rps_map_lock);
605 
606 	if (old_map)
607 		call_rcu(&old_map->rcu, rps_map_release);
608 
609 	free_cpumask_var(mask);
610 	return len;
611 }
612 
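/* "rps_flow_cnt" reports the size of this queue's RPS flow table
 * (0 when no table is installed).
 */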
613 static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
614 					   struct rx_queue_attribute *attr,
615 					   char *buf)
616 {
617 	struct rps_dev_flow_table *flow_table;
618 	unsigned int val = 0;
619 
620 	rcu_read_lock();
621 	flow_table = rcu_dereference(queue->rps_flow_table);
622 	if (flow_table)
623 		val = flow_table->mask + 1;
624 	rcu_read_unlock();
625 
626 	return sprintf(buf, "%u\n", val);
627 }
628 
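/* The flow table is vmalloc()ed and vfree() cannot be called from the
 * softirq context an RCU callback runs in, so the release is deferred to a
 * workqueue.
 */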
629 static void rps_dev_flow_table_release_work(struct work_struct *work)
630 {
631 	struct rps_dev_flow_table *table = container_of(work,
632 	    struct rps_dev_flow_table, free_work);
633 
634 	vfree(table);
635 }
636 
637 static void rps_dev_flow_table_release(struct rcu_head *rcu)
638 {
639 	struct rps_dev_flow_table *table = container_of(rcu,
640 	    struct rps_dev_flow_table, rcu);
641 
642 	INIT_WORK(&table->free_work, rps_dev_flow_table_release_work);
643 	schedule_work(&table->free_work);
644 }
645 
646 static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
647 				     struct rx_queue_attribute *attr,
648 				     const char *buf, size_t len)
649 {
650 	unsigned int count;
651 	char *endp;
652 	struct rps_dev_flow_table *table, *old_table;
653 	static DEFINE_SPINLOCK(rps_dev_flow_lock);
654 
655 	if (!capable(CAP_NET_ADMIN))
656 		return -EPERM;
657 
658 	count = simple_strtoul(buf, &endp, 0);
659 	if (endp == buf)
660 		return -EINVAL;
661 
662 	if (count) {
663 		int i;
664 
665 		if (count > 1<<30) {
666 			/* Enforce a limit to prevent overflow */
667 			return -EINVAL;
668 		}
669 		count = roundup_pow_of_two(count);
670 		table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count));
671 		if (!table)
672 			return -ENOMEM;
673 
674 		table->mask = count - 1;
675 		for (i = 0; i < count; i++)
676 			table->flows[i].cpu = RPS_NO_CPU;
677 	} else
678 		table = NULL;
679 
680 	spin_lock(&rps_dev_flow_lock);
681 	old_table = rcu_dereference_protected(queue->rps_flow_table,
682 					      lockdep_is_held(&rps_dev_flow_lock));
683 	rcu_assign_pointer(queue->rps_flow_table, table);
684 	spin_unlock(&rps_dev_flow_lock);
685 
686 	if (old_table)
687 		call_rcu(&old_table->rcu, rps_dev_flow_table_release);
688 
689 	return len;
690 }
691 
692 static struct rx_queue_attribute rps_cpus_attribute =
693 	__ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
694 
695 
696 static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute =
697 	__ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
698 	    show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
699 
700 static struct attribute *rx_queue_default_attrs[] = {
701 	&rps_cpus_attribute.attr,
702 	&rps_dev_flow_table_cnt_attribute.attr,
703 	NULL
704 };
705 
706 static void rx_queue_release(struct kobject *kobj)
707 {
708 	struct netdev_rx_queue *queue = to_rx_queue(kobj);
709 	struct rps_map *map;
710 	struct rps_dev_flow_table *flow_table;
711 
712 
713 	map = rcu_dereference_raw(queue->rps_map);
714 	if (map) {
715 		RCU_INIT_POINTER(queue->rps_map, NULL);
716 		call_rcu(&map->rcu, rps_map_release);
717 	}
718 
719 	flow_table = rcu_dereference_raw(queue->rps_flow_table);
720 	if (flow_table) {
721 		RCU_INIT_POINTER(queue->rps_flow_table, NULL);
722 		call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
723 	}
724 
725 	memset(kobj, 0, sizeof(*kobj));
726 	dev_put(queue->dev);
727 }
728 
729 static struct kobj_type rx_queue_ktype = {
730 	.sysfs_ops = &rx_queue_sysfs_ops,
731 	.release = rx_queue_release,
732 	.default_attrs = rx_queue_default_attrs,
733 };
734 
735 static int rx_queue_add_kobject(struct net_device *net, int index)
736 {
737 	struct netdev_rx_queue *queue = net->_rx + index;
738 	struct kobject *kobj = &queue->kobj;
739 	int error = 0;
740 
741 	kobj->kset = net->queues_kset;
742 	error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
743 	    "rx-%u", index);
744 	if (error) {
745 		kobject_put(kobj);
746 		return error;
747 	}
748 
749 	kobject_uevent(kobj, KOBJ_ADD);
750 	dev_hold(queue->dev);
751 
752 	return error;
753 }
754 #endif /* CONFIG_RPS */
755 
756 int
757 net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
758 {
759 #ifdef CONFIG_RPS
760 	int i;
761 	int error = 0;
762 
763 	for (i = old_num; i < new_num; i++) {
764 		error = rx_queue_add_kobject(net, i);
765 		if (error) {
766 			new_num = old_num;
767 			break;
768 		}
769 	}
770 
771 	while (--i >= new_num)
772 		kobject_put(&net->_rx[i].kobj);
773 
774 	return error;
775 #else
776 	return 0;
777 #endif
778 }
779 
780 #ifdef CONFIG_XPS
781 /*
782  * netdev_queue sysfs structures and functions.
783  */
784 struct netdev_queue_attribute {
785 	struct attribute attr;
786 	ssize_t (*show)(struct netdev_queue *queue,
787 	    struct netdev_queue_attribute *attr, char *buf);
788 	ssize_t (*store)(struct netdev_queue *queue,
789 	    struct netdev_queue_attribute *attr, const char *buf, size_t len);
790 };
791 #define to_netdev_queue_attr(_attr) container_of(_attr,		\
792     struct netdev_queue_attribute, attr)
793 
794 #define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj)
795 
796 static ssize_t netdev_queue_attr_show(struct kobject *kobj,
797 				      struct attribute *attr, char *buf)
798 {
799 	struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
800 	struct netdev_queue *queue = to_netdev_queue(kobj);
801 
802 	if (!attribute->show)
803 		return -EIO;
804 
805 	return attribute->show(queue, attribute, buf);
806 }
807 
808 static ssize_t netdev_queue_attr_store(struct kobject *kobj,
809 				       struct attribute *attr,
810 				       const char *buf, size_t count)
811 {
812 	struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
813 	struct netdev_queue *queue = to_netdev_queue(kobj);
814 
815 	if (!attribute->store)
816 		return -EIO;
817 
818 	return attribute->store(queue, attribute, buf, count);
819 }
820 
821 static const struct sysfs_ops netdev_queue_sysfs_ops = {
822 	.show = netdev_queue_attr_show,
823 	.store = netdev_queue_attr_store,
824 };
825 
826 static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
827 {
828 	struct net_device *dev = queue->dev;
829 	int i;
830 
831 	for (i = 0; i < dev->num_tx_queues; i++)
832 		if (queue == &dev->_tx[i])
833 			break;
834 
835 	BUG_ON(i >= dev->num_tx_queues);
836 
837 	return i;
838 }
839 
840 
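/* "xps_cpus" reports which CPUs may transmit on this queue, found by
 * scanning every CPU's xps_map for this queue index; the file appears as
 * /sys/class/net/<iface>/queues/tx-<n>/xps_cpus.
 */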
841 static ssize_t show_xps_map(struct netdev_queue *queue,
842 			    struct netdev_queue_attribute *attribute, char *buf)
843 {
844 	struct net_device *dev = queue->dev;
845 	struct xps_dev_maps *dev_maps;
846 	cpumask_var_t mask;
847 	unsigned long index;
848 	size_t len = 0;
849 	int i;
850 
851 	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
852 		return -ENOMEM;
853 
854 	index = get_netdev_queue_index(queue);
855 
856 	rcu_read_lock();
857 	dev_maps = rcu_dereference(dev->xps_maps);
858 	if (dev_maps) {
859 		for_each_possible_cpu(i) {
860 			struct xps_map *map =
861 			    rcu_dereference(dev_maps->cpu_map[i]);
862 			if (map) {
863 				int j;
864 				for (j = 0; j < map->len; j++) {
865 					if (map->queues[j] == index) {
866 						cpumask_set_cpu(i, mask);
867 						break;
868 					}
869 				}
870 			}
871 		}
872 	}
873 	rcu_read_unlock();
874 
875 	len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
876 	if (PAGE_SIZE - len < 3) {
877 		free_cpumask_var(mask);
878 		return -EINVAL;
879 	}
880 
881 	free_cpumask_var(mask);
882 	len += sprintf(buf + len, "\n");
883 	return len;
884 }
885 
886 static void xps_map_release(struct rcu_head *rcu)
887 {
888 	struct xps_map *map = container_of(rcu, struct xps_map, rcu);
889 
890 	kfree(map);
891 }
892 
893 static void xps_dev_maps_release(struct rcu_head *rcu)
894 {
895 	struct xps_dev_maps *dev_maps =
896 	    container_of(rcu, struct xps_dev_maps, rcu);
897 
898 	kfree(dev_maps);
899 }
900 
901 static DEFINE_MUTEX(xps_map_mutex);
902 #define xmap_dereference(P)		\
903 	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
904 
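/* Writing "xps_cpus": under xps_map_mutex, rebuild the per-CPU queue maps,
 * adding or removing this queue index according to the new mask, then
 * publish the new xps_dev_maps via RCU and free the old maps.
 */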
905 static ssize_t store_xps_map(struct netdev_queue *queue,
906 		      struct netdev_queue_attribute *attribute,
907 		      const char *buf, size_t len)
908 {
909 	struct net_device *dev = queue->dev;
910 	cpumask_var_t mask;
911 	int err, i, cpu, pos, map_len, alloc_len, need_set;
912 	unsigned long index;
913 	struct xps_map *map, *new_map;
914 	struct xps_dev_maps *dev_maps, *new_dev_maps;
915 	int nonempty = 0;
916 	int numa_node = -2;
917 
918 	if (!capable(CAP_NET_ADMIN))
919 		return -EPERM;
920 
921 	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
922 		return -ENOMEM;
923 
924 	index = get_netdev_queue_index(queue);
925 
926 	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
927 	if (err) {
928 		free_cpumask_var(mask);
929 		return err;
930 	}
931 
932 	new_dev_maps = kzalloc(max_t(unsigned,
933 	    XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL);
934 	if (!new_dev_maps) {
935 		free_cpumask_var(mask);
936 		return -ENOMEM;
937 	}
938 
939 	mutex_lock(&xps_map_mutex);
940 
941 	dev_maps = xmap_dereference(dev->xps_maps);
942 
943 	for_each_possible_cpu(cpu) {
944 		map = dev_maps ?
945 			xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
946 		new_map = map;
947 		if (map) {
948 			for (pos = 0; pos < map->len; pos++)
949 				if (map->queues[pos] == index)
950 					break;
951 			map_len = map->len;
952 			alloc_len = map->alloc_len;
953 		} else
954 			pos = map_len = alloc_len = 0;
955 
956 		need_set = cpu_isset(cpu, *mask) && cpu_online(cpu);
957 #ifdef CONFIG_NUMA
958 		if (need_set) {
959 			if (numa_node == -2)
960 				numa_node = cpu_to_node(cpu);
961 			else if (numa_node != cpu_to_node(cpu))
962 				numa_node = -1;
963 		}
964 #endif
965 		if (need_set && pos >= map_len) {
966 			/* Need to add queue to this CPU's map */
967 			if (map_len >= alloc_len) {
968 				alloc_len = alloc_len ?
969 				    2 * alloc_len : XPS_MIN_MAP_ALLOC;
970 				new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len),
971 						       GFP_KERNEL,
972 						       cpu_to_node(cpu));
973 				if (!new_map)
974 					goto error;
975 				new_map->alloc_len = alloc_len;
976 				for (i = 0; i < map_len; i++)
977 					new_map->queues[i] = map->queues[i];
978 				new_map->len = map_len;
979 			}
980 			new_map->queues[new_map->len++] = index;
981 		} else if (!need_set && pos < map_len) {
982 			/* Need to remove queue from this CPU's map */
983 			if (map_len > 1)
984 				new_map->queues[pos] =
985 				    new_map->queues[--new_map->len];
986 			else
987 				new_map = NULL;
988 		}
989 		RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map);
990 	}
991 
992 	/* Cleanup old maps */
993 	for_each_possible_cpu(cpu) {
994 		map = dev_maps ?
995 			xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
996 		if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map)
997 			call_rcu(&map->rcu, xps_map_release);
998 		if (new_dev_maps->cpu_map[cpu])
999 			nonempty = 1;
1000 	}
1001 
1002 	if (nonempty)
1003 		rcu_assign_pointer(dev->xps_maps, new_dev_maps);
1004 	else {
1005 		kfree(new_dev_maps);
1006 		rcu_assign_pointer(dev->xps_maps, NULL);
1007 	}
1008 
1009 	if (dev_maps)
1010 		call_rcu(&dev_maps->rcu, xps_dev_maps_release);
1011 
1012 	netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node :
1013 					    NUMA_NO_NODE);
1014 
1015 	mutex_unlock(&xps_map_mutex);
1016 
1017 	free_cpumask_var(mask);
1018 	return len;
1019 
1020 error:
1021 	mutex_unlock(&xps_map_mutex);
1022 
1023 	if (new_dev_maps)
1024 		for_each_possible_cpu(i)
1025 			kfree(rcu_dereference_protected(
1026 				new_dev_maps->cpu_map[i],
1027 				1));
1028 	kfree(new_dev_maps);
1029 	free_cpumask_var(mask);
1030 	return -ENOMEM;
1031 }
1032 
1033 static struct netdev_queue_attribute xps_cpus_attribute =
1034     __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
1035 
1036 static struct attribute *netdev_queue_default_attrs[] = {
1037 	&xps_cpus_attribute.attr,
1038 	NULL
1039 };
1040 
1041 static void netdev_queue_release(struct kobject *kobj)
1042 {
1043 	struct netdev_queue *queue = to_netdev_queue(kobj);
1044 	struct net_device *dev = queue->dev;
1045 	struct xps_dev_maps *dev_maps;
1046 	struct xps_map *map;
1047 	unsigned long index;
1048 	int i, pos, nonempty = 0;
1049 
1050 	index = get_netdev_queue_index(queue);
1051 
1052 	mutex_lock(&xps_map_mutex);
1053 	dev_maps = xmap_dereference(dev->xps_maps);
1054 
1055 	if (dev_maps) {
1056 		for_each_possible_cpu(i) {
1057 			map = xmap_dereference(dev_maps->cpu_map[i]);
1058 			if (!map)
1059 				continue;
1060 
1061 			for (pos = 0; pos < map->len; pos++)
1062 				if (map->queues[pos] == index)
1063 					break;
1064 
1065 			if (pos < map->len) {
1066 				if (map->len > 1)
1067 					map->queues[pos] =
1068 					    map->queues[--map->len];
1069 				else {
1070 					RCU_INIT_POINTER(dev_maps->cpu_map[i],
1071 					    NULL);
1072 					call_rcu(&map->rcu, xps_map_release);
1073 					map = NULL;
1074 				}
1075 			}
1076 			if (map)
1077 				nonempty = 1;
1078 		}
1079 
1080 		if (!nonempty) {
1081 			RCU_INIT_POINTER(dev->xps_maps, NULL);
1082 			call_rcu(&dev_maps->rcu, xps_dev_maps_release);
1083 		}
1084 	}
1085 
1086 	mutex_unlock(&xps_map_mutex);
1087 
1088 	memset(kobj, 0, sizeof(*kobj));
1089 	dev_put(queue->dev);
1090 }
1091 
1092 static struct kobj_type netdev_queue_ktype = {
1093 	.sysfs_ops = &netdev_queue_sysfs_ops,
1094 	.release = netdev_queue_release,
1095 	.default_attrs = netdev_queue_default_attrs,
1096 };
1097 
1098 static int netdev_queue_add_kobject(struct net_device *net, int index)
1099 {
1100 	struct netdev_queue *queue = net->_tx + index;
1101 	struct kobject *kobj = &queue->kobj;
1102 	int error = 0;
1103 
1104 	kobj->kset = net->queues_kset;
1105 	error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
1106 	    "tx-%u", index);
1107 	if (error) {
1108 		kobject_put(kobj);
1109 		return error;
1110 	}
1111 
1112 	kobject_uevent(kobj, KOBJ_ADD);
1113 	dev_hold(queue->dev);
1114 
1115 	return error;
1116 }
1117 #endif /* CONFIG_XPS */
1118 
1119 int
1120 netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
1121 {
1122 #ifdef CONFIG_XPS
1123 	int i;
1124 	int error = 0;
1125 
1126 	for (i = old_num; i < new_num; i++) {
1127 		error = netdev_queue_add_kobject(net, i);
1128 		if (error) {
1129 			new_num = old_num;
1130 			break;
1131 		}
1132 	}
1133 
1134 	while (--i >= new_num)
1135 		kobject_put(&net->_tx[i].kobj);
1136 
1137 	return error;
1138 #else
1139 	return 0;
1140 #endif
1141 }
1142 
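/* Create the per-device "queues" kset plus rx-<n>/tx-<n> kobjects for all
 * real RX/TX queues; on failure, roll back whatever was registered.
 */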
1143 static int register_queue_kobjects(struct net_device *net)
1144 {
1145 	int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;
1146 
1147 #if defined(CONFIG_RPS) || defined(CONFIG_XPS)
1148 	net->queues_kset = kset_create_and_add("queues",
1149 	    NULL, &net->dev.kobj);
1150 	if (!net->queues_kset)
1151 		return -ENOMEM;
1152 #endif
1153 
1154 #ifdef CONFIG_RPS
1155 	real_rx = net->real_num_rx_queues;
1156 #endif
1157 	real_tx = net->real_num_tx_queues;
1158 
1159 	error = net_rx_queue_update_kobjects(net, 0, real_rx);
1160 	if (error)
1161 		goto error;
1162 	rxq = real_rx;
1163 
1164 	error = netdev_queue_update_kobjects(net, 0, real_tx);
1165 	if (error)
1166 		goto error;
1167 	txq = real_tx;
1168 
1169 	return 0;
1170 
1171 error:
1172 	netdev_queue_update_kobjects(net, txq, 0);
1173 	net_rx_queue_update_kobjects(net, rxq, 0);
1174 	return error;
1175 }
1176 
1177 static void remove_queue_kobjects(struct net_device *net)
1178 {
1179 	int real_rx = 0, real_tx = 0;
1180 
1181 #ifdef CONFIG_RPS
1182 	real_rx = net->real_num_rx_queues;
1183 #endif
1184 	real_tx = net->real_num_tx_queues;
1185 
1186 	net_rx_queue_update_kobjects(net, real_rx, 0);
1187 	netdev_queue_update_kobjects(net, real_tx, 0);
1188 #if defined(CONFIG_RPS) || defined(CONFIG_XPS)
1189 	kset_unregister(net->queues_kset);
1190 #endif
1191 }
1192 
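/* Namespace support: these callbacks tell the kobject core which network
 * namespace a device belongs to, so each namespace only sees its own
 * devices under /sys/class/net.
 */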
1193 static const void *net_current_ns(void)
1194 {
1195 	return current->nsproxy->net_ns;
1196 }
1197 
1198 static const void *net_initial_ns(void)
1199 {
1200 	return &init_net;
1201 }
1202 
1203 static const void *net_netlink_ns(struct sock *sk)
1204 {
1205 	return sock_net(sk);
1206 }
1207 
1208 struct kobj_ns_type_operations net_ns_type_operations = {
1209 	.type = KOBJ_NS_TYPE_NET,
1210 	.current_ns = net_current_ns,
1211 	.netlink_ns = net_netlink_ns,
1212 	.initial_ns = net_initial_ns,
1213 };
1214 EXPORT_SYMBOL_GPL(net_ns_type_operations);
1215 
1216 static void net_kobj_ns_exit(struct net *net)
1217 {
1218 	kobj_ns_exit(KOBJ_NS_TYPE_NET, net);
1219 }
1220 
1221 static struct pernet_operations kobj_net_ops = {
1222 	.exit = net_kobj_ns_exit,
1223 };
1224 
1225 
1226 #ifdef CONFIG_HOTPLUG
1227 static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
1228 {
1229 	struct net_device *dev = to_net_dev(d);
1230 	int retval;
1231 
1232 	/* pass interface to uevent. */
1233 	retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
1234 	if (retval)
1235 		goto exit;
1236 
1237 	/* pass ifindex to uevent.
1238 	 * ifindex is useful as it won't change (interface name may change)
1239 	 * and is what RtNetlink uses natively. */
1240 	retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex);
1241 
1242 exit:
1243 	return retval;
1244 }
1245 #endif
1246 
1247 /*
1248  *	netdev_release -- destroy and free a dead device.
1249  *	Called when the last reference to the device kobject is gone.
1250  */
1251 static void netdev_release(struct device *d)
1252 {
1253 	struct net_device *dev = to_net_dev(d);
1254 
1255 	BUG_ON(dev->reg_state != NETREG_RELEASED);
1256 
1257 	kfree(dev->ifalias);
1258 	kfree((char *)dev - dev->padded);
1259 }
1260 
1261 static const void *net_namespace(struct device *d)
1262 {
1263 	struct net_device *dev;
1264 	dev = container_of(d, struct net_device, dev);
1265 	return dev_net(dev);
1266 }
1267 
1268 static struct class net_class = {
1269 	.name = "net",
1270 	.dev_release = netdev_release,
1271 #ifdef CONFIG_SYSFS
1272 	.dev_attrs = net_class_attributes,
1273 #endif /* CONFIG_SYSFS */
1274 #ifdef CONFIG_HOTPLUG
1275 	.dev_uevent = netdev_uevent,
1276 #endif
1277 	.ns_type = &net_ns_type_operations,
1278 	.namespace = net_namespace,
1279 };
1280 
1281 /* Delete sysfs entries but hold kobject reference until after all
1282  * netdev references are gone.
1283  */
1284 void netdev_unregister_kobject(struct net_device *net)
1285 {
1286 	struct device *dev = &(net->dev);
1287 
1288 	kobject_get(&dev->kobj);
1289 
1290 	remove_queue_kobjects(net);
1291 
1292 	device_del(dev);
1293 }
1294 
1295 /* Create sysfs entries for network device. */
1296 int netdev_register_kobject(struct net_device *net)
1297 {
1298 	struct device *dev = &(net->dev);
1299 	const struct attribute_group **groups = net->sysfs_groups;
1300 	int error = 0;
1301 
1302 	device_initialize(dev);
1303 	dev->class = &net_class;
1304 	dev->platform_data = net;
1305 	dev->groups = groups;
1306 
1307 	dev_set_name(dev, "%s", net->name);
1308 
1309 #ifdef CONFIG_SYSFS
1310 	/* Allow for a device-specific group */
1311 	if (*groups)
1312 		groups++;
1313 
1314 	*groups++ = &netstat_group;
1315 #ifdef CONFIG_WIRELESS_EXT_SYSFS
1316 	if (net->ieee80211_ptr)
1317 		*groups++ = &wireless_group;
1318 #ifdef CONFIG_WIRELESS_EXT
1319 	else if (net->wireless_handlers)
1320 		*groups++ = &wireless_group;
1321 #endif
1322 #endif
1323 #endif /* CONFIG_SYSFS */
1324 
1325 	error = device_add(dev);
1326 	if (error)
1327 		return error;
1328 
1329 	error = register_queue_kobjects(net);
1330 	if (error) {
1331 		device_del(dev);
1332 		return error;
1333 	}
1334 
1335 	return error;
1336 }
1337 
1338 int netdev_class_create_file(struct class_attribute *class_attr)
1339 {
1340 	return class_create_file(&net_class, class_attr);
1341 }
1342 EXPORT_SYMBOL(netdev_class_create_file);
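
/* Illustrative (hypothetical) caller sketch: a driver can publish an extra
 * class-level file, e.g.
 *
 *	static ssize_t foo_show(struct class *cls, struct class_attribute *attr,
 *				char *buf)
 *	{
 *		return sprintf(buf, "bar\n");
 *	}
 *	static CLASS_ATTR(foo, S_IRUGO, foo_show, NULL);
 *	...
 *	err = netdev_class_create_file(&class_attr_foo);
 *
 * which would create /sys/class/net/foo (the bonding driver uses this for
 * its bonding_masters file, for example).
 */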
1343 
1344 void netdev_class_remove_file(struct class_attribute *class_attr)
1345 {
1346 	class_remove_file(&net_class, class_attr);
1347 }
1348 EXPORT_SYMBOL(netdev_class_remove_file);
1349 
1350 int netdev_kobject_init(void)
1351 {
1352 	kobj_ns_type_register(&net_ns_type_operations);
1353 	register_pernet_subsys(&kobj_net_ops);
1354 	return class_register(&net_class);
1355 }
1356