/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer,
   attach this qdisc to the slave device, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb Ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable because of heavy packet reordering.
      I estimate the upper useful ratio at ~10 times.
   3. If a slave requires address resolution, only protocols using the
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */
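
/* A slightly fuller walk-through (illustrative only -- the device
   names, the address and the parameter value below are placeholder
   examples, not anything this module mandates):

   # modprobe sch_teql max_equalizers=1
   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0
   # ip link set teql0 up
   # ip addr add 10.0.0.1/24 dev teql0

   Packets routed out of teql0 are then dealt round-robin onto the
   slaves' queues by teql_master_xmit() below. */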

struct teql_master {
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;
	struct list_head master_list;
	unsigned long	tx_bytes;
	unsigned long	tx_packets;
	unsigned long	tx_errors;
	unsigned long	tx_dropped;
};

struct teql_sched_data {
	struct Qdisc *next;
	struct teql_master *m;
	struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
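
/*
 * The slave qdiscs of one master form a circular, singly linked list:
 * each slave's private data carries a ->next pointer, NEXT_SLAVE()
 * follows it, and master->slaves marks the current round-robin
 * position.  FMASK collects the link-layer flags (broadcast/PtP)
 * that the master mirrors from its slaves.
 */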

/* "teql*" qdisc routines */

static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_sched_data *q = qdisc_priv(sch);

	if (q->q.qlen < dev->tx_queue_len) {
		__skb_queue_tail(&q->q, skb);
		return NET_XMIT_SUCCESS;
	}

	return qdisc_drop(skb, sch);
}

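/*
 * Dequeue on behalf of a slave device.  When the slave's private FIFO
 * runs empty, record this qdisc as the master's next round-robin
 * position and wake the master queue; the reported qlen also counts
 * the backlog sitting in the master device's own qdisc.
 */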
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	if (skb == NULL) {
		struct net_device *m = qdisc_dev(dat_queue->qdisc);

		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	} else {
		qdisc_bstats_update(sch, skb);
	}
	sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
	return skb;
}

static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
	/* teql is meant to be used as root qdisc */
	return NULL;
}

static inline void
teql_neigh_release(struct neighbour *n)
{
	if (n)
		neigh_release(n);
}

static void
teql_reset(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
}

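/*
 * Unlink a slave from its master's circular list.  When the last
 * slave goes away, master->slaves is cleared and the master device's
 * own qdisc is reset under its root lock so no stale pointer to this
 * qdisc survives.
 */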
static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	prev = master->slaves;
	if (prev) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(txq->qdisc);
						spin_lock_bh(root_lock);
						qdisc_reset(txq->qdisc);
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				break;
			}
		} while ((prev = q) != master->slaves);
	}
}

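/*
 * Attach a slave.  A device whose link-layer header cannot fit in the
 * master's is refused (-EINVAL), as is enslaving the master to itself
 * (-ELOOP).  While the master is down, its flags and MTU are narrowed
 * to what the new slave supports; while it is up, an incompatible
 * slave is rejected instead.
 */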
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			if ((m->dev->flags & IFF_POINTOPOINT &&
			     !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST &&
			     !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST &&
			     !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			if (!(dev->flags & IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags & IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags & IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags & ~FMASK) | (dev->flags & FMASK);
	}
	return 0;
}

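/*
 * Build the link-layer header for skb on the chosen slave.  The
 * route's neighbour may belong to a different device, so the
 * neighbour is looked up on the slave itself.  Returns 0 when the
 * packet is ready to transmit, 1 when the neighbour layer consumed
 * the packet as a resolution probe (the skb_res pass), and a negative
 * errno on failure.
 */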
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
	       struct net_device *dev, struct netdev_queue *txq,
	       struct dst_entry *dst)
{
	struct neighbour *n;
	int err = 0;

	n = dst_neigh_lookup_skb(dst, skb);
	if (!n)
		return -ENOENT;

	if (dst->dev != dev) {
		struct neighbour *mn;

		mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
		neigh_release(n);
		if (IS_ERR(mn))
			return PTR_ERR(mn);
		n = mn;
	}

	if (neigh_event_send(n, skb_res) == 0) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, n, dev);
		/* Report header-building failure to the caller instead
		 * of discarding it in a shadowed local variable. */
		if (dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
				    NULL, skb->len) < 0)
			err = -EINVAL;
	} else {
		err = (skb_res == NULL) ? -EAGAIN : 1;
	}
	neigh_release(n);
	return err;
}

static inline int teql_resolve(struct sk_buff *skb,
			       struct sk_buff *skb_res,
			       struct net_device *dev,
			       struct netdev_queue *txq)
{
	struct dst_entry *dst = skb_dst(skb);
	int res;

	if (txq->qdisc == &noop_qdisc)
		return -ENODEV;

	if (!dev->header_ops || !dst)
		return 0;

	rcu_read_lock();
	res = __teql_resolve(skb, skb_res, dev, txq, dst);
	rcu_read_unlock();

	return res;
}

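/*
 * Round-robin transmit for the master device.  Walk the slave ring
 * from the last served position and hand the skb to the first slave
 * that is running, resolved and not stopped.  If the first pass fails
 * only on unresolved neighbours, one retry offers the skb itself as
 * the resolution probe; if every usable slave was busy, the master
 * queue is stopped, otherwise the packet is dropped.
 */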
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	q = start;
	if (!q)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
		const struct net_device_ops *slave_ops = slave->netdev_ops;

		if (slave_txq->qdisc_sleeping != q)
			continue;
		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
		case 0:
			if (__netif_tx_trylock(slave_txq)) {
				unsigned int length = qdisc_pkt_len(skb);

				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
				    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
					txq_trans_update(slave_txq);
					__netif_tx_unlock(slave_txq);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->tx_packets++;
					master->tx_bytes += length;
					return NETDEV_TX_OK;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
				busy = 1;
			break;
		case 1:
			master->slaves = NEXT_SLAVE(q);
			return NETDEV_TX_OK;
		default:
			nores = 1;
			break;
		}
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	master->tx_errors++;

drop:
	master->tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

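/*
 * Bring the master up: its MTU becomes the minimum over all slaves,
 * and it keeps only the link-layer mode every slave shares.
 */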
static int teql_master_open(struct net_device *dev)
{
	struct Qdisc *q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned int flags = FMASK;

	if (m->slaves == NULL)
		return -EUNATCH;

	q = m->slaves;
	do {
		struct net_device *slave = qdisc_dev(q);

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST.
		   If all the slaves are PtP, master is PtP.
		   Otherwise, master is NBMA.
		 */
		if (!(slave->flags & IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags & IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags & IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags & ~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
						     struct rtnl_link_stats64 *stats)
{
	struct teql_master *m = netdev_priv(dev);

	stats->tx_packets	= m->tx_packets;
	stats->tx_bytes		= m->tx_bytes;
	stats->tx_errors	= m->tx_errors;
	stats->tx_dropped	= m->tx_dropped;
	return stats;
}

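/*
 * The master's MTU may be lowered at will but must never exceed the
 * MTU of any slave; 68 is the minimum MTU an IPv4 link must be able
 * to carry (RFC 791).
 */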
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	if (new_mtu < 68)
		return -EINVAL;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > qdisc_dev(q)->mtu)
				return -EINVAL;
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
	}

	dev->mtu = new_mtu;
	return 0;
}

static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
	.ndo_get_stats64 = teql_master_stats64,
	.ndo_change_mtu	= teql_master_mtu,
};

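/*
 * Each master owns a private Qdisc_ops whose id is set to the device
 * name in teql_init(), so "tc qdisc add dev ethX root teql0" binds a
 * slave to exactly that master.
 */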
static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size	= sizeof(struct teql_sched_data);

	ops->enqueue	= teql_enqueue;
	ops->dequeue	= teql_dequeue;
	ops->peek	= teql_peek;
	ops->init	= teql_qdisc_init;
	ops->reset	= teql_reset;
	ops->destroy	= teql_destroy;
	ops->owner	= THIS_MODULE;

	dev->netdev_ops		= &teql_netdev_ops;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

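/*
 * Create max_equalizers master devices (teql0, teql1, ...), register
 * a qdisc discipline named after each one, and remember them on
 * master_dev_list for module unload.
 */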
static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master),
				   "teql%d", teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		err = register_netdev(dev);
		if (err) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);
		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	return i ? 0 : err;
}

static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
		list_del(&master->master_list);
		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");