/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer
   you should just attach this qdisc to a device, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb Ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate an upper useful difference as ~10 times.
   3. If the slave requires address resolution, only protocols using
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */
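
/* A minimal end-to-end sketch (the interface names and address are
 * illustrative, not prescribed by this file):
 *
 *   # modprobe sch_teql
 *   # tc qdisc add dev eth0 root teql0
 *   # tc qdisc add dev eth1 root teql0
 *   # ip link set teql0 up
 *   # ip addr add 10.0.0.1/24 dev teql0
 *
 * Traffic routed via teql0 is then spread round-robin over eth0 and
 * eth1 by teql_master_xmit() below.
 */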

struct teql_master {
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;
	struct list_head master_list;
	unsigned long	tx_bytes;
	unsigned long	tx_packets;
	unsigned long	tx_errors;
	unsigned long	tx_dropped;
};

struct teql_sched_data {
	struct Qdisc *next;
	struct teql_master *m;
	struct neighbour *ncache;
	struct sk_buff_head q;
};

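/* Slaves of a master are kept on a circular list, threaded through the
 * ->next field of their private data; NEXT_SLAVE() follows that link.
 */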
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)

/* "teql*" qdisc routines */

/* Tail-drop enqueue: queue the skb on this slave's list while it is
 * shorter than the slave device's tx_queue_len, otherwise drop it.
 */
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_sched_data *q = qdisc_priv(sch);

	if (q->q.qlen < dev->tx_queue_len) {
		__skb_queue_tail(&q->q, skb);
		return NET_XMIT_SUCCESS;
	}

	return qdisc_drop(skb, sch);
}

static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	if (skb == NULL) {
		struct net_device *m = qdisc_dev(dat_queue->qdisc);
		if (m) {
			/* This slave ran dry: make it the first one polled
			 * and wake the master so it can refill us.
			 */
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	} else {
		qdisc_bstats_update(sch, skb);
	}
	sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
	return skb;
}

static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
	/* teql is meant to be used as root qdisc */
	return NULL;
}

static inline void
teql_neigh_release(struct neighbour *n)
{
	if (n)
		neigh_release(n);
}

static void
teql_reset(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
	teql_neigh_release(xchg(&dat->ncache, NULL));
}

static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	/* Unlink this qdisc from the master's circular slave list; if it
	 * was the last slave, also reset the master's own qdisc.
	 */
	prev = master->slaves;
	if (prev) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(txq->qdisc);
						spin_lock_bh(root_lock);
						qdisc_reset(txq->qdisc);
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				teql_neigh_release(xchg(&dat->ncache, NULL));
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}

static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			/* While the master is up, a new slave must not
			 * restrict its capabilities or MTU.
			 */
			if ((m->dev->flags & IFF_POINTOPOINT &&
			     !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST &&
			     !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST &&
			     !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			/* While it is down, the master degrades to the
			 * intersection of the slaves' capabilities and
			 * their minimum MTU.
			 */
			if (!(dev->flags&IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags&IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags&IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
	}
	return 0;
}

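/* Per-slave address resolution: reuse the slave's cached neighbour
 * entry when it matches the route's neighbour, otherwise look one up;
 * once the address resolves, build the slave's hard header on the skb.
 */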
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
	       struct net_device *dev, struct netdev_queue *txq,
	       struct neighbour *mn)
{
	struct teql_sched_data *q = qdisc_priv(txq->qdisc);
	struct neighbour *n = q->ncache;

	if (mn->tbl == NULL)
		return -EINVAL;
	if (n && n->tbl == mn->tbl &&
	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
		atomic_inc(&n->refcnt);
	} else {
		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	if (neigh_event_send(n, skb_res) == 0) {
		int err;
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, n, dev);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
				      NULL, skb->len);

		if (err < 0) {
			neigh_release(n);
			return -EINVAL;
		}
		teql_neigh_release(xchg(&q->ncache, n));
		return 0;
	}
	neigh_release(n);
	return (skb_res == NULL) ? -EAGAIN : 1;
}

static inline int teql_resolve(struct sk_buff *skb,
			       struct sk_buff *skb_res,
			       struct net_device *dev,
			       struct netdev_queue *txq)
{
	struct dst_entry *dst = skb_dst(skb);
	struct neighbour *mn;
	int res;

	if (txq->qdisc == &noop_qdisc)
		return -ENODEV;

	if (!dev->header_ops || !dst)
		return 0;

	rcu_read_lock();
	mn = dst_get_neighbour_noref(dst);
	res = mn ? __teql_resolve(skb, skb_res, dev, txq, mn) : 0;
	rcu_read_unlock();

	return res;
}

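/* Master transmit path: walk the circular slave list, resuming where
 * the previous transmission left off, and hand the skb to the first
 * slave that is running, not stopped, and whose destination resolves.
 * If some slave only failed on resolution, a second pass with skb_res
 * set lets the neighbour layer hold the packet while resolution
 * completes; if every usable slave was busy, the master queue is
 * stopped and NETDEV_TX_BUSY returned.
 */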
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	q = start;
	if (!q)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
		const struct net_device_ops *slave_ops = slave->netdev_ops;

		if (slave_txq->qdisc_sleeping != q)
			continue;
		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
		case 0:
			if (__netif_tx_trylock(slave_txq)) {
				unsigned int length = qdisc_pkt_len(skb);

				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
				    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
					txq_trans_update(slave_txq);
					__netif_tx_unlock(slave_txq);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->tx_packets++;
					master->tx_bytes += length;
					return NETDEV_TX_OK;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
				busy = 1;
			break;
		case 1:
			master->slaves = NEXT_SLAVE(q);
			return NETDEV_TX_OK;
		default:
			nores = 1;
			break;
		}
		/* Undo any link-layer header pushed for this slave before
		 * trying the next one.
		 */
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	master->tx_errors++;

drop:
	master->tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int teql_master_open(struct net_device *dev)
{
	struct Qdisc *q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned int flags = IFF_NOARP | IFF_MULTICAST;

	if (m->slaves == NULL)
		return -EUNATCH;

	flags = FMASK;

	q = m->slaves;
	do {
		struct net_device *slave = qdisc_dev(q);

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST
		   If all the slaves are PtP, master is PtP
		   Otherwise, master is NBMA.
		 */
		if (!(slave->flags&IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags&IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags&IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags&~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
						     struct rtnl_link_stats64 *stats)
{
	struct teql_master *m = netdev_priv(dev);

	stats->tx_packets	= m->tx_packets;
	stats->tx_bytes		= m->tx_bytes;
	stats->tx_errors	= m->tx_errors;
	stats->tx_dropped	= m->tx_dropped;
	return stats;
}

static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	if (new_mtu < 68)	/* 68 is the minimum IPv4 MTU (RFC 791) */
		return -EINVAL;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > qdisc_dev(q)->mtu)
				return -EINVAL;
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
	}

	dev->mtu = new_mtu;
	return 0;
}

static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
	.ndo_get_stats64 = teql_master_stats64,
	.ndo_change_mtu	= teql_master_mtu,
};

static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size  = sizeof(struct teql_sched_data);

	ops->enqueue	=	teql_enqueue;
	ops->dequeue	=	teql_dequeue;
	ops->peek	=	teql_peek;
	ops->init	=	teql_qdisc_init;
	ops->reset	=	teql_reset;
	ops->destroy	=	teql_destroy;
	ops->owner	=	THIS_MODULE;

	dev->netdev_ops =       &teql_netdev_ops;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
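/* For example, loading the module with "modprobe sch_teql
 * max_equalizers=4" creates teql0 through teql3, each with a matching
 * qdisc registered by teql_init() below.
 */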

static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master),
				  "teql%d", teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		err = register_netdev(dev);
		if (err) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	return i ? 0 : err;
}

static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {

		list_del(&master->master_list);

		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");