/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer
   you just set this qdisc as the root qdisc on the slave device, e.g.

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g., it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of massive packet reordering.
      I estimate the upper useful speed ratio as ~10 times.
   3. If a slave requires address resolution, only protocols using the
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */
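
/* A minimal bring-up sketch (the interface name and address below are
   hypothetical examples; assumes the iproute2 tools are available):
   once at least one slave has been attached as above, the master is
   brought up and addressed like any other interface:

   # ip link set dev teql0 up
   # ip addr add 10.0.0.1/24 dev teql0
 */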

struct teql_master
{
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;		/* circular list of slave qdiscs */
	struct list_head master_list;
	struct net_device_stats stats;
};

struct teql_sched_data
{
	struct Qdisc *next;		/* next slave in the master's circular list */
	struct teql_master *m;
	struct neighbour *ncache;	/* cached neighbour for header resolution */
	struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)

/* "teql*" qdisc routines */

static int
teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_sched_data *q = qdisc_priv(sch);

	if (q->q.qlen < dev->tx_queue_len) {
		__skb_queue_tail(&q->q, skb);
		sch->bstats.bytes += qdisc_pkt_len(skb);
		sch->bstats.packets++;
		return NET_XMIT_SUCCESS;
	}

	kfree_skb(skb);
	sch->qstats.drops++;
	return NET_XMIT_DROP;
}

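/* Dequeue from the private queue.  When it runs empty, record this
 * slave as the master's current round-robin position and wake the
 * master device so transmission can resume.  sch->q.qlen mirrors the
 * combined backlog of this queue and the qdisc on the master device.
 */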
static struct sk_buff *
teql_dequeue(struct Qdisc* sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	if (skb == NULL) {
		struct net_device *m = qdisc_dev(dat_queue->qdisc);
		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	}
	sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
	return skb;
}

static struct sk_buff *
teql_peek(struct Qdisc* sch)
{
	/* teql is meant to be used as root qdisc */
	return NULL;
}

static __inline__ void
teql_neigh_release(struct neighbour *n)
{
	if (n)
		neigh_release(n);
}

static void
teql_reset(struct Qdisc* sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
	teql_neigh_release(xchg(&dat->ncache, NULL));
}

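/* Unlink this qdisc from the master's circular slave list.  If it was
 * the last slave, also reset the qdisc installed on the master's tx
 * queue, taking its root lock.
 */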
static void
teql_destroy(struct Qdisc* sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	if ((prev = master->slaves) != NULL) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(txq->qdisc);
						spin_lock_bh(root_lock);
						qdisc_reset(txq->qdisc);
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				teql_neigh_release(xchg(&dat->ncache, NULL));
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}

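/* Attach a new slave.  The slave's link-layer header must fit within
 * the master's, and the master cannot be enslaved to itself.  While
 * the master is up, a new slave must not weaken its capabilities
 * (point-to-point/broadcast/multicast flags, MTU); while it is down,
 * the master's flags and MTU are narrowed to what the slave supports.
 */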
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master*)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
			    || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
			    || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
			    || dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			if (!(dev->flags&IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags&IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags&IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
	}
	return 0;
}


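/* Build the slave-specific link-layer header for skb.  The neighbour
 * entry is taken from the per-slave cache when its table and primary
 * key match the route's neighbour, otherwise it is looked up afresh.
 * Returns 0 on success, a negative errno on failure, and 1 (or -EAGAIN
 * when no resubmit buffer was passed) while resolution is pending.
 */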
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
	struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
	struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
	struct neighbour *mn = skb->dst->neighbour;
	struct neighbour *n = q->ncache;

	if (mn->tbl == NULL)
		return -EINVAL;
	if (n && n->tbl == mn->tbl &&
	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
		atomic_inc(&n->refcnt);
	} else {
		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	if (neigh_event_send(n, skb_res) == 0) {
		int err;

		read_lock(&n->lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      n->ha, NULL, skb->len);
		read_unlock(&n->lock);

		if (err < 0) {
			neigh_release(n);
			return -EINVAL;
		}
		teql_neigh_release(xchg(&q->ncache, n));
		return 0;
	}
	neigh_release(n);
	return (skb_res == NULL) ? -EAGAIN : 1;
}

static inline int teql_resolve(struct sk_buff *skb,
			       struct sk_buff *skb_res, struct net_device *dev)
{
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
	if (txq->qdisc == &noop_qdisc)
		return -ENODEV;

	if (dev->header_ops == NULL ||
	    skb->dst == NULL ||
	    skb->dst->neighbour == NULL)
		return 0;
	return __teql_resolve(skb, skb_res, dev);
}

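/* Transmit on the master: walk the circular slave list from the
 * current position, resolve the header for each candidate and hand the
 * skb directly to the first slave whose tx queue can take it.  Busy
 * slaves stop the master queue; unresolved ones trigger one restart
 * pass with the skb itself as the resubmit buffer.
 */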
static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	if ((q = start) == NULL)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
		const struct net_device_ops *slave_ops = slave->netdev_ops;

		if (slave_txq->qdisc_sleeping != q)
			continue;
		if (__netif_subqueue_stopped(slave, subq) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave)) {
		case 0:
			if (__netif_tx_trylock(slave_txq)) {
				unsigned int length = qdisc_pkt_len(skb);

				if (!netif_tx_queue_stopped(slave_txq) &&
				    !netif_tx_queue_frozen(slave_txq) &&
				    slave_ops->ndo_start_xmit(skb, slave) == 0) {
					__netif_tx_unlock(slave_txq);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->stats.tx_packets++;
					master->stats.tx_bytes += length;
					return 0;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_queue_stopped(dev))
				busy = 1;
			break;
		case 1:
			/* The neighbour layer took the skb for resolution. */
			master->slaves = NEXT_SLAVE(q);
			return 0;
		default:
			nores = 1;
			break;
		}
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		/* Retry once, passing skb itself as the resubmit buffer. */
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return 1;
	}
	master->stats.tx_errors++;

drop:
	master->stats.tx_dropped++;
	dev_kfree_skb(skb);
	return 0;
}

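/* Bring the master up.  At least one slave is required; the master's
 * MTU becomes the minimum over all slaves, and it keeps an FMASK flag
 * (point-to-point, broadcast) only if every slave has it.
 */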
static int teql_master_open(struct net_device *dev)
{
	struct Qdisc *q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned flags = IFF_NOARP|IFF_MULTICAST;

	if (m->slaves == NULL)
		return -EUNATCH;

	flags = FMASK;

	q = m->slaves;
	do {
		struct net_device *slave = qdisc_dev(q);

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, the master is BROADCAST;
		   if all the slaves are PtP, the master is PtP;
		   otherwise, the master is NBMA.
		 */
		if (!(slave->flags&IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags&IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags&IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags&~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static struct net_device_stats *teql_master_stats(struct net_device *dev)
{
	struct teql_master *m = netdev_priv(dev);
	return &m->stats;
}

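/* Change the master MTU.  It must stay at or above the IPv4 minimum of
 * 68 bytes and must not exceed the MTU of any attached slave.
 */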
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	if (new_mtu < 68)
		return -EINVAL;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > qdisc_dev(q)->mtu)
				return -EINVAL;
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
	}

	dev->mtu = new_mtu;
	return 0;
}

static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
	.ndo_get_stats	= teql_master_stats,
	.ndo_change_mtu	= teql_master_mtu,
};

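/* Initialize one master device together with its private Qdisc_ops.
 * Each master carries its own ops structure; teql_init() later sets
 * the ops id to the device name, pairing qdisc and device one-to-one.
 */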
static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size	= sizeof(struct teql_sched_data);

	ops->enqueue	=	teql_enqueue;
	ops->dequeue	=	teql_dequeue;
	ops->peek	=	teql_peek;
	ops->init	=	teql_qdisc_init;
	ops->reset	=	teql_reset;
	ops->destroy	=	teql_destroy;
	ops->owner	=	THIS_MODULE;

	dev->netdev_ops		= &teql_netdev_ops;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

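/* Create max_equalizers master devices and register a qdisc ops for
 * each.  If at least one master was set up before an error, the module
 * still loads with the ones that succeeded; only a failure on the very
 * first one aborts loading.
 */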
static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master),
				   "teql%d", teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		if ((err = register_netdev(dev))) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	return i ? 0 : err;
}

static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
		list_del(&master->master_list);

		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");