/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device, teqlN, and a
   new qdisc with the same name. To join a slave to the equalizer,
   simply attach that qdisc to the slave device, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)
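
   The teql0 master is itself an ordinary network device, so (assuming
   the usual iproute2 tools are available; the address below is purely
   illustrative) it is brought up and addressed like any other
   interface:

   # ip link set dev teql0 up
   # ip addr add 192.0.2.1/24 dev teql0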

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the
      tbusy signal and generate EOI events. If you want to equalize
      virtual devices such as tunnels, use a normal eql device instead.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will happily equalize a 9600 baud line and 100Mbit Ethernet.
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable because of massive packet reordering;
      a factor of ~10 is a reasonable upper bound on the useful difference.
   3. If the slave requires address resolution, only protocols using
      the neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols may still use the slave device directly, which
      will not break load balancing, though native slave traffic will
      have the highest priority.  */

struct teql_master
{
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;
	struct list_head master_list;
	struct net_device_stats stats;
};

struct teql_sched_data
{
	struct Qdisc *next;
	struct teql_master *m;
	struct neighbour *ncache;
	struct sk_buff_head q;
};

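/* The slave qdiscs of one master form a circular, singly linked list,
 * threaded through teql_sched_data->next; NEXT_SLAVE() follows that
 * link.  master->slaves points at the slave to try first.
 */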
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)

/* "teql*" qdisc routines */

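/* Tail-drop enqueue: queue the skb on this slave's private list until
 * the slave device's tx_queue_len is reached, then drop.
 */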
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_sched_data *q = qdisc_priv(sch);

	if (q->q.qlen < dev->tx_queue_len) {
		__skb_queue_tail(&q->q, skb);
		sch->bstats.bytes += qdisc_pkt_len(skb);
		sch->bstats.packets++;
		return NET_XMIT_SUCCESS;
	}

	kfree_skb(skb);
	sch->qstats.drops++;
	return NET_XMIT_DROP;
}

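/* Put a packet back at the head of the queue after a failed transmit. */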
static int
teql_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct teql_sched_data *q = qdisc_priv(sch);

	__skb_queue_head(&q->q, skb);
	sch->qstats.requeues++;
	return 0;
}

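/* Dequeue from this slave's private queue.  When it runs empty, park
 * the master's round-robin pointer on this slave and wake the master
 * device so that it can feed us again.  sch->q.qlen is kept in sync
 * with the sum of the private queue and the master's root qdisc.
 */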
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	if (skb == NULL) {
		struct net_device *m = qdisc_dev(dat_queue->qdisc);
		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	}
	sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
	return skb;
}

static inline void
teql_neigh_release(struct neighbour *n)
{
	if (n)
		neigh_release(n);
}

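/* Drop all queued packets and release the cached neighbour; xchg()
 * swaps NULL in atomically before the old entry is released.
 */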
static void
teql_reset(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
	teql_neigh_release(xchg(&dat->ncache, NULL));
}

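/* Unlink this slave from the master's circular list.  If it was the
 * last slave, clear master->slaves and reset the master's root qdisc
 * under its lock.
 */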
static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	if ((prev = master->slaves) != NULL) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_lock(txq->qdisc);
						spin_lock_bh(root_lock);
						qdisc_reset(txq->qdisc);
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				teql_neigh_release(xchg(&dat->ncache, NULL));
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}

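/* Attach a new slave.  The cast from sch->ops back to the master is
 * valid because qops is the first member of struct teql_master.  While
 * the master is up, a new slave must match the master's flags and
 * offer at least its MTU; while it is down, the master's flags and MTU
 * are instead narrowed to what the slave supports.
 */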
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			if ((m->dev->flags & IFF_POINTOPOINT &&
			     !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST &&
			     !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST &&
			     !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			if (!(dev->flags & IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags & IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags & IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags & ~FMASK) | (dev->flags & FMASK);
	}
	return 0;
}

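/* Fill in the slave's link-layer header via the neighbour cache,
 * reusing the per-slave cached neighbour when its key matches.
 * Returns 0 when the header was built, 1 when the packet was consumed
 * while waiting for address resolution, or a negative errno.
 */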
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
	struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
	struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
	struct neighbour *mn = skb->dst->neighbour;
	struct neighbour *n = q->ncache;

	if (mn->tbl == NULL)
		return -EINVAL;
	if (n && n->tbl == mn->tbl &&
	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
		atomic_inc(&n->refcnt);
	} else {
		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	if (neigh_event_send(n, skb_res) == 0) {
		int err;

		read_lock(&n->lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      n->ha, NULL, skb->len);
		read_unlock(&n->lock);

		if (err < 0) {
			neigh_release(n);
			return -EINVAL;
		}
		teql_neigh_release(xchg(&q->ncache, n));
		return 0;
	}
	neigh_release(n);
	return (skb_res == NULL) ? -EAGAIN : 1;
}

static inline int teql_resolve(struct sk_buff *skb,
			       struct sk_buff *skb_res, struct net_device *dev)
{
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
	if (txq->qdisc == &noop_qdisc)
		return -ENODEV;

	if (dev->header_ops == NULL ||
	    skb->dst == NULL ||
	    skb->dst->neighbour == NULL)
		return 0;
	return __teql_resolve(skb, skb_res, dev);
}

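/* Master transmit: walk the circular slave list round robin, trying to
 * resolve and transmit on each slave in turn.  The first pass runs
 * with skb_res == NULL; if every candidate needed address resolution
 * (-EAGAIN), the walk restarts with skb_res = skb so the packet itself
 * can be queued on a neighbour.  Returns 1 (and stops the master's
 * queue) while all slaves are busy, 0 otherwise.
 */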
static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	if ((q = start) == NULL)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq;

		slave_txq = netdev_get_tx_queue(slave, 0);
		if (slave_txq->qdisc_sleeping != q)
			continue;
		if (__netif_subqueue_stopped(slave, subq) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave)) {
		case 0:
			if (netif_tx_trylock(slave)) {
				if (!__netif_subqueue_stopped(slave, subq) &&
				    slave->hard_start_xmit(skb, slave) == 0) {
					netif_tx_unlock(slave);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->stats.tx_packets++;
					master->stats.tx_bytes +=
						qdisc_pkt_len(skb);
					return 0;
				}
				netif_tx_unlock(slave);
			}
			if (netif_queue_stopped(dev))
				busy = 1;
			break;
		case 1:
			master->slaves = NEXT_SLAVE(q);
			return 0;
		default:
			nores = 1;
			break;
		}
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return 1;
	}
	master->stats.tx_errors++;

drop:
	master->stats.tx_dropped++;
	dev_kfree_skb(skb);
	return 0;
}

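/* Bring the master up: fail unless at least one slave is attached,
 * then derive the master's MTU (the minimum over all slaves) and its
 * link-type flags from the slave set.
 */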
static int teql_master_open(struct net_device *dev)
{
	struct Qdisc *q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned int flags = FMASK;

	if (m->slaves == NULL)
		return -EUNATCH;

	q = m->slaves;
	do {
		struct net_device *slave = qdisc_dev(q);

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST
		   If all the slaves are PtP, master is PtP
		   Otherwise, master is NBMA.
		 */
		if (!(slave->flags & IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags & IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags & IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags & ~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static struct net_device_stats *teql_master_stats(struct net_device *dev)
{
	struct teql_master *m = netdev_priv(dev);
	return &m->stats;
}

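/* The master's MTU may only be raised up to the smallest slave MTU;
 * 68 is the minimum MTU required by IPv4.
 */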
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	if (new_mtu < 68)
		return -EINVAL;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > qdisc_dev(q)->mtu)
				return -EINVAL;
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
	}

	dev->mtu = new_mtu;
	return 0;
}

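/* Each master embeds its own Qdisc_ops, so every teqlN device
 * registers a distinct qdisc type whose name matches the device name.
 */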
static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size	= sizeof(struct teql_sched_data);

	ops->enqueue	=	teql_enqueue;
	ops->dequeue	=	teql_dequeue;
	ops->requeue	=	teql_requeue;
	ops->init	=	teql_qdisc_init;
	ops->reset	=	teql_reset;
	ops->destroy	=	teql_destroy;
	ops->owner	=	THIS_MODULE;

	dev->open		= teql_master_open;
	dev->hard_start_xmit	= teql_master_xmit;
	dev->stop		= teql_master_close;
	dev->get_stats		= teql_master_stats;
	dev->change_mtu		= teql_master_mtu;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

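/* Create max_equalizers master devices and register one qdisc type per
 * device.  Loading succeeds if at least one equalizer was set up
 * (i != 0), even when a later iteration fails.
 */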
static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master),
				   "teql%d", teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		if ((err = register_netdev(dev))) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	return i ? 0 : err;
}

static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {

		list_del(&master->master_list);

		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");