/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the
   equalizer, simply attach this qdisc to the slave device, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)
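
   The teql master device itself must also be brought up and given an
   address before it will carry traffic; note that opening it fails
   with EUNATCH until at least one slave is attached. For example
   (the address below is purely illustrative):

   # ip link set dev teql0 up
   # ip addr add 10.0.0.1/24 dev teql0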

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics
      f.e. it will equalize a 9600 baud line and 100Mb Ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate an upper useful difference as ~10 times.
   3. If the slave requires address resolution, only protocols using
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */

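/* One master device ("teqlN") and the state shared by its slaves.
 * ->slaves points into a circular list of the slave qdiscs (linked
 * through their private data); transmission round-robins starting
 * from it.
 */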
struct teql_master
{
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;
	struct list_head master_list;
	struct net_device_stats stats;
};

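/* Per-slave qdisc state: the link in the master's circular slave list,
 * a back pointer to the master, a one-entry neighbour cache and the
 * local packet queue.
 */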
struct teql_sched_data
{
	struct Qdisc *next;
	struct teql_master *m;
	struct neighbour *ncache;
	struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST)

/* "teql*" qdisc routines */

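/* Tail-drop enqueue: queue locally up to the slave device's
 * tx_queue_len packets, drop (and count) anything beyond that.
 */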
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
	struct net_device *dev = sch->dev;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (q->q.qlen < dev->tx_queue_len) {
		__skb_queue_tail(&q->q, skb);
		sch->bstats.bytes += skb->len;
		sch->bstats.packets++;
		return 0;
	}

	kfree_skb(skb);
	sch->qstats.drops++;
	return NET_XMIT_DROP;
}

static int
teql_requeue(struct sk_buff *skb, struct Qdisc* sch)
{
	struct teql_sched_data *q = qdisc_priv(sch);

	__skb_queue_head(&q->q, skb);
	sch->qstats.requeues++;
	return 0;
}

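/* Dequeue from the local queue; when it runs empty, mark this slave as
 * the master's next service point and wake the master queue so it can
 * transmit again.
 */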
static struct sk_buff *
teql_dequeue(struct Qdisc* sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct sk_buff *skb;

	skb = __skb_dequeue(&dat->q);
	if (skb == NULL) {
		struct net_device *m = dat->m->dev->qdisc->dev;
		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	}
	sch->q.qlen = dat->q.qlen + dat->m->dev->qdisc->q.qlen;
	return skb;
}

static __inline__ void
teql_neigh_release(struct neighbour *n)
{
	if (n)
		neigh_release(n);
}

static void
teql_reset(struct Qdisc* sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
	teql_neigh_release(xchg(&dat->ncache, NULL));
}

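/* Unlink this qdisc from the master's circular slave list; if it was
 * the last slave, empty the list and reset the master's own qdisc.
 */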
static void
teql_destroy(struct Qdisc* sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	if ((prev = master->slaves) != NULL) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						master->slaves = NULL;
						spin_lock_bh(&master->dev->queue_lock);
						qdisc_reset(master->dev->qdisc);
						spin_unlock_bh(&master->dev->queue_lock);
					}
				}
				skb_queue_purge(&dat->q);
				teql_neigh_release(xchg(&dat->ncache, NULL));
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}

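/* Attach this qdisc to a new slave device: check that the slave is
 * compatible with the master (header length, link flags, MTU), then
 * link it into the circular slave list.  While the master is down,
 * its flags and MTU are relaxed to match the new slave instead.
 */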
static int teql_qdisc_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct net_device *dev = sch->dev;
	struct teql_master *m = (struct teql_master*)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
			    || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
			    || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
			    || dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			if (!(dev->flags&IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags&IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags&IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
	}
	return 0;
}


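/* Resolve the link-layer header for skb on a given slave, using the
 * slave's own neighbour entry and keeping a one-entry cache in the
 * slave qdisc.  Returns 0 on success, 1 if the packet was left with
 * the neighbour layer pending address resolution, or a negative errno.
 */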
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
	struct teql_sched_data *q = qdisc_priv(dev->qdisc);
	struct neighbour *mn = skb->dst->neighbour;
	struct neighbour *n = q->ncache;

	if (mn->tbl == NULL)
		return -EINVAL;
	if (n && n->tbl == mn->tbl &&
	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
		atomic_inc(&n->refcnt);
	} else {
		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	if (neigh_event_send(n, skb_res) == 0) {
		int err;
		read_lock(&n->lock);
		err = dev->hard_header(skb, dev, ntohs(skb->protocol), n->ha, NULL, skb->len);
		read_unlock(&n->lock);
		if (err < 0) {
			neigh_release(n);
			return -EINVAL;
		}
		teql_neigh_release(xchg(&q->ncache, n));
		return 0;
	}
	neigh_release(n);
	return (skb_res == NULL) ? -EAGAIN : 1;
}

static __inline__ int
teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
	if (dev->hard_header == NULL ||
	    skb->dst == NULL ||
	    skb->dst->neighbour == NULL)
		return 0;
	return __teql_resolve(skb, skb_res, dev);
}

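/* Master transmit: walk the circular slave list, starting at the last
 * service point, and hand the skb to the first slave that can take it.
 * If no slave could resolve the destination on the first pass, a
 * second pass runs with skb_res set so the neighbour layer may queue
 * the packet itself.
 */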
static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int len = skb->len;
	int subq = skb->queue_mapping;
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	if ((q = start) == NULL)
		goto drop;

	do {
		struct net_device *slave = q->dev;

		if (slave->qdisc_sleeping != q)
			continue;
		if (netif_queue_stopped(slave) ||
		    netif_subqueue_stopped(slave, subq) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave)) {
		case 0:
			if (netif_tx_trylock(slave)) {
				if (!netif_queue_stopped(slave) &&
				    !netif_subqueue_stopped(slave, subq) &&
				    slave->hard_start_xmit(skb, slave) == 0) {
					netif_tx_unlock(slave);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->stats.tx_packets++;
					master->stats.tx_bytes += len;
					return 0;
				}
				netif_tx_unlock(slave);
			}
			if (netif_queue_stopped(dev))
				busy = 1;
			break;
		case 1:
			master->slaves = NEXT_SLAVE(q);
			return 0;
		default:
			nores = 1;
			break;
		}
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return 1;
	}
	master->stats.tx_errors++;

drop:
	master->stats.tx_dropped++;
	dev_kfree_skb(skb);
	return 0;
}

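/* Bring the master up: require at least one slave, then derive the
 * master's MTU and BROADCAST/POINTOPOINT/MULTICAST flags from the
 * intersection of the slaves' capabilities.
 */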
static int teql_master_open(struct net_device *dev)
{
	struct Qdisc * q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned flags = FMASK;

	if (m->slaves == NULL)
		return -EUNATCH;

	q = m->slaves;
	do {
		struct net_device *slave = q->dev;

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST
		   If all the slaves are PtP, master is PtP
		   Otherwise, master is NBMA.
		 */
		if (!(slave->flags&IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags&IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags&IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags&~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static struct net_device_stats *teql_master_stats(struct net_device *dev)
{
	struct teql_master *m = netdev_priv(dev);
	return &m->stats;
}

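/* The master's MTU may be raised only up to the smallest slave MTU,
 * and never below 68, the IPv4 minimum.
 */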
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	if (new_mtu < 68)
		return -EINVAL;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > q->dev->mtu)
				return -EINVAL;
		} while ((q=NEXT_SLAVE(q)) != m->slaves);
	}

	dev->mtu = new_mtu;
	return 0;
}

static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size  = sizeof(struct teql_sched_data);

	ops->enqueue	=	teql_enqueue;
	ops->dequeue	=	teql_dequeue;
	ops->requeue	=	teql_requeue;
	ops->init	=	teql_qdisc_init;
	ops->reset	=	teql_reset;
	ops->destroy	=	teql_destroy;
	ops->owner	=	THIS_MODULE;

	dev->open		= teql_master_open;
	dev->hard_start_xmit	= teql_master_xmit;
	dev->stop		= teql_master_close;
	dev->get_stats		= teql_master_stats;
	dev->change_mtu		= teql_master_mtu;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
	SET_MODULE_OWNER(dev);
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master),
				  "teql%d", teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		if ((err = register_netdev(dev))) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	return i ? 0 : err;
}

static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {

		list_del(&master->master_list);

		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");