xref: /openbmc/linux/net/sched/sch_teql.c (revision cd354f1a)
1 /* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
2  *
3  *		This program is free software; you can redistribute it and/or
4  *		modify it under the terms of the GNU General Public License
5  *		as published by the Free Software Foundation; either version
6  *		2 of the License, or (at your option) any later version.
7  *
8  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  */
10 
11 #include <linux/module.h>
12 #include <asm/uaccess.h>
13 #include <asm/system.h>
14 #include <linux/bitops.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <linux/string.h>
18 #include <linux/mm.h>
19 #include <linux/socket.h>
20 #include <linux/sockios.h>
21 #include <linux/in.h>
22 #include <linux/errno.h>
23 #include <linux/interrupt.h>
24 #include <linux/if_arp.h>
25 #include <linux/if_ether.h>
26 #include <linux/inet.h>
27 #include <linux/netdevice.h>
28 #include <linux/etherdevice.h>
29 #include <linux/notifier.h>
30 #include <linux/init.h>
31 #include <net/ip.h>
32 #include <net/route.h>
33 #include <linux/skbuff.h>
34 #include <linux/moduleparam.h>
35 #include <net/sock.h>
36 #include <net/pkt_sched.h>
37 
/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer
   you just set this qdisc on a device, e.g.

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate the upper useful difference as ~10 times.
   3. If the slave requires address resolution, only protocols using
      the neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */
67 
68 struct teql_master
69 {
70 	struct Qdisc_ops qops;
71 	struct net_device *dev;
72 	struct Qdisc *slaves;
73 	struct list_head master_list;
74 	struct net_device_stats stats;
75 };
76 
77 struct teql_sched_data
78 {
79 	struct Qdisc *next;
80 	struct teql_master *m;
81 	struct neighbour *ncache;
82 	struct sk_buff_head q;
83 };
84 
/* Successor of slave qdisc q in its master's circular ring; usable as an lvalue. */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

/*
 * Link-type flags the master device takes over from its slaves.
 * Each flag is listed once; IFF_MULTICAST is not part of this mask.
 */
#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
88 
89 /* "teql*" qdisc routines */
90 
91 static int
92 teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
93 {
94 	struct net_device *dev = sch->dev;
95 	struct teql_sched_data *q = qdisc_priv(sch);
96 
97 	__skb_queue_tail(&q->q, skb);
98 	if (q->q.qlen <= dev->tx_queue_len) {
99 		sch->bstats.bytes += skb->len;
100 		sch->bstats.packets++;
101 		return 0;
102 	}
103 
104 	__skb_unlink(skb, &q->q);
105 	kfree_skb(skb);
106 	sch->qstats.drops++;
107 	return NET_XMIT_DROP;
108 }
109 
110 static int
111 teql_requeue(struct sk_buff *skb, struct Qdisc* sch)
112 {
113 	struct teql_sched_data *q = qdisc_priv(sch);
114 
115 	__skb_queue_head(&q->q, skb);
116 	sch->qstats.requeues++;
117 	return 0;
118 }
119 
120 static struct sk_buff *
121 teql_dequeue(struct Qdisc* sch)
122 {
123 	struct teql_sched_data *dat = qdisc_priv(sch);
124 	struct sk_buff *skb;
125 
126 	skb = __skb_dequeue(&dat->q);
127 	if (skb == NULL) {
128 		struct net_device *m = dat->m->dev->qdisc->dev;
129 		if (m) {
130 			dat->m->slaves = sch;
131 			netif_wake_queue(m);
132 		}
133 	}
134 	sch->q.qlen = dat->q.qlen + dat->m->dev->qdisc->q.qlen;
135 	return skb;
136 }
137 
138 static __inline__ void
139 teql_neigh_release(struct neighbour *n)
140 {
141 	if (n)
142 		neigh_release(n);
143 }
144 
145 static void
146 teql_reset(struct Qdisc* sch)
147 {
148 	struct teql_sched_data *dat = qdisc_priv(sch);
149 
150 	skb_queue_purge(&dat->q);
151 	sch->q.qlen = 0;
152 	teql_neigh_release(xchg(&dat->ncache, NULL));
153 }
154 
155 static void
156 teql_destroy(struct Qdisc* sch)
157 {
158 	struct Qdisc *q, *prev;
159 	struct teql_sched_data *dat = qdisc_priv(sch);
160 	struct teql_master *master = dat->m;
161 
162 	if ((prev = master->slaves) != NULL) {
163 		do {
164 			q = NEXT_SLAVE(prev);
165 			if (q == sch) {
166 				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
167 				if (q == master->slaves) {
168 					master->slaves = NEXT_SLAVE(q);
169 					if (q == master->slaves) {
170 						master->slaves = NULL;
171 						spin_lock_bh(&master->dev->queue_lock);
172 						qdisc_reset(master->dev->qdisc);
173 						spin_unlock_bh(&master->dev->queue_lock);
174 					}
175 				}
176 				skb_queue_purge(&dat->q);
177 				teql_neigh_release(xchg(&dat->ncache, NULL));
178 				break;
179 			}
180 
181 		} while ((prev = q) != master->slaves);
182 	}
183 }
184 
185 static int teql_qdisc_init(struct Qdisc *sch, struct rtattr *opt)
186 {
187 	struct net_device *dev = sch->dev;
188 	struct teql_master *m = (struct teql_master*)sch->ops;
189 	struct teql_sched_data *q = qdisc_priv(sch);
190 
191 	if (dev->hard_header_len > m->dev->hard_header_len)
192 		return -EINVAL;
193 
194 	if (m->dev == dev)
195 		return -ELOOP;
196 
197 	q->m = m;
198 
199 	skb_queue_head_init(&q->q);
200 
201 	if (m->slaves) {
202 		if (m->dev->flags & IFF_UP) {
203 			if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
204 			    || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
205 			    || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
206 			    || dev->mtu < m->dev->mtu)
207 				return -EINVAL;
208 		} else {
209 			if (!(dev->flags&IFF_POINTOPOINT))
210 				m->dev->flags &= ~IFF_POINTOPOINT;
211 			if (!(dev->flags&IFF_BROADCAST))
212 				m->dev->flags &= ~IFF_BROADCAST;
213 			if (!(dev->flags&IFF_MULTICAST))
214 				m->dev->flags &= ~IFF_MULTICAST;
215 			if (dev->mtu < m->dev->mtu)
216 				m->dev->mtu = dev->mtu;
217 		}
218 		q->next = NEXT_SLAVE(m->slaves);
219 		NEXT_SLAVE(m->slaves) = sch;
220 	} else {
221 		q->next = sch;
222 		m->slaves = sch;
223 		m->dev->mtu = dev->mtu;
224 		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
225 	}
226 	return 0;
227 }
228 
229 /* "teql*" netdevice routines */
230 
231 static int
232 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
233 {
234 	struct teql_sched_data *q = qdisc_priv(dev->qdisc);
235 	struct neighbour *mn = skb->dst->neighbour;
236 	struct neighbour *n = q->ncache;
237 
238 	if (mn->tbl == NULL)
239 		return -EINVAL;
240 	if (n && n->tbl == mn->tbl &&
241 	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
242 		atomic_inc(&n->refcnt);
243 	} else {
244 		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
245 		if (IS_ERR(n))
246 			return PTR_ERR(n);
247 	}
248 	if (neigh_event_send(n, skb_res) == 0) {
249 		int err;
250 		read_lock(&n->lock);
251 		err = dev->hard_header(skb, dev, ntohs(skb->protocol), n->ha, NULL, skb->len);
252 		read_unlock(&n->lock);
253 		if (err < 0) {
254 			neigh_release(n);
255 			return -EINVAL;
256 		}
257 		teql_neigh_release(xchg(&q->ncache, n));
258 		return 0;
259 	}
260 	neigh_release(n);
261 	return (skb_res == NULL) ? -EAGAIN : 1;
262 }
263 
264 static __inline__ int
265 teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
266 {
267 	if (dev->hard_header == NULL ||
268 	    skb->dst == NULL ||
269 	    skb->dst->neighbour == NULL)
270 		return 0;
271 	return __teql_resolve(skb, skb_res, dev);
272 }
273 
274 static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
275 {
276 	struct teql_master *master = netdev_priv(dev);
277 	struct Qdisc *start, *q;
278 	int busy;
279 	int nores;
280 	int len = skb->len;
281 	struct sk_buff *skb_res = NULL;
282 
283 	start = master->slaves;
284 
285 restart:
286 	nores = 0;
287 	busy = 0;
288 
289 	if ((q = start) == NULL)
290 		goto drop;
291 
292 	do {
293 		struct net_device *slave = q->dev;
294 
295 		if (slave->qdisc_sleeping != q)
296 			continue;
297 		if (netif_queue_stopped(slave) || ! netif_running(slave)) {
298 			busy = 1;
299 			continue;
300 		}
301 
302 		switch (teql_resolve(skb, skb_res, slave)) {
303 		case 0:
304 			if (netif_tx_trylock(slave)) {
305 				if (!netif_queue_stopped(slave) &&
306 				    slave->hard_start_xmit(skb, slave) == 0) {
307 					netif_tx_unlock(slave);
308 					master->slaves = NEXT_SLAVE(q);
309 					netif_wake_queue(dev);
310 					master->stats.tx_packets++;
311 					master->stats.tx_bytes += len;
312 					return 0;
313 				}
314 				netif_tx_unlock(slave);
315 			}
316 			if (netif_queue_stopped(dev))
317 				busy = 1;
318 			break;
319 		case 1:
320 			master->slaves = NEXT_SLAVE(q);
321 			return 0;
322 		default:
323 			nores = 1;
324 			break;
325 		}
326 		__skb_pull(skb, skb->nh.raw - skb->data);
327 	} while ((q = NEXT_SLAVE(q)) != start);
328 
329 	if (nores && skb_res == NULL) {
330 		skb_res = skb;
331 		goto restart;
332 	}
333 
334 	if (busy) {
335 		netif_stop_queue(dev);
336 		return 1;
337 	}
338 	master->stats.tx_errors++;
339 
340 drop:
341 	master->stats.tx_dropped++;
342 	dev_kfree_skb(skb);
343 	return 0;
344 }
345 
346 static int teql_master_open(struct net_device *dev)
347 {
348 	struct Qdisc * q;
349 	struct teql_master *m = netdev_priv(dev);
350 	int mtu = 0xFFFE;
351 	unsigned flags = IFF_NOARP|IFF_MULTICAST;
352 
353 	if (m->slaves == NULL)
354 		return -EUNATCH;
355 
356 	flags = FMASK;
357 
358 	q = m->slaves;
359 	do {
360 		struct net_device *slave = q->dev;
361 
362 		if (slave == NULL)
363 			return -EUNATCH;
364 
365 		if (slave->mtu < mtu)
366 			mtu = slave->mtu;
367 		if (slave->hard_header_len > LL_MAX_HEADER)
368 			return -EINVAL;
369 
370 		/* If all the slaves are BROADCAST, master is BROADCAST
371 		   If all the slaves are PtP, master is PtP
372 		   Otherwise, master is NBMA.
373 		 */
374 		if (!(slave->flags&IFF_POINTOPOINT))
375 			flags &= ~IFF_POINTOPOINT;
376 		if (!(slave->flags&IFF_BROADCAST))
377 			flags &= ~IFF_BROADCAST;
378 		if (!(slave->flags&IFF_MULTICAST))
379 			flags &= ~IFF_MULTICAST;
380 	} while ((q = NEXT_SLAVE(q)) != m->slaves);
381 
382 	m->dev->mtu = mtu;
383 	m->dev->flags = (m->dev->flags&~FMASK) | flags;
384 	netif_start_queue(m->dev);
385 	return 0;
386 }
387 
/* Stop method of the master device: stops its transmit queue. Always returns 0. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);

	return 0;
}
393 
394 static struct net_device_stats *teql_master_stats(struct net_device *dev)
395 {
396 	struct teql_master *m = netdev_priv(dev);
397 	return &m->stats;
398 }
399 
400 static int teql_master_mtu(struct net_device *dev, int new_mtu)
401 {
402 	struct teql_master *m = netdev_priv(dev);
403 	struct Qdisc *q;
404 
405 	if (new_mtu < 68)
406 		return -EINVAL;
407 
408 	q = m->slaves;
409 	if (q) {
410 		do {
411 			if (new_mtu > q->dev->mtu)
412 				return -EINVAL;
413 		} while ((q=NEXT_SLAVE(q)) != m->slaves);
414 	}
415 
416 	dev->mtu = new_mtu;
417 	return 0;
418 }
419 
420 static __init void teql_master_setup(struct net_device *dev)
421 {
422 	struct teql_master *master = netdev_priv(dev);
423 	struct Qdisc_ops *ops = &master->qops;
424 
425 	master->dev	= dev;
426 	ops->priv_size  = sizeof(struct teql_sched_data);
427 
428 	ops->enqueue	=	teql_enqueue;
429 	ops->dequeue	=	teql_dequeue;
430 	ops->requeue	=	teql_requeue;
431 	ops->init	=	teql_qdisc_init;
432 	ops->reset	=	teql_reset;
433 	ops->destroy	=	teql_destroy;
434 	ops->owner	=	THIS_MODULE;
435 
436 	dev->open		= teql_master_open;
437 	dev->hard_start_xmit	= teql_master_xmit;
438 	dev->stop		= teql_master_close;
439 	dev->get_stats		= teql_master_stats;
440 	dev->change_mtu		= teql_master_mtu;
441 	dev->type		= ARPHRD_VOID;
442 	dev->mtu		= 1500;
443 	dev->tx_queue_len	= 100;
444 	dev->flags		= IFF_NOARP;
445 	dev->hard_header_len	= LL_MAX_HEADER;
446 	SET_MODULE_OWNER(dev);
447 }
448 
449 static LIST_HEAD(master_dev_list);
450 static int max_equalizers = 1;
451 module_param(max_equalizers, int, 0);
452 MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
453 
454 static int __init teql_init(void)
455 {
456 	int i;
457 	int err = -ENODEV;
458 
459 	for (i = 0; i < max_equalizers; i++) {
460 		struct net_device *dev;
461 		struct teql_master *master;
462 
463 		dev = alloc_netdev(sizeof(struct teql_master),
464 				  "teql%d", teql_master_setup);
465 		if (!dev) {
466 			err = -ENOMEM;
467 			break;
468 		}
469 
470 		if ((err = register_netdev(dev))) {
471 			free_netdev(dev);
472 			break;
473 		}
474 
475 		master = netdev_priv(dev);
476 
477 		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
478 		err = register_qdisc(&master->qops);
479 
480 		if (err) {
481 			unregister_netdev(dev);
482 			free_netdev(dev);
483 			break;
484 		}
485 
486 		list_add_tail(&master->master_list, &master_dev_list);
487 	}
488 	return i ? 0 : err;
489 }
490 
491 static void __exit teql_exit(void)
492 {
493 	struct teql_master *master, *nxt;
494 
495 	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
496 
497 		list_del(&master->master_list);
498 
499 		unregister_qdisc(&master->qops);
500 		unregister_netdev(master->dev);
501 		free_netdev(master->dev);
502 	}
503 }
504 
505 module_init(teql_init);
506 module_exit(teql_exit);
507 
508 MODULE_LICENSE("GPL");
509