/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <linux/init.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/sock.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the
   equalizer you should just set this qdisc on a device, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)
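
   The teql0 device itself is an ordinary network device, so it must
   also be brought up before it can carry traffic, e.g. with iproute2:

   # ip link set teql0 up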

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate the upper useful difference as ~10 times.
   3. If the slave requires address resolution, only protocols using
      the neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */

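/* A teql master is a normal net_device that embeds its own Qdisc_ops,
 * so each equalizer registers a queueing discipline named after its
 * device (teql0, teql1, ...).  The slave qdiscs are chained into a
 * circular list through teql_sched_data->next (see NEXT_SLAVE below),
 * and master->slaves points at the next slave due to be served.
 */
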
struct teql_master
{
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;
	struct list_head master_list;
	struct net_device_stats stats;
};

struct teql_sched_data
{
	struct Qdisc *next;
	struct teql_master *m;
	struct neighbour *ncache;
	struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)

/* "teql*" qdisc routines */

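/* Tail-drop enqueue: queue the packet on this slave qdisc's private
 * list, and drop it again if that pushes the backlog past the slave
 * device's tx_queue_len.
 */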
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct net_device *dev = sch->dev;
	struct teql_sched_data *q = qdisc_priv(sch);

	__skb_queue_tail(&q->q, skb);
	if (q->q.qlen <= dev->tx_queue_len) {
		sch->bstats.bytes += skb->len;
		sch->bstats.packets++;
		return 0;
	}

	__skb_unlink(skb, &q->q);
	kfree_skb(skb);
	sch->qstats.drops++;
	return NET_XMIT_DROP;
}

static int
teql_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct teql_sched_data *q = qdisc_priv(sch);

	__skb_queue_head(&q->q, skb);
	sch->qstats.requeues++;
	return 0;
}

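/* When this slave's queue runs empty, record it as the next slave to
 * be served and wake the master device so that teql_master_xmit() can
 * refill it.  The reported queue length includes the master's own
 * backlog.
 */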
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct sk_buff *skb;

	skb = __skb_dequeue(&dat->q);
	if (skb == NULL) {
		struct net_device *m = dat->m->dev->qdisc->dev;
		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	}
	sch->q.qlen = dat->q.qlen + dat->m->dev->qdisc->q.qlen;
	return skb;
}

static __inline__ void
teql_neigh_release(struct neighbour *n)
{
	if (n)
		neigh_release(n);
}

static void
teql_reset(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
	teql_neigh_release(xchg(&dat->ncache, NULL));
}

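/* Unlink this qdisc from the master's circular slave list.  If it was
 * the last slave, empty the list and reset the master's own qdisc.
 */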
static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	if ((prev = master->slaves) != NULL) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						master->slaves = NULL;
						spin_lock_bh(&master->dev->queue_lock);
						qdisc_reset(master->dev->qdisc);
						spin_unlock_bh(&master->dev->queue_lock);
					}
				}
				skb_queue_purge(&dat->q);
				teql_neigh_release(xchg(&dat->ncache, NULL));
				break;
			}
		} while ((prev = q) != master->slaves);
	}
}

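/* Attach a new slave: check that the slave device can actually carry
 * the master's traffic, then splice the new qdisc into the circular
 * list and narrow the master's flags and MTU to what every slave
 * supports.
 */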
static int teql_qdisc_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct net_device *dev = sch->dev;
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
			    || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
			    || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
			    || dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			if (!(dev->flags&IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags&IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags&IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags&~FMASK) | (dev->flags&FMASK);
	}
	return 0;
}

/* "teql*" netdevice routines */

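/* Rebuild the link-layer header for the chosen slave.  The neighbour
 * entry resolved on the master device is looked up (and cached)
 * against the slave.  Returns 0 on success, a negative errno on
 * failure, and -EAGAIN or 1 (depending on the xmit pass) while
 * address resolution is still pending.
 */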
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
	struct teql_sched_data *q = qdisc_priv(dev->qdisc);
	struct neighbour *mn = skb->dst->neighbour;
	struct neighbour *n = q->ncache;

	if (mn->tbl == NULL)
		return -EINVAL;
	if (n && n->tbl == mn->tbl &&
	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
		atomic_inc(&n->refcnt);
	} else {
		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	if (neigh_event_send(n, skb_res) == 0) {
		int err;
		read_lock(&n->lock);
		err = dev->hard_header(skb, dev, ntohs(skb->protocol), n->ha, NULL, skb->len);
		read_unlock(&n->lock);
		if (err < 0) {
			neigh_release(n);
			return -EINVAL;
		}
		teql_neigh_release(xchg(&q->ncache, n));
		return 0;
	}
	neigh_release(n);
	return (skb_res == NULL) ? -EAGAIN : 1;
}

static __inline__ int
teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
	if (dev->hard_header == NULL ||
	    skb->dst == NULL ||
	    skb->dst->neighbour == NULL)
		return 0;
	return __teql_resolve(skb, skb_res, dev);
}

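/* Transmit on the equalized link: walk the circular slave list
 * starting at master->slaves and hand the skb to the first slave
 * whose queue is open and whose xmit lock can be taken.  A second
 * pass (with skb_res set) retries slaves that still need address
 * resolution.
 */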
static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int len = skb->len;
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	if ((q = start) == NULL)
		goto drop;

	do {
		struct net_device *slave = q->dev;

		if (slave->qdisc_sleeping != q)
			continue;
		if (netif_queue_stopped(slave) || !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave)) {
		case 0:
			if (spin_trylock(&slave->xmit_lock)) {
				slave->xmit_lock_owner = smp_processor_id();
				if (!netif_queue_stopped(slave) &&
				    slave->hard_start_xmit(skb, slave) == 0) {
					slave->xmit_lock_owner = -1;
					spin_unlock(&slave->xmit_lock);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->stats.tx_packets++;
					master->stats.tx_bytes += len;
					return 0;
				}
				slave->xmit_lock_owner = -1;
				spin_unlock(&slave->xmit_lock);
			}
			if (netif_queue_stopped(dev))
				busy = 1;
			break;
		case 1:
			master->slaves = NEXT_SLAVE(q);
			return 0;
		default:
			nores = 1;
			break;
		}
		__skb_pull(skb, skb->nh.raw - skb->data);
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return 1;
	}
	master->stats.tx_errors++;

drop:
	master->stats.tx_dropped++;
	dev_kfree_skb(skb);
	return 0;
}

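/* Bringing the master up only makes sense once it has slaves; the
 * master's MTU and flags are computed as the intersection of what
 * all slaves support.
 */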
static int teql_master_open(struct net_device *dev)
{
	struct Qdisc *q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned flags = IFF_NOARP | IFF_MULTICAST;

	if (m->slaves == NULL)
		return -EUNATCH;

	flags = FMASK;

	q = m->slaves;
	do {
		struct net_device *slave = q->dev;

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST
		   If all the slaves are PtP, master is PtP
		   Otherwise, master is NBMA.
		 */
		if (!(slave->flags&IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags&IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags&IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags&~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static struct net_device_stats *teql_master_stats(struct net_device *dev)
{
	struct teql_master *m = netdev_priv(dev);
	return &m->stats;
}

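/* The master's MTU may be lowered freely (68 is the minimum IPv4
 * MTU), but it may never exceed the MTU of any slave.
 */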
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	if (new_mtu < 68)
		return -EINVAL;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > q->dev->mtu)
				return -EINVAL;
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
	}

	dev->mtu = new_mtu;
	return 0;
}

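/* Initialize both halves of a master: the queueing discipline it
 * exports and the net_device callbacks it answers to.
 */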
static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size	= sizeof(struct teql_sched_data);

	ops->enqueue	=	teql_enqueue;
	ops->dequeue	=	teql_dequeue;
	ops->requeue	=	teql_requeue;
	ops->init	=	teql_qdisc_init;
	ops->reset	=	teql_reset;
	ops->destroy	=	teql_destroy;
	ops->owner	=	THIS_MODULE;

	dev->open		= teql_master_open;
	dev->hard_start_xmit	= teql_master_xmit;
	dev->stop		= teql_master_close;
	dev->get_stats		= teql_master_stats;
	dev->change_mtu		= teql_master_mtu;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
	SET_MODULE_OWNER(dev);
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

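/* Create max_equalizers master devices and register a matching qdisc
 * discipline for each.  Masters created before a failure are kept, so
 * loading succeeds as long as at least one equalizer came up.
 */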
static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master),
				   "teql%d", teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		if ((err = register_netdev(dev))) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	return i ? 0 : err;
}

static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
		list_del(&master->master_list);
		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");