/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device, teqlN,
   and a new qdisc with the same name. To join a slave to the
   equalizer, simply attach this qdisc to the slave device, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

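   To detach a slave later, delete its qdisc the same way:

   # tc qdisc del dev eth0 root
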
   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      such as tunnels, use a normal eql device instead.
   2. This device puts no limitations on physical slave characteristics,
      e.g., it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of massive packet reordering.
      I estimate the upper useful difference to be about a factor of 10.
   3. If the slave requires address resolution, only protocols using
      the neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols may still use the slave device directly, which
      will not break load balancing, though such native slave traffic
      will have the highest priority.  */

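/* A master is a virtual net_device plus the Qdisc_ops its slaves are
 * created from; the slave qdiscs form a circular list, linked through
 * the ->next field of their private data.
 */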
struct teql_master
{
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;
	struct list_head master_list;
	struct net_device_stats stats;
};

struct teql_sched_data
{
	struct Qdisc *next;
	struct teql_master *m;
	struct neighbour *ncache;
	struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)

/* "teql*" qdisc routines */

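/* Queue a packet on the slave qdisc's private FIFO, bounded by the
 * slave device's tx_queue_len; anything beyond that is dropped and
 * counted in the qdisc's drop statistics.
 */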
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct net_device *dev = sch->dev;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (q->q.qlen < dev->tx_queue_len) {
		__skb_queue_tail(&q->q, skb);
		sch->bstats.bytes += skb->len;
		sch->bstats.packets++;
		return 0;
	}

	kfree_skb(skb);
	sch->qstats.drops++;
	return NET_XMIT_DROP;
}

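/* A packet handed back by the driver is re-queued at the head of the
 * FIFO so it is retried first on the next dequeue.
 */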
static int
teql_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct teql_sched_data *q = qdisc_priv(sch);

	__skb_queue_head(&q->q, skb);
	sch->qstats.requeues++;
	return 0;
}

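/* Dequeue from this slave's FIFO.  When the FIFO runs empty, record
 * this qdisc as the master's next round-robin pick and wake the master
 * queue so it can refill us.  The reported qlen also accounts for
 * packets still held in the master device's own qdisc.
 */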
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct sk_buff *skb;

	skb = __skb_dequeue(&dat->q);
	if (skb == NULL) {
		struct net_device *m = dat->m->dev->qdisc->dev;
		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	}
	sch->q.qlen = dat->q.qlen + dat->m->dev->qdisc->q.qlen;
	return skb;
}

static __inline__ void
teql_neigh_release(struct neighbour *n)
{
	if (n)
		neigh_release(n);
}

static void
teql_reset(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
	teql_neigh_release(xchg(&dat->ncache, NULL));
}

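/* Remove this qdisc from its master's circular slave list.  When the
 * last slave goes away the list becomes empty and the master's own
 * qdisc is reset under the queue lock; any queued packets and the
 * cached neighbour are released.
 */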
static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	if ((prev = master->slaves) != NULL) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						master->slaves = NULL;
						spin_lock_bh(&master->dev->queue_lock);
						qdisc_reset(master->dev->qdisc);
						spin_unlock_bh(&master->dev->queue_lock);
					}
				}
				skb_queue_purge(&dat->q);
				teql_neigh_release(xchg(&dat->ncache, NULL));
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}

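/* Attach a new slave ("tc qdisc add dev ... root teqlN").  The slave's
 * hard header must fit within the master's, and the master cannot
 * enslave itself.  While the master is up, a new slave may not force
 * the master's flags or MTU down, so such slaves are rejected; while it
 * is down, the master's flags and MTU are simply narrowed to match.
 */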
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct net_device *dev = sch->dev;
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
			    || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
			    || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
			    || dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			if (!(dev->flags&IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags&IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags&IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
	}
	return 0;
}

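/* Build the link-layer header for transmission on one particular slave.
 * The route's neighbour entry belongs to the master device, so an
 * equivalent neighbour is looked up on the slave (and cached in
 * q->ncache).  Returns 0 with the header filled in, a negative error,
 * or 1/-EAGAIN while address resolution is still pending.
 */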
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
	struct teql_sched_data *q = qdisc_priv(dev->qdisc);
	struct neighbour *mn = skb->dst->neighbour;
	struct neighbour *n = q->ncache;

	if (mn->tbl == NULL)
		return -EINVAL;
	if (n && n->tbl == mn->tbl &&
	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
		atomic_inc(&n->refcnt);
	} else {
		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	if (neigh_event_send(n, skb_res) == 0) {
		int err;

		read_lock(&n->lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      n->ha, NULL, skb->len);
		read_unlock(&n->lock);

		if (err < 0) {
			neigh_release(n);
			return -EINVAL;
		}
		teql_neigh_release(xchg(&q->ncache, n));
		return 0;
	}
	neigh_release(n);
	return (skb_res == NULL) ? -EAGAIN : 1;
}

static inline int teql_resolve(struct sk_buff *skb,
			       struct sk_buff *skb_res, struct net_device *dev)
{
	if (dev->qdisc == &noop_qdisc)
		return -ENODEV;

	if (dev->header_ops == NULL ||
	    skb->dst == NULL ||
	    skb->dst->neighbour == NULL)
		return 0;
	return __teql_resolve(skb, skb_res, dev);
}

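/* hard_start_xmit for the master device.  Walk the circular slave list
 * round-robin, starting at the last recorded position, and give the
 * packet to the first slave that is running, not stopped, and whose
 * neighbour resolves.  If resolution is merely pending everywhere, one
 * retry pass is made with the skb itself queued on the neighbour; if
 * all slaves were busy, the master queue is stopped instead of dropping.
 */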
static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int len = skb->len;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	if ((q = start) == NULL)
		goto drop;

	do {
		struct net_device *slave = q->dev;

		if (slave->qdisc_sleeping != q)
			continue;
		if (netif_queue_stopped(slave) ||
		    __netif_subqueue_stopped(slave, subq) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave)) {
		case 0:
			if (netif_tx_trylock(slave)) {
				if (!netif_queue_stopped(slave) &&
				    !__netif_subqueue_stopped(slave, subq) &&
				    slave->hard_start_xmit(skb, slave) == 0) {
					netif_tx_unlock(slave);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->stats.tx_packets++;
					master->stats.tx_bytes += len;
					return 0;
				}
				netif_tx_unlock(slave);
			}
			if (netif_queue_stopped(dev))
				busy = 1;
			break;
		case 1:
			master->slaves = NEXT_SLAVE(q);
			return 0;
		default:
			nores = 1;
			break;
		}
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return 1;
	}
	master->stats.tx_errors++;

drop:
	master->stats.tx_dropped++;
	dev_kfree_skb(skb);
	return 0;
}

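/* Bring the master up.  Refuse unless at least one slave is attached,
 * then adopt the smallest slave MTU and the intersection of the slaves'
 * broadcast/point-to-point/multicast capabilities.
 */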
static int teql_master_open(struct net_device *dev)
{
	struct Qdisc *q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned flags = IFF_NOARP|IFF_MULTICAST;

	if (m->slaves == NULL)
		return -EUNATCH;

	flags = FMASK;

	q = m->slaves;
	do {
		struct net_device *slave = q->dev;

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST
		   If all the slaves are PtP, master is PtP
		   Otherwise, master is NBMA.
		 */
		if (!(slave->flags&IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags&IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags&IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags&~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static struct net_device_stats *teql_master_stats(struct net_device *dev)
{
	struct teql_master *m = netdev_priv(dev);
	return &m->stats;
}

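/* The master's MTU may never exceed that of any slave, nor drop below
 * the IPv4 minimum of 68 octets.
 */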
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	if (new_mtu < 68)
		return -EINVAL;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > q->dev->mtu)
				return -EINVAL;
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
	}

	dev->mtu = new_mtu;
	return 0;
}

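/* One-time initialization of a master device.  The Qdisc_ops embedded
 * in struct teql_master serve as the matching "teqlN" qdisc, and the
 * net_device itself is wired up as a headerless, NOARP virtual device.
 */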
static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size  = sizeof(struct teql_sched_data);

	ops->enqueue	=	teql_enqueue;
	ops->dequeue	=	teql_dequeue;
	ops->requeue	=	teql_requeue;
	ops->init	=	teql_qdisc_init;
	ops->reset	=	teql_reset;
	ops->destroy	=	teql_destroy;
	ops->owner	=	THIS_MODULE;

	dev->open		= teql_master_open;
	dev->hard_start_xmit	= teql_master_xmit;
	dev->stop		= teql_master_close;
	dev->get_stats		= teql_master_stats;
	dev->change_mtu		= teql_master_mtu;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

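/* Module init: create max_equalizers masters, registering each as both
 * a net_device ("teql%d") and a qdisc whose id equals the device name.
 * Loading succeeds as long as at least one master came up.
 */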
static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master),
				  "teql%d", teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		if ((err = register_netdev(dev))) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	return i ? 0 : err;
}

static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
		list_del(&master->master_list);
		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");