/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer,
   simply attach this qdisc as the root qdisc of the slave device, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e. they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      such as tunnels, use a normal eql device instead.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb Ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of massive packet reordering.
      I estimate the upper useful difference at ~10 times.
   3. If the slave requires address resolution, only protocols using the
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols may still use the slave device directly, which will
      not break load balancing, though native slave traffic will have the
      highest priority.  */

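/* One teql_master backs each teqlN master net_device.  Its Qdisc_ops is
 * embedded here so that every master registers its own qdisc type named
 * after the device; slaves points into the circular list of attached
 * slave qdiscs. */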
struct teql_master
{
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;
	struct list_head master_list;
	struct net_device_stats stats;
};

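/* Per-slave qdisc state: next chains the slaves into the master's circular
 * list, ncache caches the last neighbour resolved for this slave, and q is
 * the slave's private packet FIFO. */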
struct teql_sched_data
{
	struct Qdisc *next;
	struct teql_master *m;
	struct neighbour *ncache;
	struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)

/* "teql*" qdisc routines */

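/* Enqueue onto the slave's private FIFO, bounded by the slave device's
 * tx_queue_len; anything beyond that limit is dropped. */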
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_sched_data *q = qdisc_priv(sch);

	if (q->q.qlen < dev->tx_queue_len) {
		__skb_queue_tail(&q->q, skb);
		sch->bstats.bytes += qdisc_pkt_len(skb);
		sch->bstats.packets++;
		return 0;
	}

	kfree_skb(skb);
	sch->qstats.drops++;
	return NET_XMIT_DROP;
}

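/* Put a packet back at the head of the FIFO after a failed transmit
 * attempt, so it is retried first on the next dequeue. */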
static int
teql_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct teql_sched_data *q = qdisc_priv(sch);

	__skb_queue_head(&q->q, skb);
	sch->qstats.requeues++;
	return 0;
}

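/* Pull the next packet from the slave's FIFO.  When the FIFO runs empty,
 * restart the master's round robin at this slave and wake the master
 * device so it can hand out more packets. */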
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	if (skb == NULL) {
		struct net_device *m = qdisc_dev(dat_queue->qdisc);
		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	}
	sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
	return skb;
}

static __inline__ void
teql_neigh_release(struct neighbour *n)
{
	if (n)
		neigh_release(n);
}

static void
teql_reset(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
	teql_neigh_release(xchg(&dat->ncache, NULL));
}

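/* Unlink this qdisc from its master's circular slave list.  If it was the
 * last slave, also reset the master's root qdisc under its root lock so no
 * stale packets remain queued on the master. */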
static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	if ((prev = master->slaves) != NULL) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(txq->qdisc);
						spin_lock_bh(root_lock);
						qdisc_reset(txq->qdisc);
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				teql_neigh_release(xchg(&dat->ncache, NULL));
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}

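/* Attach sch as a slave of the teql master whose Qdisc_ops it was created
 * from.  The slave's hard header must fit under the master's, the slave
 * must not be the master itself, and (while the master is up) it must not
 * weaken the master's MTU or BROADCAST/POINTOPOINT/MULTICAST properties;
 * while the master is down those properties are tightened to match. */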
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
			    || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
			    || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
			    || dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			if (!(dev->flags&IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags&IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags&IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
	}
	return 0;
}

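/* Resolve the packet's destination on the given slave device, reusing the
 * slave's cached neighbour when it matches, and build the link-layer
 * header.  Returns 0 on success, a negative error on failure, and 1 (or
 * -EAGAIN on the first pass) while neighbour resolution is still pending. */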
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
	struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
	struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
	struct neighbour *mn = skb->dst->neighbour;
	struct neighbour *n = q->ncache;

	if (mn->tbl == NULL)
		return -EINVAL;
	if (n && n->tbl == mn->tbl &&
	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
		atomic_inc(&n->refcnt);
	} else {
		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	if (neigh_event_send(n, skb_res) == 0) {
		int err;

		read_lock(&n->lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      n->ha, NULL, skb->len);
		read_unlock(&n->lock);

		if (err < 0) {
			neigh_release(n);
			return -EINVAL;
		}
		teql_neigh_release(xchg(&q->ncache, n));
		return 0;
	}
	neigh_release(n);
	return (skb_res == NULL) ? -EAGAIN : 1;
}

static inline int teql_resolve(struct sk_buff *skb,
			       struct sk_buff *skb_res, struct net_device *dev)
{
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
	if (txq->qdisc == &noop_qdisc)
		return -ENODEV;

	if (dev->header_ops == NULL ||
	    skb->dst == NULL ||
	    skb->dst->neighbour == NULL)
		return 0;
	return __teql_resolve(skb, skb_res, dev);
}

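/* Transmit on the master device: walk the circular slave list starting at
 * the last used slave, resolve the destination for each candidate and hand
 * the packet to the first slave whose tx queue is free.  If every usable
 * slave is stopped, stop the master queue and report busy so the packet is
 * retried later. */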
static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	if ((q = start) == NULL)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq;

		slave_txq = netdev_get_tx_queue(slave, 0);
		if (slave_txq->qdisc_sleeping != q)
			continue;
		if (__netif_subqueue_stopped(slave, subq) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave)) {
		case 0:
			if (__netif_tx_trylock(slave_txq)) {
				if (!netif_tx_queue_stopped(slave_txq) &&
				    !netif_tx_queue_frozen(slave_txq) &&
				    slave->hard_start_xmit(skb, slave) == 0) {
					__netif_tx_unlock(slave_txq);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->stats.tx_packets++;
					master->stats.tx_bytes +=
						qdisc_pkt_len(skb);
					return 0;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_queue_stopped(dev))
				busy = 1;
			break;
		case 1:
			master->slaves = NEXT_SLAVE(q);
			return 0;
		default:
			nores = 1;
			break;
		}
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return 1;
	}
	master->stats.tx_errors++;

drop:
	master->stats.tx_dropped++;
	dev_kfree_skb(skb);
	return 0;
}

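/* Bring the master up: require at least one attached slave, adopt the
 * smallest slave MTU, and keep the BROADCAST/POINTOPOINT flags only if
 * every slave has them. */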
static int teql_master_open(struct net_device *dev)
{
	struct Qdisc *q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned flags = IFF_NOARP|IFF_MULTICAST;

	if (m->slaves == NULL)
		return -EUNATCH;

	flags = FMASK;

	q = m->slaves;
	do {
		struct net_device *slave = qdisc_dev(q);

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST
		   If all the slaves are PtP, master is PtP
		   Otherwise, master is NBMA.
		 */
		if (!(slave->flags&IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags&IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags&IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags&~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static struct net_device_stats *teql_master_stats(struct net_device *dev)
{
	struct teql_master *m = netdev_priv(dev);
	return &m->stats;
}

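/* The master MTU may only be changed to a value no larger than any slave's
 * MTU, and never below the IPv4 minimum of 68. */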
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	if (new_mtu < 68)
		return -EINVAL;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > qdisc_dev(q)->mtu)
				return -EINVAL;
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
	}

	dev->mtu = new_mtu;
	return 0;
}

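/* Initialise a master net_device.  The per-master Qdisc_ops lives in the
 * device's private area, so each master provides its own qdisc type; its
 * id is filled in with the device name in teql_init(). */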
static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size  = sizeof(struct teql_sched_data);

	ops->enqueue	=	teql_enqueue;
	ops->dequeue	=	teql_dequeue;
	ops->requeue	=	teql_requeue;
	ops->init	=	teql_qdisc_init;
	ops->reset	=	teql_reset;
	ops->destroy	=	teql_destroy;
	ops->owner	=	THIS_MODULE;

	dev->open		= teql_master_open;
	dev->hard_start_xmit	= teql_master_xmit;
	dev->stop		= teql_master_close;
	dev->get_stats		= teql_master_stats;
	dev->change_mtu		= teql_master_mtu;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

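/* Create max_equalizers master devices named teql%d and register a qdisc
 * type per device, using the device name as the qdisc id.  Module init
 * succeeds if at least one master was set up. */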
static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master),
				   "teql%d", teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		if ((err = register_netdev(dev))) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	return i ? 0 : err;
}

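/* Module unload: tear down every master, unregistering its qdisc type
 * before freeing its net_device. */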
static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {

		list_del(&master->master_list);

		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");