/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer
   you simply attach this qdisc as root to the slave device, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb Ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of massive packet reordering.
      I estimate an upper useful difference as ~10 times.
   3. If the slave requires address resolution, only protocols using
      the neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */
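/* Example (addresses are hypothetical): after attaching the slaves, the
   master itself still has to be configured and brought up like any
   other interface:

   # ip address add 10.0.0.1/24 dev teql0
   # ip link set dev teql0 up
 */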
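/* One master network device per teql qdisc class.  NOTE: qops must stay
 * the first member: teql_qdisc_init() casts sch->ops back to the
 * enclosing teql_master. */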
struct teql_master {
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;
	struct list_head master_list;
	struct net_device_stats stats;
};

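/* Per-slave qdisc state.  All slaves of one master are chained into a
 * circular list through ->next; master->slaves points at the slave the
 * round-robin scheduler will try first. */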
struct teql_sched_data {
	struct Qdisc *next;
	struct teql_master *m;
	struct neighbour *ncache;
	struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

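/* Interface flag bits that the master inherits from its slaves. */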
#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)

/* "teql*" qdisc routines */

static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_sched_data *q = qdisc_priv(sch);

	if (q->q.qlen < dev->tx_queue_len) {
		__skb_queue_tail(&q->q, skb);
		sch->bstats.bytes += qdisc_pkt_len(skb);
		sch->bstats.packets++;
		return NET_XMIT_SUCCESS;
	}

	kfree_skb(skb);
	sch->qstats.drops++;
	return NET_XMIT_DROP;
}

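/* Dequeue from our private queue; once it runs dry, make this slave the
 * round-robin starting point and wake the master so it can feed us
 * again. */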
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	if (skb == NULL) {
		struct net_device *m = qdisc_dev(dat_queue->qdisc);
		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	}
	sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
	return skb;
}

static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
	/* teql is meant to be used as root qdisc */
	return NULL;
}

static inline void
teql_neigh_release(struct neighbour *n)
{
	if (n)
		neigh_release(n);
}

static void
teql_reset(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
	teql_neigh_release(xchg(&dat->ncache, NULL));
}

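/* Unlink this qdisc from its master's circular slave list; when the
 * last slave goes away, reset the master's own qdisc as well. */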
static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	if ((prev = master->slaves) != NULL) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(txq->qdisc);
						spin_lock_bh(root_lock);
						qdisc_reset(txq->qdisc);
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				teql_neigh_release(xchg(&dat->ncache, NULL));
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}

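/* Attach a new slave ("tc qdisc add dev ethX root teqlN"): validate it
 * against the master's header room, flags and MTU, then link it into
 * the circular slave list. */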
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

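	/* Refuse to enslave the master to itself. */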
	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			if ((m->dev->flags & IFF_POINTOPOINT && !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST && !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST && !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			if (!(dev->flags & IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags & IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags & IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags & ~FMASK) | (dev->flags & FMASK);
	}
	return 0;
}

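/* Build the link-layer header for skb on this slave, resolving and
 * caching the neighbour entry.  Returns 0 when the header was built,
 * 1 when skb_res was taken over by pending neighbour resolution, and a
 * negative errno when this slave cannot be used right now. */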
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
	struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
	struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
	struct neighbour *mn = skb->dst->neighbour;
	struct neighbour *n = q->ncache;

	if (mn->tbl == NULL)
		return -EINVAL;
	if (n && n->tbl == mn->tbl &&
	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
		atomic_inc(&n->refcnt);
	} else {
		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	if (neigh_event_send(n, skb_res) == 0) {
		int err;

		read_lock(&n->lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      n->ha, NULL, skb->len);
		read_unlock(&n->lock);

		if (err < 0) {
			neigh_release(n);
			return -EINVAL;
		}
		teql_neigh_release(xchg(&q->ncache, n));
		return 0;
	}
	neigh_release(n);
	return (skb_res == NULL) ? -EAGAIN : 1;
}

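/* Fast path: no resolution is needed when the slave is headerless or the
 * route carries no neighbour.  -ENODEV means the slave is down or no
 * longer carries our qdisc (its root has reverted to noop_qdisc). */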
static inline int teql_resolve(struct sk_buff *skb,
			       struct sk_buff *skb_res, struct net_device *dev)
{
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
	if (txq->qdisc == &noop_qdisc)
		return -ENODEV;

	if (dev->header_ops == NULL ||
	    skb->dst == NULL ||
	    skb->dst->neighbour == NULL)
		return 0;
	return __teql_resolve(skb, skb_res, dev);
}

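/* Round-robin transmit across the slave list.  The first pass only uses
 * slaves whose neighbour is already resolved; if any slave failed only
 * because of an unresolved address, a second pass hands the skb itself
 * (as skb_res) over to a slave's neighbour queue for resolution. */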
static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	if ((q = start) == NULL)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
		const struct net_device_ops *slave_ops = slave->netdev_ops;

		if (slave_txq->qdisc_sleeping != q)
			continue;
		if (__netif_subqueue_stopped(slave, subq) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave)) {
		case 0:
			if (__netif_tx_trylock(slave_txq)) {
				if (!netif_tx_queue_stopped(slave_txq) &&
				    !netif_tx_queue_frozen(slave_txq) &&
				    slave_ops->ndo_start_xmit(skb, slave) == 0) {
					__netif_tx_unlock(slave_txq);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->stats.tx_packets++;
					master->stats.tx_bytes +=
						qdisc_pkt_len(skb);
					return NETDEV_TX_OK;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_queue_stopped(dev))
				busy = 1;
			break;
		case 1:
			master->slaves = NEXT_SLAVE(q);
			return NETDEV_TX_OK;
		default:
			nores = 1;
			break;
		}
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	master->stats.tx_errors++;

drop:
	master->stats.tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

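/* The master can come up only with at least one slave attached.  Its MTU
 * becomes the smallest slave MTU, and its flags are derived from the
 * slaves as described below. */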
static int teql_master_open(struct net_device *dev)
{
	struct Qdisc *q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned int flags = FMASK;

	if (m->slaves == NULL)
		return -EUNATCH;

	q = m->slaves;
	do {
		struct net_device *slave = qdisc_dev(q);

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST
		   If all the slaves are PtP, master is PtP
		   Otherwise, master is NBMA.
		 */
		if (!(slave->flags & IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags & IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags & IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags & ~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static struct net_device_stats *teql_master_stats(struct net_device *dev)
{
	struct teql_master *m = netdev_priv(dev);
	return &m->stats;
}

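/* 68 is the minimum IPv4 MTU (RFC 791); the master's MTU may never
 * exceed that of any slave. */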
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	if (new_mtu < 68)
		return -EINVAL;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > qdisc_dev(q)->mtu)
				return -EINVAL;
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
	}

	dev->mtu = new_mtu;
	return 0;
}

static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
	.ndo_get_stats	= teql_master_stats,
	.ndo_change_mtu	= teql_master_mtu,
};

static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size	= sizeof(struct teql_sched_data);

	ops->enqueue	= teql_enqueue;
	ops->dequeue	= teql_dequeue;
	ops->peek	= teql_peek;
	ops->init	= teql_qdisc_init;
	ops->reset	= teql_reset;
	ops->destroy	= teql_destroy;
	ops->owner	= THIS_MODULE;

	dev->netdev_ops		= &teql_netdev_ops;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

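/* Create max_equalizers master devices; each registers a matching qdisc
 * whose id is the device name (teql0, teql1, ...).  If at least one
 * equalizer was set up, a later failure still counts as success. */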
static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master),
				   "teql%d", teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		err = register_netdev(dev);
		if (err) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	return i ? 0 : err;
}

static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {

		list_del(&master->master_list);

		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");