/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer,
   simply attach this qdisc to the slave device as its root, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device instead.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate the upper useful ratio at about 10 times.
   3. If the slave requires address resolution, only protocols using the
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */
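
/* Once slaves are attached, the master is configured like any other
   network device; a typical follow-up (the address below is only an
   illustrative example, not part of the module):

   # ip addr add 192.168.100.1/24 dev teql0
   # ip link set dev teql0 up
 */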

struct teql_master
{
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;
	struct list_head master_list;
};

struct teql_sched_data
{
	struct Qdisc *next;
	struct teql_master *m;
	struct neighbour *ncache;
	struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)

/* "teql*" qdisc routines */

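/* Enqueue on this slave's private FIFO, bounded by the slave device's
 * tx_queue_len; packets beyond that limit are dropped and counted in
 * qstats.drops.
 */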
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_sched_data *q = qdisc_priv(sch);

	if (q->q.qlen < dev->tx_queue_len) {
		__skb_queue_tail(&q->q, skb);
		sch->bstats.bytes += qdisc_pkt_len(skb);
		sch->bstats.packets++;
		return 0;
	}

	kfree_skb(skb);
	sch->qstats.drops++;
	return NET_XMIT_DROP;
}

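/* Dequeue from the private FIFO. When it runs empty, make this slave the
 * master's current round-robin position and wake the master device queue.
 * The reported qlen also includes the backlog on the master's own tx qdisc.
 */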
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	if (skb == NULL) {
		struct net_device *m = qdisc_dev(dat_queue->qdisc);
		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	}
	sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
	return skb;
}

static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
	/* teql is meant to be used as root qdisc */
	return NULL;
}

static __inline__ void
teql_neigh_release(struct neighbour *n)
{
	if (n)
		neigh_release(n);
}

static void
teql_reset(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
	teql_neigh_release(xchg(&dat->ncache, NULL));
}

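/* Unlink this qdisc from the master's circular slave list. If it was the
 * last slave, also reset the master's own tx qdisc under its root lock.
 */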
static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	if ((prev = master->slaves) != NULL) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(txq->qdisc);
						spin_lock_bh(root_lock);
						qdisc_reset(txq->qdisc);
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				teql_neigh_release(xchg(&dat->ncache, NULL));
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}

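/* Attach a new slave. The slave's link-layer header must fit within the
 * master's, and the master cannot enslave itself. While the master is up,
 * the slave's flags and MTU must already be compatible; while it is down,
 * the master's flags and MTU are relaxed to match the slave. New slaves
 * join the master's circular list.
 */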
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			if ((m->dev->flags & IFF_POINTOPOINT &&
			     !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST &&
			     !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST &&
			     !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			if (!(dev->flags&IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags&IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags&IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
	}
	return 0;
}


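/* Build the link-layer header for a slave that needs address resolution,
 * reusing the cached neighbour when it matches the destination. Returns 0
 * when the header was filled in, 1 when the packet was handed to the
 * neighbour layer to wait for resolution, and a negative errno otherwise
 * (-EAGAIN on the first, non-committal pass where skb_res is NULL).
 */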
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
	struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
	struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
	struct neighbour *mn = skb_dst(skb)->neighbour;
	struct neighbour *n = q->ncache;

	if (mn->tbl == NULL)
		return -EINVAL;
	if (n && n->tbl == mn->tbl &&
	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
		atomic_inc(&n->refcnt);
	} else {
		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	if (neigh_event_send(n, skb_res) == 0) {
		int err;

		read_lock(&n->lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      n->ha, NULL, skb->len);
		read_unlock(&n->lock);

		if (err < 0) {
			neigh_release(n);
			return -EINVAL;
		}
		teql_neigh_release(xchg(&q->ncache, n));
		return 0;
	}
	neigh_release(n);
	return (skb_res == NULL) ? -EAGAIN : 1;
}

static inline int teql_resolve(struct sk_buff *skb,
			       struct sk_buff *skb_res, struct net_device *dev)
{
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
	if (txq->qdisc == &noop_qdisc)
		return -ENODEV;

	if (dev->header_ops == NULL ||
	    skb_dst(skb) == NULL ||
	    skb_dst(skb)->neighbour == NULL)
		return 0;
	return __teql_resolve(skb, skb_res, dev);
}

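/* Transmit on the master: walk the circular slave list round-robin, resolve
 * the link-layer header for each candidate and hand the skb straight to the
 * slave's ndo_start_xmit under its tx lock. If every slave is busy the
 * master queue is stopped and NETDEV_TX_BUSY returned; if resolution failed
 * everywhere, a second pass offers the packet itself to the neighbour layer
 * for queuing, and only then is it dropped.
 */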
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	if ((q = start) == NULL)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
		const struct net_device_ops *slave_ops = slave->netdev_ops;

		if (slave_txq->qdisc_sleeping != q)
			continue;
		if (__netif_subqueue_stopped(slave, subq) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave)) {
		case 0:
			if (__netif_tx_trylock(slave_txq)) {
				unsigned int length = qdisc_pkt_len(skb);

				if (!netif_tx_queue_stopped(slave_txq) &&
				    !netif_tx_queue_frozen(slave_txq) &&
				    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
					txq_trans_update(slave_txq);
					__netif_tx_unlock(slave_txq);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					txq->tx_packets++;
					txq->tx_bytes += length;
					return NETDEV_TX_OK;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_queue_stopped(dev))
				busy = 1;
			break;
		case 1:
			master->slaves = NEXT_SLAVE(q);
			return NETDEV_TX_OK;
		default:
			nores = 1;
			break;
		}
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	dev->stats.tx_errors++;

drop:
	txq->tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

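/* Bring the master up: it needs at least one slave, takes the smallest
 * slave MTU, and keeps the BROADCAST/POINTOPOINT flags only if every
 * slave has them; otherwise the master behaves as NBMA.
 */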
static int teql_master_open(struct net_device *dev)
{
	struct Qdisc *q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned flags = IFF_NOARP|IFF_MULTICAST;

	if (m->slaves == NULL)
		return -EUNATCH;

	flags = FMASK;

	q = m->slaves;
	do {
		struct net_device *slave = qdisc_dev(q);

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST
		   If all the slaves are PtP, master is PtP
		   Otherwise, master is NBMA.
		 */
		if (!(slave->flags&IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags&IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags&IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags&~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

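/* The master MTU must stay at or below every slave's MTU and may never
 * drop under 68 bytes, the IPv4 minimum.
 */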
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	if (new_mtu < 68)
		return -EINVAL;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > qdisc_dev(q)->mtu)
				return -EINVAL;
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
	}

	dev->mtu = new_mtu;
	return 0;
}

static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
	.ndo_change_mtu	= teql_master_mtu,
};

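/* Set up one teqlN master device and its private Qdisc_ops; the ops id is
 * filled in with the device name when the module registers it in teql_init().
 */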
static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size	= sizeof(struct teql_sched_data);

	ops->enqueue	=	teql_enqueue;
	ops->dequeue	=	teql_dequeue;
	ops->peek	=	teql_peek;
	ops->init	=	teql_qdisc_init;
	ops->reset	=	teql_reset;
	ops->destroy	=	teql_destroy;
	ops->owner	=	THIS_MODULE;

	dev->netdev_ops		= &teql_netdev_ops;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

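/* Create max_equalizers teqlN master devices; each registers both a network
 * device and a qdisc named after it, so "teql0" serves as the qdisc id for
 * tc as well as the interface name.
 */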
static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master),
				   "teql%d", teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		if ((err = register_netdev(dev))) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	return i ? 0 : err;
}

static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {

		list_del(&master->master_list);

		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");