/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer,
   simply attach this qdisc to a device, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

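   Once at least one slave is attached, the master can be brought up
   and addressed like any other interface. A minimal sketch (the
   interface names and the address are illustrative):

   # ip link set teql0 up
   # ip addr add 10.0.0.1/24 dev teql0

   Detaching a slave is just a matter of deleting its qdisc again:

   # tc qdisc del dev eth0 root
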
   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      such as tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate the upper useful difference as ~10 times.
   3. If the slave requires address resolution, only protocols using the
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */

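/* One teql_master exists per teqlN master device.  The embedded
 * Qdisc_ops is registered under the device's own name in teql_init(),
 * so "tc qdisc add ... teqlN" instantiates a qdisc bound to this
 * master.  ->slaves points into a circular list of the attached slave
 * qdiscs (see teql_sched_data below).
 */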
struct teql_master {
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;		/* circular list; most recently used entry */
	struct list_head master_list;
};

struct teql_sched_data {
	struct Qdisc *next;		/* next slave qdisc on the master's ring */
	struct teql_master *m;
	struct neighbour *ncache;	/* last resolved neighbour, see __teql_resolve() */
	struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
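
/* Every walker of the slave ring in this file uses the same pattern,
 * sketched here for reference:
 *
 *	struct Qdisc *q = m->slaves;
 *
 *	if (q) {
 *		do {
 *			... look at qdisc_dev(q) ...
 *		} while ((q = NEXT_SLAVE(q)) != m->slaves);
 *	}
 */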

/* "teql*" qdisc routines */

static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_sched_data *q = qdisc_priv(sch);

	if (q->q.qlen < dev->tx_queue_len) {
		__skb_queue_tail(&q->q, skb);
		sch->bstats.bytes += qdisc_pkt_len(skb);
		sch->bstats.packets++;
		return NET_XMIT_SUCCESS;
	}

	kfree_skb(skb);
	sch->qstats.drops++;
	return NET_XMIT_DROP;
}

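/* Dequeue for the slave qdisc: hand the next locally queued skb to the
 * slave driver.  If our local queue has run dry, park this slave as the
 * master's next transmit candidate and wake the master device, so that
 * teql_master_xmit() gets a chance to refill us.
 */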
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	if (skb == NULL) {
		struct net_device *m = qdisc_dev(dat_queue->qdisc);

		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	}
	sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
	return skb;
}

static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
	/* teql is meant to be used as root qdisc */
	return NULL;
}

static inline void
teql_neigh_release(struct neighbour *n)
{
	if (n)
		neigh_release(n);
}

static void
teql_reset(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
	teql_neigh_release(xchg(&dat->ncache, NULL));
}

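/* Called when the qdisc is deleted from a slave device: unlink this
 * qdisc from the master's circular slave list.  If it was the last
 * slave, also reset the master's own queue under the root lock.
 */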
static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	prev = master->slaves;
	if (prev) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(txq->qdisc);
						spin_lock_bh(root_lock);
						qdisc_reset(txq->qdisc);
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				teql_neigh_release(xchg(&dat->ncache, NULL));
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}

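/* Attach a new slave: reject obviously incompatible devices (too little
 * header room, attaching the master to itself, and, while the master is
 * UP, any mismatch in PtP/broadcast/multicast flags or a smaller MTU),
 * then link the new qdisc into the circular slave list.
 */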
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			if ((m->dev->flags & IFF_POINTOPOINT &&
			     !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST &&
			     !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST &&
			     !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			/* Master is down: degrade its flags and MTU to what
			 * the new slave can support.
			 */
			if (!(dev->flags & IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags & IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags & IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags & ~FMASK) | (dev->flags & FMASK);
	}
	return 0;
}

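/* Build the link-layer header for skb on the chosen slave, resolving
 * the destination via the neighbour cache.  The most recently used
 * neighbour is cached in q->ncache so back-to-back packets to the same
 * destination skip the lookup.  Returns 0 once the header is written,
 * 1 if skb_res was queued on the neighbour awaiting resolution,
 * -EAGAIN if resolution is pending and no fallback skb was supplied,
 * or another negative errno.
 */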
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
	       struct net_device *dev)
{
	struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
	struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
	struct neighbour *mn = skb_dst(skb)->neighbour;
	struct neighbour *n = q->ncache;

	if (mn->tbl == NULL)
		return -EINVAL;
	if (n && n->tbl == mn->tbl &&
	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
		atomic_inc(&n->refcnt);
	} else {
		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	if (neigh_event_send(n, skb_res) == 0) {
		int err;
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, n, dev);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
				      NULL, skb->len);

		if (err < 0) {
			neigh_release(n);
			return -EINVAL;
		}
		teql_neigh_release(xchg(&q->ncache, n));
		return 0;
	}
	neigh_release(n);
	return (skb_res == NULL) ? -EAGAIN : 1;
}

static inline int teql_resolve(struct sk_buff *skb,
			       struct sk_buff *skb_res, struct net_device *dev)
{
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);

	if (txq->qdisc == &noop_qdisc)
		return -ENODEV;

	if (dev->header_ops == NULL ||
	    skb_dst(skb) == NULL ||
	    skb_dst(skb)->neighbour == NULL)
		return 0;
	return __teql_resolve(skb, skb_res, dev);
}

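/* Transmit on the master device: walk the slave ring starting at the
 * most recently used slave, skip slaves that are stopped or whose
 * queue is not ours, and send on the first one that accepts the
 * packet, rotating master->slaves on success.  If every usable slave
 * failed only on neighbour resolution, retry the loop once with
 * skb_res set so the packet can be parked on a neighbour entry.
 */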
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	q = start;
	if (q == NULL)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
		const struct net_device_ops *slave_ops = slave->netdev_ops;

		if (slave_txq->qdisc_sleeping != q)
			continue;
		if (__netif_subqueue_stopped(slave, subq) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave)) {
		case 0:
			if (__netif_tx_trylock(slave_txq)) {
				unsigned int length = qdisc_pkt_len(skb);

				if (!netif_tx_queue_frozen_or_stopped(slave_txq) &&
				    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
					txq_trans_update(slave_txq);
					__netif_tx_unlock(slave_txq);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					txq->tx_packets++;
					txq->tx_bytes += length;
					return NETDEV_TX_OK;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_queue_stopped(dev))
				busy = 1;
			break;
		case 1:
			/* Packet was parked on a neighbour awaiting resolution. */
			master->slaves = NEXT_SLAVE(q);
			return NETDEV_TX_OK;
		default:
			nores = 1;
			break;
		}
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	dev->stats.tx_errors++;

drop:
	txq->tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int teql_master_open(struct net_device *dev)
{
	struct Qdisc *q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned int flags = FMASK;

	if (m->slaves == NULL)
		return -EUNATCH;

	q = m->slaves;
	do {
		struct net_device *slave = qdisc_dev(q);

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST.
		 * If all the slaves are PtP, master is PtP.
		 * Otherwise, master is NBMA.
		 */
		if (!(slave->flags & IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags & IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags & IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags & ~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	if (new_mtu < 68)	/* minimum IPv4 MTU, per RFC 791 */
		return -EINVAL;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > qdisc_dev(q)->mtu)
				return -EINVAL;
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
	}

	dev->mtu = new_mtu;
	return 0;
}

static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
	.ndo_change_mtu	= teql_master_mtu,
};

static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size	= sizeof(struct teql_sched_data);

	ops->enqueue	= teql_enqueue;
	ops->dequeue	= teql_dequeue;
	ops->peek	= teql_peek;
	ops->init	= teql_qdisc_init;
	ops->reset	= teql_reset;
	ops->destroy	= teql_destroy;
	ops->owner	= THIS_MODULE;

	dev->netdev_ops		= &teql_netdev_ops;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
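
/* The number of teqlN master devices is fixed at load time; to get
 * more than one equalizer, pass the parameter when loading the module,
 * e.g. (the value is illustrative):
 *
 *	# modprobe sch_teql max_equalizers=4
 */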

static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master),
				   "teql%d", teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		err = register_netdev(dev);
		if (err) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	/* Succeed if at least one equalizer was created. */
	return i ? 0 : err;
}

static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
		list_del(&master->master_list);

		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");