/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up
   ----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer,
   simply attach this qdisc to the slave device as its root, e.g.

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability
   -------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      such as tunnels, use a normal eql device instead.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will happily equalize a 9600 baud line and 100Mb ethernet :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable because of massive packet reordering;
      a speed ratio beyond roughly 10:1 is unlikely to be useful.
   3. If the slave requires address resolution, only protocols using
      the neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */
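
/* For illustration only: once slaves are attached, the teqlN master still has
 * to be brought up and addressed like any other interface before it carries
 * traffic.  A minimal sketch, assuming the usual iproute2 tools; the address
 * below is purely an example:
 *
 *   # ip link set dev teql0 up
 *   # ip addr add 192.0.2.1/24 dev teql0
 */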

struct teql_master
{
	struct Qdisc_ops qops;		/* per-master qdisc ops, registered under the device name */
	struct net_device *dev;		/* the teqlN master device itself */
	struct Qdisc *slaves;		/* circular list of slave qdiscs; also the next round-robin candidate */
	struct list_head master_list;
};

struct teql_sched_data
{
	struct Qdisc *next;		/* next slave qdisc on the master's circular list */
	struct teql_master *m;		/* master this slave belongs to */
	struct neighbour *ncache;	/* cached neighbour for building link headers on this slave */
	struct sk_buff_head q;		/* queue for the slave device's own (native) traffic */
};

#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)

/* "teql*" qdisc routines */

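/* Root qdisc of a slave device: traffic sent directly through the slave is
 * held on a private list, bounded by the slave's tx_queue_len, and handed
 * back out by teql_dequeue() on the slave's own transmit path.
 */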
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_sched_data *q = qdisc_priv(sch);

	if (q->q.qlen < dev->tx_queue_len) {
		__skb_queue_tail(&q->q, skb);
		sch->bstats.bytes += qdisc_pkt_len(skb);
		sch->bstats.packets++;
		return 0;
	}

	kfree_skb(skb);
	sch->qstats.drops++;
	return NET_XMIT_DROP;
}

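/* Dequeue the slave device's own traffic.  When the private queue runs empty,
 * make this slave the master's next round-robin candidate and wake the master
 * queue, since an idle slave may now take traffic sent via the teql device.
 * The reported queue length also counts packets waiting in the master
 * device's own qdisc.
 */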
static struct sk_buff *
teql_dequeue(struct Qdisc* sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	if (skb == NULL) {
		struct net_device *m = qdisc_dev(dat_queue->qdisc);
		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	}
	sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
	return skb;
}

static struct sk_buff *
teql_peek(struct Qdisc* sch)
{
	/* teql is meant to be used as root qdisc */
	return NULL;
}

static __inline__ void
teql_neigh_release(struct neighbour *n)
{
	if (n)
		neigh_release(n);
}

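/* Drop everything queued on this slave and forget the cached neighbour. */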
static void
teql_reset(struct Qdisc* sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
	teql_neigh_release(xchg(&dat->ncache, NULL));
}

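/* Unlink this slave qdisc from its master's circular list.  If it was the
 * last slave, also reset the master device's own qdisc, under the root lock.
 */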
static void
teql_destroy(struct Qdisc* sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	if ((prev = master->slaves) != NULL) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(txq->qdisc);
						spin_lock_bh(root_lock);
						qdisc_reset(txq->qdisc);
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				teql_neigh_release(xchg(&dat->ncache, NULL));
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}

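/* Attach a new slave device to the equalizer.  The slave must be compatible
 * with the master (header length, MTU, broadcast/multicast/point-to-point
 * flags); while the master is down, the master's flags and MTU are narrowed
 * to match the new slave instead of rejecting it.
 */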
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master*)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
			    || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
			    || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
			    || dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			if (!(dev->flags&IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags&IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags&IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
	}
	return 0;
}

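/* Build the link-layer header for skb on the given slave device, using the
 * neighbour entry of the packet's destination (cached in q->ncache when
 * possible).  Returns 0 if the header was built, a negative errno on failure,
 * -EAGAIN if resolution is still pending and no skb_res was supplied, or 1 if
 * resolution is incomplete and skb_res has been consumed by the neighbour
 * code.
 */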
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
	struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
	struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
	struct neighbour *mn = skb_dst(skb)->neighbour;
	struct neighbour *n = q->ncache;

	if (mn->tbl == NULL)
		return -EINVAL;
	if (n && n->tbl == mn->tbl &&
	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
		atomic_inc(&n->refcnt);
	} else {
		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	if (neigh_event_send(n, skb_res) == 0) {
		int err;

		read_lock(&n->lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      n->ha, NULL, skb->len);
		read_unlock(&n->lock);

		if (err < 0) {
			neigh_release(n);
			return -EINVAL;
		}
		teql_neigh_release(xchg(&q->ncache, n));
		return 0;
	}
	neigh_release(n);
	return (skb_res == NULL) ? -EAGAIN : 1;
}

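/* Resolve only when it is actually needed: fail for slaves whose qdisc has
 * been replaced by noop, and treat devices without header_ops or packets
 * without a neighbour as already resolved.
 */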
static inline int teql_resolve(struct sk_buff *skb,
			       struct sk_buff *skb_res, struct net_device *dev)
{
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
	if (txq->qdisc == &noop_qdisc)
		return -ENODEV;

	if (dev->header_ops == NULL ||
	    skb_dst(skb) == NULL ||
	    skb_dst(skb)->neighbour == NULL)
		return 0;
	return __teql_resolve(skb, skb_res, dev);
}

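/* Transmit on the master device: walk the circular list of slaves starting at
 * master->slaves and hand the packet to the first slave that is running, not
 * stopped, and for which the link-layer header can be built.  If every slave
 * is busy the master queue is stopped; if header resolution failed everywhere,
 * the walk is retried once with skb_res set so the neighbour layer can take
 * the packet while resolution completes.
 */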
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	if ((q = start) == NULL)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
		const struct net_device_ops *slave_ops = slave->netdev_ops;

		if (slave_txq->qdisc_sleeping != q)
			continue;
		if (__netif_subqueue_stopped(slave, subq) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave)) {
		case 0:
			if (__netif_tx_trylock(slave_txq)) {
				unsigned int length = qdisc_pkt_len(skb);

				if (!netif_tx_queue_stopped(slave_txq) &&
				    !netif_tx_queue_frozen(slave_txq) &&
				    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
					txq_trans_update(slave_txq);
					__netif_tx_unlock(slave_txq);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					txq->tx_packets++;
					txq->tx_bytes += length;
					return NETDEV_TX_OK;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_queue_stopped(dev))
				busy = 1;
			break;
		case 1:
			master->slaves = NEXT_SLAVE(q);
			return NETDEV_TX_OK;
		default:
			nores = 1;
			break;
		}
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	dev->stats.tx_errors++;

drop:
	txq->tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

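/* Bring the master up: fail unless at least one slave is attached, then derive
 * the master's MTU (the minimum over all slaves) and its broadcast and
 * point-to-point flags from the intersection of the slaves' flags.
 */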
static int teql_master_open(struct net_device *dev)
{
	struct Qdisc * q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned flags = IFF_NOARP|IFF_MULTICAST;

	if (m->slaves == NULL)
		return -EUNATCH;

	flags = FMASK;

	q = m->slaves;
	do {
		struct net_device *slave = qdisc_dev(q);

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST
		   If all the slaves are PtP, master is PtP
		   Otherwise, master is NBMA.
		 */
		if (!(slave->flags&IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags&IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags&IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags&~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

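/* The master's MTU may be lowered freely (but not below the IPv4 minimum of
 * 68) and never raised above the MTU of any attached slave.
 */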
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	if (new_mtu < 68)
		return -EINVAL;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > qdisc_dev(q)->mtu)
				return -EINVAL;
		} while ((q=NEXT_SLAVE(q)) != m->slaves);
	}

	dev->mtu = new_mtu;
	return 0;
}

static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
	.ndo_change_mtu	= teql_master_mtu,
};

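/* Initialise a freshly allocated teqlN master device and fill in the
 * Qdisc_ops that will later be registered under the device's name.
 */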
static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size  = sizeof(struct teql_sched_data);

	ops->enqueue	=	teql_enqueue;
	ops->dequeue	=	teql_dequeue;
	ops->peek	=	teql_peek;
	ops->init	=	teql_qdisc_init;
	ops->reset	=	teql_reset;
	ops->destroy	=	teql_destroy;
	ops->owner	=	THIS_MODULE;

	dev->netdev_ops =       &teql_netdev_ops;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

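/* Create max_equalizers master devices (teql0, teql1, ...) and register a
 * matching qdisc type for each.  Loading succeeds if at least one equalizer
 * was set up.
 */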
static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master),
				  "teql%d", teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		if ((err = register_netdev(dev))) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	return i ? 0 : err;
}

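/* Tear down every registered equalizer: unregister the qdisc type first, then
 * the master device.
 */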
static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {

		list_del(&master->master_list);

		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");