xref: /openbmc/linux/net/ipv4/udp_tunnel_nic.c (revision b8265621)
1 // SPDX-License-Identifier: GPL-2.0-only
2 // Copyright (c) 2020 Facebook Inc.
3 
4 #include <linux/ethtool_netlink.h>
5 #include <linux/netdevice.h>
6 #include <linux/slab.h>
7 #include <linux/types.h>
8 #include <linux/workqueue.h>
9 #include <net/udp_tunnel.h>
10 
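/* Core infrastructure for offloading UDP tunnel ports (VXLAN, GENEVE, ...)
 * to capable NICs.  A driver opts in by pointing dev->udp_tunnel_nic_info at
 * a static description of its port tables before registering the netdev.
 * Illustrative sketch only - the callback names are hypothetical, the fields
 * and flags come from include/net/udp_tunnel.h:
 *
 *	static const struct udp_tunnel_nic_info mydrv_udp_tunnels = {
 *		.set_port	= mydrv_udp_tunnel_set_port,
 *		.unset_port	= mydrv_udp_tunnel_unset_port,
 *		.flags		= UDP_TUNNEL_NIC_INFO_MAY_SLEEP,
 *		.tables		= {
 *			{ .n_entries = 8, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
 *			{ .n_entries = 8, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, },
 *		},
 *	};
 *
 * This file tracks which ports are in use on each netdev and calls back into
 * the driver to program or flush them.
 */
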
11 enum udp_tunnel_nic_table_entry_flags {
12 	UDP_TUNNEL_NIC_ENTRY_ADD	= BIT(0),
13 	UDP_TUNNEL_NIC_ENTRY_DEL	= BIT(1),
14 	UDP_TUNNEL_NIC_ENTRY_OP_FAIL	= BIT(2),
15 	UDP_TUNNEL_NIC_ENTRY_FROZEN	= BIT(3),
16 };
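
/* Entry flag semantics as used below: ADD/DEL mark an entry queued to be
 * installed in or removed from the device, OP_FAIL records that the last
 * device op on the entry failed (its device state is then unknown), and
 * FROZEN excludes the entry from use count adjustments while a replay is
 * in progress.
 */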
17 
18 struct udp_tunnel_nic_table_entry {
19 	__be16 port;
20 	u8 type;
21 	u8 use_cnt;
22 	u8 flags;
23 	u8 hw_priv;
24 };
25 
26 /**
27  * struct udp_tunnel_nic - UDP tunnel port offload state
28  * @work:	async work for talking to hardware from process context
29  * @dev:	netdev pointer
30  * @need_sync:	at least one port state changed
31  * @need_replay: space was freed, we need a replay of all ports
32  * @work_pending: @work is currently scheduled
33  * @n_tables:	number of tables under @entries
34  * @missed:	bitmap of tables which have overflowed
35  * @entries:	table of tables of ports currently offloaded
36  */
37 struct udp_tunnel_nic {
38 	struct work_struct work;
39 
40 	struct net_device *dev;
41 
42 	u8 need_sync:1;
43 	u8 need_replay:1;
44 	u8 work_pending:1;
45 
46 	unsigned int n_tables;
47 	unsigned long missed;
48 	struct udp_tunnel_nic_table_entry **entries;
49 };
50 
51 /* We ensure all work structs are done using the driver state, but not that
52  * the code has finished; hence a workqueue we can flush before module removal.
53  */
54 static struct workqueue_struct *udp_tunnel_nic_workqueue;
55 
56 static const char *udp_tunnel_nic_tunnel_type_name(unsigned int type)
57 {
58 	switch (type) {
59 	case UDP_TUNNEL_TYPE_VXLAN:
60 		return "vxlan";
61 	case UDP_TUNNEL_TYPE_GENEVE:
62 		return "geneve";
63 	case UDP_TUNNEL_TYPE_VXLAN_GPE:
64 		return "vxlan-gpe";
65 	default:
66 		return "unknown";
67 	}
68 }
69 
70 static bool
71 udp_tunnel_nic_entry_is_free(struct udp_tunnel_nic_table_entry *entry)
72 {
73 	return entry->use_cnt == 0 && !entry->flags;
74 }
75 
76 static bool
77 udp_tunnel_nic_entry_is_present(struct udp_tunnel_nic_table_entry *entry)
78 {
79 	return entry->use_cnt && !(entry->flags & ~UDP_TUNNEL_NIC_ENTRY_FROZEN);
80 }
81 
82 static bool
83 udp_tunnel_nic_entry_is_frozen(struct udp_tunnel_nic_table_entry *entry)
84 {
85 	return entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN;
86 }
87 
88 static void
89 udp_tunnel_nic_entry_freeze_used(struct udp_tunnel_nic_table_entry *entry)
90 {
91 	if (!udp_tunnel_nic_entry_is_free(entry))
92 		entry->flags |= UDP_TUNNEL_NIC_ENTRY_FROZEN;
93 }
94 
95 static void
96 udp_tunnel_nic_entry_unfreeze(struct udp_tunnel_nic_table_entry *entry)
97 {
98 	entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_FROZEN;
99 }
100 
101 static bool
102 udp_tunnel_nic_entry_is_queued(struct udp_tunnel_nic_table_entry *entry)
103 {
104 	return entry->flags & (UDP_TUNNEL_NIC_ENTRY_ADD |
105 			       UDP_TUNNEL_NIC_ENTRY_DEL);
106 }
107 
108 static void
109 udp_tunnel_nic_entry_queue(struct udp_tunnel_nic *utn,
110 			   struct udp_tunnel_nic_table_entry *entry,
111 			   unsigned int flag)
112 {
113 	entry->flags |= flag;
114 	utn->need_sync = 1;
115 }
116 
117 static void
118 udp_tunnel_nic_ti_from_entry(struct udp_tunnel_nic_table_entry *entry,
119 			     struct udp_tunnel_info *ti)
120 {
121 	memset(ti, 0, sizeof(*ti));
122 	ti->port = entry->port;
123 	ti->type = entry->type;
124 	ti->hw_priv = entry->hw_priv;
125 }
126 
127 static bool
128 udp_tunnel_nic_is_empty(struct net_device *dev, struct udp_tunnel_nic *utn)
129 {
130 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
131 	unsigned int i, j;
132 
133 	for (i = 0; i < utn->n_tables; i++)
134 		for (j = 0; j < info->tables[i].n_entries; j++)
135 			if (!udp_tunnel_nic_entry_is_free(&utn->entries[i][j]))
136 				return false;
137 	return true;
138 }
139 
140 static bool
141 udp_tunnel_nic_should_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
142 {
143 	const struct udp_tunnel_nic_table_info *table;
144 	unsigned int i, j;
145 
146 	if (!utn->missed)
147 		return false;
148 
149 	for (i = 0; i < utn->n_tables; i++) {
150 		table = &dev->udp_tunnel_nic_info->tables[i];
151 		if (!test_bit(i, &utn->missed))
152 			continue;
153 
154 		for (j = 0; j < table->n_entries; j++)
155 			if (udp_tunnel_nic_entry_is_free(&utn->entries[i][j]))
156 				return true;
157 	}
158 
159 	return false;
160 }
161 
162 static void
163 __udp_tunnel_nic_get_port(struct net_device *dev, unsigned int table,
164 			  unsigned int idx, struct udp_tunnel_info *ti)
165 {
166 	struct udp_tunnel_nic_table_entry *entry;
167 	struct udp_tunnel_nic *utn;
168 
169 	utn = dev->udp_tunnel_nic;
170 	entry = &utn->entries[table][idx];
171 
172 	if (entry->use_cnt)
173 		udp_tunnel_nic_ti_from_entry(entry, ti);
174 }
175 
176 static void
177 __udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table,
178 			       unsigned int idx, u8 priv)
179 {
180 	dev->udp_tunnel_nic->entries[table][idx].hw_priv = priv;
181 }
182 
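/* Record the outcome of a device op for @entry.  When a previous op on this
 * entry failed, its device state is unknown, so -EEXIST on add and -ENOENT
 * on delete are treated as success.
 */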
183 static void
184 udp_tunnel_nic_entry_update_done(struct udp_tunnel_nic_table_entry *entry,
185 				 int err)
186 {
187 	bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
188 
189 	WARN_ON_ONCE(entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD &&
190 		     entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL);
191 
192 	if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD &&
193 	    (!err || (err == -EEXIST && dodgy)))
194 		entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_ADD;
195 
196 	if (entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL &&
197 	    (!err || (err == -ENOENT && dodgy)))
198 		entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_DEL;
199 
200 	if (!err)
201 		entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
202 	else
203 		entry->flags |= UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
204 }
205 
206 static void
207 udp_tunnel_nic_device_sync_one(struct net_device *dev,
208 			       struct udp_tunnel_nic *utn,
209 			       unsigned int table, unsigned int idx)
210 {
211 	struct udp_tunnel_nic_table_entry *entry;
212 	struct udp_tunnel_info ti;
213 	int err;
214 
215 	entry = &utn->entries[table][idx];
216 	if (!udp_tunnel_nic_entry_is_queued(entry))
217 		return;
218 
219 	udp_tunnel_nic_ti_from_entry(entry, &ti);
220 	if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD)
221 		err = dev->udp_tunnel_nic_info->set_port(dev, table, idx, &ti);
222 	else
223 		err = dev->udp_tunnel_nic_info->unset_port(dev, table, idx,
224 							   &ti);
225 	udp_tunnel_nic_entry_update_done(entry, err);
226 
227 	if (err)
228 		netdev_warn(dev,
229 			    "UDP tunnel port sync failed port %d type %s: %d\n",
230 			    be16_to_cpu(entry->port),
231 			    udp_tunnel_nic_tunnel_type_name(entry->type),
232 			    err);
233 }
234 
235 static void
236 udp_tunnel_nic_device_sync_by_port(struct net_device *dev,
237 				   struct udp_tunnel_nic *utn)
238 {
239 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
240 	unsigned int i, j;
241 
242 	for (i = 0; i < utn->n_tables; i++)
243 		for (j = 0; j < info->tables[i].n_entries; j++)
244 			udp_tunnel_nic_device_sync_one(dev, utn, i, j);
245 }
246 
247 static void
248 udp_tunnel_nic_device_sync_by_table(struct net_device *dev,
249 				    struct udp_tunnel_nic *utn)
250 {
251 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
252 	unsigned int i, j;
253 	int err;
254 
255 	for (i = 0; i < utn->n_tables; i++) {
256 		/* Find something that needs sync in this table */
257 		for (j = 0; j < info->tables[i].n_entries; j++)
258 			if (udp_tunnel_nic_entry_is_queued(&utn->entries[i][j]))
259 				break;
260 		if (j == info->tables[i].n_entries)
261 			continue;
262 
263 		err = info->sync_table(dev, i);
264 		if (err)
265 			netdev_warn(dev, "UDP tunnel port sync failed for table %d: %d\n",
266 				    i, err);
267 
268 		for (j = 0; j < info->tables[i].n_entries; j++) {
269 			struct udp_tunnel_nic_table_entry *entry;
270 
271 			entry = &utn->entries[i][j];
272 			if (udp_tunnel_nic_entry_is_queued(entry))
273 				udp_tunnel_nic_entry_update_done(entry, err);
274 		}
275 	}
276 }
277 
278 static void
279 __udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
280 {
281 	if (!utn->need_sync)
282 		return;
283 
284 	if (dev->udp_tunnel_nic_info->sync_table)
285 		udp_tunnel_nic_device_sync_by_table(dev, utn);
286 	else
287 		udp_tunnel_nic_device_sync_by_port(dev, utn);
288 
289 	utn->need_sync = 0;
290 	/* Can't replay directly here, in case we come from the tunnel driver's
291 	 * notification - trying to replay may deadlock inside the tunnel driver.
292 	 */
293 	utn->need_replay = udp_tunnel_nic_should_replay(dev, utn);
294 }
295 
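/* Push queued changes to the device.  We may be called from the tunnel
 * driver's port notification, so drivers which sleep in their callbacks,
 * as well as any pending replay, are handled from the workqueue instead
 * of inline.
 */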
296 static void
297 udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
298 {
299 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
300 	bool may_sleep;
301 
302 	if (!utn->need_sync)
303 		return;
304 
305 	/* Drivers which sleep in the callback need to update from
306 	 * the workqueue, if we come from the tunnel driver's notification.
307 	 */
308 	may_sleep = info->flags & UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
309 	if (!may_sleep)
310 		__udp_tunnel_nic_device_sync(dev, utn);
311 	if (may_sleep || utn->need_replay) {
312 		queue_work(udp_tunnel_nic_workqueue, &utn->work);
313 		utn->work_pending = 1;
314 	}
315 }
316 
317 static bool
318 udp_tunnel_nic_table_is_capable(const struct udp_tunnel_nic_table_info *table,
319 				struct udp_tunnel_info *ti)
320 {
321 	return table->tunnel_types & ti->type;
322 }
323 
324 static bool
325 udp_tunnel_nic_is_capable(struct net_device *dev, struct udp_tunnel_nic *utn,
326 			  struct udp_tunnel_info *ti)
327 {
328 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
329 	unsigned int i;
330 
331 	/* Special case IPv4-only NICs */
332 	if (info->flags & UDP_TUNNEL_NIC_INFO_IPV4_ONLY &&
333 	    ti->sa_family != AF_INET)
334 		return false;
335 
336 	for (i = 0; i < utn->n_tables; i++)
337 		if (udp_tunnel_nic_table_is_capable(&info->tables[i], ti))
338 			return true;
339 	return false;
340 }
341 
342 static int
343 udp_tunnel_nic_has_collision(struct net_device *dev, struct udp_tunnel_nic *utn,
344 			     struct udp_tunnel_info *ti)
345 {
346 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
347 	struct udp_tunnel_nic_table_entry *entry;
348 	unsigned int i, j;
349 
350 	for (i = 0; i < utn->n_tables; i++)
351 		for (j = 0; j < info->tables[i].n_entries; j++) {
352 			entry = &utn->entries[i][j];
353 
354 			if (!udp_tunnel_nic_entry_is_free(entry) &&
355 			    entry->port == ti->port &&
356 			    entry->type != ti->type) {
357 				__set_bit(i, &utn->missed);
358 				return true;
359 			}
360 		}
361 	return false;
362 }
363 
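/* Adjust the use count of entry [table][idx] by @use_cnt_adj and queue a
 * device op when the entry transitions between used and unused, or when a
 * previous op on it failed and needs to be retried.
 */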
364 static void
365 udp_tunnel_nic_entry_adj(struct udp_tunnel_nic *utn,
366 			 unsigned int table, unsigned int idx, int use_cnt_adj)
367 {
368 	struct udp_tunnel_nic_table_entry *entry = &utn->entries[table][idx];
369 	bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
370 	unsigned int from, to;
371 
372 	/* If not going from used to unused or vice versa - all done.
373 	 * For dodgy entries make sure we try to sync again (queue the entry).
374 	 */
375 	entry->use_cnt += use_cnt_adj;
376 	if (!dodgy && !entry->use_cnt == !(entry->use_cnt - use_cnt_adj))
377 		return;
378 
379 	/* Cancel the op if it has not been sent to the device yet; otherwise
380 	 * we'd need to take special care to issue commands
381 	 * in the same order the ports arrived.
382 	 */
383 	if (use_cnt_adj < 0) {
384 		from = UDP_TUNNEL_NIC_ENTRY_ADD;
385 		to = UDP_TUNNEL_NIC_ENTRY_DEL;
386 	} else {
387 		from = UDP_TUNNEL_NIC_ENTRY_DEL;
388 		to = UDP_TUNNEL_NIC_ENTRY_ADD;
389 	}
390 
391 	if (entry->flags & from) {
392 		entry->flags &= ~from;
393 		if (!dodgy)
394 			return;
395 	}
396 
397 	udp_tunnel_nic_entry_queue(utn, entry, to);
398 }
399 
400 static bool
401 udp_tunnel_nic_entry_try_adj(struct udp_tunnel_nic *utn,
402 			     unsigned int table, unsigned int idx,
403 			     struct udp_tunnel_info *ti, int use_cnt_adj)
404 {
405 	struct udp_tunnel_nic_table_entry *entry = &utn->entries[table][idx];
406 
407 	if (udp_tunnel_nic_entry_is_free(entry) ||
408 	    entry->port != ti->port ||
409 	    entry->type != ti->type)
410 		return false;
411 
412 	if (udp_tunnel_nic_entry_is_frozen(entry))
413 		return true;
414 
415 	udp_tunnel_nic_entry_adj(utn, table, idx, use_cnt_adj);
416 	return true;
417 }
418 
419 /* Try to find an existing matching entry and adjust its use count, instead of
420  * adding a new one. Returns true if an entry was found. On delete the
421  * entry may have gotten removed in the process, in which case it will be
422  * queued for removal.
423  */
424 static bool
425 udp_tunnel_nic_try_existing(struct net_device *dev, struct udp_tunnel_nic *utn,
426 			    struct udp_tunnel_info *ti, int use_cnt_adj)
427 {
428 	const struct udp_tunnel_nic_table_info *table;
429 	unsigned int i, j;
430 
431 	for (i = 0; i < utn->n_tables; i++) {
432 		table = &dev->udp_tunnel_nic_info->tables[i];
433 		if (!udp_tunnel_nic_table_is_capable(table, ti))
434 			continue;
435 
436 		for (j = 0; j < table->n_entries; j++)
437 			if (udp_tunnel_nic_entry_try_adj(utn, i, j, ti,
438 							 use_cnt_adj))
439 				return true;
440 	}
441 
442 	return false;
443 }
444 
445 static bool
446 udp_tunnel_nic_add_existing(struct net_device *dev, struct udp_tunnel_nic *utn,
447 			    struct udp_tunnel_info *ti)
448 {
449 	return udp_tunnel_nic_try_existing(dev, utn, ti, +1);
450 }
451 
452 static bool
453 udp_tunnel_nic_del_existing(struct net_device *dev, struct udp_tunnel_nic *utn,
454 			    struct udp_tunnel_info *ti)
455 {
456 	return udp_tunnel_nic_try_existing(dev, utn, ti, -1);
457 }
458 
459 static bool
460 udp_tunnel_nic_add_new(struct net_device *dev, struct udp_tunnel_nic *utn,
461 		       struct udp_tunnel_info *ti)
462 {
463 	const struct udp_tunnel_nic_table_info *table;
464 	unsigned int i, j;
465 
466 	for (i = 0; i < utn->n_tables; i++) {
467 		table = &dev->udp_tunnel_nic_info->tables[i];
468 		if (!udp_tunnel_nic_table_is_capable(table, ti))
469 			continue;
470 
471 		for (j = 0; j < table->n_entries; j++) {
472 			struct udp_tunnel_nic_table_entry *entry;
473 
474 			entry = &utn->entries[i][j];
475 			if (!udp_tunnel_nic_entry_is_free(entry))
476 				continue;
477 
478 			entry->port = ti->port;
479 			entry->type = ti->type;
480 			entry->use_cnt = 1;
481 			udp_tunnel_nic_entry_queue(utn, entry,
482 						   UDP_TUNNEL_NIC_ENTRY_ADD);
483 			return true;
484 		}
485 
486 		/* A different table may still fit this port in, but there are
487 		 * currently no devices which have multiple tables accepting
488 		 * the same tunnel type, and false positives are okay.
489 		 */
490 		__set_bit(i, &utn->missed);
491 	}
492 
493 	return false;
494 }
495 
496 static void
497 __udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti)
498 {
499 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
500 	struct udp_tunnel_nic *utn;
501 
502 	utn = dev->udp_tunnel_nic;
503 	if (!utn)
504 		return;
505 	if (!netif_running(dev) && info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)
506 		return;
507 
508 	if (!udp_tunnel_nic_is_capable(dev, utn, ti))
509 		return;
510 
511 	/* It may happen that a tunnel of one type is removed and a different
512 	 * tunnel type tries to reuse its port before the device has been informed.
513 	 * Rely on utn->missed to re-add this port later.
514 	 */
515 	if (udp_tunnel_nic_has_collision(dev, utn, ti))
516 		return;
517 
518 	if (!udp_tunnel_nic_add_existing(dev, utn, ti))
519 		udp_tunnel_nic_add_new(dev, utn, ti);
520 
521 	udp_tunnel_nic_device_sync(dev, utn);
522 }
523 
524 static void
525 __udp_tunnel_nic_del_port(struct net_device *dev, struct udp_tunnel_info *ti)
526 {
527 	struct udp_tunnel_nic *utn;
528 
529 	utn = dev->udp_tunnel_nic;
530 	if (!utn)
531 		return;
532 
533 	if (!udp_tunnel_nic_is_capable(dev, utn, ti))
534 		return;
535 
536 	udp_tunnel_nic_del_existing(dev, utn, ti);
537 
538 	udp_tunnel_nic_device_sync(dev, utn);
539 }
540 
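/* The driver reports that the device forgot its offloaded ports (e.g. after
 * a reset).  Drop pending deletes and failure state, re-queue every entry
 * still in use for an add, and sync to the device.
 */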
541 static void __udp_tunnel_nic_reset_ntf(struct net_device *dev)
542 {
543 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
544 	struct udp_tunnel_nic *utn;
545 	unsigned int i, j;
546 
547 	ASSERT_RTNL();
548 
549 	utn = dev->udp_tunnel_nic;
550 	if (!utn)
551 		return;
552 
553 	utn->need_sync = false;
554 	for (i = 0; i < utn->n_tables; i++)
555 		for (j = 0; j < info->tables[i].n_entries; j++) {
556 			struct udp_tunnel_nic_table_entry *entry;
557 
558 			entry = &utn->entries[i][j];
559 
560 			entry->flags &= ~(UDP_TUNNEL_NIC_ENTRY_DEL |
561 					  UDP_TUNNEL_NIC_ENTRY_OP_FAIL);
562 			/* We don't release rtnl across ops */
563 			WARN_ON(entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN);
564 			if (!entry->use_cnt)
565 				continue;
566 
567 			udp_tunnel_nic_entry_queue(utn, entry,
568 						   UDP_TUNNEL_NIC_ENTRY_ADD);
569 		}
570 
571 	__udp_tunnel_nic_device_sync(dev, utn);
572 }
573 
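/* Helpers for the ethtool netlink tunnel-info dump: dump_size() sizes the
 * attributes for one table, dump_write() emits a nested _TABLE_ENTRY with
 * port and type for every present entry.
 */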
574 static size_t
575 __udp_tunnel_nic_dump_size(struct net_device *dev, unsigned int table)
576 {
577 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
578 	struct udp_tunnel_nic *utn;
579 	unsigned int j;
580 	size_t size;
581 
582 	utn = dev->udp_tunnel_nic;
583 	if (!utn)
584 		return 0;
585 
586 	size = 0;
587 	for (j = 0; j < info->tables[table].n_entries; j++) {
588 		if (!udp_tunnel_nic_entry_is_present(&utn->entries[table][j]))
589 			continue;
590 
591 		size += nla_total_size(0) +		 /* _TABLE_ENTRY */
592 			nla_total_size(sizeof(__be16)) + /* _ENTRY_PORT */
593 			nla_total_size(sizeof(u32));	 /* _ENTRY_TYPE */
594 	}
595 
596 	return size;
597 }
598 
599 static int
600 __udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table,
601 			    struct sk_buff *skb)
602 {
603 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
604 	struct udp_tunnel_nic *utn;
605 	struct nlattr *nest;
606 	unsigned int j;
607 
608 	utn = dev->udp_tunnel_nic;
609 	if (!utn)
610 		return 0;
611 
612 	for (j = 0; j < info->tables[table].n_entries; j++) {
613 		if (!udp_tunnel_nic_entry_is_present(&utn->entries[table][j]))
614 			continue;
615 
616 		nest = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY);
617 
618 		if (nla_put_be16(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT,
619 				 utn->entries[table][j].port) ||
620 		    nla_put_u32(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE,
621 				ilog2(utn->entries[table][j].type)))
622 			goto err_cancel;
623 
624 		nla_nest_end(skb, nest);
625 	}
626 
627 	return 0;
628 
629 err_cancel:
630 	nla_nest_cancel(skb, nest);
631 	return -EMSGSIZE;
632 }
633 
634 static const struct udp_tunnel_nic_ops __udp_tunnel_nic_ops = {
635 	.get_port	= __udp_tunnel_nic_get_port,
636 	.set_port_priv	= __udp_tunnel_nic_set_port_priv,
637 	.add_port	= __udp_tunnel_nic_add_port,
638 	.del_port	= __udp_tunnel_nic_del_port,
639 	.reset_ntf	= __udp_tunnel_nic_reset_ntf,
640 	.dump_size	= __udp_tunnel_nic_dump_size,
641 	.dump_write	= __udp_tunnel_nic_dump_write,
642 };
643 
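/* Drop all use counts, synchronously push the resulting removals to the
 * device and clear the tables.  Used on unregister and when an OPEN_ONLY
 * device goes down.
 */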
644 static void
645 udp_tunnel_nic_flush(struct net_device *dev, struct udp_tunnel_nic *utn)
646 {
647 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
648 	unsigned int i, j;
649 
650 	for (i = 0; i < utn->n_tables; i++)
651 		for (j = 0; j < info->tables[i].n_entries; j++) {
652 			int adj_cnt = -utn->entries[i][j].use_cnt;
653 
654 			if (adj_cnt)
655 				udp_tunnel_nic_entry_adj(utn, i, j, adj_cnt);
656 		}
657 
658 	__udp_tunnel_nic_device_sync(dev, utn);
659 
660 	for (i = 0; i < utn->n_tables; i++)
661 		memset(utn->entries[i], 0, array_size(info->tables[i].n_entries,
662 						      sizeof(**utn->entries)));
663 	WARN_ON(utn->need_sync);
664 	utn->need_replay = 0;
665 }
666 
667 static void
668 udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
669 {
670 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
671 	unsigned int i, j;
672 
673 	/* Freeze all the ports we are already tracking so that the replay
674 	 * does not double up the refcount.
675 	 */
676 	for (i = 0; i < utn->n_tables; i++)
677 		for (j = 0; j < info->tables[i].n_entries; j++)
678 			udp_tunnel_nic_entry_freeze_used(&utn->entries[i][j]);
679 	utn->missed = 0;
680 	utn->need_replay = 0;
681 
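	/* Ask the tunnel drivers to re-announce all their ports; frozen
	 * entries keep their existing use counts while the notifications
	 * come in.
	 */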
682 	udp_tunnel_get_rx_info(dev);
683 
684 	for (i = 0; i < utn->n_tables; i++)
685 		for (j = 0; j < info->tables[i].n_entries; j++)
686 			udp_tunnel_nic_entry_unfreeze(&utn->entries[i][j]);
687 }
688 
689 static void udp_tunnel_nic_device_sync_work(struct work_struct *work)
690 {
691 	struct udp_tunnel_nic *utn =
692 		container_of(work, struct udp_tunnel_nic, work);
693 
694 	rtnl_lock();
695 	utn->work_pending = 0;
696 	__udp_tunnel_nic_device_sync(utn->dev, utn);
697 
698 	if (utn->need_replay)
699 		udp_tunnel_nic_replay(utn->dev, utn);
700 	rtnl_unlock();
701 }
702 
703 static struct udp_tunnel_nic *
704 udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info,
705 		     unsigned int n_tables)
706 {
707 	struct udp_tunnel_nic *utn;
708 	unsigned int i;
709 
710 	utn = kzalloc(sizeof(*utn), GFP_KERNEL);
711 	if (!utn)
712 		return NULL;
713 	utn->n_tables = n_tables;
714 	INIT_WORK(&utn->work, udp_tunnel_nic_device_sync_work);
715 
716 	utn->entries = kmalloc_array(n_tables, sizeof(void *), GFP_KERNEL);
717 	if (!utn->entries)
718 		goto err_free_utn;
719 
720 	for (i = 0; i < n_tables; i++) {
721 		utn->entries[i] = kcalloc(info->tables[i].n_entries,
722 					  sizeof(*utn->entries[i]), GFP_KERNEL);
723 		if (!utn->entries[i])
724 			goto err_free_prev_entries;
725 	}
726 
727 	return utn;
728 
729 err_free_prev_entries:
730 	while (i--)
731 		kfree(utn->entries[i]);
732 	kfree(utn->entries);
733 err_free_utn:
734 	kfree(utn);
735 	return NULL;
736 }
737 
738 static int udp_tunnel_nic_register(struct net_device *dev)
739 {
740 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
741 	struct udp_tunnel_nic *utn;
742 	unsigned int n_tables, i;
743 
744 	BUILD_BUG_ON(sizeof(utn->missed) * BITS_PER_BYTE <
745 		     UDP_TUNNEL_NIC_MAX_TABLES);
746 
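	/* A driver must provide either both per-port callbacks (set_port and
	 * unset_port) or the whole-table callback (sync_table), but not both
	 * models at once, and table 0 must not be empty.
	 */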
747 	if (WARN_ON(!info->set_port != !info->unset_port) ||
748 	    WARN_ON(!info->set_port == !info->sync_table) ||
749 	    WARN_ON(!info->tables[0].n_entries))
750 		return -EINVAL;
751 
752 	n_tables = 1;
753 	for (i = 1; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) {
754 		if (!info->tables[i].n_entries)
755 			continue;
756 
757 		n_tables++;
758 		if (WARN_ON(!info->tables[i - 1].n_entries))
759 			return -EINVAL;
760 	}
761 
762 	utn = udp_tunnel_nic_alloc(info, n_tables);
763 	if (!utn)
764 		return -ENOMEM;
765 
766 	utn->dev = dev;
767 	dev_hold(dev);
768 	dev->udp_tunnel_nic = utn;
769 
770 	if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY))
771 		udp_tunnel_get_rx_info(dev);
772 
773 	return 0;
774 }
775 
776 static void
777 udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
778 {
779 	unsigned int i;
780 
781 	/* Flush before we check for pending work, so we don't waste time adding
782 	 * entries from the work item only to remove them again right away.
783 	 */
784 	udp_tunnel_nic_flush(dev, utn);
785 
786 	/* Wait for the work to be done using the state; netdev core will
787 	 * retry the unregister until we give up our reference on this device.
788 	 */
789 	if (utn->work_pending)
790 		return;
791 
792 	for (i = 0; i < utn->n_tables; i++)
793 		kfree(utn->entries[i]);
794 	kfree(utn->entries);
795 	kfree(utn);
796 	dev->udp_tunnel_nic = NULL;
797 	dev_put(dev);
798 }
799 
800 static int
801 udp_tunnel_nic_netdevice_event(struct notifier_block *unused,
802 			       unsigned long event, void *ptr)
803 {
804 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
805 	const struct udp_tunnel_nic_info *info;
806 	struct udp_tunnel_nic *utn;
807 
808 	info = dev->udp_tunnel_nic_info;
809 	if (!info)
810 		return NOTIFY_DONE;
811 
812 	if (event == NETDEV_REGISTER) {
813 		int err;
814 
815 		err = udp_tunnel_nic_register(dev);
816 		if (err)
817 			netdev_WARN(dev, "failed to register for UDP tunnel offloads: %d", err);
818 		return notifier_from_errno(err);
819 	}
820 	/* All other events will need the udp_tunnel_nic state */
821 	utn = dev->udp_tunnel_nic;
822 	if (!utn)
823 		return NOTIFY_DONE;
824 
825 	if (event == NETDEV_UNREGISTER) {
826 		udp_tunnel_nic_unregister(dev, utn);
827 		return NOTIFY_OK;
828 	}
829 
830 	/* All other events only matter for NICs which must be programmed while open */
831 	if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY))
832 		return NOTIFY_DONE;
833 
834 	if (event == NETDEV_UP) {
835 		WARN_ON(!udp_tunnel_nic_is_empty(dev, utn));
836 		udp_tunnel_get_rx_info(dev);
837 		return NOTIFY_OK;
838 	}
839 	if (event == NETDEV_GOING_DOWN) {
840 		udp_tunnel_nic_flush(dev, utn);
841 		return NOTIFY_OK;
842 	}
843 
844 	return NOTIFY_DONE;
845 }
846 
847 static struct notifier_block udp_tunnel_nic_notifier_block __read_mostly = {
848 	.notifier_call = udp_tunnel_nic_netdevice_event,
849 };
850 
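/* The global udp_tunnel_nic_ops pointer is set and cleared under rtnl so
 * that its appearance and disappearance are atomic with respect to the
 * rtnl-protected paths which read it.
 */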
851 static int __init udp_tunnel_nic_init_module(void)
852 {
853 	int err;
854 
855 	udp_tunnel_nic_workqueue = alloc_workqueue("udp_tunnel_nic", 0, 0);
856 	if (!udp_tunnel_nic_workqueue)
857 		return -ENOMEM;
858 
859 	rtnl_lock();
860 	udp_tunnel_nic_ops = &__udp_tunnel_nic_ops;
861 	rtnl_unlock();
862 
863 	err = register_netdevice_notifier(&udp_tunnel_nic_notifier_block);
864 	if (err)
865 		goto err_unset_ops;
866 
867 	return 0;
868 
869 err_unset_ops:
870 	rtnl_lock();
871 	udp_tunnel_nic_ops = NULL;
872 	rtnl_unlock();
873 	destroy_workqueue(udp_tunnel_nic_workqueue);
874 	return err;
875 }
876 late_initcall(udp_tunnel_nic_init_module);
877 
878 static void __exit udp_tunnel_nic_cleanup_module(void)
879 {
880 	unregister_netdevice_notifier(&udp_tunnel_nic_notifier_block);
881 
882 	rtnl_lock();
883 	udp_tunnel_nic_ops = NULL;
884 	rtnl_unlock();
885 
886 	destroy_workqueue(udp_tunnel_nic_workqueue);
887 }
888 module_exit(udp_tunnel_nic_cleanup_module);
889 
890 MODULE_LICENSE("GPL");