xref: /openbmc/linux/net/ipv4/udp_tunnel_nic.c (revision f97769fd)
1 // SPDX-License-Identifier: GPL-2.0-only
2 // Copyright (c) 2020 Facebook Inc.
3 
4 #include <linux/ethtool_netlink.h>
5 #include <linux/netdevice.h>
6 #include <linux/slab.h>
7 #include <linux/types.h>
8 #include <linux/workqueue.h>
9 #include <net/udp_tunnel.h>
10 #include <net/vxlan.h>
11 
/* State bits stored in the @flags member of a port table entry. */
enum udp_tunnel_nic_table_entry_flags {
	/* entry is queued to be added to the device */
	UDP_TUNNEL_NIC_ENTRY_ADD	= BIT(0),
	/* entry is queued to be removed from the device */
	UDP_TUNNEL_NIC_ENTRY_DEL	= BIT(1),
	/* the last device operation on this entry returned an error */
	UDP_TUNNEL_NIC_ENTRY_OP_FAIL	= BIT(2),
	/* use count must not be adjusted; set while ports are replayed */
	UDP_TUNNEL_NIC_ENTRY_FROZEN	= BIT(3),
};
18 
/**
 * struct udp_tunnel_nic_table_entry - state of one slot of a port table
 * @port:	UDP port number, big endian
 * @type:	tunnel type, compared against the type in struct udp_tunnel_info
 * @use_cnt:	use count of the port, zero when nothing uses the slot
 * @flags:	bitmask of enum udp_tunnel_nic_table_entry_flags
 * @hw_priv:	value stored through the set_port_priv op and copied into
 *		struct udp_tunnel_info when the entry is reported
 */
struct udp_tunnel_nic_table_entry {
	__be16 port;
	u8 type;
	u8 use_cnt;
	u8 flags;
	u8 hw_priv;
};
26 
/**
 * struct udp_tunnel_nic - UDP tunnel port offload state
 * @work:	async work for talking to hardware from process context
 * @dev:	netdev pointer
 * @need_sync:	at least one port state changed
 * @need_replay: space was freed, we need a replay of all ports
 * @work_pending: @work is currently scheduled
 * @n_tables:	number of tables under @entries
 * @missed:	bitmap of tables which have overflowed
 * @entries:	table of tables of ports currently offloaded
 */
struct udp_tunnel_nic {
	struct work_struct work;

	struct net_device *dev;

	u8 need_sync:1;
	u8 need_replay:1;
	u8 work_pending:1;

	unsigned int n_tables;
	unsigned long missed;
	struct udp_tunnel_nic_table_entry **entries;
};
51 
/* We ensure all work structs are done using driver state, but not the code.
 * We need a workqueue we can flush before the module gets removed; the
 * module exit handler destroys it.
 */
static struct workqueue_struct *udp_tunnel_nic_workqueue;
56 
57 static const char *udp_tunnel_nic_tunnel_type_name(unsigned int type)
58 {
59 	switch (type) {
60 	case UDP_TUNNEL_TYPE_VXLAN:
61 		return "vxlan";
62 	case UDP_TUNNEL_TYPE_GENEVE:
63 		return "geneve";
64 	case UDP_TUNNEL_TYPE_VXLAN_GPE:
65 		return "vxlan-gpe";
66 	default:
67 		return "unknown";
68 	}
69 }
70 
71 static bool
72 udp_tunnel_nic_entry_is_free(struct udp_tunnel_nic_table_entry *entry)
73 {
74 	return entry->use_cnt == 0 && !entry->flags;
75 }
76 
77 static bool
78 udp_tunnel_nic_entry_is_present(struct udp_tunnel_nic_table_entry *entry)
79 {
80 	return entry->use_cnt && !(entry->flags & ~UDP_TUNNEL_NIC_ENTRY_FROZEN);
81 }
82 
83 static bool
84 udp_tunnel_nic_entry_is_frozen(struct udp_tunnel_nic_table_entry *entry)
85 {
86 	return entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN;
87 }
88 
89 static void
90 udp_tunnel_nic_entry_freeze_used(struct udp_tunnel_nic_table_entry *entry)
91 {
92 	if (!udp_tunnel_nic_entry_is_free(entry))
93 		entry->flags |= UDP_TUNNEL_NIC_ENTRY_FROZEN;
94 }
95 
96 static void
97 udp_tunnel_nic_entry_unfreeze(struct udp_tunnel_nic_table_entry *entry)
98 {
99 	entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_FROZEN;
100 }
101 
102 static bool
103 udp_tunnel_nic_entry_is_queued(struct udp_tunnel_nic_table_entry *entry)
104 {
105 	return entry->flags & (UDP_TUNNEL_NIC_ENTRY_ADD |
106 			       UDP_TUNNEL_NIC_ENTRY_DEL);
107 }
108 
109 static void
110 udp_tunnel_nic_entry_queue(struct udp_tunnel_nic *utn,
111 			   struct udp_tunnel_nic_table_entry *entry,
112 			   unsigned int flag)
113 {
114 	entry->flags |= flag;
115 	utn->need_sync = 1;
116 }
117 
118 static void
119 udp_tunnel_nic_ti_from_entry(struct udp_tunnel_nic_table_entry *entry,
120 			     struct udp_tunnel_info *ti)
121 {
122 	memset(ti, 0, sizeof(*ti));
123 	ti->port = entry->port;
124 	ti->type = entry->type;
125 	ti->hw_priv = entry->hw_priv;
126 }
127 
128 static bool
129 udp_tunnel_nic_is_empty(struct net_device *dev, struct udp_tunnel_nic *utn)
130 {
131 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
132 	unsigned int i, j;
133 
134 	for (i = 0; i < utn->n_tables; i++)
135 		for (j = 0; j < info->tables[i].n_entries; j++)
136 			if (!udp_tunnel_nic_entry_is_free(&utn->entries[i][j]))
137 				return false;
138 	return true;
139 }
140 
141 static bool
142 udp_tunnel_nic_should_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
143 {
144 	const struct udp_tunnel_nic_table_info *table;
145 	unsigned int i, j;
146 
147 	if (!utn->missed)
148 		return false;
149 
150 	for (i = 0; i < utn->n_tables; i++) {
151 		table = &dev->udp_tunnel_nic_info->tables[i];
152 		if (!test_bit(i, &utn->missed))
153 			continue;
154 
155 		for (j = 0; j < table->n_entries; j++)
156 			if (udp_tunnel_nic_entry_is_free(&utn->entries[i][j]))
157 				return true;
158 	}
159 
160 	return false;
161 }
162 
163 static void
164 __udp_tunnel_nic_get_port(struct net_device *dev, unsigned int table,
165 			  unsigned int idx, struct udp_tunnel_info *ti)
166 {
167 	struct udp_tunnel_nic_table_entry *entry;
168 	struct udp_tunnel_nic *utn;
169 
170 	utn = dev->udp_tunnel_nic;
171 	entry = &utn->entries[table][idx];
172 
173 	if (entry->use_cnt)
174 		udp_tunnel_nic_ti_from_entry(entry, ti);
175 }
176 
177 static void
178 __udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table,
179 			       unsigned int idx, u8 priv)
180 {
181 	dev->udp_tunnel_nic->entries[table][idx].hw_priv = priv;
182 }
183 
184 static void
185 udp_tunnel_nic_entry_update_done(struct udp_tunnel_nic_table_entry *entry,
186 				 int err)
187 {
188 	bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
189 
190 	WARN_ON_ONCE(entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD &&
191 		     entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL);
192 
193 	if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD &&
194 	    (!err || (err == -EEXIST && dodgy)))
195 		entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_ADD;
196 
197 	if (entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL &&
198 	    (!err || (err == -ENOENT && dodgy)))
199 		entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_DEL;
200 
201 	if (!err)
202 		entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
203 	else
204 		entry->flags |= UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
205 }
206 
207 static void
208 udp_tunnel_nic_device_sync_one(struct net_device *dev,
209 			       struct udp_tunnel_nic *utn,
210 			       unsigned int table, unsigned int idx)
211 {
212 	struct udp_tunnel_nic_table_entry *entry;
213 	struct udp_tunnel_info ti;
214 	int err;
215 
216 	entry = &utn->entries[table][idx];
217 	if (!udp_tunnel_nic_entry_is_queued(entry))
218 		return;
219 
220 	udp_tunnel_nic_ti_from_entry(entry, &ti);
221 	if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD)
222 		err = dev->udp_tunnel_nic_info->set_port(dev, table, idx, &ti);
223 	else
224 		err = dev->udp_tunnel_nic_info->unset_port(dev, table, idx,
225 							   &ti);
226 	udp_tunnel_nic_entry_update_done(entry, err);
227 
228 	if (err)
229 		netdev_warn(dev,
230 			    "UDP tunnel port sync failed port %d type %s: %d\n",
231 			    be16_to_cpu(entry->port),
232 			    udp_tunnel_nic_tunnel_type_name(entry->type),
233 			    err);
234 }
235 
236 static void
237 udp_tunnel_nic_device_sync_by_port(struct net_device *dev,
238 				   struct udp_tunnel_nic *utn)
239 {
240 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
241 	unsigned int i, j;
242 
243 	for (i = 0; i < utn->n_tables; i++)
244 		for (j = 0; j < info->tables[i].n_entries; j++)
245 			udp_tunnel_nic_device_sync_one(dev, utn, i, j);
246 }
247 
248 static void
249 udp_tunnel_nic_device_sync_by_table(struct net_device *dev,
250 				    struct udp_tunnel_nic *utn)
251 {
252 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
253 	unsigned int i, j;
254 	int err;
255 
256 	for (i = 0; i < utn->n_tables; i++) {
257 		/* Find something that needs sync in this table */
258 		for (j = 0; j < info->tables[i].n_entries; j++)
259 			if (udp_tunnel_nic_entry_is_queued(&utn->entries[i][j]))
260 				break;
261 		if (j == info->tables[i].n_entries)
262 			continue;
263 
264 		err = info->sync_table(dev, i);
265 		if (err)
266 			netdev_warn(dev, "UDP tunnel port sync failed for table %d: %d\n",
267 				    i, err);
268 
269 		for (j = 0; j < info->tables[i].n_entries; j++) {
270 			struct udp_tunnel_nic_table_entry *entry;
271 
272 			entry = &utn->entries[i][j];
273 			if (udp_tunnel_nic_entry_is_queued(entry))
274 				udp_tunnel_nic_entry_update_done(entry, err);
275 		}
276 	}
277 }
278 
279 static void
280 __udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
281 {
282 	if (!utn->need_sync)
283 		return;
284 
285 	if (dev->udp_tunnel_nic_info->sync_table)
286 		udp_tunnel_nic_device_sync_by_table(dev, utn);
287 	else
288 		udp_tunnel_nic_device_sync_by_port(dev, utn);
289 
290 	utn->need_sync = 0;
291 	/* Can't replay directly here, in case we come from the tunnel driver's
292 	 * notification - trying to replay may deadlock inside tunnel driver.
293 	 */
294 	utn->need_replay = udp_tunnel_nic_should_replay(dev, utn);
295 }
296 
297 static void
298 udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
299 {
300 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
301 	bool may_sleep;
302 
303 	if (!utn->need_sync)
304 		return;
305 
306 	/* Drivers which sleep in the callback need to update from
307 	 * the workqueue, if we come from the tunnel driver's notification.
308 	 */
309 	may_sleep = info->flags & UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
310 	if (!may_sleep)
311 		__udp_tunnel_nic_device_sync(dev, utn);
312 	if (may_sleep || utn->need_replay) {
313 		queue_work(udp_tunnel_nic_workqueue, &utn->work);
314 		utn->work_pending = 1;
315 	}
316 }
317 
318 static bool
319 udp_tunnel_nic_table_is_capable(const struct udp_tunnel_nic_table_info *table,
320 				struct udp_tunnel_info *ti)
321 {
322 	return table->tunnel_types & ti->type;
323 }
324 
325 static bool
326 udp_tunnel_nic_is_capable(struct net_device *dev, struct udp_tunnel_nic *utn,
327 			  struct udp_tunnel_info *ti)
328 {
329 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
330 	unsigned int i;
331 
332 	/* Special case IPv4-only NICs */
333 	if (info->flags & UDP_TUNNEL_NIC_INFO_IPV4_ONLY &&
334 	    ti->sa_family != AF_INET)
335 		return false;
336 
337 	for (i = 0; i < utn->n_tables; i++)
338 		if (udp_tunnel_nic_table_is_capable(&info->tables[i], ti))
339 			return true;
340 	return false;
341 }
342 
343 static int
344 udp_tunnel_nic_has_collision(struct net_device *dev, struct udp_tunnel_nic *utn,
345 			     struct udp_tunnel_info *ti)
346 {
347 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
348 	struct udp_tunnel_nic_table_entry *entry;
349 	unsigned int i, j;
350 
351 	for (i = 0; i < utn->n_tables; i++)
352 		for (j = 0; j < info->tables[i].n_entries; j++) {
353 			entry =	&utn->entries[i][j];
354 
355 			if (!udp_tunnel_nic_entry_is_free(entry) &&
356 			    entry->port == ti->port &&
357 			    entry->type != ti->type) {
358 				__set_bit(i, &utn->missed);
359 				return true;
360 			}
361 		}
362 	return false;
363 }
364 
/* Adjust the use count of the entry at [@table][@idx] by @use_cnt_adj and
 * queue an add (positive adjustment) or a delete (negative adjustment) for
 * the device when one is required.
 */
static void
udp_tunnel_nic_entry_adj(struct udp_tunnel_nic *utn,
			 unsigned int table, unsigned int idx, int use_cnt_adj)
{
	struct udp_tunnel_nic_table_entry *entry =  &utn->entries[table][idx];
	/* dodgy: the previous device operation on this entry failed */
	bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
	unsigned int from, to;

	/* If not going from used to unused or vice versa - all done.
	 * For dodgy entries make sure we try to sync again (queue the entry).
	 */
	entry->use_cnt += use_cnt_adj;
	if (!dodgy && !entry->use_cnt == !(entry->use_cnt - use_cnt_adj))
		return;

	/* Cancel the op before it was sent to the device, if possible,
	 * otherwise we'd need to take special care to issue commands
	 * in the same order the ports arrived.
	 */
	/* @from is the opposite, possibly still pending, operation and @to is
	 * the operation matching the direction of this adjustment.
	 */
	if (use_cnt_adj < 0) {
		from = UDP_TUNNEL_NIC_ENTRY_ADD;
		to = UDP_TUNNEL_NIC_ENTRY_DEL;
	} else {
		from = UDP_TUNNEL_NIC_ENTRY_DEL;
		to = UDP_TUNNEL_NIC_ENTRY_ADD;
	}

	/* A pending opposite op is dropped; unless the entry is dodgy that is
	 * all there is to do and nothing gets queued.
	 */
	if (entry->flags & from) {
		entry->flags &= ~from;
		if (!dodgy)
			return;
	}

	udp_tunnel_nic_entry_queue(utn, entry, to);
}
400 
401 static bool
402 udp_tunnel_nic_entry_try_adj(struct udp_tunnel_nic *utn,
403 			     unsigned int table, unsigned int idx,
404 			     struct udp_tunnel_info *ti, int use_cnt_adj)
405 {
406 	struct udp_tunnel_nic_table_entry *entry =  &utn->entries[table][idx];
407 
408 	if (udp_tunnel_nic_entry_is_free(entry) ||
409 	    entry->port != ti->port ||
410 	    entry->type != ti->type)
411 		return false;
412 
413 	if (udp_tunnel_nic_entry_is_frozen(entry))
414 		return true;
415 
416 	udp_tunnel_nic_entry_adj(utn, table, idx, use_cnt_adj);
417 	return true;
418 }
419 
420 /* Try to find existing matching entry and adjust its use count, instead of
421  * adding a new one. Returns true if entry was found. In case of delete the
422  * entry may have gotten removed in the process, in which case it will be
423  * queued for removal.
424  */
425 static bool
426 udp_tunnel_nic_try_existing(struct net_device *dev, struct udp_tunnel_nic *utn,
427 			    struct udp_tunnel_info *ti, int use_cnt_adj)
428 {
429 	const struct udp_tunnel_nic_table_info *table;
430 	unsigned int i, j;
431 
432 	for (i = 0; i < utn->n_tables; i++) {
433 		table = &dev->udp_tunnel_nic_info->tables[i];
434 		if (!udp_tunnel_nic_table_is_capable(table, ti))
435 			continue;
436 
437 		for (j = 0; j < table->n_entries; j++)
438 			if (udp_tunnel_nic_entry_try_adj(utn, i, j, ti,
439 							 use_cnt_adj))
440 				return true;
441 	}
442 
443 	return false;
444 }
445 
446 static bool
447 udp_tunnel_nic_add_existing(struct net_device *dev, struct udp_tunnel_nic *utn,
448 			    struct udp_tunnel_info *ti)
449 {
450 	return udp_tunnel_nic_try_existing(dev, utn, ti, +1);
451 }
452 
453 static bool
454 udp_tunnel_nic_del_existing(struct net_device *dev, struct udp_tunnel_nic *utn,
455 			    struct udp_tunnel_info *ti)
456 {
457 	return udp_tunnel_nic_try_existing(dev, utn, ti, -1);
458 }
459 
460 static bool
461 udp_tunnel_nic_add_new(struct net_device *dev, struct udp_tunnel_nic *utn,
462 		       struct udp_tunnel_info *ti)
463 {
464 	const struct udp_tunnel_nic_table_info *table;
465 	unsigned int i, j;
466 
467 	for (i = 0; i < utn->n_tables; i++) {
468 		table = &dev->udp_tunnel_nic_info->tables[i];
469 		if (!udp_tunnel_nic_table_is_capable(table, ti))
470 			continue;
471 
472 		for (j = 0; j < table->n_entries; j++) {
473 			struct udp_tunnel_nic_table_entry *entry;
474 
475 			entry = &utn->entries[i][j];
476 			if (!udp_tunnel_nic_entry_is_free(entry))
477 				continue;
478 
479 			entry->port = ti->port;
480 			entry->type = ti->type;
481 			entry->use_cnt = 1;
482 			udp_tunnel_nic_entry_queue(utn, entry,
483 						   UDP_TUNNEL_NIC_ENTRY_ADD);
484 			return true;
485 		}
486 
487 		/* The different table may still fit this port in, but there
488 		 * are no devices currently which have multiple tables accepting
489 		 * the same tunnel type, and false positives are okay.
490 		 */
491 		__set_bit(i, &utn->missed);
492 	}
493 
494 	return false;
495 }
496 
497 static void
498 __udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti)
499 {
500 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
501 	struct udp_tunnel_nic *utn;
502 
503 	utn = dev->udp_tunnel_nic;
504 	if (!utn)
505 		return;
506 	if (!netif_running(dev) && info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)
507 		return;
508 	if (info->flags & UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN &&
509 	    ti->port == htons(IANA_VXLAN_UDP_PORT)) {
510 		if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
511 			netdev_warn(dev, "device assumes port 4789 will be used by vxlan tunnels\n");
512 		return;
513 	}
514 
515 	if (!udp_tunnel_nic_is_capable(dev, utn, ti))
516 		return;
517 
518 	/* It may happen that a tunnel of one type is removed and different
519 	 * tunnel type tries to reuse its port before the device was informed.
520 	 * Rely on utn->missed to re-add this port later.
521 	 */
522 	if (udp_tunnel_nic_has_collision(dev, utn, ti))
523 		return;
524 
525 	if (!udp_tunnel_nic_add_existing(dev, utn, ti))
526 		udp_tunnel_nic_add_new(dev, utn, ti);
527 
528 	udp_tunnel_nic_device_sync(dev, utn);
529 }
530 
531 static void
532 __udp_tunnel_nic_del_port(struct net_device *dev, struct udp_tunnel_info *ti)
533 {
534 	struct udp_tunnel_nic *utn;
535 
536 	utn = dev->udp_tunnel_nic;
537 	if (!utn)
538 		return;
539 
540 	if (!udp_tunnel_nic_is_capable(dev, utn, ti))
541 		return;
542 
543 	udp_tunnel_nic_del_existing(dev, utn, ti);
544 
545 	udp_tunnel_nic_device_sync(dev, utn);
546 }
547 
548 static void __udp_tunnel_nic_reset_ntf(struct net_device *dev)
549 {
550 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
551 	struct udp_tunnel_nic *utn;
552 	unsigned int i, j;
553 
554 	ASSERT_RTNL();
555 
556 	utn = dev->udp_tunnel_nic;
557 	if (!utn)
558 		return;
559 
560 	utn->need_sync = false;
561 	for (i = 0; i < utn->n_tables; i++)
562 		for (j = 0; j < info->tables[i].n_entries; j++) {
563 			struct udp_tunnel_nic_table_entry *entry;
564 
565 			entry = &utn->entries[i][j];
566 
567 			entry->flags &= ~(UDP_TUNNEL_NIC_ENTRY_DEL |
568 					  UDP_TUNNEL_NIC_ENTRY_OP_FAIL);
569 			/* We don't release rtnl across ops */
570 			WARN_ON(entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN);
571 			if (!entry->use_cnt)
572 				continue;
573 
574 			udp_tunnel_nic_entry_queue(utn, entry,
575 						   UDP_TUNNEL_NIC_ENTRY_ADD);
576 		}
577 
578 	__udp_tunnel_nic_device_sync(dev, utn);
579 }
580 
581 static size_t
582 __udp_tunnel_nic_dump_size(struct net_device *dev, unsigned int table)
583 {
584 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
585 	struct udp_tunnel_nic *utn;
586 	unsigned int j;
587 	size_t size;
588 
589 	utn = dev->udp_tunnel_nic;
590 	if (!utn)
591 		return 0;
592 
593 	size = 0;
594 	for (j = 0; j < info->tables[table].n_entries; j++) {
595 		if (!udp_tunnel_nic_entry_is_present(&utn->entries[table][j]))
596 			continue;
597 
598 		size += nla_total_size(0) +		 /* _TABLE_ENTRY */
599 			nla_total_size(sizeof(__be16)) + /* _ENTRY_PORT */
600 			nla_total_size(sizeof(u32));	 /* _ENTRY_TYPE */
601 	}
602 
603 	return size;
604 }
605 
606 static int
607 __udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table,
608 			    struct sk_buff *skb)
609 {
610 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
611 	struct udp_tunnel_nic *utn;
612 	struct nlattr *nest;
613 	unsigned int j;
614 
615 	utn = dev->udp_tunnel_nic;
616 	if (!utn)
617 		return 0;
618 
619 	for (j = 0; j < info->tables[table].n_entries; j++) {
620 		if (!udp_tunnel_nic_entry_is_present(&utn->entries[table][j]))
621 			continue;
622 
623 		nest = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY);
624 
625 		if (nla_put_be16(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT,
626 				 utn->entries[table][j].port) ||
627 		    nla_put_u32(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE,
628 				ilog2(utn->entries[table][j].type)))
629 			goto err_cancel;
630 
631 		nla_nest_end(skb, nest);
632 	}
633 
634 	return 0;
635 
636 err_cancel:
637 	nla_nest_cancel(skb, nest);
638 	return -EMSGSIZE;
639 }
640 
/* Operations table built from the handlers in this file; the module init
 * function stores its address in udp_tunnel_nic_ops and the exit function
 * resets that pointer to NULL.
 */
static const struct udp_tunnel_nic_ops __udp_tunnel_nic_ops = {
	.get_port	= __udp_tunnel_nic_get_port,
	.set_port_priv	= __udp_tunnel_nic_set_port_priv,
	.add_port	= __udp_tunnel_nic_add_port,
	.del_port	= __udp_tunnel_nic_del_port,
	.reset_ntf	= __udp_tunnel_nic_reset_ntf,
	.dump_size	= __udp_tunnel_nic_dump_size,
	.dump_write	= __udp_tunnel_nic_dump_write,
};
650 
651 static void
652 udp_tunnel_nic_flush(struct net_device *dev, struct udp_tunnel_nic *utn)
653 {
654 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
655 	unsigned int i, j;
656 
657 	for (i = 0; i < utn->n_tables; i++)
658 		for (j = 0; j < info->tables[i].n_entries; j++) {
659 			int adj_cnt = -utn->entries[i][j].use_cnt;
660 
661 			if (adj_cnt)
662 				udp_tunnel_nic_entry_adj(utn, i, j, adj_cnt);
663 		}
664 
665 	__udp_tunnel_nic_device_sync(dev, utn);
666 
667 	for (i = 0; i < utn->n_tables; i++)
668 		memset(utn->entries[i], 0, array_size(info->tables[i].n_entries,
669 						      sizeof(**utn->entries)));
670 	WARN_ON(utn->need_sync);
671 	utn->need_replay = 0;
672 }
673 
674 static void
675 udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
676 {
677 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
678 	unsigned int i, j;
679 
680 	/* Freeze all the ports we are already tracking so that the replay
681 	 * does not double up the refcount.
682 	 */
683 	for (i = 0; i < utn->n_tables; i++)
684 		for (j = 0; j < info->tables[i].n_entries; j++)
685 			udp_tunnel_nic_entry_freeze_used(&utn->entries[i][j]);
686 	utn->missed = 0;
687 	utn->need_replay = 0;
688 
689 	udp_tunnel_get_rx_info(dev);
690 
691 	for (i = 0; i < utn->n_tables; i++)
692 		for (j = 0; j < info->tables[i].n_entries; j++)
693 			udp_tunnel_nic_entry_unfreeze(&utn->entries[i][j]);
694 }
695 
696 static void udp_tunnel_nic_device_sync_work(struct work_struct *work)
697 {
698 	struct udp_tunnel_nic *utn =
699 		container_of(work, struct udp_tunnel_nic, work);
700 
701 	rtnl_lock();
702 	utn->work_pending = 0;
703 	__udp_tunnel_nic_device_sync(utn->dev, utn);
704 
705 	if (utn->need_replay)
706 		udp_tunnel_nic_replay(utn->dev, utn);
707 	rtnl_unlock();
708 }
709 
710 static struct udp_tunnel_nic *
711 udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info,
712 		     unsigned int n_tables)
713 {
714 	struct udp_tunnel_nic *utn;
715 	unsigned int i;
716 
717 	utn = kzalloc(sizeof(*utn), GFP_KERNEL);
718 	if (!utn)
719 		return NULL;
720 	utn->n_tables = n_tables;
721 	INIT_WORK(&utn->work, udp_tunnel_nic_device_sync_work);
722 
723 	utn->entries = kmalloc_array(n_tables, sizeof(void *), GFP_KERNEL);
724 	if (!utn->entries)
725 		goto err_free_utn;
726 
727 	for (i = 0; i < n_tables; i++) {
728 		utn->entries[i] = kcalloc(info->tables[i].n_entries,
729 					  sizeof(*utn->entries[i]), GFP_KERNEL);
730 		if (!utn->entries[i])
731 			goto err_free_prev_entries;
732 	}
733 
734 	return utn;
735 
736 err_free_prev_entries:
737 	while (i--)
738 		kfree(utn->entries[i]);
739 	kfree(utn->entries);
740 err_free_utn:
741 	kfree(utn);
742 	return NULL;
743 }
744 
745 static int udp_tunnel_nic_register(struct net_device *dev)
746 {
747 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
748 	struct udp_tunnel_nic *utn;
749 	unsigned int n_tables, i;
750 
751 	BUILD_BUG_ON(sizeof(utn->missed) * BITS_PER_BYTE <
752 		     UDP_TUNNEL_NIC_MAX_TABLES);
753 
754 	if (WARN_ON(!info->set_port != !info->unset_port) ||
755 	    WARN_ON(!info->set_port == !info->sync_table) ||
756 	    WARN_ON(!info->tables[0].n_entries))
757 		return -EINVAL;
758 
759 	n_tables = 1;
760 	for (i = 1; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) {
761 		if (!info->tables[i].n_entries)
762 			continue;
763 
764 		n_tables++;
765 		if (WARN_ON(!info->tables[i - 1].n_entries))
766 			return -EINVAL;
767 	}
768 
769 	utn = udp_tunnel_nic_alloc(info, n_tables);
770 	if (!utn)
771 		return -ENOMEM;
772 
773 	utn->dev = dev;
774 	dev_hold(dev);
775 	dev->udp_tunnel_nic = utn;
776 
777 	if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY))
778 		udp_tunnel_get_rx_info(dev);
779 
780 	return 0;
781 }
782 
783 static void
784 udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
785 {
786 	unsigned int i;
787 
788 	/* Flush before we check work, so we don't waste time adding entries
789 	 * from the work which we will boot immediately.
790 	 */
791 	udp_tunnel_nic_flush(dev, utn);
792 
793 	/* Wait for the work to be done using the state, netdev core will
794 	 * retry unregister until we give up our reference on this device.
795 	 */
796 	if (utn->work_pending)
797 		return;
798 
799 	for (i = 0; i < utn->n_tables; i++)
800 		kfree(utn->entries[i]);
801 	kfree(utn->entries);
802 	kfree(utn);
803 	dev->udp_tunnel_nic = NULL;
804 	dev_put(dev);
805 }
806 
807 static int
808 udp_tunnel_nic_netdevice_event(struct notifier_block *unused,
809 			       unsigned long event, void *ptr)
810 {
811 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
812 	const struct udp_tunnel_nic_info *info;
813 	struct udp_tunnel_nic *utn;
814 
815 	info = dev->udp_tunnel_nic_info;
816 	if (!info)
817 		return NOTIFY_DONE;
818 
819 	if (event == NETDEV_REGISTER) {
820 		int err;
821 
822 		err = udp_tunnel_nic_register(dev);
823 		if (err)
824 			netdev_WARN(dev, "failed to register for UDP tunnel offloads: %d", err);
825 		return notifier_from_errno(err);
826 	}
827 	/* All other events will need the udp_tunnel_nic state */
828 	utn = dev->udp_tunnel_nic;
829 	if (!utn)
830 		return NOTIFY_DONE;
831 
832 	if (event == NETDEV_UNREGISTER) {
833 		udp_tunnel_nic_unregister(dev, utn);
834 		return NOTIFY_OK;
835 	}
836 
837 	/* All other events only matter if NIC has to be programmed open */
838 	if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY))
839 		return NOTIFY_DONE;
840 
841 	if (event == NETDEV_UP) {
842 		WARN_ON(!udp_tunnel_nic_is_empty(dev, utn));
843 		udp_tunnel_get_rx_info(dev);
844 		return NOTIFY_OK;
845 	}
846 	if (event == NETDEV_GOING_DOWN) {
847 		udp_tunnel_nic_flush(dev, utn);
848 		return NOTIFY_OK;
849 	}
850 
851 	return NOTIFY_DONE;
852 }
853 
/* Notifier block routing netdevice events to
 * udp_tunnel_nic_netdevice_event().
 */
static struct notifier_block udp_tunnel_nic_notifier_block __read_mostly = {
	.notifier_call = udp_tunnel_nic_netdevice_event,
};
857 
858 static int __init udp_tunnel_nic_init_module(void)
859 {
860 	int err;
861 
862 	udp_tunnel_nic_workqueue = alloc_workqueue("udp_tunnel_nic", 0, 0);
863 	if (!udp_tunnel_nic_workqueue)
864 		return -ENOMEM;
865 
866 	rtnl_lock();
867 	udp_tunnel_nic_ops = &__udp_tunnel_nic_ops;
868 	rtnl_unlock();
869 
870 	err = register_netdevice_notifier(&udp_tunnel_nic_notifier_block);
871 	if (err)
872 		goto err_unset_ops;
873 
874 	return 0;
875 
876 err_unset_ops:
877 	rtnl_lock();
878 	udp_tunnel_nic_ops = NULL;
879 	rtnl_unlock();
880 	destroy_workqueue(udp_tunnel_nic_workqueue);
881 	return err;
882 }
883 late_initcall(udp_tunnel_nic_init_module);
884 
/* Module exit: undo udp_tunnel_nic_init_module() in reverse order. */
static void __exit udp_tunnel_nic_cleanup_module(void)
{
	/* Stop receiving netdevice events first */
	unregister_netdevice_notifier(&udp_tunnel_nic_notifier_block);

	/* Unpublish the ops table, the pointer is changed under rtnl */
	rtnl_lock();
	udp_tunnel_nic_ops = NULL;
	rtnl_unlock();

	destroy_workqueue(udp_tunnel_nic_workqueue);
}
module_exit(udp_tunnel_nic_cleanup_module);
896 
897 MODULE_LICENSE("GPL");
898