xref: /openbmc/linux/net/openvswitch/flow_table.c (revision 77d84ff8)
/*
 * Copyright (c) 2007-2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#include "flow.h"
#include "datapath.h"
#include <linux/uaccess.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <net/llc_pdu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/llc.h>
#include <linux/module.h>
#include <linux/in.h>
#include <linux/rcupdate.h>
#include <linux/if_arp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <linux/icmpv6.h>
#include <linux/rculist.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ndisc.h>

#define TBL_MIN_BUCKETS		1024
#define REHASH_INTERVAL		(10 * 60 * HZ)

static struct kmem_cache *flow_cache;

static u16 range_n_bytes(const struct sw_flow_key_range *range)
{
	return range->end - range->start;
}

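/* Apply 'mask' to 'src' and store the result in 'dst', one long at a time
 * over the bytes in [mask->range.start, mask->range.end). For example,
 * masking an IPv4 destination of 10.1.2.3 with 255.255.0.0 yields
 * 10.1.0.0, so all flows differing only in wildcarded bits hash and
 * compare as equal.
 */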
void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
		       const struct sw_flow_mask *mask)
{
	const long *m = (long *)((u8 *)&mask->key + mask->range.start);
	const long *s = (long *)((u8 *)src + mask->range.start);
	long *d = (long *)((u8 *)dst + mask->range.start);
	int i;

	/* The memory outside of 'mask->range' is not set, since
	 * further operations on 'dst' only use contents within
	 * 'mask->range'.
	 */
	for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long))
		*d++ = *s++ & *m++;
}

struct sw_flow *ovs_flow_alloc(void)
{
	struct sw_flow *flow;

	flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
	if (!flow)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&flow->lock);
	flow->sf_acts = NULL;
	flow->mask = NULL;

	return flow;
}

int ovs_flow_tbl_count(struct flow_table *table)
{
	return table->count;
}

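/* Allocate and initialize 'n_buckets' empty hlist heads. A flex_array is
 * used rather than a flat kmalloc() so a large bucket array does not
 * require a high-order contiguous allocation.
 */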
static struct flex_array *alloc_buckets(unsigned int n_buckets)
{
	struct flex_array *buckets;
	int i, err;

	buckets = flex_array_alloc(sizeof(struct hlist_head),
				   n_buckets, GFP_KERNEL);
	if (!buckets)
		return NULL;

	err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL);
	if (err) {
		flex_array_free(buckets);
		return NULL;
	}

	for (i = 0; i < n_buckets; i++)
		INIT_HLIST_HEAD((struct hlist_head *)
					flex_array_get(buckets, i));

	return buckets;
}

static void flow_free(struct sw_flow *flow)
{
	kfree((struct sw_flow_acts __force *)flow->sf_acts);
	kmem_cache_free(flow_cache, flow);
}

static void rcu_free_flow_callback(struct rcu_head *rcu)
{
	struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);

	flow_free(flow);
}

static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu)
{
	struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu);

	kfree(mask);
}

static void flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred)
{
	if (!mask)
		return;

	BUG_ON(!mask->ref_count);
	mask->ref_count--;

	if (!mask->ref_count) {
		list_del_rcu(&mask->list);
		if (deferred)
			call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb);
		else
			kfree(mask);
	}
}

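/* Drop the flow's reference on its mask and free the flow itself. With
 * 'deferred' set, freeing happens after an RCU grace period so that
 * readers still walking the hash lists remain safe.
 */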
void ovs_flow_free(struct sw_flow *flow, bool deferred)
{
	if (!flow)
		return;

	flow_mask_del_ref(flow->mask, deferred);

	if (deferred)
		call_rcu(&flow->rcu, rcu_free_flow_callback);
	else
		flow_free(flow);
}

static void free_buckets(struct flex_array *buckets)
{
	flex_array_free(buckets);
}

static void __table_instance_destroy(struct table_instance *ti)
{
	int i;

	if (ti->keep_flows)
		goto skip_flows;

	for (i = 0; i < ti->n_buckets; i++) {
		struct sw_flow *flow;
		struct hlist_head *head = flex_array_get(ti->buckets, i);
		struct hlist_node *n;
		int ver = ti->node_ver;

		hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
			hlist_del(&flow->hash_node[ver]);
			ovs_flow_free(flow, false);
		}
	}

skip_flows:
	free_buckets(ti->buckets);
	kfree(ti);
}

static struct table_instance *table_instance_alloc(int new_size)
{
	struct table_instance *ti = kmalloc(sizeof(*ti), GFP_KERNEL);

	if (!ti)
		return NULL;

	ti->buckets = alloc_buckets(new_size);

	if (!ti->buckets) {
		kfree(ti);
		return NULL;
	}
	ti->n_buckets = new_size;
	ti->node_ver = 0;
	ti->keep_flows = false;
	get_random_bytes(&ti->hash_seed, sizeof(u32));

	return ti;
}

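/* Initialize 'table' with a single TBL_MIN_BUCKETS-sized instance, an
 * empty mask list, and a fresh rehash timestamp.
 */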
int ovs_flow_tbl_init(struct flow_table *table)
{
	struct table_instance *ti;

	ti = table_instance_alloc(TBL_MIN_BUCKETS);

	if (!ti)
		return -ENOMEM;

	rcu_assign_pointer(table->ti, ti);
	INIT_LIST_HEAD(&table->mask_list);
	table->last_rehash = jiffies;
	table->count = 0;
	return 0;
}

static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
{
	struct table_instance *ti = container_of(rcu, struct table_instance, rcu);

	__table_instance_destroy(ti);
}

static void table_instance_destroy(struct table_instance *ti, bool deferred)
{
	if (!ti)
		return;

	if (deferred)
		call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
	else
		__table_instance_destroy(ti);
}

void ovs_flow_tbl_destroy(struct flow_table *table)
{
	struct table_instance *ti = ovsl_dereference(table->ti);

	table_instance_destroy(ti, false);
}

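/* Return the next flow after the (*bucket, *last) cursor and advance the
 * cursor, or NULL once the table is exhausted. '*bucket' indexes the hash
 * bucket and '*last' counts entries already returned within that bucket,
 * letting a dump (e.g. a Netlink flow dump) resume across calls.
 */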
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
				       u32 *bucket, u32 *last)
{
	struct sw_flow *flow;
	struct hlist_head *head;
	int ver;
	int i;

	ver = ti->node_ver;
	while (*bucket < ti->n_buckets) {
		i = 0;
		head = flex_array_get(ti->buckets, *bucket);
		hlist_for_each_entry_rcu(flow, head, hash_node[ver]) {
			if (i < *last) {
				i++;
				continue;
			}
			*last = i + 1;
			return flow;
		}
		(*bucket)++;
		*last = 0;
	}

	return NULL;
}

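/* Map a flow hash to its bucket. The hash is folded with the per-instance
 * random 'hash_seed' so bucket distribution changes on every rehash;
 * n_buckets is always a power of two, so the mask selects the low bits.
 */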
static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash)
{
	hash = jhash_1word(hash, ti->hash_seed);
	return flex_array_get(ti->buckets,
				(hash & (ti->n_buckets - 1)));
}

static void table_instance_insert(struct table_instance *ti, struct sw_flow *flow)
{
	struct hlist_head *head;

	head = find_bucket(ti, flow->hash);
	hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head);
}

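/* Link every flow in 'old' into 'new'. Each sw_flow carries two hash_node
 * entries; 'new' uses the opposite node_ver, so a flow can sit in both
 * instances at once and RCU readers of 'old' stay valid while it is being
 * replaced. Setting old->keep_flows prevents the flows from being freed
 * when 'old' is destroyed.
 */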
static void flow_table_copy_flows(struct table_instance *old,
				  struct table_instance *new)
{
	int old_ver;
	int i;

	old_ver = old->node_ver;
	new->node_ver = !old_ver;

	/* Insert in new table. */
	for (i = 0; i < old->n_buckets; i++) {
		struct sw_flow *flow;
		struct hlist_head *head;

		head = flex_array_get(old->buckets, i);

		hlist_for_each_entry(flow, head, hash_node[old_ver])
			table_instance_insert(new, flow);
	}

	old->keep_flows = true;
}

static struct table_instance *table_instance_rehash(struct table_instance *ti,
					    int n_buckets)
{
	struct table_instance *new_ti;

	new_ti = table_instance_alloc(n_buckets);
	if (!new_ti)
		return NULL;

	flow_table_copy_flows(ti, new_ti);

	return new_ti;
}

int ovs_flow_tbl_flush(struct flow_table *flow_table)
{
	struct table_instance *old_ti;
	struct table_instance *new_ti;

	old_ti = ovsl_dereference(flow_table->ti);
	new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
	if (!new_ti)
		return -ENOMEM;

	rcu_assign_pointer(flow_table->ti, new_ti);
	flow_table->last_rehash = jiffies;
	flow_table->count = 0;

	table_instance_destroy(old_ti, true);
	return 0;
}

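/* Hash the u32 words of 'key' between 'key_start' and 'key_end'. Callers
 * pass a masked key together with the mask's range, so only bits that
 * matter for that mask contribute to the hash.
 */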
static u32 flow_hash(const struct sw_flow_key *key, int key_start,
		     int key_end)
{
	u32 *hash_key = (u32 *)((u8 *)key + key_start);
	int hash_u32s = (key_end - key_start) >> 2;

	/* Make sure the number of hash bytes is a multiple of sizeof(u32). */
	BUILD_BUG_ON(sizeof(long) % sizeof(u32));

	return jhash2(hash_key, hash_u32s, 0);
}

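/* Return the byte offset at which masking and comparison of 'key' begin.
 * The tunnel key sits at the front of struct sw_flow_key; when it is
 * unused (no tunnel IPv4 destination set), it can be skipped and the key
 * is processed from the 'phy' member, rounded down to long alignment.
 */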
static int flow_key_start(const struct sw_flow_key *key)
{
	if (key->tun_key.ipv4_dst)
		return 0;
	else
		return rounddown(offsetof(struct sw_flow_key, phy),
					  sizeof(long));
}

static bool cmp_key(const struct sw_flow_key *key1,
		    const struct sw_flow_key *key2,
		    int key_start, int key_end)
{
	const long *cp1 = (long *)((u8 *)key1 + key_start);
	const long *cp2 = (long *)((u8 *)key2 + key_start);
	long diffs = 0;
	int i;

	for (i = key_start; i < key_end;  i += sizeof(long))
		diffs |= *cp1++ ^ *cp2++;

	return diffs == 0;
}

static bool flow_cmp_masked_key(const struct sw_flow *flow,
				const struct sw_flow_key *key,
				int key_start, int key_end)
{
	return cmp_key(&flow->key, key, key_start, key_end);
}

bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
			       struct sw_flow_match *match)
{
	struct sw_flow_key *key = match->key;
	int key_start = flow_key_start(key);
	int key_end = match->range.end;

	return cmp_key(&flow->unmasked_key, key, key_start, key_end);
}

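/* Look up 'unmasked' under a single 'mask': apply the mask, hash the
 * masked bytes, then scan the bucket for a flow with the same mask, the
 * same hash, and matching masked key contents.
 */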
static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
					  const struct sw_flow_key *unmasked,
					  struct sw_flow_mask *mask)
{
	struct sw_flow *flow;
	struct hlist_head *head;
	int key_start = mask->range.start;
	int key_end = mask->range.end;
	u32 hash;
	struct sw_flow_key masked_key;

	ovs_flow_mask_key(&masked_key, unmasked, mask);
	hash = flow_hash(&masked_key, key_start, key_end);
	head = find_bucket(ti, hash);
	hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) {
		if (flow->mask == mask && flow->hash == hash &&
		    flow_cmp_masked_key(flow, &masked_key,
					  key_start, key_end))
			return flow;
	}
	return NULL;
}

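/* Wildcarded lookup: try each mask on the list in turn until one yields a
 * match. '*n_mask_hit' returns how many masks were tried, which callers
 * can use to gauge megaflow lookup cost. Must be called with the RCU read
 * lock held.
 */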
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
				    const struct sw_flow_key *key,
				    u32 *n_mask_hit)
{
	struct table_instance *ti = rcu_dereference(tbl->ti);
	struct sw_flow_mask *mask;
	struct sw_flow *flow;

	*n_mask_hit = 0;
	list_for_each_entry_rcu(mask, &tbl->mask_list, list) {
		(*n_mask_hit)++;
		flow = masked_flow_lookup(ti, key, mask);
		if (flow)  /* Found */
			return flow;
	}
	return NULL;
}

int ovs_flow_tbl_num_masks(const struct flow_table *table)
{
	struct sw_flow_mask *mask;
	int num = 0;

	list_for_each_entry(mask, &table->mask_list, list)
		num++;

	return num;
}

static struct table_instance *table_instance_expand(struct table_instance *ti)
{
	return table_instance_rehash(ti, ti->n_buckets * 2);
}

void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
{
	struct table_instance *ti = ovsl_dereference(table->ti);

	BUG_ON(table->count == 0);
	hlist_del_rcu(&flow->hash_node[ti->node_ver]);
	table->count--;
}

static struct sw_flow_mask *mask_alloc(void)
{
	struct sw_flow_mask *mask;

	mask = kmalloc(sizeof(*mask), GFP_KERNEL);
	if (mask)
		mask->ref_count = 0;

	return mask;
}

static void mask_add_ref(struct sw_flow_mask *mask)
{
	mask->ref_count++;
}

static bool mask_equal(const struct sw_flow_mask *a,
		       const struct sw_flow_mask *b)
{
	u8 *a_ = (u8 *)&a->key + a->range.start;
	u8 *b_ = (u8 *)&b->key + b->range.start;

	return  (a->range.end == b->range.end)
		&& (a->range.start == b->range.start)
		&& (memcmp(a_, b_, range_n_bytes(&a->range)) == 0);
}

static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
					   const struct sw_flow_mask *mask)
{
	struct list_head *ml;

	list_for_each(ml, &tbl->mask_list) {
		struct sw_flow_mask *m;
		m = container_of(ml, struct sw_flow_mask, list);
		if (mask_equal(mask, m))
			return m;
	}

	return NULL;
}

/* Add the mask described by 'new' to the mask list and point 'flow' at it.
 * If an equal mask is already on the list it is reused; otherwise a copy
 * of 'new' is allocated and inserted. Either way the mask's reference
 * count is bumped.
 */
static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
			    struct sw_flow_mask *new)
{
	struct sw_flow_mask *mask;

	mask = flow_mask_find(tbl, new);
	if (!mask) {
		/* Allocate a new mask if none exists. */
		mask = mask_alloc();
		if (!mask)
			return -ENOMEM;
		mask->key = new->key;
		mask->range = new->range;
		list_add_rcu(&mask->list, &tbl->mask_list);
	}

	mask_add_ref(mask);
	flow->mask = mask;
	return 0;
}

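/* Insert 'flow' under 'mask'. The flow's hash covers only the masked
 * range. After insertion the table instance is doubled when the flow
 * count exceeds the bucket count, or rebuilt with a fresh random seed
 * every REHASH_INTERVAL, which redistributes flows across buckets; the
 * old instance is then freed via RCU.
 */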
int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
			struct sw_flow_mask *mask)
{
	struct table_instance *new_ti = NULL;
	struct table_instance *ti;
	int err;

	err = flow_mask_insert(table, flow, mask);
	if (err)
		return err;

	flow->hash = flow_hash(&flow->key, flow->mask->range.start,
			flow->mask->range.end);
	ti = ovsl_dereference(table->ti);
	table_instance_insert(ti, flow);
	table->count++;

	/* Expand table, if necessary, to make room. */
	if (table->count > ti->n_buckets)
		new_ti = table_instance_expand(ti);
	else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL))
		new_ti = table_instance_rehash(ti, ti->n_buckets);

	if (new_ti) {
		rcu_assign_pointer(table->ti, new_ti);
		table_instance_destroy(ti, true);
		table->last_rehash = jiffies;
	}
	return 0;
}

/* Initializes the flow module.
 * Returns zero if successful or a negative error code. */
int ovs_flow_init(void)
{
	BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
	BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));

	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
					0, NULL);
	if (flow_cache == NULL)
		return -ENOMEM;

	return 0;
}

/* Uninitializes the flow module. */
void ovs_flow_exit(void)
{
	kmem_cache_destroy(flow_cache);
}