1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Randomized tests for eBPF longest-prefix-match maps
4  *
5  * This program runs randomized tests against the lpm-bpf-map. It implements a
6  * "Trivial Longest Prefix Match" (tlpm) based on simple, linear, singly linked
7  * lists. The implementation should be pretty straightforward.
8  *
9  * Based on tlpm, this inserts randomized data into bpf-lpm-maps and verifies
10  * the trie-based bpf-map implementation behaves the same way as tlpm.
11  */
12 
13 #include <assert.h>
14 #include <errno.h>
15 #include <inttypes.h>
16 #include <linux/bpf.h>
17 #include <pthread.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <time.h>
22 #include <unistd.h>
23 #include <arpa/inet.h>
24 #include <sys/time.h>
25 #include <sys/resource.h>
26 
27 #include <bpf/bpf.h>
28 #include "bpf_util.h"
29 
/* One entry of the "trivial LPM" reference list: a prefix of @n_bits bits
 * held in @key, chained to the following entry through @next.
 */
struct tlpm_node {
	struct tlpm_node *next;	/* following entry in the list */
	size_t n_bits;		/* prefix length, counted in bits */
	uint8_t key[];		/* prefix bytes; tlpm_add() stores (n_bits + 7) / 8 */
};
35 
/* Defined further down; declared here because tlpm_add() calls it to find an
 * entry that is already present.
 */
static struct tlpm_node *tlpm_match(struct tlpm_node *list, const uint8_t *key,
				    size_t n_bits);
39 
40 static struct tlpm_node *tlpm_add(struct tlpm_node *list,
41 				  const uint8_t *key,
42 				  size_t n_bits)
43 {
44 	struct tlpm_node *node;
45 	size_t n;
46 
47 	n = (n_bits + 7) / 8;
48 
49 	/* 'overwrite' an equivalent entry if one already exists */
50 	node = tlpm_match(list, key, n_bits);
51 	if (node && node->n_bits == n_bits) {
52 		memcpy(node->key, key, n);
53 		return list;
54 	}
55 
56 	/* add new entry with @key/@n_bits to @list and return new head */
57 
58 	node = malloc(sizeof(*node) + n);
59 	assert(node);
60 
61 	node->next = list;
62 	node->n_bits = n_bits;
63 	memcpy(node->key, key, n);
64 
65 	return node;
66 }
67 
68 static void tlpm_clear(struct tlpm_node *list)
69 {
70 	struct tlpm_node *node;
71 
72 	/* free all entries in @list */
73 
74 	while ((node = list)) {
75 		list = list->next;
76 		free(node);
77 	}
78 }
79 
80 static struct tlpm_node *tlpm_match(struct tlpm_node *list,
81 				    const uint8_t *key,
82 				    size_t n_bits)
83 {
84 	struct tlpm_node *best = NULL;
85 	size_t i;
86 
87 	/* Perform longest prefix-match on @key/@n_bits. That is, iterate all
88 	 * entries and match each prefix against @key. Remember the "best"
89 	 * entry we find (i.e., the longest prefix that matches) and return it
90 	 * to the caller when done.
91 	 */
92 
93 	for ( ; list; list = list->next) {
94 		for (i = 0; i < n_bits && i < list->n_bits; ++i) {
95 			if ((key[i / 8] & (1 << (7 - i % 8))) !=
96 			    (list->key[i / 8] & (1 << (7 - i % 8))))
97 				break;
98 		}
99 
100 		if (i >= list->n_bits) {
101 			if (!best || i > best->n_bits)
102 				best = list;
103 		}
104 	}
105 
106 	return best;
107 }
108 
109 static struct tlpm_node *tlpm_delete(struct tlpm_node *list,
110 				     const uint8_t *key,
111 				     size_t n_bits)
112 {
113 	struct tlpm_node *best = tlpm_match(list, key, n_bits);
114 	struct tlpm_node *node;
115 
116 	if (!best || best->n_bits != n_bits)
117 		return list;
118 
119 	if (best == list) {
120 		node = best->next;
121 		free(best);
122 		return node;
123 	}
124 
125 	for (node = list; node; node = node->next) {
126 		if (node->next == best) {
127 			node->next = best->next;
128 			free(best);
129 			return list;
130 		}
131 	}
132 	/* should never get here */
133 	assert(0);
134 	return list;
135 }
136 
/* Very basic, static checks that the tlpm reference implementation itself
 * behaves as expected before it is used to judge the bpf map.
 */
static void test_lpm_basic(void)
{
	const uint8_t k_ff[] = { 0xff };
	const uint8_t k_7f[] = { 0x7f };
	const uint8_t k_fe[] = { 0xfe };
	const uint8_t k_ff_ff[] = { 0xff, 0xff };
	const uint8_t k_ff_00[] = { 0xff, 0x00 };
	const uint8_t k_7f_ff[] = { 0x7f, 0xff };
	struct tlpm_node *head = NULL, *pfx8, *pfx16;

	/* an empty list matches nothing */
	assert(!tlpm_match(head, k_ff, 8));

	/* a single 8-bit prefix */
	head = tlpm_add(head, k_ff, 8);
	pfx8 = head;
	assert(pfx8 == tlpm_match(head, k_ff, 8));
	assert(pfx8 == tlpm_match(head, k_ff_ff, 16));
	assert(pfx8 == tlpm_match(head, k_ff_00, 16));
	assert(!tlpm_match(head, k_7f, 8));
	assert(!tlpm_match(head, k_fe, 8));
	assert(!tlpm_match(head, k_ff, 7));

	/* add a longer 16-bit prefix on top of it */
	head = tlpm_add(head, k_ff_ff, 16);
	pfx16 = head;
	assert(pfx8 == tlpm_match(head, k_ff, 8));
	assert(pfx16 == tlpm_match(head, k_ff_ff, 16));
	assert(pfx8 == tlpm_match(head, k_ff_ff, 15));
	assert(!tlpm_match(head, k_7f_ff, 16));

	/* dropping the 16-bit prefix falls back to the 8-bit one */
	head = tlpm_delete(head, k_ff_ff, 16);
	assert(pfx8 == tlpm_match(head, k_ff, 8));
	assert(pfx8 == tlpm_match(head, k_ff_ff, 16));

	/* dropping the 8-bit prefix leaves nothing to match */
	head = tlpm_delete(head, k_ff, 8);
	assert(!tlpm_match(head, k_ff, 8));

	tlpm_clear(head);
}
168 
169 static void test_lpm_order(void)
170 {
171 	struct tlpm_node *t1, *t2, *l1 = NULL, *l2 = NULL;
172 	size_t i, j;
173 
174 	/* Verify the tlpm implementation works correctly regardless of the
175 	 * order of entries. Insert a random set of entries into @l1, and copy
176 	 * the same data in reverse order into @l2. Then verify a lookup of
177 	 * random keys will yield the same result in both sets.
178 	 */
179 
180 	for (i = 0; i < (1 << 12); ++i)
181 		l1 = tlpm_add(l1, (uint8_t[]){
182 					rand() % 0xff,
183 					rand() % 0xff,
184 				}, rand() % 16 + 1);
185 
186 	for (t1 = l1; t1; t1 = t1->next)
187 		l2 = tlpm_add(l2, t1->key, t1->n_bits);
188 
189 	for (i = 0; i < (1 << 8); ++i) {
190 		uint8_t key[] = { rand() % 0xff, rand() % 0xff };
191 
192 		t1 = tlpm_match(l1, key, 16);
193 		t2 = tlpm_match(l2, key, 16);
194 
195 		assert(!t1 == !t2);
196 		if (t1) {
197 			assert(t1->n_bits == t2->n_bits);
198 			for (j = 0; j < t1->n_bits; ++j)
199 				assert((t1->key[j / 8] & (1 << (7 - j % 8))) ==
200 				       (t2->key[j / 8] & (1 << (7 - j % 8))));
201 		}
202 	}
203 
204 	tlpm_clear(l1);
205 	tlpm_clear(l2);
206 }
207 
208 static void test_lpm_map(int keysize)
209 {
210 	size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups;
211 	struct tlpm_node *t, *list = NULL;
212 	struct bpf_lpm_trie_key *key;
213 	uint8_t *data, *value;
214 	int r, map;
215 
216 	/* Compare behavior of tlpm vs. bpf-lpm. Create a randomized set of
217 	 * prefixes and insert it into both tlpm and bpf-lpm. Then run some
218 	 * randomized lookups and verify both maps return the same result.
219 	 */
220 
221 	n_matches = 0;
222 	n_matches_after_delete = 0;
223 	n_nodes = 1 << 8;
224 	n_lookups = 1 << 16;
225 
226 	data = alloca(keysize);
227 	memset(data, 0, keysize);
228 
229 	value = alloca(keysize + 1);
230 	memset(value, 0, keysize + 1);
231 
232 	key = alloca(sizeof(*key) + keysize);
233 	memset(key, 0, sizeof(*key) + keysize);
234 
235 	map = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
236 			     sizeof(*key) + keysize,
237 			     keysize + 1,
238 			     4096,
239 			     BPF_F_NO_PREALLOC);
240 	assert(map >= 0);
241 
242 	for (i = 0; i < n_nodes; ++i) {
243 		for (j = 0; j < keysize; ++j)
244 			value[j] = rand() & 0xff;
245 		value[keysize] = rand() % (8 * keysize + 1);
246 
247 		list = tlpm_add(list, value, value[keysize]);
248 
249 		key->prefixlen = value[keysize];
250 		memcpy(key->data, value, keysize);
251 		r = bpf_map_update_elem(map, key, value, 0);
252 		assert(!r);
253 	}
254 
255 	for (i = 0; i < n_lookups; ++i) {
256 		for (j = 0; j < keysize; ++j)
257 			data[j] = rand() & 0xff;
258 
259 		t = tlpm_match(list, data, 8 * keysize);
260 
261 		key->prefixlen = 8 * keysize;
262 		memcpy(key->data, data, keysize);
263 		r = bpf_map_lookup_elem(map, key, value);
264 		assert(!r || errno == ENOENT);
265 		assert(!t == !!r);
266 
267 		if (t) {
268 			++n_matches;
269 			assert(t->n_bits == value[keysize]);
270 			for (j = 0; j < t->n_bits; ++j)
271 				assert((t->key[j / 8] & (1 << (7 - j % 8))) ==
272 				       (value[j / 8] & (1 << (7 - j % 8))));
273 		}
274 	}
275 
276 	/* Remove the first half of the elements in the tlpm and the
277 	 * corresponding nodes from the bpf-lpm.  Then run the same
278 	 * large number of random lookups in both and make sure they match.
279 	 * Note: we need to count the number of nodes actually inserted
280 	 * since there may have been duplicates.
281 	 */
282 	for (i = 0, t = list; t; i++, t = t->next)
283 		;
284 	for (j = 0; j < i / 2; ++j) {
285 		key->prefixlen = list->n_bits;
286 		memcpy(key->data, list->key, keysize);
287 		r = bpf_map_delete_elem(map, key);
288 		assert(!r);
289 		list = tlpm_delete(list, list->key, list->n_bits);
290 		assert(list);
291 	}
292 	for (i = 0; i < n_lookups; ++i) {
293 		for (j = 0; j < keysize; ++j)
294 			data[j] = rand() & 0xff;
295 
296 		t = tlpm_match(list, data, 8 * keysize);
297 
298 		key->prefixlen = 8 * keysize;
299 		memcpy(key->data, data, keysize);
300 		r = bpf_map_lookup_elem(map, key, value);
301 		assert(!r || errno == ENOENT);
302 		assert(!t == !!r);
303 
304 		if (t) {
305 			++n_matches_after_delete;
306 			assert(t->n_bits == value[keysize]);
307 			for (j = 0; j < t->n_bits; ++j)
308 				assert((t->key[j / 8] & (1 << (7 - j % 8))) ==
309 				       (value[j / 8] & (1 << (7 - j % 8))));
310 		}
311 	}
312 
313 	close(map);
314 	tlpm_clear(list);
315 
316 	/* With 255 random nodes in the map, we are pretty likely to match
317 	 * something on every lookup. For statistics, use this:
318 	 *
319 	 *     printf("          nodes: %zu\n"
320 	 *            "        lookups: %zu\n"
321 	 *            "        matches: %zu\n"
322 	 *            "matches(delete): %zu\n",
323 	 *            n_nodes, n_lookups, n_matches, n_matches_after_delete);
324 	 */
325 }
326 
327 /* Test the implementation with some 'real world' examples */
328 
329 static void test_lpm_ipaddr(void)
330 {
331 	struct bpf_lpm_trie_key *key_ipv4;
332 	struct bpf_lpm_trie_key *key_ipv6;
333 	size_t key_size_ipv4;
334 	size_t key_size_ipv6;
335 	int map_fd_ipv4;
336 	int map_fd_ipv6;
337 	__u64 value;
338 
339 	key_size_ipv4 = sizeof(*key_ipv4) + sizeof(__u32);
340 	key_size_ipv6 = sizeof(*key_ipv6) + sizeof(__u32) * 4;
341 	key_ipv4 = alloca(key_size_ipv4);
342 	key_ipv6 = alloca(key_size_ipv6);
343 
344 	map_fd_ipv4 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
345 				     key_size_ipv4, sizeof(value),
346 				     100, BPF_F_NO_PREALLOC);
347 	assert(map_fd_ipv4 >= 0);
348 
349 	map_fd_ipv6 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
350 				     key_size_ipv6, sizeof(value),
351 				     100, BPF_F_NO_PREALLOC);
352 	assert(map_fd_ipv6 >= 0);
353 
354 	/* Fill data some IPv4 and IPv6 address ranges */
355 	value = 1;
356 	key_ipv4->prefixlen = 16;
357 	inet_pton(AF_INET, "192.168.0.0", key_ipv4->data);
358 	assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
359 
360 	value = 2;
361 	key_ipv4->prefixlen = 24;
362 	inet_pton(AF_INET, "192.168.0.0", key_ipv4->data);
363 	assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
364 
365 	value = 3;
366 	key_ipv4->prefixlen = 24;
367 	inet_pton(AF_INET, "192.168.128.0", key_ipv4->data);
368 	assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
369 
370 	value = 5;
371 	key_ipv4->prefixlen = 24;
372 	inet_pton(AF_INET, "192.168.1.0", key_ipv4->data);
373 	assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
374 
375 	value = 4;
376 	key_ipv4->prefixlen = 23;
377 	inet_pton(AF_INET, "192.168.0.0", key_ipv4->data);
378 	assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
379 
380 	value = 0xdeadbeef;
381 	key_ipv6->prefixlen = 64;
382 	inet_pton(AF_INET6, "2a00:1450:4001:814::200e", key_ipv6->data);
383 	assert(bpf_map_update_elem(map_fd_ipv6, key_ipv6, &value, 0) == 0);
384 
385 	/* Set tprefixlen to maximum for lookups */
386 	key_ipv4->prefixlen = 32;
387 	key_ipv6->prefixlen = 128;
388 
389 	/* Test some lookups that should come back with a value */
390 	inet_pton(AF_INET, "192.168.128.23", key_ipv4->data);
391 	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == 0);
392 	assert(value == 3);
393 
394 	inet_pton(AF_INET, "192.168.0.1", key_ipv4->data);
395 	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == 0);
396 	assert(value == 2);
397 
398 	inet_pton(AF_INET6, "2a00:1450:4001:814::", key_ipv6->data);
399 	assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == 0);
400 	assert(value == 0xdeadbeef);
401 
402 	inet_pton(AF_INET6, "2a00:1450:4001:814::1", key_ipv6->data);
403 	assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == 0);
404 	assert(value == 0xdeadbeef);
405 
406 	/* Test some lookups that should not match any entry */
407 	inet_pton(AF_INET, "10.0.0.1", key_ipv4->data);
408 	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
409 	       errno == ENOENT);
410 
411 	inet_pton(AF_INET, "11.11.11.11", key_ipv4->data);
412 	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
413 	       errno == ENOENT);
414 
415 	inet_pton(AF_INET6, "2a00:ffff::", key_ipv6->data);
416 	assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -1 &&
417 	       errno == ENOENT);
418 
419 	close(map_fd_ipv4);
420 	close(map_fd_ipv6);
421 }
422 
423 static void test_lpm_delete(void)
424 {
425 	struct bpf_lpm_trie_key *key;
426 	size_t key_size;
427 	int map_fd;
428 	__u64 value;
429 
430 	key_size = sizeof(*key) + sizeof(__u32);
431 	key = alloca(key_size);
432 
433 	map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
434 				key_size, sizeof(value),
435 				100, BPF_F_NO_PREALLOC);
436 	assert(map_fd >= 0);
437 
438 	/* Add nodes:
439 	 * 192.168.0.0/16   (1)
440 	 * 192.168.0.0/24   (2)
441 	 * 192.168.128.0/24 (3)
442 	 * 192.168.1.0/24   (4)
443 	 *
444 	 *         (1)
445 	 *        /   \
446          *     (IM)    (3)
447 	 *    /   \
448          *   (2)  (4)
449 	 */
450 	value = 1;
451 	key->prefixlen = 16;
452 	inet_pton(AF_INET, "192.168.0.0", key->data);
453 	assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
454 
455 	value = 2;
456 	key->prefixlen = 24;
457 	inet_pton(AF_INET, "192.168.0.0", key->data);
458 	assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
459 
460 	value = 3;
461 	key->prefixlen = 24;
462 	inet_pton(AF_INET, "192.168.128.0", key->data);
463 	assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
464 
465 	value = 4;
466 	key->prefixlen = 24;
467 	inet_pton(AF_INET, "192.168.1.0", key->data);
468 	assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
469 
470 	/* remove non-existent node */
471 	key->prefixlen = 32;
472 	inet_pton(AF_INET, "10.0.0.1", key->data);
473 	assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
474 		errno == ENOENT);
475 
476 	/* assert initial lookup */
477 	key->prefixlen = 32;
478 	inet_pton(AF_INET, "192.168.0.1", key->data);
479 	assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
480 	assert(value == 2);
481 
482 	/* remove leaf node */
483 	key->prefixlen = 24;
484 	inet_pton(AF_INET, "192.168.0.0", key->data);
485 	assert(bpf_map_delete_elem(map_fd, key) == 0);
486 
487 	key->prefixlen = 32;
488 	inet_pton(AF_INET, "192.168.0.1", key->data);
489 	assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
490 	assert(value == 1);
491 
492 	/* remove leaf (and intermediary) node */
493 	key->prefixlen = 24;
494 	inet_pton(AF_INET, "192.168.1.0", key->data);
495 	assert(bpf_map_delete_elem(map_fd, key) == 0);
496 
497 	key->prefixlen = 32;
498 	inet_pton(AF_INET, "192.168.1.1", key->data);
499 	assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
500 	assert(value == 1);
501 
502 	/* remove root node */
503 	key->prefixlen = 16;
504 	inet_pton(AF_INET, "192.168.0.0", key->data);
505 	assert(bpf_map_delete_elem(map_fd, key) == 0);
506 
507 	key->prefixlen = 32;
508 	inet_pton(AF_INET, "192.168.128.1", key->data);
509 	assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
510 	assert(value == 3);
511 
512 	/* remove last node */
513 	key->prefixlen = 24;
514 	inet_pton(AF_INET, "192.168.128.0", key->data);
515 	assert(bpf_map_delete_elem(map_fd, key) == 0);
516 
517 	key->prefixlen = 32;
518 	inet_pton(AF_INET, "192.168.128.1", key->data);
519 	assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
520 		errno == ENOENT);
521 
522 	close(map_fd);
523 }
524 
525 static void test_lpm_get_next_key(void)
526 {
527 	struct bpf_lpm_trie_key *key_p, *next_key_p;
528 	size_t key_size;
529 	__u32 value = 0;
530 	int map_fd;
531 
532 	key_size = sizeof(*key_p) + sizeof(__u32);
533 	key_p = alloca(key_size);
534 	next_key_p = alloca(key_size);
535 
536 	map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, sizeof(value),
537 				100, BPF_F_NO_PREALLOC);
538 	assert(map_fd >= 0);
539 
540 	/* empty tree. get_next_key should return ENOENT */
541 	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 &&
542 	       errno == ENOENT);
543 
544 	/* get and verify the first key, get the second one should fail. */
545 	key_p->prefixlen = 16;
546 	inet_pton(AF_INET, "192.168.0.0", key_p->data);
547 	assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
548 
549 	memset(key_p, 0, key_size);
550 	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
551 	assert(key_p->prefixlen == 16 && key_p->data[0] == 192 &&
552 	       key_p->data[1] == 168);
553 
554 	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
555 	       errno == ENOENT);
556 
557 	/* no exact matching key should get the first one in post order. */
558 	key_p->prefixlen = 8;
559 	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
560 	assert(key_p->prefixlen == 16 && key_p->data[0] == 192 &&
561 	       key_p->data[1] == 168);
562 
563 	/* add one more element (total two) */
564 	key_p->prefixlen = 24;
565 	inet_pton(AF_INET, "192.168.0.0", key_p->data);
566 	assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
567 
568 	memset(key_p, 0, key_size);
569 	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
570 	assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
571 	       key_p->data[1] == 168 && key_p->data[2] == 0);
572 
573 	memset(next_key_p, 0, key_size);
574 	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
575 	assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
576 	       next_key_p->data[1] == 168);
577 
578 	memcpy(key_p, next_key_p, key_size);
579 	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
580 	       errno == ENOENT);
581 
582 	/* Add one more element (total three) */
583 	key_p->prefixlen = 24;
584 	inet_pton(AF_INET, "192.168.128.0", key_p->data);
585 	assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
586 
587 	memset(key_p, 0, key_size);
588 	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
589 	assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
590 	       key_p->data[1] == 168 && key_p->data[2] == 0);
591 
592 	memset(next_key_p, 0, key_size);
593 	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
594 	assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
595 	       next_key_p->data[1] == 168 && next_key_p->data[2] == 128);
596 
597 	memcpy(key_p, next_key_p, key_size);
598 	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
599 	assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
600 	       next_key_p->data[1] == 168);
601 
602 	memcpy(key_p, next_key_p, key_size);
603 	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
604 	       errno == ENOENT);
605 
606 	/* Add one more element (total four) */
607 	key_p->prefixlen = 24;
608 	inet_pton(AF_INET, "192.168.1.0", key_p->data);
609 	assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
610 
611 	memset(key_p, 0, key_size);
612 	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
613 	assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
614 	       key_p->data[1] == 168 && key_p->data[2] == 0);
615 
616 	memset(next_key_p, 0, key_size);
617 	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
618 	assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
619 	       next_key_p->data[1] == 168 && next_key_p->data[2] == 1);
620 
621 	memcpy(key_p, next_key_p, key_size);
622 	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
623 	assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
624 	       next_key_p->data[1] == 168 && next_key_p->data[2] == 128);
625 
626 	memcpy(key_p, next_key_p, key_size);
627 	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
628 	assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
629 	       next_key_p->data[1] == 168);
630 
631 	memcpy(key_p, next_key_p, key_size);
632 	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
633 	       errno == ENOENT);
634 
635 	/* no exact matching key should return the first one in post order */
636 	key_p->prefixlen = 22;
637 	inet_pton(AF_INET, "192.168.1.0", key_p->data);
638 	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
639 	assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
640 	       next_key_p->data[1] == 168 && next_key_p->data[2] == 0);
641 
642 	close(map_fd);
643 }
644 
645 #define MAX_TEST_KEYS	4
646 struct lpm_mt_test_info {
647 	int cmd; /* 0: update, 1: delete, 2: lookup, 3: get_next_key */
648 	int iter;
649 	int map_fd;
650 	struct {
651 		__u32 prefixlen;
652 		__u32 data;
653 	} key[MAX_TEST_KEYS];
654 };
655 
656 static void *lpm_test_command(void *arg)
657 {
658 	int i, j, ret, iter, key_size;
659 	struct lpm_mt_test_info *info = arg;
660 	struct bpf_lpm_trie_key *key_p;
661 
662 	key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32);
663 	key_p = alloca(key_size);
664 	for (iter = 0; iter < info->iter; iter++)
665 		for (i = 0; i < MAX_TEST_KEYS; i++) {
666 			/* first half of iterations in forward order,
667 			 * and second half in backward order.
668 			 */
669 			j = (iter < (info->iter / 2)) ? i : MAX_TEST_KEYS - i - 1;
670 			key_p->prefixlen = info->key[j].prefixlen;
671 			memcpy(key_p->data, &info->key[j].data, sizeof(__u32));
672 			if (info->cmd == 0) {
673 				__u32 value = j;
674 				/* update must succeed */
675 				assert(bpf_map_update_elem(info->map_fd, key_p, &value, 0) == 0);
676 			} else if (info->cmd == 1) {
677 				ret = bpf_map_delete_elem(info->map_fd, key_p);
678 				assert(ret == 0 || errno == ENOENT);
679 			} else if (info->cmd == 2) {
680 				__u32 value;
681 				ret = bpf_map_lookup_elem(info->map_fd, key_p, &value);
682 				assert(ret == 0 || errno == ENOENT);
683 			} else {
684 				struct bpf_lpm_trie_key *next_key_p = alloca(key_size);
685 				ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p);
686 				assert(ret == 0 || errno == ENOENT || errno == ENOMEM);
687 			}
688 		}
689 
690 	// Pass successful exit info back to the main thread
691 	pthread_exit((void *)info);
692 }
693 
694 static void setup_lpm_mt_test_info(struct lpm_mt_test_info *info, int map_fd)
695 {
696 	info->iter = 2000;
697 	info->map_fd = map_fd;
698 	info->key[0].prefixlen = 16;
699 	inet_pton(AF_INET, "192.168.0.0", &info->key[0].data);
700 	info->key[1].prefixlen = 24;
701 	inet_pton(AF_INET, "192.168.0.0", &info->key[1].data);
702 	info->key[2].prefixlen = 24;
703 	inet_pton(AF_INET, "192.168.128.0", &info->key[2].data);
704 	info->key[3].prefixlen = 24;
705 	inet_pton(AF_INET, "192.168.1.0", &info->key[3].data);
706 }
707 
708 static void test_lpm_multi_thread(void)
709 {
710 	struct lpm_mt_test_info info[4];
711 	size_t key_size, value_size;
712 	pthread_t thread_id[4];
713 	int i, map_fd;
714 	void *ret;
715 
716 	/* create a trie */
717 	value_size = sizeof(__u32);
718 	key_size = sizeof(struct bpf_lpm_trie_key) + value_size;
719 	map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, value_size,
720 				100, BPF_F_NO_PREALLOC);
721 
722 	/* create 4 threads to test update, delete, lookup and get_next_key */
723 	setup_lpm_mt_test_info(&info[0], map_fd);
724 	for (i = 0; i < 4; i++) {
725 		if (i != 0)
726 			memcpy(&info[i], &info[0], sizeof(info[i]));
727 		info[i].cmd = i;
728 		assert(pthread_create(&thread_id[i], NULL, &lpm_test_command, &info[i]) == 0);
729 	}
730 
731 	for (i = 0; i < 4; i++)
732 		assert(pthread_join(thread_id[i], &ret) == 0 && ret == (void *)&info[i]);
733 
734 	close(map_fd);
735 }
736 
/* Run all LPM tests in sequence and report success. Failures surface through
 * the assert() calls inside the individual tests.
 */
int main(void)
{
	struct rlimit unlimited = {
		.rlim_cur = RLIM_INFINITY,
		.rlim_max = RLIM_INFINITY,
	};
	int keysize;

	/* a fixed seed keeps the pseudo random tests predictable */
	srand(0xf00ba1);

	/* allow unlimited locked memory; carry on even if that fails */
	if (setrlimit(RLIMIT_MEMLOCK, &unlimited) < 0)
		perror("Unable to lift memlock rlimit");

	test_lpm_basic();
	test_lpm_order();

	/* key sizes of 1..16 bytes, i.e. 8, 16, 24, ... 128 bit prefixes */
	keysize = 1;
	while (keysize <= 16) {
		test_lpm_map(keysize);
		keysize++;
	}

	test_lpm_ipaddr();

	test_lpm_delete();

	test_lpm_get_next_key();

	test_lpm_multi_thread();

	printf("test_lpm: OK\n");
	return 0;
}
768