1 /*
2  * Randomized tests for eBPF longest-prefix-match maps
3  *
4  * This program runs randomized tests against the lpm-bpf-map. It implements a
5  * "Trivial Longest Prefix Match" (tlpm) based on simple, linear, singly linked
6  * lists. The implementation should be pretty straightforward.
7  *
8  * Based on tlpm, this inserts randomized data into bpf-lpm-maps and verifies
9  * the trie-based bpf-map implementation behaves the same way as tlpm.
10  */
11 
12 #include <assert.h>
13 #include <errno.h>
14 #include <inttypes.h>
15 #include <linux/bpf.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <time.h>
20 #include <unistd.h>
21 #include <arpa/inet.h>
22 #include <sys/time.h>
23 #include <sys/resource.h>
24 
25 #include <bpf/bpf.h>
26 #include "bpf_util.h"
27 
/* One entry of the "Trivial Longest Prefix Match" (tlpm) reference list. */
struct tlpm_node {
	/* next entry of the singly linked list, NULL at the tail */
	struct tlpm_node *next;
	/* length of the stored prefix, in bits */
	size_t n_bits;
	/* prefix bytes; tlpm_add() allocates and fills (n_bits + 7) / 8 bytes */
	uint8_t key[];
};
33 
/* Forward declaration: tlpm_add() calls tlpm_match() ahead of its definition
 * to detect an already present entry.
 */
static struct tlpm_node *tlpm_match(struct tlpm_node *list,
				    const uint8_t *key,
				    size_t n_bits);
37 
38 static struct tlpm_node *tlpm_add(struct tlpm_node *list,
39 				  const uint8_t *key,
40 				  size_t n_bits)
41 {
42 	struct tlpm_node *node;
43 	size_t n;
44 
45 	n = (n_bits + 7) / 8;
46 
47 	/* 'overwrite' an equivalent entry if one already exists */
48 	node = tlpm_match(list, key, n_bits);
49 	if (node && node->n_bits == n_bits) {
50 		memcpy(node->key, key, n);
51 		return list;
52 	}
53 
54 	/* add new entry with @key/@n_bits to @list and return new head */
55 
56 	node = malloc(sizeof(*node) + n);
57 	assert(node);
58 
59 	node->next = list;
60 	node->n_bits = n_bits;
61 	memcpy(node->key, key, n);
62 
63 	return node;
64 }
65 
66 static void tlpm_clear(struct tlpm_node *list)
67 {
68 	struct tlpm_node *node;
69 
70 	/* free all entries in @list */
71 
72 	while ((node = list)) {
73 		list = list->next;
74 		free(node);
75 	}
76 }
77 
78 static struct tlpm_node *tlpm_match(struct tlpm_node *list,
79 				    const uint8_t *key,
80 				    size_t n_bits)
81 {
82 	struct tlpm_node *best = NULL;
83 	size_t i;
84 
85 	/* Perform longest prefix-match on @key/@n_bits. That is, iterate all
86 	 * entries and match each prefix against @key. Remember the "best"
87 	 * entry we find (i.e., the longest prefix that matches) and return it
88 	 * to the caller when done.
89 	 */
90 
91 	for ( ; list; list = list->next) {
92 		for (i = 0; i < n_bits && i < list->n_bits; ++i) {
93 			if ((key[i / 8] & (1 << (7 - i % 8))) !=
94 			    (list->key[i / 8] & (1 << (7 - i % 8))))
95 				break;
96 		}
97 
98 		if (i >= list->n_bits) {
99 			if (!best || i > best->n_bits)
100 				best = list;
101 		}
102 	}
103 
104 	return best;
105 }
106 
107 static struct tlpm_node *tlpm_delete(struct tlpm_node *list,
108 				     const uint8_t *key,
109 				     size_t n_bits)
110 {
111 	struct tlpm_node *best = tlpm_match(list, key, n_bits);
112 	struct tlpm_node *node;
113 
114 	if (!best || best->n_bits != n_bits)
115 		return list;
116 
117 	if (best == list) {
118 		node = best->next;
119 		free(best);
120 		return node;
121 	}
122 
123 	for (node = list; node; node = node->next) {
124 		if (node->next == best) {
125 			node->next = best->next;
126 			free(best);
127 			return list;
128 		}
129 	}
130 	/* should never get here */
131 	assert(0);
132 	return list;
133 }
134 
135 static void test_lpm_basic(void)
136 {
137 	struct tlpm_node *list = NULL, *t1, *t2;
138 
139 	/* very basic, static tests to verify tlpm works as expected */
140 
141 	assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 8));
142 
143 	t1 = list = tlpm_add(list, (uint8_t[]){ 0xff }, 8);
144 	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8));
145 	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16));
146 	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0x00 }, 16));
147 	assert(!tlpm_match(list, (uint8_t[]){ 0x7f }, 8));
148 	assert(!tlpm_match(list, (uint8_t[]){ 0xfe }, 8));
149 	assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 7));
150 
151 	t2 = list = tlpm_add(list, (uint8_t[]){ 0xff, 0xff }, 16);
152 	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8));
153 	assert(t2 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16));
154 	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 15));
155 	assert(!tlpm_match(list, (uint8_t[]){ 0x7f, 0xff }, 16));
156 
157 	list = tlpm_delete(list, (uint8_t[]){ 0xff, 0xff }, 16);
158 	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8));
159 	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16));
160 
161 	list = tlpm_delete(list, (uint8_t[]){ 0xff }, 8);
162 	assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 8));
163 
164 	tlpm_clear(list);
165 }
166 
167 static void test_lpm_order(void)
168 {
169 	struct tlpm_node *t1, *t2, *l1 = NULL, *l2 = NULL;
170 	size_t i, j;
171 
172 	/* Verify the tlpm implementation works correctly regardless of the
173 	 * order of entries. Insert a random set of entries into @l1, and copy
174 	 * the same data in reverse order into @l2. Then verify a lookup of
175 	 * random keys will yield the same result in both sets.
176 	 */
177 
178 	for (i = 0; i < (1 << 12); ++i)
179 		l1 = tlpm_add(l1, (uint8_t[]){
180 					rand() % 0xff,
181 					rand() % 0xff,
182 				}, rand() % 16 + 1);
183 
184 	for (t1 = l1; t1; t1 = t1->next)
185 		l2 = tlpm_add(l2, t1->key, t1->n_bits);
186 
187 	for (i = 0; i < (1 << 8); ++i) {
188 		uint8_t key[] = { rand() % 0xff, rand() % 0xff };
189 
190 		t1 = tlpm_match(l1, key, 16);
191 		t2 = tlpm_match(l2, key, 16);
192 
193 		assert(!t1 == !t2);
194 		if (t1) {
195 			assert(t1->n_bits == t2->n_bits);
196 			for (j = 0; j < t1->n_bits; ++j)
197 				assert((t1->key[j / 8] & (1 << (7 - j % 8))) ==
198 				       (t2->key[j / 8] & (1 << (7 - j % 8))));
199 		}
200 	}
201 
202 	tlpm_clear(l1);
203 	tlpm_clear(l2);
204 }
205 
206 static void test_lpm_map(int keysize)
207 {
208 	size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups;
209 	struct tlpm_node *t, *list = NULL;
210 	struct bpf_lpm_trie_key *key;
211 	uint8_t *data, *value;
212 	int r, map;
213 
214 	/* Compare behavior of tlpm vs. bpf-lpm. Create a randomized set of
215 	 * prefixes and insert it into both tlpm and bpf-lpm. Then run some
216 	 * randomized lookups and verify both maps return the same result.
217 	 */
218 
219 	n_matches = 0;
220 	n_matches_after_delete = 0;
221 	n_nodes = 1 << 8;
222 	n_lookups = 1 << 16;
223 
224 	data = alloca(keysize);
225 	memset(data, 0, keysize);
226 
227 	value = alloca(keysize + 1);
228 	memset(value, 0, keysize + 1);
229 
230 	key = alloca(sizeof(*key) + keysize);
231 	memset(key, 0, sizeof(*key) + keysize);
232 
233 	map = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
234 			     sizeof(*key) + keysize,
235 			     keysize + 1,
236 			     4096,
237 			     BPF_F_NO_PREALLOC);
238 	assert(map >= 0);
239 
240 	for (i = 0; i < n_nodes; ++i) {
241 		for (j = 0; j < keysize; ++j)
242 			value[j] = rand() & 0xff;
243 		value[keysize] = rand() % (8 * keysize + 1);
244 
245 		list = tlpm_add(list, value, value[keysize]);
246 
247 		key->prefixlen = value[keysize];
248 		memcpy(key->data, value, keysize);
249 		r = bpf_map_update_elem(map, key, value, 0);
250 		assert(!r);
251 	}
252 
253 	for (i = 0; i < n_lookups; ++i) {
254 		for (j = 0; j < keysize; ++j)
255 			data[j] = rand() & 0xff;
256 
257 		t = tlpm_match(list, data, 8 * keysize);
258 
259 		key->prefixlen = 8 * keysize;
260 		memcpy(key->data, data, keysize);
261 		r = bpf_map_lookup_elem(map, key, value);
262 		assert(!r || errno == ENOENT);
263 		assert(!t == !!r);
264 
265 		if (t) {
266 			++n_matches;
267 			assert(t->n_bits == value[keysize]);
268 			for (j = 0; j < t->n_bits; ++j)
269 				assert((t->key[j / 8] & (1 << (7 - j % 8))) ==
270 				       (value[j / 8] & (1 << (7 - j % 8))));
271 		}
272 	}
273 
274 	/* Remove the first half of the elements in the tlpm and the
275 	 * corresponding nodes from the bpf-lpm.  Then run the same
276 	 * large number of random lookups in both and make sure they match.
277 	 * Note: we need to count the number of nodes actually inserted
278 	 * since there may have been duplicates.
279 	 */
280 	for (i = 0, t = list; t; i++, t = t->next)
281 		;
282 	for (j = 0; j < i / 2; ++j) {
283 		key->prefixlen = list->n_bits;
284 		memcpy(key->data, list->key, keysize);
285 		r = bpf_map_delete_elem(map, key);
286 		assert(!r);
287 		list = tlpm_delete(list, list->key, list->n_bits);
288 		assert(list);
289 	}
290 	for (i = 0; i < n_lookups; ++i) {
291 		for (j = 0; j < keysize; ++j)
292 			data[j] = rand() & 0xff;
293 
294 		t = tlpm_match(list, data, 8 * keysize);
295 
296 		key->prefixlen = 8 * keysize;
297 		memcpy(key->data, data, keysize);
298 		r = bpf_map_lookup_elem(map, key, value);
299 		assert(!r || errno == ENOENT);
300 		assert(!t == !!r);
301 
302 		if (t) {
303 			++n_matches_after_delete;
304 			assert(t->n_bits == value[keysize]);
305 			for (j = 0; j < t->n_bits; ++j)
306 				assert((t->key[j / 8] & (1 << (7 - j % 8))) ==
307 				       (value[j / 8] & (1 << (7 - j % 8))));
308 		}
309 	}
310 
311 	close(map);
312 	tlpm_clear(list);
313 
314 	/* With 255 random nodes in the map, we are pretty likely to match
315 	 * something on every lookup. For statistics, use this:
316 	 *
317 	 *     printf("          nodes: %zu\n"
318 	 *            "        lookups: %zu\n"
319 	 *            "        matches: %zu\n"
320 	 *            "matches(delete): %zu\n",
321 	 *            n_nodes, n_lookups, n_matches, n_matches_after_delete);
322 	 */
323 }
324 
325 /* Test the implementation with some 'real world' examples */
326 
327 static void test_lpm_ipaddr(void)
328 {
329 	struct bpf_lpm_trie_key *key_ipv4;
330 	struct bpf_lpm_trie_key *key_ipv6;
331 	size_t key_size_ipv4;
332 	size_t key_size_ipv6;
333 	int map_fd_ipv4;
334 	int map_fd_ipv6;
335 	__u64 value;
336 
337 	key_size_ipv4 = sizeof(*key_ipv4) + sizeof(__u32);
338 	key_size_ipv6 = sizeof(*key_ipv6) + sizeof(__u32) * 4;
339 	key_ipv4 = alloca(key_size_ipv4);
340 	key_ipv6 = alloca(key_size_ipv6);
341 
342 	map_fd_ipv4 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
343 				     key_size_ipv4, sizeof(value),
344 				     100, BPF_F_NO_PREALLOC);
345 	assert(map_fd_ipv4 >= 0);
346 
347 	map_fd_ipv6 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
348 				     key_size_ipv6, sizeof(value),
349 				     100, BPF_F_NO_PREALLOC);
350 	assert(map_fd_ipv6 >= 0);
351 
352 	/* Fill data some IPv4 and IPv6 address ranges */
353 	value = 1;
354 	key_ipv4->prefixlen = 16;
355 	inet_pton(AF_INET, "192.168.0.0", key_ipv4->data);
356 	assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
357 
358 	value = 2;
359 	key_ipv4->prefixlen = 24;
360 	inet_pton(AF_INET, "192.168.0.0", key_ipv4->data);
361 	assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
362 
363 	value = 3;
364 	key_ipv4->prefixlen = 24;
365 	inet_pton(AF_INET, "192.168.128.0", key_ipv4->data);
366 	assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
367 
368 	value = 5;
369 	key_ipv4->prefixlen = 24;
370 	inet_pton(AF_INET, "192.168.1.0", key_ipv4->data);
371 	assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
372 
373 	value = 4;
374 	key_ipv4->prefixlen = 23;
375 	inet_pton(AF_INET, "192.168.0.0", key_ipv4->data);
376 	assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
377 
378 	value = 0xdeadbeef;
379 	key_ipv6->prefixlen = 64;
380 	inet_pton(AF_INET6, "2a00:1450:4001:814::200e", key_ipv6->data);
381 	assert(bpf_map_update_elem(map_fd_ipv6, key_ipv6, &value, 0) == 0);
382 
383 	/* Set tprefixlen to maximum for lookups */
384 	key_ipv4->prefixlen = 32;
385 	key_ipv6->prefixlen = 128;
386 
387 	/* Test some lookups that should come back with a value */
388 	inet_pton(AF_INET, "192.168.128.23", key_ipv4->data);
389 	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == 0);
390 	assert(value == 3);
391 
392 	inet_pton(AF_INET, "192.168.0.1", key_ipv4->data);
393 	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == 0);
394 	assert(value == 2);
395 
396 	inet_pton(AF_INET6, "2a00:1450:4001:814::", key_ipv6->data);
397 	assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == 0);
398 	assert(value == 0xdeadbeef);
399 
400 	inet_pton(AF_INET6, "2a00:1450:4001:814::1", key_ipv6->data);
401 	assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == 0);
402 	assert(value == 0xdeadbeef);
403 
404 	/* Test some lookups that should not match any entry */
405 	inet_pton(AF_INET, "10.0.0.1", key_ipv4->data);
406 	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
407 	       errno == ENOENT);
408 
409 	inet_pton(AF_INET, "11.11.11.11", key_ipv4->data);
410 	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
411 	       errno == ENOENT);
412 
413 	inet_pton(AF_INET6, "2a00:ffff::", key_ipv6->data);
414 	assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -1 &&
415 	       errno == ENOENT);
416 
417 	close(map_fd_ipv4);
418 	close(map_fd_ipv6);
419 }
420 
421 static void test_lpm_delete(void)
422 {
423 	struct bpf_lpm_trie_key *key;
424 	size_t key_size;
425 	int map_fd;
426 	__u64 value;
427 
428 	key_size = sizeof(*key) + sizeof(__u32);
429 	key = alloca(key_size);
430 
431 	map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
432 				key_size, sizeof(value),
433 				100, BPF_F_NO_PREALLOC);
434 	assert(map_fd >= 0);
435 
436 	/* Add nodes:
437 	 * 192.168.0.0/16   (1)
438 	 * 192.168.0.0/24   (2)
439 	 * 192.168.128.0/24 (3)
440 	 * 192.168.1.0/24   (4)
441 	 *
442 	 *         (1)
443 	 *        /   \
444          *     (IM)    (3)
445 	 *    /   \
446          *   (2)  (4)
447 	 */
448 	value = 1;
449 	key->prefixlen = 16;
450 	inet_pton(AF_INET, "192.168.0.0", key->data);
451 	assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
452 
453 	value = 2;
454 	key->prefixlen = 24;
455 	inet_pton(AF_INET, "192.168.0.0", key->data);
456 	assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
457 
458 	value = 3;
459 	key->prefixlen = 24;
460 	inet_pton(AF_INET, "192.168.128.0", key->data);
461 	assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
462 
463 	value = 4;
464 	key->prefixlen = 24;
465 	inet_pton(AF_INET, "192.168.1.0", key->data);
466 	assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
467 
468 	/* remove non-existent node */
469 	key->prefixlen = 32;
470 	inet_pton(AF_INET, "10.0.0.1", key->data);
471 	assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
472 		errno == ENOENT);
473 
474 	/* assert initial lookup */
475 	key->prefixlen = 32;
476 	inet_pton(AF_INET, "192.168.0.1", key->data);
477 	assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
478 	assert(value == 2);
479 
480 	/* remove leaf node */
481 	key->prefixlen = 24;
482 	inet_pton(AF_INET, "192.168.0.0", key->data);
483 	assert(bpf_map_delete_elem(map_fd, key) == 0);
484 
485 	key->prefixlen = 32;
486 	inet_pton(AF_INET, "192.168.0.1", key->data);
487 	assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
488 	assert(value == 1);
489 
490 	/* remove leaf (and intermediary) node */
491 	key->prefixlen = 24;
492 	inet_pton(AF_INET, "192.168.1.0", key->data);
493 	assert(bpf_map_delete_elem(map_fd, key) == 0);
494 
495 	key->prefixlen = 32;
496 	inet_pton(AF_INET, "192.168.1.1", key->data);
497 	assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
498 	assert(value == 1);
499 
500 	/* remove root node */
501 	key->prefixlen = 16;
502 	inet_pton(AF_INET, "192.168.0.0", key->data);
503 	assert(bpf_map_delete_elem(map_fd, key) == 0);
504 
505 	key->prefixlen = 32;
506 	inet_pton(AF_INET, "192.168.128.1", key->data);
507 	assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
508 	assert(value == 3);
509 
510 	/* remove last node */
511 	key->prefixlen = 24;
512 	inet_pton(AF_INET, "192.168.128.0", key->data);
513 	assert(bpf_map_delete_elem(map_fd, key) == 0);
514 
515 	key->prefixlen = 32;
516 	inet_pton(AF_INET, "192.168.128.1", key->data);
517 	assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
518 		errno == ENOENT);
519 
520 	close(map_fd);
521 }
522 
/* Entry point: seed the PRNG with a fixed value, try to lift the locked
 * memory limit, then run all tlpm and bpf-lpm tests in sequence. Any failing
 * check aborts via assert(); reaching the end prints the OK line.
 */
int main(void)
{
	struct rlimit unlimited = {
		.rlim_cur = RLIM_INFINITY,
		.rlim_max = RLIM_INFINITY,
	};
	int keysize;

	/* we want predictable, pseudo random tests */
	srand(0xf00ba1);

	/* allow unlimited locked memory; a failure is reported but not fatal */
	if (setrlimit(RLIMIT_MEMLOCK, &unlimited) < 0)
		perror("Unable to lift memlock rlimit");

	test_lpm_basic();
	test_lpm_order();

	/* Test with 8, 16, 24, 32, ... 128 bit prefix length */
	for (keysize = 1; keysize <= 16; ++keysize)
		test_lpm_map(keysize);

	test_lpm_ipaddr();

	test_lpm_delete();

	printf("test_lpm: OK\n");
	return 0;
}
550