1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2017 Oracle.  All rights reserved.
4  */
5 
6 #include <linux/types.h>
7 #include "btrfs-tests.h"
8 #include "../ctree.h"
9 #include "../volumes.h"
10 #include "../disk-io.h"
11 #include "../block-group.h"
12 
13 static void free_extent_map_tree(struct extent_map_tree *em_tree)
14 {
15 	struct extent_map *em;
16 	struct rb_node *node;
17 
18 	while (!RB_EMPTY_ROOT(&em_tree->map.rb_root)) {
19 		node = rb_first_cached(&em_tree->map);
20 		em = rb_entry(node, struct extent_map, rb_node);
21 		remove_extent_mapping(em_tree, em);
22 
23 #ifdef CONFIG_BTRFS_DEBUG
24 		if (refcount_read(&em->refs) != 1) {
25 			test_err(
26 "em leak: em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx) refs %d",
27 				 em->start, em->len, em->block_start,
28 				 em->block_len, refcount_read(&em->refs));
29 
30 			refcount_set(&em->refs, 1);
31 		}
32 #endif
33 		free_extent_map(em);
34 	}
35 }
36 
37 /*
38  * Test scenario:
39  *
40  * Suppose that no extent map has been loaded into memory yet, there is a file
41  * extent [0, 16K), followed by another file extent [16K, 20K), two dio reads
42  * are entering btrfs_get_extent() concurrently, t1 is reading [8K, 16K), t2 is
43  * reading [0, 8K)
44  *
45  *     t1                            t2
46  *  btrfs_get_extent()              btrfs_get_extent()
47  *    -> lookup_extent_mapping()      ->lookup_extent_mapping()
48  *    -> add_extent_mapping(0, 16K)
49  *    -> return em
50  *                                    ->add_extent_mapping(0, 16K)
51  *                                    -> #handle -EEXIST
52  */
53 static int test_case_1(struct btrfs_fs_info *fs_info,
54 		struct extent_map_tree *em_tree)
55 {
56 	struct extent_map *em;
57 	u64 start = 0;
58 	u64 len = SZ_8K;
59 	int ret;
60 
61 	em = alloc_extent_map();
62 	if (!em) {
63 		test_std_err(TEST_ALLOC_EXTENT_MAP);
64 		return -ENOMEM;
65 	}
66 
67 	/* Add [0, 16K) */
68 	em->start = 0;
69 	em->len = SZ_16K;
70 	em->block_start = 0;
71 	em->block_len = SZ_16K;
72 	write_lock(&em_tree->lock);
73 	ret = add_extent_mapping(em_tree, em, 0);
74 	write_unlock(&em_tree->lock);
75 	if (ret < 0) {
76 		test_err("cannot add extent range [0, 16K)");
77 		goto out;
78 	}
79 	free_extent_map(em);
80 
81 	/* Add [16K, 20K) following [0, 16K)  */
82 	em = alloc_extent_map();
83 	if (!em) {
84 		test_std_err(TEST_ALLOC_EXTENT_MAP);
85 		ret = -ENOMEM;
86 		goto out;
87 	}
88 
89 	em->start = SZ_16K;
90 	em->len = SZ_4K;
91 	em->block_start = SZ_32K; /* avoid merging */
92 	em->block_len = SZ_4K;
93 	write_lock(&em_tree->lock);
94 	ret = add_extent_mapping(em_tree, em, 0);
95 	write_unlock(&em_tree->lock);
96 	if (ret < 0) {
97 		test_err("cannot add extent range [16K, 20K)");
98 		goto out;
99 	}
100 	free_extent_map(em);
101 
102 	em = alloc_extent_map();
103 	if (!em) {
104 		test_std_err(TEST_ALLOC_EXTENT_MAP);
105 		ret = -ENOMEM;
106 		goto out;
107 	}
108 
109 	/* Add [0, 8K), should return [0, 16K) instead. */
110 	em->start = start;
111 	em->len = len;
112 	em->block_start = start;
113 	em->block_len = len;
114 	write_lock(&em_tree->lock);
115 	ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
116 	write_unlock(&em_tree->lock);
117 	if (ret) {
118 		test_err("case1 [%llu %llu]: ret %d", start, start + len, ret);
119 		goto out;
120 	}
121 	if (em &&
122 	    (em->start != 0 || extent_map_end(em) != SZ_16K ||
123 	     em->block_start != 0 || em->block_len != SZ_16K)) {
124 		test_err(
125 "case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu",
126 			 start, start + len, ret, em->start, em->len,
127 			 em->block_start, em->block_len);
128 		ret = -EINVAL;
129 	}
130 	free_extent_map(em);
131 out:
132 	free_extent_map_tree(em_tree);
133 
134 	return ret;
135 }
136 
137 /*
138  * Test scenario:
139  *
140  * Reading the inline ending up with EEXIST, ie. read an inline
141  * extent and discard page cache and read it again.
142  */
143 static int test_case_2(struct btrfs_fs_info *fs_info,
144 		struct extent_map_tree *em_tree)
145 {
146 	struct extent_map *em;
147 	int ret;
148 
149 	em = alloc_extent_map();
150 	if (!em) {
151 		test_std_err(TEST_ALLOC_EXTENT_MAP);
152 		return -ENOMEM;
153 	}
154 
155 	/* Add [0, 1K) */
156 	em->start = 0;
157 	em->len = SZ_1K;
158 	em->block_start = EXTENT_MAP_INLINE;
159 	em->block_len = (u64)-1;
160 	write_lock(&em_tree->lock);
161 	ret = add_extent_mapping(em_tree, em, 0);
162 	write_unlock(&em_tree->lock);
163 	if (ret < 0) {
164 		test_err("cannot add extent range [0, 1K)");
165 		goto out;
166 	}
167 	free_extent_map(em);
168 
169 	/* Add [4K, 8K) following [0, 1K)  */
170 	em = alloc_extent_map();
171 	if (!em) {
172 		test_std_err(TEST_ALLOC_EXTENT_MAP);
173 		ret = -ENOMEM;
174 		goto out;
175 	}
176 
177 	em->start = SZ_4K;
178 	em->len = SZ_4K;
179 	em->block_start = SZ_4K;
180 	em->block_len = SZ_4K;
181 	write_lock(&em_tree->lock);
182 	ret = add_extent_mapping(em_tree, em, 0);
183 	write_unlock(&em_tree->lock);
184 	if (ret < 0) {
185 		test_err("cannot add extent range [4K, 8K)");
186 		goto out;
187 	}
188 	free_extent_map(em);
189 
190 	em = alloc_extent_map();
191 	if (!em) {
192 		test_std_err(TEST_ALLOC_EXTENT_MAP);
193 		ret = -ENOMEM;
194 		goto out;
195 	}
196 
197 	/* Add [0, 1K) */
198 	em->start = 0;
199 	em->len = SZ_1K;
200 	em->block_start = EXTENT_MAP_INLINE;
201 	em->block_len = (u64)-1;
202 	write_lock(&em_tree->lock);
203 	ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
204 	write_unlock(&em_tree->lock);
205 	if (ret) {
206 		test_err("case2 [0 1K]: ret %d", ret);
207 		goto out;
208 	}
209 	if (em &&
210 	    (em->start != 0 || extent_map_end(em) != SZ_1K ||
211 	     em->block_start != EXTENT_MAP_INLINE || em->block_len != (u64)-1)) {
212 		test_err(
213 "case2 [0 1K]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu",
214 			 ret, em->start, em->len, em->block_start,
215 			 em->block_len);
216 		ret = -EINVAL;
217 	}
218 	free_extent_map(em);
219 out:
220 	free_extent_map_tree(em_tree);
221 
222 	return ret;
223 }
224 
225 static int __test_case_3(struct btrfs_fs_info *fs_info,
226 		struct extent_map_tree *em_tree, u64 start)
227 {
228 	struct extent_map *em;
229 	u64 len = SZ_4K;
230 	int ret;
231 
232 	em = alloc_extent_map();
233 	if (!em) {
234 		test_std_err(TEST_ALLOC_EXTENT_MAP);
235 		return -ENOMEM;
236 	}
237 
238 	/* Add [4K, 8K) */
239 	em->start = SZ_4K;
240 	em->len = SZ_4K;
241 	em->block_start = SZ_4K;
242 	em->block_len = SZ_4K;
243 	write_lock(&em_tree->lock);
244 	ret = add_extent_mapping(em_tree, em, 0);
245 	write_unlock(&em_tree->lock);
246 	if (ret < 0) {
247 		test_err("cannot add extent range [4K, 8K)");
248 		goto out;
249 	}
250 	free_extent_map(em);
251 
252 	em = alloc_extent_map();
253 	if (!em) {
254 		test_std_err(TEST_ALLOC_EXTENT_MAP);
255 		ret = -ENOMEM;
256 		goto out;
257 	}
258 
259 	/* Add [0, 16K) */
260 	em->start = 0;
261 	em->len = SZ_16K;
262 	em->block_start = 0;
263 	em->block_len = SZ_16K;
264 	write_lock(&em_tree->lock);
265 	ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
266 	write_unlock(&em_tree->lock);
267 	if (ret) {
268 		test_err("case3 [0x%llx 0x%llx): ret %d",
269 			 start, start + len, ret);
270 		goto out;
271 	}
272 	/*
273 	 * Since bytes within em are contiguous, em->block_start is identical to
274 	 * em->start.
275 	 */
276 	if (em &&
277 	    (start < em->start || start + len > extent_map_end(em) ||
278 	     em->start != em->block_start || em->len != em->block_len)) {
279 		test_err(
280 "case3 [0x%llx 0x%llx): ret %d em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)",
281 			 start, start + len, ret, em->start, em->len,
282 			 em->block_start, em->block_len);
283 		ret = -EINVAL;
284 	}
285 	free_extent_map(em);
286 out:
287 	free_extent_map_tree(em_tree);
288 
289 	return ret;
290 }
291 
292 /*
293  * Test scenario:
294  *
295  * Suppose that no extent map has been loaded into memory yet.
296  * There is a file extent [0, 16K), two jobs are running concurrently
297  * against it, t1 is buffered writing to [4K, 8K) and t2 is doing dio
298  * read from [0, 4K) or [8K, 12K) or [12K, 16K).
299  *
300  * t1 goes ahead of t2 and adds em [4K, 8K) into tree.
301  *
302  *         t1                       t2
303  *  cow_file_range()	     btrfs_get_extent()
304  *                            -> lookup_extent_mapping()
305  *   -> add_extent_mapping()
306  *                            -> add_extent_mapping()
307  */
308 static int test_case_3(struct btrfs_fs_info *fs_info,
309 		struct extent_map_tree *em_tree)
310 {
311 	int ret;
312 
313 	ret = __test_case_3(fs_info, em_tree, 0);
314 	if (ret)
315 		return ret;
316 	ret = __test_case_3(fs_info, em_tree, SZ_8K);
317 	if (ret)
318 		return ret;
319 	ret = __test_case_3(fs_info, em_tree, (12 * SZ_1K));
320 
321 	return ret;
322 }
323 
324 static int __test_case_4(struct btrfs_fs_info *fs_info,
325 		struct extent_map_tree *em_tree, u64 start)
326 {
327 	struct extent_map *em;
328 	u64 len = SZ_4K;
329 	int ret;
330 
331 	em = alloc_extent_map();
332 	if (!em) {
333 		test_std_err(TEST_ALLOC_EXTENT_MAP);
334 		return -ENOMEM;
335 	}
336 
337 	/* Add [0K, 8K) */
338 	em->start = 0;
339 	em->len = SZ_8K;
340 	em->block_start = 0;
341 	em->block_len = SZ_8K;
342 	write_lock(&em_tree->lock);
343 	ret = add_extent_mapping(em_tree, em, 0);
344 	write_unlock(&em_tree->lock);
345 	if (ret < 0) {
346 		test_err("cannot add extent range [0, 8K)");
347 		goto out;
348 	}
349 	free_extent_map(em);
350 
351 	em = alloc_extent_map();
352 	if (!em) {
353 		test_std_err(TEST_ALLOC_EXTENT_MAP);
354 		ret = -ENOMEM;
355 		goto out;
356 	}
357 
358 	/* Add [8K, 32K) */
359 	em->start = SZ_8K;
360 	em->len = 24 * SZ_1K;
361 	em->block_start = SZ_16K; /* avoid merging */
362 	em->block_len = 24 * SZ_1K;
363 	write_lock(&em_tree->lock);
364 	ret = add_extent_mapping(em_tree, em, 0);
365 	write_unlock(&em_tree->lock);
366 	if (ret < 0) {
367 		test_err("cannot add extent range [8K, 32K)");
368 		goto out;
369 	}
370 	free_extent_map(em);
371 
372 	em = alloc_extent_map();
373 	if (!em) {
374 		test_std_err(TEST_ALLOC_EXTENT_MAP);
375 		ret = -ENOMEM;
376 		goto out;
377 	}
378 	/* Add [0K, 32K) */
379 	em->start = 0;
380 	em->len = SZ_32K;
381 	em->block_start = 0;
382 	em->block_len = SZ_32K;
383 	write_lock(&em_tree->lock);
384 	ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
385 	write_unlock(&em_tree->lock);
386 	if (ret) {
387 		test_err("case4 [0x%llx 0x%llx): ret %d",
388 			 start, len, ret);
389 		goto out;
390 	}
391 	if (em && (start < em->start || start + len > extent_map_end(em))) {
392 		test_err(
393 "case4 [0x%llx 0x%llx): ret %d, added wrong em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)",
394 			 start, len, ret, em->start, em->len, em->block_start,
395 			 em->block_len);
396 		ret = -EINVAL;
397 	}
398 	free_extent_map(em);
399 out:
400 	free_extent_map_tree(em_tree);
401 
402 	return ret;
403 }
404 
405 /*
406  * Test scenario:
407  *
408  * Suppose that no extent map has been loaded into memory yet.
409  * There is a file extent [0, 32K), two jobs are running concurrently
410  * against it, t1 is doing dio write to [8K, 32K) and t2 is doing dio
411  * read from [0, 4K) or [4K, 8K).
412  *
413  * t1 goes ahead of t2 and splits em [0, 32K) to em [0K, 8K) and [8K 32K).
414  *
415  *         t1                                t2
416  *  btrfs_get_blocks_direct()	       btrfs_get_blocks_direct()
417  *   -> btrfs_get_extent()              -> btrfs_get_extent()
418  *       -> lookup_extent_mapping()
419  *       -> add_extent_mapping()            -> lookup_extent_mapping()
420  *          # load [0, 32K)
421  *   -> btrfs_new_extent_direct()
422  *       -> btrfs_drop_extent_cache()
423  *          # split [0, 32K)
424  *       -> add_extent_mapping()
425  *          # add [8K, 32K)
426  *                                          -> add_extent_mapping()
427  *                                             # handle -EEXIST when adding
428  *                                             # [0, 32K)
429  */
430 static int test_case_4(struct btrfs_fs_info *fs_info,
431 		struct extent_map_tree *em_tree)
432 {
433 	int ret;
434 
435 	ret = __test_case_4(fs_info, em_tree, 0);
436 	if (ret)
437 		return ret;
438 	ret = __test_case_4(fs_info, em_tree, SZ_4K);
439 
440 	return ret;
441 }
442 
/*
 * Description of one test_rmap_block() run: the layout of the chunk to build
 * and the result expected from btrfs_rmap_block().
 */
struct rmap_test_vector {
	/* Copied into the chunk map's type field */
	u64 raid_type;
	/* NOTE(review): not read by test_rmap_block() in this file */
	u64 physical_start;
	/* Multiplied by num_data_stripes to get the length of the chunk */
	u64 data_stripe_size;
	u64 num_data_stripes;
	/* Number of stripes in the map; one dummy device is allocated for each */
	u64 num_stripes;
	/* Assume we won't have more than 5 physical stripes */
	u64 data_stripe_phys_start[5];
	/* Compared against the number of addresses btrfs_rmap_block() returns */
	bool expected_mapped_addr;
	/* Physical to logical addresses */
	u64 mapped_logical[5];
};
455 
456 static int test_rmap_block(struct btrfs_fs_info *fs_info,
457 			   struct rmap_test_vector *test)
458 {
459 	struct extent_map *em;
460 	struct map_lookup *map = NULL;
461 	u64 *logical = NULL;
462 	int i, out_ndaddrs, out_stripe_len;
463 	int ret;
464 
465 	em = alloc_extent_map();
466 	if (!em) {
467 		test_std_err(TEST_ALLOC_EXTENT_MAP);
468 		return -ENOMEM;
469 	}
470 
471 	map = kmalloc(map_lookup_size(test->num_stripes), GFP_KERNEL);
472 	if (!map) {
473 		kfree(em);
474 		test_std_err(TEST_ALLOC_EXTENT_MAP);
475 		return -ENOMEM;
476 	}
477 
478 	set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
479 	/* Start at 4GiB logical address */
480 	em->start = SZ_4G;
481 	em->len = test->data_stripe_size * test->num_data_stripes;
482 	em->block_len = em->len;
483 	em->orig_block_len = test->data_stripe_size;
484 	em->map_lookup = map;
485 
486 	map->num_stripes = test->num_stripes;
487 	map->stripe_len = BTRFS_STRIPE_LEN;
488 	map->type = test->raid_type;
489 
490 	for (i = 0; i < map->num_stripes; i++) {
491 		struct btrfs_device *dev = btrfs_alloc_dummy_device(fs_info);
492 
493 		if (IS_ERR(dev)) {
494 			test_err("cannot allocate device");
495 			ret = PTR_ERR(dev);
496 			goto out;
497 		}
498 		map->stripes[i].dev = dev;
499 		map->stripes[i].physical = test->data_stripe_phys_start[i];
500 	}
501 
502 	write_lock(&fs_info->mapping_tree.lock);
503 	ret = add_extent_mapping(&fs_info->mapping_tree, em, 0);
504 	write_unlock(&fs_info->mapping_tree.lock);
505 	if (ret) {
506 		test_err("error adding block group mapping to mapping tree");
507 		goto out_free;
508 	}
509 
510 	ret = btrfs_rmap_block(fs_info, em->start, NULL, btrfs_sb_offset(1),
511 			       &logical, &out_ndaddrs, &out_stripe_len);
512 	if (ret || (out_ndaddrs == 0 && test->expected_mapped_addr)) {
513 		test_err("didn't rmap anything but expected %d",
514 			 test->expected_mapped_addr);
515 		goto out;
516 	}
517 
518 	if (out_stripe_len != BTRFS_STRIPE_LEN) {
519 		test_err("calculated stripe length doesn't match");
520 		goto out;
521 	}
522 
523 	if (out_ndaddrs != test->expected_mapped_addr) {
524 		for (i = 0; i < out_ndaddrs; i++)
525 			test_msg("mapped %llu", logical[i]);
526 		test_err("unexpected number of mapped addresses: %d", out_ndaddrs);
527 		goto out;
528 	}
529 
530 	for (i = 0; i < out_ndaddrs; i++) {
531 		if (logical[i] != test->mapped_logical[i]) {
532 			test_err("unexpected logical address mapped");
533 			goto out;
534 		}
535 	}
536 
537 	ret = 0;
538 out:
539 	write_lock(&fs_info->mapping_tree.lock);
540 	remove_extent_mapping(&fs_info->mapping_tree, em);
541 	write_unlock(&fs_info->mapping_tree.lock);
542 	/* For us */
543 	free_extent_map(em);
544 out_free:
545 	/* For the tree */
546 	free_extent_map(em);
547 	kfree(logical);
548 	return ret;
549 }
550 
551 int btrfs_test_extent_map(void)
552 {
553 	struct btrfs_fs_info *fs_info = NULL;
554 	struct extent_map_tree *em_tree;
555 	int ret = 0, i;
556 	struct rmap_test_vector rmap_tests[] = {
557 		{
558 			/*
559 			 * Test a chunk with 2 data stripes one of which
560 			 * intersects the physical address of the super block
561 			 * is correctly recognised.
562 			 */
563 			.raid_type = BTRFS_BLOCK_GROUP_RAID1,
564 			.physical_start = SZ_64M - SZ_4M,
565 			.data_stripe_size = SZ_256M,
566 			.num_data_stripes = 2,
567 			.num_stripes = 2,
568 			.data_stripe_phys_start =
569 				{SZ_64M - SZ_4M, SZ_64M - SZ_4M + SZ_256M},
570 			.expected_mapped_addr = true,
571 			.mapped_logical= {SZ_4G + SZ_4M}
572 		},
573 		{
574 			/*
575 			 * Test that out-of-range physical addresses are
576 			 * ignored
577 			 */
578 
579 			 /* SINGLE chunk type */
580 			.raid_type = 0,
581 			.physical_start = SZ_4G,
582 			.data_stripe_size = SZ_256M,
583 			.num_data_stripes = 1,
584 			.num_stripes = 1,
585 			.data_stripe_phys_start = {SZ_256M},
586 			.expected_mapped_addr = false,
587 			.mapped_logical = {0}
588 		}
589 	};
590 
591 	test_msg("running extent_map tests");
592 
593 	/*
594 	 * Note: the fs_info is not set up completely, we only need
595 	 * fs_info::fsid for the tracepoint.
596 	 */
597 	fs_info = btrfs_alloc_dummy_fs_info(PAGE_SIZE, PAGE_SIZE);
598 	if (!fs_info) {
599 		test_std_err(TEST_ALLOC_FS_INFO);
600 		return -ENOMEM;
601 	}
602 
603 	em_tree = kzalloc(sizeof(*em_tree), GFP_KERNEL);
604 	if (!em_tree) {
605 		ret = -ENOMEM;
606 		goto out;
607 	}
608 
609 	extent_map_tree_init(em_tree);
610 
611 	ret = test_case_1(fs_info, em_tree);
612 	if (ret)
613 		goto out;
614 	ret = test_case_2(fs_info, em_tree);
615 	if (ret)
616 		goto out;
617 	ret = test_case_3(fs_info, em_tree);
618 	if (ret)
619 		goto out;
620 	ret = test_case_4(fs_info, em_tree);
621 
622 	test_msg("running rmap tests");
623 	for (i = 0; i < ARRAY_SIZE(rmap_tests); i++) {
624 		ret = test_rmap_block(fs_info, &rmap_tests[i]);
625 		if (ret)
626 			goto out;
627 	}
628 
629 out:
630 	kfree(em_tree);
631 	btrfs_free_dummy_fs_info(fs_info);
632 
633 	return ret;
634 }
635