1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2017 Oracle.  All rights reserved.
4  */
5 
6 #include <linux/types.h>
7 #include "btrfs-tests.h"
8 #include "../ctree.h"
9 #include "../volumes.h"
10 #include "../disk-io.h"
11 #include "../block-group.h"
12 
13 static void free_extent_map_tree(struct extent_map_tree *em_tree)
14 {
15 	struct extent_map *em;
16 	struct rb_node *node;
17 
18 	write_lock(&em_tree->lock);
19 	while (!RB_EMPTY_ROOT(&em_tree->map.rb_root)) {
20 		node = rb_first_cached(&em_tree->map);
21 		em = rb_entry(node, struct extent_map, rb_node);
22 		remove_extent_mapping(em_tree, em);
23 
24 #ifdef CONFIG_BTRFS_DEBUG
25 		if (refcount_read(&em->refs) != 1) {
26 			test_err(
27 "em leak: em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx) refs %d",
28 				 em->start, em->len, em->block_start,
29 				 em->block_len, refcount_read(&em->refs));
30 
31 			refcount_set(&em->refs, 1);
32 		}
33 #endif
34 		free_extent_map(em);
35 	}
36 	write_unlock(&em_tree->lock);
37 }
38 
39 /*
40  * Test scenario:
41  *
42  * Suppose that no extent map has been loaded into memory yet, there is a file
43  * extent [0, 16K), followed by another file extent [16K, 20K), two dio reads
44  * are entering btrfs_get_extent() concurrently, t1 is reading [8K, 16K), t2 is
45  * reading [0, 8K)
46  *
47  *     t1                            t2
48  *  btrfs_get_extent()              btrfs_get_extent()
49  *    -> lookup_extent_mapping()      ->lookup_extent_mapping()
50  *    -> add_extent_mapping(0, 16K)
51  *    -> return em
52  *                                    ->add_extent_mapping(0, 16K)
53  *                                    -> #handle -EEXIST
54  */
55 static int test_case_1(struct btrfs_fs_info *fs_info,
56 		struct extent_map_tree *em_tree)
57 {
58 	struct extent_map *em;
59 	u64 start = 0;
60 	u64 len = SZ_8K;
61 	int ret;
62 
63 	em = alloc_extent_map();
64 	if (!em) {
65 		test_std_err(TEST_ALLOC_EXTENT_MAP);
66 		return -ENOMEM;
67 	}
68 
69 	/* Add [0, 16K) */
70 	em->start = 0;
71 	em->len = SZ_16K;
72 	em->block_start = 0;
73 	em->block_len = SZ_16K;
74 	write_lock(&em_tree->lock);
75 	ret = add_extent_mapping(em_tree, em, 0);
76 	write_unlock(&em_tree->lock);
77 	if (ret < 0) {
78 		test_err("cannot add extent range [0, 16K)");
79 		goto out;
80 	}
81 	free_extent_map(em);
82 
83 	/* Add [16K, 20K) following [0, 16K)  */
84 	em = alloc_extent_map();
85 	if (!em) {
86 		test_std_err(TEST_ALLOC_EXTENT_MAP);
87 		ret = -ENOMEM;
88 		goto out;
89 	}
90 
91 	em->start = SZ_16K;
92 	em->len = SZ_4K;
93 	em->block_start = SZ_32K; /* avoid merging */
94 	em->block_len = SZ_4K;
95 	write_lock(&em_tree->lock);
96 	ret = add_extent_mapping(em_tree, em, 0);
97 	write_unlock(&em_tree->lock);
98 	if (ret < 0) {
99 		test_err("cannot add extent range [16K, 20K)");
100 		goto out;
101 	}
102 	free_extent_map(em);
103 
104 	em = alloc_extent_map();
105 	if (!em) {
106 		test_std_err(TEST_ALLOC_EXTENT_MAP);
107 		ret = -ENOMEM;
108 		goto out;
109 	}
110 
111 	/* Add [0, 8K), should return [0, 16K) instead. */
112 	em->start = start;
113 	em->len = len;
114 	em->block_start = start;
115 	em->block_len = len;
116 	write_lock(&em_tree->lock);
117 	ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
118 	write_unlock(&em_tree->lock);
119 	if (ret) {
120 		test_err("case1 [%llu %llu]: ret %d", start, start + len, ret);
121 		goto out;
122 	}
123 	if (em &&
124 	    (em->start != 0 || extent_map_end(em) != SZ_16K ||
125 	     em->block_start != 0 || em->block_len != SZ_16K)) {
126 		test_err(
127 "case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu",
128 			 start, start + len, ret, em->start, em->len,
129 			 em->block_start, em->block_len);
130 		ret = -EINVAL;
131 	}
132 	free_extent_map(em);
133 out:
134 	free_extent_map_tree(em_tree);
135 
136 	return ret;
137 }
138 
139 /*
140  * Test scenario:
141  *
142  * Reading the inline ending up with EEXIST, ie. read an inline
143  * extent and discard page cache and read it again.
144  */
145 static int test_case_2(struct btrfs_fs_info *fs_info,
146 		struct extent_map_tree *em_tree)
147 {
148 	struct extent_map *em;
149 	int ret;
150 
151 	em = alloc_extent_map();
152 	if (!em) {
153 		test_std_err(TEST_ALLOC_EXTENT_MAP);
154 		return -ENOMEM;
155 	}
156 
157 	/* Add [0, 1K) */
158 	em->start = 0;
159 	em->len = SZ_1K;
160 	em->block_start = EXTENT_MAP_INLINE;
161 	em->block_len = (u64)-1;
162 	write_lock(&em_tree->lock);
163 	ret = add_extent_mapping(em_tree, em, 0);
164 	write_unlock(&em_tree->lock);
165 	if (ret < 0) {
166 		test_err("cannot add extent range [0, 1K)");
167 		goto out;
168 	}
169 	free_extent_map(em);
170 
171 	/* Add [4K, 8K) following [0, 1K)  */
172 	em = alloc_extent_map();
173 	if (!em) {
174 		test_std_err(TEST_ALLOC_EXTENT_MAP);
175 		ret = -ENOMEM;
176 		goto out;
177 	}
178 
179 	em->start = SZ_4K;
180 	em->len = SZ_4K;
181 	em->block_start = SZ_4K;
182 	em->block_len = SZ_4K;
183 	write_lock(&em_tree->lock);
184 	ret = add_extent_mapping(em_tree, em, 0);
185 	write_unlock(&em_tree->lock);
186 	if (ret < 0) {
187 		test_err("cannot add extent range [4K, 8K)");
188 		goto out;
189 	}
190 	free_extent_map(em);
191 
192 	em = alloc_extent_map();
193 	if (!em) {
194 		test_std_err(TEST_ALLOC_EXTENT_MAP);
195 		ret = -ENOMEM;
196 		goto out;
197 	}
198 
199 	/* Add [0, 1K) */
200 	em->start = 0;
201 	em->len = SZ_1K;
202 	em->block_start = EXTENT_MAP_INLINE;
203 	em->block_len = (u64)-1;
204 	write_lock(&em_tree->lock);
205 	ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
206 	write_unlock(&em_tree->lock);
207 	if (ret) {
208 		test_err("case2 [0 1K]: ret %d", ret);
209 		goto out;
210 	}
211 	if (em &&
212 	    (em->start != 0 || extent_map_end(em) != SZ_1K ||
213 	     em->block_start != EXTENT_MAP_INLINE || em->block_len != (u64)-1)) {
214 		test_err(
215 "case2 [0 1K]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu",
216 			 ret, em->start, em->len, em->block_start,
217 			 em->block_len);
218 		ret = -EINVAL;
219 	}
220 	free_extent_map(em);
221 out:
222 	free_extent_map_tree(em_tree);
223 
224 	return ret;
225 }
226 
227 static int __test_case_3(struct btrfs_fs_info *fs_info,
228 		struct extent_map_tree *em_tree, u64 start)
229 {
230 	struct extent_map *em;
231 	u64 len = SZ_4K;
232 	int ret;
233 
234 	em = alloc_extent_map();
235 	if (!em) {
236 		test_std_err(TEST_ALLOC_EXTENT_MAP);
237 		return -ENOMEM;
238 	}
239 
240 	/* Add [4K, 8K) */
241 	em->start = SZ_4K;
242 	em->len = SZ_4K;
243 	em->block_start = SZ_4K;
244 	em->block_len = SZ_4K;
245 	write_lock(&em_tree->lock);
246 	ret = add_extent_mapping(em_tree, em, 0);
247 	write_unlock(&em_tree->lock);
248 	if (ret < 0) {
249 		test_err("cannot add extent range [4K, 8K)");
250 		goto out;
251 	}
252 	free_extent_map(em);
253 
254 	em = alloc_extent_map();
255 	if (!em) {
256 		test_std_err(TEST_ALLOC_EXTENT_MAP);
257 		ret = -ENOMEM;
258 		goto out;
259 	}
260 
261 	/* Add [0, 16K) */
262 	em->start = 0;
263 	em->len = SZ_16K;
264 	em->block_start = 0;
265 	em->block_len = SZ_16K;
266 	write_lock(&em_tree->lock);
267 	ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
268 	write_unlock(&em_tree->lock);
269 	if (ret) {
270 		test_err("case3 [0x%llx 0x%llx): ret %d",
271 			 start, start + len, ret);
272 		goto out;
273 	}
274 	/*
275 	 * Since bytes within em are contiguous, em->block_start is identical to
276 	 * em->start.
277 	 */
278 	if (em &&
279 	    (start < em->start || start + len > extent_map_end(em) ||
280 	     em->start != em->block_start || em->len != em->block_len)) {
281 		test_err(
282 "case3 [0x%llx 0x%llx): ret %d em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)",
283 			 start, start + len, ret, em->start, em->len,
284 			 em->block_start, em->block_len);
285 		ret = -EINVAL;
286 	}
287 	free_extent_map(em);
288 out:
289 	free_extent_map_tree(em_tree);
290 
291 	return ret;
292 }
293 
294 /*
295  * Test scenario:
296  *
297  * Suppose that no extent map has been loaded into memory yet.
298  * There is a file extent [0, 16K), two jobs are running concurrently
299  * against it, t1 is buffered writing to [4K, 8K) and t2 is doing dio
300  * read from [0, 4K) or [8K, 12K) or [12K, 16K).
301  *
302  * t1 goes ahead of t2 and adds em [4K, 8K) into tree.
303  *
304  *         t1                       t2
305  *  cow_file_range()	     btrfs_get_extent()
306  *                            -> lookup_extent_mapping()
307  *   -> add_extent_mapping()
308  *                            -> add_extent_mapping()
309  */
310 static int test_case_3(struct btrfs_fs_info *fs_info,
311 		struct extent_map_tree *em_tree)
312 {
313 	int ret;
314 
315 	ret = __test_case_3(fs_info, em_tree, 0);
316 	if (ret)
317 		return ret;
318 	ret = __test_case_3(fs_info, em_tree, SZ_8K);
319 	if (ret)
320 		return ret;
321 	ret = __test_case_3(fs_info, em_tree, (12 * SZ_1K));
322 
323 	return ret;
324 }
325 
326 static int __test_case_4(struct btrfs_fs_info *fs_info,
327 		struct extent_map_tree *em_tree, u64 start)
328 {
329 	struct extent_map *em;
330 	u64 len = SZ_4K;
331 	int ret;
332 
333 	em = alloc_extent_map();
334 	if (!em) {
335 		test_std_err(TEST_ALLOC_EXTENT_MAP);
336 		return -ENOMEM;
337 	}
338 
339 	/* Add [0K, 8K) */
340 	em->start = 0;
341 	em->len = SZ_8K;
342 	em->block_start = 0;
343 	em->block_len = SZ_8K;
344 	write_lock(&em_tree->lock);
345 	ret = add_extent_mapping(em_tree, em, 0);
346 	write_unlock(&em_tree->lock);
347 	if (ret < 0) {
348 		test_err("cannot add extent range [0, 8K)");
349 		goto out;
350 	}
351 	free_extent_map(em);
352 
353 	em = alloc_extent_map();
354 	if (!em) {
355 		test_std_err(TEST_ALLOC_EXTENT_MAP);
356 		ret = -ENOMEM;
357 		goto out;
358 	}
359 
360 	/* Add [8K, 32K) */
361 	em->start = SZ_8K;
362 	em->len = 24 * SZ_1K;
363 	em->block_start = SZ_16K; /* avoid merging */
364 	em->block_len = 24 * SZ_1K;
365 	write_lock(&em_tree->lock);
366 	ret = add_extent_mapping(em_tree, em, 0);
367 	write_unlock(&em_tree->lock);
368 	if (ret < 0) {
369 		test_err("cannot add extent range [8K, 32K)");
370 		goto out;
371 	}
372 	free_extent_map(em);
373 
374 	em = alloc_extent_map();
375 	if (!em) {
376 		test_std_err(TEST_ALLOC_EXTENT_MAP);
377 		ret = -ENOMEM;
378 		goto out;
379 	}
380 	/* Add [0K, 32K) */
381 	em->start = 0;
382 	em->len = SZ_32K;
383 	em->block_start = 0;
384 	em->block_len = SZ_32K;
385 	write_lock(&em_tree->lock);
386 	ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
387 	write_unlock(&em_tree->lock);
388 	if (ret) {
389 		test_err("case4 [0x%llx 0x%llx): ret %d",
390 			 start, len, ret);
391 		goto out;
392 	}
393 	if (em && (start < em->start || start + len > extent_map_end(em))) {
394 		test_err(
395 "case4 [0x%llx 0x%llx): ret %d, added wrong em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)",
396 			 start, len, ret, em->start, em->len, em->block_start,
397 			 em->block_len);
398 		ret = -EINVAL;
399 	}
400 	free_extent_map(em);
401 out:
402 	free_extent_map_tree(em_tree);
403 
404 	return ret;
405 }
406 
407 /*
408  * Test scenario:
409  *
410  * Suppose that no extent map has been loaded into memory yet.
411  * There is a file extent [0, 32K), two jobs are running concurrently
412  * against it, t1 is doing dio write to [8K, 32K) and t2 is doing dio
413  * read from [0, 4K) or [4K, 8K).
414  *
415  * t1 goes ahead of t2 and splits em [0, 32K) to em [0K, 8K) and [8K 32K).
416  *
417  *         t1                                t2
418  *  btrfs_get_blocks_direct()	       btrfs_get_blocks_direct()
419  *   -> btrfs_get_extent()              -> btrfs_get_extent()
420  *       -> lookup_extent_mapping()
421  *       -> add_extent_mapping()            -> lookup_extent_mapping()
422  *          # load [0, 32K)
423  *   -> btrfs_new_extent_direct()
424  *       -> btrfs_drop_extent_cache()
425  *          # split [0, 32K)
426  *       -> add_extent_mapping()
427  *          # add [8K, 32K)
428  *                                          -> add_extent_mapping()
429  *                                             # handle -EEXIST when adding
430  *                                             # [0, 32K)
431  */
432 static int test_case_4(struct btrfs_fs_info *fs_info,
433 		struct extent_map_tree *em_tree)
434 {
435 	int ret;
436 
437 	ret = __test_case_4(fs_info, em_tree, 0);
438 	if (ret)
439 		return ret;
440 	ret = __test_case_4(fs_info, em_tree, SZ_4K);
441 
442 	return ret;
443 }
444 
445 struct rmap_test_vector {
446 	u64 raid_type;
447 	u64 physical_start;
448 	u64 data_stripe_size;
449 	u64 num_data_stripes;
450 	u64 num_stripes;
451 	/* Assume we won't have more than 5 physical stripes */
452 	u64 data_stripe_phys_start[5];
453 	bool expected_mapped_addr;
454 	/* Physical to logical addresses */
455 	u64 mapped_logical[5];
456 };
457 
458 static int test_rmap_block(struct btrfs_fs_info *fs_info,
459 			   struct rmap_test_vector *test)
460 {
461 	struct extent_map *em;
462 	struct map_lookup *map = NULL;
463 	u64 *logical = NULL;
464 	int i, out_ndaddrs, out_stripe_len;
465 	int ret;
466 
467 	em = alloc_extent_map();
468 	if (!em) {
469 		test_std_err(TEST_ALLOC_EXTENT_MAP);
470 		return -ENOMEM;
471 	}
472 
473 	map = kmalloc(map_lookup_size(test->num_stripes), GFP_KERNEL);
474 	if (!map) {
475 		kfree(em);
476 		test_std_err(TEST_ALLOC_EXTENT_MAP);
477 		return -ENOMEM;
478 	}
479 
480 	set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
481 	/* Start at 4GiB logical address */
482 	em->start = SZ_4G;
483 	em->len = test->data_stripe_size * test->num_data_stripes;
484 	em->block_len = em->len;
485 	em->orig_block_len = test->data_stripe_size;
486 	em->map_lookup = map;
487 
488 	map->num_stripes = test->num_stripes;
489 	map->stripe_len = BTRFS_STRIPE_LEN;
490 	map->type = test->raid_type;
491 
492 	for (i = 0; i < map->num_stripes; i++) {
493 		struct btrfs_device *dev = btrfs_alloc_dummy_device(fs_info);
494 
495 		if (IS_ERR(dev)) {
496 			test_err("cannot allocate device");
497 			ret = PTR_ERR(dev);
498 			goto out;
499 		}
500 		map->stripes[i].dev = dev;
501 		map->stripes[i].physical = test->data_stripe_phys_start[i];
502 	}
503 
504 	write_lock(&fs_info->mapping_tree.lock);
505 	ret = add_extent_mapping(&fs_info->mapping_tree, em, 0);
506 	write_unlock(&fs_info->mapping_tree.lock);
507 	if (ret) {
508 		test_err("error adding block group mapping to mapping tree");
509 		goto out_free;
510 	}
511 
512 	ret = btrfs_rmap_block(fs_info, em->start, NULL, btrfs_sb_offset(1),
513 			       &logical, &out_ndaddrs, &out_stripe_len);
514 	if (ret || (out_ndaddrs == 0 && test->expected_mapped_addr)) {
515 		test_err("didn't rmap anything but expected %d",
516 			 test->expected_mapped_addr);
517 		goto out;
518 	}
519 
520 	if (out_stripe_len != BTRFS_STRIPE_LEN) {
521 		test_err("calculated stripe length doesn't match");
522 		goto out;
523 	}
524 
525 	if (out_ndaddrs != test->expected_mapped_addr) {
526 		for (i = 0; i < out_ndaddrs; i++)
527 			test_msg("mapped %llu", logical[i]);
528 		test_err("unexpected number of mapped addresses: %d", out_ndaddrs);
529 		goto out;
530 	}
531 
532 	for (i = 0; i < out_ndaddrs; i++) {
533 		if (logical[i] != test->mapped_logical[i]) {
534 			test_err("unexpected logical address mapped");
535 			goto out;
536 		}
537 	}
538 
539 	ret = 0;
540 out:
541 	write_lock(&fs_info->mapping_tree.lock);
542 	remove_extent_mapping(&fs_info->mapping_tree, em);
543 	write_unlock(&fs_info->mapping_tree.lock);
544 	/* For us */
545 	free_extent_map(em);
546 out_free:
547 	/* For the tree */
548 	free_extent_map(em);
549 	kfree(logical);
550 	return ret;
551 }
552 
553 int btrfs_test_extent_map(void)
554 {
555 	struct btrfs_fs_info *fs_info = NULL;
556 	struct extent_map_tree *em_tree;
557 	int ret = 0, i;
558 	struct rmap_test_vector rmap_tests[] = {
559 		{
560 			/*
561 			 * Test a chunk with 2 data stripes one of which
562 			 * intersects the physical address of the super block
563 			 * is correctly recognised.
564 			 */
565 			.raid_type = BTRFS_BLOCK_GROUP_RAID1,
566 			.physical_start = SZ_64M - SZ_4M,
567 			.data_stripe_size = SZ_256M,
568 			.num_data_stripes = 2,
569 			.num_stripes = 2,
570 			.data_stripe_phys_start =
571 				{SZ_64M - SZ_4M, SZ_64M - SZ_4M + SZ_256M},
572 			.expected_mapped_addr = true,
573 			.mapped_logical= {SZ_4G + SZ_4M}
574 		},
575 		{
576 			/*
577 			 * Test that out-of-range physical addresses are
578 			 * ignored
579 			 */
580 
581 			 /* SINGLE chunk type */
582 			.raid_type = 0,
583 			.physical_start = SZ_4G,
584 			.data_stripe_size = SZ_256M,
585 			.num_data_stripes = 1,
586 			.num_stripes = 1,
587 			.data_stripe_phys_start = {SZ_256M},
588 			.expected_mapped_addr = false,
589 			.mapped_logical = {0}
590 		}
591 	};
592 
593 	test_msg("running extent_map tests");
594 
595 	/*
596 	 * Note: the fs_info is not set up completely, we only need
597 	 * fs_info::fsid for the tracepoint.
598 	 */
599 	fs_info = btrfs_alloc_dummy_fs_info(PAGE_SIZE, PAGE_SIZE);
600 	if (!fs_info) {
601 		test_std_err(TEST_ALLOC_FS_INFO);
602 		return -ENOMEM;
603 	}
604 
605 	em_tree = kzalloc(sizeof(*em_tree), GFP_KERNEL);
606 	if (!em_tree) {
607 		ret = -ENOMEM;
608 		goto out;
609 	}
610 
611 	extent_map_tree_init(em_tree);
612 
613 	ret = test_case_1(fs_info, em_tree);
614 	if (ret)
615 		goto out;
616 	ret = test_case_2(fs_info, em_tree);
617 	if (ret)
618 		goto out;
619 	ret = test_case_3(fs_info, em_tree);
620 	if (ret)
621 		goto out;
622 	ret = test_case_4(fs_info, em_tree);
623 
624 	test_msg("running rmap tests");
625 	for (i = 0; i < ARRAY_SIZE(rmap_tests); i++) {
626 		ret = test_rmap_block(fs_info, &rmap_tests[i]);
627 		if (ret)
628 			goto out;
629 	}
630 
631 out:
632 	kfree(em_tree);
633 	btrfs_free_dummy_fs_info(fs_info);
634 
635 	return ret;
636 }
637