xref: /openbmc/linux/fs/erofs/zmap.c (revision 31da107f)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2018-2019 HUAWEI, Inc.
4  *             https://www.huawei.com/
5  */
6 #include "internal.h"
7 #include <asm/unaligned.h>
8 #include <trace/events/erofs.h>
9 
/*
 * Forward declaration: z_erofs_fill_inode_lazy() below calls the mapper
 * before its definition appears in this file.
 */
static int z_erofs_do_map_blocks(struct inode *inode,
				 struct erofs_map_blocks *map, int flags);
13 
14 int z_erofs_fill_inode(struct inode *inode)
15 {
16 	struct erofs_inode *const vi = EROFS_I(inode);
17 	struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
18 
19 	if (!erofs_sb_has_big_pcluster(sbi) &&
20 	    !erofs_sb_has_ztailpacking(sbi) && !erofs_sb_has_fragments(sbi) &&
21 	    vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
22 		vi->z_advise = 0;
23 		vi->z_algorithmtype[0] = 0;
24 		vi->z_algorithmtype[1] = 0;
25 		vi->z_logical_clusterbits = LOG_BLOCK_SIZE;
26 		set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
27 	}
28 	inode->i_mapping->a_ops = &z_erofs_aops;
29 	return 0;
30 }
31 
32 static int z_erofs_fill_inode_lazy(struct inode *inode)
33 {
34 	struct erofs_inode *const vi = EROFS_I(inode);
35 	struct super_block *const sb = inode->i_sb;
36 	int err, headnr;
37 	erofs_off_t pos;
38 	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
39 	void *kaddr;
40 	struct z_erofs_map_header *h;
41 
42 	if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) {
43 		/*
44 		 * paired with smp_mb() at the end of the function to ensure
45 		 * fields will only be observed after the bit is set.
46 		 */
47 		smp_mb();
48 		return 0;
49 	}
50 
51 	if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_Z_BIT, TASK_KILLABLE))
52 		return -ERESTARTSYS;
53 
54 	err = 0;
55 	if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags))
56 		goto out_unlock;
57 
58 	pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
59 		    vi->xattr_isize, 8);
60 	kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos),
61 				   EROFS_KMAP_ATOMIC);
62 	if (IS_ERR(kaddr)) {
63 		err = PTR_ERR(kaddr);
64 		goto out_unlock;
65 	}
66 
67 	h = kaddr + erofs_blkoff(pos);
68 	/*
69 	 * if the highest bit of the 8-byte map header is set, the whole file
70 	 * is stored in the packed inode. The rest bits keeps z_fragmentoff.
71 	 */
72 	if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) {
73 		vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
74 		vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63);
75 		vi->z_tailextent_headlcn = 0;
76 		goto unmap_done;
77 	}
78 	vi->z_advise = le16_to_cpu(h->h_advise);
79 	vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
80 	vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
81 
82 	headnr = 0;
83 	if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX ||
84 	    vi->z_algorithmtype[++headnr] >= Z_EROFS_COMPRESSION_MAX) {
85 		erofs_err(sb, "unknown HEAD%u format %u for nid %llu, please upgrade kernel",
86 			  headnr + 1, vi->z_algorithmtype[headnr], vi->nid);
87 		err = -EOPNOTSUPP;
88 		goto unmap_done;
89 	}
90 
91 	vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7);
92 	if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
93 	    vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
94 			    Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
95 		erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu",
96 			  vi->nid);
97 		err = -EFSCORRUPTED;
98 		goto unmap_done;
99 	}
100 	if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION &&
101 	    !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^
102 	    !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
103 		erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu",
104 			  vi->nid);
105 		err = -EFSCORRUPTED;
106 		goto unmap_done;
107 	}
108 unmap_done:
109 	erofs_put_metabuf(&buf);
110 	if (err)
111 		goto out_unlock;
112 
113 	if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) {
114 		struct erofs_map_blocks map = {
115 			.buf = __EROFS_BUF_INITIALIZER
116 		};
117 
118 		vi->z_idata_size = le16_to_cpu(h->h_idata_size);
119 		err = z_erofs_do_map_blocks(inode, &map,
120 					    EROFS_GET_BLOCKS_FINDTAIL);
121 		erofs_put_metabuf(&map.buf);
122 
123 		if (!map.m_plen ||
124 		    erofs_blkoff(map.m_pa) + map.m_plen > EROFS_BLKSIZ) {
125 			erofs_err(sb, "invalid tail-packing pclustersize %llu",
126 				  map.m_plen);
127 			err = -EFSCORRUPTED;
128 		}
129 		if (err < 0)
130 			goto out_unlock;
131 	}
132 
133 	if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER &&
134 	    !(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) {
135 		struct erofs_map_blocks map = {
136 			.buf = __EROFS_BUF_INITIALIZER
137 		};
138 
139 		vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff);
140 		err = z_erofs_do_map_blocks(inode, &map,
141 					    EROFS_GET_BLOCKS_FINDTAIL);
142 		erofs_put_metabuf(&map.buf);
143 		if (err < 0)
144 			goto out_unlock;
145 	}
146 	/* paired with smp_mb() at the beginning of the function */
147 	smp_mb();
148 	set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
149 out_unlock:
150 	clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
151 	return err;
152 }
153 
154 struct z_erofs_maprecorder {
155 	struct inode *inode;
156 	struct erofs_map_blocks *map;
157 	void *kaddr;
158 
159 	unsigned long lcn;
160 	/* compression extent information gathered */
161 	u8  type, headtype;
162 	u16 clusterofs;
163 	u16 delta[2];
164 	erofs_blk_t pblk, compressedblks;
165 	erofs_off_t nextpackoff;
166 	bool partialref;
167 };
168 
169 static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
170 					 unsigned long lcn)
171 {
172 	struct inode *const inode = m->inode;
173 	struct erofs_inode *const vi = EROFS_I(inode);
174 	const erofs_off_t ibase = iloc(EROFS_I_SB(inode), vi->nid);
175 	const erofs_off_t pos =
176 		Z_EROFS_VLE_LEGACY_INDEX_ALIGN(ibase + vi->inode_isize +
177 					       vi->xattr_isize) +
178 		lcn * sizeof(struct z_erofs_vle_decompressed_index);
179 	struct z_erofs_vle_decompressed_index *di;
180 	unsigned int advise, type;
181 
182 	m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb,
183 				      erofs_blknr(pos), EROFS_KMAP_ATOMIC);
184 	if (IS_ERR(m->kaddr))
185 		return PTR_ERR(m->kaddr);
186 
187 	m->nextpackoff = pos + sizeof(struct z_erofs_vle_decompressed_index);
188 	m->lcn = lcn;
189 	di = m->kaddr + erofs_blkoff(pos);
190 
191 	advise = le16_to_cpu(di->di_advise);
192 	type = (advise >> Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT) &
193 		((1 << Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) - 1);
194 	switch (type) {
195 	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
196 		m->clusterofs = 1 << vi->z_logical_clusterbits;
197 		m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
198 		if (m->delta[0] & Z_EROFS_VLE_DI_D0_CBLKCNT) {
199 			if (!(vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
200 					Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
201 				DBG_BUGON(1);
202 				return -EFSCORRUPTED;
203 			}
204 			m->compressedblks = m->delta[0] &
205 				~Z_EROFS_VLE_DI_D0_CBLKCNT;
206 			m->delta[0] = 1;
207 		}
208 		m->delta[1] = le16_to_cpu(di->di_u.delta[1]);
209 		break;
210 	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
211 	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
212 	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
213 		if (advise & Z_EROFS_VLE_DI_PARTIAL_REF)
214 			m->partialref = true;
215 		m->clusterofs = le16_to_cpu(di->di_clusterofs);
216 		m->pblk = le32_to_cpu(di->di_u.blkaddr);
217 		break;
218 	default:
219 		DBG_BUGON(1);
220 		return -EOPNOTSUPP;
221 	}
222 	m->type = type;
223 	return 0;
224 }
225 
226 static unsigned int decode_compactedbits(unsigned int lobits,
227 					 unsigned int lomask,
228 					 u8 *in, unsigned int pos, u8 *type)
229 {
230 	const unsigned int v = get_unaligned_le32(in + pos / 8) >> (pos & 7);
231 	const unsigned int lo = v & lomask;
232 
233 	*type = (v >> lobits) & 3;
234 	return lo;
235 }
236 
237 static int get_compacted_la_distance(unsigned int lclusterbits,
238 				     unsigned int encodebits,
239 				     unsigned int vcnt, u8 *in, int i)
240 {
241 	const unsigned int lomask = (1 << lclusterbits) - 1;
242 	unsigned int lo, d1 = 0;
243 	u8 type;
244 
245 	DBG_BUGON(i >= vcnt);
246 
247 	do {
248 		lo = decode_compactedbits(lclusterbits, lomask,
249 					  in, encodebits * i, &type);
250 
251 		if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
252 			return d1;
253 		++d1;
254 	} while (++i < vcnt);
255 
256 	/* vcnt - 1 (Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) item */
257 	if (!(lo & Z_EROFS_VLE_DI_D0_CBLKCNT))
258 		d1 += lo - 1;
259 	return d1;
260 }
261 
/*
 * Decode the compacted index entry located at byte offset @pos, using the
 * metadata block already mapped at m->kaddr.
 *
 * @amortizedshift is log2 of the amortized bytes per entry. Only two pack
 * formats are accepted: 4 bytes per entry with 2 entries per pack, and
 * 2 bytes per entry with 16 entries per pack (the latter only when the
 * logical cluster size is 1 << 12). Anything else yields -EOPNOTSUPP.
 *
 * A pack is (vcnt << amortizedshift) bytes long; its last 4 bytes hold a
 * little-endian base block address and the bytes before it hold the vcnt
 * bit-packed entries of encodebits bits each.
 *
 * On success m->nextpackoff, m->type, m->clusterofs and m->delta[0] are
 * filled in. For NONHEAD entries m->delta[1] is also computed when
 * @lookahead is true and m->compressedblks is set for CBLKCNT entries.
 * For other entries m->pblk is derived from the pack's base block address
 * plus the number of blocks consumed by the preceding entries of the pack.
 *
 * Returns 0, -EOPNOTSUPP, or -EFSCORRUPTED for an inconsistent pack.
 */
262 static int unpack_compacted_index(struct z_erofs_maprecorder *m,
263 				  unsigned int amortizedshift,
264 				  erofs_off_t pos, bool lookahead)
265 {
266 	struct erofs_inode *const vi = EROFS_I(m->inode);
267 	const unsigned int lclusterbits = vi->z_logical_clusterbits;
268 	const unsigned int lomask = (1 << lclusterbits) - 1;
269 	unsigned int vcnt, base, lo, encodebits, nblk, eofs;
270 	int i;
271 	u8 *in, type;
272 	bool big_pcluster;
273 
	/* vcnt: number of entries stored in one pack */
274 	if (1 << amortizedshift == 4)
275 		vcnt = 2;
276 	else if (1 << amortizedshift == 2 && lclusterbits == 12)
277 		vcnt = 16;
278 	else
279 		return -EOPNOTSUPP;
280 
281 	/* it doesn't equal to round_up(..) */
282 	m->nextpackoff = round_down(pos, vcnt << amortizedshift) +
283 			 (vcnt << amortizedshift);
284 	big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
	/* bits per entry once the trailing __le32 is taken out of the pack */
285 	encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
286 	eofs = erofs_blkoff(pos);
287 	base = round_down(eofs, vcnt << amortizedshift);
288 	in = m->kaddr + base;
289 
	/* i: index of the requested entry inside its pack */
290 	i = (eofs - base) >> amortizedshift;
291 
292 	lo = decode_compactedbits(lclusterbits, lomask,
293 				  in, encodebits * i, &type);
294 	m->type = type;
295 	if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
296 		m->clusterofs = 1 << lclusterbits;
297 
298 		/* figure out lookahead_distance: delta[1] if needed */
299 		if (lookahead)
300 			m->delta[1] = get_compacted_la_distance(lclusterbits,
301 						encodebits, vcnt, in, i);
302 		if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) {
303 			if (!big_pcluster) {
304 				DBG_BUGON(1);
305 				return -EFSCORRUPTED;
306 			}
307 			m->compressedblks = lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT;
308 			m->delta[0] = 1;
309 			return 0;
310 		} else if (i + 1 != (int)vcnt) {
311 			m->delta[0] = lo;
312 			return 0;
313 		}
314 		/*
315 		 * since the last lcluster in the pack is special,
316 		 * of which lo saves delta[1] rather than delta[0].
317 		 * Hence, get delta[0] by the previous lcluster indirectly.
318 		 */
319 		lo = decode_compactedbits(lclusterbits, lomask,
320 					  in, encodebits * (i - 1), &type);
321 		if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
322 			lo = 0;
323 		else if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT)
324 			lo = 1;
325 		m->delta[0] = lo + 1;
326 		return 0;
327 	}
328 	m->clusterofs = lo;
329 	m->delta[0] = 0;
330 	/* figure out blkaddr (pblk) for HEAD lclusters */
331 	if (!big_pcluster) {
		/* walk backwards through the pack, counting earlier entries */
332 		nblk = 1;
333 		while (i > 0) {
334 			--i;
335 			lo = decode_compactedbits(lclusterbits, lomask,
336 						  in, encodebits * i, &type);
			/* a NONHEAD entry moves i back by its stored distance */
337 			if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
338 				i -= lo;
339 
340 			if (i >= 0)
341 				++nblk;
342 		}
343 	} else {
344 		nblk = 0;
345 		while (i > 0) {
346 			--i;
347 			lo = decode_compactedbits(lclusterbits, lomask,
348 						  in, encodebits * i, &type);
349 			if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
350 				if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) {
					/* CBLKCNT entry: add its block count */
351 					--i;
352 					nblk += lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT;
353 					continue;
354 				}
355 				/* bigpcluster shouldn't have plain d0 == 1 */
356 				if (lo <= 1) {
357 					DBG_BUGON(1);
358 					return -EFSCORRUPTED;
359 				}
360 				i -= lo - 2;
361 				continue;
362 			}
363 			++nblk;
364 		}
365 	}
	/* the pack's trailing __le32 is the base block address */
366 	in += (vcnt << amortizedshift) - sizeof(__le32);
367 	m->pblk = le32_to_cpu(*(__le32 *)in) + nblk;
368 	return 0;
369 }
370 
371 static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
372 					    unsigned long lcn, bool lookahead)
373 {
374 	struct inode *const inode = m->inode;
375 	struct erofs_inode *const vi = EROFS_I(inode);
376 	const unsigned int lclusterbits = vi->z_logical_clusterbits;
377 	const erofs_off_t ebase = ALIGN(iloc(EROFS_I_SB(inode), vi->nid) +
378 					vi->inode_isize + vi->xattr_isize, 8) +
379 		sizeof(struct z_erofs_map_header);
380 	const unsigned int totalidx = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
381 	unsigned int compacted_4b_initial, compacted_2b;
382 	unsigned int amortizedshift;
383 	erofs_off_t pos;
384 
385 	if (lclusterbits != 12)
386 		return -EOPNOTSUPP;
387 
388 	if (lcn >= totalidx)
389 		return -EINVAL;
390 
391 	m->lcn = lcn;
392 	/* used to align to 32-byte (compacted_2b) alignment */
393 	compacted_4b_initial = (32 - ebase % 32) / 4;
394 	if (compacted_4b_initial == 32 / 4)
395 		compacted_4b_initial = 0;
396 
397 	if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) &&
398 	    compacted_4b_initial < totalidx)
399 		compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);
400 	else
401 		compacted_2b = 0;
402 
403 	pos = ebase;
404 	if (lcn < compacted_4b_initial) {
405 		amortizedshift = 2;
406 		goto out;
407 	}
408 	pos += compacted_4b_initial * 4;
409 	lcn -= compacted_4b_initial;
410 
411 	if (lcn < compacted_2b) {
412 		amortizedshift = 1;
413 		goto out;
414 	}
415 	pos += compacted_2b * 2;
416 	lcn -= compacted_2b;
417 	amortizedshift = 2;
418 out:
419 	pos += lcn * (1 << amortizedshift);
420 	m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb,
421 				      erofs_blknr(pos), EROFS_KMAP_ATOMIC);
422 	if (IS_ERR(m->kaddr))
423 		return PTR_ERR(m->kaddr);
424 	return unpack_compacted_index(m, amortizedshift, pos, lookahead);
425 }
426 
427 static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m,
428 					  unsigned int lcn, bool lookahead)
429 {
430 	const unsigned int datamode = EROFS_I(m->inode)->datalayout;
431 
432 	if (datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY)
433 		return legacy_load_cluster_from_disk(m, lcn);
434 
435 	if (datamode == EROFS_INODE_FLAT_COMPRESSION)
436 		return compacted_load_cluster_from_disk(m, lcn, lookahead);
437 
438 	return -EINVAL;
439 }
440 
441 static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
442 				   unsigned int lookback_distance)
443 {
444 	struct erofs_inode *const vi = EROFS_I(m->inode);
445 	const unsigned int lclusterbits = vi->z_logical_clusterbits;
446 
447 	while (m->lcn >= lookback_distance) {
448 		unsigned long lcn = m->lcn - lookback_distance;
449 		int err;
450 
451 		/* load extent head logical cluster if needed */
452 		err = z_erofs_load_cluster_from_disk(m, lcn, false);
453 		if (err)
454 			return err;
455 
456 		switch (m->type) {
457 		case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
458 			if (!m->delta[0]) {
459 				erofs_err(m->inode->i_sb,
460 					  "invalid lookback distance 0 @ nid %llu",
461 					  vi->nid);
462 				DBG_BUGON(1);
463 				return -EFSCORRUPTED;
464 			}
465 			lookback_distance = m->delta[0];
466 			continue;
467 		case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
468 		case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
469 		case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
470 			m->headtype = m->type;
471 			m->map->m_la = (lcn << lclusterbits) | m->clusterofs;
472 			return 0;
473 		default:
474 			erofs_err(m->inode->i_sb,
475 				  "unknown type %u @ lcn %lu of nid %llu",
476 				  m->type, lcn, vi->nid);
477 			DBG_BUGON(1);
478 			return -EOPNOTSUPP;
479 		}
480 	}
481 
482 	erofs_err(m->inode->i_sb, "bogus lookback distance @ nid %llu",
483 		  vi->nid);
484 	DBG_BUGON(1);
485 	return -EFSCORRUPTED;
486 }
487 
/*
 * Compute map->m_plen, the on-disk length of the extent whose head
 * lcluster is currently recorded in @m.
 *
 * For a PLAIN head, or a HEAD1/HEAD2 head whose matching big pcluster
 * advise bit is not set, the length is exactly one lcluster. Otherwise the
 * block count comes from m->compressedblks if it is already known, or from
 * the lcluster following the head, which is loaded here.
 *
 * @initial_lcn is only used by a debugging assertion.
 *
 * Returns 0 on success, the error from loading the next index, or
 * -EFSCORRUPTED if no valid CBLKCNT can be found.
 */
488 static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
489 					    unsigned int initial_lcn)
490 {
491 	struct erofs_inode *const vi = EROFS_I(m->inode);
492 	struct erofs_map_blocks *const map = m->map;
493 	const unsigned int lclusterbits = vi->z_logical_clusterbits;
494 	unsigned long lcn;
495 	int err;
496 
497 	DBG_BUGON(m->type != Z_EROFS_VLE_CLUSTER_TYPE_PLAIN &&
498 		  m->type != Z_EROFS_VLE_CLUSTER_TYPE_HEAD1 &&
499 		  m->type != Z_EROFS_VLE_CLUSTER_TYPE_HEAD2);
500 	DBG_BUGON(m->type != m->headtype);
501 
	/* not a big pcluster: the extent occupies exactly one lcluster */
502 	if (m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN ||
503 	    ((m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD1) &&
504 	     !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) ||
505 	    ((m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) &&
506 	     !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
507 		map->m_plen = 1ULL << lclusterbits;
508 		return 0;
509 	}
510 	lcn = m->lcn + 1;
	/* a block count was already recorded while parsing earlier indexes */
511 	if (m->compressedblks)
512 		goto out;
513 
514 	err = z_erofs_load_cluster_from_disk(m, lcn, false);
515 	if (err)
516 		return err;
517 
518 	/*
519 	 * If the 1st NONHEAD lcluster has already been handled initially w/o
520 	 * valid compressedblks, which means at least it mustn't be CBLKCNT, or
521 	 * an internal implementation error is detected.
522 	 *
523 	 * The following code can also handle it properly anyway, but let's
524 	 * BUG_ON in the debugging mode only for developers to notice that.
525 	 */
526 	DBG_BUGON(lcn == initial_lcn &&
527 		  m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD);
528 
529 	switch (m->type) {
530 	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
531 	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
532 	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
533 		/*
534 		 * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
535 		 * rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
536 		 */
537 		m->compressedblks = 1 << (lclusterbits - LOG_BLOCK_SIZE);
538 		break;
539 	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
		/* the parsers report delta[0] == 1 for a CBLKCNT lcluster */
540 		if (m->delta[0] != 1)
541 			goto err_bonus_cblkcnt;
542 		if (m->compressedblks)
543 			break;
544 		fallthrough;
545 	default:
546 		erofs_err(m->inode->i_sb,
547 			  "cannot found CBLKCNT @ lcn %lu of nid %llu",
548 			  lcn, vi->nid);
549 		DBG_BUGON(1);
550 		return -EFSCORRUPTED;
551 	}
552 out:
553 	map->m_plen = (u64)m->compressedblks << LOG_BLOCK_SIZE;
554 	return 0;
555 err_bonus_cblkcnt:
556 	erofs_err(m->inode->i_sb,
557 		  "bogus CBLKCNT @ lcn %lu of nid %llu",
558 		  lcn, vi->nid);
559 	DBG_BUGON(1);
560 	return -EFSCORRUPTED;
561 }
562 
563 static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
564 {
565 	struct inode *inode = m->inode;
566 	struct erofs_inode *vi = EROFS_I(inode);
567 	struct erofs_map_blocks *map = m->map;
568 	unsigned int lclusterbits = vi->z_logical_clusterbits;
569 	u64 lcn = m->lcn, headlcn = map->m_la >> lclusterbits;
570 	int err;
571 
572 	do {
573 		/* handle the last EOF pcluster (no next HEAD lcluster) */
574 		if ((lcn << lclusterbits) >= inode->i_size) {
575 			map->m_llen = inode->i_size - map->m_la;
576 			return 0;
577 		}
578 
579 		err = z_erofs_load_cluster_from_disk(m, lcn, true);
580 		if (err)
581 			return err;
582 
583 		if (m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
584 			DBG_BUGON(!m->delta[1] &&
585 				  m->clusterofs != 1 << lclusterbits);
586 		} else if (m->type == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN ||
587 			   m->type == Z_EROFS_VLE_CLUSTER_TYPE_HEAD1 ||
588 			   m->type == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) {
589 			/* go on until the next HEAD lcluster */
590 			if (lcn != headlcn)
591 				break;
592 			m->delta[1] = 1;
593 		} else {
594 			erofs_err(inode->i_sb, "unknown type %u @ lcn %llu of nid %llu",
595 				  m->type, lcn, vi->nid);
596 			DBG_BUGON(1);
597 			return -EOPNOTSUPP;
598 		}
599 		lcn += m->delta[1];
600 	} while (m->delta[1]);
601 
602 	map->m_llen = (lcn << lclusterbits) + m->clusterofs - map->m_la;
603 	return 0;
604 }
605 
606 static int z_erofs_do_map_blocks(struct inode *inode,
607 				 struct erofs_map_blocks *map,
608 				 int flags)
609 {
610 	struct erofs_inode *const vi = EROFS_I(inode);
611 	bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER;
612 	bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
613 	struct z_erofs_maprecorder m = {
614 		.inode = inode,
615 		.map = map,
616 	};
617 	int err = 0;
618 	unsigned int lclusterbits, endoff;
619 	unsigned long initial_lcn;
620 	unsigned long long ofs, end;
621 
622 	lclusterbits = vi->z_logical_clusterbits;
623 	ofs = flags & EROFS_GET_BLOCKS_FINDTAIL ? inode->i_size - 1 : map->m_la;
624 	initial_lcn = ofs >> lclusterbits;
625 	endoff = ofs & ((1 << lclusterbits) - 1);
626 
627 	err = z_erofs_load_cluster_from_disk(&m, initial_lcn, false);
628 	if (err)
629 		goto unmap_out;
630 
631 	if (ztailpacking && (flags & EROFS_GET_BLOCKS_FINDTAIL))
632 		vi->z_idataoff = m.nextpackoff;
633 
634 	map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED;
635 	end = (m.lcn + 1ULL) << lclusterbits;
636 
637 	switch (m.type) {
638 	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
639 	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
640 	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
641 		if (endoff >= m.clusterofs) {
642 			m.headtype = m.type;
643 			map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
644 			/*
645 			 * For ztailpacking files, in order to inline data more
646 			 * effectively, special EOF lclusters are now supported
647 			 * which can have three parts at most.
648 			 */
649 			if (ztailpacking && end > inode->i_size)
650 				end = inode->i_size;
651 			break;
652 		}
653 		/* m.lcn should be >= 1 if endoff < m.clusterofs */
654 		if (!m.lcn) {
655 			erofs_err(inode->i_sb,
656 				  "invalid logical cluster 0 at nid %llu",
657 				  vi->nid);
658 			err = -EFSCORRUPTED;
659 			goto unmap_out;
660 		}
661 		end = (m.lcn << lclusterbits) | m.clusterofs;
662 		map->m_flags |= EROFS_MAP_FULL_MAPPED;
663 		m.delta[0] = 1;
664 		fallthrough;
665 	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
666 		/* get the corresponding first chunk */
667 		err = z_erofs_extent_lookback(&m, m.delta[0]);
668 		if (err)
669 			goto unmap_out;
670 		break;
671 	default:
672 		erofs_err(inode->i_sb,
673 			  "unknown type %u @ offset %llu of nid %llu",
674 			  m.type, ofs, vi->nid);
675 		err = -EOPNOTSUPP;
676 		goto unmap_out;
677 	}
678 	if (m.partialref)
679 		map->m_flags |= EROFS_MAP_PARTIAL_REF;
680 	map->m_llen = end - map->m_la;
681 
682 	if (flags & EROFS_GET_BLOCKS_FINDTAIL) {
683 		vi->z_tailextent_headlcn = m.lcn;
684 		/* for non-compact indexes, fragmentoff is 64 bits */
685 		if (fragment &&
686 		    vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY)
687 			vi->z_fragmentoff |= (u64)m.pblk << 32;
688 	}
689 	if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) {
690 		map->m_flags |= EROFS_MAP_META;
691 		map->m_pa = vi->z_idataoff;
692 		map->m_plen = vi->z_idata_size;
693 	} else if (fragment && m.lcn == vi->z_tailextent_headlcn) {
694 		map->m_flags |= EROFS_MAP_FRAGMENT;
695 	} else {
696 		map->m_pa = blknr_to_addr(m.pblk);
697 		err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
698 		if (err)
699 			goto out;
700 	}
701 
702 	if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN) {
703 		if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER)
704 			map->m_algorithmformat =
705 				Z_EROFS_COMPRESSION_INTERLACED;
706 		else
707 			map->m_algorithmformat =
708 				Z_EROFS_COMPRESSION_SHIFTED;
709 	} else if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) {
710 		map->m_algorithmformat = vi->z_algorithmtype[1];
711 	} else {
712 		map->m_algorithmformat = vi->z_algorithmtype[0];
713 	}
714 
715 	if ((flags & EROFS_GET_BLOCKS_FIEMAP) ||
716 	    ((flags & EROFS_GET_BLOCKS_READMORE) &&
717 	     map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA &&
718 	     map->m_llen >= EROFS_BLKSIZ)) {
719 		err = z_erofs_get_extent_decompressedlen(&m);
720 		if (!err)
721 			map->m_flags |= EROFS_MAP_FULL_MAPPED;
722 	}
723 unmap_out:
724 	erofs_unmap_metabuf(&m.map->buf);
725 
726 out:
727 	erofs_dbg("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o",
728 		  __func__, map->m_la, map->m_pa,
729 		  map->m_llen, map->m_plen, map->m_flags);
730 
731 	return err;
732 }
733 
734 int z_erofs_map_blocks_iter(struct inode *inode,
735 			    struct erofs_map_blocks *map,
736 			    int flags)
737 {
738 	struct erofs_inode *const vi = EROFS_I(inode);
739 	int err = 0;
740 
741 	trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
742 
743 	/* when trying to read beyond EOF, leave it unmapped */
744 	if (map->m_la >= inode->i_size) {
745 		map->m_llen = map->m_la + 1 - inode->i_size;
746 		map->m_la = inode->i_size;
747 		map->m_flags = 0;
748 		goto out;
749 	}
750 
751 	err = z_erofs_fill_inode_lazy(inode);
752 	if (err)
753 		goto out;
754 
755 	if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) &&
756 	    !vi->z_tailextent_headlcn) {
757 		map->m_la = 0;
758 		map->m_llen = inode->i_size;
759 		map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED |
760 				EROFS_MAP_FRAGMENT;
761 		goto out;
762 	}
763 
764 	err = z_erofs_do_map_blocks(inode, map, flags);
765 out:
766 	trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
767 
768 	/* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */
769 	DBG_BUGON(err < 0 && err != -ENOMEM);
770 	return err;
771 }
772 
773 static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
774 				loff_t length, unsigned int flags,
775 				struct iomap *iomap, struct iomap *srcmap)
776 {
777 	int ret;
778 	struct erofs_map_blocks map = { .m_la = offset };
779 
780 	ret = z_erofs_map_blocks_iter(inode, &map, EROFS_GET_BLOCKS_FIEMAP);
781 	erofs_put_metabuf(&map.buf);
782 	if (ret < 0)
783 		return ret;
784 
785 	iomap->bdev = inode->i_sb->s_bdev;
786 	iomap->offset = map.m_la;
787 	iomap->length = map.m_llen;
788 	if (map.m_flags & EROFS_MAP_MAPPED) {
789 		iomap->type = IOMAP_MAPPED;
790 		iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ?
791 			      IOMAP_NULL_ADDR : map.m_pa;
792 	} else {
793 		iomap->type = IOMAP_HOLE;
794 		iomap->addr = IOMAP_NULL_ADDR;
795 		/*
796 		 * No strict rule how to describe extents for post EOF, yet
797 		 * we need do like below. Otherwise, iomap itself will get
798 		 * into an endless loop on post EOF.
799 		 */
800 		if (iomap->offset >= inode->i_size)
801 			iomap->length = length + map.m_la - offset;
802 	}
803 	iomap->flags = 0;
804 	return 0;
805 }
806 
/* iomap operations for compressed inodes; only the begin callback is set. */
807 const struct iomap_ops z_erofs_iomap_report_ops = {
808 	.iomap_begin = z_erofs_iomap_begin_report,
809 };
810