1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2018-2019 HUAWEI, Inc. 4 * https://www.huawei.com/ 5 * Created by Gao Xiang <gaoxiang25@huawei.com> 6 */ 7 #include "internal.h" 8 #include <asm/unaligned.h> 9 #include <trace/events/erofs.h> 10 11 int z_erofs_fill_inode(struct inode *inode) 12 { 13 struct erofs_inode *const vi = EROFS_I(inode); 14 struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb); 15 16 if (!erofs_sb_has_big_pcluster(sbi) && 17 vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) { 18 vi->z_advise = 0; 19 vi->z_algorithmtype[0] = 0; 20 vi->z_algorithmtype[1] = 0; 21 vi->z_logical_clusterbits = LOG_BLOCK_SIZE; 22 set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); 23 } 24 inode->i_mapping->a_ops = &z_erofs_aops; 25 return 0; 26 } 27 28 static int z_erofs_fill_inode_lazy(struct inode *inode) 29 { 30 struct erofs_inode *const vi = EROFS_I(inode); 31 struct super_block *const sb = inode->i_sb; 32 int err; 33 erofs_off_t pos; 34 struct page *page; 35 void *kaddr; 36 struct z_erofs_map_header *h; 37 38 if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) { 39 /* 40 * paired with smp_mb() at the end of the function to ensure 41 * fields will only be observed after the bit is set. 42 */ 43 smp_mb(); 44 return 0; 45 } 46 47 if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_Z_BIT, TASK_KILLABLE)) 48 return -ERESTARTSYS; 49 50 err = 0; 51 if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) 52 goto out_unlock; 53 54 DBG_BUGON(!erofs_sb_has_big_pcluster(EROFS_SB(sb)) && 55 vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY); 56 57 pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize + 58 vi->xattr_isize, 8); 59 page = erofs_get_meta_page(sb, erofs_blknr(pos)); 60 if (IS_ERR(page)) { 61 err = PTR_ERR(page); 62 goto out_unlock; 63 } 64 65 kaddr = kmap_atomic(page); 66 67 h = kaddr + erofs_blkoff(pos); 68 vi->z_advise = le16_to_cpu(h->h_advise); 69 vi->z_algorithmtype[0] = h->h_algorithmtype & 15; 70 vi->z_algorithmtype[1] = h->h_algorithmtype >> 4; 71 72 if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX) { 73 erofs_err(sb, "unknown compression format %u for nid %llu, please upgrade kernel", 74 vi->z_algorithmtype[0], vi->nid); 75 err = -EOPNOTSUPP; 76 goto unmap_done; 77 } 78 79 vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7); 80 if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) && 81 vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 | 82 Z_EROFS_ADVISE_BIG_PCLUSTER_2)) { 83 erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu", 84 vi->nid); 85 err = -EFSCORRUPTED; 86 goto unmap_done; 87 } 88 if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION && 89 !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^ 90 !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) { 91 erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu", 92 vi->nid); 93 err = -EFSCORRUPTED; 94 goto unmap_done; 95 } 96 /* paired with smp_mb() at the beginning of the function */ 97 smp_mb(); 98 set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); 99 unmap_done: 100 kunmap_atomic(kaddr); 101 unlock_page(page); 102 put_page(page); 103 out_unlock: 104 clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags); 105 return err; 106 } 107 108 struct z_erofs_maprecorder { 109 struct inode *inode; 110 struct erofs_map_blocks *map; 111 void *kaddr; 112 113 unsigned long lcn; 114 /* compression extent information gathered */ 115 u8 type; 116 u16 clusterofs; 117 u16 delta[2]; 118 erofs_blk_t pblk, compressedlcs; 119 }; 120 121 static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m, 122 erofs_blk_t eblk) 123 { 124 struct super_block *const sb = m->inode->i_sb; 125 struct erofs_map_blocks *const map = m->map; 126 struct page *mpage = map->mpage; 127 128 if (mpage) { 129 if (mpage->index == eblk) { 130 if (!m->kaddr) 131 m->kaddr = kmap_atomic(mpage); 132 return 0; 133 } 134 135 if (m->kaddr) { 136 kunmap_atomic(m->kaddr); 137 m->kaddr = NULL; 138 } 139 put_page(mpage); 140 } 141 142 mpage = erofs_get_meta_page(sb, eblk); 143 if (IS_ERR(mpage)) { 144 map->mpage = NULL; 145 return PTR_ERR(mpage); 146 } 147 m->kaddr = kmap_atomic(mpage); 148 unlock_page(mpage); 149 map->mpage = mpage; 150 return 0; 151 } 152 153 static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m, 154 unsigned long lcn) 155 { 156 struct inode *const inode = m->inode; 157 struct erofs_inode *const vi = EROFS_I(inode); 158 const erofs_off_t ibase = iloc(EROFS_I_SB(inode), vi->nid); 159 const erofs_off_t pos = 160 Z_EROFS_VLE_LEGACY_INDEX_ALIGN(ibase + vi->inode_isize + 161 vi->xattr_isize) + 162 lcn * sizeof(struct z_erofs_vle_decompressed_index); 163 struct z_erofs_vle_decompressed_index *di; 164 unsigned int advise, type; 165 int err; 166 167 err = z_erofs_reload_indexes(m, erofs_blknr(pos)); 168 if (err) 169 return err; 170 171 m->lcn = lcn; 172 di = m->kaddr + erofs_blkoff(pos); 173 174 advise = le16_to_cpu(di->di_advise); 175 type = (advise >> Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT) & 176 ((1 << Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) - 1); 177 switch (type) { 178 case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: 179 m->clusterofs = 1 << vi->z_logical_clusterbits; 180 m->delta[0] = le16_to_cpu(di->di_u.delta[0]); 181 if (m->delta[0] & Z_EROFS_VLE_DI_D0_CBLKCNT) { 182 if (!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) { 183 DBG_BUGON(1); 184 return -EFSCORRUPTED; 185 } 186 m->compressedlcs = m->delta[0] & 187 ~Z_EROFS_VLE_DI_D0_CBLKCNT; 188 m->delta[0] = 1; 189 } 190 m->delta[1] = le16_to_cpu(di->di_u.delta[1]); 191 break; 192 case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: 193 case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: 194 m->clusterofs = le16_to_cpu(di->di_clusterofs); 195 m->pblk = le32_to_cpu(di->di_u.blkaddr); 196 break; 197 default: 198 DBG_BUGON(1); 199 return -EOPNOTSUPP; 200 } 201 m->type = type; 202 return 0; 203 } 204 205 static unsigned int decode_compactedbits(unsigned int lobits, 206 unsigned int lomask, 207 u8 *in, unsigned int pos, u8 *type) 208 { 209 const unsigned int v = get_unaligned_le32(in + pos / 8) >> (pos & 7); 210 const unsigned int lo = v & lomask; 211 212 *type = (v >> lobits) & 3; 213 return lo; 214 } 215 216 static int unpack_compacted_index(struct z_erofs_maprecorder *m, 217 unsigned int amortizedshift, 218 unsigned int eofs) 219 { 220 struct erofs_inode *const vi = EROFS_I(m->inode); 221 const unsigned int lclusterbits = vi->z_logical_clusterbits; 222 const unsigned int lomask = (1 << lclusterbits) - 1; 223 unsigned int vcnt, base, lo, encodebits, nblk; 224 int i; 225 u8 *in, type; 226 bool big_pcluster; 227 228 if (1 << amortizedshift == 4) 229 vcnt = 2; 230 else if (1 << amortizedshift == 2 && lclusterbits == 12) 231 vcnt = 16; 232 else 233 return -EOPNOTSUPP; 234 235 big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1; 236 encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt; 237 base = round_down(eofs, vcnt << amortizedshift); 238 in = m->kaddr + base; 239 240 i = (eofs - base) >> amortizedshift; 241 242 lo = decode_compactedbits(lclusterbits, lomask, 243 in, encodebits * i, &type); 244 m->type = type; 245 if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) { 246 m->clusterofs = 1 << lclusterbits; 247 if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) { 248 if (!big_pcluster) { 249 DBG_BUGON(1); 250 return -EFSCORRUPTED; 251 } 252 m->compressedlcs = lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT; 253 m->delta[0] = 1; 254 return 0; 255 } else if (i + 1 != (int)vcnt) { 256 m->delta[0] = lo; 257 return 0; 258 } 259 /* 260 * since the last lcluster in the pack is special, 261 * of which lo saves delta[1] rather than delta[0]. 262 * Hence, get delta[0] by the previous lcluster indirectly. 263 */ 264 lo = decode_compactedbits(lclusterbits, lomask, 265 in, encodebits * (i - 1), &type); 266 if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) 267 lo = 0; 268 else if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) 269 lo = 1; 270 m->delta[0] = lo + 1; 271 return 0; 272 } 273 m->clusterofs = lo; 274 m->delta[0] = 0; 275 /* figout out blkaddr (pblk) for HEAD lclusters */ 276 if (!big_pcluster) { 277 nblk = 1; 278 while (i > 0) { 279 --i; 280 lo = decode_compactedbits(lclusterbits, lomask, 281 in, encodebits * i, &type); 282 if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) 283 i -= lo; 284 285 if (i >= 0) 286 ++nblk; 287 } 288 } else { 289 nblk = 0; 290 while (i > 0) { 291 --i; 292 lo = decode_compactedbits(lclusterbits, lomask, 293 in, encodebits * i, &type); 294 if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) { 295 if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) { 296 --i; 297 nblk += lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT; 298 continue; 299 } 300 /* bigpcluster shouldn't have plain d0 == 1 */ 301 if (lo <= 1) { 302 DBG_BUGON(1); 303 return -EFSCORRUPTED; 304 } 305 i -= lo - 2; 306 continue; 307 } 308 ++nblk; 309 } 310 } 311 in += (vcnt << amortizedshift) - sizeof(__le32); 312 m->pblk = le32_to_cpu(*(__le32 *)in) + nblk; 313 return 0; 314 } 315 316 static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m, 317 unsigned long lcn) 318 { 319 struct inode *const inode = m->inode; 320 struct erofs_inode *const vi = EROFS_I(inode); 321 const unsigned int lclusterbits = vi->z_logical_clusterbits; 322 const erofs_off_t ebase = ALIGN(iloc(EROFS_I_SB(inode), vi->nid) + 323 vi->inode_isize + vi->xattr_isize, 8) + 324 sizeof(struct z_erofs_map_header); 325 const unsigned int totalidx = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ); 326 unsigned int compacted_4b_initial, compacted_2b; 327 unsigned int amortizedshift; 328 erofs_off_t pos; 329 int err; 330 331 if (lclusterbits != 12) 332 return -EOPNOTSUPP; 333 334 if (lcn >= totalidx) 335 return -EINVAL; 336 337 m->lcn = lcn; 338 /* used to align to 32-byte (compacted_2b) alignment */ 339 compacted_4b_initial = (32 - ebase % 32) / 4; 340 if (compacted_4b_initial == 32 / 4) 341 compacted_4b_initial = 0; 342 343 if (vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) 344 compacted_2b = rounddown(totalidx - compacted_4b_initial, 16); 345 else 346 compacted_2b = 0; 347 348 pos = ebase; 349 if (lcn < compacted_4b_initial) { 350 amortizedshift = 2; 351 goto out; 352 } 353 pos += compacted_4b_initial * 4; 354 lcn -= compacted_4b_initial; 355 356 if (lcn < compacted_2b) { 357 amortizedshift = 1; 358 goto out; 359 } 360 pos += compacted_2b * 2; 361 lcn -= compacted_2b; 362 amortizedshift = 2; 363 out: 364 pos += lcn * (1 << amortizedshift); 365 err = z_erofs_reload_indexes(m, erofs_blknr(pos)); 366 if (err) 367 return err; 368 return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos)); 369 } 370 371 static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m, 372 unsigned int lcn) 373 { 374 const unsigned int datamode = EROFS_I(m->inode)->datalayout; 375 376 if (datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY) 377 return legacy_load_cluster_from_disk(m, lcn); 378 379 if (datamode == EROFS_INODE_FLAT_COMPRESSION) 380 return compacted_load_cluster_from_disk(m, lcn); 381 382 return -EINVAL; 383 } 384 385 static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, 386 unsigned int lookback_distance) 387 { 388 struct erofs_inode *const vi = EROFS_I(m->inode); 389 struct erofs_map_blocks *const map = m->map; 390 const unsigned int lclusterbits = vi->z_logical_clusterbits; 391 unsigned long lcn = m->lcn; 392 int err; 393 394 if (lcn < lookback_distance) { 395 erofs_err(m->inode->i_sb, 396 "bogus lookback distance @ nid %llu", vi->nid); 397 DBG_BUGON(1); 398 return -EFSCORRUPTED; 399 } 400 401 /* load extent head logical cluster if needed */ 402 lcn -= lookback_distance; 403 err = z_erofs_load_cluster_from_disk(m, lcn); 404 if (err) 405 return err; 406 407 switch (m->type) { 408 case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: 409 if (!m->delta[0]) { 410 erofs_err(m->inode->i_sb, 411 "invalid lookback distance 0 @ nid %llu", 412 vi->nid); 413 DBG_BUGON(1); 414 return -EFSCORRUPTED; 415 } 416 return z_erofs_extent_lookback(m, m->delta[0]); 417 case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: 418 map->m_flags &= ~EROFS_MAP_ZIPPED; 419 fallthrough; 420 case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: 421 map->m_la = (lcn << lclusterbits) | m->clusterofs; 422 break; 423 default: 424 erofs_err(m->inode->i_sb, 425 "unknown type %u @ lcn %lu of nid %llu", 426 m->type, lcn, vi->nid); 427 DBG_BUGON(1); 428 return -EOPNOTSUPP; 429 } 430 return 0; 431 } 432 433 static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, 434 unsigned int initial_lcn) 435 { 436 struct erofs_inode *const vi = EROFS_I(m->inode); 437 struct erofs_map_blocks *const map = m->map; 438 const unsigned int lclusterbits = vi->z_logical_clusterbits; 439 unsigned long lcn; 440 int err; 441 442 DBG_BUGON(m->type != Z_EROFS_VLE_CLUSTER_TYPE_PLAIN && 443 m->type != Z_EROFS_VLE_CLUSTER_TYPE_HEAD); 444 if (!(map->m_flags & EROFS_MAP_ZIPPED) || 445 !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) { 446 map->m_plen = 1 << lclusterbits; 447 return 0; 448 } 449 450 lcn = m->lcn + 1; 451 if (m->compressedlcs) 452 goto out; 453 if (lcn == initial_lcn) 454 goto err_bonus_cblkcnt; 455 456 err = z_erofs_load_cluster_from_disk(m, lcn); 457 if (err) 458 return err; 459 460 switch (m->type) { 461 case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: 462 if (m->delta[0] != 1) 463 goto err_bonus_cblkcnt; 464 if (m->compressedlcs) 465 break; 466 fallthrough; 467 default: 468 erofs_err(m->inode->i_sb, 469 "cannot found CBLKCNT @ lcn %lu of nid %llu", 470 lcn, vi->nid); 471 DBG_BUGON(1); 472 return -EFSCORRUPTED; 473 } 474 out: 475 map->m_plen = m->compressedlcs << lclusterbits; 476 return 0; 477 err_bonus_cblkcnt: 478 erofs_err(m->inode->i_sb, 479 "bogus CBLKCNT @ lcn %lu of nid %llu", 480 lcn, vi->nid); 481 DBG_BUGON(1); 482 return -EFSCORRUPTED; 483 } 484 485 int z_erofs_map_blocks_iter(struct inode *inode, 486 struct erofs_map_blocks *map, 487 int flags) 488 { 489 struct erofs_inode *const vi = EROFS_I(inode); 490 struct z_erofs_maprecorder m = { 491 .inode = inode, 492 .map = map, 493 }; 494 int err = 0; 495 unsigned int lclusterbits, endoff; 496 unsigned long initial_lcn; 497 unsigned long long ofs, end; 498 499 trace_z_erofs_map_blocks_iter_enter(inode, map, flags); 500 501 /* when trying to read beyond EOF, leave it unmapped */ 502 if (map->m_la >= inode->i_size) { 503 map->m_llen = map->m_la + 1 - inode->i_size; 504 map->m_la = inode->i_size; 505 map->m_flags = 0; 506 goto out; 507 } 508 509 err = z_erofs_fill_inode_lazy(inode); 510 if (err) 511 goto out; 512 513 lclusterbits = vi->z_logical_clusterbits; 514 ofs = map->m_la; 515 initial_lcn = ofs >> lclusterbits; 516 endoff = ofs & ((1 << lclusterbits) - 1); 517 518 err = z_erofs_load_cluster_from_disk(&m, initial_lcn); 519 if (err) 520 goto unmap_out; 521 522 map->m_flags = EROFS_MAP_ZIPPED; /* by default, compressed */ 523 end = (m.lcn + 1ULL) << lclusterbits; 524 525 switch (m.type) { 526 case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: 527 if (endoff >= m.clusterofs) 528 map->m_flags &= ~EROFS_MAP_ZIPPED; 529 fallthrough; 530 case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: 531 if (endoff >= m.clusterofs) { 532 map->m_la = (m.lcn << lclusterbits) | m.clusterofs; 533 break; 534 } 535 /* m.lcn should be >= 1 if endoff < m.clusterofs */ 536 if (!m.lcn) { 537 erofs_err(inode->i_sb, 538 "invalid logical cluster 0 at nid %llu", 539 vi->nid); 540 err = -EFSCORRUPTED; 541 goto unmap_out; 542 } 543 end = (m.lcn << lclusterbits) | m.clusterofs; 544 map->m_flags |= EROFS_MAP_FULL_MAPPED; 545 m.delta[0] = 1; 546 fallthrough; 547 case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: 548 /* get the corresponding first chunk */ 549 err = z_erofs_extent_lookback(&m, m.delta[0]); 550 if (err) 551 goto unmap_out; 552 break; 553 default: 554 erofs_err(inode->i_sb, 555 "unknown type %u @ offset %llu of nid %llu", 556 m.type, ofs, vi->nid); 557 err = -EOPNOTSUPP; 558 goto unmap_out; 559 } 560 561 map->m_llen = end - map->m_la; 562 map->m_pa = blknr_to_addr(m.pblk); 563 map->m_flags |= EROFS_MAP_MAPPED; 564 565 err = z_erofs_get_extent_compressedlen(&m, initial_lcn); 566 if (err) 567 goto out; 568 unmap_out: 569 if (m.kaddr) 570 kunmap_atomic(m.kaddr); 571 572 out: 573 erofs_dbg("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o", 574 __func__, map->m_la, map->m_pa, 575 map->m_llen, map->m_plen, map->m_flags); 576 577 trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err); 578 579 /* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */ 580 DBG_BUGON(err < 0 && err != -ENOMEM); 581 return err; 582 } 583 584