1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Squashfs - a compressed read only filesystem for Linux 4 * 5 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 6 * Phillip Lougher <phillip@squashfs.org.uk> 7 * 8 * file.c 9 */ 10 11 /* 12 * This file contains code for handling regular files. A regular file 13 * consists of a sequence of contiguous compressed blocks, and/or a 14 * compressed fragment block (tail-end packed block). The compressed size 15 * of each datablock is stored in a block list contained within the 16 * file inode (itself stored in one or more compressed metadata blocks). 17 * 18 * To speed up access to datablocks when reading 'large' files (256 Mbytes or 19 * larger), the code implements an index cache that caches the mapping from 20 * block index to datablock location on disk. 21 * 22 * The index cache allows Squashfs to handle large files (up to 1.75 TiB) while 23 * retaining a simple and space-efficient block list on disk. The cache 24 * is split into slots, caching up to eight 224 GiB files (128 KiB blocks). 25 * Larger files use multiple slots, with 1.75 TiB files using all 8 slots. 26 * The index cache is designed to be memory efficient, and by default uses 27 * 16 KiB. 28 */ 29 30 #include <linux/fs.h> 31 #include <linux/vfs.h> 32 #include <linux/kernel.h> 33 #include <linux/slab.h> 34 #include <linux/string.h> 35 #include <linux/pagemap.h> 36 #include <linux/mutex.h> 37 38 #include "squashfs_fs.h" 39 #include "squashfs_fs_sb.h" 40 #include "squashfs_fs_i.h" 41 #include "squashfs.h" 42 #include "page_actor.h" 43 44 /* 45 * Locate cache slot in range [offset, index] for specified inode. If 46 * there's more than one return the slot closest to index. 47 */ 48 static struct meta_index *locate_meta_index(struct inode *inode, int offset, 49 int index) 50 { 51 struct meta_index *meta = NULL; 52 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 53 int i; 54 55 mutex_lock(&msblk->meta_index_mutex); 56 57 TRACE("locate_meta_index: index %d, offset %d\n", index, offset); 58 59 if (msblk->meta_index == NULL) 60 goto not_allocated; 61 62 for (i = 0; i < SQUASHFS_META_SLOTS; i++) { 63 if (msblk->meta_index[i].inode_number == inode->i_ino && 64 msblk->meta_index[i].offset >= offset && 65 msblk->meta_index[i].offset <= index && 66 msblk->meta_index[i].locked == 0) { 67 TRACE("locate_meta_index: entry %d, offset %d\n", i, 68 msblk->meta_index[i].offset); 69 meta = &msblk->meta_index[i]; 70 offset = meta->offset; 71 } 72 } 73 74 if (meta) 75 meta->locked = 1; 76 77 not_allocated: 78 mutex_unlock(&msblk->meta_index_mutex); 79 80 return meta; 81 } 82 83 84 /* 85 * Find and initialise an empty cache slot for index offset. 86 */ 87 static struct meta_index *empty_meta_index(struct inode *inode, int offset, 88 int skip) 89 { 90 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 91 struct meta_index *meta = NULL; 92 int i; 93 94 mutex_lock(&msblk->meta_index_mutex); 95 96 TRACE("empty_meta_index: offset %d, skip %d\n", offset, skip); 97 98 if (msblk->meta_index == NULL) { 99 /* 100 * First time cache index has been used, allocate and 101 * initialise. The cache index could be allocated at 102 * mount time but doing it here means it is allocated only 103 * if a 'large' file is read. 104 */ 105 msblk->meta_index = kcalloc(SQUASHFS_META_SLOTS, 106 sizeof(*(msblk->meta_index)), GFP_KERNEL); 107 if (msblk->meta_index == NULL) { 108 ERROR("Failed to allocate meta_index\n"); 109 goto failed; 110 } 111 for (i = 0; i < SQUASHFS_META_SLOTS; i++) { 112 msblk->meta_index[i].inode_number = 0; 113 msblk->meta_index[i].locked = 0; 114 } 115 msblk->next_meta_index = 0; 116 } 117 118 for (i = SQUASHFS_META_SLOTS; i && 119 msblk->meta_index[msblk->next_meta_index].locked; i--) 120 msblk->next_meta_index = (msblk->next_meta_index + 1) % 121 SQUASHFS_META_SLOTS; 122 123 if (i == 0) { 124 TRACE("empty_meta_index: failed!\n"); 125 goto failed; 126 } 127 128 TRACE("empty_meta_index: returned meta entry %d, %p\n", 129 msblk->next_meta_index, 130 &msblk->meta_index[msblk->next_meta_index]); 131 132 meta = &msblk->meta_index[msblk->next_meta_index]; 133 msblk->next_meta_index = (msblk->next_meta_index + 1) % 134 SQUASHFS_META_SLOTS; 135 136 meta->inode_number = inode->i_ino; 137 meta->offset = offset; 138 meta->skip = skip; 139 meta->entries = 0; 140 meta->locked = 1; 141 142 failed: 143 mutex_unlock(&msblk->meta_index_mutex); 144 return meta; 145 } 146 147 148 static void release_meta_index(struct inode *inode, struct meta_index *meta) 149 { 150 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 151 mutex_lock(&msblk->meta_index_mutex); 152 meta->locked = 0; 153 mutex_unlock(&msblk->meta_index_mutex); 154 } 155 156 157 /* 158 * Read the next n blocks from the block list, starting from 159 * metadata block <start_block, offset>. 160 */ 161 static long long read_indexes(struct super_block *sb, int n, 162 u64 *start_block, int *offset) 163 { 164 int err, i; 165 long long block = 0; 166 __le32 *blist = kmalloc(PAGE_SIZE, GFP_KERNEL); 167 168 if (blist == NULL) { 169 ERROR("read_indexes: Failed to allocate block_list\n"); 170 return -ENOMEM; 171 } 172 173 while (n) { 174 int blocks = min_t(int, n, PAGE_SIZE >> 2); 175 176 err = squashfs_read_metadata(sb, blist, start_block, 177 offset, blocks << 2); 178 if (err < 0) { 179 ERROR("read_indexes: reading block [%llx:%x]\n", 180 *start_block, *offset); 181 goto failure; 182 } 183 184 for (i = 0; i < blocks; i++) { 185 int size = squashfs_block_size(blist[i]); 186 if (size < 0) { 187 err = size; 188 goto failure; 189 } 190 block += SQUASHFS_COMPRESSED_SIZE_BLOCK(size); 191 } 192 n -= blocks; 193 } 194 195 kfree(blist); 196 return block; 197 198 failure: 199 kfree(blist); 200 return err; 201 } 202 203 204 /* 205 * Each cache index slot has SQUASHFS_META_ENTRIES, each of which 206 * can cache one index -> datablock/blocklist-block mapping. We wish 207 * to distribute these over the length of the file, entry[0] maps index x, 208 * entry[1] maps index x + skip, entry[2] maps index x + 2 * skip, and so on. 209 * The larger the file, the greater the skip factor. The skip factor is 210 * limited to the size of the metadata cache (SQUASHFS_CACHED_BLKS) to ensure 211 * the number of metadata blocks that need to be read fits into the cache. 212 * If the skip factor is limited in this way then the file will use multiple 213 * slots. 214 */ 215 static inline int calculate_skip(u64 blocks) 216 { 217 u64 skip = blocks / ((SQUASHFS_META_ENTRIES + 1) 218 * SQUASHFS_META_INDEXES); 219 return min((u64) SQUASHFS_CACHED_BLKS - 1, skip + 1); 220 } 221 222 223 /* 224 * Search and grow the index cache for the specified inode, returning the 225 * on-disk locations of the datablock and block list metadata block 226 * <index_block, index_offset> for index (scaled to nearest cache index). 227 */ 228 static int fill_meta_index(struct inode *inode, int index, 229 u64 *index_block, int *index_offset, u64 *data_block) 230 { 231 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 232 int skip = calculate_skip(i_size_read(inode) >> msblk->block_log); 233 int offset = 0; 234 struct meta_index *meta; 235 struct meta_entry *meta_entry; 236 u64 cur_index_block = squashfs_i(inode)->block_list_start; 237 int cur_offset = squashfs_i(inode)->offset; 238 u64 cur_data_block = squashfs_i(inode)->start; 239 int err, i; 240 241 /* 242 * Scale index to cache index (cache slot entry) 243 */ 244 index /= SQUASHFS_META_INDEXES * skip; 245 246 while (offset < index) { 247 meta = locate_meta_index(inode, offset + 1, index); 248 249 if (meta == NULL) { 250 meta = empty_meta_index(inode, offset + 1, skip); 251 if (meta == NULL) 252 goto all_done; 253 } else { 254 offset = index < meta->offset + meta->entries ? index : 255 meta->offset + meta->entries - 1; 256 meta_entry = &meta->meta_entry[offset - meta->offset]; 257 cur_index_block = meta_entry->index_block + 258 msblk->inode_table; 259 cur_offset = meta_entry->offset; 260 cur_data_block = meta_entry->data_block; 261 TRACE("get_meta_index: offset %d, meta->offset %d, " 262 "meta->entries %d\n", offset, meta->offset, 263 meta->entries); 264 TRACE("get_meta_index: index_block 0x%llx, offset 0x%x" 265 " data_block 0x%llx\n", cur_index_block, 266 cur_offset, cur_data_block); 267 } 268 269 /* 270 * If necessary grow cache slot by reading block list. Cache 271 * slot is extended up to index or to the end of the slot, in 272 * which case further slots will be used. 273 */ 274 for (i = meta->offset + meta->entries; i <= index && 275 i < meta->offset + SQUASHFS_META_ENTRIES; i++) { 276 int blocks = skip * SQUASHFS_META_INDEXES; 277 long long res = read_indexes(inode->i_sb, blocks, 278 &cur_index_block, &cur_offset); 279 280 if (res < 0) { 281 if (meta->entries == 0) 282 /* 283 * Don't leave an empty slot on read 284 * error allocated to this inode... 285 */ 286 meta->inode_number = 0; 287 err = res; 288 goto failed; 289 } 290 291 cur_data_block += res; 292 meta_entry = &meta->meta_entry[i - meta->offset]; 293 meta_entry->index_block = cur_index_block - 294 msblk->inode_table; 295 meta_entry->offset = cur_offset; 296 meta_entry->data_block = cur_data_block; 297 meta->entries++; 298 offset++; 299 } 300 301 TRACE("get_meta_index: meta->offset %d, meta->entries %d\n", 302 meta->offset, meta->entries); 303 304 release_meta_index(inode, meta); 305 } 306 307 all_done: 308 *index_block = cur_index_block; 309 *index_offset = cur_offset; 310 *data_block = cur_data_block; 311 312 /* 313 * Scale cache index (cache slot entry) to index 314 */ 315 return offset * SQUASHFS_META_INDEXES * skip; 316 317 failed: 318 release_meta_index(inode, meta); 319 return err; 320 } 321 322 323 /* 324 * Get the on-disk location and compressed size of the datablock 325 * specified by index. Fill_meta_index() does most of the work. 326 */ 327 static int read_blocklist(struct inode *inode, int index, u64 *block) 328 { 329 u64 start; 330 long long blks; 331 int offset; 332 __le32 size; 333 int res = fill_meta_index(inode, index, &start, &offset, block); 334 335 TRACE("read_blocklist: res %d, index %d, start 0x%llx, offset" 336 " 0x%x, block 0x%llx\n", res, index, start, offset, 337 *block); 338 339 if (res < 0) 340 return res; 341 342 /* 343 * res contains the index of the mapping returned by fill_meta_index(), 344 * this will likely be less than the desired index (because the 345 * meta_index cache works at a higher granularity). Read any 346 * extra block indexes needed. 347 */ 348 if (res < index) { 349 blks = read_indexes(inode->i_sb, index - res, &start, &offset); 350 if (blks < 0) 351 return (int) blks; 352 *block += blks; 353 } 354 355 /* 356 * Read length of block specified by index. 357 */ 358 res = squashfs_read_metadata(inode->i_sb, &size, &start, &offset, 359 sizeof(size)); 360 if (res < 0) 361 return res; 362 return squashfs_block_size(size); 363 } 364 365 void squashfs_fill_page(struct page *page, struct squashfs_cache_entry *buffer, int offset, int avail) 366 { 367 int copied; 368 void *pageaddr; 369 370 pageaddr = kmap_atomic(page); 371 copied = squashfs_copy_data(pageaddr, buffer, offset, avail); 372 memset(pageaddr + copied, 0, PAGE_SIZE - copied); 373 kunmap_atomic(pageaddr); 374 375 flush_dcache_page(page); 376 if (copied == avail) 377 SetPageUptodate(page); 378 else 379 SetPageError(page); 380 } 381 382 /* Copy data into page cache */ 383 void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, 384 int bytes, int offset) 385 { 386 struct inode *inode = page->mapping->host; 387 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 388 int i, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; 389 int start_index = page->index & ~mask, end_index = start_index | mask; 390 391 /* 392 * Loop copying datablock into pages. As the datablock likely covers 393 * many PAGE_SIZE pages (default block size is 128 KiB) explicitly 394 * grab the pages from the page cache, except for the page that we've 395 * been called to fill. 396 */ 397 for (i = start_index; i <= end_index && bytes > 0; i++, 398 bytes -= PAGE_SIZE, offset += PAGE_SIZE) { 399 struct page *push_page; 400 int avail = buffer ? min_t(int, bytes, PAGE_SIZE) : 0; 401 402 TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail); 403 404 push_page = (i == page->index) ? page : 405 grab_cache_page_nowait(page->mapping, i); 406 407 if (!push_page) 408 continue; 409 410 if (PageUptodate(push_page)) 411 goto skip_page; 412 413 squashfs_fill_page(push_page, buffer, offset, avail); 414 skip_page: 415 unlock_page(push_page); 416 if (i != page->index) 417 put_page(push_page); 418 } 419 } 420 421 /* Read datablock stored packed inside a fragment (tail-end packed block) */ 422 static int squashfs_readpage_fragment(struct page *page, int expected) 423 { 424 struct inode *inode = page->mapping->host; 425 struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, 426 squashfs_i(inode)->fragment_block, 427 squashfs_i(inode)->fragment_size); 428 int res = buffer->error; 429 430 if (res) 431 ERROR("Unable to read page, block %llx, size %x\n", 432 squashfs_i(inode)->fragment_block, 433 squashfs_i(inode)->fragment_size); 434 else 435 squashfs_copy_cache(page, buffer, expected, 436 squashfs_i(inode)->fragment_offset); 437 438 squashfs_cache_put(buffer); 439 return res; 440 } 441 442 static int squashfs_readpage_sparse(struct page *page, int expected) 443 { 444 squashfs_copy_cache(page, NULL, expected, 0); 445 return 0; 446 } 447 448 static int squashfs_read_folio(struct file *file, struct folio *folio) 449 { 450 struct page *page = &folio->page; 451 struct inode *inode = page->mapping->host; 452 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 453 int index = page->index >> (msblk->block_log - PAGE_SHIFT); 454 int file_end = i_size_read(inode) >> msblk->block_log; 455 int expected = index == file_end ? 456 (i_size_read(inode) & (msblk->block_size - 1)) : 457 msblk->block_size; 458 int res = 0; 459 void *pageaddr; 460 461 TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", 462 page->index, squashfs_i(inode)->start); 463 464 if (page->index >= ((i_size_read(inode) + PAGE_SIZE - 1) >> 465 PAGE_SHIFT)) 466 goto out; 467 468 if (index < file_end || squashfs_i(inode)->fragment_block == 469 SQUASHFS_INVALID_BLK) { 470 u64 block = 0; 471 472 res = read_blocklist(inode, index, &block); 473 if (res < 0) 474 goto error_out; 475 476 if (res == 0) 477 res = squashfs_readpage_sparse(page, expected); 478 else 479 res = squashfs_readpage_block(page, block, res, expected); 480 } else 481 res = squashfs_readpage_fragment(page, expected); 482 483 if (!res) 484 return 0; 485 486 error_out: 487 SetPageError(page); 488 out: 489 pageaddr = kmap_atomic(page); 490 memset(pageaddr, 0, PAGE_SIZE); 491 kunmap_atomic(pageaddr); 492 flush_dcache_page(page); 493 if (res == 0) 494 SetPageUptodate(page); 495 unlock_page(page); 496 497 return res; 498 } 499 500 static int squashfs_readahead_fragment(struct page **page, 501 unsigned int pages, unsigned int expected) 502 { 503 struct inode *inode = page[0]->mapping->host; 504 struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, 505 squashfs_i(inode)->fragment_block, 506 squashfs_i(inode)->fragment_size); 507 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 508 unsigned int n, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; 509 510 if (buffer->error) 511 goto out; 512 513 expected += squashfs_i(inode)->fragment_offset; 514 515 for (n = 0; n < pages; n++) { 516 unsigned int base = (page[n]->index & mask) << PAGE_SHIFT; 517 unsigned int offset = base + squashfs_i(inode)->fragment_offset; 518 519 if (expected > offset) { 520 unsigned int avail = min_t(unsigned int, expected - 521 offset, PAGE_SIZE); 522 523 squashfs_fill_page(page[n], buffer, offset, avail); 524 } 525 526 unlock_page(page[n]); 527 put_page(page[n]); 528 } 529 530 out: 531 squashfs_cache_put(buffer); 532 return buffer->error; 533 } 534 535 static void squashfs_readahead(struct readahead_control *ractl) 536 { 537 struct inode *inode = ractl->mapping->host; 538 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 539 size_t mask = (1UL << msblk->block_log) - 1; 540 unsigned short shift = msblk->block_log - PAGE_SHIFT; 541 loff_t start = readahead_pos(ractl) & ~mask; 542 size_t len = readahead_length(ractl) + readahead_pos(ractl) - start; 543 struct squashfs_page_actor *actor; 544 unsigned int nr_pages = 0; 545 struct page **pages; 546 int i, file_end = i_size_read(inode) >> msblk->block_log; 547 unsigned int max_pages = 1UL << shift; 548 549 readahead_expand(ractl, start, (len | mask) + 1); 550 551 pages = kmalloc_array(max_pages, sizeof(void *), GFP_KERNEL); 552 if (!pages) 553 return; 554 555 for (;;) { 556 pgoff_t index; 557 int res, bsize; 558 u64 block = 0; 559 unsigned int expected; 560 561 nr_pages = __readahead_batch(ractl, pages, max_pages); 562 if (!nr_pages) 563 break; 564 565 if (readahead_pos(ractl) >= i_size_read(inode)) 566 goto skip_pages; 567 568 index = pages[0]->index >> shift; 569 if ((pages[nr_pages - 1]->index >> shift) != index) 570 goto skip_pages; 571 572 expected = index == file_end ? 573 (i_size_read(inode) & (msblk->block_size - 1)) : 574 msblk->block_size; 575 576 if (index == file_end && squashfs_i(inode)->fragment_block != 577 SQUASHFS_INVALID_BLK) { 578 res = squashfs_readahead_fragment(pages, nr_pages, 579 expected); 580 if (res) 581 goto skip_pages; 582 continue; 583 } 584 585 bsize = read_blocklist(inode, index, &block); 586 if (bsize == 0) 587 goto skip_pages; 588 589 actor = squashfs_page_actor_init_special(msblk, pages, nr_pages, 590 expected); 591 if (!actor) 592 goto skip_pages; 593 594 res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); 595 596 kfree(actor); 597 598 if (res == expected) { 599 int bytes; 600 601 /* Last page (if present) may have trailing bytes not filled */ 602 bytes = res % PAGE_SIZE; 603 if (pages[nr_pages - 1]->index == file_end && bytes) 604 memzero_page(pages[nr_pages - 1], bytes, 605 PAGE_SIZE - bytes); 606 607 for (i = 0; i < nr_pages; i++) { 608 flush_dcache_page(pages[i]); 609 SetPageUptodate(pages[i]); 610 } 611 } 612 613 for (i = 0; i < nr_pages; i++) { 614 unlock_page(pages[i]); 615 put_page(pages[i]); 616 } 617 } 618 619 kfree(pages); 620 return; 621 622 skip_pages: 623 for (i = 0; i < nr_pages; i++) { 624 unlock_page(pages[i]); 625 put_page(pages[i]); 626 } 627 kfree(pages); 628 } 629 630 const struct address_space_operations squashfs_aops = { 631 .read_folio = squashfs_read_folio, 632 .readahead = squashfs_readahead 633 }; 634