1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Squashfs - a compressed read only filesystem for Linux 4 * 5 * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 6 * Phillip Lougher <phillip@squashfs.org.uk> 7 * 8 * file.c 9 */ 10 11 /* 12 * This file contains code for handling regular files. A regular file 13 * consists of a sequence of contiguous compressed blocks, and/or a 14 * compressed fragment block (tail-end packed block). The compressed size 15 * of each datablock is stored in a block list contained within the 16 * file inode (itself stored in one or more compressed metadata blocks). 17 * 18 * To speed up access to datablocks when reading 'large' files (256 Mbytes or 19 * larger), the code implements an index cache that caches the mapping from 20 * block index to datablock location on disk. 21 * 22 * The index cache allows Squashfs to handle large files (up to 1.75 TiB) while 23 * retaining a simple and space-efficient block list on disk. The cache 24 * is split into slots, caching up to eight 224 GiB files (128 KiB blocks). 25 * Larger files use multiple slots, with 1.75 TiB files using all 8 slots. 26 * The index cache is designed to be memory efficient, and by default uses 27 * 16 KiB. 28 */ 29 30 #include <linux/fs.h> 31 #include <linux/vfs.h> 32 #include <linux/kernel.h> 33 #include <linux/slab.h> 34 #include <linux/string.h> 35 #include <linux/pagemap.h> 36 #include <linux/mutex.h> 37 38 #include "squashfs_fs.h" 39 #include "squashfs_fs_sb.h" 40 #include "squashfs_fs_i.h" 41 #include "squashfs.h" 42 43 /* 44 * Locate cache slot in range [offset, index] for specified inode. If 45 * there's more than one return the slot closest to index. 46 */ 47 static struct meta_index *locate_meta_index(struct inode *inode, int offset, 48 int index) 49 { 50 struct meta_index *meta = NULL; 51 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 52 int i; 53 54 mutex_lock(&msblk->meta_index_mutex); 55 56 TRACE("locate_meta_index: index %d, offset %d\n", index, offset); 57 58 if (msblk->meta_index == NULL) 59 goto not_allocated; 60 61 for (i = 0; i < SQUASHFS_META_SLOTS; i++) { 62 if (msblk->meta_index[i].inode_number == inode->i_ino && 63 msblk->meta_index[i].offset >= offset && 64 msblk->meta_index[i].offset <= index && 65 msblk->meta_index[i].locked == 0) { 66 TRACE("locate_meta_index: entry %d, offset %d\n", i, 67 msblk->meta_index[i].offset); 68 meta = &msblk->meta_index[i]; 69 offset = meta->offset; 70 } 71 } 72 73 if (meta) 74 meta->locked = 1; 75 76 not_allocated: 77 mutex_unlock(&msblk->meta_index_mutex); 78 79 return meta; 80 } 81 82 83 /* 84 * Find and initialise an empty cache slot for index offset. 85 */ 86 static struct meta_index *empty_meta_index(struct inode *inode, int offset, 87 int skip) 88 { 89 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 90 struct meta_index *meta = NULL; 91 int i; 92 93 mutex_lock(&msblk->meta_index_mutex); 94 95 TRACE("empty_meta_index: offset %d, skip %d\n", offset, skip); 96 97 if (msblk->meta_index == NULL) { 98 /* 99 * First time cache index has been used, allocate and 100 * initialise. The cache index could be allocated at 101 * mount time but doing it here means it is allocated only 102 * if a 'large' file is read. 103 */ 104 msblk->meta_index = kcalloc(SQUASHFS_META_SLOTS, 105 sizeof(*(msblk->meta_index)), GFP_KERNEL); 106 if (msblk->meta_index == NULL) { 107 ERROR("Failed to allocate meta_index\n"); 108 goto failed; 109 } 110 for (i = 0; i < SQUASHFS_META_SLOTS; i++) { 111 msblk->meta_index[i].inode_number = 0; 112 msblk->meta_index[i].locked = 0; 113 } 114 msblk->next_meta_index = 0; 115 } 116 117 for (i = SQUASHFS_META_SLOTS; i && 118 msblk->meta_index[msblk->next_meta_index].locked; i--) 119 msblk->next_meta_index = (msblk->next_meta_index + 1) % 120 SQUASHFS_META_SLOTS; 121 122 if (i == 0) { 123 TRACE("empty_meta_index: failed!\n"); 124 goto failed; 125 } 126 127 TRACE("empty_meta_index: returned meta entry %d, %p\n", 128 msblk->next_meta_index, 129 &msblk->meta_index[msblk->next_meta_index]); 130 131 meta = &msblk->meta_index[msblk->next_meta_index]; 132 msblk->next_meta_index = (msblk->next_meta_index + 1) % 133 SQUASHFS_META_SLOTS; 134 135 meta->inode_number = inode->i_ino; 136 meta->offset = offset; 137 meta->skip = skip; 138 meta->entries = 0; 139 meta->locked = 1; 140 141 failed: 142 mutex_unlock(&msblk->meta_index_mutex); 143 return meta; 144 } 145 146 147 static void release_meta_index(struct inode *inode, struct meta_index *meta) 148 { 149 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 150 mutex_lock(&msblk->meta_index_mutex); 151 meta->locked = 0; 152 mutex_unlock(&msblk->meta_index_mutex); 153 } 154 155 156 /* 157 * Read the next n blocks from the block list, starting from 158 * metadata block <start_block, offset>. 159 */ 160 static long long read_indexes(struct super_block *sb, int n, 161 u64 *start_block, int *offset) 162 { 163 int err, i; 164 long long block = 0; 165 __le32 *blist = kmalloc(PAGE_SIZE, GFP_KERNEL); 166 167 if (blist == NULL) { 168 ERROR("read_indexes: Failed to allocate block_list\n"); 169 return -ENOMEM; 170 } 171 172 while (n) { 173 int blocks = min_t(int, n, PAGE_SIZE >> 2); 174 175 err = squashfs_read_metadata(sb, blist, start_block, 176 offset, blocks << 2); 177 if (err < 0) { 178 ERROR("read_indexes: reading block [%llx:%x]\n", 179 *start_block, *offset); 180 goto failure; 181 } 182 183 for (i = 0; i < blocks; i++) { 184 int size = squashfs_block_size(blist[i]); 185 if (size < 0) { 186 err = size; 187 goto failure; 188 } 189 block += SQUASHFS_COMPRESSED_SIZE_BLOCK(size); 190 } 191 n -= blocks; 192 } 193 194 kfree(blist); 195 return block; 196 197 failure: 198 kfree(blist); 199 return err; 200 } 201 202 203 /* 204 * Each cache index slot has SQUASHFS_META_ENTRIES, each of which 205 * can cache one index -> datablock/blocklist-block mapping. We wish 206 * to distribute these over the length of the file, entry[0] maps index x, 207 * entry[1] maps index x + skip, entry[2] maps index x + 2 * skip, and so on. 208 * The larger the file, the greater the skip factor. The skip factor is 209 * limited to the size of the metadata cache (SQUASHFS_CACHED_BLKS) to ensure 210 * the number of metadata blocks that need to be read fits into the cache. 211 * If the skip factor is limited in this way then the file will use multiple 212 * slots. 213 */ 214 static inline int calculate_skip(u64 blocks) 215 { 216 u64 skip = blocks / ((SQUASHFS_META_ENTRIES + 1) 217 * SQUASHFS_META_INDEXES); 218 return min((u64) SQUASHFS_CACHED_BLKS - 1, skip + 1); 219 } 220 221 222 /* 223 * Search and grow the index cache for the specified inode, returning the 224 * on-disk locations of the datablock and block list metadata block 225 * <index_block, index_offset> for index (scaled to nearest cache index). 226 */ 227 static int fill_meta_index(struct inode *inode, int index, 228 u64 *index_block, int *index_offset, u64 *data_block) 229 { 230 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 231 int skip = calculate_skip(i_size_read(inode) >> msblk->block_log); 232 int offset = 0; 233 struct meta_index *meta; 234 struct meta_entry *meta_entry; 235 u64 cur_index_block = squashfs_i(inode)->block_list_start; 236 int cur_offset = squashfs_i(inode)->offset; 237 u64 cur_data_block = squashfs_i(inode)->start; 238 int err, i; 239 240 /* 241 * Scale index to cache index (cache slot entry) 242 */ 243 index /= SQUASHFS_META_INDEXES * skip; 244 245 while (offset < index) { 246 meta = locate_meta_index(inode, offset + 1, index); 247 248 if (meta == NULL) { 249 meta = empty_meta_index(inode, offset + 1, skip); 250 if (meta == NULL) 251 goto all_done; 252 } else { 253 offset = index < meta->offset + meta->entries ? index : 254 meta->offset + meta->entries - 1; 255 meta_entry = &meta->meta_entry[offset - meta->offset]; 256 cur_index_block = meta_entry->index_block + 257 msblk->inode_table; 258 cur_offset = meta_entry->offset; 259 cur_data_block = meta_entry->data_block; 260 TRACE("get_meta_index: offset %d, meta->offset %d, " 261 "meta->entries %d\n", offset, meta->offset, 262 meta->entries); 263 TRACE("get_meta_index: index_block 0x%llx, offset 0x%x" 264 " data_block 0x%llx\n", cur_index_block, 265 cur_offset, cur_data_block); 266 } 267 268 /* 269 * If necessary grow cache slot by reading block list. Cache 270 * slot is extended up to index or to the end of the slot, in 271 * which case further slots will be used. 272 */ 273 for (i = meta->offset + meta->entries; i <= index && 274 i < meta->offset + SQUASHFS_META_ENTRIES; i++) { 275 int blocks = skip * SQUASHFS_META_INDEXES; 276 long long res = read_indexes(inode->i_sb, blocks, 277 &cur_index_block, &cur_offset); 278 279 if (res < 0) { 280 if (meta->entries == 0) 281 /* 282 * Don't leave an empty slot on read 283 * error allocated to this inode... 284 */ 285 meta->inode_number = 0; 286 err = res; 287 goto failed; 288 } 289 290 cur_data_block += res; 291 meta_entry = &meta->meta_entry[i - meta->offset]; 292 meta_entry->index_block = cur_index_block - 293 msblk->inode_table; 294 meta_entry->offset = cur_offset; 295 meta_entry->data_block = cur_data_block; 296 meta->entries++; 297 offset++; 298 } 299 300 TRACE("get_meta_index: meta->offset %d, meta->entries %d\n", 301 meta->offset, meta->entries); 302 303 release_meta_index(inode, meta); 304 } 305 306 all_done: 307 *index_block = cur_index_block; 308 *index_offset = cur_offset; 309 *data_block = cur_data_block; 310 311 /* 312 * Scale cache index (cache slot entry) to index 313 */ 314 return offset * SQUASHFS_META_INDEXES * skip; 315 316 failed: 317 release_meta_index(inode, meta); 318 return err; 319 } 320 321 322 /* 323 * Get the on-disk location and compressed size of the datablock 324 * specified by index. Fill_meta_index() does most of the work. 325 */ 326 static int read_blocklist(struct inode *inode, int index, u64 *block) 327 { 328 u64 start; 329 long long blks; 330 int offset; 331 __le32 size; 332 int res = fill_meta_index(inode, index, &start, &offset, block); 333 334 TRACE("read_blocklist: res %d, index %d, start 0x%llx, offset" 335 " 0x%x, block 0x%llx\n", res, index, start, offset, 336 *block); 337 338 if (res < 0) 339 return res; 340 341 /* 342 * res contains the index of the mapping returned by fill_meta_index(), 343 * this will likely be less than the desired index (because the 344 * meta_index cache works at a higher granularity). Read any 345 * extra block indexes needed. 346 */ 347 if (res < index) { 348 blks = read_indexes(inode->i_sb, index - res, &start, &offset); 349 if (blks < 0) 350 return (int) blks; 351 *block += blks; 352 } 353 354 /* 355 * Read length of block specified by index. 356 */ 357 res = squashfs_read_metadata(inode->i_sb, &size, &start, &offset, 358 sizeof(size)); 359 if (res < 0) 360 return res; 361 return squashfs_block_size(size); 362 } 363 364 void squashfs_fill_page(struct page *page, struct squashfs_cache_entry *buffer, int offset, int avail) 365 { 366 int copied; 367 void *pageaddr; 368 369 pageaddr = kmap_atomic(page); 370 copied = squashfs_copy_data(pageaddr, buffer, offset, avail); 371 memset(pageaddr + copied, 0, PAGE_SIZE - copied); 372 kunmap_atomic(pageaddr); 373 374 flush_dcache_page(page); 375 if (copied == avail) 376 SetPageUptodate(page); 377 else 378 SetPageError(page); 379 } 380 381 /* Copy data into page cache */ 382 void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, 383 int bytes, int offset) 384 { 385 struct inode *inode = page->mapping->host; 386 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 387 int i, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; 388 int start_index = page->index & ~mask, end_index = start_index | mask; 389 390 /* 391 * Loop copying datablock into pages. As the datablock likely covers 392 * many PAGE_SIZE pages (default block size is 128 KiB) explicitly 393 * grab the pages from the page cache, except for the page that we've 394 * been called to fill. 395 */ 396 for (i = start_index; i <= end_index && bytes > 0; i++, 397 bytes -= PAGE_SIZE, offset += PAGE_SIZE) { 398 struct page *push_page; 399 int avail = buffer ? min_t(int, bytes, PAGE_SIZE) : 0; 400 401 TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail); 402 403 push_page = (i == page->index) ? page : 404 grab_cache_page_nowait(page->mapping, i); 405 406 if (!push_page) 407 continue; 408 409 if (PageUptodate(push_page)) 410 goto skip_page; 411 412 squashfs_fill_page(push_page, buffer, offset, avail); 413 skip_page: 414 unlock_page(push_page); 415 if (i != page->index) 416 put_page(push_page); 417 } 418 } 419 420 /* Read datablock stored packed inside a fragment (tail-end packed block) */ 421 static int squashfs_readpage_fragment(struct page *page, int expected) 422 { 423 struct inode *inode = page->mapping->host; 424 struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, 425 squashfs_i(inode)->fragment_block, 426 squashfs_i(inode)->fragment_size); 427 int res = buffer->error; 428 429 if (res) 430 ERROR("Unable to read page, block %llx, size %x\n", 431 squashfs_i(inode)->fragment_block, 432 squashfs_i(inode)->fragment_size); 433 else 434 squashfs_copy_cache(page, buffer, expected, 435 squashfs_i(inode)->fragment_offset); 436 437 squashfs_cache_put(buffer); 438 return res; 439 } 440 441 static int squashfs_readpage_sparse(struct page *page, int expected) 442 { 443 squashfs_copy_cache(page, NULL, expected, 0); 444 return 0; 445 } 446 447 static int squashfs_read_folio(struct file *file, struct folio *folio) 448 { 449 struct page *page = &folio->page; 450 struct inode *inode = page->mapping->host; 451 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 452 int index = page->index >> (msblk->block_log - PAGE_SHIFT); 453 int file_end = i_size_read(inode) >> msblk->block_log; 454 int expected = index == file_end ? 455 (i_size_read(inode) & (msblk->block_size - 1)) : 456 msblk->block_size; 457 int res; 458 void *pageaddr; 459 460 TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", 461 page->index, squashfs_i(inode)->start); 462 463 if (page->index >= ((i_size_read(inode) + PAGE_SIZE - 1) >> 464 PAGE_SHIFT)) 465 goto out; 466 467 if (index < file_end || squashfs_i(inode)->fragment_block == 468 SQUASHFS_INVALID_BLK) { 469 u64 block = 0; 470 int bsize = read_blocklist(inode, index, &block); 471 if (bsize < 0) 472 goto error_out; 473 474 if (bsize == 0) 475 res = squashfs_readpage_sparse(page, expected); 476 else 477 res = squashfs_readpage_block(page, block, bsize, expected); 478 } else 479 res = squashfs_readpage_fragment(page, expected); 480 481 if (!res) 482 return 0; 483 484 error_out: 485 SetPageError(page); 486 out: 487 pageaddr = kmap_atomic(page); 488 memset(pageaddr, 0, PAGE_SIZE); 489 kunmap_atomic(pageaddr); 490 flush_dcache_page(page); 491 if (!PageError(page)) 492 SetPageUptodate(page); 493 unlock_page(page); 494 495 return 0; 496 } 497 498 499 const struct address_space_operations squashfs_aops = { 500 .read_folio = squashfs_read_folio 501 }; 502