// SPDX-License-Identifier: GPL-2.0
/*
 * fs/ext4/verity.c: fs-verity support for ext4
 *
 * Copyright 2019 Google LLC
 */

/*
 * Implementation of fsverity_operations for ext4.
 *
 * ext4 stores the verity metadata (Merkle tree and fsverity_descriptor) past
 * the end of the file, starting at the first 64K boundary beyond i_size.  This
 * approach works because (a) verity files are readonly, and (b) pages fully
 * beyond i_size aren't visible to userspace but can be read/written internally
 * by ext4 with only some relatively small changes to ext4.  This approach
 * avoids having to depend on the EA_INODE feature and on rearchitecturing
 * ext4's xattr support to support paging multi-gigabyte xattrs into memory, and
 * to support encrypting xattrs.  Note that the verity metadata *must* be
 * encrypted when the file is, since it contains hashes of the plaintext data.
 *
 * Using a 64K boundary rather than a 4K one keeps things ready for
 * architectures with 64K pages, and it doesn't necessarily waste space on-disk
 * since there can be a hole between i_size and the start of the Merkle tree.
 */

#include <linux/quotaops.h>

#include "ext4.h"
#include "ext4_extents.h"
#include "ext4_jbd2.h"

static inline loff_t ext4_verity_metadata_pos(const struct inode *inode)
{
	return round_up(inode->i_size, 65536);
}

/*
 * Read some verity metadata from the inode.  __vfs_read() can't be used because
 * we need to read beyond i_size.
 */
static int pagecache_read(struct inode *inode, void *buf, size_t count,
			  loff_t pos)
{
	while (count) {
		struct folio *folio;
		size_t n;

		folio = read_mapping_folio(inode->i_mapping, pos >> PAGE_SHIFT,
					   NULL);
		if (IS_ERR(folio))
			return PTR_ERR(folio);

		n = memcpy_from_file_folio(buf, folio, pos, count);
		folio_put(folio);

		buf += n;
		pos += n;
		count -= n;
	}
	return 0;
}

/*
 * Write some verity metadata to the inode for FS_IOC_ENABLE_VERITY.
 * kernel_write() can't be used because the file descriptor is readonly.
 */
static int pagecache_write(struct inode *inode, const void *buf, size_t count,
			   loff_t pos)
{
	struct address_space *mapping = inode->i_mapping;
	const struct address_space_operations *aops = mapping->a_ops;

	if (pos + count > inode->i_sb->s_maxbytes)
		return -EFBIG;

	while (count) {
		size_t n = min_t(size_t, count,
				 PAGE_SIZE - offset_in_page(pos));
		struct page *page;
		void *fsdata = NULL;
		int res;

		res = aops->write_begin(NULL, mapping, pos, n, &page, &fsdata);
		if (res)
			return res;

		memcpy_to_page(page, offset_in_page(pos), buf, n);

		res = aops->write_end(NULL, mapping, pos, n, n, page, fsdata);
		if (res < 0)
			return res;
		if (res != n)
			return -EIO;

		buf += n;
		pos += n;
		count -= n;
	}
	return 0;
}

static int ext4_begin_enable_verity(struct file *filp)
{
	struct inode *inode = file_inode(filp);
	const int credits = 2; /* superblock and inode for ext4_orphan_add() */
	handle_t *handle;
	int err;

	if (IS_DAX(inode) || ext4_test_inode_flag(inode, EXT4_INODE_DAX))
		return -EINVAL;

	if (ext4_verity_in_progress(inode))
		return -EBUSY;

	/*
	 * Since the file was opened readonly, we have to initialize the jbd
	 * inode and quotas here and not rely on ->open() doing it.  This must
	 * be done before evicting the inline data.
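	 * (Evicting the inline data via ext4_convert_inline_data() allocates a
	 * data block, which charges quota and, in data=ordered mode, is
	 * tracked through the inode's attached jbd2 inode.)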
	 */

	err = ext4_inode_attach_jinode(inode);
	if (err)
		return err;

	err = dquot_initialize(inode);
	if (err)
		return err;

	err = ext4_convert_inline_data(inode);
	if (err)
		return err;

	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
		ext4_warning_inode(inode,
				   "verity is only allowed on extent-based files");
		return -EOPNOTSUPP;
	}

	/*
	 * ext4 uses the last allocated block to find the verity descriptor, so
	 * we must remove any other blocks past EOF which might confuse things.
	 */
	err = ext4_truncate(inode);
	if (err)
		return err;

	handle = ext4_journal_start(inode, EXT4_HT_INODE, credits);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	err = ext4_orphan_add(handle, inode);
	if (err == 0)
		ext4_set_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);

	ext4_journal_stop(handle);
	return err;
}

/*
 * ext4 stores the verity descriptor beginning on the next filesystem block
 * boundary after the Merkle tree.  Then, the descriptor size is stored in the
 * last 4 bytes of the last allocated filesystem block --- which is either the
 * block in which the descriptor ends, or the next block after that if there
 * weren't at least 4 bytes remaining.
 *
 * We can't simply store the descriptor in an xattr because it *must* be
 * encrypted when ext4 encryption is used, but ext4 encryption doesn't encrypt
 * xattrs.  Also, if the descriptor includes a large signature blob it may be
 * too large to store in an xattr without the EA_INODE feature.
 */
static int ext4_write_verity_descriptor(struct inode *inode, const void *desc,
					size_t desc_size, u64 merkle_tree_size)
{
	const u64 desc_pos = round_up(ext4_verity_metadata_pos(inode) +
				      merkle_tree_size, i_blocksize(inode));
	const u64 desc_end = desc_pos + desc_size;
	const __le32 desc_size_disk = cpu_to_le32(desc_size);
	const u64 desc_size_pos = round_up(desc_end + sizeof(desc_size_disk),
					   i_blocksize(inode)) -
				  sizeof(desc_size_disk);
	int err;

	err = pagecache_write(inode, desc, desc_size, desc_pos);
	if (err)
		return err;

	return pagecache_write(inode, &desc_size_disk, sizeof(desc_size_disk),
			       desc_size_pos);
}

static int ext4_end_enable_verity(struct file *filp, const void *desc,
				  size_t desc_size, u64 merkle_tree_size)
{
	struct inode *inode = file_inode(filp);
	const int credits = 2; /* superblock and inode for ext4_orphan_del() */
	handle_t *handle;
	struct ext4_iloc iloc;
	int err = 0;

	/*
	 * If an error already occurred (which fs/verity/ signals by passing
	 * desc == NULL), then only clean-up is needed.
	 */
	if (desc == NULL)
		goto cleanup;

	/* Append the verity descriptor. */
	err = ext4_write_verity_descriptor(inode, desc, desc_size,
					   merkle_tree_size);
	if (err)
		goto cleanup;

	/*
	 * Write all pages (both data and verity metadata).  Note that this must
	 * happen before clearing EXT4_STATE_VERITY_IN_PROGRESS; otherwise pages
	 * beyond i_size won't be written properly.  For crash consistency, this
	 * also must happen before the verity inode flag gets persisted.
	 */
	err = filemap_write_and_wait(inode->i_mapping);
	if (err)
		goto cleanup;

	/*
	 * Finally, set the verity inode flag and remove the inode from the
	 * orphan list (in a single transaction).
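	 * Removing the orphan entry only now means that if we crash before
	 * this point, orphan cleanup will truncate the partially written
	 * verity metadata past i_size instead of leaving it behind.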
	 */

	handle = ext4_journal_start(inode, EXT4_HT_INODE, credits);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto cleanup;
	}

	err = ext4_orphan_del(handle, inode);
	if (err)
		goto stop_and_cleanup;

	err = ext4_reserve_inode_write(handle, inode, &iloc);
	if (err)
		goto stop_and_cleanup;

	ext4_set_inode_flag(inode, EXT4_INODE_VERITY);
	ext4_set_inode_flags(inode, false);
	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
	if (err)
		goto stop_and_cleanup;

	ext4_journal_stop(handle);

	ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
	return 0;

stop_and_cleanup:
	ext4_journal_stop(handle);
cleanup:
	/*
	 * Verity failed to be enabled, so clean up by truncating any verity
	 * metadata that was written beyond i_size (both from cache and from
	 * disk), removing the inode from the orphan list (if it wasn't done
	 * already), and clearing EXT4_STATE_VERITY_IN_PROGRESS.
	 */
	truncate_inode_pages(inode->i_mapping, inode->i_size);
	ext4_truncate(inode);
	ext4_orphan_del(NULL, inode);
	ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
	return err;
}

static int ext4_get_verity_descriptor_location(struct inode *inode,
					       size_t *desc_size_ret,
					       u64 *desc_pos_ret)
{
	struct ext4_ext_path *path;
	struct ext4_extent *last_extent;
	u32 end_lblk;
	u64 desc_size_pos;
	__le32 desc_size_disk;
	u32 desc_size;
	u64 desc_pos;
	int err;

	/*
	 * Descriptor size is in last 4 bytes of last allocated block.
	 * See ext4_write_verity_descriptor().
	 */

	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
		EXT4_ERROR_INODE(inode, "verity file doesn't use extents");
		return -EFSCORRUPTED;
	}

	path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
	if (IS_ERR(path))
		return PTR_ERR(path);

	last_extent = path[path->p_depth].p_ext;
	if (!last_extent) {
		EXT4_ERROR_INODE(inode, "verity file has no extents");
		ext4_free_ext_path(path);
		return -EFSCORRUPTED;
	}

	end_lblk = le32_to_cpu(last_extent->ee_block) +
		   ext4_ext_get_actual_len(last_extent);
	desc_size_pos = (u64)end_lblk << inode->i_blkbits;
	ext4_free_ext_path(path);

	if (desc_size_pos < sizeof(desc_size_disk))
		goto bad;
	desc_size_pos -= sizeof(desc_size_disk);

	err = pagecache_read(inode, &desc_size_disk, sizeof(desc_size_disk),
			     desc_size_pos);
	if (err)
		return err;
	desc_size = le32_to_cpu(desc_size_disk);

	/*
	 * The descriptor is stored just before the desc_size_disk, but starting
	 * on a filesystem block boundary.
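	 * For example (illustrative numbers): with a 4096-byte block size, a
	 * 1000-byte descriptor written at position 81920, and desc_size_disk
	 * stored at 86012 (the last 4 bytes of that block), this recovers
	 * desc_pos = round_down(86012 - 1000, 4096) = 81920.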
	 */

	if (desc_size > INT_MAX || desc_size > desc_size_pos)
		goto bad;

	desc_pos = round_down(desc_size_pos - desc_size, i_blocksize(inode));
	if (desc_pos < ext4_verity_metadata_pos(inode))
		goto bad;

	*desc_size_ret = desc_size;
	*desc_pos_ret = desc_pos;
	return 0;

bad:
	EXT4_ERROR_INODE(inode, "verity file corrupted; can't find descriptor");
	return -EFSCORRUPTED;
}

static int ext4_get_verity_descriptor(struct inode *inode, void *buf,
				      size_t buf_size)
{
	size_t desc_size = 0;
	u64 desc_pos = 0;
	int err;

	err = ext4_get_verity_descriptor_location(inode, &desc_size, &desc_pos);
	if (err)
		return err;

	if (buf_size) {
		if (desc_size > buf_size)
			return -ERANGE;
		err = pagecache_read(inode, buf, desc_size, desc_pos);
		if (err)
			return err;
	}
	return desc_size;
}

static struct page *ext4_read_merkle_tree_page(struct inode *inode,
					       pgoff_t index,
					       unsigned long num_ra_pages)
{
	struct folio *folio;

	index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;

	folio = __filemap_get_folio(inode->i_mapping, index, FGP_ACCESSED, 0);
	if (!folio || !folio_test_uptodate(folio)) {
		DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);

		if (folio)
			folio_put(folio);
		else if (num_ra_pages > 1)
			page_cache_ra_unbounded(&ractl, num_ra_pages, 0);
		folio = read_mapping_folio(inode->i_mapping, index, NULL);
		if (IS_ERR(folio))
			return ERR_CAST(folio);
	}
	return folio_file_page(folio, index);
}

static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf,
					u64 pos, unsigned int size)
{
	pos += ext4_verity_metadata_pos(inode);

	return pagecache_write(inode, buf, size, pos);
}

const struct fsverity_operations ext4_verityops = {
	.begin_enable_verity	= ext4_begin_enable_verity,
	.end_enable_verity	= ext4_end_enable_verity,
	.get_verity_descriptor	= ext4_get_verity_descriptor,
	.read_merkle_tree_page	= ext4_read_merkle_tree_page,
	.write_merkle_tree_block = ext4_write_merkle_tree_block,
};