1c93d8f88SEric Biggers // SPDX-License-Identifier: GPL-2.0 2c93d8f88SEric Biggers /* 3c93d8f88SEric Biggers * fs/ext4/verity.c: fs-verity support for ext4 4c93d8f88SEric Biggers * 5c93d8f88SEric Biggers * Copyright 2019 Google LLC 6c93d8f88SEric Biggers */ 7c93d8f88SEric Biggers 8c93d8f88SEric Biggers /* 9c93d8f88SEric Biggers * Implementation of fsverity_operations for ext4. 10c93d8f88SEric Biggers * 11c93d8f88SEric Biggers * ext4 stores the verity metadata (Merkle tree and fsverity_descriptor) past 12c93d8f88SEric Biggers * the end of the file, starting at the first 64K boundary beyond i_size. This 13c93d8f88SEric Biggers * approach works because (a) verity files are readonly, and (b) pages fully 14c93d8f88SEric Biggers * beyond i_size aren't visible to userspace but can be read/written internally 15c93d8f88SEric Biggers * by ext4 with only some relatively small changes to ext4. This approach 16c93d8f88SEric Biggers * avoids having to depend on the EA_INODE feature and on rearchitecturing 17c93d8f88SEric Biggers * ext4's xattr support to support paging multi-gigabyte xattrs into memory, and 18c93d8f88SEric Biggers * to support encrypting xattrs. Note that the verity metadata *must* be 19c93d8f88SEric Biggers * encrypted when the file is, since it contains hashes of the plaintext data. 20c93d8f88SEric Biggers * 21c93d8f88SEric Biggers * Using a 64K boundary rather than a 4K one keeps things ready for 22c93d8f88SEric Biggers * architectures with 64K pages, and it doesn't necessarily waste space on-disk 23c93d8f88SEric Biggers * since there can be a hole between i_size and the start of the Merkle tree. 24c93d8f88SEric Biggers */ 25c93d8f88SEric Biggers 26c93d8f88SEric Biggers #include <linux/quotaops.h> 27c93d8f88SEric Biggers 28c93d8f88SEric Biggers #include "ext4.h" 29c93d8f88SEric Biggers #include "ext4_extents.h" 30c93d8f88SEric Biggers #include "ext4_jbd2.h" 31c93d8f88SEric Biggers 32c93d8f88SEric Biggers static inline loff_t ext4_verity_metadata_pos(const struct inode *inode) 33c93d8f88SEric Biggers { 34c93d8f88SEric Biggers return round_up(inode->i_size, 65536); 35c93d8f88SEric Biggers } 36c93d8f88SEric Biggers 37c93d8f88SEric Biggers /* 38c93d8f88SEric Biggers * Read some verity metadata from the inode. __vfs_read() can't be used because 39c93d8f88SEric Biggers * we need to read beyond i_size. 40c93d8f88SEric Biggers */ 41c93d8f88SEric Biggers static int pagecache_read(struct inode *inode, void *buf, size_t count, 42c93d8f88SEric Biggers loff_t pos) 43c93d8f88SEric Biggers { 44c93d8f88SEric Biggers while (count) { 45c93d8f88SEric Biggers size_t n = min_t(size_t, count, 46c93d8f88SEric Biggers PAGE_SIZE - offset_in_page(pos)); 47c93d8f88SEric Biggers struct page *page; 48c93d8f88SEric Biggers void *addr; 49c93d8f88SEric Biggers 50c93d8f88SEric Biggers page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT, 51c93d8f88SEric Biggers NULL); 52c93d8f88SEric Biggers if (IS_ERR(page)) 53c93d8f88SEric Biggers return PTR_ERR(page); 54c93d8f88SEric Biggers 55c93d8f88SEric Biggers addr = kmap_atomic(page); 56c93d8f88SEric Biggers memcpy(buf, addr + offset_in_page(pos), n); 57c93d8f88SEric Biggers kunmap_atomic(addr); 58c93d8f88SEric Biggers 59c93d8f88SEric Biggers put_page(page); 60c93d8f88SEric Biggers 61c93d8f88SEric Biggers buf += n; 62c93d8f88SEric Biggers pos += n; 63c93d8f88SEric Biggers count -= n; 64c93d8f88SEric Biggers } 65c93d8f88SEric Biggers return 0; 66c93d8f88SEric Biggers } 67c93d8f88SEric Biggers 68c93d8f88SEric Biggers /* 69c93d8f88SEric Biggers * Write some verity metadata to the inode for FS_IOC_ENABLE_VERITY. 70c93d8f88SEric Biggers * kernel_write() can't be used because the file descriptor is readonly. 71c93d8f88SEric Biggers */ 72c93d8f88SEric Biggers static int pagecache_write(struct inode *inode, const void *buf, size_t count, 73c93d8f88SEric Biggers loff_t pos) 74c93d8f88SEric Biggers { 75c93d8f88SEric Biggers if (pos + count > inode->i_sb->s_maxbytes) 76c93d8f88SEric Biggers return -EFBIG; 77c93d8f88SEric Biggers 78c93d8f88SEric Biggers while (count) { 79c93d8f88SEric Biggers size_t n = min_t(size_t, count, 80c93d8f88SEric Biggers PAGE_SIZE - offset_in_page(pos)); 81c93d8f88SEric Biggers struct page *page; 82c93d8f88SEric Biggers void *fsdata; 83c93d8f88SEric Biggers void *addr; 84c93d8f88SEric Biggers int res; 85c93d8f88SEric Biggers 86c93d8f88SEric Biggers res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0, 87c93d8f88SEric Biggers &page, &fsdata); 88c93d8f88SEric Biggers if (res) 89c93d8f88SEric Biggers return res; 90c93d8f88SEric Biggers 91c93d8f88SEric Biggers addr = kmap_atomic(page); 92c93d8f88SEric Biggers memcpy(addr + offset_in_page(pos), buf, n); 93c93d8f88SEric Biggers kunmap_atomic(addr); 94c93d8f88SEric Biggers 95c93d8f88SEric Biggers res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n, 96c93d8f88SEric Biggers page, fsdata); 97c93d8f88SEric Biggers if (res < 0) 98c93d8f88SEric Biggers return res; 99c93d8f88SEric Biggers if (res != n) 100c93d8f88SEric Biggers return -EIO; 101c93d8f88SEric Biggers 102c93d8f88SEric Biggers buf += n; 103c93d8f88SEric Biggers pos += n; 104c93d8f88SEric Biggers count -= n; 105c93d8f88SEric Biggers } 106c93d8f88SEric Biggers return 0; 107c93d8f88SEric Biggers } 108c93d8f88SEric Biggers 109c93d8f88SEric Biggers static int ext4_begin_enable_verity(struct file *filp) 110c93d8f88SEric Biggers { 111c93d8f88SEric Biggers struct inode *inode = file_inode(filp); 112c93d8f88SEric Biggers const int credits = 2; /* superblock and inode for ext4_orphan_add() */ 113c93d8f88SEric Biggers handle_t *handle; 114c93d8f88SEric Biggers int err; 115c93d8f88SEric Biggers 116c93d8f88SEric Biggers if (ext4_verity_in_progress(inode)) 117c93d8f88SEric Biggers return -EBUSY; 118c93d8f88SEric Biggers 119c93d8f88SEric Biggers /* 120c93d8f88SEric Biggers * Since the file was opened readonly, we have to initialize the jbd 121c93d8f88SEric Biggers * inode and quotas here and not rely on ->open() doing it. This must 122c93d8f88SEric Biggers * be done before evicting the inline data. 123c93d8f88SEric Biggers */ 124c93d8f88SEric Biggers 125c93d8f88SEric Biggers err = ext4_inode_attach_jinode(inode); 126c93d8f88SEric Biggers if (err) 127c93d8f88SEric Biggers return err; 128c93d8f88SEric Biggers 129c93d8f88SEric Biggers err = dquot_initialize(inode); 130c93d8f88SEric Biggers if (err) 131c93d8f88SEric Biggers return err; 132c93d8f88SEric Biggers 133c93d8f88SEric Biggers err = ext4_convert_inline_data(inode); 134c93d8f88SEric Biggers if (err) 135c93d8f88SEric Biggers return err; 136c93d8f88SEric Biggers 137c93d8f88SEric Biggers if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 138c93d8f88SEric Biggers ext4_warning_inode(inode, 139c93d8f88SEric Biggers "verity is only allowed on extent-based files"); 140c93d8f88SEric Biggers return -EOPNOTSUPP; 141c93d8f88SEric Biggers } 142c93d8f88SEric Biggers 143c93d8f88SEric Biggers /* 144c93d8f88SEric Biggers * ext4 uses the last allocated block to find the verity descriptor, so 145c93d8f88SEric Biggers * we must remove any other blocks past EOF which might confuse things. 146c93d8f88SEric Biggers */ 147c93d8f88SEric Biggers err = ext4_truncate(inode); 148c93d8f88SEric Biggers if (err) 149c93d8f88SEric Biggers return err; 150c93d8f88SEric Biggers 151c93d8f88SEric Biggers handle = ext4_journal_start(inode, EXT4_HT_INODE, credits); 152c93d8f88SEric Biggers if (IS_ERR(handle)) 153c93d8f88SEric Biggers return PTR_ERR(handle); 154c93d8f88SEric Biggers 155c93d8f88SEric Biggers err = ext4_orphan_add(handle, inode); 156c93d8f88SEric Biggers if (err == 0) 157c93d8f88SEric Biggers ext4_set_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); 158c93d8f88SEric Biggers 159c93d8f88SEric Biggers ext4_journal_stop(handle); 160c93d8f88SEric Biggers return err; 161c93d8f88SEric Biggers } 162c93d8f88SEric Biggers 163c93d8f88SEric Biggers /* 164c93d8f88SEric Biggers * ext4 stores the verity descriptor beginning on the next filesystem block 165c93d8f88SEric Biggers * boundary after the Merkle tree. Then, the descriptor size is stored in the 166c93d8f88SEric Biggers * last 4 bytes of the last allocated filesystem block --- which is either the 167c93d8f88SEric Biggers * block in which the descriptor ends, or the next block after that if there 168c93d8f88SEric Biggers * weren't at least 4 bytes remaining. 169c93d8f88SEric Biggers * 170c93d8f88SEric Biggers * We can't simply store the descriptor in an xattr because it *must* be 171c93d8f88SEric Biggers * encrypted when ext4 encryption is used, but ext4 encryption doesn't encrypt 172c93d8f88SEric Biggers * xattrs. Also, if the descriptor includes a large signature blob it may be 173c93d8f88SEric Biggers * too large to store in an xattr without the EA_INODE feature. 174c93d8f88SEric Biggers */ 175c93d8f88SEric Biggers static int ext4_write_verity_descriptor(struct inode *inode, const void *desc, 176c93d8f88SEric Biggers size_t desc_size, u64 merkle_tree_size) 177c93d8f88SEric Biggers { 178c93d8f88SEric Biggers const u64 desc_pos = round_up(ext4_verity_metadata_pos(inode) + 179c93d8f88SEric Biggers merkle_tree_size, i_blocksize(inode)); 180c93d8f88SEric Biggers const u64 desc_end = desc_pos + desc_size; 181c93d8f88SEric Biggers const __le32 desc_size_disk = cpu_to_le32(desc_size); 182c93d8f88SEric Biggers const u64 desc_size_pos = round_up(desc_end + sizeof(desc_size_disk), 183c93d8f88SEric Biggers i_blocksize(inode)) - 184c93d8f88SEric Biggers sizeof(desc_size_disk); 185c93d8f88SEric Biggers int err; 186c93d8f88SEric Biggers 187c93d8f88SEric Biggers err = pagecache_write(inode, desc, desc_size, desc_pos); 188c93d8f88SEric Biggers if (err) 189c93d8f88SEric Biggers return err; 190c93d8f88SEric Biggers 191c93d8f88SEric Biggers return pagecache_write(inode, &desc_size_disk, sizeof(desc_size_disk), 192c93d8f88SEric Biggers desc_size_pos); 193c93d8f88SEric Biggers } 194c93d8f88SEric Biggers 195c93d8f88SEric Biggers static int ext4_end_enable_verity(struct file *filp, const void *desc, 196c93d8f88SEric Biggers size_t desc_size, u64 merkle_tree_size) 197c93d8f88SEric Biggers { 198c93d8f88SEric Biggers struct inode *inode = file_inode(filp); 199c93d8f88SEric Biggers const int credits = 2; /* superblock and inode for ext4_orphan_del() */ 200c93d8f88SEric Biggers handle_t *handle; 201c93d8f88SEric Biggers int err = 0; 202c93d8f88SEric Biggers int err2; 203c93d8f88SEric Biggers 204c93d8f88SEric Biggers if (desc != NULL) { 205c93d8f88SEric Biggers /* Succeeded; write the verity descriptor. */ 206c93d8f88SEric Biggers err = ext4_write_verity_descriptor(inode, desc, desc_size, 207c93d8f88SEric Biggers merkle_tree_size); 208c93d8f88SEric Biggers 209c93d8f88SEric Biggers /* Write all pages before clearing VERITY_IN_PROGRESS. */ 210c93d8f88SEric Biggers if (!err) 211c93d8f88SEric Biggers err = filemap_write_and_wait(inode->i_mapping); 212c93d8f88SEric Biggers } 213c93d8f88SEric Biggers 214c93d8f88SEric Biggers /* If we failed, truncate anything we wrote past i_size. */ 215c93d8f88SEric Biggers if (desc == NULL || err) 216c93d8f88SEric Biggers ext4_truncate(inode); 217c93d8f88SEric Biggers 218c93d8f88SEric Biggers /* 219c93d8f88SEric Biggers * We must always clean up by clearing EXT4_STATE_VERITY_IN_PROGRESS and 220c93d8f88SEric Biggers * deleting the inode from the orphan list, even if something failed. 221c93d8f88SEric Biggers * If everything succeeded, we'll also set the verity bit in the same 222c93d8f88SEric Biggers * transaction. 223c93d8f88SEric Biggers */ 224c93d8f88SEric Biggers 225c93d8f88SEric Biggers ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); 226c93d8f88SEric Biggers 227c93d8f88SEric Biggers handle = ext4_journal_start(inode, EXT4_HT_INODE, credits); 228c93d8f88SEric Biggers if (IS_ERR(handle)) { 229c93d8f88SEric Biggers ext4_orphan_del(NULL, inode); 230c93d8f88SEric Biggers return PTR_ERR(handle); 231c93d8f88SEric Biggers } 232c93d8f88SEric Biggers 233c93d8f88SEric Biggers err2 = ext4_orphan_del(handle, inode); 234c93d8f88SEric Biggers if (err2) 235c93d8f88SEric Biggers goto out_stop; 236c93d8f88SEric Biggers 237c93d8f88SEric Biggers if (desc != NULL && !err) { 238c93d8f88SEric Biggers struct ext4_iloc iloc; 239c93d8f88SEric Biggers 240c93d8f88SEric Biggers err = ext4_reserve_inode_write(handle, inode, &iloc); 241c93d8f88SEric Biggers if (err) 242c93d8f88SEric Biggers goto out_stop; 243c93d8f88SEric Biggers ext4_set_inode_flag(inode, EXT4_INODE_VERITY); 244c93d8f88SEric Biggers ext4_set_inode_flags(inode); 245c93d8f88SEric Biggers err = ext4_mark_iloc_dirty(handle, inode, &iloc); 246c93d8f88SEric Biggers } 247c93d8f88SEric Biggers out_stop: 248c93d8f88SEric Biggers ext4_journal_stop(handle); 249c93d8f88SEric Biggers return err ?: err2; 250c93d8f88SEric Biggers } 251c93d8f88SEric Biggers 252c93d8f88SEric Biggers static int ext4_get_verity_descriptor_location(struct inode *inode, 253c93d8f88SEric Biggers size_t *desc_size_ret, 254c93d8f88SEric Biggers u64 *desc_pos_ret) 255c93d8f88SEric Biggers { 256c93d8f88SEric Biggers struct ext4_ext_path *path; 257c93d8f88SEric Biggers struct ext4_extent *last_extent; 258c93d8f88SEric Biggers u32 end_lblk; 259c93d8f88SEric Biggers u64 desc_size_pos; 260c93d8f88SEric Biggers __le32 desc_size_disk; 261c93d8f88SEric Biggers u32 desc_size; 262c93d8f88SEric Biggers u64 desc_pos; 263c93d8f88SEric Biggers int err; 264c93d8f88SEric Biggers 265c93d8f88SEric Biggers /* 266c93d8f88SEric Biggers * Descriptor size is in last 4 bytes of last allocated block. 267c93d8f88SEric Biggers * See ext4_write_verity_descriptor(). 268c93d8f88SEric Biggers */ 269c93d8f88SEric Biggers 270c93d8f88SEric Biggers if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 271c93d8f88SEric Biggers EXT4_ERROR_INODE(inode, "verity file doesn't use extents"); 272c93d8f88SEric Biggers return -EFSCORRUPTED; 273c93d8f88SEric Biggers } 274c93d8f88SEric Biggers 275c93d8f88SEric Biggers path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); 276c93d8f88SEric Biggers if (IS_ERR(path)) 277c93d8f88SEric Biggers return PTR_ERR(path); 278c93d8f88SEric Biggers 279c93d8f88SEric Biggers last_extent = path[path->p_depth].p_ext; 280c93d8f88SEric Biggers if (!last_extent) { 281c93d8f88SEric Biggers EXT4_ERROR_INODE(inode, "verity file has no extents"); 282c93d8f88SEric Biggers ext4_ext_drop_refs(path); 283c93d8f88SEric Biggers kfree(path); 284c93d8f88SEric Biggers return -EFSCORRUPTED; 285c93d8f88SEric Biggers } 286c93d8f88SEric Biggers 287c93d8f88SEric Biggers end_lblk = le32_to_cpu(last_extent->ee_block) + 288c93d8f88SEric Biggers ext4_ext_get_actual_len(last_extent); 289c93d8f88SEric Biggers desc_size_pos = (u64)end_lblk << inode->i_blkbits; 290c93d8f88SEric Biggers ext4_ext_drop_refs(path); 291c93d8f88SEric Biggers kfree(path); 292c93d8f88SEric Biggers 293c93d8f88SEric Biggers if (desc_size_pos < sizeof(desc_size_disk)) 294c93d8f88SEric Biggers goto bad; 295c93d8f88SEric Biggers desc_size_pos -= sizeof(desc_size_disk); 296c93d8f88SEric Biggers 297c93d8f88SEric Biggers err = pagecache_read(inode, &desc_size_disk, sizeof(desc_size_disk), 298c93d8f88SEric Biggers desc_size_pos); 299c93d8f88SEric Biggers if (err) 300c93d8f88SEric Biggers return err; 301c93d8f88SEric Biggers desc_size = le32_to_cpu(desc_size_disk); 302c93d8f88SEric Biggers 303c93d8f88SEric Biggers /* 304c93d8f88SEric Biggers * The descriptor is stored just before the desc_size_disk, but starting 305c93d8f88SEric Biggers * on a filesystem block boundary. 306c93d8f88SEric Biggers */ 307c93d8f88SEric Biggers 308c93d8f88SEric Biggers if (desc_size > INT_MAX || desc_size > desc_size_pos) 309c93d8f88SEric Biggers goto bad; 310c93d8f88SEric Biggers 311c93d8f88SEric Biggers desc_pos = round_down(desc_size_pos - desc_size, i_blocksize(inode)); 312c93d8f88SEric Biggers if (desc_pos < ext4_verity_metadata_pos(inode)) 313c93d8f88SEric Biggers goto bad; 314c93d8f88SEric Biggers 315c93d8f88SEric Biggers *desc_size_ret = desc_size; 316c93d8f88SEric Biggers *desc_pos_ret = desc_pos; 317c93d8f88SEric Biggers return 0; 318c93d8f88SEric Biggers 319c93d8f88SEric Biggers bad: 320c93d8f88SEric Biggers EXT4_ERROR_INODE(inode, "verity file corrupted; can't find descriptor"); 321c93d8f88SEric Biggers return -EFSCORRUPTED; 322c93d8f88SEric Biggers } 323c93d8f88SEric Biggers 324c93d8f88SEric Biggers static int ext4_get_verity_descriptor(struct inode *inode, void *buf, 325c93d8f88SEric Biggers size_t buf_size) 326c93d8f88SEric Biggers { 327c93d8f88SEric Biggers size_t desc_size = 0; 328c93d8f88SEric Biggers u64 desc_pos = 0; 329c93d8f88SEric Biggers int err; 330c93d8f88SEric Biggers 331c93d8f88SEric Biggers err = ext4_get_verity_descriptor_location(inode, &desc_size, &desc_pos); 332c93d8f88SEric Biggers if (err) 333c93d8f88SEric Biggers return err; 334c93d8f88SEric Biggers 335c93d8f88SEric Biggers if (buf_size) { 336c93d8f88SEric Biggers if (desc_size > buf_size) 337c93d8f88SEric Biggers return -ERANGE; 338c93d8f88SEric Biggers err = pagecache_read(inode, buf, desc_size, desc_pos); 339c93d8f88SEric Biggers if (err) 340c93d8f88SEric Biggers return err; 341c93d8f88SEric Biggers } 342c93d8f88SEric Biggers return desc_size; 343c93d8f88SEric Biggers } 344c93d8f88SEric Biggers 345c93d8f88SEric Biggers static struct page *ext4_read_merkle_tree_page(struct inode *inode, 346c93d8f88SEric Biggers pgoff_t index) 347c93d8f88SEric Biggers { 348c93d8f88SEric Biggers index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT; 349c93d8f88SEric Biggers 350c93d8f88SEric Biggers return read_mapping_page(inode->i_mapping, index, NULL); 351c93d8f88SEric Biggers } 352c93d8f88SEric Biggers 353c93d8f88SEric Biggers static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf, 354c93d8f88SEric Biggers u64 index, int log_blocksize) 355c93d8f88SEric Biggers { 356c93d8f88SEric Biggers loff_t pos = ext4_verity_metadata_pos(inode) + (index << log_blocksize); 357c93d8f88SEric Biggers 358c93d8f88SEric Biggers return pagecache_write(inode, buf, 1 << log_blocksize, pos); 359c93d8f88SEric Biggers } 360c93d8f88SEric Biggers 361c93d8f88SEric Biggers const struct fsverity_operations ext4_verityops = { 362c93d8f88SEric Biggers .begin_enable_verity = ext4_begin_enable_verity, 363c93d8f88SEric Biggers .end_enable_verity = ext4_end_enable_verity, 364c93d8f88SEric Biggers .get_verity_descriptor = ext4_get_verity_descriptor, 365c93d8f88SEric Biggers .read_merkle_tree_page = ext4_read_merkle_tree_page, 366c93d8f88SEric Biggers .write_merkle_tree_block = ext4_write_merkle_tree_block, 367c93d8f88SEric Biggers }; 368