inode.c (old: 9f4813b531a0b8cc502fcfb142937fe4e9104d77) | inode.c (new: 4ea99936a1630f51fc3a2d61a58ec4a1c4b7d55a) |
---|---|
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * linux/fs/ext4/inode.c 4 * 5 * Copyright (C) 1992, 1993, 1994, 1995 6 * Remy Card (card@masi.ibp.fr) 7 * Laboratoire MASI - Institut Blaise Pascal 8 * Universite Pierre et Marie Curie (Paris VI) --- 150 unchanged lines hidden (view full) --- 159 160 return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); 161 } 162 return S_ISLNK(inode->i_mode) && inode->i_size && 163 (inode->i_size < EXT4_N_BLOCKS * 4); 164} 165 166/* | 1// SPDX-License-Identifier: GPL-2.0 2/* 3 * linux/fs/ext4/inode.c 4 * 5 * Copyright (C) 1992, 1993, 1994, 1995 6 * Remy Card (card@masi.ibp.fr) 7 * Laboratoire MASI - Institut Blaise Pascal 8 * Universite Pierre et Marie Curie (Paris VI) --- 150 unchanged lines hidden (view full) --- 159 160 return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); 161 } 162 return S_ISLNK(inode->i_mode) && inode->i_size && 163 (inode->i_size < EXT4_N_BLOCKS * 4); 164} 165 166/* |
167 * Restart the transaction associated with *handle. This does a commit, 168 * so before we call here everything must be consistently dirtied against 169 * this transaction. 170 */ 171int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, 172 int nblocks) 173{ 174 int ret; 175 176 /* 177 * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this 178 * moment, get_block can be called only for blocks inside i_size since 179 * the page cache has already been dropped and writes are blocked by 180 * i_mutex. So we can safely drop the i_data_sem here. 181 */ 182 BUG_ON(EXT4_JOURNAL(inode) == NULL); 183 jbd_debug(2, "restarting handle %p\n", handle); 184 up_write(&EXT4_I(inode)->i_data_sem); 185 ret = ext4_journal_restart(handle, nblocks); 186 down_write(&EXT4_I(inode)->i_data_sem); 187 ext4_discard_preallocations(inode); 188 189 return ret; 190} 191 192/* |
193 * Called at the last iput() if i_nlink is zero. 194 */ 195void ext4_evict_inode(struct inode *inode) 196{ 197 handle_t *handle; 198 int err; | 167 * Called at the last iput() if i_nlink is zero. 168 */ 169void ext4_evict_inode(struct inode *inode) 170{ 171 handle_t *handle; 172 int err; |
199 int extra_credits = 3; | 173 /* 174 * Credits for final inode cleanup and freeing: 175 * sb + inode (ext4_orphan_del()), block bitmap, group descriptor 176 * (xattr block freeing), bitmap, group descriptor (inode freeing) 177 */ 178 int extra_credits = 6; |
200 struct ext4_xattr_inode_array *ea_inode_array = NULL; 201 202 trace_ext4_evict_inode(inode); 203 204 if (inode->i_nlink) { 205 /* 206 * When journalling data dirty buffers are tracked only in the 207 * journal. So although mm thinks everything is clean and --- 39 unchanged lines hidden (view full) --- 247 * Protect us against freezing - iput() caller didn't have to have any 248 * protection against it 249 */ 250 sb_start_intwrite(inode->i_sb); 251 252 if (!IS_NOQUOTA(inode)) 253 extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb); 254 | 179 struct ext4_xattr_inode_array *ea_inode_array = NULL; 180 181 trace_ext4_evict_inode(inode); 182 183 if (inode->i_nlink) { 184 /* 185 * When journalling data dirty buffers are tracked only in the 186 * journal. So although mm thinks everything is clean and --- 39 unchanged lines hidden (view full) --- 226 * Protect us against freezing - iput() caller didn't have to have any 227 * protection against it 228 */ 229 sb_start_intwrite(inode->i_sb); 230 231 if (!IS_NOQUOTA(inode)) 232 extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb); 233 |
234 /* 235 * Block bitmap, group descriptor, and inode are accounted in both 236 * ext4_blocks_for_truncate() and extra_credits. So subtract 3. 237 */ |
|
255 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, | 238 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, |
256 ext4_blocks_for_truncate(inode)+extra_credits); | 239 ext4_blocks_for_truncate(inode) + extra_credits - 3); |
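A rough illustration of the credit arithmetic above, as a userspace sketch; the helper name and the quota parameter are illustrative stand-ins (the latter for EXT4_MAXQUOTAS_DEL_BLOCKS()), not the real ext4 API:

```c
/*
 * Hypothetical sketch of the credit estimate used when starting the
 * truncate handle in ext4_evict_inode(). Names are illustrative only.
 */
static int evict_handle_credits(int truncate_credits, int quota_credits)
{
	/*
	 * sb + inode (ext4_orphan_del()), block bitmap + group descriptor
	 * (xattr block freeing), bitmap + group descriptor (inode freeing).
	 */
	int extra_credits = 6 + quota_credits;

	/*
	 * The block bitmap, group descriptor and inode buffers are counted
	 * by both the truncate estimate and extra_credits, so subtract the
	 * three double-counted buffers.
	 */
	return truncate_credits + extra_credits - 3;
}
```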
257 if (IS_ERR(handle)) { 258 ext4_std_error(inode->i_sb, PTR_ERR(handle)); 259 /* 260 * If we're going to skip the normal cleanup, we still need to 261 * make sure that the in-core orphan linked list is properly 262 * cleaned up. 263 */ 264 ext4_orphan_del(NULL, inode); --- 557 unchanged lines hidden (view full) --- 822 return _ext4_get_block(inode, iblock, bh_result, 823 EXT4_GET_BLOCKS_IO_CREATE_EXT); 824} 825 826/* Maximum number of blocks we map for direct IO at once. */ 827#define DIO_MAX_BLOCKS 4096 828 829/* | 240 if (IS_ERR(handle)) { 241 ext4_std_error(inode->i_sb, PTR_ERR(handle)); 242 /* 243 * If we're going to skip the normal cleanup, we still need to 244 * make sure that the in-core orphan linked list is properly 245 * cleaned up. 246 */ 247 ext4_orphan_del(NULL, inode); --- 557 unchanged lines hidden (view full) --- 805 return _ext4_get_block(inode, iblock, bh_result, 806 EXT4_GET_BLOCKS_IO_CREATE_EXT); 807} 808 809/* Maximum number of blocks we map for direct IO at once. */ 810#define DIO_MAX_BLOCKS 4096 811 812/* |
830 * Get blocks function for the cases that need to start a transaction - 831 * generally different cases of direct IO and DAX IO. It also handles retries 832 * in case of ENOSPC. 833 */ 834static int ext4_get_block_trans(struct inode *inode, sector_t iblock, 835 struct buffer_head *bh_result, int flags) 836{ 837 int dio_credits; 838 handle_t *handle; 839 int retries = 0; 840 int ret; 841 842 /* Trim mapping request to maximum we can map at once for DIO */ 843 if (bh_result->b_size >> inode->i_blkbits > DIO_MAX_BLOCKS) 844 bh_result->b_size = DIO_MAX_BLOCKS << inode->i_blkbits; 845 dio_credits = ext4_chunk_trans_blocks(inode, 846 bh_result->b_size >> inode->i_blkbits); 847retry: 848 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, dio_credits); 849 if (IS_ERR(handle)) 850 return PTR_ERR(handle); 851 852 ret = _ext4_get_block(inode, iblock, bh_result, flags); 853 ext4_journal_stop(handle); 854 855 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 856 goto retry; 857 return ret; 858} 859 860/* Get block function for DIO reads and writes to inodes without extents */ 861int ext4_dio_get_block(struct inode *inode, sector_t iblock, 862 struct buffer_head *bh, int create) 863{ 864 /* We don't expect handle for direct IO */ 865 WARN_ON_ONCE(ext4_journal_current_handle()); 866 867 if (!create) 868 return _ext4_get_block(inode, iblock, bh, 0); 869 return ext4_get_block_trans(inode, iblock, bh, EXT4_GET_BLOCKS_CREATE); 870} 871 872/* 873 * Get block function for AIO DIO writes when we create an unwritten extent if 874 * blocks are not allocated yet. The extent will be converted to written 875 * after IO is complete. 876 */ 877static int ext4_dio_get_block_unwritten_async(struct inode *inode, 878 sector_t iblock, struct buffer_head *bh_result, int create) 879{ 880 int ret; 881 882 /* We don't expect handle for direct IO */ 883 WARN_ON_ONCE(ext4_journal_current_handle()); 884 885 ret = ext4_get_block_trans(inode, iblock, bh_result, 886 EXT4_GET_BLOCKS_IO_CREATE_EXT); 887 888 /* 889 * When doing DIO using unwritten extents, we need io_end to convert 890 * unwritten extents to written on IO completion. We allocate io_end 891 * once we spot an unwritten extent and store it in b_private. Generic 892 * DIO code keeps b_private set and furthermore passes the value to 893 * our completion callback in the 'private' argument. 894 */ 895 if (!ret && buffer_unwritten(bh_result)) { 896 if (!bh_result->b_private) { 897 ext4_io_end_t *io_end; 898 899 io_end = ext4_init_io_end(inode, GFP_KERNEL); 900 if (!io_end) 901 return -ENOMEM; 902 bh_result->b_private = io_end; 903 ext4_set_io_unwritten_flag(inode, io_end); 904 } 905 set_buffer_defer_completion(bh_result); 906 } 907 908 return ret; 909} 910 911/* 912 * Get block function for non-AIO DIO writes when we create an unwritten extent 913 * if blocks are not allocated yet. The extent will be converted to written 914 * after IO is complete by ext4_direct_IO_write(). 915 */ 916static int ext4_dio_get_block_unwritten_sync(struct inode *inode, 917 sector_t iblock, struct buffer_head *bh_result, int create) 918{ 919 int ret; 920 921 /* We don't expect handle for direct IO */ 922 WARN_ON_ONCE(ext4_journal_current_handle()); 923 924 ret = ext4_get_block_trans(inode, iblock, bh_result, 925 EXT4_GET_BLOCKS_IO_CREATE_EXT); 926 927 /* 928 * Mark inode as having pending DIO writes to unwritten extents. 929 * ext4_direct_IO_write() checks this flag and converts extents to 930 * written. 
931 */ 932 if (!ret && buffer_unwritten(bh_result)) 933 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 934 935 return ret; 936} 937 938static int ext4_dio_get_block_overwrite(struct inode *inode, sector_t iblock, 939 struct buffer_head *bh_result, int create) 940{ 941 int ret; 942 943 ext4_debug("ext4_dio_get_block_overwrite: inode %lu, create flag %d\n", 944 inode->i_ino, create); 945 /* We don't expect handle for direct IO */ 946 WARN_ON_ONCE(ext4_journal_current_handle()); 947 948 ret = _ext4_get_block(inode, iblock, bh_result, 0); 949 /* 950 * Blocks should have been preallocated! ext4_file_write_iter() checks 951 * that. 952 */ 953 WARN_ON_ONCE(!buffer_mapped(bh_result) || buffer_unwritten(bh_result)); 954 955 return ret; 956} 957 958 959/* | |
960 * `handle' can be NULL if create is zero 961 */ 962struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, 963 ext4_lblk_t block, int map_flags) 964{ 965 struct ext4_map_blocks map; 966 struct buffer_head *bh; 967 int create = map_flags & EXT4_GET_BLOCKS_CREATE; --- 1368 unchanged lines hidden (view full) --- 2336 err = mpage_submit_page(mpd, head->b_page); 2337 if (err < 0) 2338 return err; 2339 } 2340 return lblk < blocks; 2341} 2342 2343/* | 813 * `handle' can be NULL if create is zero 814 */ 815struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, 816 ext4_lblk_t block, int map_flags) 817{ 818 struct ext4_map_blocks map; 819 struct buffer_head *bh; 820 int create = map_flags & EXT4_GET_BLOCKS_CREATE; --- 1368 unchanged lines hidden (view full) --- 2189 err = mpage_submit_page(mpd, head->b_page); 2190 if (err < 0) 2191 return err; 2192 } 2193 return lblk < blocks; 2194} 2195 2196/* |
2197 * mpage_process_page - update page buffers corresponding to the changed extent 2198 * and may submit the fully mapped page for IO 2199 * 2200 * @mpd - description of extent to map, on return next extent to map 2201 * @m_lblk - logical block mapping. 2202 * @m_pblk - corresponding physical mapping. 2203 * @map_bh - determines on return whether this page requires any further 2204 * mapping or not. 2205 * Scan the given page's buffers corresponding to the changed extent and update 2206 * buffer state according to the new extent state. 2207 * We map delalloc buffers to their physical location, clear unwritten bits. 2208 * If the given page is not fully mapped, we update @map to the next extent in 2209 * the given page that needs mapping and set @map_bh to true. 2210 */ 2211static int mpage_process_page(struct mpage_da_data *mpd, struct page *page, 2212 ext4_lblk_t *m_lblk, ext4_fsblk_t *m_pblk, 2213 bool *map_bh) 2214{ 2215 struct buffer_head *head, *bh; 2216 ext4_io_end_t *io_end = mpd->io_submit.io_end; 2217 ext4_lblk_t lblk = *m_lblk; 2218 ext4_fsblk_t pblock = *m_pblk; 2219 int err = 0; 2220 int blkbits = mpd->inode->i_blkbits; 2221 ssize_t io_end_size = 0; 2222 struct ext4_io_end_vec *io_end_vec = ext4_last_io_end_vec(io_end); 2223 2224 bh = head = page_buffers(page); 2225 do { 2226 if (lblk < mpd->map.m_lblk) 2227 continue; 2228 if (lblk >= mpd->map.m_lblk + mpd->map.m_len) { 2229 /* 2230 * Buffer after end of mapped extent. 2231 * Find next buffer in the page to map. 2232 */ 2233 mpd->map.m_len = 0; 2234 mpd->map.m_flags = 0; 2235 io_end_vec->size += io_end_size; 2236 io_end_size = 0; 2237 2238 err = mpage_process_page_bufs(mpd, head, bh, lblk); 2239 if (err > 0) 2240 err = 0; 2241 if (!err && mpd->map.m_len && mpd->map.m_lblk > lblk) { 2242 io_end_vec = ext4_alloc_io_end_vec(io_end); 2243 io_end_vec->offset = mpd->map.m_lblk << blkbits; 2244 } 2245 *map_bh = true; 2246 goto out; 2247 } 2248 if (buffer_delay(bh)) { 2249 clear_buffer_delay(bh); 2250 bh->b_blocknr = pblock++; 2251 } 2252 clear_buffer_unwritten(bh); 2253 io_end_size += (1 << blkbits); 2254 } while (lblk++, (bh = bh->b_this_page) != head); 2255 2256 io_end_vec->size += io_end_size; 2257 io_end_size = 0; 2258 *map_bh = false; 2259out: 2260 *m_lblk = lblk; 2261 *m_pblk = pblock; 2262 return err; 2263} 2264 2265/*
|
2344 * mpage_map_buffers - update buffers corresponding to changed extent and 2345 * submit fully mapped pages for IO 2346 * 2347 * @mpd - description of extent to map, on return next extent to map 2348 * 2349 * Scan buffers corresponding to changed extent (we expect corresponding pages 2350 * to be already locked) and update buffer state according to new extent state. 2351 * We map delalloc buffers to their physical location, clear unwritten bits, 2352 * and mark buffers as uninit when we perform writes to unwritten extents 2353 * and do extent conversion after IO is finished. If the last page is not fully 2354 * mapped, we update @map to the next extent in the last page that needs 2355 * mapping. Otherwise we submit the page for IO. 2356 */ 2357static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) 2358{ 2359 struct pagevec pvec; 2360 int nr_pages, i; 2361 struct inode *inode = mpd->inode; | 2266 * mpage_map_buffers - update buffers corresponding to changed extent and 2267 * submit fully mapped pages for IO 2268 * 2269 * @mpd - description of extent to map, on return next extent to map 2270 * 2271 * Scan buffers corresponding to changed extent (we expect corresponding pages 2272 * to be already locked) and update buffer state according to new extent state. 2273 * We map delalloc buffers to their physical location, clear unwritten bits, 2274 * and mark buffers as uninit when we perform writes to unwritten extents 2275 * and do extent conversion after IO is finished. If the last page is not fully 2276 * mapped, we update @map to the next extent in the last page that needs 2277 * mapping. Otherwise we submit the page for IO. 2278 */ 2279static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) 2280{ 2281 struct pagevec pvec; 2282 int nr_pages, i; 2283 struct inode *inode = mpd->inode; |
2362 struct buffer_head *head, *bh; | |
2363 int bpp_bits = PAGE_SHIFT - inode->i_blkbits; 2364 pgoff_t start, end; 2365 ext4_lblk_t lblk; | 2284 int bpp_bits = PAGE_SHIFT - inode->i_blkbits; 2285 pgoff_t start, end; 2286 ext4_lblk_t lblk; |
2366 sector_t pblock; | 2287 ext4_fsblk_t pblock; |
2367 int err; | 2288 int err; |
2289 bool map_bh = false; |
|
2368 2369 start = mpd->map.m_lblk >> bpp_bits; 2370 end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits; 2371 lblk = start << bpp_bits; 2372 pblock = mpd->map.m_pblk; 2373 2374 pagevec_init(&pvec); 2375 while (start <= end) { 2376 nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping, 2377 &start, end); 2378 if (nr_pages == 0) 2379 break; 2380 for (i = 0; i < nr_pages; i++) { 2381 struct page *page = pvec.pages[i]; 2382 | 2290 2291 start = mpd->map.m_lblk >> bpp_bits; 2292 end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits; 2293 lblk = start << bpp_bits; 2294 pblock = mpd->map.m_pblk; 2295 2296 pagevec_init(&pvec); 2297 while (start <= end) { 2298 nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping, 2299 &start, end); 2300 if (nr_pages == 0) 2301 break; 2302 for (i = 0; i < nr_pages; i++) { 2303 struct page *page = pvec.pages[i]; 2304 |
2383 bh = head = page_buffers(page); 2384 do { 2385 if (lblk < mpd->map.m_lblk) 2386 continue; 2387 if (lblk >= mpd->map.m_lblk + mpd->map.m_len) { 2388 /* 2389 * Buffer after end of mapped extent. 2390 * Find next buffer in the page to map. 2391 */ 2392 mpd->map.m_len = 0; 2393 mpd->map.m_flags = 0; 2394 /* 2395 * FIXME: If dioread_nolock supports 2396 * blocksize < pagesize, we need to make 2397 * sure we add size mapped so far to 2398 * io_end->size as the following call 2399 * can submit the page for IO. 2400 */ 2401 err = mpage_process_page_bufs(mpd, head, 2402 bh, lblk); 2403 pagevec_release(&pvec); 2404 if (err > 0) 2405 err = 0; 2406 return err; 2407 } 2408 if (buffer_delay(bh)) { 2409 clear_buffer_delay(bh); 2410 bh->b_blocknr = pblock++; 2411 } 2412 clear_buffer_unwritten(bh); 2413 } while (lblk++, (bh = bh->b_this_page) != head); 2414 | 2305 err = mpage_process_page(mpd, page, &lblk, &pblock, 2306 &map_bh); |
2415 /* | 2307 /* |
2416 * FIXME: This is going to break if dioread_nolock 2417 * supports blocksize < pagesize as we will try to 2418 * convert potentially unmapped parts of inode. | 2308 * If map_bh is true, the page may require further bh 2309 * mapping, or it may have been submitted for IO; 2310 * return so the caller can continue with extent mapping.
2419 */ | 2311 */ |
2420 mpd->io_submit.io_end->size += PAGE_SIZE; | 2312 if (err < 0 || map_bh == true) 2313 goto out; |
2421 /* Page fully mapped - let IO run! */ 2422 err = mpage_submit_page(mpd, page); | 2314 /* Page fully mapped - let IO run! */ 2315 err = mpage_submit_page(mpd, page); |
2423 if (err < 0) { 2424 pagevec_release(&pvec); 2425 return err; 2426 } | 2316 if (err < 0) 2317 goto out; |
2427 } 2428 pagevec_release(&pvec); 2429 } 2430 /* Extent fully mapped and matches with page boundary. We are done. */ 2431 mpd->map.m_len = 0; 2432 mpd->map.m_flags = 0; 2433 return 0; | 2318 } 2319 pagevec_release(&pvec); 2320 } 2321 /* Extent fully mapped and matches with page boundary. We are done. */ 2322 mpd->map.m_len = 0; 2323 mpd->map.m_flags = 0; 2324 return 0; |
2325out: 2326 pagevec_release(&pvec); 2327 return err; |
|
2434} 2435 2436static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) 2437{ 2438 struct inode *inode = mpd->inode; 2439 struct ext4_map_blocks *map = &mpd->map; 2440 int get_blocks_flags; 2441 int err, dioread_nolock; --- 63 unchanged lines hidden (view full) --- 2505 struct mpage_da_data *mpd, 2506 bool *give_up_on_write) 2507{ 2508 struct inode *inode = mpd->inode; 2509 struct ext4_map_blocks *map = &mpd->map; 2510 int err; 2511 loff_t disksize; 2512 int progress = 0; | 2328} 2329 2330static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) 2331{ 2332 struct inode *inode = mpd->inode; 2333 struct ext4_map_blocks *map = &mpd->map; 2334 int get_blocks_flags; 2335 int err, dioread_nolock; --- 63 unchanged lines hidden (view full) --- 2399 struct mpage_da_data *mpd, 2400 bool *give_up_on_write) 2401{ 2402 struct inode *inode = mpd->inode; 2403 struct ext4_map_blocks *map = &mpd->map; 2404 int err; 2405 loff_t disksize; 2406 int progress = 0; |
2407 ext4_io_end_t *io_end = mpd->io_submit.io_end; 2408 struct ext4_io_end_vec *io_end_vec = ext4_alloc_io_end_vec(io_end); |
|
2513 | 2409 |
2514 mpd->io_submit.io_end->offset = 2515 ((loff_t)map->m_lblk) << inode->i_blkbits; | 2410 io_end_vec->offset = ((loff_t)map->m_lblk) << inode->i_blkbits; |
2516 do { 2517 err = mpage_map_one_extent(handle, mpd); 2518 if (err < 0) { 2519 struct super_block *sb = inode->i_sb; 2520 2521 if (ext4_forced_shutdown(EXT4_SB(sb)) || 2522 EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) 2523 goto invalidate_dirty_pages; --- 877 unchanged lines hidden (view full) --- 3401 return !jbd2_transaction_committed(journal, 3402 EXT4_I(inode)->i_datasync_tid); 3403 /* Any metadata buffers to write? */ 3404 if (!list_empty(&inode->i_mapping->private_list)) 3405 return true; 3406 return inode->i_state & I_DIRTY_DATASYNC; 3407} 3408 | 2411 do { 2412 err = mpage_map_one_extent(handle, mpd); 2413 if (err < 0) { 2414 struct super_block *sb = inode->i_sb; 2415 2416 if (ext4_forced_shutdown(EXT4_SB(sb)) || 2417 EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) 2418 goto invalidate_dirty_pages; --- 877 unchanged lines hidden (view full) --- 3296 return !jbd2_transaction_committed(journal, 3297 EXT4_I(inode)->i_datasync_tid); 3298 /* Any metadata buffers to write? */ 3299 if (!list_empty(&inode->i_mapping->private_list)) 3300 return true; 3301 return inode->i_state & I_DIRTY_DATASYNC; 3302} 3303 |
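Stepping back to the writeback path above: the new mpage_process_page() splits a page's completion range into ext4_io_end_vec entries whenever the mapped extent ends inside the page, which is what lets dioread_nolock work with blocksize < pagesize. A simplified userspace model of that per-buffer size accounting (all names are illustrative stand-ins for the kernel structures):

```c
#include <stdbool.h>

/*
 * Simplified model of the io_end_vec accounting in mpage_process_page():
 * walk the blocks of one page, accumulate bytes belonging to the current
 * mapped extent, and report whether the page still has blocks beyond the
 * extent that need a further mapping.
 */
struct toy_io_vec {
	long long offset;	/* byte offset of this completion range */
	long long size;		/* bytes accumulated so far */
};

static bool process_page_blocks(struct toy_io_vec *vec, long long lblk,
				int blocks_per_page, long long map_lblk,
				long long map_len, int blkbits)
{
	long long io_size = 0;

	for (int i = 0; i < blocks_per_page; i++, lblk++) {
		if (lblk < map_lblk)
			continue;		/* before the mapped extent */
		if (lblk >= map_lblk + map_len) {
			vec->size += io_size;	/* close out this vec */
			return true;		/* page needs further mapping */
		}
		io_size += 1LL << blkbits;	/* block covered by extent */
	}
	vec->size += io_size;
	return false;				/* page fully mapped */
}
```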
3409static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, 3410 unsigned flags, struct iomap *iomap) | 3304static void ext4_set_iomap(struct inode *inode, struct iomap *iomap, 3305 struct ext4_map_blocks *map, loff_t offset, 3306 loff_t length) |
3411{ | 3307{ |
3412 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 3413 unsigned int blkbits = inode->i_blkbits; 3414 unsigned long first_block, last_block; 3415 struct ext4_map_blocks map; 3416 bool delalloc = false; 3417 int ret; | 3308 u8 blkbits = inode->i_blkbits; |
3418 | 3309 |
3419 if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK) 3420 return -EINVAL; 3421 first_block = offset >> blkbits; 3422 last_block = min_t(loff_t, (offset + length - 1) >> blkbits, 3423 EXT4_MAX_LOGICAL_BLOCK); 3424 3425 if (flags & IOMAP_REPORT) { 3426 if (ext4_has_inline_data(inode)) { 3427 ret = ext4_inline_data_iomap(inode, iomap); 3428 if (ret != -EAGAIN) { 3429 if (ret == 0 && offset >= iomap->length) 3430 ret = -ENOENT; 3431 return ret; 3432 } 3433 } 3434 } else { 3435 if (WARN_ON_ONCE(ext4_has_inline_data(inode))) 3436 return -ERANGE; 3437 } 3438 3439 map.m_lblk = first_block; 3440 map.m_len = last_block - first_block + 1; 3441 3442 if (flags & IOMAP_REPORT) { 3443 ret = ext4_map_blocks(NULL, inode, &map, 0); 3444 if (ret < 0) 3445 return ret; 3446 3447 if (ret == 0) { 3448 ext4_lblk_t end = map.m_lblk + map.m_len - 1; 3449 struct extent_status es; 3450 3451 ext4_es_find_extent_range(inode, &ext4_es_is_delayed, 3452 map.m_lblk, end, &es); 3453 3454 if (!es.es_len || es.es_lblk > end) { 3455 /* entire range is a hole */ 3456 } else if (es.es_lblk > map.m_lblk) { 3457 /* range starts with a hole */ 3458 map.m_len = es.es_lblk - map.m_lblk; 3459 } else { 3460 ext4_lblk_t offs = 0; 3461 3462 if (es.es_lblk < map.m_lblk) 3463 offs = map.m_lblk - es.es_lblk; 3464 map.m_lblk = es.es_lblk + offs; 3465 map.m_len = es.es_len - offs; 3466 delalloc = true; 3467 } 3468 } 3469 } else if (flags & IOMAP_WRITE) { 3470 int dio_credits; 3471 handle_t *handle; 3472 int retries = 0; 3473 3474 /* Trim mapping request to maximum we can map at once for DIO */ 3475 if (map.m_len > DIO_MAX_BLOCKS) 3476 map.m_len = DIO_MAX_BLOCKS; 3477 dio_credits = ext4_chunk_trans_blocks(inode, map.m_len); 3478retry: 3479 /* 3480 * Either we allocate blocks and then we don't get unwritten 3481 * extent so we have reserved enough credits, or the blocks 3482 * are already allocated and unwritten and in that case 3483 * extent conversion fits in the credits as well. 3484 */ 3485 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, 3486 dio_credits); 3487 if (IS_ERR(handle)) 3488 return PTR_ERR(handle); 3489 3490 ret = ext4_map_blocks(handle, inode, &map, 3491 EXT4_GET_BLOCKS_CREATE_ZERO); 3492 if (ret < 0) { 3493 ext4_journal_stop(handle); 3494 if (ret == -ENOSPC && 3495 ext4_should_retry_alloc(inode->i_sb, &retries)) 3496 goto retry; 3497 return ret; 3498 } 3499 3500 /* 3501 * If we added blocks beyond i_size, we need to make sure they 3502 * will get truncated if we crash before updating i_size in 3503 * ext4_iomap_end(). For faults we don't need to do that (and 3504 * even cannot because for orphan list operations inode_lock is 3505 * required) - if we happen to instantiate block beyond i_size, 3506 * it is because we race with truncate which has already added 3507 * the inode to the orphan list. 3508 */ 3509 if (!(flags & IOMAP_FAULT) && first_block + map.m_len > 3510 (i_size_read(inode) + (1 << blkbits) - 1) >> blkbits) { 3511 int err; 3512 3513 err = ext4_orphan_add(handle, inode); 3514 if (err < 0) { 3515 ext4_journal_stop(handle); 3516 return err; 3517 } 3518 } 3519 ext4_journal_stop(handle); 3520 } else { 3521 ret = ext4_map_blocks(NULL, inode, &map, 0); 3522 if (ret < 0) 3523 return ret; 3524 } 3525 | 3310 /* 3311 * Writes that span EOF might trigger an I/O size update on completion, 3312 * so consider them to be dirty for the purpose of O_DSYNC, even if 3313 * no other metadata changes are being made or pending. 3314 */
3526 iomap->flags = 0; | 3315 iomap->flags = 0; |
3527 if (ext4_inode_datasync_dirty(inode)) | 3316 if (ext4_inode_datasync_dirty(inode) || 3317 offset + length > i_size_read(inode)) |
3528 iomap->flags |= IOMAP_F_DIRTY; | 3318 iomap->flags |= IOMAP_F_DIRTY; |
3319 3320 if (map->m_flags & EXT4_MAP_NEW) 3321 iomap->flags |= IOMAP_F_NEW; 3322 |
|
3529 iomap->bdev = inode->i_sb->s_bdev; | 3323 iomap->bdev = inode->i_sb->s_bdev; |
3530 iomap->dax_dev = sbi->s_daxdev; 3531 iomap->offset = (u64)first_block << blkbits; 3532 iomap->length = (u64)map.m_len << blkbits; | 3324 iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev; 3325 iomap->offset = (u64) map->m_lblk << blkbits; 3326 iomap->length = (u64) map->m_len << blkbits; |
3533 | 3327 |
3534 if (ret == 0) { 3535 iomap->type = delalloc ? IOMAP_DELALLOC : IOMAP_HOLE; 3536 iomap->addr = IOMAP_NULL_ADDR; | 3328 /* 3329 * Flags passed to ext4_map_blocks() for direct I/O writes can result 3330 * in m_flags having both EXT4_MAP_MAPPED and EXT4_MAP_UNWRITTEN bits 3331 * set. In order for any allocated unwritten extents to be converted 3332 * into written extents correctly within the ->end_io() handler, we 3333 * need to ensure that the iomap->type is set appropriately. Hence we 3334 * need to check whether the EXT4_MAP_UNWRITTEN bit has been set 3335 * first. 3336 */ 3337 if (map->m_flags & EXT4_MAP_UNWRITTEN) { 3338 iomap->type = IOMAP_UNWRITTEN; 3339 iomap->addr = (u64) map->m_pblk << blkbits; 3340 } else if (map->m_flags & EXT4_MAP_MAPPED) { 3341 iomap->type = IOMAP_MAPPED; 3342 iomap->addr = (u64) map->m_pblk << blkbits;
3537 } else { | 3343 } else { |
3538 if (map.m_flags & EXT4_MAP_MAPPED) { 3539 iomap->type = IOMAP_MAPPED; 3540 } else if (map.m_flags & EXT4_MAP_UNWRITTEN) { 3541 iomap->type = IOMAP_UNWRITTEN; 3542 } else { 3543 WARN_ON_ONCE(1); 3544 return -EIO; 3545 } 3546 iomap->addr = (u64)map.m_pblk << blkbits; | 3344 iomap->type = IOMAP_HOLE; 3345 iomap->addr = IOMAP_NULL_ADDR; |
3547 } | 3346 } |
3548 3549 if (map.m_flags & EXT4_MAP_NEW) 3550 iomap->flags |= IOMAP_F_NEW; 3551 3552 return 0; | |
3553} 3554 | 3347} 3348 |
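A condensed model of the type selection in the new ext4_set_iomap(); the enum values below are toy stand-ins for the kernel's map flags and iomap types. The key point is that unwritten is tested before mapped, since a direct I/O write can leave both bits set:

```c
/* Toy flag values and iomap types, standing in for the kernel's. */
enum { MAP_NEW = 1, MAP_MAPPED = 2, MAP_UNWRITTEN = 4 };
enum toy_iomap_type { T_HOLE, T_MAPPED, T_UNWRITTEN };

/*
 * Condensed model of the type selection in ext4_set_iomap(): unwritten
 * must be tested before mapped, because the ->end_io() conversion of
 * allocated unwritten extents keys off the unwritten type.
 */
static enum toy_iomap_type classify(unsigned int m_flags)
{
	if (m_flags & MAP_UNWRITTEN)
		return T_UNWRITTEN;
	if (m_flags & MAP_MAPPED)
		return T_MAPPED;
	return T_HOLE;
}
```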
3555static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length, 3556 ssize_t written, unsigned flags, struct iomap *iomap) | 3349static int ext4_iomap_alloc(struct inode *inode, struct ext4_map_blocks *map, 3350 unsigned int flags) |
3557{ | 3351{ |
3558 int ret = 0; | |
3559 handle_t *handle; | 3352 handle_t *handle; |
3560 int blkbits = inode->i_blkbits; 3561 bool truncate = false; | 3353 u8 blkbits = inode->i_blkbits; 3354 int ret, dio_credits, m_flags = 0, retries = 0; |
3562 | 3355 |
3563 if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT)) 3564 return 0; | 3356 /* 3357 * Trim the mapping request to the maximum value that we can map at 3358 * once for direct I/O. 3359 */ 3360 if (map->m_len > DIO_MAX_BLOCKS) 3361 map->m_len = DIO_MAX_BLOCKS; 3362 dio_credits = ext4_chunk_trans_blocks(inode, map->m_len); |
3565 | 3363 |
3566 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); 3567 if (IS_ERR(handle)) { 3568 ret = PTR_ERR(handle); 3569 goto orphan_del; 3570 } 3571 if (ext4_update_inode_size(inode, offset + written)) 3572 ext4_mark_inode_dirty(handle, inode); | 3364retry: |
3573 /* | 3365 /* |
3574 * We may need to truncate allocated but not written blocks beyond EOF. | 3366 * Either we allocate blocks and then don't get an unwritten extent, so 3367 * in that case we have reserved enough credits. Or, the blocks are 3368 * already allocated and unwritten. In that case, the extent conversion 3369 * fits into the credits as well. |
3575 */ | 3370 */ |
3576 if (iomap->offset + iomap->length > 3577 ALIGN(inode->i_size, 1 << blkbits)) { 3578 ext4_lblk_t written_blk, end_blk; | 3371 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, dio_credits); 3372 if (IS_ERR(handle)) 3373 return PTR_ERR(handle); |
3579 | 3374 |
3580 written_blk = (offset + written) >> blkbits; 3581 end_blk = (offset + length) >> blkbits; 3582 if (written_blk < end_blk && ext4_can_truncate(inode)) 3583 truncate = true; 3584 } | |
3585 /* | 3375 /* |
3586 * Remove inode from orphan list if we were extending an inode and 3587 * everything went fine. | 3376 * DAX and direct I/O are the only two operations that are currently 3377 * supported with IOMAP_WRITE.
3588 */ | 3378 */ |
3589 if (!truncate && inode->i_nlink && 3590 !list_empty(&EXT4_I(inode)->i_orphan)) 3591 ext4_orphan_del(handle, inode); | 3379 WARN_ON(!IS_DAX(inode) && !(flags & IOMAP_DIRECT)); 3380 if (IS_DAX(inode)) 3381 m_flags = EXT4_GET_BLOCKS_CREATE_ZERO; 3382 /* 3383 * We use i_size instead of i_disksize here because delalloc writeback 3384 * can complete at any point during the I/O and subsequently push the 3385 * i_disksize out to i_size. This could be beyond where direct I/O is 3386 * happening and thus expose allocated blocks to direct I/O reads. 3387 */ 3388 else if ((map->m_lblk * (1 << blkbits)) >= i_size_read(inode)) 3389 m_flags = EXT4_GET_BLOCKS_CREATE; 3390 else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 3391 m_flags = EXT4_GET_BLOCKS_IO_CREATE_EXT; 3392 3393 ret = ext4_map_blocks(handle, inode, map, m_flags); 3394 3395 /* 3396 * We cannot fill holes in indirect tree based inodes as that could 3397 * expose stale data in the case of a crash. Use the magic error code 3398 * to fallback to buffered I/O. 3399 */ 3400 if (!m_flags && !ret) 3401 ret = -ENOTBLK; 3402 |
3592 ext4_journal_stop(handle); | 3403 ext4_journal_stop(handle); |
3593 if (truncate) { 3594 ext4_truncate_failed_write(inode); 3595orphan_del: 3596 /* 3597 * If truncate failed early the inode might still be on the 3598 * orphan list; we need to make sure the inode is removed from 3599 * the orphan list in that case. 3600 */ 3601 if (inode->i_nlink) 3602 ext4_orphan_del(NULL, inode); 3603 } | 3404 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3405 goto retry; 3406 |
3604 return ret; 3605} 3606 | 3407 return ret; 3408} 3409 |
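The new ext4_iomap_alloc() keeps ext4's usual allocation pattern: start a handle, try to map, stop the handle, and retry on ENOSPC while ext4_should_retry_alloc() says a retry may succeed (it bounds the retries and may wait for a transaction commit to free blocks). The loop reduced to its shape, with function pointers as illustrative stand-ins for the journal and allocation calls:

```c
#include <errno.h>

/*
 * Shape of the ENOSPC retry loop in ext4_iomap_alloc(); the callbacks
 * are toy stand-ins, not the real ext4/jbd2 API.
 */
static int alloc_with_retry(int (*start)(void), int (*alloc)(void),
			    void (*stop)(void), int (*should_retry)(int *))
{
	int ret, retries = 0;

	do {
		ret = start();		/* ext4_journal_start() */
		if (ret)
			return ret;
		ret = alloc();		/* ext4_map_blocks() */
		stop();			/* ext4_journal_stop() */
	} while (ret == -ENOSPC && should_retry(&retries));

	return ret;
}
```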
3607const struct iomap_ops ext4_iomap_ops = { 3608 .iomap_begin = ext4_iomap_begin, 3609 .iomap_end = ext4_iomap_end, 3610}; | |
3611 | 3410 |
3612static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, 3613 ssize_t size, void *private) | 3411static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, 3412 unsigned flags, struct iomap *iomap, struct iomap *srcmap) |
3614{ | 3413{ |
3615 ext4_io_end_t *io_end = private; | 3414 int ret; 3415 struct ext4_map_blocks map; 3416 u8 blkbits = inode->i_blkbits; |
3616 | 3417 |
3617 /* if not async direct IO just return */ 3618 if (!io_end) 3619 return 0; | 3418 if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK) 3419 return -EINVAL; |
3620 | 3420 |
3621 ext_debug("ext4_end_io_dio(): io_end 0x%p " 3622 "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", 3623 io_end, io_end->inode->i_ino, iocb, offset, size); | 3421 if (WARN_ON_ONCE(ext4_has_inline_data(inode))) 3422 return -ERANGE; |
3624 3625 /* | 3423 3424 /* |
3626 * Error during AIO DIO. We cannot convert unwritten extents as the 3627 * data was not written. Just clear the unwritten flag and drop io_end. | 3425 * Calculate the first and last logical blocks respectively. |
3628 */ | 3426 */ |
3629 if (size <= 0) { 3630 ext4_clear_io_unwritten_flag(io_end); 3631 size = 0; 3632 } 3633 io_end->offset = offset; 3634 io_end->size = size; 3635 ext4_put_io_end(io_end); | 3427 map.m_lblk = offset >> blkbits; 3428 map.m_len = min_t(loff_t, (offset + length - 1) >> blkbits, 3429 EXT4_MAX_LOGICAL_BLOCK) - map.m_lblk + 1; |
3636 | 3430 |
3637 return 0; 3638} | 3431 if (flags & IOMAP_WRITE) 3432 ret = ext4_iomap_alloc(inode, &map, flags); 3433 else 3434 ret = ext4_map_blocks(NULL, inode, &map, 0); |
3639 | 3435 |
3640/* 3641 * Handling of direct IO writes. 3642 * 3643 * For ext4 extent files, ext4 will do direct-io write even to holes, 3644 * preallocated extents, and writes that extend the file, with no need to 3645 * fall back to buffered IO. 3646 * 3647 * For holes, we fallocate those blocks and mark them as unwritten. 3648 * If those blocks were preallocated, we make sure they are split, but 3649 * still keep the range to write as unwritten. 3650 * 3651 * The unwritten extents will be converted to written when DIO is completed. 3652 * For async direct IO, since the IO may still be pending when we return, we 3653 * set up an end_io callback function, which will do the conversion 3654 * when the async direct IO is completed. 3655 * 3656 * If the O_DIRECT write will extend the file then add this inode to the 3657 * orphan list. So recovery will truncate it back to the original size 3658 * if the machine crashes during the write. 3659 * 3660 */ 3661static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter) 3662{ 3663 struct file *file = iocb->ki_filp; 3664 struct inode *inode = file->f_mapping->host; 3665 struct ext4_inode_info *ei = EXT4_I(inode); 3666 ssize_t ret; 3667 loff_t offset = iocb->ki_pos; 3668 size_t count = iov_iter_count(iter); 3669 int overwrite = 0; 3670 get_block_t *get_block_func = NULL; 3671 int dio_flags = 0; 3672 loff_t final_size = offset + count; 3673 int orphan = 0; 3674 handle_t *handle; | 3436 if (ret < 0) 3437 return ret;
3675 | 3438 |
3676 if (final_size > inode->i_size || final_size > ei->i_disksize) { 3677 /* Credits for sb + inode write */ 3678 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); 3679 if (IS_ERR(handle)) { 3680 ret = PTR_ERR(handle); 3681 goto out; 3682 } 3683 ret = ext4_orphan_add(handle, inode); 3684 if (ret) { 3685 ext4_journal_stop(handle); 3686 goto out; 3687 } 3688 orphan = 1; 3689 ext4_update_i_disksize(inode, inode->i_size); 3690 ext4_journal_stop(handle); 3691 } | 3439 ext4_set_iomap(inode, iomap, &map, offset, length); |
3692 | 3440 |
3693 BUG_ON(iocb->private == NULL); | 3441 return 0; 3442} |
3694 | 3443 |
3444static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length, 3445 ssize_t written, unsigned flags, struct iomap *iomap) 3446{ |
|
3695 /* | 3447 /* |
3696 * Make all waiters for direct IO properly wait also for extent 3697 * conversion. This also disallows race between truncate() and 3698 * overwrite DIO as i_dio_count needs to be incremented under i_mutex. | 3448 * Check to see whether an error occurred while writing out the data to 3449 * the allocated blocks. If so, return the magic error code so that we 3450 * fallback to buffered I/O and attempt to complete the remainder of 3451 * the I/O. Any blocks that may have been allocated in preparation for 3452 * the direct I/O will be reused during buffered I/O. |
3699 */ | 3453 */ |
3700 inode_dio_begin(inode); | 3454 if (flags & (IOMAP_WRITE | IOMAP_DIRECT) && written == 0) 3455 return -ENOTBLK; |
3701 | 3456 |
3702 /* If we do an overwrite dio, i_mutex locking can be released */ 3703 overwrite = *((int *)iocb->private); | 3457 return 0; 3458}
3704 | 3459 |
3705 if (overwrite) 3706 inode_unlock(inode); | 3460const struct iomap_ops ext4_iomap_ops = { 3461 .iomap_begin = ext4_iomap_begin, 3462 .iomap_end = ext4_iomap_end, 3463}; |
3707 | 3464 |
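The ops table above plugs ext4 into the generic iomap machinery: the core calls ->iomap_begin() to obtain one mapping, performs I/O against it, then calls ->iomap_end(), where ext4's -ENOTBLK requests a buffered-I/O fallback. A schematic of that calling contract, assuming a much-simplified core; the structures and loop below are illustrative, not the actual fs/iomap implementation:

```c
/*
 * Schematic of the iomap begin/actor/end contract as ext4 relies on it.
 * All types and names here are toy stand-ins.
 */
struct toy_iomap { long long offset, length; int type; };

struct toy_iomap_ops {
	int (*begin)(long long off, long long len, struct toy_iomap *im);
	int (*end)(long long off, long long len, long long written,
		   struct toy_iomap *im);
};

static long long toy_apply(const struct toy_iomap_ops *ops, long long off,
			   long long len,
			   long long (*actor)(struct toy_iomap *))
{
	struct toy_iomap im;
	long long written;
	int ret;

	ret = ops->begin(off, len, &im);	/* filesystem maps one extent */
	if (ret)
		return ret;
	written = actor(&im);			/* I/O against that mapping */
	ret = ops->end(off, len, written > 0 ? written : 0, &im);
	return ret ? ret : written;		/* e.g. -ENOTBLK => fallback */
}
```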
3708 /* 3709 * For extent mapped files we could direct write to holes and fallocate. 3710 * 3711 * Allocated blocks to fill the hole are marked as unwritten to prevent 3712 * a parallel buffered read from exposing stale data before DIO completes 3713 * the data IO. 3714 * 3715 * As to previously fallocated extents, ext4 get_block will simply 3716 * mark the buffer mapped but still keep the extents unwritten. 3717 * 3718 * For the non-AIO case, we will convert those unwritten extents to written 3719 * after returning from blockdev_direct_IO. That way we save ourselves from 3720 * allocating an io_end structure and the overhead of offloading 3721 * the extent conversion to a workqueue. 3722 * 3723 * For async DIO, the conversion needs to be deferred until the 3724 * IO is completed. The ext4 end_io callback function will be 3725 * called to take care of the conversion work. Here, for the async 3726 * case, we allocate an io_end structure to hook to the iocb. 3727 */ 3728 iocb->private = NULL; 3729 if (overwrite) 3730 get_block_func = ext4_dio_get_block_overwrite; 3731 else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) || 3732 round_down(offset, i_blocksize(inode)) >= inode->i_size) { 3733 get_block_func = ext4_dio_get_block; 3734 dio_flags = DIO_LOCKING | DIO_SKIP_HOLES; 3735 } else if (is_sync_kiocb(iocb)) { 3736 get_block_func = ext4_dio_get_block_unwritten_sync; 3737 dio_flags = DIO_LOCKING; 3738 } else { 3739 get_block_func = ext4_dio_get_block_unwritten_async; 3740 dio_flags = DIO_LOCKING; 3741 } 3742 ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, 3743 get_block_func, ext4_end_io_dio, NULL, 3744 dio_flags); | 3465static bool ext4_iomap_is_delalloc(struct inode *inode, 3466 struct ext4_map_blocks *map) 3467{ 3468 struct extent_status es; 3469 ext4_lblk_t offset = 0, end = map->m_lblk + map->m_len - 1;
3745 | 3470 |
3746 if (ret > 0 && !overwrite && ext4_test_inode_state(inode, 3747 EXT4_STATE_DIO_UNWRITTEN)) { 3748 int err; 3749 /* 3750 * For the non-AIO case, since the IO is already 3751 * completed, we can do the conversion right here 3752 */ 3753 err = ext4_convert_unwritten_extents(NULL, inode, 3754 offset, ret); 3755 if (err < 0) 3756 ret = err; 3757 ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 3758 } | 3471 ext4_es_find_extent_range(inode, &ext4_es_is_delayed, 3472 map->m_lblk, end, &es);
3759 | 3473 |
3760 inode_dio_end(inode); 3761 /* take i_mutex locking again if we do an overwrite dio */ 3762 if (overwrite) 3763 inode_lock(inode); | 3474 if (!es.es_len || es.es_lblk > end) 3475 return false;
3764 | 3476 |
3765 if (ret < 0 && final_size > inode->i_size) 3766 ext4_truncate_failed_write(inode); 3767 3768 /* Handle extending of i_size after direct IO write */ 3769 if (orphan) { 3770 int err; 3771 3772 /* Credits for sb + inode write */ 3773 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); 3774 if (IS_ERR(handle)) { 3775 /* 3776 * We wrote the data but cannot extend 3777 * i_size. Bail out. In the async IO case, we 3778 * do not return an error here because we have 3779 * already submitted the corresponding 3780 * bio. Returning an error here makes the 3781 * caller think that this IO is done and 3782 * failed, resulting in a race with the bio's 3783 * completion handler. 3784 */ 3785 if (!ret) 3786 ret = PTR_ERR(handle); 3787 if (inode->i_nlink) 3788 ext4_orphan_del(NULL, inode); 3789 3790 goto out; 3791 } 3792 if (inode->i_nlink) 3793 ext4_orphan_del(handle, inode); 3794 if (ret > 0) { 3795 loff_t end = offset + ret; 3796 if (end > inode->i_size || end > ei->i_disksize) { 3797 ext4_update_i_disksize(inode, end); 3798 if (end > inode->i_size) 3799 i_size_write(inode, end); 3800 /* 3801 * We're going to return a positive `ret' 3802 * here due to non-zero-length I/O, so there's 3803 * no way of reporting error returns from 3804 * ext4_mark_inode_dirty() to userspace. So 3805 * ignore it. 3806 */ 3807 ext4_mark_inode_dirty(handle, inode); 3808 } 3809 } 3810 err = ext4_journal_stop(handle); 3811 if (ret == 0) 3812 ret = err; | 3477 if (es.es_lblk > map->m_lblk) { 3478 map->m_len = es.es_lblk - map->m_lblk; 3479 return false;
3813 } | 3480 } |
3814out: 3815 return ret; 3816} | |
3817 | 3481 |
3818static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter) 3819{ 3820 struct address_space *mapping = iocb->ki_filp->f_mapping; 3821 struct inode *inode = mapping->host; 3822 size_t count = iov_iter_count(iter); 3823 ssize_t ret; | 3482 offset = map->m_lblk - es.es_lblk; 3483 map->m_len = es.es_len - offset; |
3824 | 3484 |
3825 /* 3826 * Shared inode_lock is enough for us - it protects against concurrent 3827 * writes & truncates and since we take care of writing back page cache, 3828 * we are protected against page writeback as well. 3829 */ 3830 inode_lock_shared(inode); 3831 ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, 3832 iocb->ki_pos + count - 1); 3833 if (ret) 3834 goto out_unlock; 3835 ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, 3836 iter, ext4_dio_get_block, NULL, NULL, 0); 3837out_unlock: 3838 inode_unlock_shared(inode); 3839 return ret; | 3485 return true; |
3840} 3841 | 3486} 3487 |
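The new ext4_iomap_is_delalloc() answers whether an otherwise unmapped range starts with delayed-allocation blocks, trimming the map so the caller reports a self-consistent range. The same logic restated over plain integers (a sketch, not the kernel types):

```c
#include <stdbool.h>

/*
 * Model of the range trimming in ext4_iomap_is_delalloc(): es_lblk/es_len
 * describe the first delayed extent found at or after the queried range
 * (es_len == 0 means none was found).
 */
static bool range_is_delalloc(long long *m_lblk, long long *m_len,
			      long long es_lblk, long long es_len)
{
	long long end = *m_lblk + *m_len - 1;

	if (!es_len || es_lblk > end)
		return false;		/* whole range is a plain hole */

	if (es_lblk > *m_lblk) {
		/* Hole first: shrink the range to where delalloc starts. */
		*m_len = es_lblk - *m_lblk;
		return false;
	}

	/* Delalloc first: clamp the range to the delayed extent. */
	*m_len = es_len - (*m_lblk - es_lblk);
	return true;
}
```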
3842static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | 3488static int ext4_iomap_begin_report(struct inode *inode, loff_t offset, 3489 loff_t length, unsigned int flags, 3490 struct iomap *iomap, struct iomap *srcmap) |
3843{ | 3491{ |
3844 struct file *file = iocb->ki_filp; 3845 struct inode *inode = file->f_mapping->host; 3846 size_t count = iov_iter_count(iter); 3847 loff_t offset = iocb->ki_pos; 3848 ssize_t ret; | 3492 int ret; 3493 bool delalloc = false; 3494 struct ext4_map_blocks map; 3495 u8 blkbits = inode->i_blkbits; |
3849 | 3496 |
3850#ifdef CONFIG_FS_ENCRYPTION 3851 if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) 3852 return 0; 3853#endif 3854 if (fsverity_active(inode)) 3855 return 0; | 3497 if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK) 3498 return -EINVAL; |
3856 | 3499 |
3500 if (ext4_has_inline_data(inode)) { 3501 ret = ext4_inline_data_iomap(inode, iomap); 3502 if (ret != -EAGAIN) { 3503 if (ret == 0 && offset >= iomap->length) 3504 ret = -ENOENT; 3505 return ret; 3506 } 3507 } 3508 |
|
3857 /* | 3509 /* |
3858 * If we are doing data journalling we don't support O_DIRECT | 3510 * Calculate the first and last logical block respectively. |
3859 */ | 3511 */ |
3860 if (ext4_should_journal_data(inode)) 3861 return 0; | 3512 map.m_lblk = offset >> blkbits; 3513 map.m_len = min_t(loff_t, (offset + length - 1) >> blkbits, 3514 EXT4_MAX_LOGICAL_BLOCK) - map.m_lblk + 1; |
3862 | 3515 |
3863 /* Let buffer I/O handle the inline data case. */ 3864 if (ext4_has_inline_data(inode)) 3865 return 0; | 3516 ret = ext4_map_blocks(NULL, inode, &map, 0); 3517 if (ret < 0) 3518 return ret; 3519 if (ret == 0) 3520 delalloc = ext4_iomap_is_delalloc(inode, &map); |
3866 | 3521 |
3867 trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); 3868 if (iov_iter_rw(iter) == READ) 3869 ret = ext4_direct_IO_read(iocb, iter); 3870 else 3871 ret = ext4_direct_IO_write(iocb, iter); 3872 trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret); 3873 return ret; | 3522 ext4_set_iomap(inode, iomap, &map, offset, length); 3523 if (delalloc && iomap->type == IOMAP_HOLE) 3524 iomap->type = IOMAP_DELALLOC; 3525 3526 return 0; |
3874} 3875 | 3527} 3528 |
3529const struct iomap_ops ext4_iomap_report_ops = { 3530 .iomap_begin = ext4_iomap_begin_report, 3531}; 3532 |
|
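The report path (SEEK_HOLE/SEEK_DATA via ext4_iomap_report_ops) maps without allocating, then upgrades a hole to delalloc when delayed blocks back it. A standalone restatement of that classification, with toy constants rather than the kernel's flags and iomap types:

```c
#include <stdbool.h>

/* Toy stand-ins for the kernel's map flags and iomap types. */
enum { RMAP_MAPPED = 1, RMAP_UNWRITTEN = 2 };
enum toy_report_type { R_HOLE, R_DELALLOC, R_MAPPED, R_UNWRITTEN };

/*
 * Sketch of ext4_iomap_begin_report()'s post-processing: only a hole
 * can become delalloc; mapped and unwritten ranges keep their type.
 */
static enum toy_report_type report_type(unsigned int m_flags, bool delalloc)
{
	if (m_flags & RMAP_UNWRITTEN)
		return R_UNWRITTEN;
	if (m_flags & RMAP_MAPPED)
		return R_MAPPED;
	return delalloc ? R_DELALLOC : R_HOLE;
}
```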
3876/* 3877 * Pages can be marked dirty completely asynchronously from ext4's journalling 3878 * activity. By filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do 3879 * much here because ->set_page_dirty is called under VFS locks. The page is 3880 * not necessarily locked. 3881 * 3882 * We cannot just dirty the page and leave attached buffers clean, because the 3883 * buffers' dirty state is "definitive". We cannot just set the buffers dirty --- 21 unchanged lines hidden (view full) --- 3905 .writepage = ext4_writepage, 3906 .writepages = ext4_writepages, 3907 .write_begin = ext4_write_begin, 3908 .write_end = ext4_write_end, 3909 .set_page_dirty = ext4_set_page_dirty, 3910 .bmap = ext4_bmap, 3911 .invalidatepage = ext4_invalidatepage, 3912 .releasepage = ext4_releasepage, | 3533/* 3534 * Pages can be marked dirty completely asynchronously from ext4's journalling 3535 * activity. By filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do 3536 * much here because ->set_page_dirty is called under VFS locks. The page is 3537 * not necessarily locked. 3538 * 3539 * We cannot just dirty the page and leave attached buffers clean, because the 3540 * buffers' dirty state is "definitive". We cannot just set the buffers dirty --- 21 unchanged lines hidden (view full) --- 3562 .writepage = ext4_writepage, 3563 .writepages = ext4_writepages, 3564 .write_begin = ext4_write_begin, 3565 .write_end = ext4_write_end, 3566 .set_page_dirty = ext4_set_page_dirty, 3567 .bmap = ext4_bmap, 3568 .invalidatepage = ext4_invalidatepage, 3569 .releasepage = ext4_releasepage, |
3913 .direct_IO = ext4_direct_IO, | 3570 .direct_IO = noop_direct_IO, |
3914 .migratepage = buffer_migrate_page, 3915 .is_partially_uptodate = block_is_partially_uptodate, 3916 .error_remove_page = generic_error_remove_page, 3917}; 3918 3919static const struct address_space_operations ext4_journalled_aops = { 3920 .readpage = ext4_readpage, 3921 .readpages = ext4_readpages, 3922 .writepage = ext4_writepage, 3923 .writepages = ext4_writepages, 3924 .write_begin = ext4_write_begin, 3925 .write_end = ext4_journalled_write_end, 3926 .set_page_dirty = ext4_journalled_set_page_dirty, 3927 .bmap = ext4_bmap, 3928 .invalidatepage = ext4_journalled_invalidatepage, 3929 .releasepage = ext4_releasepage, | 3571 .migratepage = buffer_migrate_page, 3572 .is_partially_uptodate = block_is_partially_uptodate, 3573 .error_remove_page = generic_error_remove_page, 3574}; 3575 3576static const struct address_space_operations ext4_journalled_aops = { 3577 .readpage = ext4_readpage, 3578 .readpages = ext4_readpages, 3579 .writepage = ext4_writepage, 3580 .writepages = ext4_writepages, 3581 .write_begin = ext4_write_begin, 3582 .write_end = ext4_journalled_write_end, 3583 .set_page_dirty = ext4_journalled_set_page_dirty, 3584 .bmap = ext4_bmap, 3585 .invalidatepage = ext4_journalled_invalidatepage, 3586 .releasepage = ext4_releasepage, |
3930 .direct_IO = ext4_direct_IO, | 3587 .direct_IO = noop_direct_IO, |
3931 .is_partially_uptodate = block_is_partially_uptodate, 3932 .error_remove_page = generic_error_remove_page, 3933}; 3934 3935static const struct address_space_operations ext4_da_aops = { 3936 .readpage = ext4_readpage, 3937 .readpages = ext4_readpages, 3938 .writepage = ext4_writepage, 3939 .writepages = ext4_writepages, 3940 .write_begin = ext4_da_write_begin, 3941 .write_end = ext4_da_write_end, 3942 .set_page_dirty = ext4_set_page_dirty, 3943 .bmap = ext4_bmap, 3944 .invalidatepage = ext4_invalidatepage, 3945 .releasepage = ext4_releasepage, | 3588 .is_partially_uptodate = block_is_partially_uptodate, 3589 .error_remove_page = generic_error_remove_page, 3590}; 3591 3592static const struct address_space_operations ext4_da_aops = { 3593 .readpage = ext4_readpage, 3594 .readpages = ext4_readpages, 3595 .writepage = ext4_writepage, 3596 .writepages = ext4_writepages, 3597 .write_begin = ext4_da_write_begin, 3598 .write_end = ext4_da_write_end, 3599 .set_page_dirty = ext4_set_page_dirty, 3600 .bmap = ext4_bmap, 3601 .invalidatepage = ext4_invalidatepage, 3602 .releasepage = ext4_releasepage, |
3946 .direct_IO = ext4_direct_IO, | 3603 .direct_IO = noop_direct_IO, |
3947 .migratepage = buffer_migrate_page, 3948 .is_partially_uptodate = block_is_partially_uptodate, 3949 .error_remove_page = generic_error_remove_page, 3950}; 3951 3952static const struct address_space_operations ext4_dax_aops = { 3953 .writepages = ext4_dax_writepages, 3954 .direct_IO = noop_direct_IO, --- 1952 unchanged lines hidden (view full) --- 5907 5908static int __ext4_expand_extra_isize(struct inode *inode, 5909 unsigned int new_extra_isize, 5910 struct ext4_iloc *iloc, 5911 handle_t *handle, int *no_expand) 5912{ 5913 struct ext4_inode *raw_inode; 5914 struct ext4_xattr_ibody_header *header; | 3604 .migratepage = buffer_migrate_page, 3605 .is_partially_uptodate = block_is_partially_uptodate, 3606 .error_remove_page = generic_error_remove_page, 3607}; 3608 3609static const struct address_space_operations ext4_dax_aops = { 3610 .writepages = ext4_dax_writepages, 3611 .direct_IO = noop_direct_IO, --- 1952 unchanged lines hidden (view full) --- 5564 5565static int __ext4_expand_extra_isize(struct inode *inode, 5566 unsigned int new_extra_isize, 5567 struct ext4_iloc *iloc, 5568 handle_t *handle, int *no_expand) 5569{ 5570 struct ext4_inode *raw_inode; 5571 struct ext4_xattr_ibody_header *header; |
5572 unsigned int inode_size = EXT4_INODE_SIZE(inode->i_sb); 5573 struct ext4_inode_info *ei = EXT4_I(inode); |
|
5915 int error; 5916 | 5574 int error; 5575 |
5576 /* this was checked at iget time, but double check for good measure */ 5577 if ((EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > inode_size) || 5578 (ei->i_extra_isize & 3)) { 5579 EXT4_ERROR_INODE(inode, "bad extra_isize %u (inode size %u)", 5580 ei->i_extra_isize, 5581 EXT4_INODE_SIZE(inode->i_sb)); 5582 return -EFSCORRUPTED; 5583 } 5584 if ((new_extra_isize < ei->i_extra_isize) || 5585 (new_extra_isize < 4) || 5586 (new_extra_isize > inode_size - EXT4_GOOD_OLD_INODE_SIZE)) 5587 return -EINVAL; /* Should never happen */ 5588 |
|
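The re-validation above mirrors the checks done at iget time. Restated as a standalone predicate; the constant is ext4's 128-byte old inode size (EXT4_GOOD_OLD_INODE_SIZE), and the function name is illustrative:

```c
#include <stdbool.h>

#define GOOD_OLD_INODE_SIZE 128	/* EXT4_GOOD_OLD_INODE_SIZE */

/*
 * Standalone restatement of the extra_isize sanity checks: the extra
 * area must fit in the on-disk inode, stay 4-byte aligned, and only
 * ever grow.
 */
static bool extra_isize_ok(unsigned int cur, unsigned int want,
			   unsigned int inode_size)
{
	if (GOOD_OLD_INODE_SIZE + cur > inode_size || (cur & 3))
		return false;	/* corrupted on-disk value */
	if (want < cur || want < 4 ||
	    want > inode_size - GOOD_OLD_INODE_SIZE)
		return false;	/* invalid expansion request */
	return true;
}
```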
5917 raw_inode = ext4_raw_inode(iloc); 5918 5919 header = IHDR(inode, raw_inode); 5920 5921 /* No extended attributes present */ 5922 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || 5923 header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { 5924 memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + --- 35 unchanged lines hidden (view full) --- 5960 * In nojournal mode, we can immediately attempt to expand 5961 * the inode. When journaled, we first need to obtain extra 5962 * buffer credits since we may write into the EA block 5963 * with this same handle. If journal_extend fails, then it will 5964 * only result in a minor loss of functionality for that inode. 5965 * If this is felt to be critical, then e2fsck should be run to 5966 * force a large enough s_min_extra_isize. 5967 */ | 5589 raw_inode = ext4_raw_inode(iloc); 5590 5591 header = IHDR(inode, raw_inode); 5592 5593 /* No extended attributes present */ 5594 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || 5595 header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { 5596 memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + --- 35 unchanged lines hidden (view full) --- 5632 * In nojournal mode, we can immediately attempt to expand 5633 * the inode. When journaled, we first need to obtain extra 5634 * buffer credits since we may write into the EA block 5635 * with this same handle. If journal_extend fails, then it will 5636 * only result in a minor loss of functionality for that inode. 5637 * If this is felt to be critical, then e2fsck should be run to 5638 * force a large enough s_min_extra_isize. 5639 */ |
5968 if (ext4_handle_valid(handle) && 5969 jbd2_journal_extend(handle, 5970 EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) != 0) | 5640 if (ext4_journal_extend(handle, 5641 EXT4_DATA_TRANS_BLOCKS(inode->i_sb), 0) != 0) |
5971 return -ENOSPC; 5972 5973 if (ext4_write_trylock_xattr(inode, &no_expand) == 0) 5974 return -EBUSY; 5975 5976 error = __ext4_expand_extra_isize(inode, new_extra_isize, &iloc, 5977 handle, &no_expand); 5978 ext4_write_unlock_xattr(inode, &no_expand); --- 318 unchanged lines hidden --- | 5642 return -ENOSPC; 5643 5644 if (ext4_write_trylock_xattr(inode, &no_expand) == 0) 5645 return -EBUSY; 5646 5647 error = __ext4_expand_extra_isize(inode, new_extra_isize, &iloc, 5648 handle, &no_expand); 5649 ext4_write_unlock_xattr(inode, &no_expand); --- 318 unchanged lines hidden --- |
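The final hunk swaps the raw jbd2_journal_extend() call for ext4_journal_extend(), whose extra argument carries the number of revoke-record credits also needed (zero here). A sketch of the surrounding best-effort pattern, with toy function pointers standing in for the journal API:

```c
#include <errno.h>

/*
 * Shape of the "extend the running handle or skip the optional work"
 * pattern above: expanding extra_isize is best-effort, so failure to
 * obtain more credits is reported as -ENOSPC rather than forcing a
 * transaction restart. The callbacks are illustrative stand-ins.
 */
static int try_optional_update(int (*extend)(int blocks, int revoke_credits),
			       int (*do_update)(void),
			       int data_trans_blocks)
{
	/* Ask for buffer credits plus zero revoke credits, as ext4 does. */
	if (extend(data_trans_blocks, 0) != 0)
		return -ENOSPC;	/* minor loss of functionality only */
	return do_update();
}
```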