fs/ext4/inode.c: diff between 9f4813b531a0b8cc502fcfb142937fe4e9104d77 (old) and 4ea99936a1630f51fc3a2d61a58ec4a1c4b7d55a (new)
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * linux/fs/ext4/inode.c
4 *
5 * Copyright (C) 1992, 1993, 1994, 1995
6 * Remy Card (card@masi.ibp.fr)
7 * Laboratoire MASI - Institut Blaise Pascal
8 * Universite Pierre et Marie Curie (Paris VI)

--- 150 unchanged lines hidden ---

159
160 return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
161 }
162 return S_ISLNK(inode->i_mode) && inode->i_size &&
163 (inode->i_size < EXT4_N_BLOCKS * 4);
164}
165
166/*
167 * Restart the transaction associated with *handle. This does a commit,
168 * so before we call here everything must be consistently dirtied against
169 * this transaction.
170 */
171int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
172 int nblocks)
173{
174 int ret;
175
176 /*
177 * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
178 * moment, get_block can be called only for blocks inside i_size since
179 * page cache has been already dropped and writes are blocked by
180 * i_mutex. So we can safely drop the i_data_sem here.
181 */
182 BUG_ON(EXT4_JOURNAL(inode) == NULL);
183 jbd_debug(2, "restarting handle %p\n", handle);
184 up_write(&EXT4_I(inode)->i_data_sem);
185 ret = ext4_journal_restart(handle, nblocks);
186 down_write(&EXT4_I(inode)->i_data_sem);
187 ext4_discard_preallocations(inode);
188
189 return ret;
190}
191
192/*
193 * Called at the last iput() if i_nlink is zero.
194 */
195void ext4_evict_inode(struct inode *inode)
196{
197 handle_t *handle;
198 int err;
167 * Called at the last iput() if i_nlink is zero.
168 */
169void ext4_evict_inode(struct inode *inode)
170{
171 handle_t *handle;
172 int err;
199 int extra_credits = 3;
173 /*
174 * Credits for final inode cleanup and freeing:
175 * sb + inode (ext4_orphan_del()), block bitmap, group descriptor
176 * (xattr block freeing), bitmap, group descriptor (inode freeing)
177 */
178 int extra_credits = 6;
200 struct ext4_xattr_inode_array *ea_inode_array = NULL;
201
202 trace_ext4_evict_inode(inode);
203
204 if (inode->i_nlink) {
205 /*
206 * When journalling data dirty buffers are tracked only in the
207 * journal. So although mm thinks everything is clean and

--- 39 unchanged lines hidden ---

247 * Protect us against freezing - iput() caller didn't have to have any
248 * protection against it
249 */
250 sb_start_intwrite(inode->i_sb);
251
252 if (!IS_NOQUOTA(inode))
253 extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb);
254
179 struct ext4_xattr_inode_array *ea_inode_array = NULL;
180
181 trace_ext4_evict_inode(inode);
182
183 if (inode->i_nlink) {
184 /*
185 * When journalling data dirty buffers are tracked only in the
186 * journal. So although mm thinks everything is clean and

--- 39 unchanged lines hidden ---

226 * Protect us against freezing - iput() caller didn't have to have any
227 * protection against it
228 */
229 sb_start_intwrite(inode->i_sb);
230
231 if (!IS_NOQUOTA(inode))
232 extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb);
233
234 /*
235 * Block bitmap, group descriptor, and inode are accounted in both
236 * ext4_blocks_for_truncate() and extra_credits. So subtract 3.
237 */
 238 	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
 239 			ext4_blocks_for_truncate(inode) + extra_credits - 3);
 255 	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
 256 			ext4_blocks_for_truncate(inode)+extra_credits);
257 if (IS_ERR(handle)) {
258 ext4_std_error(inode->i_sb, PTR_ERR(handle));
259 /*
260 * If we're going to skip the normal cleanup, we still need to
261 * make sure that the in-core orphan linked list is properly
262 * cleaned up.
263 */
264 ext4_orphan_del(NULL, inode);

--- 557 unchanged lines hidden ---

822 return _ext4_get_block(inode, iblock, bh_result,
823 EXT4_GET_BLOCKS_IO_CREATE_EXT);
824}
825
826/* Maximum number of blocks we map for direct IO at once. */
827#define DIO_MAX_BLOCKS 4096
828
829/*
240 if (IS_ERR(handle)) {
241 ext4_std_error(inode->i_sb, PTR_ERR(handle));
242 /*
243 * If we're going to skip the normal cleanup, we still need to
244 * make sure that the in-core orphan linked list is properly
245 * cleaned up.
246 */
247 ext4_orphan_del(NULL, inode);

--- 557 unchanged lines hidden ---

805 return _ext4_get_block(inode, iblock, bh_result,
806 EXT4_GET_BLOCKS_IO_CREATE_EXT);
807}
808
809/* Maximum number of blocks we map for direct IO at once. */
810#define DIO_MAX_BLOCKS 4096
811
812/*
830 * Get blocks function for the cases that need to start a transaction -
831 * generally difference cases of direct IO and DAX IO. It also handles retries
832 * in case of ENOSPC.
833 */
834static int ext4_get_block_trans(struct inode *inode, sector_t iblock,
835 struct buffer_head *bh_result, int flags)
836{
837 int dio_credits;
838 handle_t *handle;
839 int retries = 0;
840 int ret;
841
842 /* Trim mapping request to maximum we can map at once for DIO */
843 if (bh_result->b_size >> inode->i_blkbits > DIO_MAX_BLOCKS)
844 bh_result->b_size = DIO_MAX_BLOCKS << inode->i_blkbits;
845 dio_credits = ext4_chunk_trans_blocks(inode,
846 bh_result->b_size >> inode->i_blkbits);
847retry:
848 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, dio_credits);
849 if (IS_ERR(handle))
850 return PTR_ERR(handle);
851
852 ret = _ext4_get_block(inode, iblock, bh_result, flags);
853 ext4_journal_stop(handle);
854
855 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
856 goto retry;
857 return ret;
858}
859
860/* Get block function for DIO reads and writes to inodes without extents */
861int ext4_dio_get_block(struct inode *inode, sector_t iblock,
862 struct buffer_head *bh, int create)
863{
864 /* We don't expect handle for direct IO */
865 WARN_ON_ONCE(ext4_journal_current_handle());
866
867 if (!create)
868 return _ext4_get_block(inode, iblock, bh, 0);
869 return ext4_get_block_trans(inode, iblock, bh, EXT4_GET_BLOCKS_CREATE);
870}
871
872/*
873 * Get block function for AIO DIO writes when we create unwritten extent if
874 * blocks are not allocated yet. The extent will be converted to written
875 * after IO is complete.
876 */
877static int ext4_dio_get_block_unwritten_async(struct inode *inode,
878 sector_t iblock, struct buffer_head *bh_result, int create)
879{
880 int ret;
881
882 /* We don't expect handle for direct IO */
883 WARN_ON_ONCE(ext4_journal_current_handle());
884
885 ret = ext4_get_block_trans(inode, iblock, bh_result,
886 EXT4_GET_BLOCKS_IO_CREATE_EXT);
887
888 /*
889 * When doing DIO using unwritten extents, we need io_end to convert
890 * unwritten extents to written on IO completion. We allocate io_end
891 * once we spot unwritten extent and store it in b_private. Generic
892 * DIO code keeps b_private set and furthermore passes the value to
893 * our completion callback in 'private' argument.
894 */
895 if (!ret && buffer_unwritten(bh_result)) {
896 if (!bh_result->b_private) {
897 ext4_io_end_t *io_end;
898
899 io_end = ext4_init_io_end(inode, GFP_KERNEL);
900 if (!io_end)
901 return -ENOMEM;
902 bh_result->b_private = io_end;
903 ext4_set_io_unwritten_flag(inode, io_end);
904 }
905 set_buffer_defer_completion(bh_result);
906 }
907
908 return ret;
909}
910
911/*
912 * Get block function for non-AIO DIO writes when we create unwritten extent if
913 * blocks are not allocated yet. The extent will be converted to written
914 * after IO is complete by ext4_direct_IO_write().
915 */
916static int ext4_dio_get_block_unwritten_sync(struct inode *inode,
917 sector_t iblock, struct buffer_head *bh_result, int create)
918{
919 int ret;
920
921 /* We don't expect handle for direct IO */
922 WARN_ON_ONCE(ext4_journal_current_handle());
923
924 ret = ext4_get_block_trans(inode, iblock, bh_result,
925 EXT4_GET_BLOCKS_IO_CREATE_EXT);
926
927 /*
928 * Mark inode as having pending DIO writes to unwritten extents.
929 * ext4_direct_IO_write() checks this flag and converts extents to
930 * written.
931 */
932 if (!ret && buffer_unwritten(bh_result))
933 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
934
935 return ret;
936}
937
938static int ext4_dio_get_block_overwrite(struct inode *inode, sector_t iblock,
939 struct buffer_head *bh_result, int create)
940{
941 int ret;
942
943 ext4_debug("ext4_dio_get_block_overwrite: inode %lu, create flag %d\n",
944 inode->i_ino, create);
945 /* We don't expect handle for direct IO */
946 WARN_ON_ONCE(ext4_journal_current_handle());
947
948 ret = _ext4_get_block(inode, iblock, bh_result, 0);
949 /*
950 * Blocks should have been preallocated! ext4_file_write_iter() checks
951 * that.
952 */
953 WARN_ON_ONCE(!buffer_mapped(bh_result) || buffer_unwritten(bh_result));
954
955 return ret;
956}
957
958
959/*
960 * `handle' can be NULL if create is zero
961 */
962struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
963 ext4_lblk_t block, int map_flags)
964{
965 struct ext4_map_blocks map;
966 struct buffer_head *bh;
967 int create = map_flags & EXT4_GET_BLOCKS_CREATE;

--- 1368 unchanged lines hidden ---

2336 err = mpage_submit_page(mpd, head->b_page);
2337 if (err < 0)
2338 return err;
2339 }
2340 return lblk < blocks;
2341}
2342
2343/*
813 * `handle' can be NULL if create is zero
814 */
815struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
816 ext4_lblk_t block, int map_flags)
817{
818 struct ext4_map_blocks map;
819 struct buffer_head *bh;
820 int create = map_flags & EXT4_GET_BLOCKS_CREATE;

--- 1368 unchanged lines hidden ---

2189 err = mpage_submit_page(mpd, head->b_page);
2190 if (err < 0)
2191 return err;
2192 }
2193 return lblk < blocks;
2194}
2195
2196/*
2197 * mpage_process_page - update page buffers corresponding to changed extent and
2198 * may submit fully mapped page for IO
2199 *
2200 * @mpd - description of extent to map, on return next extent to map
2201 * @m_lblk - logical block mapping.
2202 * @m_pblk - corresponding physical mapping.
2203 * @map_bh - determines on return whether this page requires any further
2204 * mapping or not.
2205 * Scan given page buffers corresponding to changed extent and update buffer
2206 * state according to new extent state.
2207 * We map delalloc buffers to their physical location, clear unwritten bits.
2208 * If the given page is not fully mapped, we update @map to the next extent in
2209 * the given page that needs mapping & return @map_bh as true.
2210 */
2211static int mpage_process_page(struct mpage_da_data *mpd, struct page *page,
2212 ext4_lblk_t *m_lblk, ext4_fsblk_t *m_pblk,
2213 bool *map_bh)
2214{
2215 struct buffer_head *head, *bh;
2216 ext4_io_end_t *io_end = mpd->io_submit.io_end;
2217 ext4_lblk_t lblk = *m_lblk;
2218 ext4_fsblk_t pblock = *m_pblk;
2219 int err = 0;
2220 int blkbits = mpd->inode->i_blkbits;
2221 ssize_t io_end_size = 0;
2222 struct ext4_io_end_vec *io_end_vec = ext4_last_io_end_vec(io_end);
2223
2224 bh = head = page_buffers(page);
2225 do {
2226 if (lblk < mpd->map.m_lblk)
2227 continue;
2228 if (lblk >= mpd->map.m_lblk + mpd->map.m_len) {
2229 /*
2230 * Buffer after end of mapped extent.
2231 * Find next buffer in the page to map.
2232 */
2233 mpd->map.m_len = 0;
2234 mpd->map.m_flags = 0;
2235 io_end_vec->size += io_end_size;
2236 io_end_size = 0;
2237
2238 err = mpage_process_page_bufs(mpd, head, bh, lblk);
2239 if (err > 0)
2240 err = 0;
2241 if (!err && mpd->map.m_len && mpd->map.m_lblk > lblk) {
2242 io_end_vec = ext4_alloc_io_end_vec(io_end);
2243 io_end_vec->offset = mpd->map.m_lblk << blkbits;
2244 }
2245 *map_bh = true;
2246 goto out;
2247 }
2248 if (buffer_delay(bh)) {
2249 clear_buffer_delay(bh);
2250 bh->b_blocknr = pblock++;
2251 }
2252 clear_buffer_unwritten(bh);
2253 io_end_size += (1 << blkbits);
2254 } while (lblk++, (bh = bh->b_this_page) != head);
2255
2256 io_end_vec->size += io_end_size;
2257 io_end_size = 0;
2258 *map_bh = false;
2259out:
2260 *m_lblk = lblk;
2261 *m_pblk = pblock;
2262 return err;
2263}
2264
2265/*
2344 * mpage_map_buffers - update buffers corresponding to changed extent and
2345 * submit fully mapped pages for IO
2346 *
2347 * @mpd - description of extent to map, on return next extent to map
2348 *
2349 * Scan buffers corresponding to changed extent (we expect corresponding pages
2350 * to be already locked) and update buffer state according to new extent state.
2351 * We map delalloc buffers to their physical location, clear unwritten bits,
2352 * and mark buffers as uninit when we perform writes to unwritten extents
2353 * and do extent conversion after IO is finished. If the last page is not fully
2354 * mapped, we update @map to the next extent in the last page that needs
2355 * mapping. Otherwise we submit the page for IO.
2356 */
2357static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
2358{
2359 struct pagevec pvec;
2360 int nr_pages, i;
2361 struct inode *inode = mpd->inode;
2266 * mpage_map_buffers - update buffers corresponding to changed extent and
2267 * submit fully mapped pages for IO
2268 *
2269 * @mpd - description of extent to map, on return next extent to map
2270 *
2271 * Scan buffers corresponding to changed extent (we expect corresponding pages
2272 * to be already locked) and update buffer state according to new extent state.
2273 * We map delalloc buffers to their physical location, clear unwritten bits,
2274 * and mark buffers as uninit when we perform writes to unwritten extents
2275 * and do extent conversion after IO is finished. If the last page is not fully
2276 * mapped, we update @map to the next extent in the last page that needs
2277 * mapping. Otherwise we submit the page for IO.
2278 */
2279static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
2280{
2281 struct pagevec pvec;
2282 int nr_pages, i;
2283 struct inode *inode = mpd->inode;
 2284 	int bpp_bits = PAGE_SHIFT - inode->i_blkbits;
 2285 	pgoff_t start, end;
 2286 	ext4_lblk_t lblk;
 2287 	ext4_fsblk_t pblock;
 2288 	int err;
 2289 	bool map_bh = false;
 2362 	struct buffer_head *head, *bh;
 2363 	int bpp_bits = PAGE_SHIFT - inode->i_blkbits;
 2364 	pgoff_t start, end;
 2365 	ext4_lblk_t lblk;
 2366 	sector_t pblock;
 2367 	int err;
2368
2369 start = mpd->map.m_lblk >> bpp_bits;
2370 end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits;
2371 lblk = start << bpp_bits;
2372 pblock = mpd->map.m_pblk;
2373
2374 pagevec_init(&pvec);
2375 while (start <= end) {
2376 nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping,
2377 &start, end);
2378 if (nr_pages == 0)
2379 break;
2380 for (i = 0; i < nr_pages; i++) {
2381 struct page *page = pvec.pages[i];
2382
2290
2291 start = mpd->map.m_lblk >> bpp_bits;
2292 end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits;
2293 lblk = start << bpp_bits;
2294 pblock = mpd->map.m_pblk;
2295
2296 pagevec_init(&pvec);
2297 while (start <= end) {
2298 nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping,
2299 &start, end);
2300 if (nr_pages == 0)
2301 break;
2302 for (i = 0; i < nr_pages; i++) {
2303 struct page *page = pvec.pages[i];
2304
2383 bh = head = page_buffers(page);
2384 do {
2385 if (lblk < mpd->map.m_lblk)
2386 continue;
2387 if (lblk >= mpd->map.m_lblk + mpd->map.m_len) {
2388 /*
2389 * Buffer after end of mapped extent.
2390 * Find next buffer in the page to map.
2391 */
2392 mpd->map.m_len = 0;
2393 mpd->map.m_flags = 0;
2394 /*
2395 * FIXME: If dioread_nolock supports
2396 * blocksize < pagesize, we need to make
2397 * sure we add size mapped so far to
2398 * io_end->size as the following call
2399 * can submit the page for IO.
2400 */
2401 err = mpage_process_page_bufs(mpd, head,
2402 bh, lblk);
2403 pagevec_release(&pvec);
2404 if (err > 0)
2405 err = 0;
2406 return err;
2407 }
2408 if (buffer_delay(bh)) {
2409 clear_buffer_delay(bh);
2410 bh->b_blocknr = pblock++;
2411 }
2412 clear_buffer_unwritten(bh);
2413 } while (lblk++, (bh = bh->b_this_page) != head);
2414
 2415 			/*
 2416 			 * FIXME: This is going to break if dioread_nolock
 2417 			 * supports blocksize < pagesize as we will try to
 2418 			 * convert potentially unmapped parts of inode.
 2419 			 */
 2420 			mpd->io_submit.io_end->size += PAGE_SIZE;
 2421 			/* Page fully mapped - let IO run! */
 2422 			err = mpage_submit_page(mpd, page);
 2423 			if (err < 0) {
 2424 				pagevec_release(&pvec);
 2425 				return err;
 2426 			}
 2427 		}
 2428 		pagevec_release(&pvec);
 2429 	}
 2430 	/* Extent fully mapped and matches with page boundary. We are done. */
 2431 	mpd->map.m_len = 0;
 2432 	mpd->map.m_flags = 0;
 2433 	return 0;
 2305 			err = mpage_process_page(mpd, page, &lblk, &pblock,
 2306 						 &map_bh);
 2307 			/*
 2308 			 * If map_bh is true, means page may require further bh
 2309 			 * mapping, or maybe the page was submitted for IO.
 2310 			 * So we return to call further extent mapping.
 2311 			 */
 2312 			if (err < 0 || map_bh == true)
 2313 				goto out;
 2314 			/* Page fully mapped - let IO run! */
 2315 			err = mpage_submit_page(mpd, page);
 2316 			if (err < 0)
 2317 				goto out;
 2318 		}
 2319 		pagevec_release(&pvec);
 2320 	}
 2321 	/* Extent fully mapped and matches with page boundary. We are done. */
 2322 	mpd->map.m_len = 0;
 2323 	mpd->map.m_flags = 0;
 2324 	return 0;
 2325 out:
 2326 	pagevec_release(&pvec);
 2327 	return err;
2434}
2435
2436static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
2437{
2438 struct inode *inode = mpd->inode;
2439 struct ext4_map_blocks *map = &mpd->map;
2440 int get_blocks_flags;
2441 int err, dioread_nolock;

--- 63 unchanged lines hidden ---

2505 struct mpage_da_data *mpd,
2506 bool *give_up_on_write)
2507{
2508 struct inode *inode = mpd->inode;
2509 struct ext4_map_blocks *map = &mpd->map;
2510 int err;
2511 loff_t disksize;
2512 int progress = 0;
2328}
2329
2330static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
2331{
2332 struct inode *inode = mpd->inode;
2333 struct ext4_map_blocks *map = &mpd->map;
2334 int get_blocks_flags;
2335 int err, dioread_nolock;

--- 63 unchanged lines hidden ---

2399 struct mpage_da_data *mpd,
2400 bool *give_up_on_write)
2401{
2402 struct inode *inode = mpd->inode;
2403 struct ext4_map_blocks *map = &mpd->map;
2404 int err;
2405 loff_t disksize;
2406 int progress = 0;
2407 ext4_io_end_t *io_end = mpd->io_submit.io_end;
2408 struct ext4_io_end_vec *io_end_vec = ext4_alloc_io_end_vec(io_end);
 2409
 2410 	io_end_vec->offset = ((loff_t)map->m_lblk) << inode->i_blkbits;
 2513
 2514 	mpd->io_submit.io_end->offset =
 2515 			((loff_t)map->m_lblk) << inode->i_blkbits;
2516 do {
2517 err = mpage_map_one_extent(handle, mpd);
2518 if (err < 0) {
2519 struct super_block *sb = inode->i_sb;
2520
2521 if (ext4_forced_shutdown(EXT4_SB(sb)) ||
2522 EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
2523 goto invalidate_dirty_pages;

--- 877 unchanged lines hidden ---

3401 return !jbd2_transaction_committed(journal,
3402 EXT4_I(inode)->i_datasync_tid);
3403 /* Any metadata buffers to write? */
3404 if (!list_empty(&inode->i_mapping->private_list))
3405 return true;
3406 return inode->i_state & I_DIRTY_DATASYNC;
3407}
3408
2411 do {
2412 err = mpage_map_one_extent(handle, mpd);
2413 if (err < 0) {
2414 struct super_block *sb = inode->i_sb;
2415
2416 if (ext4_forced_shutdown(EXT4_SB(sb)) ||
2417 EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
2418 goto invalidate_dirty_pages;

--- 877 unchanged lines hidden ---

3296 return !jbd2_transaction_committed(journal,
3297 EXT4_I(inode)->i_datasync_tid);
3298 /* Any metadata buffers to write? */
3299 if (!list_empty(&inode->i_mapping->private_list))
3300 return true;
3301 return inode->i_state & I_DIRTY_DATASYNC;
3302}
3303
 3304 static void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
 3305 			   struct ext4_map_blocks *map, loff_t offset,
 3306 			   loff_t length)
 3307 {
 3308 	u8 blkbits = inode->i_blkbits;
 3309
 3409 static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 3410 			    unsigned flags, struct iomap *iomap)
 3411 {
 3412 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 3413 	unsigned int blkbits = inode->i_blkbits;
 3414 	unsigned long first_block, last_block;
 3415 	struct ext4_map_blocks map;
 3416 	bool delalloc = false;
 3417 	int ret;
 3418
3419 if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK)
3420 return -EINVAL;
3421 first_block = offset >> blkbits;
3422 last_block = min_t(loff_t, (offset + length - 1) >> blkbits,
3423 EXT4_MAX_LOGICAL_BLOCK);
3424
3425 if (flags & IOMAP_REPORT) {
3426 if (ext4_has_inline_data(inode)) {
3427 ret = ext4_inline_data_iomap(inode, iomap);
3428 if (ret != -EAGAIN) {
3429 if (ret == 0 && offset >= iomap->length)
3430 ret = -ENOENT;
3431 return ret;
3432 }
3433 }
3434 } else {
3435 if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
3436 return -ERANGE;
3437 }
3438
3439 map.m_lblk = first_block;
3440 map.m_len = last_block - first_block + 1;
3441
3442 if (flags & IOMAP_REPORT) {
3443 ret = ext4_map_blocks(NULL, inode, &map, 0);
3444 if (ret < 0)
3445 return ret;
3446
3447 if (ret == 0) {
3448 ext4_lblk_t end = map.m_lblk + map.m_len - 1;
3449 struct extent_status es;
3450
3451 ext4_es_find_extent_range(inode, &ext4_es_is_delayed,
3452 map.m_lblk, end, &es);
3453
3454 if (!es.es_len || es.es_lblk > end) {
3455 /* entire range is a hole */
3456 } else if (es.es_lblk > map.m_lblk) {
3457 /* range starts with a hole */
3458 map.m_len = es.es_lblk - map.m_lblk;
3459 } else {
3460 ext4_lblk_t offs = 0;
3461
3462 if (es.es_lblk < map.m_lblk)
3463 offs = map.m_lblk - es.es_lblk;
3464 map.m_lblk = es.es_lblk + offs;
3465 map.m_len = es.es_len - offs;
3466 delalloc = true;
3467 }
3468 }
3469 } else if (flags & IOMAP_WRITE) {
3470 int dio_credits;
3471 handle_t *handle;
3472 int retries = 0;
3473
3474 /* Trim mapping request to maximum we can map at once for DIO */
3475 if (map.m_len > DIO_MAX_BLOCKS)
3476 map.m_len = DIO_MAX_BLOCKS;
3477 dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
3478retry:
3479 /*
3480 * Either we allocate blocks and then we don't get unwritten
3481 * extent so we have reserved enough credits, or the blocks
3482 * are already allocated and unwritten and in that case
3483 * extent conversion fits in the credits as well.
3484 */
3485 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
3486 dio_credits);
3487 if (IS_ERR(handle))
3488 return PTR_ERR(handle);
3489
3490 ret = ext4_map_blocks(handle, inode, &map,
3491 EXT4_GET_BLOCKS_CREATE_ZERO);
3492 if (ret < 0) {
3493 ext4_journal_stop(handle);
3494 if (ret == -ENOSPC &&
3495 ext4_should_retry_alloc(inode->i_sb, &retries))
3496 goto retry;
3497 return ret;
3498 }
3499
3500 /*
3501 * If we added blocks beyond i_size, we need to make sure they
3502 * will get truncated if we crash before updating i_size in
3503 * ext4_iomap_end(). For faults we don't need to do that (and
3504 * even cannot because for orphan list operations inode_lock is
3505 * required) - if we happen to instantiate block beyond i_size,
3506 * it is because we race with truncate which has already added
3507 * the inode to the orphan list.
3508 */
3509 if (!(flags & IOMAP_FAULT) && first_block + map.m_len >
3510 (i_size_read(inode) + (1 << blkbits) - 1) >> blkbits) {
3511 int err;
3512
3513 err = ext4_orphan_add(handle, inode);
3514 if (err < 0) {
3515 ext4_journal_stop(handle);
3516 return err;
3517 }
3518 }
3519 ext4_journal_stop(handle);
3520 } else {
3521 ret = ext4_map_blocks(NULL, inode, &map, 0);
3522 if (ret < 0)
3523 return ret;
3524 }
3525
 3526 	iomap->flags = 0;
 3527 	if (ext4_inode_datasync_dirty(inode))
 3528 		iomap->flags |= IOMAP_F_DIRTY;
 3529 	iomap->bdev = inode->i_sb->s_bdev;
 3530 	iomap->dax_dev = sbi->s_daxdev;
 3531 	iomap->offset = (u64)first_block << blkbits;
 3532 	iomap->length = (u64)map.m_len << blkbits;
 3533
 3534 	if (ret == 0) {
 3535 		iomap->type = delalloc ? IOMAP_DELALLOC : IOMAP_HOLE;
 3536 		iomap->addr = IOMAP_NULL_ADDR;
 3537 	} else {
 3538 		if (map.m_flags & EXT4_MAP_MAPPED) {
 3539 			iomap->type = IOMAP_MAPPED;
 3540 		} else if (map.m_flags & EXT4_MAP_UNWRITTEN) {
 3541 			iomap->type = IOMAP_UNWRITTEN;
 3542 		} else {
 3543 			WARN_ON_ONCE(1);
 3544 			return -EIO;
 3545 		}
 3546 		iomap->addr = (u64)map.m_pblk << blkbits;
 3547 	}
 3548
 3549 	if (map.m_flags & EXT4_MAP_NEW)
 3550 		iomap->flags |= IOMAP_F_NEW;
 3551
 3552 	return 0;
 3553 }
 3554
 3310 	/*
 3311 	 * Writes that span EOF might trigger an I/O size update on completion,
 3312 	 * so consider them to be dirty for the purpose of O_DSYNC, even if
 3313 	 * there is no other metadata changes being made or are pending.
 3314 	 */
 3315 	iomap->flags = 0;
 3316 	if (ext4_inode_datasync_dirty(inode) ||
 3317 	    offset + length > i_size_read(inode))
 3318 		iomap->flags |= IOMAP_F_DIRTY;
 3319
 3320 	if (map->m_flags & EXT4_MAP_NEW)
 3321 		iomap->flags |= IOMAP_F_NEW;
 3322
 3323 	iomap->bdev = inode->i_sb->s_bdev;
 3324 	iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;
 3325 	iomap->offset = (u64) map->m_lblk << blkbits;
 3326 	iomap->length = (u64) map->m_len << blkbits;
 3327
 3328 	/*
 3329 	 * Flags passed to ext4_map_blocks() for direct I/O writes can result
 3330 	 * in m_flags having both EXT4_MAP_MAPPED and EXT4_MAP_UNWRITTEN bits
 3331 	 * set. In order for any allocated unwritten extents to be converted
 3332 	 * into written extents correctly within the ->end_io() handler, we
 3333 	 * need to ensure that the iomap->type is set appropriately. Hence, the
 3334 	 * reason why we need to check whether the EXT4_MAP_UNWRITTEN bit has
 3335 	 * been set first.
 3336 	 */
 3337 	if (map->m_flags & EXT4_MAP_UNWRITTEN) {
 3338 		iomap->type = IOMAP_UNWRITTEN;
 3339 		iomap->addr = (u64) map->m_pblk << blkbits;
 3340 	} else if (map->m_flags & EXT4_MAP_MAPPED) {
 3341 		iomap->type = IOMAP_MAPPED;
 3342 		iomap->addr = (u64) map->m_pblk << blkbits;
 3343 	} else {
 3344 		iomap->type = IOMAP_HOLE;
 3345 		iomap->addr = IOMAP_NULL_ADDR;
 3346 	}
 3347 }
 3348
 3349 static int ext4_iomap_alloc(struct inode *inode, struct ext4_map_blocks *map,
 3350 			    unsigned int flags)
 3351 {
 3352 	handle_t *handle;
 3353 	u8 blkbits = inode->i_blkbits;
 3354 	int ret, dio_credits, m_flags = 0, retries = 0;
 3355
 3356 	/*
 3357 	 * Trim the mapping request to the maximum value that we can map at
 3358 	 * once for direct I/O.
 3359 	 */
 3360 	if (map->m_len > DIO_MAX_BLOCKS)
 3361 		map->m_len = DIO_MAX_BLOCKS;
 3362 	dio_credits = ext4_chunk_trans_blocks(inode, map->m_len);
 3363
 3364 retry:
 3365 	/*
 3366 	 * Either we allocate blocks and then don't get an unwritten extent, so
 3367 	 * in that case we have reserved enough credits. Or, the blocks are
 3368 	 * already allocated and unwritten. In that case, the extent conversion
 3369 	 * fits into the credits as well.
 3370 	 */
 3371 	handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, dio_credits);
 3372 	if (IS_ERR(handle))
 3373 		return PTR_ERR(handle);
 3374
 3375 	/*
 3376 	 * DAX and direct I/O are the only two operations that are currently
 3377 	 * supported with IOMAP_WRITE.
 3378 	 */
 3379 	WARN_ON(!IS_DAX(inode) && !(flags & IOMAP_DIRECT));
 3380 	if (IS_DAX(inode))
 3381 		m_flags = EXT4_GET_BLOCKS_CREATE_ZERO;
 3382 	/*
 3383 	 * We use i_size instead of i_disksize here because delalloc writeback
 3384 	 * can complete at any point during the I/O and subsequently push the
 3385 	 * i_disksize out to i_size. This could be beyond where direct I/O is
 3386 	 * happening and thus expose allocated blocks to direct I/O reads.
 3387 	 */
 3388 	else if ((map->m_lblk * (1 << blkbits)) >= i_size_read(inode))
 3389 		m_flags = EXT4_GET_BLOCKS_CREATE;
 3390 	else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 3391 		m_flags = EXT4_GET_BLOCKS_IO_CREATE_EXT;
 3392
 3393 	ret = ext4_map_blocks(handle, inode, map, m_flags);
 3394
 3395 	/*
 3396 	 * We cannot fill holes in indirect tree based inodes as that could
 3397 	 * expose stale data in the case of a crash. Use the magic error code
 3398 	 * to fallback to buffered I/O.
 3399 	 */
 3400 	if (!m_flags && !ret)
 3401 		ret = -ENOTBLK;
 3402
 3403 	ext4_journal_stop(handle);
 3404 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 3405 		goto retry;
 3406
 3407 	return ret;
 3408 }
 3409
 3410
 3555 static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
 3556 			  ssize_t written, unsigned flags, struct iomap *iomap)
 3557 {
 3558 	int ret = 0;
 3559 	handle_t *handle;
 3560 	int blkbits = inode->i_blkbits;
 3561 	bool truncate = false;
 3562
 3563 	if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
 3564 		return 0;
 3565
 3566 	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
 3567 	if (IS_ERR(handle)) {
 3568 		ret = PTR_ERR(handle);
 3569 		goto orphan_del;
 3570 	}
 3571 	if (ext4_update_inode_size(inode, offset + written))
 3572 		ext4_mark_inode_dirty(handle, inode);
 3573 	/*
 3574 	 * We may need to truncate allocated but not written blocks beyond EOF.
 3575 	 */
 3576 	if (iomap->offset + iomap->length >
 3577 	    ALIGN(inode->i_size, 1 << blkbits)) {
 3578 		ext4_lblk_t written_blk, end_blk;
 3579
 3580 		written_blk = (offset + written) >> blkbits;
 3581 		end_blk = (offset + length) >> blkbits;
 3582 		if (written_blk < end_blk && ext4_can_truncate(inode))
 3583 			truncate = true;
 3584 	}
 3585 	/*
 3586 	 * Remove inode from orphan list if we were extending a inode and
 3587 	 * everything went fine.
 3588 	 */
 3589 	if (!truncate && inode->i_nlink &&
 3590 	    !list_empty(&EXT4_I(inode)->i_orphan))
 3591 		ext4_orphan_del(handle, inode);
 3592 	ext4_journal_stop(handle);
 3593 	if (truncate) {
 3594 		ext4_truncate_failed_write(inode);
 3595 orphan_del:
 3596 		/*
 3597 		 * If truncate failed early the inode might still be on the
 3598 		 * orphan list; we need to make sure the inode is removed from
 3599 		 * the orphan list in that case.
 3600 		 */
 3601 		if (inode->i_nlink)
 3602 			ext4_orphan_del(NULL, inode);
 3603 	}
 3604 	return ret;
 3605 }
 3606
 3607 const struct iomap_ops ext4_iomap_ops = {
 3608 	.iomap_begin		= ext4_iomap_begin,
 3609 	.iomap_end		= ext4_iomap_end,
 3610 };
 3611
3612static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3613 ssize_t size, void *private)
3411static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
3412 unsigned flags, struct iomap *iomap, struct iomap *srcmap)
3614{
3413{
3615 ext4_io_end_t *io_end = private;
3414 int ret;
3415 struct ext4_map_blocks map;
3416 u8 blkbits = inode->i_blkbits;
3616
3417
3617 /* if not async direct IO just return */
3618 if (!io_end)
3619 return 0;
3418 if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK)
3419 return -EINVAL;
3620
3420
3621 ext_debug("ext4_end_io_dio(): io_end 0x%p "
3622 "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
3623 io_end, io_end->inode->i_ino, iocb, offset, size);
3421 if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
3422 return -ERANGE;
3624
3625 /*
3423
3424 /*
3626 * Error during AIO DIO. We cannot convert unwritten extents as the
3627 * data was not written. Just clear the unwritten flag and drop io_end.
3425 * Calculate the first and last logical blocks respectively.
3628 */
3426 */
3629 if (size <= 0) {
3630 ext4_clear_io_unwritten_flag(io_end);
3631 size = 0;
3632 }
3633 io_end->offset = offset;
3634 io_end->size = size;
3635 ext4_put_io_end(io_end);
3427 map.m_lblk = offset >> blkbits;
3428 map.m_len = min_t(loff_t, (offset + length - 1) >> blkbits,
3429 EXT4_MAX_LOGICAL_BLOCK) - map.m_lblk + 1;
3636
3430
3637 return 0;
3638}
3431 if (flags & IOMAP_WRITE)
3432 ret = ext4_iomap_alloc(inode, &map, flags);
3433 else
3434 ret = ext4_map_blocks(NULL, inode, &map, 0);
3639
3435
3640/*
3641 * Handling of direct IO writes.
3642 *
3643 * For ext4 extent files, ext4 will do direct-io write even to holes,
3644 * preallocated extents, and those write extend the file, no need to
3645 * fall back to buffered IO.
3646 *
3647 * For holes, we fallocate those blocks, mark them as unwritten
3648 * If those blocks were preallocated, we mark sure they are split, but
3649 * still keep the range to write as unwritten.
3650 *
3651 * The unwritten extents will be converted to written when DIO is completed.
3652 * For async direct IO, since the IO may still pending when return, we
3653 * set up an end_io call back function, which will do the conversion
3654 * when async direct IO completed.
3655 *
3656 * If the O_DIRECT write will extend the file then add this inode to the
3657 * orphan list. So recovery will truncate it back to the original size
3658 * if the machine crashes during the write.
3659 *
3660 */
3661static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
3662{
3663 struct file *file = iocb->ki_filp;
3664 struct inode *inode = file->f_mapping->host;
3665 struct ext4_inode_info *ei = EXT4_I(inode);
3666 ssize_t ret;
3667 loff_t offset = iocb->ki_pos;
3668 size_t count = iov_iter_count(iter);
3669 int overwrite = 0;
3670 get_block_t *get_block_func = NULL;
3671 int dio_flags = 0;
3672 loff_t final_size = offset + count;
3673 int orphan = 0;
3674 handle_t *handle;
3436 if (ret < 0)
3437 return ret;
3675
3438
3676 if (final_size > inode->i_size || final_size > ei->i_disksize) {
3677 /* Credits for sb + inode write */
3678 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
3679 if (IS_ERR(handle)) {
3680 ret = PTR_ERR(handle);
3681 goto out;
3682 }
3683 ret = ext4_orphan_add(handle, inode);
3684 if (ret) {
3685 ext4_journal_stop(handle);
3686 goto out;
3687 }
3688 orphan = 1;
3689 ext4_update_i_disksize(inode, inode->i_size);
3690 ext4_journal_stop(handle);
3691 }
3439 ext4_set_iomap(inode, iomap, &map, offset, length);
3692
3440
3693 BUG_ON(iocb->private == NULL);
3441 return 0;
3442}
3694
3443
3444static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
3445 ssize_t written, unsigned flags, struct iomap *iomap)
3446{
3695 /*
3447 /*
3696 * Make all waiters for direct IO properly wait also for extent
3697 * conversion. This also disallows race between truncate() and
3698 * overwrite DIO as i_dio_count needs to be incremented under i_mutex.
3448 * Check to see whether an error occurred while writing out the data to
3449 * the allocated blocks. If so, return the magic error code so that we
3450 * fallback to buffered I/O and attempt to complete the remainder of
3451 * the I/O. Any blocks that may have been allocated in preparation for
3452 * the direct I/O will be reused during buffered I/O.
3699 */
3453 */
3700 inode_dio_begin(inode);
3454 if (flags & (IOMAP_WRITE | IOMAP_DIRECT) && written == 0)
3455 return -ENOTBLK;
3701
3456
3702 /* If we do a overwrite dio, i_mutex locking can be released */
3703 overwrite = *((int *)iocb->private);
3457 return 0;
3458}
3704
3459
3705 if (overwrite)
3706 inode_unlock(inode);
3460const struct iomap_ops ext4_iomap_ops = {
3461 .iomap_begin = ext4_iomap_begin,
3462 .iomap_end = ext4_iomap_end,
3463};
3707
3464
3708 /*
3709 * For extent mapped files we could direct write to holes and fallocate.
3710 *
3711 * Allocated blocks to fill the hole are marked as unwritten to prevent
3712 * parallel buffered read to expose the stale data before DIO complete
3713 * the data IO.
3714 *
3715 * As to previously fallocated extents, ext4 get_block will just simply
3716 * mark the buffer mapped but still keep the extents unwritten.
3717 *
3718 * For non AIO case, we will convert those unwritten extents to written
3719 * after return back from blockdev_direct_IO. That way we save us from
3720 * allocating io_end structure and also the overhead of offloading
3721 * the extent convertion to a workqueue.
3722 *
3723 * For async DIO, the conversion needs to be deferred when the
3724 * IO is completed. The ext4 end_io callback function will be
3725 * called to take care of the conversion work. Here for async
3726 * case, we allocate an io_end structure to hook to the iocb.
3727 */
3728 iocb->private = NULL;
3729 if (overwrite)
3730 get_block_func = ext4_dio_get_block_overwrite;
3731 else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
3732 round_down(offset, i_blocksize(inode)) >= inode->i_size) {
3733 get_block_func = ext4_dio_get_block;
3734 dio_flags = DIO_LOCKING | DIO_SKIP_HOLES;
3735 } else if (is_sync_kiocb(iocb)) {
3736 get_block_func = ext4_dio_get_block_unwritten_sync;
3737 dio_flags = DIO_LOCKING;
3738 } else {
3739 get_block_func = ext4_dio_get_block_unwritten_async;
3740 dio_flags = DIO_LOCKING;
3741 }
3742 ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
3743 get_block_func, ext4_end_io_dio, NULL,
3744 dio_flags);
3465static bool ext4_iomap_is_delalloc(struct inode *inode,
3466 struct ext4_map_blocks *map)
3467{
3468 struct extent_status es;
3469 ext4_lblk_t offset = 0, end = map->m_lblk + map->m_len - 1;
3745
3470
3746 if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
3747 EXT4_STATE_DIO_UNWRITTEN)) {
3748 int err;
3749 /*
3750 * for non AIO case, since the IO is already
3751 * completed, we could do the conversion right here
3752 */
3753 err = ext4_convert_unwritten_extents(NULL, inode,
3754 offset, ret);
3755 if (err < 0)
3756 ret = err;
3757 ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3758 }
3471 ext4_es_find_extent_range(inode, &ext4_es_is_delayed,
3472 map->m_lblk, end, &es);
3759
3473
3760 inode_dio_end(inode);
3761 /* take i_mutex locking again if we do a ovewrite dio */
3762 if (overwrite)
3763 inode_lock(inode);
3474 if (!es.es_len || es.es_lblk > end)
3475 return false;
3764
3476
3765 if (ret < 0 && final_size > inode->i_size)
3766 ext4_truncate_failed_write(inode);
3767
3768 /* Handle extending of i_size after direct IO write */
3769 if (orphan) {
3770 int err;
3771
3772 /* Credits for sb + inode write */
3773 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
3774 if (IS_ERR(handle)) {
3775 /*
3776 * We wrote the data but cannot extend
3777 * i_size. Bail out. In async io case, we do
3778 * not return error here because we have
3779 * already submmitted the corresponding
3780 * bio. Returning error here makes the caller
3781 * think that this IO is done and failed
3782 * resulting in race with bio's completion
3783 * handler.
3784 */
3785 if (!ret)
3786 ret = PTR_ERR(handle);
3787 if (inode->i_nlink)
3788 ext4_orphan_del(NULL, inode);
3789
3790 goto out;
3791 }
3792 if (inode->i_nlink)
3793 ext4_orphan_del(handle, inode);
3794 if (ret > 0) {
3795 loff_t end = offset + ret;
3796 if (end > inode->i_size || end > ei->i_disksize) {
3797 ext4_update_i_disksize(inode, end);
3798 if (end > inode->i_size)
3799 i_size_write(inode, end);
3800 /*
3801 * We're going to return a positive `ret'
3802 * here due to non-zero-length I/O, so there's
3803 * no way of reporting error returns from
3804 * ext4_mark_inode_dirty() to userspace. So
3805 * ignore it.
3806 */
3807 ext4_mark_inode_dirty(handle, inode);
3808 }
3809 }
3810 err = ext4_journal_stop(handle);
3811 if (ret == 0)
3812 ret = err;
3477 if (es.es_lblk > map->m_lblk) {
3478 map->m_len = es.es_lblk - map->m_lblk;
3479 return false;
3813 }
3480 }
3814out:
3815 return ret;
3816}
3817
3481
3818static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
3819{
3820 struct address_space *mapping = iocb->ki_filp->f_mapping;
3821 struct inode *inode = mapping->host;
3822 size_t count = iov_iter_count(iter);
3823 ssize_t ret;
3482 offset = map->m_lblk - es.es_lblk;
3483 map->m_len = es.es_len - offset;
3824
3484
3825 /*
3826 * Shared inode_lock is enough for us - it protects against concurrent
3827 * writes & truncates and since we take care of writing back page cache,
3828 * we are protected against page writeback as well.
3829 */
3830 inode_lock_shared(inode);
3831 ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
3832 iocb->ki_pos + count - 1);
3833 if (ret)
3834 goto out_unlock;
3835 ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
3836 iter, ext4_dio_get_block, NULL, NULL, 0);
3837out_unlock:
3838 inode_unlock_shared(inode);
3839 return ret;
3485 return true;
3840}
3841
3486}
3487
3842static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
3488static int ext4_iomap_begin_report(struct inode *inode, loff_t offset,
3489 loff_t length, unsigned int flags,
3490 struct iomap *iomap, struct iomap *srcmap)
3843{
3491{
3844 struct file *file = iocb->ki_filp;
3845 struct inode *inode = file->f_mapping->host;
3846 size_t count = iov_iter_count(iter);
3847 loff_t offset = iocb->ki_pos;
3848 ssize_t ret;
3492 int ret;
3493 bool delalloc = false;
3494 struct ext4_map_blocks map;
3495 u8 blkbits = inode->i_blkbits;
3849
3496
3850#ifdef CONFIG_FS_ENCRYPTION
3851 if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode))
3852 return 0;
3853#endif
3854 if (fsverity_active(inode))
3855 return 0;
3497 if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK)
3498 return -EINVAL;
3856
3499
3500 if (ext4_has_inline_data(inode)) {
3501 ret = ext4_inline_data_iomap(inode, iomap);
3502 if (ret != -EAGAIN) {
3503 if (ret == 0 && offset >= iomap->length)
3504 ret = -ENOENT;
3505 return ret;
3506 }
3507 }
3508
3857 /*
3509 /*
3858 * If we are doing data journalling we don't support O_DIRECT
3510 * Calculate the first and last logical block respectively.
3859 */
3511 */
3860 if (ext4_should_journal_data(inode))
3861 return 0;
3512 map.m_lblk = offset >> blkbits;
3513 map.m_len = min_t(loff_t, (offset + length - 1) >> blkbits,
3514 EXT4_MAX_LOGICAL_BLOCK) - map.m_lblk + 1;
3862
3515
3863 /* Let buffer I/O handle the inline data case. */
3864 if (ext4_has_inline_data(inode))
3865 return 0;
3516 ret = ext4_map_blocks(NULL, inode, &map, 0);
3517 if (ret < 0)
3518 return ret;
3519 if (ret == 0)
3520 delalloc = ext4_iomap_is_delalloc(inode, &map);
3866
3521
3867 trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
3868 if (iov_iter_rw(iter) == READ)
3869 ret = ext4_direct_IO_read(iocb, iter);
3870 else
3871 ret = ext4_direct_IO_write(iocb, iter);
3872 trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret);
3873 return ret;
3522 ext4_set_iomap(inode, iomap, &map, offset, length);
3523 if (delalloc && iomap->type == IOMAP_HOLE)
3524 iomap->type = IOMAP_DELALLOC;
3525
3526 return 0;
3874}
3875
3527}
3528
3529const struct iomap_ops ext4_iomap_report_ops = {
3530 .iomap_begin = ext4_iomap_begin_report,
3531};
3532
3876/*
3877 * Pages can be marked dirty completely asynchronously from ext4's journalling
3878 * activity. By filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do
3879 * much here because ->set_page_dirty is called under VFS locks. The page is
3880 * not necessarily locked.
3881 *
3882 * We cannot just dirty the page and leave attached buffers clean, because the
3883 * buffers' dirty state is "definitive". We cannot just set the buffers dirty

--- 21 unchanged lines hidden (view full) ---

3905 .writepage = ext4_writepage,
3906 .writepages = ext4_writepages,
3907 .write_begin = ext4_write_begin,
3908 .write_end = ext4_write_end,
3909 .set_page_dirty = ext4_set_page_dirty,
3910 .bmap = ext4_bmap,
3911 .invalidatepage = ext4_invalidatepage,
3912 .releasepage = ext4_releasepage,
3533/*
3534 * Pages can be marked dirty completely asynchronously from ext4's journalling
3535 * activity. By filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do
3536 * much here because ->set_page_dirty is called under VFS locks. The page is
3537 * not necessarily locked.
3538 *
3539 * We cannot just dirty the page and leave attached buffers clean, because the
3540 * buffers' dirty state is "definitive". We cannot just set the buffers dirty

--- 21 unchanged lines hidden ---

3562 .writepage = ext4_writepage,
3563 .writepages = ext4_writepages,
3564 .write_begin = ext4_write_begin,
3565 .write_end = ext4_write_end,
3566 .set_page_dirty = ext4_set_page_dirty,
3567 .bmap = ext4_bmap,
3568 .invalidatepage = ext4_invalidatepage,
3569 .releasepage = ext4_releasepage,
3913 .direct_IO = ext4_direct_IO,
3570 .direct_IO = noop_direct_IO,
3914 .migratepage = buffer_migrate_page,
3915 .is_partially_uptodate = block_is_partially_uptodate,
3916 .error_remove_page = generic_error_remove_page,
3917};
3918
3919static const struct address_space_operations ext4_journalled_aops = {
3920 .readpage = ext4_readpage,
3921 .readpages = ext4_readpages,
3922 .writepage = ext4_writepage,
3923 .writepages = ext4_writepages,
3924 .write_begin = ext4_write_begin,
3925 .write_end = ext4_journalled_write_end,
3926 .set_page_dirty = ext4_journalled_set_page_dirty,
3927 .bmap = ext4_bmap,
3928 .invalidatepage = ext4_journalled_invalidatepage,
3929 .releasepage = ext4_releasepage,
3571 .migratepage = buffer_migrate_page,
3572 .is_partially_uptodate = block_is_partially_uptodate,
3573 .error_remove_page = generic_error_remove_page,
3574};
3575
3576static const struct address_space_operations ext4_journalled_aops = {
3577 .readpage = ext4_readpage,
3578 .readpages = ext4_readpages,
3579 .writepage = ext4_writepage,
3580 .writepages = ext4_writepages,
3581 .write_begin = ext4_write_begin,
3582 .write_end = ext4_journalled_write_end,
3583 .set_page_dirty = ext4_journalled_set_page_dirty,
3584 .bmap = ext4_bmap,
3585 .invalidatepage = ext4_journalled_invalidatepage,
3586 .releasepage = ext4_releasepage,
3930 .direct_IO = ext4_direct_IO,
3587 .direct_IO = noop_direct_IO,
3931 .is_partially_uptodate = block_is_partially_uptodate,
3932 .error_remove_page = generic_error_remove_page,
3933};
3934
3935static const struct address_space_operations ext4_da_aops = {
3936 .readpage = ext4_readpage,
3937 .readpages = ext4_readpages,
3938 .writepage = ext4_writepage,
3939 .writepages = ext4_writepages,
3940 .write_begin = ext4_da_write_begin,
3941 .write_end = ext4_da_write_end,
3942 .set_page_dirty = ext4_set_page_dirty,
3943 .bmap = ext4_bmap,
3944 .invalidatepage = ext4_invalidatepage,
3945 .releasepage = ext4_releasepage,
3588 .is_partially_uptodate = block_is_partially_uptodate,
3589 .error_remove_page = generic_error_remove_page,
3590};
3591
3592static const struct address_space_operations ext4_da_aops = {
3593 .readpage = ext4_readpage,
3594 .readpages = ext4_readpages,
3595 .writepage = ext4_writepage,
3596 .writepages = ext4_writepages,
3597 .write_begin = ext4_da_write_begin,
3598 .write_end = ext4_da_write_end,
3599 .set_page_dirty = ext4_set_page_dirty,
3600 .bmap = ext4_bmap,
3601 .invalidatepage = ext4_invalidatepage,
3602 .releasepage = ext4_releasepage,
3946 .direct_IO = ext4_direct_IO,
3603 .direct_IO = noop_direct_IO,
3947 .migratepage = buffer_migrate_page,
3948 .is_partially_uptodate = block_is_partially_uptodate,
3949 .error_remove_page = generic_error_remove_page,
3950};
3951
3952static const struct address_space_operations ext4_dax_aops = {
3953 .writepages = ext4_dax_writepages,
3954 .direct_IO = noop_direct_IO,

--- 1952 unchanged lines hidden (view full) ---

5907
5908static int __ext4_expand_extra_isize(struct inode *inode,
5909 unsigned int new_extra_isize,
5910 struct ext4_iloc *iloc,
5911 handle_t *handle, int *no_expand)
5912{
5913 struct ext4_inode *raw_inode;
5914 struct ext4_xattr_ibody_header *header;
3604 .migratepage = buffer_migrate_page,
3605 .is_partially_uptodate = block_is_partially_uptodate,
3606 .error_remove_page = generic_error_remove_page,
3607};
3608
3609static const struct address_space_operations ext4_dax_aops = {
3610 .writepages = ext4_dax_writepages,
3611 .direct_IO = noop_direct_IO,

--- 1952 unchanged lines hidden ---

5564
5565static int __ext4_expand_extra_isize(struct inode *inode,
5566 unsigned int new_extra_isize,
5567 struct ext4_iloc *iloc,
5568 handle_t *handle, int *no_expand)
5569{
5570 struct ext4_inode *raw_inode;
5571 struct ext4_xattr_ibody_header *header;
5572 unsigned int inode_size = EXT4_INODE_SIZE(inode->i_sb);
5573 struct ext4_inode_info *ei = EXT4_I(inode);
5915 int error;
5916
5574 int error;
5575
5576 /* this was checked at iget time, but double check for good measure */
5577 if ((EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > inode_size) ||
5578 (ei->i_extra_isize & 3)) {
5579 EXT4_ERROR_INODE(inode, "bad extra_isize %u (inode size %u)",
5580 ei->i_extra_isize,
5581 EXT4_INODE_SIZE(inode->i_sb));
5582 return -EFSCORRUPTED;
5583 }
5584 if ((new_extra_isize < ei->i_extra_isize) ||
5585 (new_extra_isize < 4) ||
5586 (new_extra_isize > inode_size - EXT4_GOOD_OLD_INODE_SIZE))
5587 return -EINVAL; /* Should never happen */
5588
5917 raw_inode = ext4_raw_inode(iloc);
5918
5919 header = IHDR(inode, raw_inode);
5920
5921 /* No extended attributes present */
5922 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
5923 header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
5924 memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +

--- 35 unchanged lines hidden ---

5960 * In nojournal mode, we can immediately attempt to expand
5961 * the inode. When journaled, we first need to obtain extra
5962 * buffer credits since we may write into the EA block
5963 * with this same handle. If journal_extend fails, then it will
5964 * only result in a minor loss of functionality for that inode.
5965 * If this is felt to be critical, then e2fsck should be run to
5966 * force a large enough s_min_extra_isize.
5967 */
5589 raw_inode = ext4_raw_inode(iloc);
5590
5591 header = IHDR(inode, raw_inode);
5592
5593 /* No extended attributes present */
5594 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
5595 header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
5596 memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +

--- 35 unchanged lines hidden ---

5632 * In nojournal mode, we can immediately attempt to expand
5633 * the inode. When journaled, we first need to obtain extra
5634 * buffer credits since we may write into the EA block
5635 * with this same handle. If journal_extend fails, then it will
5636 * only result in a minor loss of functionality for that inode.
5637 * If this is felt to be critical, then e2fsck should be run to
5638 * force a large enough s_min_extra_isize.
5639 */
5968 if (ext4_handle_valid(handle) &&
5969 jbd2_journal_extend(handle,
5970 EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) != 0)
5640 if (ext4_journal_extend(handle,
5641 EXT4_DATA_TRANS_BLOCKS(inode->i_sb), 0) != 0)
5971 return -ENOSPC;
5972
5973 if (ext4_write_trylock_xattr(inode, &no_expand) == 0)
5974 return -EBUSY;
5975
5976 error = __ext4_expand_extra_isize(inode, new_extra_isize, &iloc,
5977 handle, &no_expand);
5978 ext4_write_unlock_xattr(inode, &no_expand);

--- 318 unchanged lines hidden ---
5642 return -ENOSPC;
5643
5644 if (ext4_write_trylock_xattr(inode, &no_expand) == 0)
5645 return -EBUSY;
5646
5647 error = __ext4_expand_extra_isize(inode, new_extra_isize, &iloc,
5648 handle, &no_expand);
5649 ext4_write_unlock_xattr(inode, &no_expand);

--- 318 unchanged lines hidden ---