// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017-2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 */
#include "internal.h"
#include <linux/prefetch.h>
#include <linux/iomap.h>
#include <linux/dax.h>
#include <trace/events/erofs.h>

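/*
 * Read completion callback for raw (uncompressed) bios: mark each page
 * uptodate (or error) and unlock it so waiting readers can proceed.
 */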
static void erofs_readendio(struct bio *bio)
{
	struct bio_vec *bvec;
	blk_status_t err = bio->bi_status;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *page = bvec->bv_page;

		/* page is already locked */
		DBG_BUGON(PageUptodate(page));

		if (err)
			SetPageError(page);
		else
			SetPageUptodate(page);

		unlock_page(page);
		/* page could be reclaimed now */
	}
	bio_put(bio);
}

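/*
 * Read a metadata block through the block device's page cache; the page
 * is returned locked and the caller is responsible for unlocking and
 * releasing it.  __GFP_FS is masked off to avoid recursing into the
 * filesystem from memory reclaim.
 */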
struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr)
{
	struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping;
	struct page *page;

	page = read_cache_page_gfp(mapping, blkaddr,
				   mapping_gfp_constraint(mapping, ~__GFP_FS));
	/* should already be PageUptodate */
	if (!IS_ERR(page))
		lock_page(page);
	return page;
}

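/*
 * Map a logical range of a flat (uncompressed) inode to its physical
 * location: plain blocks sit contiguously from vi->raw_blkaddr, while a
 * tail-packed last block lives inline right after the on-disk inode and
 * its xattrs (reported with EROFS_MAP_META).
 */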
static int erofs_map_blocks_flatmode(struct inode *inode,
				     struct erofs_map_blocks *map,
				     int flags)
{
	int err = 0;
	erofs_blk_t nblocks, lastblk;
	u64 offset = map->m_la;
	struct erofs_inode *vi = EROFS_I(inode);
	bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);

	trace_erofs_map_blocks_flatmode_enter(inode, map, flags);

	nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
	lastblk = nblocks - tailendpacking;

	if (offset >= inode->i_size) {
		/* leave out-of-bounds access unmapped */
		map->m_flags = 0;
		map->m_plen = 0;
		goto out;
	}

	/* there is no hole in flatmode */
	map->m_flags = EROFS_MAP_MAPPED;

	if (offset < blknr_to_addr(lastblk)) {
		map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la;
		map->m_plen = blknr_to_addr(lastblk) - offset;
	} else if (tailendpacking) {
		/* 2 - inode inline B: inode, [xattrs], inline last blk... */
		struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);

		map->m_pa = iloc(sbi, vi->nid) + vi->inode_isize +
			vi->xattr_isize + erofs_blkoff(map->m_la);
		map->m_plen = inode->i_size - offset;

		/* inline data should be located in one meta block */
		if (erofs_blkoff(map->m_pa) + map->m_plen > PAGE_SIZE) {
			erofs_err(inode->i_sb,
				  "inline data cross block boundary @ nid %llu",
				  vi->nid);
			DBG_BUGON(1);
			err = -EFSCORRUPTED;
			goto err_out;
		}

		map->m_flags |= EROFS_MAP_META;
	} else {
		erofs_err(inode->i_sb,
			  "internal error @ nid: %llu (size %llu), m_la 0x%llx",
			  vi->nid, inode->i_size, map->m_la);
		DBG_BUGON(1);
		err = -EIO;
		goto err_out;
	}

out:
	map->m_llen = map->m_plen;

err_out:
	trace_erofs_map_blocks_flatmode_exit(inode, map, flags, 0);
	return err;
}

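/*
 * Read one raw page: append it to the running bio while blocks stay
 * physically contiguous, submitting and reallocating the bio otherwise.
 * Holes are zero-filled in place and tail-packed (inline) data is copied
 * out of the metadata block rather than read through a bio.
 */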
static inline struct bio *erofs_read_raw_page(struct bio *bio,
					      struct address_space *mapping,
					      struct page *page,
					      erofs_off_t *last_block,
					      unsigned int nblocks,
					      unsigned int *eblks,
					      bool ra)
{
	struct inode *const inode = mapping->host;
	struct super_block *const sb = inode->i_sb;
	erofs_off_t current_block = (erofs_off_t)page->index;
	int err;

	DBG_BUGON(!nblocks);

	if (PageUptodate(page)) {
		err = 0;
		goto has_updated;
	}

	/* note that bio is also NULL for the readpage case */
	if (bio &&
	    (*last_block + 1 != current_block || !*eblks)) {
submit_bio_retry:
		submit_bio(bio);
		bio = NULL;
	}

	if (!bio) {
		struct erofs_map_blocks map = {
			.m_la = blknr_to_addr(current_block),
		};
		erofs_blk_t blknr;
		unsigned int blkoff;

		err = erofs_map_blocks_flatmode(inode, &map, EROFS_GET_BLOCKS_RAW);
		if (err)
			goto err_out;

		/* zero out the holed page */
		if (!(map.m_flags & EROFS_MAP_MAPPED)) {
			zero_user_segment(page, 0, PAGE_SIZE);
			SetPageUptodate(page);

			/* imply err = 0, see erofs_map_blocks */
			goto has_updated;
		}

		/* for RAW access mode, m_plen must be equal to m_llen */
		DBG_BUGON(map.m_plen != map.m_llen);

		blknr = erofs_blknr(map.m_pa);
		blkoff = erofs_blkoff(map.m_pa);

		/* deal with inline page */
		if (map.m_flags & EROFS_MAP_META) {
			void *vsrc, *vto;
			struct page *ipage;

			DBG_BUGON(map.m_plen > PAGE_SIZE);

			ipage = erofs_get_meta_page(inode->i_sb, blknr);

			if (IS_ERR(ipage)) {
				err = PTR_ERR(ipage);
				goto err_out;
			}

			vsrc = kmap_atomic(ipage);
			vto = kmap_atomic(page);
			memcpy(vto, vsrc + blkoff, map.m_plen);
			memset(vto + map.m_plen, 0, PAGE_SIZE - map.m_plen);
			kunmap_atomic(vto);
			kunmap_atomic(vsrc);
			flush_dcache_page(page);

			SetPageUptodate(page);
			/* TODO: could we unlock the page earlier? */
			unlock_page(ipage);
			put_page(ipage);

			/* imply err = 0, see erofs_map_blocks */
			goto has_updated;
		}

		/* pa must be block-aligned for raw reading */
		DBG_BUGON(erofs_blkoff(map.m_pa));

		/* max # of continuous pages */
		if (nblocks > DIV_ROUND_UP(map.m_plen, PAGE_SIZE))
			nblocks = DIV_ROUND_UP(map.m_plen, PAGE_SIZE);

		*eblks = bio_max_segs(nblocks);
		bio = bio_alloc(GFP_NOIO, *eblks);

		bio->bi_end_io = erofs_readendio;
		bio_set_dev(bio, sb->s_bdev);
		bio->bi_iter.bi_sector = (sector_t)blknr <<
			LOG_SECTORS_PER_BLOCK;
		bio->bi_opf = REQ_OP_READ | (ra ? REQ_RAHEAD : 0);
	}

	err = bio_add_page(bio, page, PAGE_SIZE, 0);
	/* out of the extent or bio is full */
	if (err < PAGE_SIZE)
		goto submit_bio_retry;
	--*eblks;
	*last_block = current_block;
	return bio;

err_out:
	/* for sync reading, set page error immediately */
	if (!ra) {
		SetPageError(page);
		ClearPageUptodate(page);
	}
has_updated:
	unlock_page(page);

	/* if updated manually, continuous pages have a gap */
	if (bio)
		submit_bio(bio);
	return err ? ERR_PTR(err) : NULL;
}

/*
 * Since we don't have write or truncate flows, no inode
 * locking needs to be held at the moment.
 */
static int erofs_raw_access_readpage(struct file *file, struct page *page)
{
	erofs_off_t last_block;
	unsigned int eblks;
	struct bio *bio;

	trace_erofs_readpage(page, true);

	bio = erofs_read_raw_page(NULL, page->mapping,
				  page, &last_block, 1, &eblks, false);

	if (IS_ERR(bio))
		return PTR_ERR(bio);

	if (bio)
		submit_bio(bio);
	return 0;
}

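/*
 * Readahead counterpart of ->readpage: batch as many physically
 * contiguous pages as possible into each bio.  Errors are only logged
 * here; failed pages simply stay !Uptodate and are read again via
 * ->readpage on demand.
 */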
static void erofs_raw_access_readahead(struct readahead_control *rac)
{
	erofs_off_t last_block;
	unsigned int eblks;
	struct bio *bio = NULL;
	struct page *page;

	trace_erofs_readpages(rac->mapping->host, readahead_index(rac),
			readahead_count(rac), true);

	while ((page = readahead_page(rac))) {
		prefetchw(&page->flags);

		bio = erofs_read_raw_page(bio, rac->mapping, page, &last_block,
				readahead_count(rac), &eblks, true);

		/* all page errors are ignored during readahead */
		if (IS_ERR(bio)) {
			pr_err("%s, readahead error at page %lu of nid %llu\n",
			       __func__, page->index,
			       EROFS_I(rac->mapping->host)->nid);

			bio = NULL;
		}

		put_page(page);
	}

	if (bio)
		submit_bio(bio);
}

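/*
 * ->bmap (FIBMAP) handler: return the physical block number of a logical
 * block, or 0 for the tail-packed region since inline data has no
 * block-aligned address on disk.
 */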
static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;
	struct erofs_map_blocks map = {
		.m_la = blknr_to_addr(block),
	};

	if (EROFS_I(inode)->datalayout == EROFS_INODE_FLAT_INLINE) {
		erofs_blk_t blks = i_size_read(inode) >> LOG_BLOCK_SIZE;

		if (block >> LOG_SECTORS_PER_BLOCK >= blks)
			return 0;
	}

	if (!erofs_map_blocks_flatmode(inode, &map, EROFS_GET_BLOCKS_RAW))
		return erofs_blknr(map.m_pa);

	return 0;
}

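/*
 * iomap_begin hook shared by the DAX and direct I/O paths: translate the
 * requested file range into one iomap extent, reporting unmapped ranges
 * as IOMAP_HOLE.
 */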
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
	int ret;
	struct erofs_map_blocks map;

	map.m_la = offset;
	map.m_llen = length;

	ret = erofs_map_blocks_flatmode(inode, &map, EROFS_GET_BLOCKS_RAW);
	if (ret < 0)
		return ret;

	iomap->bdev = inode->i_sb->s_bdev;
	iomap->dax_dev = EROFS_I_SB(inode)->dax_dev;
	iomap->offset = map.m_la;
	iomap->length = map.m_llen;
	iomap->flags = 0;

	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		iomap->type = IOMAP_HOLE;
		iomap->addr = IOMAP_NULL_ADDR;
		if (!iomap->length)
			iomap->length = length;
		return 0;
	}

	/* that shouldn't happen for now */
	if (map.m_flags & EROFS_MAP_META) {
		DBG_BUGON(1);
		return -ENOTBLK;
	}
	iomap->type = IOMAP_MAPPED;
	iomap->addr = map.m_pa;
	return 0;
}

static const struct iomap_ops erofs_iomap_ops = {
	.iomap_begin = erofs_iomap_begin,
};

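/*
 * Check whether a direct read may proceed: returns 0 to go direct, a
 * positive value to fall back to buffered I/O, or a negative errno.
 * Position, length and the user buffer must all be aligned to the
 * logical block size.
 */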
static int erofs_prepare_dio(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	loff_t align = iocb->ki_pos | iov_iter_count(to) |
		iov_iter_alignment(to);
	struct block_device *bdev = inode->i_sb->s_bdev;
	unsigned int blksize_mask;

	if (bdev)
		blksize_mask = (1 << ilog2(bdev_logical_block_size(bdev))) - 1;
	else
		blksize_mask = (1 << inode->i_blkbits) - 1;

	if (align & blksize_mask)
		return -EINVAL;

	/*
	 * Temporarily fall back to buffered I/O for tail-packed inline
	 * data, since supporting it directly relies on an iomap core
	 * update.
	 */
	if (EROFS_I(inode)->datalayout == EROFS_INODE_FLAT_INLINE &&
	    iocb->ki_pos + iov_iter_count(to) >
			rounddown(inode->i_size, EROFS_BLKSIZ))
		return 1;
	return 0;
}

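/*
 * ->read_iter: DAX reads bypass the page cache entirely; direct I/O goes
 * through iomap, falling back to buffered I/O for tail-packed tails;
 * everything else is served from the page cache.
 */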
static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	/* no need to take (shared) inode lock since it's a ro filesystem */
	if (!iov_iter_count(to))
		return 0;

#ifdef CONFIG_FS_DAX
	if (IS_DAX(iocb->ki_filp->f_mapping->host))
		return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
#endif
	if (iocb->ki_flags & IOCB_DIRECT) {
		int err = erofs_prepare_dio(iocb, to);

		if (!err)
			return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
					    NULL, 0);
		if (err < 0)
			return err;
	}
	return filemap_read(iocb, to, 0);
}

/* for uncompressed (aligned) files and raw access for other files */
const struct address_space_operations erofs_raw_access_aops = {
	.readpage = erofs_raw_access_readpage,
	.readahead = erofs_raw_access_readahead,
	.bmap = erofs_bmap,
	.direct_IO = noop_direct_IO,
};

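/*
 * DAX mmap support: PTE and huge faults are both served straight from
 * the iomap extents above; shared writable mappings are rejected since
 * the filesystem is read-only.
 */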
#ifdef CONFIG_FS_DAX
static vm_fault_t erofs_dax_huge_fault(struct vm_fault *vmf,
		enum page_entry_size pe_size)
{
	return dax_iomap_fault(vmf, pe_size, NULL, NULL, &erofs_iomap_ops);
}

static vm_fault_t erofs_dax_fault(struct vm_fault *vmf)
{
	return erofs_dax_huge_fault(vmf, PE_SIZE_PTE);
}

static const struct vm_operations_struct erofs_dax_vm_ops = {
	.fault		= erofs_dax_fault,
	.huge_fault	= erofs_dax_huge_fault,
};

static int erofs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	if (!IS_DAX(file_inode(file)))
		return generic_file_readonly_mmap(file, vma);

	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
		return -EINVAL;

	vma->vm_ops = &erofs_dax_vm_ops;
	vma->vm_flags |= VM_HUGEPAGE;
	return 0;
}
#else
#define erofs_file_mmap	generic_file_readonly_mmap
#endif

const struct file_operations erofs_file_fops = {
	.llseek		= generic_file_llseek,
	.read_iter	= erofs_file_read_iter,
	.mmap		= erofs_file_mmap,
	.splice_read	= generic_file_splice_read,
};