/* fs/erofs/data.c (revision fdf80a47) */
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017-2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 * Copyright (C) 2021, Alibaba Cloud
 */
#include "internal.h"
#include <linux/prefetch.h>
#include <linux/dax.h>
#include <trace/events/erofs.h>

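/*
 * Read the metadata page at @blkaddr from the block device mapping and
 * return it locked; read_cache_page_gfp() only returns uptodate pages,
 * so no extra wait is needed here.
 */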
struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr)
{
	struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping;
	struct page *page;

	page = read_cache_page_gfp(mapping, blkaddr,
				   mapping_gfp_constraint(mapping, ~__GFP_FS));
	/* should already be PageUptodate */
	if (!IS_ERR(page))
		lock_page(page);
	return page;
}

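/* undo the kmap()/kmap_atomic() mapping set up by erofs_read_metabuf() */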
void erofs_unmap_metabuf(struct erofs_buf *buf)
{
	if (buf->kmap_type == EROFS_KMAP)
		kunmap(buf->page);
	else if (buf->kmap_type == EROFS_KMAP_ATOMIC)
		kunmap_atomic(buf->base);
	buf->base = NULL;
	buf->kmap_type = EROFS_NO_KMAP;
}

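/* unmap and drop the page reference held in @buf; a no-op if nothing is cached */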
void erofs_put_metabuf(struct erofs_buf *buf)
{
	if (!buf->page)
		return;
	erofs_unmap_metabuf(buf);
	put_page(buf->page);
	buf->page = NULL;
}

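/*
 * Read the metadata block at @blkaddr into @buf with the requested kmap
 * type, reusing the cached page when it already covers that block.
 * Returns a pointer into the block (NULL for EROFS_NO_KMAP) or an
 * ERR_PTR() on failure; mixing kmap types on one buffer is rejected.
 */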
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
			erofs_blk_t blkaddr, enum erofs_kmap_type type)
{
	struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping;
	erofs_off_t offset = blknr_to_addr(blkaddr);
	pgoff_t index = offset >> PAGE_SHIFT;
	struct page *page = buf->page;

	if (!page || page->index != index) {
		erofs_put_metabuf(buf);
		page = read_cache_page_gfp(mapping, index,
				mapping_gfp_constraint(mapping, ~__GFP_FS));
		if (IS_ERR(page))
			return page;
		/* should already be PageUptodate, no need to lock page */
		buf->page = page;
	}
	if (buf->kmap_type == EROFS_NO_KMAP) {
		if (type == EROFS_KMAP)
			buf->base = kmap(page);
		else if (type == EROFS_KMAP_ATOMIC)
			buf->base = kmap_atomic(page);
		buf->kmap_type = type;
	} else if (buf->kmap_type != type) {
		DBG_BUGON(1);
		return ERR_PTR(-EFAULT);
	}
	if (type == EROFS_NO_KMAP)
		return NULL;
	return buf->base + (offset & ~PAGE_MASK);
}

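/*
 * Map a logical extent of a plain (non-chunk-based) inode: either a run of
 * blocks starting at vi->raw_blkaddr or, for tail-packing inline inodes,
 * the inline tail stored right after the on-disk inode and xattrs.
 */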
static int erofs_map_blocks_flatmode(struct inode *inode,
				     struct erofs_map_blocks *map,
				     int flags)
{
	erofs_blk_t nblocks, lastblk;
	u64 offset = map->m_la;
	struct erofs_inode *vi = EROFS_I(inode);
	bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);

	nblocks = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
	lastblk = nblocks - tailendpacking;

	/* there is no hole in flatmode */
	map->m_flags = EROFS_MAP_MAPPED;
	if (offset < blknr_to_addr(lastblk)) {
		map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la;
		map->m_plen = blknr_to_addr(lastblk) - offset;
	} else if (tailendpacking) {
		/* 2 - inode inline B: inode, [xattrs], inline last blk... */
		struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);

		map->m_pa = iloc(sbi, vi->nid) + vi->inode_isize +
			vi->xattr_isize + erofs_blkoff(map->m_la);
		map->m_plen = inode->i_size - offset;

		/* inline data should be located in the same meta block */
		if (erofs_blkoff(map->m_pa) + map->m_plen > EROFS_BLKSIZ) {
			erofs_err(inode->i_sb,
				  "inline data cross block boundary @ nid %llu",
				  vi->nid);
			DBG_BUGON(1);
			return -EFSCORRUPTED;
		}
		map->m_flags |= EROFS_MAP_META;
	} else {
		erofs_err(inode->i_sb,
			  "internal error @ nid: %llu (size %llu), m_la 0x%llx",
			  vi->nid, inode->i_size, map->m_la);
		DBG_BUGON(1);
		return -EIO;
	}
	return 0;
}

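/*
 * Translate the logical offset in map->m_la into a physical extent.
 * Chunk-based inodes look up the on-disk block map or chunk indexes;
 * all other layouts go through erofs_map_blocks_flatmode().
 */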
static int erofs_map_blocks(struct inode *inode,
			    struct erofs_map_blocks *map, int flags)
{
	struct super_block *sb = inode->i_sb;
	struct erofs_inode *vi = EROFS_I(inode);
	struct erofs_inode_chunk_index *idx;
	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	u64 chunknr;
	unsigned int unit;
	erofs_off_t pos;
	void *kaddr;
	int err = 0;

	trace_erofs_map_blocks_enter(inode, map, flags);
	map->m_deviceid = 0;
	if (map->m_la >= inode->i_size) {
		/* leave out-of-bound access unmapped */
		map->m_flags = 0;
		map->m_plen = 0;
		goto out;
	}

	if (vi->datalayout != EROFS_INODE_CHUNK_BASED) {
		err = erofs_map_blocks_flatmode(inode, map, flags);
		goto out;
	}

	if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
		unit = sizeof(*idx);			/* chunk index */
	else
		unit = EROFS_BLOCK_MAP_ENTRY_SIZE;	/* block map */

	chunknr = map->m_la >> vi->chunkbits;
	pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
		    vi->xattr_isize, unit) + unit * chunknr;

	kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP);
	if (IS_ERR(kaddr)) {
		err = PTR_ERR(kaddr);
		goto out;
	}
	map->m_la = chunknr << vi->chunkbits;
	map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits,
			    roundup(inode->i_size - map->m_la, EROFS_BLKSIZ));

	/* handle block map */
	if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) {
		__le32 *blkaddr = kaddr + erofs_blkoff(pos);

		if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) {
			map->m_flags = 0;
		} else {
			map->m_pa = blknr_to_addr(le32_to_cpu(*blkaddr));
			map->m_flags = EROFS_MAP_MAPPED;
		}
		goto out_unlock;
	}
	/* parse chunk indexes */
	idx = kaddr + erofs_blkoff(pos);
	switch (le32_to_cpu(idx->blkaddr)) {
	case EROFS_NULL_ADDR:
		map->m_flags = 0;
		break;
	default:
		map->m_deviceid = le16_to_cpu(idx->device_id) &
			EROFS_SB(sb)->device_id_mask;
		map->m_pa = blknr_to_addr(le32_to_cpu(idx->blkaddr));
		map->m_flags = EROFS_MAP_MAPPED;
		break;
	}
out_unlock:
	erofs_put_metabuf(&buf);
out:
	if (!err)
		map->m_llen = map->m_plen;
	trace_erofs_map_blocks_exit(inode, map, flags, 0);
	return err;
}

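/*
 * Resolve the (device id, physical address) pair in @map to a concrete
 * block/DAX device: the primary device by default, a device looked up by
 * id, or the extra device whose mapped range covers map->m_pa.
 */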
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
{
	struct erofs_dev_context *devs = EROFS_SB(sb)->devs;
	struct erofs_device_info *dif;
	int id;

	/* primary device by default */
	map->m_bdev = sb->s_bdev;
	map->m_daxdev = EROFS_SB(sb)->dax_dev;

	if (map->m_deviceid) {
		down_read(&devs->rwsem);
		dif = idr_find(&devs->tree, map->m_deviceid - 1);
		if (!dif) {
			up_read(&devs->rwsem);
			return -ENODEV;
		}
		map->m_bdev = dif->bdev;
		map->m_daxdev = dif->dax_dev;
		up_read(&devs->rwsem);
	} else if (devs->extra_devices) {
		down_read(&devs->rwsem);
		idr_for_each_entry(&devs->tree, dif, id) {
			erofs_off_t startoff, length;

			if (!dif->mapped_blkaddr)
				continue;
			startoff = blknr_to_addr(dif->mapped_blkaddr);
			length = blknr_to_addr(dif->blocks);

			if (map->m_pa >= startoff &&
			    map->m_pa < startoff + length) {
				map->m_pa -= startoff;
				map->m_bdev = dif->bdev;
				map->m_daxdev = dif->dax_dev;
				break;
			}
		}
		up_read(&devs->rwsem);
	}
	return 0;
}

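/*
 * iomap_begin hook: map [offset, offset + length) via erofs_map_blocks()
 * and erofs_map_dev(), reporting a hole, an inline extent (the metadata
 * block stays mapped and is stashed in iomap->private for iomap_end), or
 * a regular mapped extent.
 */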
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
	int ret;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;

	map.m_la = offset;
	map.m_llen = length;

	ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
	if (ret < 0)
		return ret;

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(inode->i_sb, &mdev);
	if (ret)
		return ret;

	iomap->bdev = mdev.m_bdev;
	iomap->dax_dev = mdev.m_daxdev;
	iomap->offset = map.m_la;
	iomap->length = map.m_llen;
	iomap->flags = 0;
	iomap->private = NULL;

	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		iomap->type = IOMAP_HOLE;
		iomap->addr = IOMAP_NULL_ADDR;
		if (!iomap->length)
			iomap->length = length;
		return 0;
	}

	if (map.m_flags & EROFS_MAP_META) {
		void *ptr;
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;

		iomap->type = IOMAP_INLINE;
		ptr = erofs_read_metabuf(&buf, inode->i_sb,
					 erofs_blknr(mdev.m_pa), EROFS_KMAP);
		if (IS_ERR(ptr))
			return PTR_ERR(ptr);
		iomap->inline_data = ptr + erofs_blkoff(mdev.m_pa);
		iomap->private = buf.base;
	} else {
		iomap->type = IOMAP_MAPPED;
		iomap->addr = mdev.m_pa;
	}
	return 0;
}

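/*
 * iomap_end hook: release the metadata buffer that erofs_iomap_begin()
 * left mapped for inline extents.
 */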
static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
		ssize_t written, unsigned int flags, struct iomap *iomap)
{
	void *ptr = iomap->private;

	if (ptr) {
		struct erofs_buf buf = {
			.page = kmap_to_page(ptr),
			.base = ptr,
			.kmap_type = EROFS_KMAP,
		};

		DBG_BUGON(iomap->type != IOMAP_INLINE);
		erofs_put_metabuf(&buf);
	} else {
		DBG_BUGON(iomap->type == IOMAP_INLINE);
	}
	return written;
}

static const struct iomap_ops erofs_iomap_ops = {
	.iomap_begin = erofs_iomap_begin,
	.iomap_end = erofs_iomap_end,
};

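/*
 * fiemap reports compressed inodes through the z_erofs iomap ops (when
 * CONFIG_EROFS_FS_ZIP is enabled) and uncompressed ones through
 * erofs_iomap_ops.
 */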
int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 start, u64 len)
{
	if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) {
#ifdef CONFIG_EROFS_FS_ZIP
		return iomap_fiemap(inode, fieinfo, start, len,
				    &z_erofs_iomap_report_ops);
#else
		return -EOPNOTSUPP;
#endif
	}
	return iomap_fiemap(inode, fieinfo, start, len, &erofs_iomap_ops);
}

/*
 * since we don't have write or truncate flows, no inode
 * locking needs to be held at the moment.
 */
static int erofs_readpage(struct file *file, struct page *page)
{
	return iomap_readpage(page, &erofs_iomap_ops);
}

static void erofs_readahead(struct readahead_control *rac)
{
	return iomap_readahead(rac, &erofs_iomap_ops);
}

static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
{
	return iomap_bmap(mapping, block, &erofs_iomap_ops);
}

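/*
 * Direct I/O is only allowed when the request position, length and user
 * memory are all aligned to the logical block size of the backing device
 * (or to the inode block size when there is no block device).
 */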
static int erofs_prepare_dio(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	loff_t align = iocb->ki_pos | iov_iter_count(to) |
		iov_iter_alignment(to);
	struct block_device *bdev = inode->i_sb->s_bdev;
	unsigned int blksize_mask;

	if (bdev)
		blksize_mask = (1 << ilog2(bdev_logical_block_size(bdev))) - 1;
	else
		blksize_mask = (1 << inode->i_blkbits) - 1;

	if (align & blksize_mask)
		return -EINVAL;
	return 0;
}

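/* dispatch reads to the DAX, direct or buffered I/O path */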
static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	/* no need to take (shared) inode lock since it's a ro filesystem */
	if (!iov_iter_count(to))
		return 0;

#ifdef CONFIG_FS_DAX
	if (IS_DAX(iocb->ki_filp->f_mapping->host))
		return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
#endif
	if (iocb->ki_flags & IOCB_DIRECT) {
		int err = erofs_prepare_dio(iocb, to);

		if (!err)
			return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
					    NULL, 0, 0);
		if (err < 0)
			return err;
	}
	return filemap_read(iocb, to, 0);
}

/* for uncompressed (aligned) files and raw access for other files */
const struct address_space_operations erofs_raw_access_aops = {
	.readpage = erofs_readpage,
	.readahead = erofs_readahead,
	.bmap = erofs_bmap,
	.direct_IO = noop_direct_IO,
};

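/* DAX mmap support: page faults are served directly through the iomap path */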
#ifdef CONFIG_FS_DAX
static vm_fault_t erofs_dax_huge_fault(struct vm_fault *vmf,
		enum page_entry_size pe_size)
{
	return dax_iomap_fault(vmf, pe_size, NULL, NULL, &erofs_iomap_ops);
}

static vm_fault_t erofs_dax_fault(struct vm_fault *vmf)
{
	return erofs_dax_huge_fault(vmf, PE_SIZE_PTE);
}

static const struct vm_operations_struct erofs_dax_vm_ops = {
	.fault		= erofs_dax_fault,
	.huge_fault	= erofs_dax_huge_fault,
};

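/*
 * Non-DAX inodes fall back to generic_file_readonly_mmap(); DAX inodes
 * reject writable shared mappings (read-only fs) and use the DAX fault
 * handlers above.
 */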
static int erofs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	if (!IS_DAX(file_inode(file)))
		return generic_file_readonly_mmap(file, vma);

	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
		return -EINVAL;

	vma->vm_ops = &erofs_dax_vm_ops;
	vma->vm_flags |= VM_HUGEPAGE;
	return 0;
}
#else
#define erofs_file_mmap	generic_file_readonly_mmap
#endif

const struct file_operations erofs_file_fops = {
	.llseek		= generic_file_llseek,
	.read_iter	= erofs_file_read_iter,
	.mmap		= erofs_file_mmap,
	.splice_read	= generic_file_splice_read,
};