// SPDX-License-Identifier: GPL-2.0+
/*
 * Meta data file for NILFS
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * Written by Ryusuke Konishi.
 */
95eb563f5SRyusuke Konishi
105eb563f5SRyusuke Konishi #include <linux/buffer_head.h>
115eb563f5SRyusuke Konishi #include <linux/mpage.h>
125eb563f5SRyusuke Konishi #include <linux/mm.h>
135eb563f5SRyusuke Konishi #include <linux/writeback.h>
145eb563f5SRyusuke Konishi #include <linux/backing-dev.h>
155eb563f5SRyusuke Konishi #include <linux/swap.h>
165a0e3ad6STejun Heo #include <linux/slab.h>
175eb563f5SRyusuke Konishi #include "nilfs.h"
1805d0e94bSRyusuke Konishi #include "btnode.h"
195eb563f5SRyusuke Konishi #include "segment.h"
205eb563f5SRyusuke Konishi #include "page.h"
215eb563f5SRyusuke Konishi #include "mdt.h"
222d19961dSRyusuke Konishi #include "alloc.h" /* nilfs_palloc_destroy_cache() */
235eb563f5SRyusuke Konishi
24a9cd207cSHitoshi Mitake #include <trace/events/nilfs2.h>
255eb563f5SRyusuke Konishi
/* Upper bound on the number of readahead requests issued per block read */
#define NILFS_MDT_MAX_RA_BLOCKS		(16 - 1)
275eb563f5SRyusuke Konishi
285eb563f5SRyusuke Konishi
295eb563f5SRyusuke Konishi static int
nilfs_mdt_insert_new_block(struct inode * inode,unsigned long block,struct buffer_head * bh,void (* init_block)(struct inode *,struct buffer_head *,void *))305eb563f5SRyusuke Konishi nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
315eb563f5SRyusuke Konishi struct buffer_head *bh,
325eb563f5SRyusuke Konishi void (*init_block)(struct inode *,
335eb563f5SRyusuke Konishi struct buffer_head *, void *))
345eb563f5SRyusuke Konishi {
355eb563f5SRyusuke Konishi struct nilfs_inode_info *ii = NILFS_I(inode);
365eb563f5SRyusuke Konishi void *kaddr;
375eb563f5SRyusuke Konishi int ret;
385eb563f5SRyusuke Konishi
395eb563f5SRyusuke Konishi /* Caller exclude read accesses using page lock */
405eb563f5SRyusuke Konishi
415eb563f5SRyusuke Konishi /* set_buffer_new(bh); */
425eb563f5SRyusuke Konishi bh->b_blocknr = 0;
435eb563f5SRyusuke Konishi
445eb563f5SRyusuke Konishi ret = nilfs_bmap_insert(ii->i_bmap, block, (unsigned long)bh);
455eb563f5SRyusuke Konishi if (unlikely(ret))
465eb563f5SRyusuke Konishi return ret;
475eb563f5SRyusuke Konishi
485eb563f5SRyusuke Konishi set_buffer_mapped(bh);
495eb563f5SRyusuke Konishi
507b9c0976SCong Wang kaddr = kmap_atomic(bh->b_page);
5193407472SFabian Frederick memset(kaddr + bh_offset(bh), 0, i_blocksize(inode));
525eb563f5SRyusuke Konishi if (init_block)
535eb563f5SRyusuke Konishi init_block(inode, bh, kaddr);
545eb563f5SRyusuke Konishi flush_dcache_page(bh->b_page);
557b9c0976SCong Wang kunmap_atomic(kaddr);
565eb563f5SRyusuke Konishi
575eb563f5SRyusuke Konishi set_buffer_uptodate(bh);
585fc7b141SRyusuke Konishi mark_buffer_dirty(bh);
595eb563f5SRyusuke Konishi nilfs_mdt_mark_dirty(inode);
60a9cd207cSHitoshi Mitake
61a9cd207cSHitoshi Mitake trace_nilfs2_mdt_insert_new_block(inode, inode->i_ino, block);
62a9cd207cSHitoshi Mitake
635eb563f5SRyusuke Konishi return 0;
645eb563f5SRyusuke Konishi }
655eb563f5SRyusuke Konishi
nilfs_mdt_create_block(struct inode * inode,unsigned long block,struct buffer_head ** out_bh,void (* init_block)(struct inode *,struct buffer_head *,void *))665eb563f5SRyusuke Konishi static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
675eb563f5SRyusuke Konishi struct buffer_head **out_bh,
685eb563f5SRyusuke Konishi void (*init_block)(struct inode *,
695eb563f5SRyusuke Konishi struct buffer_head *,
705eb563f5SRyusuke Konishi void *))
715eb563f5SRyusuke Konishi {
725eb563f5SRyusuke Konishi struct super_block *sb = inode->i_sb;
735eb563f5SRyusuke Konishi struct nilfs_transaction_info ti;
745eb563f5SRyusuke Konishi struct buffer_head *bh;
755eb563f5SRyusuke Konishi int err;
765eb563f5SRyusuke Konishi
775eb563f5SRyusuke Konishi nilfs_transaction_begin(sb, &ti, 0);
785eb563f5SRyusuke Konishi
795eb563f5SRyusuke Konishi err = -ENOMEM;
805eb563f5SRyusuke Konishi bh = nilfs_grab_buffer(inode, inode->i_mapping, block, 0);
815eb563f5SRyusuke Konishi if (unlikely(!bh))
825eb563f5SRyusuke Konishi goto failed_unlock;
835eb563f5SRyusuke Konishi
845eb563f5SRyusuke Konishi err = -EEXIST;
8514351104SRyusuke Konishi if (buffer_uptodate(bh))
865eb563f5SRyusuke Konishi goto failed_bh;
8714351104SRyusuke Konishi
885eb563f5SRyusuke Konishi wait_on_buffer(bh);
895eb563f5SRyusuke Konishi if (buffer_uptodate(bh))
905eb563f5SRyusuke Konishi goto failed_bh;
915eb563f5SRyusuke Konishi
92090fd5b1SRyusuke Konishi bh->b_bdev = sb->s_bdev;
935eb563f5SRyusuke Konishi err = nilfs_mdt_insert_new_block(inode, block, bh, init_block);
945eb563f5SRyusuke Konishi if (likely(!err)) {
955eb563f5SRyusuke Konishi get_bh(bh);
965eb563f5SRyusuke Konishi *out_bh = bh;
975eb563f5SRyusuke Konishi }
985eb563f5SRyusuke Konishi
995eb563f5SRyusuke Konishi failed_bh:
1005eb563f5SRyusuke Konishi unlock_page(bh->b_page);
10109cbfeafSKirill A. Shutemov put_page(bh->b_page);
1025eb563f5SRyusuke Konishi brelse(bh);
1035eb563f5SRyusuke Konishi
1045eb563f5SRyusuke Konishi failed_unlock:
10547420c79SRyusuke Konishi if (likely(!err))
10647420c79SRyusuke Konishi err = nilfs_transaction_commit(sb);
10747420c79SRyusuke Konishi else
10847420c79SRyusuke Konishi nilfs_transaction_abort(sb);
109090fd5b1SRyusuke Konishi
1105eb563f5SRyusuke Konishi return err;
1115eb563f5SRyusuke Konishi }
1125eb563f5SRyusuke Konishi
1135eb563f5SRyusuke Konishi static int
nilfs_mdt_submit_block(struct inode * inode,unsigned long blkoff,blk_opf_t opf,struct buffer_head ** out_bh)114ed451259SBart Van Assche nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, blk_opf_t opf,
115ed451259SBart Van Assche struct buffer_head **out_bh)
1165eb563f5SRyusuke Konishi {
1175eb563f5SRyusuke Konishi struct buffer_head *bh;
1180f3fe33bSRyusuke Konishi __u64 blknum = 0;
1195eb563f5SRyusuke Konishi int ret = -ENOMEM;
1205eb563f5SRyusuke Konishi
1215eb563f5SRyusuke Konishi bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0);
1225eb563f5SRyusuke Konishi if (unlikely(!bh))
1235eb563f5SRyusuke Konishi goto failed;
1245eb563f5SRyusuke Konishi
1255eb563f5SRyusuke Konishi ret = -EEXIST; /* internal code */
1265eb563f5SRyusuke Konishi if (buffer_uptodate(bh))
1275eb563f5SRyusuke Konishi goto out;
1285eb563f5SRyusuke Konishi
129ed451259SBart Van Assche if (opf & REQ_RAHEAD) {
1305eb563f5SRyusuke Konishi if (!trylock_buffer(bh)) {
1315eb563f5SRyusuke Konishi ret = -EBUSY;
1325eb563f5SRyusuke Konishi goto failed_bh;
1335eb563f5SRyusuke Konishi }
134ed451259SBart Van Assche } else /* opf == REQ_OP_READ */
1355eb563f5SRyusuke Konishi lock_buffer(bh);
1365eb563f5SRyusuke Konishi
1375eb563f5SRyusuke Konishi if (buffer_uptodate(bh)) {
1385eb563f5SRyusuke Konishi unlock_buffer(bh);
1395eb563f5SRyusuke Konishi goto out;
1405eb563f5SRyusuke Konishi }
14114351104SRyusuke Konishi
14214351104SRyusuke Konishi ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff, &blknum);
1435eb563f5SRyusuke Konishi if (unlikely(ret)) {
1445eb563f5SRyusuke Konishi unlock_buffer(bh);
1455eb563f5SRyusuke Konishi goto failed_bh;
1465eb563f5SRyusuke Konishi }
147c6e07188SRyusuke Konishi map_bh(bh, inode->i_sb, (sector_t)blknum);
1485eb563f5SRyusuke Konishi
1495eb563f5SRyusuke Konishi bh->b_end_io = end_buffer_read_sync;
1505eb563f5SRyusuke Konishi get_bh(bh);
151ed451259SBart Van Assche submit_bh(opf, bh);
1525eb563f5SRyusuke Konishi ret = 0;
153a9cd207cSHitoshi Mitake
154ed451259SBart Van Assche trace_nilfs2_mdt_submit_block(inode, inode->i_ino, blkoff,
155ed451259SBart Van Assche opf & REQ_OP_MASK);
1565eb563f5SRyusuke Konishi out:
1575eb563f5SRyusuke Konishi get_bh(bh);
1585eb563f5SRyusuke Konishi *out_bh = bh;
1595eb563f5SRyusuke Konishi
1605eb563f5SRyusuke Konishi failed_bh:
1615eb563f5SRyusuke Konishi unlock_page(bh->b_page);
16209cbfeafSKirill A. Shutemov put_page(bh->b_page);
1635eb563f5SRyusuke Konishi brelse(bh);
1645eb563f5SRyusuke Konishi failed:
1655eb563f5SRyusuke Konishi return ret;
1665eb563f5SRyusuke Konishi }
1675eb563f5SRyusuke Konishi
nilfs_mdt_read_block(struct inode * inode,unsigned long block,int readahead,struct buffer_head ** out_bh)1685eb563f5SRyusuke Konishi static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
169b34a6506SRyusuke Konishi int readahead, struct buffer_head **out_bh)
1705eb563f5SRyusuke Konishi {
1715eb563f5SRyusuke Konishi struct buffer_head *first_bh, *bh;
1725eb563f5SRyusuke Konishi unsigned long blkoff;
1735eb563f5SRyusuke Konishi int i, nr_ra_blocks = NILFS_MDT_MAX_RA_BLOCKS;
1745eb563f5SRyusuke Konishi int err;
1755eb563f5SRyusuke Konishi
176ed451259SBart Van Assche err = nilfs_mdt_submit_block(inode, block, REQ_OP_READ, &first_bh);
1775eb563f5SRyusuke Konishi if (err == -EEXIST) /* internal code */
1785eb563f5SRyusuke Konishi goto out;
1795eb563f5SRyusuke Konishi
1805eb563f5SRyusuke Konishi if (unlikely(err))
1815eb563f5SRyusuke Konishi goto failed;
1825eb563f5SRyusuke Konishi
183b34a6506SRyusuke Konishi if (readahead) {
1845eb563f5SRyusuke Konishi blkoff = block + 1;
1855eb563f5SRyusuke Konishi for (i = 0; i < nr_ra_blocks; i++, blkoff++) {
186ed451259SBart Van Assche err = nilfs_mdt_submit_block(inode, blkoff,
187ed451259SBart Van Assche REQ_OP_READ | REQ_RAHEAD, &bh);
1885eb563f5SRyusuke Konishi if (likely(!err || err == -EEXIST))
1895eb563f5SRyusuke Konishi brelse(bh);
1905eb563f5SRyusuke Konishi else if (err != -EBUSY)
191b34a6506SRyusuke Konishi break;
192b34a6506SRyusuke Konishi /* abort readahead if bmap lookup failed */
1935eb563f5SRyusuke Konishi if (!buffer_locked(first_bh))
1945eb563f5SRyusuke Konishi goto out_no_wait;
1955eb563f5SRyusuke Konishi }
196b34a6506SRyusuke Konishi }
1975eb563f5SRyusuke Konishi
1985eb563f5SRyusuke Konishi wait_on_buffer(first_bh);
1995eb563f5SRyusuke Konishi
2005eb563f5SRyusuke Konishi out_no_wait:
2015eb563f5SRyusuke Konishi err = -EIO;
20239a9dccaSRyusuke Konishi if (!buffer_uptodate(first_bh)) {
203a1d0747aSJoe Perches nilfs_err(inode->i_sb,
20439a9dccaSRyusuke Konishi "I/O error reading meta-data file (ino=%lu, block-offset=%lu)",
20539a9dccaSRyusuke Konishi inode->i_ino, block);
2065eb563f5SRyusuke Konishi goto failed_bh;
20739a9dccaSRyusuke Konishi }
2085eb563f5SRyusuke Konishi out:
2095eb563f5SRyusuke Konishi *out_bh = first_bh;
2105eb563f5SRyusuke Konishi return 0;
2115eb563f5SRyusuke Konishi
2125eb563f5SRyusuke Konishi failed_bh:
2135eb563f5SRyusuke Konishi brelse(first_bh);
2145eb563f5SRyusuke Konishi failed:
2155eb563f5SRyusuke Konishi return err;
2165eb563f5SRyusuke Konishi }
2175eb563f5SRyusuke Konishi
/**
 * nilfs_mdt_get_block - read or create a buffer on meta data file.
 * @inode: inode of the meta data file
 * @blkoff: block offset
 * @create: create flag
 * @init_block: initializer used for newly allocated block
 * @out_bh: output of a pointer to the buffer_head
 *
 * nilfs_mdt_get_block() first tries to read the block at @blkoff.  If the
 * block does not exist and @create is not zero, it tries to create the
 * block; when the creation reports that the block has appeared in the
 * meantime, the read is attempted again.  Readahead is requested only when
 * @create is zero.  @out_bh is substituted only when zero is returned.
 *
 * Return Value: On success, it returns 0. On error, the following negative
 * error code is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 *
 * %-EIO - I/O error
 *
 * %-ENOENT - the specified block does not exist (hole block)
 *
 * %-EROFS - Read only filesystem (for create mode)
 */
int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
			void (*init_block)(struct inode *,
					   struct buffer_head *, void *),
			struct buffer_head **out_bh)
{
	int err;

	/* Should be rewritten with merging nilfs_mdt_read_block() */
	do {
		err = nilfs_mdt_read_block(inode, blkoff, !create, out_bh);
		if (err != -ENOENT || !create)
			break;

		err = nilfs_mdt_create_block(inode, blkoff, out_bh,
					     init_block);
		/* -EEXIST: the block showed up meanwhile, so read it again */
	} while (unlikely(err == -EEXIST));

	return err;
}
2625eb563f5SRyusuke Konishi
2635eb563f5SRyusuke Konishi /**
264fa33915cSRyusuke Konishi * nilfs_mdt_find_block - find and get a buffer on meta data file.
265fa33915cSRyusuke Konishi * @inode: inode of the meta data file
266fa33915cSRyusuke Konishi * @start: start block offset (inclusive)
267fa33915cSRyusuke Konishi * @end: end block offset (inclusive)
268fa33915cSRyusuke Konishi * @blkoff: block offset
269fa33915cSRyusuke Konishi * @out_bh: place to store a pointer to buffer_head struct
270fa33915cSRyusuke Konishi *
271fa33915cSRyusuke Konishi * nilfs_mdt_find_block() looks up an existing block in range of
272fa33915cSRyusuke Konishi * [@start, @end] and stores pointer to a buffer head of the block to
273fa33915cSRyusuke Konishi * @out_bh, and block offset to @blkoff, respectively. @out_bh and
274fa33915cSRyusuke Konishi * @blkoff are substituted only when zero is returned.
275fa33915cSRyusuke Konishi *
276fa33915cSRyusuke Konishi * Return Value: On success, it returns 0. On error, the following negative
277fa33915cSRyusuke Konishi * error code is returned.
278fa33915cSRyusuke Konishi *
279fa33915cSRyusuke Konishi * %-ENOMEM - Insufficient memory available.
280fa33915cSRyusuke Konishi *
281fa33915cSRyusuke Konishi * %-EIO - I/O error
282fa33915cSRyusuke Konishi *
283fa33915cSRyusuke Konishi * %-ENOENT - no block was found in the range
284fa33915cSRyusuke Konishi */
nilfs_mdt_find_block(struct inode * inode,unsigned long start,unsigned long end,unsigned long * blkoff,struct buffer_head ** out_bh)285fa33915cSRyusuke Konishi int nilfs_mdt_find_block(struct inode *inode, unsigned long start,
286fa33915cSRyusuke Konishi unsigned long end, unsigned long *blkoff,
287fa33915cSRyusuke Konishi struct buffer_head **out_bh)
288fa33915cSRyusuke Konishi {
289fa33915cSRyusuke Konishi __u64 next;
290fa33915cSRyusuke Konishi int ret;
291fa33915cSRyusuke Konishi
292fa33915cSRyusuke Konishi if (unlikely(start > end))
293fa33915cSRyusuke Konishi return -ENOENT;
294fa33915cSRyusuke Konishi
295fa33915cSRyusuke Konishi ret = nilfs_mdt_read_block(inode, start, true, out_bh);
296fa33915cSRyusuke Konishi if (!ret) {
297fa33915cSRyusuke Konishi *blkoff = start;
298fa33915cSRyusuke Konishi goto out;
299fa33915cSRyusuke Konishi }
300fa33915cSRyusuke Konishi if (unlikely(ret != -ENOENT || start == ULONG_MAX))
301fa33915cSRyusuke Konishi goto out;
302fa33915cSRyusuke Konishi
303fa33915cSRyusuke Konishi ret = nilfs_bmap_seek_key(NILFS_I(inode)->i_bmap, start + 1, &next);
304fa33915cSRyusuke Konishi if (!ret) {
305fa33915cSRyusuke Konishi if (next <= end) {
306fa33915cSRyusuke Konishi ret = nilfs_mdt_read_block(inode, next, true, out_bh);
307fa33915cSRyusuke Konishi if (!ret)
308fa33915cSRyusuke Konishi *blkoff = next;
309fa33915cSRyusuke Konishi } else {
310fa33915cSRyusuke Konishi ret = -ENOENT;
311fa33915cSRyusuke Konishi }
312fa33915cSRyusuke Konishi }
313fa33915cSRyusuke Konishi out:
314fa33915cSRyusuke Konishi return ret;
315fa33915cSRyusuke Konishi }
316fa33915cSRyusuke Konishi
317fa33915cSRyusuke Konishi /**
3185eb563f5SRyusuke Konishi * nilfs_mdt_delete_block - make a hole on the meta data file.
3195eb563f5SRyusuke Konishi * @inode: inode of the meta data file
3205eb563f5SRyusuke Konishi * @block: block offset
3215eb563f5SRyusuke Konishi *
3225eb563f5SRyusuke Konishi * Return Value: On success, zero is returned.
3235eb563f5SRyusuke Konishi * On error, one of the following negative error code is returned.
3245eb563f5SRyusuke Konishi *
3255eb563f5SRyusuke Konishi * %-ENOMEM - Insufficient memory available.
3265eb563f5SRyusuke Konishi *
3275eb563f5SRyusuke Konishi * %-EIO - I/O error
3285eb563f5SRyusuke Konishi */
nilfs_mdt_delete_block(struct inode * inode,unsigned long block)3295eb563f5SRyusuke Konishi int nilfs_mdt_delete_block(struct inode *inode, unsigned long block)
3305eb563f5SRyusuke Konishi {
3315eb563f5SRyusuke Konishi struct nilfs_inode_info *ii = NILFS_I(inode);
3325eb563f5SRyusuke Konishi int err;
3335eb563f5SRyusuke Konishi
3345eb563f5SRyusuke Konishi err = nilfs_bmap_delete(ii->i_bmap, block);
33584338237SRyusuke Konishi if (!err || err == -ENOENT) {
3365eb563f5SRyusuke Konishi nilfs_mdt_mark_dirty(inode);
3375eb563f5SRyusuke Konishi nilfs_mdt_forget_block(inode, block);
3385eb563f5SRyusuke Konishi }
3395eb563f5SRyusuke Konishi return err;
3405eb563f5SRyusuke Konishi }
3415eb563f5SRyusuke Konishi
3425eb563f5SRyusuke Konishi /**
3435eb563f5SRyusuke Konishi * nilfs_mdt_forget_block - discard dirty state and try to remove the page
3445eb563f5SRyusuke Konishi * @inode: inode of the meta data file
3455eb563f5SRyusuke Konishi * @block: block offset
3465eb563f5SRyusuke Konishi *
3475eb563f5SRyusuke Konishi * nilfs_mdt_forget_block() clears a dirty flag of the specified buffer, and
3485eb563f5SRyusuke Konishi * tries to release the page including the buffer from a page cache.
3495eb563f5SRyusuke Konishi *
3505eb563f5SRyusuke Konishi * Return Value: On success, 0 is returned. On error, one of the following
3515eb563f5SRyusuke Konishi * negative error code is returned.
3525eb563f5SRyusuke Konishi *
3535eb563f5SRyusuke Konishi * %-EBUSY - page has an active buffer.
3545eb563f5SRyusuke Konishi *
3555eb563f5SRyusuke Konishi * %-ENOENT - page cache has no page addressed by the offset.
3565eb563f5SRyusuke Konishi */
nilfs_mdt_forget_block(struct inode * inode,unsigned long block)3575eb563f5SRyusuke Konishi int nilfs_mdt_forget_block(struct inode *inode, unsigned long block)
3585eb563f5SRyusuke Konishi {
3595eb563f5SRyusuke Konishi pgoff_t index = (pgoff_t)block >>
36009cbfeafSKirill A. Shutemov (PAGE_SHIFT - inode->i_blkbits);
3615eb563f5SRyusuke Konishi struct page *page;
3625eb563f5SRyusuke Konishi unsigned long first_block;
3635eb563f5SRyusuke Konishi int ret = 0;
3645eb563f5SRyusuke Konishi int still_dirty;
3655eb563f5SRyusuke Konishi
3665eb563f5SRyusuke Konishi page = find_lock_page(inode->i_mapping, index);
3675eb563f5SRyusuke Konishi if (!page)
3685eb563f5SRyusuke Konishi return -ENOENT;
3695eb563f5SRyusuke Konishi
3705eb563f5SRyusuke Konishi wait_on_page_writeback(page);
3715eb563f5SRyusuke Konishi
3725eb563f5SRyusuke Konishi first_block = (unsigned long)index <<
37309cbfeafSKirill A. Shutemov (PAGE_SHIFT - inode->i_blkbits);
3745eb563f5SRyusuke Konishi if (page_has_buffers(page)) {
3755eb563f5SRyusuke Konishi struct buffer_head *bh;
3765eb563f5SRyusuke Konishi
3775eb563f5SRyusuke Konishi bh = nilfs_page_get_nth_block(page, block - first_block);
3785eb563f5SRyusuke Konishi nilfs_forget_buffer(bh);
3795eb563f5SRyusuke Konishi }
3805eb563f5SRyusuke Konishi still_dirty = PageDirty(page);
3815eb563f5SRyusuke Konishi unlock_page(page);
38209cbfeafSKirill A. Shutemov put_page(page);
3835eb563f5SRyusuke Konishi
3845eb563f5SRyusuke Konishi if (still_dirty ||
3855eb563f5SRyusuke Konishi invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0)
3865eb563f5SRyusuke Konishi ret = -EBUSY;
3875eb563f5SRyusuke Konishi return ret;
3885eb563f5SRyusuke Konishi }
3895eb563f5SRyusuke Konishi
nilfs_mdt_fetch_dirty(struct inode * inode)3905eb563f5SRyusuke Konishi int nilfs_mdt_fetch_dirty(struct inode *inode)
3915eb563f5SRyusuke Konishi {
3925eb563f5SRyusuke Konishi struct nilfs_inode_info *ii = NILFS_I(inode);
3935eb563f5SRyusuke Konishi
3945eb563f5SRyusuke Konishi if (nilfs_bmap_test_and_clear_dirty(ii->i_bmap)) {
3955eb563f5SRyusuke Konishi set_bit(NILFS_I_DIRTY, &ii->i_state);
3965eb563f5SRyusuke Konishi return 1;
3975eb563f5SRyusuke Konishi }
3985eb563f5SRyusuke Konishi return test_bit(NILFS_I_DIRTY, &ii->i_state);
3995eb563f5SRyusuke Konishi }
4005eb563f5SRyusuke Konishi
4015eb563f5SRyusuke Konishi static int
nilfs_mdt_write_page(struct page * page,struct writeback_control * wbc)4025eb563f5SRyusuke Konishi nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
4035eb563f5SRyusuke Konishi {
4048c26c4e2SVyacheslav Dubeyko struct inode *inode = page->mapping->host;
405ebdfed4dSRyusuke Konishi struct super_block *sb;
4065eb563f5SRyusuke Konishi int err = 0;
4075eb563f5SRyusuke Konishi
408bc98a42cSDavid Howells if (inode && sb_rdonly(inode->i_sb)) {
4098c26c4e2SVyacheslav Dubeyko /*
4108c26c4e2SVyacheslav Dubeyko * It means that filesystem was remounted in read-only
4118c26c4e2SVyacheslav Dubeyko * mode because of error or metadata corruption. But we
4128c26c4e2SVyacheslav Dubeyko * have dirty pages that try to be flushed in background.
4138c26c4e2SVyacheslav Dubeyko * So, here we simply discard this dirty page.
4148c26c4e2SVyacheslav Dubeyko */
4158c26c4e2SVyacheslav Dubeyko nilfs_clear_dirty_page(page, false);
4168c26c4e2SVyacheslav Dubeyko unlock_page(page);
4178c26c4e2SVyacheslav Dubeyko return -EROFS;
4188c26c4e2SVyacheslav Dubeyko }
4198c26c4e2SVyacheslav Dubeyko
4205eb563f5SRyusuke Konishi redirty_page_for_writepage(wbc, page);
4215eb563f5SRyusuke Konishi unlock_page(page);
4225eb563f5SRyusuke Konishi
423ebdfed4dSRyusuke Konishi if (!inode)
424ebdfed4dSRyusuke Konishi return 0;
425ebdfed4dSRyusuke Konishi
426ebdfed4dSRyusuke Konishi sb = inode->i_sb;
4275eb563f5SRyusuke Konishi
4285eb563f5SRyusuke Konishi if (wbc->sync_mode == WB_SYNC_ALL)
4295eb563f5SRyusuke Konishi err = nilfs_construct_segment(sb);
4305eb563f5SRyusuke Konishi else if (wbc->for_reclaim)
4315eb563f5SRyusuke Konishi nilfs_flush_segment(sb, inode->i_ino);
4325eb563f5SRyusuke Konishi
4335eb563f5SRyusuke Konishi return err;
4345eb563f5SRyusuke Konishi }
4355eb563f5SRyusuke Konishi
4365eb563f5SRyusuke Konishi
4377f09410bSAlexey Dobriyan static const struct address_space_operations def_mdt_aops = {
438e621900aSMatthew Wilcox (Oracle) .dirty_folio = block_dirty_folio,
4397ba13abbSMatthew Wilcox (Oracle) .invalidate_folio = block_invalidate_folio,
4405eb563f5SRyusuke Konishi .writepage = nilfs_mdt_write_page,
4415eb563f5SRyusuke Konishi };
4425eb563f5SRyusuke Konishi
4436e1d5dccSAlexey Dobriyan static const struct inode_operations def_mdt_iops;
444828c0950SAlexey Dobriyan static const struct file_operations def_mdt_fops;
4455eb563f5SRyusuke Konishi
4465e19a995SRyusuke Konishi
/*
 * nilfs_mdt_init - set up an inode as a metadata file
 * @inode: inode of the metadata file
 * @gfp_mask: gfp mask set on the inode's page cache mapping
 * @objsz: requested size of the private info object; at least
 *         sizeof(struct nilfs_mdt_info) bytes are always allocated
 *
 * Allocates the zero-filled private info, attaches it to @inode->i_private
 * and installs the default metadata-file operation tables.
 *
 * Return: 0 on success, %-ENOMEM if the private info cannot be allocated.
 */
int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz)
{
	struct nilfs_mdt_info *mdi;

	mdi = kzalloc(max(sizeof(*mdi), objsz), GFP_NOFS);
	if (!mdi)
		return -ENOMEM;

	init_rwsem(&mdi->mi_sem);
	inode->i_private = mdi;

	inode->i_mode = S_IFREG;
	inode->i_op = &def_mdt_iops;
	inode->i_fop = &def_mdt_fops;

	mapping_set_gfp_mask(inode->i_mapping, gfp_mask);
	inode->i_mapping->a_ops = &def_mdt_aops;

	return 0;
}
4675e19a995SRyusuke Konishi
4682d19961dSRyusuke Konishi /**
4692d19961dSRyusuke Konishi * nilfs_mdt_clear - do cleanup for the metadata file
4702d19961dSRyusuke Konishi * @inode: inode of the metadata file
4712d19961dSRyusuke Konishi */
nilfs_mdt_clear(struct inode * inode)4722d19961dSRyusuke Konishi void nilfs_mdt_clear(struct inode *inode)
4732d19961dSRyusuke Konishi {
4742d19961dSRyusuke Konishi struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
4756e211930SRyusuke Konishi struct nilfs_shadow_map *shadow = mdi->mi_shadow;
4762d19961dSRyusuke Konishi
4772d19961dSRyusuke Konishi if (mdi->mi_palloc_cache)
4782d19961dSRyusuke Konishi nilfs_palloc_destroy_cache(inode);
4796e211930SRyusuke Konishi
4806e211930SRyusuke Konishi if (shadow) {
4816e211930SRyusuke Konishi struct inode *s_inode = shadow->inode;
4826e211930SRyusuke Konishi
4836e211930SRyusuke Konishi shadow->inode = NULL;
4846e211930SRyusuke Konishi iput(s_inode);
4856e211930SRyusuke Konishi mdi->mi_shadow = NULL;
4866e211930SRyusuke Konishi }
4872d19961dSRyusuke Konishi }
4882d19961dSRyusuke Konishi
4892d19961dSRyusuke Konishi /**
4902d19961dSRyusuke Konishi * nilfs_mdt_destroy - release resources used by the metadata file
4912d19961dSRyusuke Konishi * @inode: inode of the metadata file
4922d19961dSRyusuke Konishi */
nilfs_mdt_destroy(struct inode * inode)4932d19961dSRyusuke Konishi void nilfs_mdt_destroy(struct inode *inode)
4942d19961dSRyusuke Konishi {
4952d19961dSRyusuke Konishi struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
4962d19961dSRyusuke Konishi
4972d19961dSRyusuke Konishi kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
4982d19961dSRyusuke Konishi kfree(mdi);
4992d19961dSRyusuke Konishi }
5002d19961dSRyusuke Konishi
nilfs_mdt_set_entry_size(struct inode * inode,unsigned int entry_size,unsigned int header_size)5010c6c44cbSRyusuke Konishi void nilfs_mdt_set_entry_size(struct inode *inode, unsigned int entry_size,
5020c6c44cbSRyusuke Konishi unsigned int header_size)
5035eb563f5SRyusuke Konishi {
5045eb563f5SRyusuke Konishi struct nilfs_mdt_info *mi = NILFS_MDT(inode);
5055eb563f5SRyusuke Konishi
5065eb563f5SRyusuke Konishi mi->mi_entry_size = entry_size;
50793407472SFabian Frederick mi->mi_entries_per_block = i_blocksize(inode) / entry_size;
5085eb563f5SRyusuke Konishi mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size);
5095eb563f5SRyusuke Konishi }
5105eb563f5SRyusuke Konishi
511ebdfed4dSRyusuke Konishi /**
512ebdfed4dSRyusuke Konishi * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file
513ebdfed4dSRyusuke Konishi * @inode: inode of the metadata file
514ebdfed4dSRyusuke Konishi * @shadow: shadow mapping
515ebdfed4dSRyusuke Konishi */
nilfs_mdt_setup_shadow_map(struct inode * inode,struct nilfs_shadow_map * shadow)516ebdfed4dSRyusuke Konishi int nilfs_mdt_setup_shadow_map(struct inode *inode,
517ebdfed4dSRyusuke Konishi struct nilfs_shadow_map *shadow)
518ebdfed4dSRyusuke Konishi {
519ebdfed4dSRyusuke Konishi struct nilfs_mdt_info *mi = NILFS_MDT(inode);
5206e211930SRyusuke Konishi struct inode *s_inode;
521ebdfed4dSRyusuke Konishi
522ebdfed4dSRyusuke Konishi INIT_LIST_HEAD(&shadow->frozen_buffers);
5236e211930SRyusuke Konishi
5246e211930SRyusuke Konishi s_inode = nilfs_iget_for_shadow(inode);
5256e211930SRyusuke Konishi if (IS_ERR(s_inode))
5266e211930SRyusuke Konishi return PTR_ERR(s_inode);
5276e211930SRyusuke Konishi
5286e211930SRyusuke Konishi shadow->inode = s_inode;
529ebdfed4dSRyusuke Konishi mi->mi_shadow = shadow;
530ebdfed4dSRyusuke Konishi return 0;
531ebdfed4dSRyusuke Konishi }
532ebdfed4dSRyusuke Konishi
533ebdfed4dSRyusuke Konishi /**
534ebdfed4dSRyusuke Konishi * nilfs_mdt_save_to_shadow_map - copy bmap and dirty pages to shadow map
535ebdfed4dSRyusuke Konishi * @inode: inode of the metadata file
536ebdfed4dSRyusuke Konishi */
nilfs_mdt_save_to_shadow_map(struct inode * inode)537ebdfed4dSRyusuke Konishi int nilfs_mdt_save_to_shadow_map(struct inode *inode)
538ebdfed4dSRyusuke Konishi {
539ebdfed4dSRyusuke Konishi struct nilfs_mdt_info *mi = NILFS_MDT(inode);
540ebdfed4dSRyusuke Konishi struct nilfs_inode_info *ii = NILFS_I(inode);
541ebdfed4dSRyusuke Konishi struct nilfs_shadow_map *shadow = mi->mi_shadow;
5426e211930SRyusuke Konishi struct inode *s_inode = shadow->inode;
543ebdfed4dSRyusuke Konishi int ret;
544ebdfed4dSRyusuke Konishi
5456e211930SRyusuke Konishi ret = nilfs_copy_dirty_pages(s_inode->i_mapping, inode->i_mapping);
546ebdfed4dSRyusuke Konishi if (ret)
547ebdfed4dSRyusuke Konishi goto out;
548ebdfed4dSRyusuke Konishi
5496e211930SRyusuke Konishi ret = nilfs_copy_dirty_pages(NILFS_I(s_inode)->i_assoc_inode->i_mapping,
550e897be17SRyusuke Konishi ii->i_assoc_inode->i_mapping);
551ebdfed4dSRyusuke Konishi if (ret)
552ebdfed4dSRyusuke Konishi goto out;
553ebdfed4dSRyusuke Konishi
554ebdfed4dSRyusuke Konishi nilfs_bmap_save(ii->i_bmap, &shadow->bmap_store);
555ebdfed4dSRyusuke Konishi out:
556ebdfed4dSRyusuke Konishi return ret;
557ebdfed4dSRyusuke Konishi }
558ebdfed4dSRyusuke Konishi
nilfs_mdt_freeze_buffer(struct inode * inode,struct buffer_head * bh)559b1f6a4f2SRyusuke Konishi int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh)
560b1f6a4f2SRyusuke Konishi {
561b1f6a4f2SRyusuke Konishi struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow;
562b1f6a4f2SRyusuke Konishi struct buffer_head *bh_frozen;
563b1f6a4f2SRyusuke Konishi struct page *page;
564b1f6a4f2SRyusuke Konishi int blkbits = inode->i_blkbits;
565b1f6a4f2SRyusuke Konishi
566*6ad4cd7fSMatthew Wilcox (Oracle) page = grab_cache_page(shadow->inode->i_mapping, bh->b_folio->index);
567b1f6a4f2SRyusuke Konishi if (!page)
568a7a8447eSRyusuke Konishi return -ENOMEM;
569b1f6a4f2SRyusuke Konishi
570b1f6a4f2SRyusuke Konishi if (!page_has_buffers(page))
571b1f6a4f2SRyusuke Konishi create_empty_buffers(page, 1 << blkbits, 0);
572b1f6a4f2SRyusuke Konishi
573b1f6a4f2SRyusuke Konishi bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits);
574a7a8447eSRyusuke Konishi
575b1f6a4f2SRyusuke Konishi if (!buffer_uptodate(bh_frozen))
576b1f6a4f2SRyusuke Konishi nilfs_copy_buffer(bh_frozen, bh);
577b1f6a4f2SRyusuke Konishi if (list_empty(&bh_frozen->b_assoc_buffers)) {
578b1f6a4f2SRyusuke Konishi list_add_tail(&bh_frozen->b_assoc_buffers,
579b1f6a4f2SRyusuke Konishi &shadow->frozen_buffers);
580b1f6a4f2SRyusuke Konishi set_buffer_nilfs_redirected(bh);
581b1f6a4f2SRyusuke Konishi } else {
582b1f6a4f2SRyusuke Konishi brelse(bh_frozen); /* already frozen */
583b1f6a4f2SRyusuke Konishi }
584a7a8447eSRyusuke Konishi
585b1f6a4f2SRyusuke Konishi unlock_page(page);
58609cbfeafSKirill A. Shutemov put_page(page);
587a7a8447eSRyusuke Konishi return 0;
588b1f6a4f2SRyusuke Konishi }
589b1f6a4f2SRyusuke Konishi
590b1f6a4f2SRyusuke Konishi struct buffer_head *
nilfs_mdt_get_frozen_buffer(struct inode * inode,struct buffer_head * bh)591b1f6a4f2SRyusuke Konishi nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh)
592b1f6a4f2SRyusuke Konishi {
593b1f6a4f2SRyusuke Konishi struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow;
594b1f6a4f2SRyusuke Konishi struct buffer_head *bh_frozen = NULL;
595b1f6a4f2SRyusuke Konishi struct page *page;
596b1f6a4f2SRyusuke Konishi int n;
597b1f6a4f2SRyusuke Konishi
598*6ad4cd7fSMatthew Wilcox (Oracle) page = find_lock_page(shadow->inode->i_mapping, bh->b_folio->index);
599b1f6a4f2SRyusuke Konishi if (page) {
600b1f6a4f2SRyusuke Konishi if (page_has_buffers(page)) {
601b1f6a4f2SRyusuke Konishi n = bh_offset(bh) >> inode->i_blkbits;
602b1f6a4f2SRyusuke Konishi bh_frozen = nilfs_page_get_nth_block(page, n);
603b1f6a4f2SRyusuke Konishi }
604b1f6a4f2SRyusuke Konishi unlock_page(page);
60509cbfeafSKirill A. Shutemov put_page(page);
606b1f6a4f2SRyusuke Konishi }
607b1f6a4f2SRyusuke Konishi return bh_frozen;
608b1f6a4f2SRyusuke Konishi }
609b1f6a4f2SRyusuke Konishi
nilfs_release_frozen_buffers(struct nilfs_shadow_map * shadow)610b1f6a4f2SRyusuke Konishi static void nilfs_release_frozen_buffers(struct nilfs_shadow_map *shadow)
611b1f6a4f2SRyusuke Konishi {
612b1f6a4f2SRyusuke Konishi struct list_head *head = &shadow->frozen_buffers;
613b1f6a4f2SRyusuke Konishi struct buffer_head *bh;
614b1f6a4f2SRyusuke Konishi
615b1f6a4f2SRyusuke Konishi while (!list_empty(head)) {
616b1f6a4f2SRyusuke Konishi bh = list_first_entry(head, struct buffer_head,
617b1f6a4f2SRyusuke Konishi b_assoc_buffers);
618b1f6a4f2SRyusuke Konishi list_del_init(&bh->b_assoc_buffers);
619b1f6a4f2SRyusuke Konishi brelse(bh); /* drop ref-count to make it releasable */
620b1f6a4f2SRyusuke Konishi }
621b1f6a4f2SRyusuke Konishi }
622b1f6a4f2SRyusuke Konishi
623ebdfed4dSRyusuke Konishi /**
624ebdfed4dSRyusuke Konishi * nilfs_mdt_restore_from_shadow_map - restore dirty pages and bmap state
625ebdfed4dSRyusuke Konishi * @inode: inode of the metadata file
626ebdfed4dSRyusuke Konishi */
nilfs_mdt_restore_from_shadow_map(struct inode * inode)627ebdfed4dSRyusuke Konishi void nilfs_mdt_restore_from_shadow_map(struct inode *inode)
628ebdfed4dSRyusuke Konishi {
629ebdfed4dSRyusuke Konishi struct nilfs_mdt_info *mi = NILFS_MDT(inode);
630ebdfed4dSRyusuke Konishi struct nilfs_inode_info *ii = NILFS_I(inode);
631ebdfed4dSRyusuke Konishi struct nilfs_shadow_map *shadow = mi->mi_shadow;
632ebdfed4dSRyusuke Konishi
633ebdfed4dSRyusuke Konishi down_write(&mi->mi_sem);
634ebdfed4dSRyusuke Konishi
635ebdfed4dSRyusuke Konishi if (mi->mi_palloc_cache)
636ebdfed4dSRyusuke Konishi nilfs_palloc_clear_cache(inode);
637ebdfed4dSRyusuke Konishi
6388c26c4e2SVyacheslav Dubeyko nilfs_clear_dirty_pages(inode->i_mapping, true);
6396e211930SRyusuke Konishi nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping);
640ebdfed4dSRyusuke Konishi
641e897be17SRyusuke Konishi nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true);
642e897be17SRyusuke Konishi nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping,
6436e211930SRyusuke Konishi NILFS_I(shadow->inode)->i_assoc_inode->i_mapping);
644ebdfed4dSRyusuke Konishi
645ebdfed4dSRyusuke Konishi nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store);
646ebdfed4dSRyusuke Konishi
647ebdfed4dSRyusuke Konishi up_write(&mi->mi_sem);
648ebdfed4dSRyusuke Konishi }
649ebdfed4dSRyusuke Konishi
650ebdfed4dSRyusuke Konishi /**
651ebdfed4dSRyusuke Konishi * nilfs_mdt_clear_shadow_map - truncate pages in shadow map caches
652ebdfed4dSRyusuke Konishi * @inode: inode of the metadata file
653ebdfed4dSRyusuke Konishi */
nilfs_mdt_clear_shadow_map(struct inode * inode)654ebdfed4dSRyusuke Konishi void nilfs_mdt_clear_shadow_map(struct inode *inode)
655ebdfed4dSRyusuke Konishi {
656ebdfed4dSRyusuke Konishi struct nilfs_mdt_info *mi = NILFS_MDT(inode);
657ebdfed4dSRyusuke Konishi struct nilfs_shadow_map *shadow = mi->mi_shadow;
6586e211930SRyusuke Konishi struct inode *shadow_btnc_inode = NILFS_I(shadow->inode)->i_assoc_inode;
659ebdfed4dSRyusuke Konishi
660ebdfed4dSRyusuke Konishi down_write(&mi->mi_sem);
661b1f6a4f2SRyusuke Konishi nilfs_release_frozen_buffers(shadow);
6626e211930SRyusuke Konishi truncate_inode_pages(shadow->inode->i_mapping, 0);
6636e211930SRyusuke Konishi truncate_inode_pages(shadow_btnc_inode->i_mapping, 0);
664ebdfed4dSRyusuke Konishi up_write(&mi->mi_sem);
665ebdfed4dSRyusuke Konishi }
666