// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/ext2/inode.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/inode.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Goal-directed block allocation by Stephen Tweedie
 * 	(sct@dcs.ed.ac.uk), 1993, 1998
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 *  64-bit file support on 64-bit platforms by Jakub Jelinek
 * 	(jj@sunsite.ms.mff.cuni.cz)
 *
 *  Assorted race fixes, rewrite of ext2_get_block() by Al Viro, 2000
 */

#include <linux/time.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
#include <linux/dax.h>
#include <linux/blkdev.h>
#include <linux/quotaops.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/fiemap.h>
#include <linux/iomap.h>
#include <linux/namei.h>
#include <linux/uio.h>
#include "ext2.h"
#include "acl.h"
#include "xattr.h"

static int __ext2_write_inode(struct inode *inode, int do_sync);

/*
 * Test whether an inode is a fast symlink.
 */
static inline int ext2_inode_is_fast_symlink(struct inode *inode)
{
	int ea_blocks = EXT2_I(inode)->i_file_acl ?
		(inode->i_sb->s_blocksize >> 9) : 0;

	return (S_ISLNK(inode->i_mode) &&
		inode->i_blocks - ea_blocks == 0);
}

static void ext2_truncate_blocks(struct inode *inode, loff_t offset);

static void ext2_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;

	if (to > inode->i_size) {
		truncate_pagecache(inode, inode->i_size);
		ext2_truncate_blocks(inode, inode->i_size);
	}
}

/*
 * Called at the last iput() if i_nlink is zero.
 */
void ext2_evict_inode(struct inode *inode)
{
	struct ext2_block_alloc_info *rsv;
	int want_delete = 0;

	if (!inode->i_nlink && !is_bad_inode(inode)) {
		want_delete = 1;
		dquot_initialize(inode);
	} else {
		dquot_drop(inode);
	}

	truncate_inode_pages_final(&inode->i_data);

	if (want_delete) {
		sb_start_intwrite(inode->i_sb);
		/* set dtime */
		EXT2_I(inode)->i_dtime = ktime_get_real_seconds();
		mark_inode_dirty(inode);
		__ext2_write_inode(inode, inode_needs_sync(inode));
		/* truncate to 0 */
		inode->i_size = 0;
		if (inode->i_blocks)
			ext2_truncate_blocks(inode, 0);
		ext2_xattr_delete_inode(inode);
	}

	invalidate_inode_buffers(inode);
	clear_inode(inode);

	ext2_discard_reservation(inode);
	rsv = EXT2_I(inode)->i_block_alloc_info;
	EXT2_I(inode)->i_block_alloc_info = NULL;
	if (unlikely(rsv))
		kfree(rsv);

	if (want_delete) {
		ext2_free_inode(inode);
		sb_end_intwrite(inode->i_sb);
	}
}

typedef struct {
	__le32	*p;
	__le32	key;
	struct buffer_head *bh;
} Indirect;

static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
{
	p->key = *(p->p = v);
	p->bh = bh;
}

static inline int verify_chain(Indirect *from, Indirect *to)
{
	while (from <= to && from->key == *from->p)
		from++;
	return (from > to);
}
/**
 *	ext2_block_to_path - parse the block number into array of offsets
 *	@inode: inode in question (we are only interested in its superblock)
 *	@i_block: block number to be parsed
 *	@offsets: array to store the offsets in
 *	@boundary: set this non-zero if the referred-to block is likely to be
 *		followed (on disk) by an indirect block.
 *
 *	To store the locations of a file's data, ext2 uses a data structure
 *	common to UNIX filesystems - a tree of pointers anchored in the
 *	inode, with data blocks at the leaves and indirect blocks in the
 *	intermediate nodes. This function translates the block number into a
 *	path in that tree - the return value is the path length and
 *	@offsets[n] is the offset of the pointer to the (n+1)th node in the
 *	nth one. If @i_block is out of range (negative or too large), a
 *	warning is printed and zero is returned.
 *
 *	Note: the function doesn't find node addresses, so no IO is needed.
 *	All we need to know is the capacity of indirect blocks (taken from
 *	inode->i_sb).
 */

/*
 * Portability note: the last comparison (check that we fit into triple
 * indirect block) is spelled differently, because otherwise on an
 * architecture with 32-bit longs and 8Kb pages we might get into trouble
 * if our filesystem had 8Kb blocks. We might use long long, but that would
 * kill us on x86. Oh, well, at least the sign propagation does not matter -
 * i_block would have to be negative in the very beginning, so we would not
 * get there at all.
 */
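
/*
 * A worked example, assuming (for illustration) a 1KiB block size, i.e.
 * 256 32-bit pointers per indirect block: the first 12 blocks are
 * direct, so i_block = 5 yields the depth-1 path {5}; i_block = 12 is
 * the first indirectly addressed block, giving the depth-2 path
 * {EXT2_IND_BLOCK, 0}; and i_block = 268 (= 12 + 256) overflows the
 * indirect range and becomes the depth-3 path {EXT2_DIND_BLOCK, 0, 0}.
 */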

static int ext2_block_to_path(struct inode *inode,
			long i_block, int offsets[4], int *boundary)
{
	int ptrs = EXT2_ADDR_PER_BLOCK(inode->i_sb);
	int ptrs_bits = EXT2_ADDR_PER_BLOCK_BITS(inode->i_sb);
	const long direct_blocks = EXT2_NDIR_BLOCKS,
		indirect_blocks = ptrs,
		double_blocks = (1 << (ptrs_bits * 2));
	int n = 0;
	int final = 0;

	if (i_block < 0) {
		ext2_msg(inode->i_sb, KERN_WARNING,
			"warning: %s: block < 0", __func__);
	} else if (i_block < direct_blocks) {
		offsets[n++] = i_block;
		final = direct_blocks;
	} else if ((i_block -= direct_blocks) < indirect_blocks) {
		offsets[n++] = EXT2_IND_BLOCK;
		offsets[n++] = i_block;
		final = ptrs;
	} else if ((i_block -= indirect_blocks) < double_blocks) {
		offsets[n++] = EXT2_DIND_BLOCK;
		offsets[n++] = i_block >> ptrs_bits;
		offsets[n++] = i_block & (ptrs - 1);
		final = ptrs;
	} else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) {
		offsets[n++] = EXT2_TIND_BLOCK;
		offsets[n++] = i_block >> (ptrs_bits * 2);
		offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1);
		offsets[n++] = i_block & (ptrs - 1);
		final = ptrs;
	} else {
		ext2_msg(inode->i_sb, KERN_WARNING,
			"warning: %s: block is too big", __func__);
	}
	if (boundary)
		*boundary = final - 1 - (i_block & (ptrs - 1));

	return n;
}

/**
 *	ext2_get_branch - read the chain of indirect blocks leading to data
 *	@inode: inode in question
 *	@depth: depth of the chain (1 - direct pointer, etc.)
 *	@offsets: offsets of pointers in inode/indirect blocks
 *	@chain: place to store the result
 *	@err: here we store the error value
 *
 *	Function fills the array of triples <key, p, bh> and returns %NULL
 *	if everything went OK, or a pointer to the last filled triple (the
 *	incomplete one) otherwise. Upon return, chain[i].key contains the
 *	number of the (i+1)-th block in the chain (as it is stored in memory,
 *	i.e. little-endian 32-bit), chain[i].p contains the address of that
 *	number (it points into struct inode for i==0 and into the bh->b_data
 *	for i>0) and chain[i].bh points to the buffer_head of the i-th
 *	indirect block for i>0 and is NULL for i==0. In other words, it holds
 *	the block numbers of the chain, the addresses they were taken from
 *	(where we can verify that the chain did not change) and the
 *	buffer_heads hosting these numbers.
 *
 *	Function stops when it stumbles upon a zero pointer (absent block)
 *		(pointer to last triple returned, *@err == 0)
 *	or when it gets an IO error reading an indirect block
 *		(ditto, *@err == -EIO)
 *	or when it notices that the chain had been changed while it was reading
 *		(ditto, *@err == -EAGAIN)
 *	or when it reads all @depth-1 indirect blocks successfully and finds
 *	the whole chain, all the way to the data (returns %NULL, *err == 0).
 */
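
/*
 * For example, a successful depth-3 (doubly indirect) lookup leaves:
 *	chain[0] = { p = &EXT2_I(inode)->i_data[EXT2_DIND_BLOCK],
 *		     key = *p, bh = NULL },
 *	chain[1] = { p = slot offsets[1] in the top indirect block's
 *		     data, key = *p, bh = buffer of that block },
 *	chain[2] = { p = slot offsets[2] in the second-level block's
 *		     data, key = number of the data block itself,
 *		     bh = buffer of the second-level block },
 * and the function returns NULL with *err == 0.
 */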
static Indirect *ext2_get_branch(struct inode *inode,
				 int depth,
				 int *offsets,
				 Indirect chain[4],
				 int *err)
{
	struct super_block *sb = inode->i_sb;
	Indirect *p = chain;
	struct buffer_head *bh;

	*err = 0;
	/* i_data is not going away, no lock needed */
	add_chain(chain, NULL, EXT2_I(inode)->i_data + *offsets);
	if (!p->key)
		goto no_block;
	while (--depth) {
		bh = sb_bread(sb, le32_to_cpu(p->key));
		if (!bh)
			goto failure;
		read_lock(&EXT2_I(inode)->i_meta_lock);
		if (!verify_chain(chain, p))
			goto changed;
		add_chain(++p, bh, (__le32*)bh->b_data + *++offsets);
		read_unlock(&EXT2_I(inode)->i_meta_lock);
		if (!p->key)
			goto no_block;
	}
	return NULL;

changed:
	read_unlock(&EXT2_I(inode)->i_meta_lock);
	brelse(bh);
	*err = -EAGAIN;
	goto no_block;
failure:
	*err = -EIO;
no_block:
	return p;
}

/**
 *	ext2_find_near - find a place for allocation with sufficient locality
 *	@inode: owner
 *	@ind: descriptor of indirect block.
 *
 *	This function returns the preferred place for block allocation.
 *	It is used when the heuristic for sequential allocation fails.
 *	Rules are:
 *	  + if there is a block to the left of our position - allocate near it.
 *	  + if pointer will live in indirect block - allocate near that block.
 *	  + if pointer will live in inode - allocate in the same cylinder group.
 *
 * In the latter case we colour the starting block by the caller's PID to
 * prevent it from clashing with concurrent allocations for a different inode
 * in the same block group.  The PID is used here so that functionally related
 * files will be close by on disk.
 *
 *	Caller must make sure that @ind is valid and will stay that way.
 */
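
/*
 * For illustration: with 32768 blocks per group the group is split into
 * 16 slices of 2048 blocks, so a caller with PID 1234 gets colour
 * (1234 % 16) * 2048 = 4096, i.e. its allocations start 4096 blocks
 * into the group.
 */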

static ext2_fsblk_t ext2_find_near(struct inode *inode, Indirect *ind)
{
	struct ext2_inode_info *ei = EXT2_I(inode);
	__le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data;
	__le32 *p;
	ext2_fsblk_t bg_start;
	ext2_fsblk_t colour;

	/* Try to find previous block */
	for (p = ind->p - 1; p >= start; p--)
		if (*p)
			return le32_to_cpu(*p);

	/* No such thing, so let's try location of indirect block */
	if (ind->bh)
		return ind->bh->b_blocknr;

	/*
	 * Is it going to be referred to from the inode itself? OK, just put
	 * it into the same cylinder group then.
	 */
	bg_start = ext2_group_first_block_no(inode->i_sb, ei->i_block_group);
	colour = (current->pid % 16) *
			(EXT2_BLOCKS_PER_GROUP(inode->i_sb) / 16);
	return bg_start + colour;
}

/**
 *	ext2_find_goal - find a preferred place for allocation.
 *	@inode: owner
 *	@block: block we want
 *	@partial: pointer to the last triple within a chain
 *
 *	Returns preferred place for a block (the goal).
 */

static inline ext2_fsblk_t ext2_find_goal(struct inode *inode, long block,
					  Indirect *partial)
{
	struct ext2_block_alloc_info *block_i;

	block_i = EXT2_I(inode)->i_block_alloc_info;

	/*
	 * try the heuristic for sequential allocation,
	 * failing that at least try to get decent locality.
	 */
	if (block_i && (block == block_i->last_alloc_logical_block + 1)
		&& (block_i->last_alloc_physical_block != 0)) {
		return block_i->last_alloc_physical_block + 1;
	}

	return ext2_find_near(inode, partial);
}

/**
 *	ext2_blks_to_allocate: Look up the block map and count the number
 *	of direct blocks that need to be allocated for the given branch.
 *
 *	@branch: chain of indirect blocks
 *	@k: number of blocks needed for indirect blocks
 *	@blks: number of data blocks to be mapped.
 *	@blocks_to_boundary: the offset in the indirect block
 *
 *	return the number of direct blocks to allocate.
 */
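/*
 * For illustration: with @blks == 8 and @blocks_to_boundary == 5, at
 * most 6 blocks (up to and including the boundary block) are counted.
 * In the k == 0 case the scan additionally stops at the first slot
 * after branch[0].p that is already allocated.
 */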
static int
ext2_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
		int blocks_to_boundary)
{
	unsigned long count = 0;

	/*
	 * Simple case: the [t,d]indirect block(s) have not been allocated
	 * yet, so clearly the blocks on that path have not been allocated
	 * either.
	 */
	if (k > 0) {
		/* right now we don't handle cross-boundary allocation */
		if (blks < blocks_to_boundary + 1)
			count += blks;
		else
			count += blocks_to_boundary + 1;
		return count;
	}

	count++;
	while (count < blks && count <= blocks_to_boundary
		&& le32_to_cpu(*(branch[0].p + count)) == 0) {
		count++;
	}
	return count;
}

/**
 *	ext2_alloc_blocks: allocate the multiple blocks needed for a branch
 *	@indirect_blks: the number of blocks we need to allocate for
 *			indirect blocks
 *	@blks: the number of blocks we need to allocate for direct blocks
 *	@new_blocks: on return it will store the new block numbers for
 *	the indirect blocks (if needed) and the first direct block.
 */
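/*
 * For illustration: with indirect_blks == 2 and blks == 4, target
 * starts at 6. If the first ext2_new_blocks() call returns a run of 3
 * blocks, two become indirect blocks, one direct block remains, the
 * loop breaks and the function returns 1 - direct blocks are allocated
 * on a best-effort basis and the caller may get fewer than requested.
 */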
static int ext2_alloc_blocks(struct inode *inode,
			ext2_fsblk_t goal, int indirect_blks, int blks,
			ext2_fsblk_t new_blocks[4], int *err)
{
	int target, i;
	unsigned long count = 0;
	int index = 0;
	ext2_fsblk_t current_block = 0;
	int ret = 0;

	/*
	 * Here we try to allocate the requested multiple blocks at once,
	 * on a best-effort basis.
	 * To build a branch, we should allocate blocks for
	 * the indirect blocks (if not allocated yet), and at least
	 * the first direct block of this branch.  That's the
	 * minimum number of blocks we need to allocate (required).
	 */
	target = blks + indirect_blks;

	while (1) {
		count = target;
		/* allocating blocks for indirect blocks and direct blocks */
		current_block = ext2_new_blocks(inode, goal, &count, err);
		if (*err)
			goto failed_out;

		target -= count;
		/* allocate blocks for indirect blocks */
		while (index < indirect_blks && count) {
			new_blocks[index++] = current_block++;
			count--;
		}

		if (count > 0)
			break;
	}

	/* save the new block number for the first direct block */
	new_blocks[index] = current_block;

	/* total number of blocks allocated for direct blocks */
	ret = count;
	*err = 0;
	return ret;
failed_out:
	for (i = 0; i < index; i++)
		ext2_free_blocks(inode, new_blocks[i], 1);
	if (index)
		mark_inode_dirty(inode);
	return ret;
}

/**
 *	ext2_alloc_branch - allocate and set up a chain of blocks.
 *	@inode: owner
 *	@indirect_blks: depth of the chain (number of blocks to allocate)
 *	@blks: number of allocated direct blocks
 *	@goal: preferred place for allocation
 *	@offsets: offsets (in the blocks) to store the pointers to next.
 *	@branch: place to store the chain in.
 *
 *	This function allocates @num blocks, zeroes out all but the last one,
 *	links them into a chain and (if we are synchronous) writes them to
 *	disk. In other words, it prepares a branch that can be spliced onto
 *	the inode. It stores the information about that chain in branch[], in
 *	the same format as ext2_get_branch() would do. We are calling it after
 *	we had read the existing part of the chain and partial points to the
 *	last triple of that (one with zero ->key). Upon exit we have the same
 *	picture as after a successful ext2_get_block(), except that in one
 *	place the chain is disconnected - *branch->p is still zero (we did
 *	not set the last link), but branch->key contains the number that
 *	should be placed into *branch->p to fill that gap.
 *
 *	If allocation fails we free all blocks we've allocated (and forget
 *	their buffer_heads) and return the error value from the failed
 *	ext2_alloc_blocks() (normally -ENOSPC). Otherwise we set the chain
 *	as described above and return 0.
 */
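/*
 * For illustration, when one new indirect block is needed (indirect_blks
 * == 1) and ext2_alloc_blocks() hands back num data blocks, success
 * leaves:
 *	branch[0].key = number of the new indirect block (branch[0].p,
 *			set earlier by ext2_get_branch(), still holds 0);
 *	branch[1] = { bh of the new indirect block, p = &slot offsets[1]
 *		      within it, key = number of the first data block },
 * with the numbers of data blocks 2..num already written into the
 * slots following *branch[1].p.
 */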

static int ext2_alloc_branch(struct inode *inode,
			int indirect_blks, int *blks, ext2_fsblk_t goal,
			int *offsets, Indirect *branch)
{
	int blocksize = inode->i_sb->s_blocksize;
	int i, n = 0;
	int err = 0;
	struct buffer_head *bh;
	int num;
	ext2_fsblk_t new_blocks[4];
	ext2_fsblk_t current_block;

	num = ext2_alloc_blocks(inode, goal, indirect_blks,
				*blks, new_blocks, &err);
	if (err)
		return err;

	branch[0].key = cpu_to_le32(new_blocks[0]);
	/*
	 * metadata blocks and data blocks are allocated.
	 */
	for (n = 1; n <= indirect_blks; n++) {
		/*
		 * Get buffer_head for parent block, zero it out
		 * and set the pointer to new one, then send
		 * parent to disk.
		 */
		bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
		if (unlikely(!bh)) {
			err = -ENOMEM;
			goto failed;
		}
		branch[n].bh = bh;
		lock_buffer(bh);
		memset(bh->b_data, 0, blocksize);
		branch[n].p = (__le32 *) bh->b_data + offsets[n];
		branch[n].key = cpu_to_le32(new_blocks[n]);
		*branch[n].p = branch[n].key;
		if (n == indirect_blks) {
			current_block = new_blocks[n];
			/*
			 * End of chain, update the last new metablock of
			 * the chain to point to the newly allocated
			 * data block numbers
			 */
			for (i = 1; i < num; i++)
				*(branch[n].p + i) = cpu_to_le32(++current_block);
		}
		set_buffer_uptodate(bh);
		unlock_buffer(bh);
		mark_buffer_dirty_inode(bh, inode);
		/* We used to sync bh here if IS_SYNC(inode).
		 * But we now rely upon generic_write_sync()
		 * and b_inode_buffers.  But not for directories.
		 */
		if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
			sync_dirty_buffer(bh);
	}
	*blks = num;
	return err;

failed:
	for (i = 1; i < n; i++)
		bforget(branch[i].bh);
	for (i = 0; i < indirect_blks; i++)
		ext2_free_blocks(inode, new_blocks[i], 1);
	ext2_free_blocks(inode, new_blocks[i], num);
	return err;
}

/**
 * ext2_splice_branch - splice the allocated branch onto inode.
 * @inode: owner
 * @block: (logical) number of block we are adding
 * @where: location of missing link
 * @num:   number of indirect blocks we are adding
 * @blks:  number of direct blocks we are adding
 *
 * This function fills the missing link and does all housekeeping needed in
 * inode (->i_blocks, etc.). In case of success we end up with the full
 * chain to new block and return 0.
 */
static void ext2_splice_branch(struct inode *inode,
			long block, Indirect *where, int num, int blks)
{
	int i;
	struct ext2_block_alloc_info *block_i;
	ext2_fsblk_t current_block;

	block_i = EXT2_I(inode)->i_block_alloc_info;

	/* XXX LOCKING probably should have i_meta_lock ? */
	/* That's it */

	*where->p = where->key;

	/*
	 * Update the host buffer_head or inode to point to the remaining
	 * just-allocated direct blocks
	 */
	if (num == 0 && blks > 1) {
		current_block = le32_to_cpu(where->key) + 1;
		for (i = 1; i < blks; i++)
			*(where->p + i) = cpu_to_le32(current_block++);
	}

	/*
	 * update the most recently allocated logical & physical block
	 * in i_block_alloc_info, to help find the proper goal block for the
	 * next allocation
	 */
	if (block_i) {
		block_i->last_alloc_logical_block = block + blks - 1;
		block_i->last_alloc_physical_block =
				le32_to_cpu(where[num].key) + blks - 1;
	}

	/* We are done with atomic stuff, now do the rest of housekeeping */

	/* had we spliced it onto indirect block? */
	if (where->bh)
		mark_buffer_dirty_inode(where->bh, inode);

	inode->i_ctime = current_time(inode);
	mark_inode_dirty(inode);
}

/*
 * Allocation strategy is simple: if we have to allocate something, we will
 * have to go the whole way to leaf. So let's do it before attaching anything
 * to tree, set linkage between the newborn blocks, write them if sync is
 * required, recheck the path, free and repeat if check fails, otherwise
 * set the last missing link (that will protect us from any truncate-generated
 * removals - all blocks on the path are immune now) and possibly force the
 * write on the parent block.
 * That has a nice additional property: no special recovery from the failed
 * allocations is needed - we simply release blocks and do not touch anything
 * reachable from inode.
 *
 * return > 0, # of blocks mapped or allocated.
 * return = 0, if plain lookup failed.
 * return < 0, error case.
 */
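
/*
 * For illustration: if blocks 0..3 of a file are already mapped
 * contiguously on disk starting at block 5000, then a call with
 * iblock == 0 and maxblocks == 4 returns 4 with *bno == 5000 and
 * allocates nothing; a lookup of an unmapped block with create == 0
 * simply returns 0.
 */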
static int ext2_get_blocks(struct inode *inode,
			   sector_t iblock, unsigned long maxblocks,
			   u32 *bno, bool *new, bool *boundary,
			   int create)
{
	int err;
	int offsets[4];
	Indirect chain[4];
	Indirect *partial;
	ext2_fsblk_t goal;
	int indirect_blks;
	int blocks_to_boundary = 0;
	int depth;
	struct ext2_inode_info *ei = EXT2_I(inode);
	int count = 0;
	ext2_fsblk_t first_block = 0;

	BUG_ON(maxblocks == 0);

	depth = ext2_block_to_path(inode, iblock, offsets, &blocks_to_boundary);

	if (depth == 0)
		return -EIO;

	partial = ext2_get_branch(inode, depth, offsets, chain, &err);
	/* Simplest case - block found, no allocation needed */
	if (!partial) {
		first_block = le32_to_cpu(chain[depth - 1].key);
		count++;
		/* map more blocks */
		while (count < maxblocks && count <= blocks_to_boundary) {
			ext2_fsblk_t blk;

			if (!verify_chain(chain, chain + depth - 1)) {
				/*
				 * Indirect block might be removed by
				 * truncate while we were reading it.
				 * Handling of that case: forget what we've
				 * got now, go to reread.
				 */
				err = -EAGAIN;
				count = 0;
				partial = chain + depth - 1;
				break;
			}
			blk = le32_to_cpu(*(chain[depth-1].p + count));
			if (blk == first_block + count)
				count++;
			else
				break;
		}
		if (err != -EAGAIN)
			goto got_it;
	}

	/* Next simple case - plain lookup or failed read of indirect block */
	if (!create || err == -EIO)
		goto cleanup;

	mutex_lock(&ei->truncate_mutex);
	/*
	 * If the indirect block is missing while we are reading
	 * the chain (ext2_get_branch() returns -EAGAIN), or if the chain
	 * has been changed after we grabbed the semaphore (either because
	 * another process truncated this branch, or another get_block
	 * allocated this branch), re-grab the chain to see if the requested
	 * block has been allocated or not.
	 *
	 * Since we already block truncate/other get_block callers at this
	 * point, we will have the current copy of the chain when we splice
	 * the branch into the tree.
	 */
	if (err == -EAGAIN || !verify_chain(chain, partial)) {
		while (partial > chain) {
			brelse(partial->bh);
			partial--;
		}
		partial = ext2_get_branch(inode, depth, offsets, chain, &err);
		if (!partial) {
			count++;
			mutex_unlock(&ei->truncate_mutex);
			goto got_it;
		}

		if (err) {
			mutex_unlock(&ei->truncate_mutex);
			goto cleanup;
		}
	}

	/*
	 * Okay, we need to do block allocation.  Lazily initialize the block
	 * allocation info here if necessary.
	 */
	if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info))
		ext2_init_block_alloc_info(inode);

	goal = ext2_find_goal(inode, iblock, partial);

	/* the number of blocks we need to allocate for [d,t]indirect blocks */
	indirect_blks = (chain + depth) - partial - 1;
	/*
	 * Next look up the indirect map to count the total number of
	 * direct blocks to allocate for this branch.
	 */
	count = ext2_blks_to_allocate(partial, indirect_blks,
					maxblocks, blocks_to_boundary);
	/*
	 * XXX ???? Block out ext2_truncate while we alter the tree
	 */
	err = ext2_alloc_branch(inode, indirect_blks, &count, goal,
				offsets + (partial - chain), partial);

	if (err) {
		mutex_unlock(&ei->truncate_mutex);
		goto cleanup;
	}

	if (IS_DAX(inode)) {
		/*
		 * We must unmap blocks before zeroing so that writeback cannot
		 * overwrite zeros with stale data from block device page cache.
		 */
		clean_bdev_aliases(inode->i_sb->s_bdev,
				   le32_to_cpu(chain[depth-1].key),
				   count);
		/*
		 * block must be initialised before we put it in the tree
		 * so that it's not found by another thread before it's
		 * initialised
		 */
		err = sb_issue_zeroout(inode->i_sb,
				le32_to_cpu(chain[depth-1].key), count,
				GFP_NOFS);
		if (err) {
			mutex_unlock(&ei->truncate_mutex);
			goto cleanup;
		}
	}
	*new = true;

	ext2_splice_branch(inode, iblock, partial, indirect_blks, count);
	mutex_unlock(&ei->truncate_mutex);
got_it:
	if (count > blocks_to_boundary)
		*boundary = true;
	err = count;
	/* Clean up and exit */
	partial = chain + depth - 1;	/* the whole chain */
cleanup:
	while (partial > chain) {
		brelse(partial->bh);
		partial--;
	}
	if (err > 0)
		*bno = le32_to_cpu(chain[depth-1].key);
	return err;
}

int ext2_get_block(struct inode *inode, sector_t iblock,
		struct buffer_head *bh_result, int create)
{
	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
	bool new = false, boundary = false;
	u32 bno;
	int ret;

	ret = ext2_get_blocks(inode, iblock, max_blocks, &bno, &new, &boundary,
			create);
	if (ret <= 0)
		return ret;

	map_bh(bh_result, inode->i_sb, bno);
	bh_result->b_size = (ret << inode->i_blkbits);
	if (new)
		set_buffer_new(bh_result);
	if (boundary)
		set_buffer_boundary(bh_result);
	return 0;
}

#ifdef CONFIG_FS_DAX
static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		unsigned flags, struct iomap *iomap, struct iomap *srcmap)
{
	unsigned int blkbits = inode->i_blkbits;
	unsigned long first_block = offset >> blkbits;
	unsigned long max_blocks = (length + (1 << blkbits) - 1) >> blkbits;
	struct ext2_sb_info *sbi = EXT2_SB(inode->i_sb);
	bool new = false, boundary = false;
	u32 bno;
	int ret;

	ret = ext2_get_blocks(inode, first_block, max_blocks,
			&bno, &new, &boundary, flags & IOMAP_WRITE);
	if (ret < 0)
		return ret;

	iomap->flags = 0;
	iomap->bdev = inode->i_sb->s_bdev;
	iomap->offset = (u64)first_block << blkbits;
	iomap->dax_dev = sbi->s_daxdev;

	if (ret == 0) {
		iomap->type = IOMAP_HOLE;
		iomap->addr = IOMAP_NULL_ADDR;
		iomap->length = 1 << blkbits;
	} else {
		iomap->type = IOMAP_MAPPED;
		iomap->addr = (u64)bno << blkbits;
		iomap->length = (u64)ret << blkbits;
		iomap->flags |= IOMAP_F_MERGED;
	}

	if (new)
		iomap->flags |= IOMAP_F_NEW;
	return 0;
}

static int
ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length,
		ssize_t written, unsigned flags, struct iomap *iomap)
{
	if (iomap->type == IOMAP_MAPPED &&
	    written < length &&
	    (flags & IOMAP_WRITE))
		ext2_write_failed(inode->i_mapping, offset + length);
	return 0;
}

const struct iomap_ops ext2_iomap_ops = {
	.iomap_begin		= ext2_iomap_begin,
	.iomap_end		= ext2_iomap_end,
};
#else
/* Define empty ops for !CONFIG_FS_DAX case to avoid ugly ifdefs */
const struct iomap_ops ext2_iomap_ops;
#endif /* CONFIG_FS_DAX */

int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len)
{
	return generic_block_fiemap(inode, fieinfo, start, len,
				    ext2_get_block);
}

static int ext2_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page(page, ext2_get_block, wbc);
}

static int ext2_readpage(struct file *file, struct page *page)
{
	return mpage_readpage(page, ext2_get_block);
}

static void ext2_readahead(struct readahead_control *rac)
{
	mpage_readahead(rac, ext2_get_block);
}

static int
ext2_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	int ret;

	ret = block_write_begin(mapping, pos, len, flags, pagep,
				ext2_get_block);
	if (ret < 0)
		ext2_write_failed(mapping, pos + len);
	return ret;
}

static int ext2_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int ret;

	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
	if (ret < len)
		ext2_write_failed(mapping, pos + len);
	return ret;
}

static int
ext2_nobh_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	int ret;

	ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata,
			       ext2_get_block);
	if (ret < 0)
		ext2_write_failed(mapping, pos + len);
	return ret;
}

static int ext2_nobh_writepage(struct page *page,
			struct writeback_control *wbc)
{
	return nobh_writepage(page, ext2_get_block, wbc);
}

static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
{
	return generic_block_bmap(mapping, block, ext2_get_block);
}

static ssize_t
ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	size_t count = iov_iter_count(iter);
	loff_t offset = iocb->ki_pos;
	ssize_t ret;

	ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
	if (ret < 0 && iov_iter_rw(iter) == WRITE)
		ext2_write_failed(mapping, offset + count);
	return ret;
}

static int
ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	return mpage_writepages(mapping, wbc, ext2_get_block);
}

static int
ext2_dax_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	struct ext2_sb_info *sbi = EXT2_SB(mapping->host->i_sb);

	return dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc);
}

const struct address_space_operations ext2_aops = {
	.readpage		= ext2_readpage,
	.readahead		= ext2_readahead,
	.writepage		= ext2_writepage,
	.write_begin		= ext2_write_begin,
	.write_end		= ext2_write_end,
	.bmap			= ext2_bmap,
	.direct_IO		= ext2_direct_IO,
	.writepages		= ext2_writepages,
	.migratepage		= buffer_migrate_page,
	.is_partially_uptodate	= block_is_partially_uptodate,
	.error_remove_page	= generic_error_remove_page,
};

const struct address_space_operations ext2_nobh_aops = {
	.readpage		= ext2_readpage,
	.readahead		= ext2_readahead,
	.writepage		= ext2_nobh_writepage,
	.write_begin		= ext2_nobh_write_begin,
	.write_end		= nobh_write_end,
	.bmap			= ext2_bmap,
	.direct_IO		= ext2_direct_IO,
	.writepages		= ext2_writepages,
	.migratepage		= buffer_migrate_page,
	.error_remove_page	= generic_error_remove_page,
};

static const struct address_space_operations ext2_dax_aops = {
	.writepages		= ext2_dax_writepages,
	.direct_IO		= noop_direct_IO,
	.set_page_dirty		= noop_set_page_dirty,
	.invalidatepage		= noop_invalidatepage,
};

/*
 * Probably it should be a library function... search for first non-zero word
 * or memcmp with zero_page, whatever is better for particular architecture.
 * Linus?
 */
static inline int all_zeroes(__le32 *p, __le32 *q)
{
	while (p < q)
		if (*p++)
			return 0;
	return 1;
}

/**
 *	ext2_find_shared - find the indirect blocks for partial truncation.
 *	@inode:	  inode in question
 *	@depth:	  depth of the affected branch
 *	@offsets: offsets of pointers in that branch (see ext2_block_to_path)
 *	@chain:	  place to store the pointers to partial indirect blocks
 *	@top:	  place to put the (detached) top of the branch
 *
 *	This is a helper function used by ext2_truncate().
 *
 *	When we do truncate() we may have to clean the ends of several indirect
 *	blocks but leave the blocks themselves alive. A block is partially
 *	truncated if some data below the new i_size is referred from it (and
 *	it is on the path to the first completely truncated data block, indeed).
 *	We have to free the top of that path along with everything to the right
 *	of the path. Since no allocation past the truncation point is possible
 *	until ext2_truncate() finishes, we may safely do the latter, but the
 *	top of the branch may require special attention - pageout below the
 *	truncation point might try to populate it.
 *
 *	We atomically detach the top of the branch from the tree, store the
 *	block number of its root in *@top, pointers to buffer_heads of
 *	partially truncated blocks - in @chain[].bh and pointers to their last
 *	elements that should not be removed - in @chain[].p. The return value
 *	is the pointer to the last filled element of @chain.
 *
 *	The work left to the caller is the actual freeing of subtrees:
 *		a) free the subtree starting from *@top
 *		b) free the subtrees whose roots are stored in
 *			(@chain[i].p+1 .. end of @chain[i].bh->b_data)
 *		c) free the subtrees growing from the inode past the @chain[0].p
 *			(no partially truncated stuff there).
 */
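/*
 * For illustration: suppose the truncation point maps to offsets[] =
 * {EXT2_IND_BLOCK, 5}, i.e. entries 0..4 of the indirect block survive.
 * If slot 5 is in use, the "cheat" below just decrements partial->p so
 * that it points at slot 4, *@top stays 0, and the caller frees the
 * subtrees rooted at slots 5 and up of that indirect block. Had
 * offsets[1] been 0, the whole indirect subtree would instead have been
 * detached through *@top.
 */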

static Indirect *ext2_find_shared(struct inode *inode,
				int depth,
				int offsets[4],
				Indirect chain[4],
				__le32 *top)
{
	Indirect *partial, *p;
	int k, err;

	*top = 0;
	for (k = depth; k > 1 && !offsets[k-1]; k--)
		;
	partial = ext2_get_branch(inode, k, offsets, chain, &err);
	if (!partial)
		partial = chain + k - 1;
	/*
	 * If the branch acquired continuation since we've looked at it -
	 * fine, it should all survive and (new) top doesn't belong to us.
	 */
	write_lock(&EXT2_I(inode)->i_meta_lock);
	if (!partial->key && *partial->p) {
		write_unlock(&EXT2_I(inode)->i_meta_lock);
		goto no_top;
	}
	for (p = partial; p > chain && all_zeroes((__le32 *)p->bh->b_data, p->p); p--)
		;
	/*
	 * OK, we've found the last block that must survive. The rest of our
	 * branch should be detached before unlocking. However, if that rest
	 * of branch is all ours and does not grow immediately from the inode
	 * it's easier to cheat and just decrement partial->p.
	 */
	if (p == chain + k - 1 && p > chain) {
		p->p--;
	} else {
		*top = *p->p;
		*p->p = 0;
	}
	write_unlock(&EXT2_I(inode)->i_meta_lock);

	while (partial > p) {
		brelse(partial->bh);
		partial--;
	}
no_top:
	return partial;
}

/**
 *	ext2_free_data - free a list of data blocks
 *	@inode:	inode we are dealing with
 *	@p:	array of block numbers
 *	@q:	points immediately past the end of array
 *
 *	We are freeing all blocks referred from that array (numbers are
 *	stored as little-endian 32-bit) and updating @inode->i_blocks
 *	appropriately.
 */
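/*
 * For illustration: an array holding the block numbers 100, 101, 102,
 * 200 results in exactly two calls, ext2_free_blocks(inode, 100, 3)
 * and ext2_free_blocks(inode, 200, 1) - contiguous runs are batched.
 */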
static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q)
{
	unsigned long block_to_free = 0, count = 0;
	unsigned long nr;

	for ( ; p < q ; p++) {
		nr = le32_to_cpu(*p);
		if (nr) {
			*p = 0;
			/* accumulate blocks to free if they're contiguous */
			if (count == 0)
				goto free_this;
			else if (block_to_free == nr - count)
				count++;
			else {
				ext2_free_blocks(inode, block_to_free, count);
				mark_inode_dirty(inode);
			free_this:
				block_to_free = nr;
				count = 1;
			}
		}
	}
	if (count > 0) {
		ext2_free_blocks(inode, block_to_free, count);
		mark_inode_dirty(inode);
	}
}

/**
 *	ext2_free_branches - free an array of branches
 *	@inode:	inode we are dealing with
 *	@p:	array of block numbers
 *	@q:	pointer immediately past the end of array
 *	@depth:	depth of the branches to free
 *
 *	We are freeing all blocks referred from these branches (numbers are
 *	stored as little-endian 32-bit) and updating @inode->i_blocks
 *	appropriately.
 */
static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int depth)
{
	struct buffer_head *bh;
	unsigned long nr;

	if (depth--) {
		int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
		for ( ; p < q ; p++) {
			nr = le32_to_cpu(*p);
			if (!nr)
				continue;
			*p = 0;
			bh = sb_bread(inode->i_sb, nr);
			/*
			 * A read failure? Report error and clear slot
			 * (should be rare).
			 */
			if (!bh) {
				ext2_error(inode->i_sb, "ext2_free_branches",
					"Read failure, inode=%ld, block=%ld",
					inode->i_ino, nr);
				continue;
			}
			ext2_free_branches(inode,
					   (__le32*)bh->b_data,
					   (__le32*)bh->b_data + addr_per_block,
					   depth);
			bforget(bh);
			ext2_free_blocks(inode, nr, 1);
			mark_inode_dirty(inode);
		}
	} else
		ext2_free_data(inode, p, q);
}

/* dax_sem must be held when calling this function */
static void __ext2_truncate_blocks(struct inode *inode, loff_t offset)
{
	__le32 *i_data = EXT2_I(inode)->i_data;
	struct ext2_inode_info *ei = EXT2_I(inode);
	int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
	int offsets[4];
	Indirect chain[4];
	Indirect *partial;
	__le32 nr = 0;
	int n;
	long iblock;
	unsigned blocksize;

	blocksize = inode->i_sb->s_blocksize;
	iblock = (offset + blocksize - 1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb);

#ifdef CONFIG_FS_DAX
	WARN_ON(!rwsem_is_locked(&ei->dax_sem));
#endif

	n = ext2_block_to_path(inode, iblock, offsets, NULL);
	if (n == 0)
		return;

	/*
	 * From here we block out all ext2_get_block() callers who want to
	 * modify the block allocation tree.
	 */
	mutex_lock(&ei->truncate_mutex);

	if (n == 1) {
		ext2_free_data(inode, i_data + offsets[0],
					i_data + EXT2_NDIR_BLOCKS);
		goto do_indirects;
	}

	partial = ext2_find_shared(inode, n, offsets, chain, &nr);
	/* Kill the top of shared branch (already detached) */
	if (nr) {
		if (partial == chain)
			mark_inode_dirty(inode);
		else
			mark_buffer_dirty_inode(partial->bh, inode);
		ext2_free_branches(inode, &nr, &nr+1, (chain+n-1) - partial);
	}
	/* Clear the ends of indirect blocks on the shared branch */
	while (partial > chain) {
		ext2_free_branches(inode,
				   partial->p + 1,
				   (__le32*)partial->bh->b_data+addr_per_block,
				   (chain+n-1) - partial);
		mark_buffer_dirty_inode(partial->bh, inode);
		brelse(partial->bh);
		partial--;
	}
do_indirects:
	/* Kill the remaining (whole) subtrees */
	switch (offsets[0]) {
		default:
			nr = i_data[EXT2_IND_BLOCK];
			if (nr) {
				i_data[EXT2_IND_BLOCK] = 0;
				mark_inode_dirty(inode);
				ext2_free_branches(inode, &nr, &nr+1, 1);
			}
			fallthrough;
		case EXT2_IND_BLOCK:
			nr = i_data[EXT2_DIND_BLOCK];
			if (nr) {
				i_data[EXT2_DIND_BLOCK] = 0;
				mark_inode_dirty(inode);
				ext2_free_branches(inode, &nr, &nr+1, 2);
			}
			fallthrough;
		case EXT2_DIND_BLOCK:
			nr = i_data[EXT2_TIND_BLOCK];
			if (nr) {
				i_data[EXT2_TIND_BLOCK] = 0;
				mark_inode_dirty(inode);
				ext2_free_branches(inode, &nr, &nr+1, 3);
			}
			break;
		case EXT2_TIND_BLOCK:
			;
	}

	ext2_discard_reservation(inode);

	mutex_unlock(&ei->truncate_mutex);
}

static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
{
	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
	    S_ISLNK(inode->i_mode)))
		return;
	if (ext2_inode_is_fast_symlink(inode))
		return;

	dax_sem_down_write(EXT2_I(inode));
	__ext2_truncate_blocks(inode, offset);
	dax_sem_up_write(EXT2_I(inode));
}

static int ext2_setsize(struct inode *inode, loff_t newsize)
{
	int error;

	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
	    S_ISLNK(inode->i_mode)))
		return -EINVAL;
	if (ext2_inode_is_fast_symlink(inode))
		return -EINVAL;
	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
		return -EPERM;

	inode_dio_wait(inode);

	if (IS_DAX(inode)) {
		error = iomap_zero_range(inode, newsize,
					 PAGE_ALIGN(newsize) - newsize, NULL,
					 &ext2_iomap_ops);
	} else if (test_opt(inode->i_sb, NOBH)) {
		error = nobh_truncate_page(inode->i_mapping,
				newsize, ext2_get_block);
	} else {
		error = block_truncate_page(inode->i_mapping,
				newsize, ext2_get_block);
	}
	if (error)
		return error;

	dax_sem_down_write(EXT2_I(inode));
	truncate_setsize(inode, newsize);
	__ext2_truncate_blocks(inode, newsize);
	dax_sem_up_write(EXT2_I(inode));

	inode->i_mtime = inode->i_ctime = current_time(inode);
	if (inode_needs_sync(inode)) {
		sync_mapping_buffers(inode->i_mapping);
		sync_inode_metadata(inode, 1);
	} else {
		mark_inode_dirty(inode);
	}

	return 0;
}

static struct ext2_inode *ext2_get_inode(struct super_block *sb, ino_t ino,
					struct buffer_head **p)
{
	struct buffer_head *bh;
	unsigned long block_group;
	unsigned long block;
	unsigned long offset;
	struct ext2_group_desc *gdp;

	*p = NULL;
	if ((ino != EXT2_ROOT_INO && ino < EXT2_FIRST_INO(sb)) ||
	    ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count))
		goto Einval;

	block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb);
	gdp = ext2_get_group_desc(sb, block_group, NULL);
	if (!gdp)
		goto Egdp;
	/*
	 * Figure out the offset within the block group inode table
	 */
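	/*
	 * E.g., assuming (for illustration) 4096 inodes per group,
	 * 128-byte inodes and 1KiB blocks: ino 4098 lands in group
	 * (4098 - 1) / 4096 = 1, at byte offset (4097 % 4096) * 128 =
	 * 128, i.e. in the first block of that group's inode table.
	 */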
	offset = ((ino - 1) % EXT2_INODES_PER_GROUP(sb)) * EXT2_INODE_SIZE(sb);
	block = le32_to_cpu(gdp->bg_inode_table) +
		(offset >> EXT2_BLOCK_SIZE_BITS(sb));
	if (!(bh = sb_bread(sb, block)))
		goto Eio;

	*p = bh;
	offset &= (EXT2_BLOCK_SIZE(sb) - 1);
	return (struct ext2_inode *) (bh->b_data + offset);

Einval:
	ext2_error(sb, "ext2_get_inode", "bad inode number: %lu",
		   (unsigned long) ino);
	return ERR_PTR(-EINVAL);
Eio:
	ext2_error(sb, "ext2_get_inode",
		   "unable to read inode block - inode=%lu, block=%lu",
		   (unsigned long) ino, block);
Egdp:
	return ERR_PTR(-EIO);
}

void ext2_set_inode_flags(struct inode *inode)
{
	unsigned int flags = EXT2_I(inode)->i_flags;

	inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME |
				S_DIRSYNC | S_DAX);
	if (flags & EXT2_SYNC_FL)
		inode->i_flags |= S_SYNC;
	if (flags & EXT2_APPEND_FL)
		inode->i_flags |= S_APPEND;
	if (flags & EXT2_IMMUTABLE_FL)
		inode->i_flags |= S_IMMUTABLE;
	if (flags & EXT2_NOATIME_FL)
		inode->i_flags |= S_NOATIME;
	if (flags & EXT2_DIRSYNC_FL)
		inode->i_flags |= S_DIRSYNC;
	if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
		inode->i_flags |= S_DAX;
}

void ext2_set_file_ops(struct inode *inode)
{
	inode->i_op = &ext2_file_inode_operations;
	inode->i_fop = &ext2_file_operations;
	if (IS_DAX(inode))
		inode->i_mapping->a_ops = &ext2_dax_aops;
	else if (test_opt(inode->i_sb, NOBH))
		inode->i_mapping->a_ops = &ext2_nobh_aops;
	else
		inode->i_mapping->a_ops = &ext2_aops;
}

struct inode *ext2_iget(struct super_block *sb, unsigned long ino)
{
	struct ext2_inode_info *ei;
	struct buffer_head *bh = NULL;
	struct ext2_inode *raw_inode;
	struct inode *inode;
	long ret = -EIO;
	int n;
	uid_t i_uid;
	gid_t i_gid;

	inode = iget_locked(sb, ino);
	if (!inode)
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;

	ei = EXT2_I(inode);
	ei->i_block_alloc_info = NULL;

	raw_inode = ext2_get_inode(inode->i_sb, ino, &bh);
	if (IS_ERR(raw_inode)) {
		ret = PTR_ERR(raw_inode);
		goto bad_inode;
	}

	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
	i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
	i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
	if (!(test_opt(inode->i_sb, NO_UID32))) {
		i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
		i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
	}
	i_uid_write(inode, i_uid);
	i_gid_write(inode, i_gid);
	set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
	inode->i_size = le32_to_cpu(raw_inode->i_size);
	inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
	inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
	inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
	inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
	ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
	/* We now have enough fields to check if the inode was active or not.
	 * This is needed because nfsd might try to access dead inodes.
	 * The test is the same one that e2fsck uses.
	 * NeilBrown 1999oct15
	 */
	if (inode->i_nlink == 0 && (inode->i_mode == 0 || ei->i_dtime)) {
		/* this inode is deleted */
		ret = -ESTALE;
		goto bad_inode;
	}
	inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
	ei->i_flags = le32_to_cpu(raw_inode->i_flags);
	ext2_set_inode_flags(inode);
	ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
	ei->i_frag_no = raw_inode->i_frag;
	ei->i_frag_size = raw_inode->i_fsize;
	ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
	ei->i_dir_acl = 0;

	if (ei->i_file_acl &&
	    !ext2_data_block_valid(EXT2_SB(sb), ei->i_file_acl, 1)) {
		ext2_error(sb, "ext2_iget", "bad extended attribute block %u",
			   ei->i_file_acl);
		ret = -EFSCORRUPTED;
		goto bad_inode;
	}

	if (S_ISREG(inode->i_mode))
		inode->i_size |= ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32;
	else
		ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
	if (i_size_read(inode) < 0) {
		ret = -EFSCORRUPTED;
		goto bad_inode;
	}
	ei->i_dtime = 0;
	inode->i_generation = le32_to_cpu(raw_inode->i_generation);
	ei->i_state = 0;
	ei->i_block_group = (ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
	ei->i_dir_start_lookup = 0;

	/*
	 * NOTE! The in-memory inode i_data array is in little-endian order
	 * even on big-endian machines: we do NOT byteswap the block numbers!
	 */
	for (n = 0; n < EXT2_N_BLOCKS; n++)
		ei->i_data[n] = raw_inode->i_block[n];

	if (S_ISREG(inode->i_mode)) {
		ext2_set_file_ops(inode);
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &ext2_dir_inode_operations;
		inode->i_fop = &ext2_dir_operations;
		if (test_opt(inode->i_sb, NOBH))
			inode->i_mapping->a_ops = &ext2_nobh_aops;
		else
			inode->i_mapping->a_ops = &ext2_aops;
	} else if (S_ISLNK(inode->i_mode)) {
		if (ext2_inode_is_fast_symlink(inode)) {
			inode->i_link = (char *)ei->i_data;
			inode->i_op = &ext2_fast_symlink_inode_operations;
			nd_terminate_link(ei->i_data, inode->i_size,
				sizeof(ei->i_data) - 1);
		} else {
			inode->i_op = &ext2_symlink_inode_operations;
			inode_nohighmem(inode);
			if (test_opt(inode->i_sb, NOBH))
				inode->i_mapping->a_ops = &ext2_nobh_aops;
			else
				inode->i_mapping->a_ops = &ext2_aops;
		}
	} else {
		inode->i_op = &ext2_special_inode_operations;
		if (raw_inode->i_block[0])
			init_special_inode(inode, inode->i_mode,
			   old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
		else
			init_special_inode(inode, inode->i_mode,
			   new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
	}
	brelse(bh);
	unlock_new_inode(inode);
	return inode;

bad_inode:
	brelse(bh);
	iget_failed(inode);
	return ERR_PTR(ret);
}

static int __ext2_write_inode(struct inode *inode, int do_sync)
{
	struct ext2_inode_info *ei = EXT2_I(inode);
	struct super_block *sb = inode->i_sb;
	ino_t ino = inode->i_ino;
	uid_t uid = i_uid_read(inode);
	gid_t gid = i_gid_read(inode);
	struct buffer_head *bh;
	struct ext2_inode *raw_inode = ext2_get_inode(sb, ino, &bh);
	int n;
	int err = 0;

	if (IS_ERR(raw_inode))
		return -EIO;

	/* For fields not tracked in the in-memory inode,
	 * initialise them to zero for new inodes. */
	if (ei->i_state & EXT2_STATE_NEW)
		memset(raw_inode, 0, EXT2_SB(sb)->s_inode_size);

	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
	if (!(test_opt(sb, NO_UID32))) {
		raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid));
		raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid));
/*
 * Fix up interoperability with old kernels. Otherwise, old inodes get
 * re-used with the upper 16 bits of the uid/gid intact
 */
		if (!ei->i_dtime) {
			raw_inode->i_uid_high = cpu_to_le16(high_16_bits(uid));
			raw_inode->i_gid_high = cpu_to_le16(high_16_bits(gid));
		} else {
			raw_inode->i_uid_high = 0;
			raw_inode->i_gid_high = 0;
		}
	} else {
		raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(uid));
		raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(gid));
		raw_inode->i_uid_high = 0;
		raw_inode->i_gid_high = 0;
	}
	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
	raw_inode->i_size = cpu_to_le32(inode->i_size);
	raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
	raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
	raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);

	raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
	raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
	raw_inode->i_flags = cpu_to_le32(ei->i_flags);
	raw_inode->i_faddr = cpu_to_le32(ei->i_faddr);
	raw_inode->i_frag = ei->i_frag_no;
	raw_inode->i_fsize = ei->i_frag_size;
	raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl);
	if (!S_ISREG(inode->i_mode))
		raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
	else {
		raw_inode->i_size_high = cpu_to_le32(inode->i_size >> 32);
		if (inode->i_size > 0x7fffffffULL) {
			if (!EXT2_HAS_RO_COMPAT_FEATURE(sb,
					EXT2_FEATURE_RO_COMPAT_LARGE_FILE) ||
			    EXT2_SB(sb)->s_es->s_rev_level ==
					cpu_to_le32(EXT2_GOOD_OLD_REV)) {
			       /* If this is the first large file
				* created, add a flag to the superblock.
				*/
				spin_lock(&EXT2_SB(sb)->s_lock);
				ext2_update_dynamic_rev(sb);
				EXT2_SET_RO_COMPAT_FEATURE(sb,
					EXT2_FEATURE_RO_COMPAT_LARGE_FILE);
				spin_unlock(&EXT2_SB(sb)->s_lock);
				ext2_sync_super(sb, EXT2_SB(sb)->s_es, 1);
			}
		}
	}

	raw_inode->i_generation = cpu_to_le32(inode->i_generation);
	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
		if (old_valid_dev(inode->i_rdev)) {
			raw_inode->i_block[0] =
				cpu_to_le32(old_encode_dev(inode->i_rdev));
			raw_inode->i_block[1] = 0;
		} else {
			raw_inode->i_block[0] = 0;
			raw_inode->i_block[1] =
				cpu_to_le32(new_encode_dev(inode->i_rdev));
			raw_inode->i_block[2] = 0;
		}
	} else for (n = 0; n < EXT2_N_BLOCKS; n++)
		raw_inode->i_block[n] = ei->i_data[n];
	mark_buffer_dirty(bh);
	if (do_sync) {
		sync_dirty_buffer(bh);
		if (buffer_req(bh) && !buffer_uptodate(bh)) {
			printk("IO error syncing ext2 inode [%s:%08lx]\n",
				sb->s_id, (unsigned long) ino);
			err = -EIO;
		}
	}
	ei->i_state &= ~EXT2_STATE_NEW;
	brelse(bh);
	return err;
}

int ext2_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
}

int ext2_getattr(struct user_namespace *mnt_userns, const struct path *path,
		 struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
	struct inode *inode = d_inode(path->dentry);
	struct ext2_inode_info *ei = EXT2_I(inode);
	unsigned int flags;

	flags = ei->i_flags & EXT2_FL_USER_VISIBLE;
	if (flags & EXT2_APPEND_FL)
		stat->attributes |= STATX_ATTR_APPEND;
	if (flags & EXT2_COMPR_FL)
		stat->attributes |= STATX_ATTR_COMPRESSED;
	if (flags & EXT2_IMMUTABLE_FL)
		stat->attributes |= STATX_ATTR_IMMUTABLE;
	if (flags & EXT2_NODUMP_FL)
		stat->attributes |= STATX_ATTR_NODUMP;
	stat->attributes_mask |= (STATX_ATTR_APPEND |
			STATX_ATTR_COMPRESSED |
			STATX_ATTR_ENCRYPTED |
			STATX_ATTR_IMMUTABLE |
			STATX_ATTR_NODUMP);

	generic_fillattr(&init_user_ns, inode, stat);
	return 0;
}

int ext2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
		 struct iattr *iattr)
{
	struct inode *inode = d_inode(dentry);
	int error;

	error = setattr_prepare(&init_user_ns, dentry, iattr);
	if (error)
		return error;

	if (is_quota_modification(inode, iattr)) {
		error = dquot_initialize(inode);
		if (error)
			return error;
	}
	if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) ||
	    (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) {
		error = dquot_transfer(inode, iattr);
		if (error)
			return error;
	}
	if (iattr->ia_valid & ATTR_SIZE && iattr->ia_size != inode->i_size) {
		error = ext2_setsize(inode, iattr->ia_size);
		if (error)
			return error;
	}
	setattr_copy(&init_user_ns, inode, iattr);
	if (iattr->ia_valid & ATTR_MODE)
		error = posix_acl_chmod(&init_user_ns, inode, inode->i_mode);
	mark_inode_dirty(inode);

	return error;
}