xref: /openbmc/linux/fs/ntfs/inode.c (revision 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2)
1 /**
2  * inode.c - NTFS kernel inode handling. Part of the Linux-NTFS project.
3  *
4  * Copyright (c) 2001-2004 Anton Altaparmakov
5  *
6  * This program/include file is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as published
8  * by the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program/include file is distributed in the hope that it will be
12  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
13  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program (in the main directory of the Linux-NTFS
18  * distribution in the file COPYING); if not, write to the Free Software
19  * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  */
21 
22 #include <linux/pagemap.h>
23 #include <linux/buffer_head.h>
24 #include <linux/smp_lock.h>
25 #include <linux/quotaops.h>
26 #include <linux/mount.h>
27 
28 #include "aops.h"
29 #include "dir.h"
30 #include "debug.h"
31 #include "inode.h"
32 #include "attrib.h"
33 #include "malloc.h"
34 #include "mft.h"
35 #include "time.h"
36 #include "ntfs.h"
37 
38 /**
39  * ntfs_test_inode - compare two (possibly fake) inodes for equality
40  * @vi:		vfs inode which to test
41  * @na:		ntfs attribute which is being tested with
42  *
43  * Compare the ntfs attribute embedded in the ntfs specific part of the vfs
44  * inode @vi for equality with the ntfs attribute @na.
45  *
46  * If searching for the normal file/directory inode, set @na->type to AT_UNUSED.
47  * @na->name and @na->name_len are then ignored.
48  *
49  * Return 1 if the attributes match and 0 if not.
50  *
51  * NOTE: This function runs with the inode_lock spin lock held so it is not
52  * allowed to sleep.
53  */
54 int ntfs_test_inode(struct inode *vi, ntfs_attr *na)
55 {
56 	ntfs_inode *ni;
57 
58 	if (vi->i_ino != na->mft_no)
59 		return 0;
60 	ni = NTFS_I(vi);
61 	/* If !NInoAttr(ni), @vi is a normal file or directory inode. */
62 	if (likely(!NInoAttr(ni))) {
63 		/* If not looking for a normal inode this is a mismatch. */
64 		if (unlikely(na->type != AT_UNUSED))
65 			return 0;
66 	} else {
67 		/* A fake inode describing an attribute. */
68 		if (ni->type != na->type)
69 			return 0;
70 		if (ni->name_len != na->name_len)
71 			return 0;
72 		if (na->name_len && memcmp(ni->name, na->name,
73 				na->name_len * sizeof(ntfschar)))
74 			return 0;
75 	}
76 	/* Match! */
77 	return 1;
78 }
79 
80 /**
81  * ntfs_init_locked_inode - initialize an inode
82  * @vi:		vfs inode to initialize
83  * @na:		ntfs attribute which to initialize @vi to
84  *
85  * Initialize the vfs inode @vi with the values from the ntfs attribute @na in
86  * order to enable ntfs_test_inode() to do its work.
87  *
88  * If initializing the normal file/directory inode, set @na->type to AT_UNUSED.
89  * In that case, @na->name and @na->name_len should be set to NULL and 0,
90  * respectively. Although that is not strictly necessary as
91  * ntfs_read_inode_locked() will fill them in later.
92  *
93  * Return 0 on success and -errno on error.
94  *
95  * NOTE: This function runs with the inode_lock spin lock held so it is not
96  * allowed to sleep. (Hence the GFP_ATOMIC allocation.)
97  */
98 static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na)
99 {
100 	ntfs_inode *ni = NTFS_I(vi);
101 
102 	vi->i_ino = na->mft_no;
103 
104 	ni->type = na->type;
105 	if (na->type == AT_INDEX_ALLOCATION)
106 		NInoSetMstProtected(ni);
107 
108 	ni->name = na->name;
109 	ni->name_len = na->name_len;
110 
111 	/* If initializing a normal inode, we are done. */
112 	if (likely(na->type == AT_UNUSED)) {
113 		BUG_ON(na->name);
114 		BUG_ON(na->name_len);
115 		return 0;
116 	}
117 
118 	/* It is a fake inode. */
119 	NInoSetAttr(ni);
120 
121 	/*
122 	 * We have I30 global constant as an optimization as it is the name
123 	 * in >99.9% of named attributes! The other <0.1% incur a GFP_ATOMIC
124 	 * allocation but that is ok. And most attributes are unnamed anyway,
125 	 * thus the fraction of named attributes with name != I30 is actually
126 	 * absolutely tiny.
127 	 */
128 	if (na->name_len && na->name != I30) {
129 		unsigned int i;
130 
131 		BUG_ON(!na->name);
132 		i = na->name_len * sizeof(ntfschar);
133 		ni->name = (ntfschar*)kmalloc(i + sizeof(ntfschar), GFP_ATOMIC);
134 		if (!ni->name)
135 			return -ENOMEM;
136 		memcpy(ni->name, na->name, i);
137 		ni->name[i] = 0;
138 	}
139 	return 0;
140 }
141 
142 typedef int (*set_t)(struct inode *, void *);
143 static int ntfs_read_locked_inode(struct inode *vi);
144 static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi);
145 static int ntfs_read_locked_index_inode(struct inode *base_vi,
146 		struct inode *vi);
147 
148 /**
149  * ntfs_iget - obtain a struct inode corresponding to a specific normal inode
150  * @sb:		super block of mounted volume
151  * @mft_no:	mft record number / inode number to obtain
152  *
153  * Obtain the struct inode corresponding to a specific normal inode (i.e. a
154  * file or directory).
155  *
156  * If the inode is in the cache, it is just returned with an increased
157  * reference count. Otherwise, a new struct inode is allocated and initialized,
158  * and finally ntfs_read_locked_inode() is called to read in the inode and
159  * fill in the remainder of the inode structure.
160  *
161  * Return the struct inode on success. Check the return value with IS_ERR() and
162  * if true, the function failed and the error code is obtained from PTR_ERR().
163  */
164 struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no)
165 {
166 	struct inode *vi;
167 	ntfs_attr na;
168 	int err;
169 
170 	na.mft_no = mft_no;
171 	na.type = AT_UNUSED;
172 	na.name = NULL;
173 	na.name_len = 0;
174 
175 	vi = iget5_locked(sb, mft_no, (test_t)ntfs_test_inode,
176 			(set_t)ntfs_init_locked_inode, &na);
177 	if (!vi)
178 		return ERR_PTR(-ENOMEM);
179 
180 	err = 0;
181 
182 	/* If this is a freshly allocated inode, need to read it now. */
183 	if (vi->i_state & I_NEW) {
184 		err = ntfs_read_locked_inode(vi);
185 		unlock_new_inode(vi);
186 	}
187 	/*
188 	 * There is no point in keeping bad inodes around if the failure was
189 	 * due to ENOMEM. We want to be able to retry again later.
190 	 */
191 	if (err == -ENOMEM) {
192 		iput(vi);
193 		vi = ERR_PTR(err);
194 	}
195 	return vi;
196 }
197 
198 /**
199  * ntfs_attr_iget - obtain a struct inode corresponding to an attribute
200  * @base_vi:	vfs base inode containing the attribute
201  * @type:	attribute type
202  * @name:	Unicode name of the attribute (NULL if unnamed)
203  * @name_len:	length of @name in Unicode characters (0 if unnamed)
204  *
205  * Obtain the (fake) struct inode corresponding to the attribute specified by
206  * @type, @name, and @name_len, which is present in the base mft record
207  * specified by the vfs inode @base_vi.
208  *
209  * If the attribute inode is in the cache, it is just returned with an
210  * increased reference count. Otherwise, a new struct inode is allocated and
211  * initialized, and finally ntfs_read_locked_attr_inode() is called to read the
212  * attribute and fill in the inode structure.
213  *
214  * Note, for index allocation attributes, you need to use ntfs_index_iget()
215  * instead of ntfs_attr_iget() as working with indices is a lot more complex.
216  *
217  * Return the struct inode of the attribute inode on success. Check the return
218  * value with IS_ERR() and if true, the function failed and the error code is
219  * obtained from PTR_ERR().
220  */
221 struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPE type,
222 		ntfschar *name, u32 name_len)
223 {
224 	struct inode *vi;
225 	ntfs_attr na;
226 	int err;
227 
228 	/* Make sure no one calls ntfs_attr_iget() for indices. */
229 	BUG_ON(type == AT_INDEX_ALLOCATION);
230 
231 	na.mft_no = base_vi->i_ino;
232 	na.type = type;
233 	na.name = name;
234 	na.name_len = name_len;
235 
236 	vi = iget5_locked(base_vi->i_sb, na.mft_no, (test_t)ntfs_test_inode,
237 			(set_t)ntfs_init_locked_inode, &na);
238 	if (!vi)
239 		return ERR_PTR(-ENOMEM);
240 
241 	err = 0;
242 
243 	/* If this is a freshly allocated inode, need to read it now. */
244 	if (vi->i_state & I_NEW) {
245 		err = ntfs_read_locked_attr_inode(base_vi, vi);
246 		unlock_new_inode(vi);
247 	}
248 	/*
249 	 * There is no point in keeping bad attribute inodes around. This also
250 	 * simplifies things in that we never need to check for bad attribute
251 	 * inodes elsewhere.
252 	 */
253 	if (err) {
254 		iput(vi);
255 		vi = ERR_PTR(err);
256 	}
257 	return vi;
258 }
259 
260 /**
261  * ntfs_index_iget - obtain a struct inode corresponding to an index
262  * @base_vi:	vfs base inode containing the index related attributes
263  * @name:	Unicode name of the index
264  * @name_len:	length of @name in Unicode characters
265  *
266  * Obtain the (fake) struct inode corresponding to the index specified by @name
267  * and @name_len, which is present in the base mft record specified by the vfs
268  * inode @base_vi.
269  *
270  * If the index inode is in the cache, it is just returned with an increased
271  * reference count.  Otherwise, a new struct inode is allocated and
272  * initialized, and finally ntfs_read_locked_index_inode() is called to read
273  * the index related attributes and fill in the inode structure.
274  *
275  * Return the struct inode of the index inode on success. Check the return
276  * value with IS_ERR() and if true, the function failed and the error code is
277  * obtained from PTR_ERR().
278  */
279 struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name,
280 		u32 name_len)
281 {
282 	struct inode *vi;
283 	ntfs_attr na;
284 	int err;
285 
286 	na.mft_no = base_vi->i_ino;
287 	na.type = AT_INDEX_ALLOCATION;
288 	na.name = name;
289 	na.name_len = name_len;
290 
291 	vi = iget5_locked(base_vi->i_sb, na.mft_no, (test_t)ntfs_test_inode,
292 			(set_t)ntfs_init_locked_inode, &na);
293 	if (!vi)
294 		return ERR_PTR(-ENOMEM);
295 
296 	err = 0;
297 
298 	/* If this is a freshly allocated inode, need to read it now. */
299 	if (vi->i_state & I_NEW) {
300 		err = ntfs_read_locked_index_inode(base_vi, vi);
301 		unlock_new_inode(vi);
302 	}
303 	/*
304 	 * There is no point in keeping bad index inodes around.  This also
305 	 * simplifies things in that we never need to check for bad index
306 	 * inodes elsewhere.
307 	 */
308 	if (err) {
309 		iput(vi);
310 		vi = ERR_PTR(err);
311 	}
312 	return vi;
313 }
314 
315 struct inode *ntfs_alloc_big_inode(struct super_block *sb)
316 {
317 	ntfs_inode *ni;
318 
319 	ntfs_debug("Entering.");
320 	ni = (ntfs_inode *)kmem_cache_alloc(ntfs_big_inode_cache,
321 			SLAB_NOFS);
322 	if (likely(ni != NULL)) {
323 		ni->state = 0;
324 		return VFS_I(ni);
325 	}
326 	ntfs_error(sb, "Allocation of NTFS big inode structure failed.");
327 	return NULL;
328 }
329 
330 void ntfs_destroy_big_inode(struct inode *inode)
331 {
332 	ntfs_inode *ni = NTFS_I(inode);
333 
334 	ntfs_debug("Entering.");
335 	BUG_ON(ni->page);
336 	if (!atomic_dec_and_test(&ni->count))
337 		BUG();
338 	kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
339 }
340 
341 static inline ntfs_inode *ntfs_alloc_extent_inode(void)
342 {
343 	ntfs_inode *ni;
344 
345 	ntfs_debug("Entering.");
346 	ni = (ntfs_inode *)kmem_cache_alloc(ntfs_inode_cache, SLAB_NOFS);
347 	if (likely(ni != NULL)) {
348 		ni->state = 0;
349 		return ni;
350 	}
351 	ntfs_error(NULL, "Allocation of NTFS inode structure failed.");
352 	return NULL;
353 }
354 
355 static void ntfs_destroy_extent_inode(ntfs_inode *ni)
356 {
357 	ntfs_debug("Entering.");
358 	BUG_ON(ni->page);
359 	if (!atomic_dec_and_test(&ni->count))
360 		BUG();
361 	kmem_cache_free(ntfs_inode_cache, ni);
362 }
363 
364 /**
365  * __ntfs_init_inode - initialize ntfs specific part of an inode
366  * @sb:		super block of mounted volume
367  * @ni:		freshly allocated ntfs inode which to initialize
368  *
369  * Initialize an ntfs inode to defaults.
370  *
371  * NOTE: ni->mft_no, ni->state, ni->type, ni->name, and ni->name_len are left
372  * untouched. Make sure to initialize them elsewhere.
373  *
374  * Return zero on success and -ENOMEM on error.
375  */
376 void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
377 {
378 	ntfs_debug("Entering.");
379 	ni->initialized_size = ni->allocated_size = 0;
380 	ni->seq_no = 0;
381 	atomic_set(&ni->count, 1);
382 	ni->vol = NTFS_SB(sb);
383 	ntfs_init_runlist(&ni->runlist);
384 	init_MUTEX(&ni->mrec_lock);
385 	ni->page = NULL;
386 	ni->page_ofs = 0;
387 	ni->attr_list_size = 0;
388 	ni->attr_list = NULL;
389 	ntfs_init_runlist(&ni->attr_list_rl);
390 	ni->itype.index.bmp_ino = NULL;
391 	ni->itype.index.block_size = 0;
392 	ni->itype.index.vcn_size = 0;
393 	ni->itype.index.collation_rule = 0;
394 	ni->itype.index.block_size_bits = 0;
395 	ni->itype.index.vcn_size_bits = 0;
396 	init_MUTEX(&ni->extent_lock);
397 	ni->nr_extents = 0;
398 	ni->ext.base_ntfs_ino = NULL;
399 }
400 
401 inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,
402 		unsigned long mft_no)
403 {
404 	ntfs_inode *ni = ntfs_alloc_extent_inode();
405 
406 	ntfs_debug("Entering.");
407 	if (likely(ni != NULL)) {
408 		__ntfs_init_inode(sb, ni);
409 		ni->mft_no = mft_no;
410 		ni->type = AT_UNUSED;
411 		ni->name = NULL;
412 		ni->name_len = 0;
413 	}
414 	return ni;
415 }
416 
417 /**
418  * ntfs_is_extended_system_file - check if a file is in the $Extend directory
419  * @ctx:	initialized attribute search context
420  *
421  * Search all file name attributes in the inode described by the attribute
422  * search context @ctx and check if any of the names are in the $Extend system
423  * directory.
424  *
425  * Return values:
426  *	   1: file is in $Extend directory
427  *	   0: file is not in $Extend directory
428  *    -errno: failed to determine if the file is in the $Extend directory
429  */
430 static int ntfs_is_extended_system_file(ntfs_attr_search_ctx *ctx)
431 {
432 	int nr_links, err;
433 
434 	/* Restart search. */
435 	ntfs_attr_reinit_search_ctx(ctx);
436 
437 	/* Get number of hard links. */
438 	nr_links = le16_to_cpu(ctx->mrec->link_count);
439 
440 	/* Loop through all hard links. */
441 	while (!(err = ntfs_attr_lookup(AT_FILE_NAME, NULL, 0, 0, 0, NULL, 0,
442 			ctx))) {
443 		FILE_NAME_ATTR *file_name_attr;
444 		ATTR_RECORD *attr = ctx->attr;
445 		u8 *p, *p2;
446 
447 		nr_links--;
448 		/*
449 		 * Maximum sanity checking as we are called on an inode that
450 		 * we suspect might be corrupt.
451 		 */
452 		p = (u8*)attr + le32_to_cpu(attr->length);
453 		if (p < (u8*)ctx->mrec || (u8*)p > (u8*)ctx->mrec +
454 				le32_to_cpu(ctx->mrec->bytes_in_use)) {
455 err_corrupt_attr:
456 			ntfs_error(ctx->ntfs_ino->vol->sb, "Corrupt file name "
457 					"attribute. You should run chkdsk.");
458 			return -EIO;
459 		}
460 		if (attr->non_resident) {
461 			ntfs_error(ctx->ntfs_ino->vol->sb, "Non-resident file "
462 					"name. You should run chkdsk.");
463 			return -EIO;
464 		}
465 		if (attr->flags) {
466 			ntfs_error(ctx->ntfs_ino->vol->sb, "File name with "
467 					"invalid flags. You should run "
468 					"chkdsk.");
469 			return -EIO;
470 		}
471 		if (!(attr->data.resident.flags & RESIDENT_ATTR_IS_INDEXED)) {
472 			ntfs_error(ctx->ntfs_ino->vol->sb, "Unindexed file "
473 					"name. You should run chkdsk.");
474 			return -EIO;
475 		}
476 		file_name_attr = (FILE_NAME_ATTR*)((u8*)attr +
477 				le16_to_cpu(attr->data.resident.value_offset));
478 		p2 = (u8*)attr + le32_to_cpu(attr->data.resident.value_length);
479 		if (p2 < (u8*)attr || p2 > p)
480 			goto err_corrupt_attr;
481 		/* This attribute is ok, but is it in the $Extend directory? */
482 		if (MREF_LE(file_name_attr->parent_directory) == FILE_Extend)
483 			return 1;	/* YES, it's an extended system file. */
484 	}
485 	if (unlikely(err != -ENOENT))
486 		return err;
487 	if (unlikely(nr_links)) {
488 		ntfs_error(ctx->ntfs_ino->vol->sb, "Inode hard link count "
489 				"doesn't match number of name attributes. You "
490 				"should run chkdsk.");
491 		return -EIO;
492 	}
493 	return 0;	/* NO, it is not an extended system file. */
494 }
495 
496 /**
497  * ntfs_read_locked_inode - read an inode from its device
498  * @vi:		inode to read
499  *
500  * ntfs_read_locked_inode() is called from ntfs_iget() to read the inode
501  * described by @vi into memory from the device.
502  *
503  * The only fields in @vi that we need to/can look at when the function is
504  * called are i_sb, pointing to the mounted device's super block, and i_ino,
505  * the number of the inode to load.
506  *
507  * ntfs_read_locked_inode() maps, pins and locks the mft record number i_ino
508  * for reading and sets up the necessary @vi fields as well as initializing
509  * the ntfs inode.
510  *
511  * Q: What locks are held when the function is called?
512  * A: i_state has I_LOCK set, hence the inode is locked, also
513  *    i_count is set to 1, so it is not going to go away
514  *    i_flags is set to 0 and we have no business touching it.  Only an ioctl()
515  *    is allowed to write to them. We should of course be honouring them but
516  *    we need to do that using the IS_* macros defined in include/linux/fs.h.
517  *    In any case ntfs_read_locked_inode() has nothing to do with i_flags.
518  *
519  * Return 0 on success and -errno on error.  In the error case, the inode will
520  * have had make_bad_inode() executed on it.
521  */
522 static int ntfs_read_locked_inode(struct inode *vi)
523 {
524 	ntfs_volume *vol = NTFS_SB(vi->i_sb);
525 	ntfs_inode *ni;
526 	MFT_RECORD *m;
527 	STANDARD_INFORMATION *si;
528 	ntfs_attr_search_ctx *ctx;
529 	int err = 0;
530 
531 	ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino);
532 
533 	/* Setup the generic vfs inode parts now. */
534 
535 	/* This is the optimal IO size (for stat), not the fs block size. */
536 	vi->i_blksize = PAGE_CACHE_SIZE;
537 	/*
538 	 * This is for checking whether an inode has changed w.r.t. a file so
539 	 * that the file can be updated if necessary (compare with f_version).
540 	 */
541 	vi->i_version = 1;
542 
543 	vi->i_uid = vol->uid;
544 	vi->i_gid = vol->gid;
545 	vi->i_mode = 0;
546 
547 	/*
548 	 * Initialize the ntfs specific part of @vi special casing
549 	 * FILE_MFT which we need to do at mount time.
550 	 */
551 	if (vi->i_ino != FILE_MFT)
552 		ntfs_init_big_inode(vi);
553 	ni = NTFS_I(vi);
554 
555 	m = map_mft_record(ni);
556 	if (IS_ERR(m)) {
557 		err = PTR_ERR(m);
558 		goto err_out;
559 	}
560 	ctx = ntfs_attr_get_search_ctx(ni, m);
561 	if (!ctx) {
562 		err = -ENOMEM;
563 		goto unm_err_out;
564 	}
565 
566 	if (!(m->flags & MFT_RECORD_IN_USE)) {
567 		ntfs_error(vi->i_sb, "Inode is not in use!");
568 		goto unm_err_out;
569 	}
570 	if (m->base_mft_record) {
571 		ntfs_error(vi->i_sb, "Inode is an extent inode!");
572 		goto unm_err_out;
573 	}
574 
575 	/* Transfer information from mft record into vfs and ntfs inodes. */
576 	vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number);
577 
578 	/*
579 	 * FIXME: Keep in mind that link_count is two for files which have both
580 	 * a long file name and a short file name as separate entries, so if
581 	 * we are hiding short file names this will be too high. Either we need
582 	 * to account for the short file names by subtracting them or we need
583 	 * to make sure we delete files even though i_nlink is not zero which
584 	 * might be tricky due to vfs interactions. Need to think about this
585 	 * some more when implementing the unlink command.
586 	 */
587 	vi->i_nlink = le16_to_cpu(m->link_count);
588 	/*
589 	 * FIXME: Reparse points can have the directory bit set even though
590 	 * they would be S_IFLNK. Need to deal with this further below when we
591 	 * implement reparse points / symbolic links but it will do for now.
592 	 * Also if not a directory, it could be something else, rather than
593 	 * a regular file. But again, will do for now.
594 	 */
595 	/* Everyone gets all permissions. */
596 	vi->i_mode |= S_IRWXUGO;
597 	/* If read-only, noone gets write permissions. */
598 	if (IS_RDONLY(vi))
599 		vi->i_mode &= ~S_IWUGO;
600 	if (m->flags & MFT_RECORD_IS_DIRECTORY) {
601 		vi->i_mode |= S_IFDIR;
602 		/*
603 		 * Apply the directory permissions mask set in the mount
604 		 * options.
605 		 */
606 		vi->i_mode &= ~vol->dmask;
607 		/* Things break without this kludge! */
608 		if (vi->i_nlink > 1)
609 			vi->i_nlink = 1;
610 	} else {
611 		vi->i_mode |= S_IFREG;
612 		/* Apply the file permissions mask set in the mount options. */
613 		vi->i_mode &= ~vol->fmask;
614 	}
615 	/*
616 	 * Find the standard information attribute in the mft record. At this
617 	 * stage we haven't setup the attribute list stuff yet, so this could
618 	 * in fact fail if the standard information is in an extent record, but
619 	 * I don't think this actually ever happens.
620 	 */
621 	err = ntfs_attr_lookup(AT_STANDARD_INFORMATION, NULL, 0, 0, 0, NULL, 0,
622 			ctx);
623 	if (unlikely(err)) {
624 		if (err == -ENOENT) {
625 			/*
626 			 * TODO: We should be performing a hot fix here (if the
627 			 * recover mount option is set) by creating a new
628 			 * attribute.
629 			 */
630 			ntfs_error(vi->i_sb, "$STANDARD_INFORMATION attribute "
631 					"is missing.");
632 		}
633 		goto unm_err_out;
634 	}
635 	/* Get the standard information attribute value. */
636 	si = (STANDARD_INFORMATION*)((char*)ctx->attr +
637 			le16_to_cpu(ctx->attr->data.resident.value_offset));
638 
639 	/* Transfer information from the standard information into vi. */
640 	/*
641 	 * Note: The i_?times do not quite map perfectly onto the NTFS times,
642 	 * but they are close enough, and in the end it doesn't really matter
643 	 * that much...
644 	 */
645 	/*
646 	 * mtime is the last change of the data within the file. Not changed
647 	 * when only metadata is changed, e.g. a rename doesn't affect mtime.
648 	 */
649 	vi->i_mtime = ntfs2utc(si->last_data_change_time);
650 	/*
651 	 * ctime is the last change of the metadata of the file. This obviously
652 	 * always changes, when mtime is changed. ctime can be changed on its
653 	 * own, mtime is then not changed, e.g. when a file is renamed.
654 	 */
655 	vi->i_ctime = ntfs2utc(si->last_mft_change_time);
656 	/*
657 	 * Last access to the data within the file. Not changed during a rename
658 	 * for example but changed whenever the file is written to.
659 	 */
660 	vi->i_atime = ntfs2utc(si->last_access_time);
661 
662 	/* Find the attribute list attribute if present. */
663 	ntfs_attr_reinit_search_ctx(ctx);
664 	err = ntfs_attr_lookup(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx);
665 	if (err) {
666 		if (unlikely(err != -ENOENT)) {
667 			ntfs_error(vi->i_sb, "Failed to lookup attribute list "
668 					"attribute.");
669 			goto unm_err_out;
670 		}
671 	} else /* if (!err) */ {
672 		if (vi->i_ino == FILE_MFT)
673 			goto skip_attr_list_load;
674 		ntfs_debug("Attribute list found in inode 0x%lx.", vi->i_ino);
675 		NInoSetAttrList(ni);
676 		if (ctx->attr->flags & ATTR_IS_ENCRYPTED ||
677 				ctx->attr->flags & ATTR_COMPRESSION_MASK ||
678 				ctx->attr->flags & ATTR_IS_SPARSE) {
679 			ntfs_error(vi->i_sb, "Attribute list attribute is "
680 					"compressed/encrypted/sparse.");
681 			goto unm_err_out;
682 		}
683 		/* Now allocate memory for the attribute list. */
684 		ni->attr_list_size = (u32)ntfs_attr_size(ctx->attr);
685 		ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
686 		if (!ni->attr_list) {
687 			ntfs_error(vi->i_sb, "Not enough memory to allocate "
688 					"buffer for attribute list.");
689 			err = -ENOMEM;
690 			goto unm_err_out;
691 		}
692 		if (ctx->attr->non_resident) {
693 			NInoSetAttrListNonResident(ni);
694 			if (ctx->attr->data.non_resident.lowest_vcn) {
695 				ntfs_error(vi->i_sb, "Attribute list has non "
696 						"zero lowest_vcn.");
697 				goto unm_err_out;
698 			}
699 			/*
700 			 * Setup the runlist. No need for locking as we have
701 			 * exclusive access to the inode at this time.
702 			 */
703 			ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol,
704 					ctx->attr, NULL);
705 			if (IS_ERR(ni->attr_list_rl.rl)) {
706 				err = PTR_ERR(ni->attr_list_rl.rl);
707 				ni->attr_list_rl.rl = NULL;
708 				ntfs_error(vi->i_sb, "Mapping pairs "
709 						"decompression failed.");
710 				goto unm_err_out;
711 			}
712 			/* Now load the attribute list. */
713 			if ((err = load_attribute_list(vol, &ni->attr_list_rl,
714 					ni->attr_list, ni->attr_list_size,
715 					sle64_to_cpu(ctx->attr->data.
716 					non_resident.initialized_size)))) {
717 				ntfs_error(vi->i_sb, "Failed to load "
718 						"attribute list attribute.");
719 				goto unm_err_out;
720 			}
721 		} else /* if (!ctx.attr->non_resident) */ {
722 			if ((u8*)ctx->attr + le16_to_cpu(
723 					ctx->attr->data.resident.value_offset) +
724 					le32_to_cpu(
725 					ctx->attr->data.resident.value_length) >
726 					(u8*)ctx->mrec + vol->mft_record_size) {
727 				ntfs_error(vi->i_sb, "Corrupt attribute list "
728 						"in inode.");
729 				goto unm_err_out;
730 			}
731 			/* Now copy the attribute list. */
732 			memcpy(ni->attr_list, (u8*)ctx->attr + le16_to_cpu(
733 					ctx->attr->data.resident.value_offset),
734 					le32_to_cpu(
735 					ctx->attr->data.resident.value_length));
736 		}
737 	}
738 skip_attr_list_load:
739 	/*
740 	 * If an attribute list is present we now have the attribute list value
741 	 * in ntfs_ino->attr_list and it is ntfs_ino->attr_list_size bytes.
742 	 */
743 	if (S_ISDIR(vi->i_mode)) {
744 		struct inode *bvi;
745 		ntfs_inode *bni;
746 		INDEX_ROOT *ir;
747 		char *ir_end, *index_end;
748 
749 		/* It is a directory, find index root attribute. */
750 		ntfs_attr_reinit_search_ctx(ctx);
751 		err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE,
752 				0, NULL, 0, ctx);
753 		if (unlikely(err)) {
754 			if (err == -ENOENT) {
755 				// FIXME: File is corrupt! Hot-fix with empty
756 				// index root attribute if recovery option is
757 				// set.
758 				ntfs_error(vi->i_sb, "$INDEX_ROOT attribute "
759 						"is missing.");
760 			}
761 			goto unm_err_out;
762 		}
763 		/* Set up the state. */
764 		if (unlikely(ctx->attr->non_resident)) {
765 			ntfs_error(vol->sb, "$INDEX_ROOT attribute is not "
766 					"resident.");
767 			goto unm_err_out;
768 		}
769 		/* Ensure the attribute name is placed before the value. */
770 		if (unlikely(ctx->attr->name_length &&
771 				(le16_to_cpu(ctx->attr->name_offset) >=
772 				le16_to_cpu(ctx->attr->data.resident.
773 				value_offset)))) {
774 			ntfs_error(vol->sb, "$INDEX_ROOT attribute name is "
775 					"placed after the attribute value.");
776 			goto unm_err_out;
777 		}
778 		/*
779 		 * Compressed/encrypted index root just means that the newly
780 		 * created files in that directory should be created compressed/
781 		 * encrypted. However index root cannot be both compressed and
782 		 * encrypted.
783 		 */
784 		if (ctx->attr->flags & ATTR_COMPRESSION_MASK)
785 			NInoSetCompressed(ni);
786 		if (ctx->attr->flags & ATTR_IS_ENCRYPTED) {
787 			if (ctx->attr->flags & ATTR_COMPRESSION_MASK) {
788 				ntfs_error(vi->i_sb, "Found encrypted and "
789 						"compressed attribute.");
790 				goto unm_err_out;
791 			}
792 			NInoSetEncrypted(ni);
793 		}
794 		if (ctx->attr->flags & ATTR_IS_SPARSE)
795 			NInoSetSparse(ni);
796 		ir = (INDEX_ROOT*)((char*)ctx->attr + le16_to_cpu(
797 				ctx->attr->data.resident.value_offset));
798 		ir_end = (char*)ir + le32_to_cpu(
799 				ctx->attr->data.resident.value_length);
800 		if (ir_end > (char*)ctx->mrec + vol->mft_record_size) {
801 			ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is "
802 					"corrupt.");
803 			goto unm_err_out;
804 		}
805 		index_end = (char*)&ir->index +
806 				le32_to_cpu(ir->index.index_length);
807 		if (index_end > ir_end) {
808 			ntfs_error(vi->i_sb, "Directory index is corrupt.");
809 			goto unm_err_out;
810 		}
811 		if (ir->type != AT_FILE_NAME) {
812 			ntfs_error(vi->i_sb, "Indexed attribute is not "
813 					"$FILE_NAME.");
814 			goto unm_err_out;
815 		}
816 		if (ir->collation_rule != COLLATION_FILE_NAME) {
817 			ntfs_error(vi->i_sb, "Index collation rule is not "
818 					"COLLATION_FILE_NAME.");
819 			goto unm_err_out;
820 		}
821 		ni->itype.index.collation_rule = ir->collation_rule;
822 		ni->itype.index.block_size = le32_to_cpu(ir->index_block_size);
823 		if (ni->itype.index.block_size &
824 				(ni->itype.index.block_size - 1)) {
825 			ntfs_error(vi->i_sb, "Index block size (%u) is not a "
826 					"power of two.",
827 					ni->itype.index.block_size);
828 			goto unm_err_out;
829 		}
830 		if (ni->itype.index.block_size > PAGE_CACHE_SIZE) {
831 			ntfs_error(vi->i_sb, "Index block size (%u) > "
832 					"PAGE_CACHE_SIZE (%ld) is not "
833 					"supported.  Sorry.",
834 					ni->itype.index.block_size,
835 					PAGE_CACHE_SIZE);
836 			err = -EOPNOTSUPP;
837 			goto unm_err_out;
838 		}
839 		if (ni->itype.index.block_size < NTFS_BLOCK_SIZE) {
840 			ntfs_error(vi->i_sb, "Index block size (%u) < "
841 					"NTFS_BLOCK_SIZE (%i) is not "
842 					"supported.  Sorry.",
843 					ni->itype.index.block_size,
844 					NTFS_BLOCK_SIZE);
845 			err = -EOPNOTSUPP;
846 			goto unm_err_out;
847 		}
848 		ni->itype.index.block_size_bits =
849 				ffs(ni->itype.index.block_size) - 1;
850 		/* Determine the size of a vcn in the directory index. */
851 		if (vol->cluster_size <= ni->itype.index.block_size) {
852 			ni->itype.index.vcn_size = vol->cluster_size;
853 			ni->itype.index.vcn_size_bits = vol->cluster_size_bits;
854 		} else {
855 			ni->itype.index.vcn_size = vol->sector_size;
856 			ni->itype.index.vcn_size_bits = vol->sector_size_bits;
857 		}
858 
859 		/* Setup the index allocation attribute, even if not present. */
860 		NInoSetMstProtected(ni);
861 		ni->type = AT_INDEX_ALLOCATION;
862 		ni->name = I30;
863 		ni->name_len = 4;
864 
865 		if (!(ir->index.flags & LARGE_INDEX)) {
866 			/* No index allocation. */
867 			vi->i_size = ni->initialized_size =
868 					ni->allocated_size = 0;
869 			/* We are done with the mft record, so we release it. */
870 			ntfs_attr_put_search_ctx(ctx);
871 			unmap_mft_record(ni);
872 			m = NULL;
873 			ctx = NULL;
874 			goto skip_large_dir_stuff;
875 		} /* LARGE_INDEX: Index allocation present. Setup state. */
876 		NInoSetIndexAllocPresent(ni);
877 		/* Find index allocation attribute. */
878 		ntfs_attr_reinit_search_ctx(ctx);
879 		err = ntfs_attr_lookup(AT_INDEX_ALLOCATION, I30, 4,
880 				CASE_SENSITIVE, 0, NULL, 0, ctx);
881 		if (unlikely(err)) {
882 			if (err == -ENOENT)
883 				ntfs_error(vi->i_sb, "$INDEX_ALLOCATION "
884 						"attribute is not present but "
885 						"$INDEX_ROOT indicated it is.");
886 			else
887 				ntfs_error(vi->i_sb, "Failed to lookup "
888 						"$INDEX_ALLOCATION "
889 						"attribute.");
890 			goto unm_err_out;
891 		}
892 		if (!ctx->attr->non_resident) {
893 			ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
894 					"is resident.");
895 			goto unm_err_out;
896 		}
897 		/*
898 		 * Ensure the attribute name is placed before the mapping pairs
899 		 * array.
900 		 */
901 		if (unlikely(ctx->attr->name_length &&
902 				(le16_to_cpu(ctx->attr->name_offset) >=
903 				le16_to_cpu(ctx->attr->data.non_resident.
904 				mapping_pairs_offset)))) {
905 			ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name "
906 					"is placed after the mapping pairs "
907 					"array.");
908 			goto unm_err_out;
909 		}
910 		if (ctx->attr->flags & ATTR_IS_ENCRYPTED) {
911 			ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
912 					"is encrypted.");
913 			goto unm_err_out;
914 		}
915 		if (ctx->attr->flags & ATTR_IS_SPARSE) {
916 			ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
917 					"is sparse.");
918 			goto unm_err_out;
919 		}
920 		if (ctx->attr->flags & ATTR_COMPRESSION_MASK) {
921 			ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
922 					"is compressed.");
923 			goto unm_err_out;
924 		}
925 		if (ctx->attr->data.non_resident.lowest_vcn) {
926 			ntfs_error(vi->i_sb, "First extent of "
927 					"$INDEX_ALLOCATION attribute has non "
928 					"zero lowest_vcn.");
929 			goto unm_err_out;
930 		}
931 		vi->i_size = sle64_to_cpu(
932 				ctx->attr->data.non_resident.data_size);
933 		ni->initialized_size = sle64_to_cpu(
934 				ctx->attr->data.non_resident.initialized_size);
935 		ni->allocated_size = sle64_to_cpu(
936 				ctx->attr->data.non_resident.allocated_size);
937 		/*
938 		 * We are done with the mft record, so we release it. Otherwise
939 		 * we would deadlock in ntfs_attr_iget().
940 		 */
941 		ntfs_attr_put_search_ctx(ctx);
942 		unmap_mft_record(ni);
943 		m = NULL;
944 		ctx = NULL;
945 		/* Get the index bitmap attribute inode. */
946 		bvi = ntfs_attr_iget(vi, AT_BITMAP, I30, 4);
947 		if (IS_ERR(bvi)) {
948 			ntfs_error(vi->i_sb, "Failed to get bitmap attribute.");
949 			err = PTR_ERR(bvi);
950 			goto unm_err_out;
951 		}
952 		ni->itype.index.bmp_ino = bvi;
953 		bni = NTFS_I(bvi);
954 		if (NInoCompressed(bni) || NInoEncrypted(bni) ||
955 				NInoSparse(bni)) {
956 			ntfs_error(vi->i_sb, "$BITMAP attribute is compressed "
957 					"and/or encrypted and/or sparse.");
958 			goto unm_err_out;
959 		}
960 		/* Consistency check bitmap size vs. index allocation size. */
961 		if ((bvi->i_size << 3) < (vi->i_size >>
962 				ni->itype.index.block_size_bits)) {
963 			ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) "
964 					"for index allocation (0x%llx).",
965 					bvi->i_size << 3, vi->i_size);
966 			goto unm_err_out;
967 		}
968 skip_large_dir_stuff:
969 		/* Setup the operations for this inode. */
970 		vi->i_op = &ntfs_dir_inode_ops;
971 		vi->i_fop = &ntfs_dir_ops;
972 	} else {
973 		/* It is a file. */
974 		ntfs_attr_reinit_search_ctx(ctx);
975 
976 		/* Setup the data attribute, even if not present. */
977 		ni->type = AT_DATA;
978 		ni->name = NULL;
979 		ni->name_len = 0;
980 
981 		/* Find first extent of the unnamed data attribute. */
982 		err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, 0, NULL, 0, ctx);
983 		if (unlikely(err)) {
984 			vi->i_size = ni->initialized_size =
985 					ni->allocated_size = 0;
986 			if (err != -ENOENT) {
987 				ntfs_error(vi->i_sb, "Failed to lookup $DATA "
988 						"attribute.");
989 				goto unm_err_out;
990 			}
991 			/*
992 			 * FILE_Secure does not have an unnamed $DATA
993 			 * attribute, so we special case it here.
994 			 */
995 			if (vi->i_ino == FILE_Secure)
996 				goto no_data_attr_special_case;
997 			/*
998 			 * Most if not all the system files in the $Extend
999 			 * system directory do not have unnamed data
1000 			 * attributes so we need to check if the parent
1001 			 * directory of the file is FILE_Extend and if it is
1002 			 * ignore this error. To do this we need to get the
1003 			 * name of this inode from the mft record as the name
1004 			 * contains the back reference to the parent directory.
1005 			 */
1006 			if (ntfs_is_extended_system_file(ctx) > 0)
1007 				goto no_data_attr_special_case;
1008 			// FIXME: File is corrupt! Hot-fix with empty data
1009 			// attribute if recovery option is set.
1010 			ntfs_error(vi->i_sb, "$DATA attribute is missing.");
1011 			goto unm_err_out;
1012 		}
1013 		/* Setup the state. */
1014 		if (ctx->attr->non_resident) {
1015 			NInoSetNonResident(ni);
1016 			if (ctx->attr->flags & ATTR_COMPRESSION_MASK) {
1017 				NInoSetCompressed(ni);
1018 				if (vol->cluster_size > 4096) {
1019 					ntfs_error(vi->i_sb, "Found "
1020 						"compressed data but "
1021 						"compression is disabled due "
1022 						"to cluster size (%i) > 4kiB.",
1023 						vol->cluster_size);
1024 					goto unm_err_out;
1025 				}
1026 				if ((ctx->attr->flags & ATTR_COMPRESSION_MASK)
1027 						!= ATTR_IS_COMPRESSED) {
1028 					ntfs_error(vi->i_sb, "Found "
1029 						"unknown compression method or "
1030 						"corrupt file.");
1031 					goto unm_err_out;
1032 				}
1033 				ni->itype.compressed.block_clusters = 1U <<
1034 						ctx->attr->data.non_resident.
1035 						compression_unit;
1036 				if (ctx->attr->data.non_resident.
1037 						compression_unit != 4) {
1038 					ntfs_error(vi->i_sb, "Found "
1039 						"nonstandard compression unit "
1040 						"(%u instead of 4).  Cannot "
1041 						"handle this.",
1042 						ctx->attr->data.non_resident.
1043 						compression_unit);
1044 					err = -EOPNOTSUPP;
1045 					goto unm_err_out;
1046 				}
1047 				ni->itype.compressed.block_size = 1U << (
1048 						ctx->attr->data.non_resident.
1049 						compression_unit +
1050 						vol->cluster_size_bits);
1051 				ni->itype.compressed.block_size_bits = ffs(
1052 					ni->itype.compressed.block_size) - 1;
1053 			}
1054 			if (ctx->attr->flags & ATTR_IS_ENCRYPTED) {
1055 				if (ctx->attr->flags & ATTR_COMPRESSION_MASK) {
1056 					ntfs_error(vi->i_sb, "Found encrypted "
1057 							"and compressed data.");
1058 					goto unm_err_out;
1059 				}
1060 				NInoSetEncrypted(ni);
1061 			}
1062 			if (ctx->attr->flags & ATTR_IS_SPARSE)
1063 				NInoSetSparse(ni);
1064 			if (ctx->attr->data.non_resident.lowest_vcn) {
1065 				ntfs_error(vi->i_sb, "First extent of $DATA "
1066 						"attribute has non zero "
1067 						"lowest_vcn.");
1068 				goto unm_err_out;
1069 			}
1070 			/* Setup all the sizes. */
1071 			vi->i_size = sle64_to_cpu(
1072 					ctx->attr->data.non_resident.data_size);
1073 			ni->initialized_size = sle64_to_cpu(
1074 					ctx->attr->data.non_resident.
1075 					initialized_size);
1076 			ni->allocated_size = sle64_to_cpu(
1077 					ctx->attr->data.non_resident.
1078 					allocated_size);
1079 			if (NInoCompressed(ni)) {
1080 				ni->itype.compressed.size = sle64_to_cpu(
1081 						ctx->attr->data.non_resident.
1082 						compressed_size);
1083 			}
1084 		} else { /* Resident attribute. */
1085 			/*
1086 			 * Make all sizes equal for simplicity in read code
1087 			 * paths. FIXME: Need to keep this in mind when
1088 			 * converting to non-resident attribute in write code
1089 			 * path. (Probably only affects truncate().)
1090 			 */
1091 			vi->i_size = ni->initialized_size = ni->allocated_size =
1092 					le32_to_cpu(
1093 					ctx->attr->data.resident.value_length);
1094 		}
1095 no_data_attr_special_case:
1096 		/* We are done with the mft record, so we release it. */
1097 		ntfs_attr_put_search_ctx(ctx);
1098 		unmap_mft_record(ni);
1099 		m = NULL;
1100 		ctx = NULL;
1101 		/* Setup the operations for this inode. */
1102 		vi->i_op = &ntfs_file_inode_ops;
1103 		vi->i_fop = &ntfs_file_ops;
1104 	}
1105 	if (NInoMstProtected(ni))
1106 		vi->i_mapping->a_ops = &ntfs_mst_aops;
1107 	else
1108 		vi->i_mapping->a_ops = &ntfs_aops;
1109 	/*
1110 	 * The number of 512-byte blocks used on disk (for stat). This is in so
1111 	 * far inaccurate as it doesn't account for any named streams or other
1112 	 * special non-resident attributes, but that is how Windows works, too,
1113 	 * so we are at least consistent with Windows, if not entirely
1114 	 * consistent with the Linux Way. Doing it the Linux Way would cause a
1115 	 * significant slowdown as it would involve iterating over all
1116 	 * attributes in the mft record and adding the allocated/compressed
1117 	 * sizes of all non-resident attributes present to give us the Linux
1118 	 * correct size that should go into i_blocks (after division by 512).
1119 	 */
1120 	if (S_ISDIR(vi->i_mode) || !NInoCompressed(ni))
1121 		vi->i_blocks = ni->allocated_size >> 9;
1122 	else
1123 		vi->i_blocks = ni->itype.compressed.size >> 9;
1124 
1125 	ntfs_debug("Done.");
1126 	return 0;
1127 
1128 unm_err_out:
1129 	if (!err)
1130 		err = -EIO;
1131 	if (ctx)
1132 		ntfs_attr_put_search_ctx(ctx);
1133 	if (m)
1134 		unmap_mft_record(ni);
1135 err_out:
1136 	ntfs_error(vol->sb, "Failed with error code %i.  Marking corrupt "
1137 			"inode 0x%lx as bad.  Run chkdsk.", err, vi->i_ino);
1138 	make_bad_inode(vi);
1139 	if (err != -EOPNOTSUPP && err != -ENOMEM)
1140 		NVolSetErrors(vol);
1141 	return err;
1142 }
1143 
1144 /**
1145  * ntfs_read_locked_attr_inode - read an attribute inode from its base inode
1146  * @base_vi:	base inode
1147  * @vi:		attribute inode to read
1148  *
1149  * ntfs_read_locked_attr_inode() is called from ntfs_attr_iget() to read the
1150  * attribute inode described by @vi into memory from the base mft record
1151  * described by @base_ni.
1152  *
1153  * ntfs_read_locked_attr_inode() maps, pins and locks the base inode for
1154  * reading and looks up the attribute described by @vi before setting up the
1155  * necessary fields in @vi as well as initializing the ntfs inode.
1156  *
1157  * Q: What locks are held when the function is called?
1158  * A: i_state has I_LOCK set, hence the inode is locked, also
1159  *    i_count is set to 1, so it is not going to go away
1160  *
1161  * Return 0 on success and -errno on error.  In the error case, the inode will
1162  * have had make_bad_inode() executed on it.
1163  */
1164 static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1165 {
1166 	ntfs_volume *vol = NTFS_SB(vi->i_sb);
1167 	ntfs_inode *ni, *base_ni;
1168 	MFT_RECORD *m;
1169 	ntfs_attr_search_ctx *ctx;
1170 	int err = 0;
1171 
1172 	ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino);
1173 
1174 	ntfs_init_big_inode(vi);
1175 
1176 	ni	= NTFS_I(vi);
1177 	base_ni = NTFS_I(base_vi);
1178 
1179 	/* Just mirror the values from the base inode. */
1180 	vi->i_blksize	= base_vi->i_blksize;
1181 	vi->i_version	= base_vi->i_version;
1182 	vi->i_uid	= base_vi->i_uid;
1183 	vi->i_gid	= base_vi->i_gid;
1184 	vi->i_nlink	= base_vi->i_nlink;
1185 	vi->i_mtime	= base_vi->i_mtime;
1186 	vi->i_ctime	= base_vi->i_ctime;
1187 	vi->i_atime	= base_vi->i_atime;
1188 	vi->i_generation = ni->seq_no = base_ni->seq_no;
1189 
1190 	/* Set inode type to zero but preserve permissions. */
1191 	vi->i_mode	= base_vi->i_mode & ~S_IFMT;
1192 
1193 	m = map_mft_record(base_ni);
1194 	if (IS_ERR(m)) {
1195 		err = PTR_ERR(m);
1196 		goto err_out;
1197 	}
1198 	ctx = ntfs_attr_get_search_ctx(base_ni, m);
1199 	if (!ctx) {
1200 		err = -ENOMEM;
1201 		goto unm_err_out;
1202 	}
1203 
1204 	/* Find the attribute. */
1205 	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
1206 			CASE_SENSITIVE, 0, NULL, 0, ctx);
1207 	if (unlikely(err))
1208 		goto unm_err_out;
1209 
1210 	if (!ctx->attr->non_resident) {
1211 		/* Ensure the attribute name is placed before the value. */
1212 		if (unlikely(ctx->attr->name_length &&
1213 				(le16_to_cpu(ctx->attr->name_offset) >=
1214 				le16_to_cpu(ctx->attr->data.resident.
1215 				value_offset)))) {
1216 			ntfs_error(vol->sb, "Attribute name is placed after "
1217 					"the attribute value.");
1218 			goto unm_err_out;
1219 		}
1220 		if (NInoMstProtected(ni) || ctx->attr->flags) {
1221 			ntfs_error(vi->i_sb, "Found mst protected attribute "
1222 					"or attribute with non-zero flags but "
1223 					"the attribute is resident.  Please "
1224 					"report you saw this message to "
1225 					"linux-ntfs-dev@lists.sourceforge.net");
1226 			goto unm_err_out;
1227 		}
1228 		/*
1229 		 * Resident attribute. Make all sizes equal for simplicity in
1230 		 * read code paths.
1231 		 */
1232 		vi->i_size = ni->initialized_size = ni->allocated_size =
1233 			le32_to_cpu(ctx->attr->data.resident.value_length);
1234 	} else {
1235 		NInoSetNonResident(ni);
1236 		/*
1237 		 * Ensure the attribute name is placed before the mapping pairs
1238 		 * array.
1239 		 */
1240 		if (unlikely(ctx->attr->name_length &&
1241 				(le16_to_cpu(ctx->attr->name_offset) >=
1242 				le16_to_cpu(ctx->attr->data.non_resident.
1243 				mapping_pairs_offset)))) {
1244 			ntfs_error(vol->sb, "Attribute name is placed after "
1245 					"the mapping pairs array.");
1246 			goto unm_err_out;
1247 		}
1248 		if (ctx->attr->flags & ATTR_COMPRESSION_MASK) {
1249 			if (NInoMstProtected(ni)) {
1250 				ntfs_error(vi->i_sb, "Found mst protected "
1251 						"attribute but the attribute "
1252 						"is compressed.  Please report "
1253 						"you saw this message to "
1254 						"linux-ntfs-dev@lists."
1255 						"sourceforge.net");
1256 				goto unm_err_out;
1257 			}
1258 			NInoSetCompressed(ni);
1259 			if ((ni->type != AT_DATA) || (ni->type == AT_DATA &&
1260 					ni->name_len)) {
1261 				ntfs_error(vi->i_sb, "Found compressed "
1262 						"non-data or named data "
1263 						"attribute.  Please report "
1264 						"you saw this message to "
1265 						"linux-ntfs-dev@lists."
1266 						"sourceforge.net");
1267 				goto unm_err_out;
1268 			}
1269 			if (vol->cluster_size > 4096) {
1270 				ntfs_error(vi->i_sb, "Found compressed "
1271 						"attribute but compression is "
1272 						"disabled due to cluster size "
1273 						"(%i) > 4kiB.",
1274 						vol->cluster_size);
1275 				goto unm_err_out;
1276 			}
1277 			if ((ctx->attr->flags & ATTR_COMPRESSION_MASK)
1278 					!= ATTR_IS_COMPRESSED) {
1279 				ntfs_error(vi->i_sb, "Found unknown "
1280 						"compression method.");
1281 				goto unm_err_out;
1282 			}
1283 			ni->itype.compressed.block_clusters = 1U <<
1284 					ctx->attr->data.non_resident.
1285 					compression_unit;
1286 			if (ctx->attr->data.non_resident.compression_unit !=
1287 					4) {
1288 				ntfs_error(vi->i_sb, "Found nonstandard "
1289 						"compression unit (%u instead "
1290 						"of 4).  Cannot handle this.",
1291 						ctx->attr->data.non_resident.
1292 						compression_unit);
1293 				err = -EOPNOTSUPP;
1294 				goto unm_err_out;
1295 			}
1296 			ni->itype.compressed.block_size = 1U << (
1297 					ctx->attr->data.non_resident.
1298 					compression_unit +
1299 					vol->cluster_size_bits);
1300 			ni->itype.compressed.block_size_bits = ffs(
1301 				ni->itype.compressed.block_size) - 1;
1302 		}
1303 		if (ctx->attr->flags & ATTR_IS_ENCRYPTED) {
1304 			if (ctx->attr->flags & ATTR_COMPRESSION_MASK) {
1305 				ntfs_error(vi->i_sb, "Found encrypted "
1306 						"and compressed data.");
1307 				goto unm_err_out;
1308 			}
1309 			if (NInoMstProtected(ni)) {
1310 				ntfs_error(vi->i_sb, "Found mst protected "
1311 						"attribute but the attribute "
1312 						"is encrypted.  Please report "
1313 						"you saw this message to "
1314 						"linux-ntfs-dev@lists."
1315 						"sourceforge.net");
1316 				goto unm_err_out;
1317 			}
1318 			NInoSetEncrypted(ni);
1319 		}
1320 		if (ctx->attr->flags & ATTR_IS_SPARSE) {
1321 			if (NInoMstProtected(ni)) {
1322 				ntfs_error(vi->i_sb, "Found mst protected "
1323 						"attribute but the attribute "
1324 						"is sparse.  Please report "
1325 						"you saw this message to "
1326 						"linux-ntfs-dev@lists."
1327 						"sourceforge.net");
1328 				goto unm_err_out;
1329 			}
1330 			NInoSetSparse(ni);
1331 		}
1332 		if (ctx->attr->data.non_resident.lowest_vcn) {
1333 			ntfs_error(vi->i_sb, "First extent of attribute has "
1334 					"non-zero lowest_vcn.");
1335 			goto unm_err_out;
1336 		}
1337 		/* Setup all the sizes. */
1338 		vi->i_size = sle64_to_cpu(
1339 				ctx->attr->data.non_resident.data_size);
1340 		ni->initialized_size = sle64_to_cpu(
1341 				ctx->attr->data.non_resident.initialized_size);
1342 		ni->allocated_size = sle64_to_cpu(
1343 				ctx->attr->data.non_resident.allocated_size);
1344 		if (NInoCompressed(ni)) {
1345 			ni->itype.compressed.size = sle64_to_cpu(
1346 					ctx->attr->data.non_resident.
1347 					compressed_size);
1348 		}
1349 	}
1350 
1351 	/* Setup the operations for this attribute inode. */
1352 	vi->i_op = NULL;
1353 	vi->i_fop = NULL;
1354 	if (NInoMstProtected(ni))
1355 		vi->i_mapping->a_ops = &ntfs_mst_aops;
1356 	else
1357 		vi->i_mapping->a_ops = &ntfs_aops;
1358 
1359 	if (!NInoCompressed(ni))
1360 		vi->i_blocks = ni->allocated_size >> 9;
1361 	else
1362 		vi->i_blocks = ni->itype.compressed.size >> 9;
1363 
1364 	/*
1365 	 * Make sure the base inode doesn't go away and attach it to the
1366 	 * attribute inode.
1367 	 */
1368 	igrab(base_vi);
1369 	ni->ext.base_ntfs_ino = base_ni;
1370 	ni->nr_extents = -1;
1371 
1372 	ntfs_attr_put_search_ctx(ctx);
1373 	unmap_mft_record(base_ni);
1374 
1375 	ntfs_debug("Done.");
1376 	return 0;
1377 
1378 unm_err_out:
1379 	if (!err)
1380 		err = -EIO;
1381 	if (ctx)
1382 		ntfs_attr_put_search_ctx(ctx);
1383 	unmap_mft_record(base_ni);
1384 err_out:
1385 	ntfs_error(vol->sb, "Failed with error code %i while reading attribute "
1386 			"inode (mft_no 0x%lx, type 0x%x, name_len %i).  "
1387 			"Marking corrupt inode and base inode 0x%lx as bad.  "
1388 			"Run chkdsk.", err, vi->i_ino, ni->type, ni->name_len,
1389 			base_vi->i_ino);
1390 	make_bad_inode(vi);
1391 	make_bad_inode(base_vi);
1392 	if (err != -ENOMEM)
1393 		NVolSetErrors(vol);
1394 	return err;
1395 }
1396 
1397 /**
1398  * ntfs_read_locked_index_inode - read an index inode from its base inode
1399  * @base_vi:	base inode
1400  * @vi:		index inode to read
1401  *
1402  * ntfs_read_locked_index_inode() is called from ntfs_index_iget() to read the
1403  * index inode described by @vi into memory from the base mft record described
1404  * by @base_ni.
1405  *
1406  * ntfs_read_locked_index_inode() maps, pins and locks the base inode for
1407  * reading and looks up the attributes relating to the index described by @vi
1408  * before setting up the necessary fields in @vi as well as initializing the
1409  * ntfs inode.
1410  *
1411  * Note, index inodes are essentially attribute inodes (NInoAttr() is true)
1412  * with the attribute type set to AT_INDEX_ALLOCATION.  Apart from that, they
1413  * are setup like directory inodes since directories are a special case of
1414  * indices ao they need to be treated in much the same way.  Most importantly,
1415  * for small indices the index allocation attribute might not actually exist.
1416  * However, the index root attribute always exists but this does not need to
1417  * have an inode associated with it and this is why we define a new inode type
1418  * index.  Also, like for directories, we need to have an attribute inode for
1419  * the bitmap attribute corresponding to the index allocation attribute and we
1420  * can store this in the appropriate field of the inode, just like we do for
1421  * normal directory inodes.
1422  *
1423  * Q: What locks are held when the function is called?
1424  * A: i_state has I_LOCK set, hence the inode is locked, also
1425  *    i_count is set to 1, so it is not going to go away
1426  *
1427  * Return 0 on success and -errno on error.  In the error case, the inode will
1428  * have had make_bad_inode() executed on it.
1429  */
1430 static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1431 {
1432 	ntfs_volume *vol = NTFS_SB(vi->i_sb);
1433 	ntfs_inode *ni, *base_ni, *bni;
1434 	struct inode *bvi;
1435 	MFT_RECORD *m;
1436 	ntfs_attr_search_ctx *ctx;
1437 	INDEX_ROOT *ir;
1438 	u8 *ir_end, *index_end;
1439 	int err = 0;
1440 
1441 	ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino);
1442 	ntfs_init_big_inode(vi);
1443 	ni	= NTFS_I(vi);
1444 	base_ni = NTFS_I(base_vi);
1445 	/* Just mirror the values from the base inode. */
1446 	vi->i_blksize	= base_vi->i_blksize;
1447 	vi->i_version	= base_vi->i_version;
1448 	vi->i_uid	= base_vi->i_uid;
1449 	vi->i_gid	= base_vi->i_gid;
1450 	vi->i_nlink	= base_vi->i_nlink;
1451 	vi->i_mtime	= base_vi->i_mtime;
1452 	vi->i_ctime	= base_vi->i_ctime;
1453 	vi->i_atime	= base_vi->i_atime;
1454 	vi->i_generation = ni->seq_no = base_ni->seq_no;
1455 	/* Set inode type to zero but preserve permissions. */
1456 	vi->i_mode	= base_vi->i_mode & ~S_IFMT;
1457 	/* Map the mft record for the base inode. */
1458 	m = map_mft_record(base_ni);
1459 	if (IS_ERR(m)) {
1460 		err = PTR_ERR(m);
1461 		goto err_out;
1462 	}
1463 	ctx = ntfs_attr_get_search_ctx(base_ni, m);
1464 	if (!ctx) {
1465 		err = -ENOMEM;
1466 		goto unm_err_out;
1467 	}
1468 	/* Find the index root attribute. */
1469 	err = ntfs_attr_lookup(AT_INDEX_ROOT, ni->name, ni->name_len,
1470 			CASE_SENSITIVE, 0, NULL, 0, ctx);
1471 	if (unlikely(err)) {
1472 		if (err == -ENOENT)
1473 			ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is "
1474 					"missing.");
1475 		goto unm_err_out;
1476 	}
1477 	/* Set up the state. */
1478 	if (unlikely(ctx->attr->non_resident)) {
1479 		ntfs_error(vol->sb, "$INDEX_ROOT attribute is not resident.");
1480 		goto unm_err_out;
1481 	}
1482 	/* Ensure the attribute name is placed before the value. */
1483 	if (unlikely(ctx->attr->name_length &&
1484 			(le16_to_cpu(ctx->attr->name_offset) >=
1485 			le16_to_cpu(ctx->attr->data.resident.
1486 			value_offset)))) {
1487 		ntfs_error(vol->sb, "$INDEX_ROOT attribute name is placed "
1488 				"after the attribute value.");
1489 		goto unm_err_out;
1490 	}
1491 	/* Compressed/encrypted/sparse index root is not allowed. */
1492 	if (ctx->attr->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED |
1493 			ATTR_IS_SPARSE)) {
1494 		ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index "
1495 				"root attribute.");
1496 		goto unm_err_out;
1497 	}
1498 	ir = (INDEX_ROOT*)((u8*)ctx->attr +
1499 			le16_to_cpu(ctx->attr->data.resident.value_offset));
1500 	ir_end = (u8*)ir + le32_to_cpu(ctx->attr->data.resident.value_length);
1501 	if (ir_end > (u8*)ctx->mrec + vol->mft_record_size) {
1502 		ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is corrupt.");
1503 		goto unm_err_out;
1504 	}
1505 	index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
1506 	if (index_end > ir_end) {
1507 		ntfs_error(vi->i_sb, "Index is corrupt.");
1508 		goto unm_err_out;
1509 	}
1510 	if (ir->type) {
1511 		ntfs_error(vi->i_sb, "Index type is not 0 (type is 0x%x).",
1512 				le32_to_cpu(ir->type));
1513 		goto unm_err_out;
1514 	}
1515 	ni->itype.index.collation_rule = ir->collation_rule;
1516 	ntfs_debug("Index collation rule is 0x%x.",
1517 			le32_to_cpu(ir->collation_rule));
1518 	ni->itype.index.block_size = le32_to_cpu(ir->index_block_size);
1519 	if (ni->itype.index.block_size & (ni->itype.index.block_size - 1)) {
1520 		ntfs_error(vi->i_sb, "Index block size (%u) is not a power of "
1521 				"two.", ni->itype.index.block_size);
1522 		goto unm_err_out;
1523 	}
1524 	if (ni->itype.index.block_size > PAGE_CACHE_SIZE) {
1525 		ntfs_error(vi->i_sb, "Index block size (%u) > PAGE_CACHE_SIZE "
1526 				"(%ld) is not supported.  Sorry.",
1527 				ni->itype.index.block_size, PAGE_CACHE_SIZE);
1528 		err = -EOPNOTSUPP;
1529 		goto unm_err_out;
1530 	}
1531 	if (ni->itype.index.block_size < NTFS_BLOCK_SIZE) {
1532 		ntfs_error(vi->i_sb, "Index block size (%u) < NTFS_BLOCK_SIZE "
1533 				"(%i) is not supported.  Sorry.",
1534 				ni->itype.index.block_size, NTFS_BLOCK_SIZE);
1535 		err = -EOPNOTSUPP;
1536 		goto unm_err_out;
1537 	}
1538 	ni->itype.index.block_size_bits = ffs(ni->itype.index.block_size) - 1;
1539 	/* Determine the size of a vcn in the index. */
1540 	if (vol->cluster_size <= ni->itype.index.block_size) {
1541 		ni->itype.index.vcn_size = vol->cluster_size;
1542 		ni->itype.index.vcn_size_bits = vol->cluster_size_bits;
1543 	} else {
1544 		ni->itype.index.vcn_size = vol->sector_size;
1545 		ni->itype.index.vcn_size_bits = vol->sector_size_bits;
1546 	}
1547 	/* Check for presence of index allocation attribute. */
1548 	if (!(ir->index.flags & LARGE_INDEX)) {
1549 		/* No index allocation. */
1550 		vi->i_size = ni->initialized_size = ni->allocated_size = 0;
1551 		/* We are done with the mft record, so we release it. */
1552 		ntfs_attr_put_search_ctx(ctx);
1553 		unmap_mft_record(base_ni);
1554 		m = NULL;
1555 		ctx = NULL;
1556 		goto skip_large_index_stuff;
1557 	} /* LARGE_INDEX:  Index allocation present.  Setup state. */
1558 	NInoSetIndexAllocPresent(ni);
1559 	/* Find index allocation attribute. */
1560 	ntfs_attr_reinit_search_ctx(ctx);
1561 	err = ntfs_attr_lookup(AT_INDEX_ALLOCATION, ni->name, ni->name_len,
1562 			CASE_SENSITIVE, 0, NULL, 0, ctx);
1563 	if (unlikely(err)) {
1564 		if (err == -ENOENT)
1565 			ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
1566 					"not present but $INDEX_ROOT "
1567 					"indicated it is.");
1568 		else
1569 			ntfs_error(vi->i_sb, "Failed to lookup "
1570 					"$INDEX_ALLOCATION attribute.");
1571 		goto unm_err_out;
1572 	}
1573 	if (!ctx->attr->non_resident) {
1574 		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
1575 				"resident.");
1576 		goto unm_err_out;
1577 	}
1578 	/*
1579 	 * Ensure the attribute name is placed before the mapping pairs array.
1580 	 */
1581 	if (unlikely(ctx->attr->name_length && (le16_to_cpu(
1582 			ctx->attr->name_offset) >= le16_to_cpu(
1583 			ctx->attr->data.non_resident.mapping_pairs_offset)))) {
1584 		ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name is "
1585 				"placed after the mapping pairs array.");
1586 		goto unm_err_out;
1587 	}
1588 	if (ctx->attr->flags & ATTR_IS_ENCRYPTED) {
1589 		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
1590 				"encrypted.");
1591 		goto unm_err_out;
1592 	}
1593 	if (ctx->attr->flags & ATTR_IS_SPARSE) {
1594 		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is sparse.");
1595 		goto unm_err_out;
1596 	}
1597 	if (ctx->attr->flags & ATTR_COMPRESSION_MASK) {
1598 		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
1599 				"compressed.");
1600 		goto unm_err_out;
1601 	}
1602 	if (ctx->attr->data.non_resident.lowest_vcn) {
1603 		ntfs_error(vi->i_sb, "First extent of $INDEX_ALLOCATION "
1604 				"attribute has non zero lowest_vcn.");
1605 		goto unm_err_out;
1606 	}
1607 	vi->i_size = sle64_to_cpu(ctx->attr->data.non_resident.data_size);
1608 	ni->initialized_size = sle64_to_cpu(
1609 			ctx->attr->data.non_resident.initialized_size);
1610 	ni->allocated_size = sle64_to_cpu(
1611 			ctx->attr->data.non_resident.allocated_size);
1612 	/*
1613 	 * We are done with the mft record, so we release it.  Otherwise
1614 	 * we would deadlock in ntfs_attr_iget().
1615 	 */
1616 	ntfs_attr_put_search_ctx(ctx);
1617 	unmap_mft_record(base_ni);
1618 	m = NULL;
1619 	ctx = NULL;
1620 	/* Get the index bitmap attribute inode. */
1621 	bvi = ntfs_attr_iget(base_vi, AT_BITMAP, ni->name, ni->name_len);
1622 	if (IS_ERR(bvi)) {
1623 		ntfs_error(vi->i_sb, "Failed to get bitmap attribute.");
1624 		err = PTR_ERR(bvi);
1625 		goto unm_err_out;
1626 	}
1627 	bni = NTFS_I(bvi);
1628 	if (NInoCompressed(bni) || NInoEncrypted(bni) ||
1629 			NInoSparse(bni)) {
1630 		ntfs_error(vi->i_sb, "$BITMAP attribute is compressed and/or "
1631 				"encrypted and/or sparse.");
1632 		goto iput_unm_err_out;
1633 	}
1634 	/* Consistency check bitmap size vs. index allocation size. */
1635 	if ((bvi->i_size << 3) < (vi->i_size >>
1636 			ni->itype.index.block_size_bits)) {
1637 		ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) for "
1638 				"index allocation (0x%llx).", bvi->i_size << 3,
1639 				vi->i_size);
1640 		goto iput_unm_err_out;
1641 	}
1642 	ni->itype.index.bmp_ino = bvi;
1643 skip_large_index_stuff:
1644 	/* Setup the operations for this index inode. */
1645 	vi->i_op = NULL;
1646 	vi->i_fop = NULL;
1647 	vi->i_mapping->a_ops = &ntfs_mst_aops;
1648 	vi->i_blocks = ni->allocated_size >> 9;
1649 
1650 	/*
1651 	 * Make sure the base inode doesn't go away and attach it to the
1652 	 * index inode.
1653 	 */
1654 	igrab(base_vi);
1655 	ni->ext.base_ntfs_ino = base_ni;
1656 	ni->nr_extents = -1;
1657 
1658 	ntfs_debug("Done.");
1659 	return 0;
1660 
1661 iput_unm_err_out:
1662 	iput(bvi);
1663 unm_err_out:
1664 	if (!err)
1665 		err = -EIO;
1666 	if (ctx)
1667 		ntfs_attr_put_search_ctx(ctx);
1668 	if (m)
1669 		unmap_mft_record(base_ni);
1670 err_out:
1671 	ntfs_error(vi->i_sb, "Failed with error code %i while reading index "
1672 			"inode (mft_no 0x%lx, name_len %i.", err, vi->i_ino,
1673 			ni->name_len);
1674 	make_bad_inode(vi);
1675 	if (err != -EOPNOTSUPP && err != -ENOMEM)
1676 		NVolSetErrors(vol);
1677 	return err;
1678 }
1679 
1680 /**
1681  * ntfs_read_inode_mount - special read_inode for mount time use only
1682  * @vi:		inode to read
1683  *
1684  * Read inode FILE_MFT at mount time, only called with super_block lock
1685  * held from within the read_super() code path.
1686  *
1687  * This function exists because when it is called the page cache for $MFT/$DATA
1688  * is not initialized and hence we cannot get at the contents of mft records
1689  * by calling map_mft_record*().
1690  *
1691  * Further it needs to cope with the circular references problem, i.e. cannot
1692  * load any attributes other than $ATTRIBUTE_LIST until $DATA is loaded, because
1693  * we do not know where the other extent mft records are yet and again, because
1694  * we cannot call map_mft_record*() yet.  Obviously this applies only when an
1695  * attribute list is actually present in $MFT inode.
1696  *
1697  * We solve these problems by starting with the $DATA attribute before anything
1698  * else and iterating using ntfs_attr_lookup($DATA) over all extents.  As each
1699  * extent is found, we ntfs_mapping_pairs_decompress() including the implied
1700  * ntfs_runlists_merge().  Each step of the iteration necessarily provides
1701  * sufficient information for the next step to complete.
1702  *
1703  * This should work but there are two possible pit falls (see inline comments
1704  * below), but only time will tell if they are real pits or just smoke...
1705  */
1706 int ntfs_read_inode_mount(struct inode *vi)
1707 {
1708 	VCN next_vcn, last_vcn, highest_vcn;
1709 	s64 block;
1710 	struct super_block *sb = vi->i_sb;
1711 	ntfs_volume *vol = NTFS_SB(sb);
1712 	struct buffer_head *bh;
1713 	ntfs_inode *ni;
1714 	MFT_RECORD *m = NULL;
1715 	ATTR_RECORD *attr;
1716 	ntfs_attr_search_ctx *ctx;
1717 	unsigned int i, nr_blocks;
1718 	int err;
1719 
1720 	ntfs_debug("Entering.");
1721 
1722 	/* Initialize the ntfs specific part of @vi. */
1723 	ntfs_init_big_inode(vi);
1724 
1725 	ni = NTFS_I(vi);
1726 
1727 	/* Setup the data attribute. It is special as it is mst protected. */
1728 	NInoSetNonResident(ni);
1729 	NInoSetMstProtected(ni);
1730 	ni->type = AT_DATA;
1731 	ni->name = NULL;
1732 	ni->name_len = 0;
1733 
1734 	/*
1735 	 * This sets up our little cheat allowing us to reuse the async read io
1736 	 * completion handler for directories.
1737 	 */
1738 	ni->itype.index.block_size = vol->mft_record_size;
1739 	ni->itype.index.block_size_bits = vol->mft_record_size_bits;
1740 
1741 	/* Very important! Needed to be able to call map_mft_record*(). */
1742 	vol->mft_ino = vi;
1743 
1744 	/* Allocate enough memory to read the first mft record. */
1745 	if (vol->mft_record_size > 64 * 1024) {
1746 		ntfs_error(sb, "Unsupported mft record size %i (max 64kiB).",
1747 				vol->mft_record_size);
1748 		goto err_out;
1749 	}
1750 	i = vol->mft_record_size;
1751 	if (i < sb->s_blocksize)
1752 		i = sb->s_blocksize;
1753 	m = (MFT_RECORD*)ntfs_malloc_nofs(i);
1754 	if (!m) {
1755 		ntfs_error(sb, "Failed to allocate buffer for $MFT record 0.");
1756 		goto err_out;
1757 	}
1758 
1759 	/* Determine the first block of the $MFT/$DATA attribute. */
1760 	block = vol->mft_lcn << vol->cluster_size_bits >>
1761 			sb->s_blocksize_bits;
1762 	nr_blocks = vol->mft_record_size >> sb->s_blocksize_bits;
1763 	if (!nr_blocks)
1764 		nr_blocks = 1;
1765 
1766 	/* Load $MFT/$DATA's first mft record. */
1767 	for (i = 0; i < nr_blocks; i++) {
1768 		bh = sb_bread(sb, block++);
1769 		if (!bh) {
1770 			ntfs_error(sb, "Device read failed.");
1771 			goto err_out;
1772 		}
1773 		memcpy((char*)m + (i << sb->s_blocksize_bits), bh->b_data,
1774 				sb->s_blocksize);
1775 		brelse(bh);
1776 	}
1777 
1778 	/* Apply the mst fixups. */
1779 	if (post_read_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size)) {
1780 		/* FIXME: Try to use the $MFTMirr now. */
1781 		ntfs_error(sb, "MST fixup failed. $MFT is corrupt.");
1782 		goto err_out;
1783 	}
1784 
1785 	/* Need this to sanity check attribute list references to $MFT. */
1786 	vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number);
1787 
1788 	/* Provides readpage() and sync_page() for map_mft_record(). */
1789 	vi->i_mapping->a_ops = &ntfs_mst_aops;
1790 
1791 	ctx = ntfs_attr_get_search_ctx(ni, m);
1792 	if (!ctx) {
1793 		err = -ENOMEM;
1794 		goto err_out;
1795 	}
1796 
1797 	/* Find the attribute list attribute if present. */
1798 	err = ntfs_attr_lookup(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx);
1799 	if (err) {
1800 		if (unlikely(err != -ENOENT)) {
1801 			ntfs_error(sb, "Failed to lookup attribute list "
1802 					"attribute. You should run chkdsk.");
1803 			goto put_err_out;
1804 		}
1805 	} else /* if (!err) */ {
1806 		ATTR_LIST_ENTRY *al_entry, *next_al_entry;
1807 		u8 *al_end;
1808 
1809 		ntfs_debug("Attribute list attribute found in $MFT.");
1810 		NInoSetAttrList(ni);
1811 		if (ctx->attr->flags & ATTR_IS_ENCRYPTED ||
1812 				ctx->attr->flags & ATTR_COMPRESSION_MASK ||
1813 				ctx->attr->flags & ATTR_IS_SPARSE) {
1814 			ntfs_error(sb, "Attribute list attribute is "
1815 					"compressed/encrypted/sparse. Not "
1816 					"allowed. $MFT is corrupt. You should "
1817 					"run chkdsk.");
1818 			goto put_err_out;
1819 		}
1820 		/* Now allocate memory for the attribute list. */
1821 		ni->attr_list_size = (u32)ntfs_attr_size(ctx->attr);
1822 		ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
1823 		if (!ni->attr_list) {
1824 			ntfs_error(sb, "Not enough memory to allocate buffer "
1825 					"for attribute list.");
1826 			goto put_err_out;
1827 		}
1828 		if (ctx->attr->non_resident) {
1829 			NInoSetAttrListNonResident(ni);
1830 			if (ctx->attr->data.non_resident.lowest_vcn) {
1831 				ntfs_error(sb, "Attribute list has non zero "
1832 						"lowest_vcn. $MFT is corrupt. "
1833 						"You should run chkdsk.");
1834 				goto put_err_out;
1835 			}
1836 			/* Setup the runlist. */
1837 			ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol,
1838 					ctx->attr, NULL);
1839 			if (IS_ERR(ni->attr_list_rl.rl)) {
1840 				err = PTR_ERR(ni->attr_list_rl.rl);
1841 				ni->attr_list_rl.rl = NULL;
1842 				ntfs_error(sb, "Mapping pairs decompression "
1843 						"failed with error code %i.",
1844 						-err);
1845 				goto put_err_out;
1846 			}
1847 			/* Now load the attribute list. */
1848 			if ((err = load_attribute_list(vol, &ni->attr_list_rl,
1849 					ni->attr_list, ni->attr_list_size,
1850 					sle64_to_cpu(ctx->attr->data.
1851 					non_resident.initialized_size)))) {
1852 				ntfs_error(sb, "Failed to load attribute list "
1853 						"attribute with error code %i.",
1854 						-err);
1855 				goto put_err_out;
1856 			}
1857 		} else /* if (!ctx.attr->non_resident) */ {
1858 			if ((u8*)ctx->attr + le16_to_cpu(
1859 					ctx->attr->data.resident.value_offset) +
1860 					le32_to_cpu(
1861 					ctx->attr->data.resident.value_length) >
1862 					(u8*)ctx->mrec + vol->mft_record_size) {
1863 				ntfs_error(sb, "Corrupt attribute list "
1864 						"attribute.");
1865 				goto put_err_out;
1866 			}
1867 			/* Now copy the attribute list. */
1868 			memcpy(ni->attr_list, (u8*)ctx->attr + le16_to_cpu(
1869 					ctx->attr->data.resident.value_offset),
1870 					le32_to_cpu(
1871 					ctx->attr->data.resident.value_length));
1872 		}
1873 		/* The attribute list is now setup in memory. */
1874 		/*
1875 		 * FIXME: I don't know if this case is actually possible.
1876 		 * According to logic it is not possible but I have seen too
1877 		 * many weird things in MS software to rely on logic... Thus we
1878 		 * perform a manual search and make sure the first $MFT/$DATA
1879 		 * extent is in the base inode. If it is not we abort with an
1880 		 * error and if we ever see a report of this error we will need
1881 		 * to do some magic in order to have the necessary mft record
1882 		 * loaded and in the right place in the page cache. But
1883 		 * hopefully logic will prevail and this never happens...
1884 		 */
1885 		al_entry = (ATTR_LIST_ENTRY*)ni->attr_list;
1886 		al_end = (u8*)al_entry + ni->attr_list_size;
1887 		for (;; al_entry = next_al_entry) {
1888 			/* Out of bounds check. */
1889 			if ((u8*)al_entry < ni->attr_list ||
1890 					(u8*)al_entry > al_end)
1891 				goto em_put_err_out;
1892 			/* Catch the end of the attribute list. */
1893 			if ((u8*)al_entry == al_end)
1894 				goto em_put_err_out;
1895 			if (!al_entry->length)
1896 				goto em_put_err_out;
1897 			if ((u8*)al_entry + 6 > al_end || (u8*)al_entry +
1898 					le16_to_cpu(al_entry->length) > al_end)
1899 				goto em_put_err_out;
1900 			next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry +
1901 					le16_to_cpu(al_entry->length));
1902 			if (le32_to_cpu(al_entry->type) >
1903 					const_le32_to_cpu(AT_DATA))
1904 				goto em_put_err_out;
1905 			if (AT_DATA != al_entry->type)
1906 				continue;
1907 			/* We want an unnamed attribute. */
1908 			if (al_entry->name_length)
1909 				goto em_put_err_out;
1910 			/* Want the first entry, i.e. lowest_vcn == 0. */
1911 			if (al_entry->lowest_vcn)
1912 				goto em_put_err_out;
1913 			/* First entry has to be in the base mft record. */
1914 			if (MREF_LE(al_entry->mft_reference) != vi->i_ino) {
1915 				/* MFT references do not match, logic fails. */
1916 				ntfs_error(sb, "BUG: The first $DATA extent "
1917 						"of $MFT is not in the base "
1918 						"mft record. Please report "
1919 						"you saw this message to "
1920 						"linux-ntfs-dev@lists."
1921 						"sourceforge.net");
1922 				goto put_err_out;
1923 			} else {
1924 				/* Sequence numbers must match. */
1925 				if (MSEQNO_LE(al_entry->mft_reference) !=
1926 						ni->seq_no)
1927 					goto em_put_err_out;
1928 				/* Got it. All is ok. We can stop now. */
1929 				break;
1930 			}
1931 		}
1932 	}
1933 
1934 	ntfs_attr_reinit_search_ctx(ctx);
1935 
1936 	/* Now load all attribute extents. */
1937 	attr = NULL;
1938 	next_vcn = last_vcn = highest_vcn = 0;
1939 	while (!(err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, next_vcn, NULL, 0,
1940 			ctx))) {
1941 		runlist_element *nrl;
1942 
1943 		/* Cache the current attribute. */
1944 		attr = ctx->attr;
1945 		/* $MFT must be non-resident. */
1946 		if (!attr->non_resident) {
1947 			ntfs_error(sb, "$MFT must be non-resident but a "
1948 					"resident extent was found. $MFT is "
1949 					"corrupt. Run chkdsk.");
1950 			goto put_err_out;
1951 		}
1952 		/* $MFT must be uncompressed and unencrypted. */
1953 		if (attr->flags & ATTR_COMPRESSION_MASK ||
1954 				attr->flags & ATTR_IS_ENCRYPTED ||
1955 				attr->flags & ATTR_IS_SPARSE) {
1956 			ntfs_error(sb, "$MFT must be uncompressed, "
1957 					"non-sparse, and unencrypted but a "
1958 					"compressed/sparse/encrypted extent "
1959 					"was found. $MFT is corrupt. Run "
1960 					"chkdsk.");
1961 			goto put_err_out;
1962 		}
1963 		/*
1964 		 * Decompress the mapping pairs array of this extent and merge
1965 		 * the result into the existing runlist. No need for locking
1966 		 * as we have exclusive access to the inode at this time and we
1967 		 * are a mount in progress task, too.
1968 		 */
1969 		nrl = ntfs_mapping_pairs_decompress(vol, attr, ni->runlist.rl);
1970 		if (IS_ERR(nrl)) {
1971 			ntfs_error(sb, "ntfs_mapping_pairs_decompress() "
1972 					"failed with error code %ld.  $MFT is "
1973 					"corrupt.", PTR_ERR(nrl));
1974 			goto put_err_out;
1975 		}
1976 		ni->runlist.rl = nrl;
1977 
1978 		/* Are we in the first extent? */
1979 		if (!next_vcn) {
1980 			if (attr->data.non_resident.lowest_vcn) {
1981 				ntfs_error(sb, "First extent of $DATA "
1982 						"attribute has non zero "
1983 						"lowest_vcn. $MFT is corrupt. "
1984 						"You should run chkdsk.");
1985 				goto put_err_out;
1986 			}
1987 			/* Get the last vcn in the $DATA attribute. */
1988 			last_vcn = sle64_to_cpu(
1989 					attr->data.non_resident.allocated_size)
1990 					>> vol->cluster_size_bits;
1991 			/* Fill in the inode size. */
1992 			vi->i_size = sle64_to_cpu(
1993 					attr->data.non_resident.data_size);
1994 			ni->initialized_size = sle64_to_cpu(attr->data.
1995 					non_resident.initialized_size);
1996 			ni->allocated_size = sle64_to_cpu(
1997 					attr->data.non_resident.allocated_size);
1998 			/*
1999 			 * Verify the number of mft records does not exceed
2000 			 * 2^32 - 1.
2001 			 */
2002 			if ((vi->i_size >> vol->mft_record_size_bits) >=
2003 					(1ULL << 32)) {
2004 				ntfs_error(sb, "$MFT is too big! Aborting.");
2005 				goto put_err_out;
2006 			}
2007 			/*
2008 			 * We have got the first extent of the runlist for
2009 			 * $MFT which means it is now relatively safe to call
2010 			 * the normal ntfs_read_inode() function.
2011 			 * Complete reading the inode, this will actually
2012 			 * re-read the mft record for $MFT, this time entering
2013 			 * it into the page cache with which we complete the
2014 			 * kick start of the volume. It should be safe to do
2015 			 * this now as the first extent of $MFT/$DATA is
2016 			 * already known and we would hope that we don't need
2017 			 * further extents in order to find the other
2018 			 * attributes belonging to $MFT. Only time will tell if
2019 			 * this is really the case. If not we will have to play
2020 			 * magic at this point, possibly duplicating a lot of
2021 			 * ntfs_read_inode() at this point. We will need to
2022 			 * ensure we do enough of its work to be able to call
2023 			 * ntfs_read_inode() on extents of $MFT/$DATA. But lets
2024 			 * hope this never happens...
2025 			 */
2026 			ntfs_read_locked_inode(vi);
2027 			if (is_bad_inode(vi)) {
2028 				ntfs_error(sb, "ntfs_read_inode() of $MFT "
2029 						"failed. BUG or corrupt $MFT. "
2030 						"Run chkdsk and if no errors "
2031 						"are found, please report you "
2032 						"saw this message to "
2033 						"linux-ntfs-dev@lists."
2034 						"sourceforge.net");
2035 				ntfs_attr_put_search_ctx(ctx);
2036 				/* Revert to the safe super operations. */
2037 				ntfs_free(m);
2038 				return -1;
2039 			}
2040 			/*
2041 			 * Re-initialize some specifics about $MFT's inode as
2042 			 * ntfs_read_inode() will have set up the default ones.
2043 			 */
2044 			/* Set uid and gid to root. */
2045 			vi->i_uid = vi->i_gid = 0;
2046 			/* Regular file. No access for anyone. */
2047 			vi->i_mode = S_IFREG;
2048 			/* No VFS initiated operations allowed for $MFT. */
2049 			vi->i_op = &ntfs_empty_inode_ops;
2050 			vi->i_fop = &ntfs_empty_file_ops;
2051 		}
2052 
2053 		/* Get the lowest vcn for the next extent. */
2054 		highest_vcn = sle64_to_cpu(attr->data.non_resident.highest_vcn);
2055 		next_vcn = highest_vcn + 1;
2056 
2057 		/* Only one extent or error, which we catch below. */
2058 		if (next_vcn <= 0)
2059 			break;
2060 
2061 		/* Avoid endless loops due to corruption. */
2062 		if (next_vcn < sle64_to_cpu(
2063 				attr->data.non_resident.lowest_vcn)) {
2064 			ntfs_error(sb, "$MFT has corrupt attribute list "
2065 					"attribute. Run chkdsk.");
2066 			goto put_err_out;
2067 		}
2068 	}
2069 	if (err != -ENOENT) {
2070 		ntfs_error(sb, "Failed to lookup $MFT/$DATA attribute extent. "
2071 				"$MFT is corrupt. Run chkdsk.");
2072 		goto put_err_out;
2073 	}
2074 	if (!attr) {
2075 		ntfs_error(sb, "$MFT/$DATA attribute not found. $MFT is "
2076 				"corrupt. Run chkdsk.");
2077 		goto put_err_out;
2078 	}
2079 	if (highest_vcn && highest_vcn != last_vcn - 1) {
2080 		ntfs_error(sb, "Failed to load the complete runlist for "
2081 				"$MFT/$DATA. Driver bug or corrupt $MFT. "
2082 				"Run chkdsk.");
2083 		ntfs_debug("highest_vcn = 0x%llx, last_vcn - 1 = 0x%llx",
2084 				(unsigned long long)highest_vcn,
2085 				(unsigned long long)last_vcn - 1);
2086 		goto put_err_out;
2087 	}
2088 	ntfs_attr_put_search_ctx(ctx);
2089 	ntfs_debug("Done.");
2090 	ntfs_free(m);
2091 	return 0;
2092 
2093 em_put_err_out:
2094 	ntfs_error(sb, "Couldn't find first extent of $DATA attribute in "
2095 			"attribute list. $MFT is corrupt. Run chkdsk.");
2096 put_err_out:
2097 	ntfs_attr_put_search_ctx(ctx);
2098 err_out:
2099 	ntfs_error(sb, "Failed. Marking inode as bad.");
2100 	make_bad_inode(vi);
2101 	ntfs_free(m);
2102 	return -1;
2103 }
2104 
2105 /**
2106  * ntfs_put_inode - handler for when the inode reference count is decremented
2107  * @vi:		vfs inode
2108  *
2109  * The VFS calls ntfs_put_inode() every time the inode reference count (i_count)
2110  * is about to be decremented (but before the decrement itself.
2111  *
2112  * If the inode @vi is a directory with two references, one of which is being
2113  * dropped, we need to put the attribute inode for the directory index bitmap,
2114  * if it is present, otherwise the directory inode would remain pinned for
2115  * ever.
2116  */
2117 void ntfs_put_inode(struct inode *vi)
2118 {
2119 	if (S_ISDIR(vi->i_mode) && atomic_read(&vi->i_count) == 2) {
2120 		ntfs_inode *ni = NTFS_I(vi);
2121 		if (NInoIndexAllocPresent(ni)) {
2122 			struct inode *bvi = NULL;
2123 			down(&vi->i_sem);
2124 			if (atomic_read(&vi->i_count) == 2) {
2125 				bvi = ni->itype.index.bmp_ino;
2126 				if (bvi)
2127 					ni->itype.index.bmp_ino = NULL;
2128 			}
2129 			up(&vi->i_sem);
2130 			if (bvi)
2131 				iput(bvi);
2132 		}
2133 	}
2134 }
2135 
2136 static void __ntfs_clear_inode(ntfs_inode *ni)
2137 {
2138 	/* Free all alocated memory. */
2139 	down_write(&ni->runlist.lock);
2140 	if (ni->runlist.rl) {
2141 		ntfs_free(ni->runlist.rl);
2142 		ni->runlist.rl = NULL;
2143 	}
2144 	up_write(&ni->runlist.lock);
2145 
2146 	if (ni->attr_list) {
2147 		ntfs_free(ni->attr_list);
2148 		ni->attr_list = NULL;
2149 	}
2150 
2151 	down_write(&ni->attr_list_rl.lock);
2152 	if (ni->attr_list_rl.rl) {
2153 		ntfs_free(ni->attr_list_rl.rl);
2154 		ni->attr_list_rl.rl = NULL;
2155 	}
2156 	up_write(&ni->attr_list_rl.lock);
2157 
2158 	if (ni->name_len && ni->name != I30) {
2159 		/* Catch bugs... */
2160 		BUG_ON(!ni->name);
2161 		kfree(ni->name);
2162 	}
2163 }
2164 
2165 void ntfs_clear_extent_inode(ntfs_inode *ni)
2166 {
2167 	ntfs_debug("Entering for inode 0x%lx.", ni->mft_no);
2168 
2169 	BUG_ON(NInoAttr(ni));
2170 	BUG_ON(ni->nr_extents != -1);
2171 
2172 #ifdef NTFS_RW
2173 	if (NInoDirty(ni)) {
2174 		if (!is_bad_inode(VFS_I(ni->ext.base_ntfs_ino)))
2175 			ntfs_error(ni->vol->sb, "Clearing dirty extent inode!  "
2176 					"Losing data!  This is a BUG!!!");
2177 		// FIXME:  Do something!!!
2178 	}
2179 #endif /* NTFS_RW */
2180 
2181 	__ntfs_clear_inode(ni);
2182 
2183 	/* Bye, bye... */
2184 	ntfs_destroy_extent_inode(ni);
2185 }
2186 
2187 /**
2188  * ntfs_clear_big_inode - clean up the ntfs specific part of an inode
2189  * @vi:		vfs inode pending annihilation
2190  *
2191  * When the VFS is going to remove an inode from memory, ntfs_clear_big_inode()
2192  * is called, which deallocates all memory belonging to the NTFS specific part
2193  * of the inode and returns.
2194  *
2195  * If the MFT record is dirty, we commit it before doing anything else.
2196  */
2197 void ntfs_clear_big_inode(struct inode *vi)
2198 {
2199 	ntfs_inode *ni = NTFS_I(vi);
2200 
2201 	/*
2202 	 * If the inode @vi is an index inode we need to put the attribute
2203 	 * inode for the index bitmap, if it is present, otherwise the index
2204 	 * inode would disappear and the attribute inode for the index bitmap
2205 	 * would no longer be referenced from anywhere and thus it would remain
2206 	 * pinned for ever.
2207 	 */
2208 	if (NInoAttr(ni) && (ni->type == AT_INDEX_ALLOCATION) &&
2209 			NInoIndexAllocPresent(ni) && ni->itype.index.bmp_ino) {
2210 		iput(ni->itype.index.bmp_ino);
2211 		ni->itype.index.bmp_ino = NULL;
2212 	}
2213 #ifdef NTFS_RW
2214 	if (NInoDirty(ni)) {
2215 		BOOL was_bad = (is_bad_inode(vi));
2216 
2217 		/* Committing the inode also commits all extent inodes. */
2218 		ntfs_commit_inode(vi);
2219 
2220 		if (!was_bad && (is_bad_inode(vi) || NInoDirty(ni))) {
2221 			ntfs_error(vi->i_sb, "Failed to commit dirty inode "
2222 					"0x%lx.  Losing data!", vi->i_ino);
2223 			// FIXME:  Do something!!!
2224 		}
2225 	}
2226 #endif /* NTFS_RW */
2227 
2228 	/* No need to lock at this stage as no one else has a reference. */
2229 	if (ni->nr_extents > 0) {
2230 		int i;
2231 
2232 		for (i = 0; i < ni->nr_extents; i++)
2233 			ntfs_clear_extent_inode(ni->ext.extent_ntfs_inos[i]);
2234 		kfree(ni->ext.extent_ntfs_inos);
2235 	}
2236 
2237 	__ntfs_clear_inode(ni);
2238 
2239 	if (NInoAttr(ni)) {
2240 		/* Release the base inode if we are holding it. */
2241 		if (ni->nr_extents == -1) {
2242 			iput(VFS_I(ni->ext.base_ntfs_ino));
2243 			ni->nr_extents = 0;
2244 			ni->ext.base_ntfs_ino = NULL;
2245 		}
2246 	}
2247 	return;
2248 }
2249 
2250 /**
2251  * ntfs_show_options - show mount options in /proc/mounts
2252  * @sf:		seq_file in which to write our mount options
2253  * @mnt:	vfs mount whose mount options to display
2254  *
2255  * Called by the VFS once for each mounted ntfs volume when someone reads
2256  * /proc/mounts in order to display the NTFS specific mount options of each
2257  * mount. The mount options of the vfs mount @mnt are written to the seq file
2258  * @sf and success is returned.
2259  */
2260 int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt)
2261 {
2262 	ntfs_volume *vol = NTFS_SB(mnt->mnt_sb);
2263 	int i;
2264 
2265 	seq_printf(sf, ",uid=%i", vol->uid);
2266 	seq_printf(sf, ",gid=%i", vol->gid);
2267 	if (vol->fmask == vol->dmask)
2268 		seq_printf(sf, ",umask=0%o", vol->fmask);
2269 	else {
2270 		seq_printf(sf, ",fmask=0%o", vol->fmask);
2271 		seq_printf(sf, ",dmask=0%o", vol->dmask);
2272 	}
2273 	seq_printf(sf, ",nls=%s", vol->nls_map->charset);
2274 	if (NVolCaseSensitive(vol))
2275 		seq_printf(sf, ",case_sensitive");
2276 	if (NVolShowSystemFiles(vol))
2277 		seq_printf(sf, ",show_sys_files");
2278 	for (i = 0; on_errors_arr[i].val; i++) {
2279 		if (on_errors_arr[i].val & vol->on_errors)
2280 			seq_printf(sf, ",errors=%s", on_errors_arr[i].str);
2281 	}
2282 	seq_printf(sf, ",mft_zone_multiplier=%i", vol->mft_zone_multiplier);
2283 	return 0;
2284 }
2285 
2286 #ifdef NTFS_RW
2287 
2288 /**
2289  * ntfs_truncate - called when the i_size of an ntfs inode is changed
2290  * @vi:		inode for which the i_size was changed
2291  *
2292  * We do not support i_size changes yet.
2293  *
2294  * The kernel guarantees that @vi is a regular file (S_ISREG() is true) and
2295  * that the change is allowed.
2296  *
2297  * This implies for us that @vi is a file inode rather than a directory, index,
2298  * or attribute inode as well as that @vi is a base inode.
2299  *
2300  * Returns 0 on success or -errno on error.
2301  *
2302  * Called with ->i_sem held.  In all but one case ->i_alloc_sem is held for
2303  * writing.  The only case where ->i_alloc_sem is not held is
2304  * mm/filemap.c::generic_file_buffered_write() where vmtruncate() is called
2305  * with the current i_size as the offset which means that it is a noop as far
2306  * as ntfs_truncate() is concerned.
2307  */
2308 int ntfs_truncate(struct inode *vi)
2309 {
2310 	ntfs_inode *ni = NTFS_I(vi);
2311 	ntfs_volume *vol = ni->vol;
2312 	ntfs_attr_search_ctx *ctx;
2313 	MFT_RECORD *m;
2314 	const char *te = "  Leaving file length out of sync with i_size.";
2315 	int err;
2316 
2317 	ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
2318 	BUG_ON(NInoAttr(ni));
2319 	BUG_ON(ni->nr_extents < 0);
2320 	m = map_mft_record(ni);
2321 	if (IS_ERR(m)) {
2322 		err = PTR_ERR(m);
2323 		ntfs_error(vi->i_sb, "Failed to map mft record for inode 0x%lx "
2324 				"(error code %d).%s", vi->i_ino, err, te);
2325 		ctx = NULL;
2326 		m = NULL;
2327 		goto err_out;
2328 	}
2329 	ctx = ntfs_attr_get_search_ctx(ni, m);
2330 	if (unlikely(!ctx)) {
2331 		ntfs_error(vi->i_sb, "Failed to allocate a search context for "
2332 				"inode 0x%lx (not enough memory).%s",
2333 				vi->i_ino, te);
2334 		err = -ENOMEM;
2335 		goto err_out;
2336 	}
2337 	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
2338 			CASE_SENSITIVE, 0, NULL, 0, ctx);
2339 	if (unlikely(err)) {
2340 		if (err == -ENOENT)
2341 			ntfs_error(vi->i_sb, "Open attribute is missing from "
2342 					"mft record.  Inode 0x%lx is corrupt.  "
2343 					"Run chkdsk.", vi->i_ino);
2344 		else
2345 			ntfs_error(vi->i_sb, "Failed to lookup attribute in "
2346 					"inode 0x%lx (error code %d).",
2347 					vi->i_ino, err);
2348 		goto err_out;
2349 	}
2350 	/* If the size has not changed there is nothing to do. */
2351 	if (ntfs_attr_size(ctx->attr) == i_size_read(vi))
2352 		goto done;
2353 	// TODO: Implement the truncate...
2354 	ntfs_error(vi->i_sb, "Inode size has changed but this is not "
2355 			"implemented yet.  Resetting inode size to old value. "
2356 			" This is most likely a bug in the ntfs driver!");
2357 	i_size_write(vi, ntfs_attr_size(ctx->attr));
2358 done:
2359 	ntfs_attr_put_search_ctx(ctx);
2360 	unmap_mft_record(ni);
2361 	NInoClearTruncateFailed(ni);
2362 	ntfs_debug("Done.");
2363 	return 0;
2364 err_out:
2365 	if (err != -ENOMEM) {
2366 		NVolSetErrors(vol);
2367 		make_bad_inode(vi);
2368 	}
2369 	if (ctx)
2370 		ntfs_attr_put_search_ctx(ctx);
2371 	if (m)
2372 		unmap_mft_record(ni);
2373 	NInoSetTruncateFailed(ni);
2374 	return err;
2375 }
2376 
/**
 * ntfs_truncate_vfs - wrapper for ntfs_truncate() that has no return value
 * @vi:		inode for which the i_size was changed
 *
 * Calls ntfs_truncate() on @vi and discards its return value.
 *
 * See ntfs_truncate() description above for details.
 */
void ntfs_truncate_vfs(struct inode *vi)
{
	/* The result is deliberately ignored; this function returns nothing. */
	(void)ntfs_truncate(vi);
}
2388 
2389 /**
2390  * ntfs_setattr - called from notify_change() when an attribute is being changed
2391  * @dentry:	dentry whose attributes to change
2392  * @attr:	structure describing the attributes and the changes
2393  *
2394  * We have to trap VFS attempts to truncate the file described by @dentry as
2395  * soon as possible, because we do not implement changes in i_size yet.  So we
2396  * abort all i_size changes here.
2397  *
2398  * We also abort all changes of user, group, and mode as we do not implement
2399  * the NTFS ACLs yet.
2400  *
2401  * Called with ->i_sem held.  For the ATTR_SIZE (i.e. ->truncate) case, also
2402  * called with ->i_alloc_sem held for writing.
2403  *
2404  * Basically this is a copy of generic notify_change() and inode_setattr()
2405  * functionality, except we intercept and abort changes in i_size.
2406  */
2407 int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
2408 {
2409 	struct inode *vi = dentry->d_inode;
2410 	int err;
2411 	unsigned int ia_valid = attr->ia_valid;
2412 
2413 	err = inode_change_ok(vi, attr);
2414 	if (err)
2415 		return err;
2416 
2417 	/* We do not support NTFS ACLs yet. */
2418 	if (ia_valid & (ATTR_UID | ATTR_GID | ATTR_MODE)) {
2419 		ntfs_warning(vi->i_sb, "Changes in user/group/mode are not "
2420 				"supported yet, ignoring.");
2421 		err = -EOPNOTSUPP;
2422 		goto out;
2423 	}
2424 
2425 	if (ia_valid & ATTR_SIZE) {
2426 		if (attr->ia_size != i_size_read(vi)) {
2427 			ntfs_warning(vi->i_sb, "Changes in inode size are not "
2428 					"supported yet, ignoring.");
2429 			err = -EOPNOTSUPP;
2430 			// TODO: Implement...
2431 			// err = vmtruncate(vi, attr->ia_size);
2432 			if (err || ia_valid == ATTR_SIZE)
2433 				goto out;
2434 		} else {
2435 			/*
2436 			 * We skipped the truncate but must still update
2437 			 * timestamps.
2438 			 */
2439 			ia_valid |= ATTR_MTIME|ATTR_CTIME;
2440 		}
2441 	}
2442 
2443 	if (ia_valid & ATTR_ATIME)
2444 		vi->i_atime = attr->ia_atime;
2445 	if (ia_valid & ATTR_MTIME)
2446 		vi->i_mtime = attr->ia_mtime;
2447 	if (ia_valid & ATTR_CTIME)
2448 		vi->i_ctime = attr->ia_ctime;
2449 	mark_inode_dirty(vi);
2450 out:
2451 	return err;
2452 }
2453 
2454 /**
2455  * ntfs_write_inode - write out a dirty inode
2456  * @vi:		inode to write out
2457  * @sync:	if true, write out synchronously
2458  *
2459  * Write out a dirty inode to disk including any extent inodes if present.
2460  *
2461  * If @sync is true, commit the inode to disk and wait for io completion.  This
2462  * is done using write_mft_record().
2463  *
2464  * If @sync is false, just schedule the write to happen but do not wait for i/o
2465  * completion.  In 2.6 kernels, scheduling usually happens just by virtue of
2466  * marking the page (and in this case mft record) dirty but we do not implement
2467  * this yet as write_mft_record() largely ignores the @sync parameter and
2468  * always performs synchronous writes.
2469  *
2470  * Return 0 on success and -errno on error.
2471  */
2472 int ntfs_write_inode(struct inode *vi, int sync)
2473 {
2474 	sle64 nt;
2475 	ntfs_inode *ni = NTFS_I(vi);
2476 	ntfs_attr_search_ctx *ctx;
2477 	MFT_RECORD *m;
2478 	STANDARD_INFORMATION *si;
2479 	int err = 0;
2480 	BOOL modified = FALSE;
2481 
2482 	ntfs_debug("Entering for %sinode 0x%lx.", NInoAttr(ni) ? "attr " : "",
2483 			vi->i_ino);
2484 	/*
2485 	 * Dirty attribute inodes are written via their real inodes so just
2486 	 * clean them here.  Access time updates are taken care off when the
2487 	 * real inode is written.
2488 	 */
2489 	if (NInoAttr(ni)) {
2490 		NInoClearDirty(ni);
2491 		ntfs_debug("Done.");
2492 		return 0;
2493 	}
2494 	/* Map, pin, and lock the mft record belonging to the inode. */
2495 	m = map_mft_record(ni);
2496 	if (IS_ERR(m)) {
2497 		err = PTR_ERR(m);
2498 		goto err_out;
2499 	}
2500 	/* Update the access times in the standard information attribute. */
2501 	ctx = ntfs_attr_get_search_ctx(ni, m);
2502 	if (unlikely(!ctx)) {
2503 		err = -ENOMEM;
2504 		goto unm_err_out;
2505 	}
2506 	err = ntfs_attr_lookup(AT_STANDARD_INFORMATION, NULL, 0,
2507 			CASE_SENSITIVE, 0, NULL, 0, ctx);
2508 	if (unlikely(err)) {
2509 		ntfs_attr_put_search_ctx(ctx);
2510 		goto unm_err_out;
2511 	}
2512 	si = (STANDARD_INFORMATION*)((u8*)ctx->attr +
2513 			le16_to_cpu(ctx->attr->data.resident.value_offset));
2514 	/* Update the access times if they have changed. */
2515 	nt = utc2ntfs(vi->i_mtime);
2516 	if (si->last_data_change_time != nt) {
2517 		ntfs_debug("Updating mtime for inode 0x%lx: old = 0x%llx, "
2518 				"new = 0x%llx", vi->i_ino,
2519 				sle64_to_cpu(si->last_data_change_time),
2520 				sle64_to_cpu(nt));
2521 		si->last_data_change_time = nt;
2522 		modified = TRUE;
2523 	}
2524 	nt = utc2ntfs(vi->i_ctime);
2525 	if (si->last_mft_change_time != nt) {
2526 		ntfs_debug("Updating ctime for inode 0x%lx: old = 0x%llx, "
2527 				"new = 0x%llx", vi->i_ino,
2528 				sle64_to_cpu(si->last_mft_change_time),
2529 				sle64_to_cpu(nt));
2530 		si->last_mft_change_time = nt;
2531 		modified = TRUE;
2532 	}
2533 	nt = utc2ntfs(vi->i_atime);
2534 	if (si->last_access_time != nt) {
2535 		ntfs_debug("Updating atime for inode 0x%lx: old = 0x%llx, "
2536 				"new = 0x%llx", vi->i_ino,
2537 				sle64_to_cpu(si->last_access_time),
2538 				sle64_to_cpu(nt));
2539 		si->last_access_time = nt;
2540 		modified = TRUE;
2541 	}
2542 	/*
2543 	 * If we just modified the standard information attribute we need to
2544 	 * mark the mft record it is in dirty.  We do this manually so that
2545 	 * mark_inode_dirty() is not called which would redirty the inode and
2546 	 * hence result in an infinite loop of trying to write the inode.
2547 	 * There is no need to mark the base inode nor the base mft record
2548 	 * dirty, since we are going to write this mft record below in any case
2549 	 * and the base mft record may actually not have been modified so it
2550 	 * might not need to be written out.
2551 	 * NOTE: It is not a problem when the inode for $MFT itself is being
2552 	 * written out as mark_ntfs_record_dirty() will only set I_DIRTY_PAGES
2553 	 * on the $MFT inode and hence ntfs_write_inode() will not be
2554 	 * re-invoked because of it which in turn is ok since the dirtied mft
2555 	 * record will be cleaned and written out to disk below, i.e. before
2556 	 * this function returns.
2557 	 */
2558 	if (modified && !NInoTestSetDirty(ctx->ntfs_ino))
2559 		mark_ntfs_record_dirty(ctx->ntfs_ino->page,
2560 				ctx->ntfs_ino->page_ofs);
2561 	ntfs_attr_put_search_ctx(ctx);
2562 	/* Now the access times are updated, write the base mft record. */
2563 	if (NInoDirty(ni))
2564 		err = write_mft_record(ni, m, sync);
2565 	/* Write all attached extent mft records. */
2566 	down(&ni->extent_lock);
2567 	if (ni->nr_extents > 0) {
2568 		ntfs_inode **extent_nis = ni->ext.extent_ntfs_inos;
2569 		int i;
2570 
2571 		ntfs_debug("Writing %i extent inodes.", ni->nr_extents);
2572 		for (i = 0; i < ni->nr_extents; i++) {
2573 			ntfs_inode *tni = extent_nis[i];
2574 
2575 			if (NInoDirty(tni)) {
2576 				MFT_RECORD *tm = map_mft_record(tni);
2577 				int ret;
2578 
2579 				if (IS_ERR(tm)) {
2580 					if (!err || err == -ENOMEM)
2581 						err = PTR_ERR(tm);
2582 					continue;
2583 				}
2584 				ret = write_mft_record(tni, tm, sync);
2585 				unmap_mft_record(tni);
2586 				if (unlikely(ret)) {
2587 					if (!err || err == -ENOMEM)
2588 						err = ret;
2589 				}
2590 			}
2591 		}
2592 	}
2593 	up(&ni->extent_lock);
2594 	unmap_mft_record(ni);
2595 	if (unlikely(err))
2596 		goto err_out;
2597 	ntfs_debug("Done.");
2598 	return 0;
2599 unm_err_out:
2600 	unmap_mft_record(ni);
2601 err_out:
2602 	if (err == -ENOMEM) {
2603 		ntfs_warning(vi->i_sb, "Not enough memory to write inode.  "
2604 				"Marking the inode dirty again, so the VFS "
2605 				"retries later.");
2606 		mark_inode_dirty(vi);
2607 	} else {
2608 		ntfs_error(vi->i_sb, "Failed (error code %i):  Marking inode "
2609 				"as bad.  You should run chkdsk.", -err);
2610 		make_bad_inode(vi);
2611 		NVolSetErrors(ni->vol);
2612 	}
2613 	return err;
2614 }
2615 
2616 #endif /* NTFS_RW */
2617