xref: /openbmc/linux/fs/ntfs/dir.c (revision b8265621)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /**
3  * dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project.
4  *
5  * Copyright (c) 2001-2007 Anton Altaparmakov
6  * Copyright (c) 2002 Richard Russon
7  */
8 
9 #include <linux/buffer_head.h>
10 #include <linux/slab.h>
11 
12 #include "dir.h"
13 #include "aops.h"
14 #include "attrib.h"
15 #include "mft.h"
16 #include "debug.h"
17 #include "ntfs.h"
18 
19 /**
20  * The little endian Unicode string $I30 as a global constant.
21  */
22 ntfschar I30[5] = { cpu_to_le16('$'), cpu_to_le16('I'),
23 		cpu_to_le16('3'),	cpu_to_le16('0'), 0 };
24 
25 /**
26  * ntfs_lookup_inode_by_name - find an inode in a directory given its name
27  * @dir_ni:	ntfs inode of the directory in which to search for the name
28  * @uname:	Unicode name for which to search in the directory
29  * @uname_len:	length of the name @uname in Unicode characters
30  * @res:	return the found file name if necessary (see below)
31  *
32  * Look for an inode with name @uname in the directory with inode @dir_ni.
33  * ntfs_lookup_inode_by_name() walks the contents of the directory looking for
34  * the Unicode name. If the name is found in the directory, the corresponding
35  * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it
36  * is a 64-bit number containing the sequence number.
37  *
38  * On error, a negative value is returned corresponding to the error code. In
39  * particular if the inode is not found -ENOENT is returned. Note that you
40  * can't just check the return value for being negative, you have to check the
41  * inode number for being negative which you can extract using MREC(return
42  * value).
43  *
44  * Note, @uname_len does not include the (optional) terminating NULL character.
45  *
46  * Note, we look for a case sensitive match first but we also look for a case
47  * insensitive match at the same time. If we find a case insensitive match, we
48  * save that for the case that we don't find an exact match, where we return
49  * the case insensitive match and setup @res (which we allocate!) with the mft
50  * reference, the file name type, length and with a copy of the little endian
51  * Unicode file name itself. If we match a file name which is in the DOS name
52  * space, we only return the mft reference and file name type in @res.
53  * ntfs_lookup() then uses this to find the long file name in the inode itself.
54  * This is to avoid polluting the dcache with short file names. We want them to
55  * work but we don't care for how quickly one can access them. This also fixes
56  * the dcache aliasing issues.
57  *
58  * Locking:  - Caller must hold i_mutex on the directory.
59  *	     - Each page cache page in the index allocation mapping must be
60  *	       locked whilst being accessed otherwise we may find a corrupt
61  *	       page due to it being under ->writepage at the moment which
62  *	       applies the mst protection fixups before writing out and then
63  *	       removes them again after the write is complete after which it
64  *	       unlocks the page.
65  */
66 MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
67 		const int uname_len, ntfs_name **res)
68 {
69 	ntfs_volume *vol = dir_ni->vol;
70 	struct super_block *sb = vol->sb;
71 	MFT_RECORD *m;
72 	INDEX_ROOT *ir;
73 	INDEX_ENTRY *ie;
74 	INDEX_ALLOCATION *ia;
75 	u8 *index_end;
76 	u64 mref;
77 	ntfs_attr_search_ctx *ctx;
78 	int err, rc;
79 	VCN vcn, old_vcn;
80 	struct address_space *ia_mapping;
81 	struct page *page;
82 	u8 *kaddr;
83 	ntfs_name *name = NULL;
84 
85 	BUG_ON(!S_ISDIR(VFS_I(dir_ni)->i_mode));
86 	BUG_ON(NInoAttr(dir_ni));
87 	/* Get hold of the mft record for the directory. */
88 	m = map_mft_record(dir_ni);
89 	if (IS_ERR(m)) {
90 		ntfs_error(sb, "map_mft_record() failed with error code %ld.",
91 				-PTR_ERR(m));
92 		return ERR_MREF(PTR_ERR(m));
93 	}
94 	ctx = ntfs_attr_get_search_ctx(dir_ni, m);
95 	if (unlikely(!ctx)) {
96 		err = -ENOMEM;
97 		goto err_out;
98 	}
99 	/* Find the index root attribute in the mft record. */
100 	err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL,
101 			0, ctx);
102 	if (unlikely(err)) {
103 		if (err == -ENOENT) {
104 			ntfs_error(sb, "Index root attribute missing in "
105 					"directory inode 0x%lx.",
106 					dir_ni->mft_no);
107 			err = -EIO;
108 		}
109 		goto err_out;
110 	}
111 	/* Get to the index root value (it's been verified in read_inode). */
112 	ir = (INDEX_ROOT*)((u8*)ctx->attr +
113 			le16_to_cpu(ctx->attr->data.resident.value_offset));
114 	index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
115 	/* The first index entry. */
116 	ie = (INDEX_ENTRY*)((u8*)&ir->index +
117 			le32_to_cpu(ir->index.entries_offset));
118 	/*
119 	 * Loop until we exceed valid memory (corruption case) or until we
120 	 * reach the last entry.
121 	 */
122 	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
123 		/* Bounds checks. */
124 		if ((u8*)ie < (u8*)ctx->mrec || (u8*)ie +
125 				sizeof(INDEX_ENTRY_HEADER) > index_end ||
126 				(u8*)ie + le16_to_cpu(ie->key_length) >
127 				index_end)
128 			goto dir_err_out;
129 		/*
130 		 * The last entry cannot contain a name. It can however contain
131 		 * a pointer to a child node in the B+tree so we just break out.
132 		 */
133 		if (ie->flags & INDEX_ENTRY_END)
134 			break;
135 		/*
136 		 * We perform a case sensitive comparison and if that matches
137 		 * we are done and return the mft reference of the inode (i.e.
138 		 * the inode number together with the sequence number for
139 		 * consistency checking). We convert it to cpu format before
140 		 * returning.
141 		 */
142 		if (ntfs_are_names_equal(uname, uname_len,
143 				(ntfschar*)&ie->key.file_name.file_name,
144 				ie->key.file_name.file_name_length,
145 				CASE_SENSITIVE, vol->upcase, vol->upcase_len)) {
146 found_it:
147 			/*
148 			 * We have a perfect match, so we don't need to care
149 			 * about having matched imperfectly before, so we can
150 			 * free name and set *res to NULL.
151 			 * However, if the perfect match is a short file name,
152 			 * we need to signal this through *res, so that
153 			 * ntfs_lookup() can fix dcache aliasing issues.
154 			 * As an optimization we just reuse an existing
155 			 * allocation of *res.
156 			 */
157 			if (ie->key.file_name.file_name_type == FILE_NAME_DOS) {
158 				if (!name) {
159 					name = kmalloc(sizeof(ntfs_name),
160 							GFP_NOFS);
161 					if (!name) {
162 						err = -ENOMEM;
163 						goto err_out;
164 					}
165 				}
166 				name->mref = le64_to_cpu(
167 						ie->data.dir.indexed_file);
168 				name->type = FILE_NAME_DOS;
169 				name->len = 0;
170 				*res = name;
171 			} else {
172 				kfree(name);
173 				*res = NULL;
174 			}
175 			mref = le64_to_cpu(ie->data.dir.indexed_file);
176 			ntfs_attr_put_search_ctx(ctx);
177 			unmap_mft_record(dir_ni);
178 			return mref;
179 		}
180 		/*
181 		 * For a case insensitive mount, we also perform a case
182 		 * insensitive comparison (provided the file name is not in the
183 		 * POSIX namespace). If the comparison matches, and the name is
184 		 * in the WIN32 namespace, we cache the filename in *res so
185 		 * that the caller, ntfs_lookup(), can work on it. If the
186 		 * comparison matches, and the name is in the DOS namespace, we
187 		 * only cache the mft reference and the file name type (we set
188 		 * the name length to zero for simplicity).
189 		 */
190 		if (!NVolCaseSensitive(vol) &&
191 				ie->key.file_name.file_name_type &&
192 				ntfs_are_names_equal(uname, uname_len,
193 				(ntfschar*)&ie->key.file_name.file_name,
194 				ie->key.file_name.file_name_length,
195 				IGNORE_CASE, vol->upcase, vol->upcase_len)) {
196 			int name_size = sizeof(ntfs_name);
197 			u8 type = ie->key.file_name.file_name_type;
198 			u8 len = ie->key.file_name.file_name_length;
199 
200 			/* Only one case insensitive matching name allowed. */
201 			if (name) {
202 				ntfs_error(sb, "Found already allocated name "
203 						"in phase 1. Please run chkdsk "
204 						"and if that doesn't find any "
205 						"errors please report you saw "
206 						"this message to "
207 						"linux-ntfs-dev@lists."
208 						"sourceforge.net.");
209 				goto dir_err_out;
210 			}
211 
212 			if (type != FILE_NAME_DOS)
213 				name_size += len * sizeof(ntfschar);
214 			name = kmalloc(name_size, GFP_NOFS);
215 			if (!name) {
216 				err = -ENOMEM;
217 				goto err_out;
218 			}
219 			name->mref = le64_to_cpu(ie->data.dir.indexed_file);
220 			name->type = type;
221 			if (type != FILE_NAME_DOS) {
222 				name->len = len;
223 				memcpy(name->name, ie->key.file_name.file_name,
224 						len * sizeof(ntfschar));
225 			} else
226 				name->len = 0;
227 			*res = name;
228 		}
229 		/*
230 		 * Not a perfect match, need to do full blown collation so we
231 		 * know which way in the B+tree we have to go.
232 		 */
233 		rc = ntfs_collate_names(uname, uname_len,
234 				(ntfschar*)&ie->key.file_name.file_name,
235 				ie->key.file_name.file_name_length, 1,
236 				IGNORE_CASE, vol->upcase, vol->upcase_len);
237 		/*
238 		 * If uname collates before the name of the current entry, there
239 		 * is definitely no such name in this index but we might need to
240 		 * descend into the B+tree so we just break out of the loop.
241 		 */
242 		if (rc == -1)
243 			break;
244 		/* The names are not equal, continue the search. */
245 		if (rc)
246 			continue;
247 		/*
248 		 * Names match with case insensitive comparison, now try the
249 		 * case sensitive comparison, which is required for proper
250 		 * collation.
251 		 */
252 		rc = ntfs_collate_names(uname, uname_len,
253 				(ntfschar*)&ie->key.file_name.file_name,
254 				ie->key.file_name.file_name_length, 1,
255 				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
256 		if (rc == -1)
257 			break;
258 		if (rc)
259 			continue;
260 		/*
261 		 * Perfect match, this will never happen as the
262 		 * ntfs_are_names_equal() call will have gotten a match but we
263 		 * still treat it correctly.
264 		 */
265 		goto found_it;
266 	}
267 	/*
268 	 * We have finished with this index without success. Check for the
269 	 * presence of a child node and if not present return -ENOENT, unless
270 	 * we have got a matching name cached in name in which case return the
271 	 * mft reference associated with it.
272 	 */
273 	if (!(ie->flags & INDEX_ENTRY_NODE)) {
274 		if (name) {
275 			ntfs_attr_put_search_ctx(ctx);
276 			unmap_mft_record(dir_ni);
277 			return name->mref;
278 		}
279 		ntfs_debug("Entry not found.");
280 		err = -ENOENT;
281 		goto err_out;
282 	} /* Child node present, descend into it. */
283 	/* Consistency check: Verify that an index allocation exists. */
284 	if (!NInoIndexAllocPresent(dir_ni)) {
285 		ntfs_error(sb, "No index allocation attribute but index entry "
286 				"requires one. Directory inode 0x%lx is "
287 				"corrupt or driver bug.", dir_ni->mft_no);
288 		goto err_out;
289 	}
290 	/* Get the starting vcn of the index_block holding the child node. */
291 	vcn = sle64_to_cpup((sle64*)((u8*)ie + le16_to_cpu(ie->length) - 8));
292 	ia_mapping = VFS_I(dir_ni)->i_mapping;
293 	/*
294 	 * We are done with the index root and the mft record. Release them,
295 	 * otherwise we deadlock with ntfs_map_page().
296 	 */
297 	ntfs_attr_put_search_ctx(ctx);
298 	unmap_mft_record(dir_ni);
299 	m = NULL;
300 	ctx = NULL;
301 descend_into_child_node:
302 	/*
303 	 * Convert vcn to index into the index allocation attribute in units
304 	 * of PAGE_SIZE and map the page cache page, reading it from
305 	 * disk if necessary.
306 	 */
307 	page = ntfs_map_page(ia_mapping, vcn <<
308 			dir_ni->itype.index.vcn_size_bits >> PAGE_SHIFT);
309 	if (IS_ERR(page)) {
310 		ntfs_error(sb, "Failed to map directory index page, error %ld.",
311 				-PTR_ERR(page));
312 		err = PTR_ERR(page);
313 		goto err_out;
314 	}
315 	lock_page(page);
316 	kaddr = (u8*)page_address(page);
317 fast_descend_into_child_node:
318 	/* Get to the index allocation block. */
319 	ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
320 			dir_ni->itype.index.vcn_size_bits) & ~PAGE_MASK));
321 	/* Bounds checks. */
322 	if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE) {
323 		ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
324 				"inode 0x%lx or driver bug.", dir_ni->mft_no);
325 		goto unm_err_out;
326 	}
327 	/* Catch multi sector transfer fixup errors. */
328 	if (unlikely(!ntfs_is_indx_record(ia->magic))) {
329 		ntfs_error(sb, "Directory index record with vcn 0x%llx is "
330 				"corrupt.  Corrupt inode 0x%lx.  Run chkdsk.",
331 				(unsigned long long)vcn, dir_ni->mft_no);
332 		goto unm_err_out;
333 	}
334 	if (sle64_to_cpu(ia->index_block_vcn) != vcn) {
335 		ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is "
336 				"different from expected VCN (0x%llx). "
337 				"Directory inode 0x%lx is corrupt or driver "
338 				"bug.", (unsigned long long)
339 				sle64_to_cpu(ia->index_block_vcn),
340 				(unsigned long long)vcn, dir_ni->mft_no);
341 		goto unm_err_out;
342 	}
343 	if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
344 			dir_ni->itype.index.block_size) {
345 		ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
346 				"0x%lx has a size (%u) differing from the "
347 				"directory specified size (%u). Directory "
348 				"inode is corrupt or driver bug.",
349 				(unsigned long long)vcn, dir_ni->mft_no,
350 				le32_to_cpu(ia->index.allocated_size) + 0x18,
351 				dir_ni->itype.index.block_size);
352 		goto unm_err_out;
353 	}
354 	index_end = (u8*)ia + dir_ni->itype.index.block_size;
355 	if (index_end > kaddr + PAGE_SIZE) {
356 		ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
357 				"0x%lx crosses page boundary. Impossible! "
358 				"Cannot access! This is probably a bug in the "
359 				"driver.", (unsigned long long)vcn,
360 				dir_ni->mft_no);
361 		goto unm_err_out;
362 	}
363 	index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
364 	if (index_end > (u8*)ia + dir_ni->itype.index.block_size) {
365 		ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of directory "
366 				"inode 0x%lx exceeds maximum size.",
367 				(unsigned long long)vcn, dir_ni->mft_no);
368 		goto unm_err_out;
369 	}
370 	/* The first index entry. */
371 	ie = (INDEX_ENTRY*)((u8*)&ia->index +
372 			le32_to_cpu(ia->index.entries_offset));
373 	/*
374 	 * Iterate similar to above big loop but applied to index buffer, thus
375 	 * loop until we exceed valid memory (corruption case) or until we
376 	 * reach the last entry.
377 	 */
378 	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
379 		/* Bounds check. */
380 		if ((u8*)ie < (u8*)ia || (u8*)ie +
381 				sizeof(INDEX_ENTRY_HEADER) > index_end ||
382 				(u8*)ie + le16_to_cpu(ie->key_length) >
383 				index_end) {
384 			ntfs_error(sb, "Index entry out of bounds in "
385 					"directory inode 0x%lx.",
386 					dir_ni->mft_no);
387 			goto unm_err_out;
388 		}
389 		/*
390 		 * The last entry cannot contain a name. It can however contain
391 		 * a pointer to a child node in the B+tree so we just break out.
392 		 */
393 		if (ie->flags & INDEX_ENTRY_END)
394 			break;
395 		/*
396 		 * We perform a case sensitive comparison and if that matches
397 		 * we are done and return the mft reference of the inode (i.e.
398 		 * the inode number together with the sequence number for
399 		 * consistency checking). We convert it to cpu format before
400 		 * returning.
401 		 */
402 		if (ntfs_are_names_equal(uname, uname_len,
403 				(ntfschar*)&ie->key.file_name.file_name,
404 				ie->key.file_name.file_name_length,
405 				CASE_SENSITIVE, vol->upcase, vol->upcase_len)) {
406 found_it2:
407 			/*
408 			 * We have a perfect match, so we don't need to care
409 			 * about having matched imperfectly before, so we can
410 			 * free name and set *res to NULL.
411 			 * However, if the perfect match is a short file name,
412 			 * we need to signal this through *res, so that
413 			 * ntfs_lookup() can fix dcache aliasing issues.
414 			 * As an optimization we just reuse an existing
415 			 * allocation of *res.
416 			 */
417 			if (ie->key.file_name.file_name_type == FILE_NAME_DOS) {
418 				if (!name) {
419 					name = kmalloc(sizeof(ntfs_name),
420 							GFP_NOFS);
421 					if (!name) {
422 						err = -ENOMEM;
423 						goto unm_err_out;
424 					}
425 				}
426 				name->mref = le64_to_cpu(
427 						ie->data.dir.indexed_file);
428 				name->type = FILE_NAME_DOS;
429 				name->len = 0;
430 				*res = name;
431 			} else {
432 				kfree(name);
433 				*res = NULL;
434 			}
435 			mref = le64_to_cpu(ie->data.dir.indexed_file);
436 			unlock_page(page);
437 			ntfs_unmap_page(page);
438 			return mref;
439 		}
440 		/*
441 		 * For a case insensitive mount, we also perform a case
442 		 * insensitive comparison (provided the file name is not in the
443 		 * POSIX namespace). If the comparison matches, and the name is
444 		 * in the WIN32 namespace, we cache the filename in *res so
445 		 * that the caller, ntfs_lookup(), can work on it. If the
446 		 * comparison matches, and the name is in the DOS namespace, we
447 		 * only cache the mft reference and the file name type (we set
448 		 * the name length to zero for simplicity).
449 		 */
450 		if (!NVolCaseSensitive(vol) &&
451 				ie->key.file_name.file_name_type &&
452 				ntfs_are_names_equal(uname, uname_len,
453 				(ntfschar*)&ie->key.file_name.file_name,
454 				ie->key.file_name.file_name_length,
455 				IGNORE_CASE, vol->upcase, vol->upcase_len)) {
456 			int name_size = sizeof(ntfs_name);
457 			u8 type = ie->key.file_name.file_name_type;
458 			u8 len = ie->key.file_name.file_name_length;
459 
460 			/* Only one case insensitive matching name allowed. */
461 			if (name) {
462 				ntfs_error(sb, "Found already allocated name "
463 						"in phase 2. Please run chkdsk "
464 						"and if that doesn't find any "
465 						"errors please report you saw "
466 						"this message to "
467 						"linux-ntfs-dev@lists."
468 						"sourceforge.net.");
469 				unlock_page(page);
470 				ntfs_unmap_page(page);
471 				goto dir_err_out;
472 			}
473 
474 			if (type != FILE_NAME_DOS)
475 				name_size += len * sizeof(ntfschar);
476 			name = kmalloc(name_size, GFP_NOFS);
477 			if (!name) {
478 				err = -ENOMEM;
479 				goto unm_err_out;
480 			}
481 			name->mref = le64_to_cpu(ie->data.dir.indexed_file);
482 			name->type = type;
483 			if (type != FILE_NAME_DOS) {
484 				name->len = len;
485 				memcpy(name->name, ie->key.file_name.file_name,
486 						len * sizeof(ntfschar));
487 			} else
488 				name->len = 0;
489 			*res = name;
490 		}
491 		/*
492 		 * Not a perfect match, need to do full blown collation so we
493 		 * know which way in the B+tree we have to go.
494 		 */
495 		rc = ntfs_collate_names(uname, uname_len,
496 				(ntfschar*)&ie->key.file_name.file_name,
497 				ie->key.file_name.file_name_length, 1,
498 				IGNORE_CASE, vol->upcase, vol->upcase_len);
499 		/*
500 		 * If uname collates before the name of the current entry, there
501 		 * is definitely no such name in this index but we might need to
502 		 * descend into the B+tree so we just break out of the loop.
503 		 */
504 		if (rc == -1)
505 			break;
506 		/* The names are not equal, continue the search. */
507 		if (rc)
508 			continue;
509 		/*
510 		 * Names match with case insensitive comparison, now try the
511 		 * case sensitive comparison, which is required for proper
512 		 * collation.
513 		 */
514 		rc = ntfs_collate_names(uname, uname_len,
515 				(ntfschar*)&ie->key.file_name.file_name,
516 				ie->key.file_name.file_name_length, 1,
517 				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
518 		if (rc == -1)
519 			break;
520 		if (rc)
521 			continue;
522 		/*
523 		 * Perfect match, this will never happen as the
524 		 * ntfs_are_names_equal() call will have gotten a match but we
525 		 * still treat it correctly.
526 		 */
527 		goto found_it2;
528 	}
529 	/*
530 	 * We have finished with this index buffer without success. Check for
531 	 * the presence of a child node.
532 	 */
533 	if (ie->flags & INDEX_ENTRY_NODE) {
534 		if ((ia->index.flags & NODE_MASK) == LEAF_NODE) {
535 			ntfs_error(sb, "Index entry with child node found in "
536 					"a leaf node in directory inode 0x%lx.",
537 					dir_ni->mft_no);
538 			goto unm_err_out;
539 		}
540 		/* Child node present, descend into it. */
541 		old_vcn = vcn;
542 		vcn = sle64_to_cpup((sle64*)((u8*)ie +
543 				le16_to_cpu(ie->length) - 8));
544 		if (vcn >= 0) {
545 			/* If vcn is in the same page cache page as old_vcn we
546 			 * recycle the mapped page. */
547 			if (old_vcn << vol->cluster_size_bits >>
548 					PAGE_SHIFT == vcn <<
549 					vol->cluster_size_bits >>
550 					PAGE_SHIFT)
551 				goto fast_descend_into_child_node;
552 			unlock_page(page);
553 			ntfs_unmap_page(page);
554 			goto descend_into_child_node;
555 		}
556 		ntfs_error(sb, "Negative child node vcn in directory inode "
557 				"0x%lx.", dir_ni->mft_no);
558 		goto unm_err_out;
559 	}
560 	/*
561 	 * No child node present, return -ENOENT, unless we have got a matching
562 	 * name cached in name in which case return the mft reference
563 	 * associated with it.
564 	 */
565 	if (name) {
566 		unlock_page(page);
567 		ntfs_unmap_page(page);
568 		return name->mref;
569 	}
570 	ntfs_debug("Entry not found.");
571 	err = -ENOENT;
572 unm_err_out:
573 	unlock_page(page);
574 	ntfs_unmap_page(page);
575 err_out:
576 	if (!err)
577 		err = -EIO;
578 	if (ctx)
579 		ntfs_attr_put_search_ctx(ctx);
580 	if (m)
581 		unmap_mft_record(dir_ni);
582 	if (name) {
583 		kfree(name);
584 		*res = NULL;
585 	}
586 	return ERR_MREF(err);
587 dir_err_out:
588 	ntfs_error(sb, "Corrupt directory.  Aborting lookup.");
589 	goto err_out;
590 }
591 
592 #if 0
593 
594 // TODO: (AIA)
595 // The algorithm embedded in this code will be required for the time when we
596 // want to support adding of entries to directories, where we require correct
597 // collation of file names in order not to cause corruption of the filesystem.
598 
599 /**
600  * ntfs_lookup_inode_by_name - find an inode in a directory given its name
601  * @dir_ni:	ntfs inode of the directory in which to search for the name
602  * @uname:	Unicode name for which to search in the directory
603  * @uname_len:	length of the name @uname in Unicode characters
604  *
605  * Look for an inode with name @uname in the directory with inode @dir_ni.
606  * ntfs_lookup_inode_by_name() walks the contents of the directory looking for
607  * the Unicode name. If the name is found in the directory, the corresponding
608  * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it
609  * is a 64-bit number containing the sequence number.
610  *
611  * On error, a negative value is returned corresponding to the error code. In
612  * particular if the inode is not found -ENOENT is returned. Note that you
613  * can't just check the return value for being negative, you have to check the
614  * inode number for being negative which you can extract using MREC(return
615  * value).
616  *
617  * Note, @uname_len does not include the (optional) terminating NULL character.
618  */
619 u64 ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
620 		const int uname_len)
621 {
622 	ntfs_volume *vol = dir_ni->vol;
623 	struct super_block *sb = vol->sb;
624 	MFT_RECORD *m;
625 	INDEX_ROOT *ir;
626 	INDEX_ENTRY *ie;
627 	INDEX_ALLOCATION *ia;
628 	u8 *index_end;
629 	u64 mref;
630 	ntfs_attr_search_ctx *ctx;
631 	int err, rc;
632 	IGNORE_CASE_BOOL ic;
633 	VCN vcn, old_vcn;
634 	struct address_space *ia_mapping;
635 	struct page *page;
636 	u8 *kaddr;
637 
638 	/* Get hold of the mft record for the directory. */
639 	m = map_mft_record(dir_ni);
640 	if (IS_ERR(m)) {
641 		ntfs_error(sb, "map_mft_record() failed with error code %ld.",
642 				-PTR_ERR(m));
643 		return ERR_MREF(PTR_ERR(m));
644 	}
645 	ctx = ntfs_attr_get_search_ctx(dir_ni, m);
646 	if (!ctx) {
647 		err = -ENOMEM;
648 		goto err_out;
649 	}
650 	/* Find the index root attribute in the mft record. */
651 	err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL,
652 			0, ctx);
653 	if (unlikely(err)) {
654 		if (err == -ENOENT) {
655 			ntfs_error(sb, "Index root attribute missing in "
656 					"directory inode 0x%lx.",
657 					dir_ni->mft_no);
658 			err = -EIO;
659 		}
660 		goto err_out;
661 	}
662 	/* Get to the index root value (it's been verified in read_inode). */
663 	ir = (INDEX_ROOT*)((u8*)ctx->attr +
664 			le16_to_cpu(ctx->attr->data.resident.value_offset));
665 	index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
666 	/* The first index entry. */
667 	ie = (INDEX_ENTRY*)((u8*)&ir->index +
668 			le32_to_cpu(ir->index.entries_offset));
669 	/*
670 	 * Loop until we exceed valid memory (corruption case) or until we
671 	 * reach the last entry.
672 	 */
673 	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
674 		/* Bounds checks. */
675 		if ((u8*)ie < (u8*)ctx->mrec || (u8*)ie +
676 				sizeof(INDEX_ENTRY_HEADER) > index_end ||
677 				(u8*)ie + le16_to_cpu(ie->key_length) >
678 				index_end)
679 			goto dir_err_out;
680 		/*
681 		 * The last entry cannot contain a name. It can however contain
682 		 * a pointer to a child node in the B+tree so we just break out.
683 		 */
684 		if (ie->flags & INDEX_ENTRY_END)
685 			break;
686 		/*
687 		 * If the current entry has a name type of POSIX, the name is
688 		 * case sensitive and not otherwise. This has the effect of us
689 		 * not being able to access any POSIX file names which collate
690 		 * after the non-POSIX one when they only differ in case, but
691 		 * anyone doing screwy stuff like that deserves to burn in
692 		 * hell... Doing that kind of stuff on NT4 actually causes
693 		 * corruption on the partition even when using SP6a and Linux
694 		 * is not involved at all.
695 		 */
696 		ic = ie->key.file_name.file_name_type ? IGNORE_CASE :
697 				CASE_SENSITIVE;
698 		/*
699 		 * If the names match perfectly, we are done and return the
700 		 * mft reference of the inode (i.e. the inode number together
701 		 * with the sequence number for consistency checking. We
702 		 * convert it to cpu format before returning.
703 		 */
704 		if (ntfs_are_names_equal(uname, uname_len,
705 				(ntfschar*)&ie->key.file_name.file_name,
706 				ie->key.file_name.file_name_length, ic,
707 				vol->upcase, vol->upcase_len)) {
708 found_it:
709 			mref = le64_to_cpu(ie->data.dir.indexed_file);
710 			ntfs_attr_put_search_ctx(ctx);
711 			unmap_mft_record(dir_ni);
712 			return mref;
713 		}
714 		/*
715 		 * Not a perfect match, need to do full blown collation so we
716 		 * know which way in the B+tree we have to go.
717 		 */
718 		rc = ntfs_collate_names(uname, uname_len,
719 				(ntfschar*)&ie->key.file_name.file_name,
720 				ie->key.file_name.file_name_length, 1,
721 				IGNORE_CASE, vol->upcase, vol->upcase_len);
722 		/*
723 		 * If uname collates before the name of the current entry, there
724 		 * is definitely no such name in this index but we might need to
725 		 * descend into the B+tree so we just break out of the loop.
726 		 */
727 		if (rc == -1)
728 			break;
729 		/* The names are not equal, continue the search. */
730 		if (rc)
731 			continue;
732 		/*
733 		 * Names match with case insensitive comparison, now try the
734 		 * case sensitive comparison, which is required for proper
735 		 * collation.
736 		 */
737 		rc = ntfs_collate_names(uname, uname_len,
738 				(ntfschar*)&ie->key.file_name.file_name,
739 				ie->key.file_name.file_name_length, 1,
740 				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
741 		if (rc == -1)
742 			break;
743 		if (rc)
744 			continue;
745 		/*
746 		 * Perfect match, this will never happen as the
747 		 * ntfs_are_names_equal() call will have gotten a match but we
748 		 * still treat it correctly.
749 		 */
750 		goto found_it;
751 	}
752 	/*
753 	 * We have finished with this index without success. Check for the
754 	 * presence of a child node.
755 	 */
756 	if (!(ie->flags & INDEX_ENTRY_NODE)) {
757 		/* No child node, return -ENOENT. */
758 		err = -ENOENT;
759 		goto err_out;
760 	} /* Child node present, descend into it. */
761 	/* Consistency check: Verify that an index allocation exists. */
762 	if (!NInoIndexAllocPresent(dir_ni)) {
763 		ntfs_error(sb, "No index allocation attribute but index entry "
764 				"requires one. Directory inode 0x%lx is "
765 				"corrupt or driver bug.", dir_ni->mft_no);
766 		goto err_out;
767 	}
768 	/* Get the starting vcn of the index_block holding the child node. */
769 	vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8);
770 	ia_mapping = VFS_I(dir_ni)->i_mapping;
771 	/*
772 	 * We are done with the index root and the mft record. Release them,
773 	 * otherwise we deadlock with ntfs_map_page().
774 	 */
775 	ntfs_attr_put_search_ctx(ctx);
776 	unmap_mft_record(dir_ni);
777 	m = NULL;
778 	ctx = NULL;
779 descend_into_child_node:
780 	/*
781 	 * Convert vcn to index into the index allocation attribute in units
782 	 * of PAGE_SIZE and map the page cache page, reading it from
783 	 * disk if necessary.
784 	 */
785 	page = ntfs_map_page(ia_mapping, vcn <<
786 			dir_ni->itype.index.vcn_size_bits >> PAGE_SHIFT);
787 	if (IS_ERR(page)) {
788 		ntfs_error(sb, "Failed to map directory index page, error %ld.",
789 				-PTR_ERR(page));
790 		err = PTR_ERR(page);
791 		goto err_out;
792 	}
793 	lock_page(page);
794 	kaddr = (u8*)page_address(page);
795 fast_descend_into_child_node:
796 	/* Get to the index allocation block. */
797 	ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
798 			dir_ni->itype.index.vcn_size_bits) & ~PAGE_MASK));
799 	/* Bounds checks. */
800 	if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE) {
801 		ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
802 				"inode 0x%lx or driver bug.", dir_ni->mft_no);
803 		goto unm_err_out;
804 	}
805 	/* Catch multi sector transfer fixup errors. */
806 	if (unlikely(!ntfs_is_indx_record(ia->magic))) {
807 		ntfs_error(sb, "Directory index record with vcn 0x%llx is "
808 				"corrupt.  Corrupt inode 0x%lx.  Run chkdsk.",
809 				(unsigned long long)vcn, dir_ni->mft_no);
810 		goto unm_err_out;
811 	}
812 	if (sle64_to_cpu(ia->index_block_vcn) != vcn) {
813 		ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is "
814 				"different from expected VCN (0x%llx). "
815 				"Directory inode 0x%lx is corrupt or driver "
816 				"bug.", (unsigned long long)
817 				sle64_to_cpu(ia->index_block_vcn),
818 				(unsigned long long)vcn, dir_ni->mft_no);
819 		goto unm_err_out;
820 	}
821 	if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
822 			dir_ni->itype.index.block_size) {
823 		ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
824 				"0x%lx has a size (%u) differing from the "
825 				"directory specified size (%u). Directory "
826 				"inode is corrupt or driver bug.",
827 				(unsigned long long)vcn, dir_ni->mft_no,
828 				le32_to_cpu(ia->index.allocated_size) + 0x18,
829 				dir_ni->itype.index.block_size);
830 		goto unm_err_out;
831 	}
832 	index_end = (u8*)ia + dir_ni->itype.index.block_size;
833 	if (index_end > kaddr + PAGE_SIZE) {
834 		ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
835 				"0x%lx crosses page boundary. Impossible! "
836 				"Cannot access! This is probably a bug in the "
837 				"driver.", (unsigned long long)vcn,
838 				dir_ni->mft_no);
839 		goto unm_err_out;
840 	}
841 	index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
842 	if (index_end > (u8*)ia + dir_ni->itype.index.block_size) {
843 		ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of directory "
844 				"inode 0x%lx exceeds maximum size.",
845 				(unsigned long long)vcn, dir_ni->mft_no);
846 		goto unm_err_out;
847 	}
848 	/* The first index entry. */
849 	ie = (INDEX_ENTRY*)((u8*)&ia->index +
850 			le32_to_cpu(ia->index.entries_offset));
851 	/*
852 	 * Iterate similar to above big loop but applied to index buffer, thus
853 	 * loop until we exceed valid memory (corruption case) or until we
854 	 * reach the last entry.
855 	 */
856 	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
857 		/* Bounds check. */
858 		if ((u8*)ie < (u8*)ia || (u8*)ie +
859 				sizeof(INDEX_ENTRY_HEADER) > index_end ||
860 				(u8*)ie + le16_to_cpu(ie->key_length) >
861 				index_end) {
862 			ntfs_error(sb, "Index entry out of bounds in "
863 					"directory inode 0x%lx.",
864 					dir_ni->mft_no);
865 			goto unm_err_out;
866 		}
867 		/*
868 		 * The last entry cannot contain a name. It can however contain
869 		 * a pointer to a child node in the B+tree so we just break out.
870 		 */
871 		if (ie->flags & INDEX_ENTRY_END)
872 			break;
873 		/*
874 		 * If the current entry has a name type of POSIX, the name is
875 		 * case sensitive and not otherwise. This has the effect of us
876 		 * not being able to access any POSIX file names which collate
877 		 * after the non-POSIX one when they only differ in case, but
878 		 * anyone doing screwy stuff like that deserves to burn in
879 		 * hell... Doing that kind of stuff on NT4 actually causes
880 		 * corruption on the partition even when using SP6a and Linux
881 		 * is not involved at all.
882 		 */
883 		ic = ie->key.file_name.file_name_type ? IGNORE_CASE :
884 				CASE_SENSITIVE;
885 		/*
886 		 * If the names match perfectly, we are done and return the
887 		 * mft reference of the inode (i.e. the inode number together
888 		 * with the sequence number for consistency checking. We
889 		 * convert it to cpu format before returning.
890 		 */
891 		if (ntfs_are_names_equal(uname, uname_len,
892 				(ntfschar*)&ie->key.file_name.file_name,
893 				ie->key.file_name.file_name_length, ic,
894 				vol->upcase, vol->upcase_len)) {
895 found_it2:
896 			mref = le64_to_cpu(ie->data.dir.indexed_file);
897 			unlock_page(page);
898 			ntfs_unmap_page(page);
899 			return mref;
900 		}
901 		/*
902 		 * Not a perfect match, need to do full blown collation so we
903 		 * know which way in the B+tree we have to go.
904 		 */
905 		rc = ntfs_collate_names(uname, uname_len,
906 				(ntfschar*)&ie->key.file_name.file_name,
907 				ie->key.file_name.file_name_length, 1,
908 				IGNORE_CASE, vol->upcase, vol->upcase_len);
909 		/*
910 		 * If uname collates before the name of the current entry, there
911 		 * is definitely no such name in this index but we might need to
912 		 * descend into the B+tree so we just break out of the loop.
913 		 */
914 		if (rc == -1)
915 			break;
916 		/* The names are not equal, continue the search. */
917 		if (rc)
918 			continue;
919 		/*
920 		 * Names match with case insensitive comparison, now try the
921 		 * case sensitive comparison, which is required for proper
922 		 * collation.
923 		 */
924 		rc = ntfs_collate_names(uname, uname_len,
925 				(ntfschar*)&ie->key.file_name.file_name,
926 				ie->key.file_name.file_name_length, 1,
927 				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
928 		if (rc == -1)
929 			break;
930 		if (rc)
931 			continue;
932 		/*
933 		 * Perfect match, this will never happen as the
934 		 * ntfs_are_names_equal() call will have gotten a match but we
935 		 * still treat it correctly.
936 		 */
937 		goto found_it2;
938 	}
939 	/*
940 	 * We have finished with this index buffer without success. Check for
941 	 * the presence of a child node.
942 	 */
943 	if (ie->flags & INDEX_ENTRY_NODE) {
944 		if ((ia->index.flags & NODE_MASK) == LEAF_NODE) {
945 			ntfs_error(sb, "Index entry with child node found in "
946 					"a leaf node in directory inode 0x%lx.",
947 					dir_ni->mft_no);
948 			goto unm_err_out;
949 		}
950 		/* Child node present, descend into it. */
951 		old_vcn = vcn;
952 		vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8);
953 		if (vcn >= 0) {
954 			/* If vcn is in the same page cache page as old_vcn we
955 			 * recycle the mapped page. */
956 			if (old_vcn << vol->cluster_size_bits >>
957 					PAGE_SHIFT == vcn <<
958 					vol->cluster_size_bits >>
959 					PAGE_SHIFT)
960 				goto fast_descend_into_child_node;
961 			unlock_page(page);
962 			ntfs_unmap_page(page);
963 			goto descend_into_child_node;
964 		}
965 		ntfs_error(sb, "Negative child node vcn in directory inode "
966 				"0x%lx.", dir_ni->mft_no);
967 		goto unm_err_out;
968 	}
969 	/* No child node, return -ENOENT. */
970 	ntfs_debug("Entry not found.");
971 	err = -ENOENT;
972 unm_err_out:
973 	unlock_page(page);
974 	ntfs_unmap_page(page);
975 err_out:
976 	if (!err)
977 		err = -EIO;
978 	if (ctx)
979 		ntfs_attr_put_search_ctx(ctx);
980 	if (m)
981 		unmap_mft_record(dir_ni);
982 	return ERR_MREF(err);
983 dir_err_out:
984 	ntfs_error(sb, "Corrupt directory. Aborting lookup.");
985 	goto err_out;
986 }
987 
988 #endif
989 
990 /**
991  * ntfs_filldir - ntfs specific filldir method
992  * @vol:	current ntfs volume
993  * @ndir:	ntfs inode of current directory
994  * @ia_page:	page in which the index allocation buffer @ie is in resides
995  * @ie:		current index entry
996  * @name:	buffer to use for the converted name
997  * @actor:	what to feed the entries to
998  *
999  * Convert the Unicode @name to the loaded NLS and pass it to the @filldir
1000  * callback.
1001  *
1002  * If @ia_page is not NULL it is the locked page containing the index
1003  * allocation block containing the index entry @ie.
1004  *
1005  * Note, we drop (and then reacquire) the page lock on @ia_page across the
1006  * @filldir() call otherwise we would deadlock with NFSd when it calls ->lookup
1007  * since ntfs_lookup() will lock the same page.  As an optimization, we do not
1008  * retake the lock if we are returning a non-zero value as ntfs_readdir()
1009  * would need to drop the lock immediately anyway.
1010  */
1011 static inline int ntfs_filldir(ntfs_volume *vol,
1012 		ntfs_inode *ndir, struct page *ia_page, INDEX_ENTRY *ie,
1013 		u8 *name, struct dir_context *actor)
1014 {
1015 	unsigned long mref;
1016 	int name_len;
1017 	unsigned dt_type;
1018 	FILE_NAME_TYPE_FLAGS name_type;
1019 
1020 	name_type = ie->key.file_name.file_name_type;
1021 	if (name_type == FILE_NAME_DOS) {
1022 		ntfs_debug("Skipping DOS name space entry.");
1023 		return 0;
1024 	}
1025 	if (MREF_LE(ie->data.dir.indexed_file) == FILE_root) {
1026 		ntfs_debug("Skipping root directory self reference entry.");
1027 		return 0;
1028 	}
1029 	if (MREF_LE(ie->data.dir.indexed_file) < FILE_first_user &&
1030 			!NVolShowSystemFiles(vol)) {
1031 		ntfs_debug("Skipping system file.");
1032 		return 0;
1033 	}
1034 	name_len = ntfs_ucstonls(vol, (ntfschar*)&ie->key.file_name.file_name,
1035 			ie->key.file_name.file_name_length, &name,
1036 			NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1);
1037 	if (name_len <= 0) {
1038 		ntfs_warning(vol->sb, "Skipping unrepresentable inode 0x%llx.",
1039 				(long long)MREF_LE(ie->data.dir.indexed_file));
1040 		return 0;
1041 	}
1042 	if (ie->key.file_name.file_attributes &
1043 			FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT)
1044 		dt_type = DT_DIR;
1045 	else
1046 		dt_type = DT_REG;
1047 	mref = MREF_LE(ie->data.dir.indexed_file);
1048 	/*
1049 	 * Drop the page lock otherwise we deadlock with NFS when it calls
1050 	 * ->lookup since ntfs_lookup() will lock the same page.
1051 	 */
1052 	if (ia_page)
1053 		unlock_page(ia_page);
1054 	ntfs_debug("Calling filldir for %s with len %i, fpos 0x%llx, inode "
1055 			"0x%lx, DT_%s.", name, name_len, actor->pos, mref,
1056 			dt_type == DT_DIR ? "DIR" : "REG");
1057 	if (!dir_emit(actor, name, name_len, mref, dt_type))
1058 		return 1;
1059 	/* Relock the page but not if we are aborting ->readdir. */
1060 	if (ia_page)
1061 		lock_page(ia_page);
1062 	return 0;
1063 }
1064 
1065 /*
1066  * We use the same basic approach as the old NTFS driver, i.e. we parse the
1067  * index root entries and then the index allocation entries that are marked
1068  * as in use in the index bitmap.
1069  *
1070  * While this will return the names in random order this doesn't matter for
1071  * ->readdir but OTOH results in a faster ->readdir.
1072  *
1073  * VFS calls ->readdir without BKL but with i_mutex held. This protects the VFS
1074  * parts (e.g. ->f_pos and ->i_size, and it also protects against directory
1075  * modifications).
1076  *
1077  * Locking:  - Caller must hold i_mutex on the directory.
1078  *	     - Each page cache page in the index allocation mapping must be
1079  *	       locked whilst being accessed otherwise we may find a corrupt
1080  *	       page due to it being under ->writepage at the moment which
1081  *	       applies the mst protection fixups before writing out and then
1082  *	       removes them again after the write is complete after which it
1083  *	       unlocks the page.
1084  */
1085 static int ntfs_readdir(struct file *file, struct dir_context *actor)
1086 {
1087 	s64 ia_pos, ia_start, prev_ia_pos, bmp_pos;
1088 	loff_t i_size;
1089 	struct inode *bmp_vi, *vdir = file_inode(file);
1090 	struct super_block *sb = vdir->i_sb;
1091 	ntfs_inode *ndir = NTFS_I(vdir);
1092 	ntfs_volume *vol = NTFS_SB(sb);
1093 	MFT_RECORD *m;
1094 	INDEX_ROOT *ir = NULL;
1095 	INDEX_ENTRY *ie;
1096 	INDEX_ALLOCATION *ia;
1097 	u8 *name = NULL;
1098 	int rc, err, ir_pos, cur_bmp_pos;
1099 	struct address_space *ia_mapping, *bmp_mapping;
1100 	struct page *bmp_page = NULL, *ia_page = NULL;
1101 	u8 *kaddr, *bmp, *index_end;
1102 	ntfs_attr_search_ctx *ctx;
1103 
1104 	ntfs_debug("Entering for inode 0x%lx, fpos 0x%llx.",
1105 			vdir->i_ino, actor->pos);
1106 	rc = err = 0;
1107 	/* Are we at end of dir yet? */
1108 	i_size = i_size_read(vdir);
1109 	if (actor->pos >= i_size + vol->mft_record_size)
1110 		return 0;
1111 	/* Emulate . and .. for all directories. */
1112 	if (!dir_emit_dots(file, actor))
1113 		return 0;
1114 	m = NULL;
1115 	ctx = NULL;
1116 	/*
1117 	 * Allocate a buffer to store the current name being processed
1118 	 * converted to format determined by current NLS.
1119 	 */
1120 	name = kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, GFP_NOFS);
1121 	if (unlikely(!name)) {
1122 		err = -ENOMEM;
1123 		goto err_out;
1124 	}
1125 	/* Are we jumping straight into the index allocation attribute? */
1126 	if (actor->pos >= vol->mft_record_size)
1127 		goto skip_index_root;
1128 	/* Get hold of the mft record for the directory. */
1129 	m = map_mft_record(ndir);
1130 	if (IS_ERR(m)) {
1131 		err = PTR_ERR(m);
1132 		m = NULL;
1133 		goto err_out;
1134 	}
1135 	ctx = ntfs_attr_get_search_ctx(ndir, m);
1136 	if (unlikely(!ctx)) {
1137 		err = -ENOMEM;
1138 		goto err_out;
1139 	}
1140 	/* Get the offset into the index root attribute. */
1141 	ir_pos = (s64)actor->pos;
1142 	/* Find the index root attribute in the mft record. */
1143 	err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL,
1144 			0, ctx);
1145 	if (unlikely(err)) {
1146 		ntfs_error(sb, "Index root attribute missing in directory "
1147 				"inode 0x%lx.", vdir->i_ino);
1148 		goto err_out;
1149 	}
1150 	/*
1151 	 * Copy the index root attribute value to a buffer so that we can put
1152 	 * the search context and unmap the mft record before calling the
1153 	 * filldir() callback.  We need to do this because of NFSd which calls
1154 	 * ->lookup() from its filldir callback() and this causes NTFS to
1155 	 * deadlock as ntfs_lookup() maps the mft record of the directory and
1156 	 * we have got it mapped here already.  The only solution is for us to
1157 	 * unmap the mft record here so that a call to ntfs_lookup() is able to
1158 	 * map the mft record without deadlocking.
1159 	 */
1160 	rc = le32_to_cpu(ctx->attr->data.resident.value_length);
1161 	ir = kmalloc(rc, GFP_NOFS);
1162 	if (unlikely(!ir)) {
1163 		err = -ENOMEM;
1164 		goto err_out;
1165 	}
1166 	/* Copy the index root value (it has been verified in read_inode). */
1167 	memcpy(ir, (u8*)ctx->attr +
1168 			le16_to_cpu(ctx->attr->data.resident.value_offset), rc);
1169 	ntfs_attr_put_search_ctx(ctx);
1170 	unmap_mft_record(ndir);
1171 	ctx = NULL;
1172 	m = NULL;
1173 	index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
1174 	/* The first index entry. */
1175 	ie = (INDEX_ENTRY*)((u8*)&ir->index +
1176 			le32_to_cpu(ir->index.entries_offset));
1177 	/*
1178 	 * Loop until we exceed valid memory (corruption case) or until we
1179 	 * reach the last entry or until filldir tells us it has had enough
1180 	 * or signals an error (both covered by the rc test).
1181 	 */
1182 	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
1183 		ntfs_debug("In index root, offset 0x%zx.", (u8*)ie - (u8*)ir);
1184 		/* Bounds checks. */
1185 		if (unlikely((u8*)ie < (u8*)ir || (u8*)ie +
1186 				sizeof(INDEX_ENTRY_HEADER) > index_end ||
1187 				(u8*)ie + le16_to_cpu(ie->key_length) >
1188 				index_end))
1189 			goto err_out;
1190 		/* The last entry cannot contain a name. */
1191 		if (ie->flags & INDEX_ENTRY_END)
1192 			break;
1193 		/* Skip index root entry if continuing previous readdir. */
1194 		if (ir_pos > (u8*)ie - (u8*)ir)
1195 			continue;
1196 		/* Advance the position even if going to skip the entry. */
1197 		actor->pos = (u8*)ie - (u8*)ir;
1198 		/* Submit the name to the filldir callback. */
1199 		rc = ntfs_filldir(vol, ndir, NULL, ie, name, actor);
1200 		if (rc) {
1201 			kfree(ir);
1202 			goto abort;
1203 		}
1204 	}
1205 	/* We are done with the index root and can free the buffer. */
1206 	kfree(ir);
1207 	ir = NULL;
1208 	/* If there is no index allocation attribute we are finished. */
1209 	if (!NInoIndexAllocPresent(ndir))
1210 		goto EOD;
1211 	/* Advance fpos to the beginning of the index allocation. */
1212 	actor->pos = vol->mft_record_size;
1213 skip_index_root:
1214 	kaddr = NULL;
1215 	prev_ia_pos = -1LL;
1216 	/* Get the offset into the index allocation attribute. */
1217 	ia_pos = (s64)actor->pos - vol->mft_record_size;
1218 	ia_mapping = vdir->i_mapping;
1219 	ntfs_debug("Inode 0x%lx, getting index bitmap.", vdir->i_ino);
1220 	bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4);
1221 	if (IS_ERR(bmp_vi)) {
1222 		ntfs_error(sb, "Failed to get bitmap attribute.");
1223 		err = PTR_ERR(bmp_vi);
1224 		goto err_out;
1225 	}
1226 	bmp_mapping = bmp_vi->i_mapping;
1227 	/* Get the starting bitmap bit position and sanity check it. */
1228 	bmp_pos = ia_pos >> ndir->itype.index.block_size_bits;
1229 	if (unlikely(bmp_pos >> 3 >= i_size_read(bmp_vi))) {
1230 		ntfs_error(sb, "Current index allocation position exceeds "
1231 				"index bitmap size.");
1232 		goto iput_err_out;
1233 	}
1234 	/* Get the starting bit position in the current bitmap page. */
1235 	cur_bmp_pos = bmp_pos & ((PAGE_SIZE * 8) - 1);
1236 	bmp_pos &= ~(u64)((PAGE_SIZE * 8) - 1);
1237 get_next_bmp_page:
1238 	ntfs_debug("Reading bitmap with page index 0x%llx, bit ofs 0x%llx",
1239 			(unsigned long long)bmp_pos >> (3 + PAGE_SHIFT),
1240 			(unsigned long long)bmp_pos &
1241 			(unsigned long long)((PAGE_SIZE * 8) - 1));
1242 	bmp_page = ntfs_map_page(bmp_mapping,
1243 			bmp_pos >> (3 + PAGE_SHIFT));
1244 	if (IS_ERR(bmp_page)) {
1245 		ntfs_error(sb, "Reading index bitmap failed.");
1246 		err = PTR_ERR(bmp_page);
1247 		bmp_page = NULL;
1248 		goto iput_err_out;
1249 	}
1250 	bmp = (u8*)page_address(bmp_page);
1251 	/* Find next index block in use. */
1252 	while (!(bmp[cur_bmp_pos >> 3] & (1 << (cur_bmp_pos & 7)))) {
1253 find_next_index_buffer:
1254 		cur_bmp_pos++;
1255 		/*
1256 		 * If we have reached the end of the bitmap page, get the next
1257 		 * page, and put away the old one.
1258 		 */
1259 		if (unlikely((cur_bmp_pos >> 3) >= PAGE_SIZE)) {
1260 			ntfs_unmap_page(bmp_page);
1261 			bmp_pos += PAGE_SIZE * 8;
1262 			cur_bmp_pos = 0;
1263 			goto get_next_bmp_page;
1264 		}
1265 		/* If we have reached the end of the bitmap, we are done. */
1266 		if (unlikely(((bmp_pos + cur_bmp_pos) >> 3) >= i_size))
1267 			goto unm_EOD;
1268 		ia_pos = (bmp_pos + cur_bmp_pos) <<
1269 				ndir->itype.index.block_size_bits;
1270 	}
1271 	ntfs_debug("Handling index buffer 0x%llx.",
1272 			(unsigned long long)bmp_pos + cur_bmp_pos);
1273 	/* If the current index buffer is in the same page we reuse the page. */
1274 	if ((prev_ia_pos & (s64)PAGE_MASK) !=
1275 			(ia_pos & (s64)PAGE_MASK)) {
1276 		prev_ia_pos = ia_pos;
1277 		if (likely(ia_page != NULL)) {
1278 			unlock_page(ia_page);
1279 			ntfs_unmap_page(ia_page);
1280 		}
1281 		/*
1282 		 * Map the page cache page containing the current ia_pos,
1283 		 * reading it from disk if necessary.
1284 		 */
1285 		ia_page = ntfs_map_page(ia_mapping, ia_pos >> PAGE_SHIFT);
1286 		if (IS_ERR(ia_page)) {
1287 			ntfs_error(sb, "Reading index allocation data failed.");
1288 			err = PTR_ERR(ia_page);
1289 			ia_page = NULL;
1290 			goto err_out;
1291 		}
1292 		lock_page(ia_page);
1293 		kaddr = (u8*)page_address(ia_page);
1294 	}
1295 	/* Get the current index buffer. */
1296 	ia = (INDEX_ALLOCATION*)(kaddr + (ia_pos & ~PAGE_MASK &
1297 					  ~(s64)(ndir->itype.index.block_size - 1)));
1298 	/* Bounds checks. */
1299 	if (unlikely((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE)) {
1300 		ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
1301 				"inode 0x%lx or driver bug.", vdir->i_ino);
1302 		goto err_out;
1303 	}
1304 	/* Catch multi sector transfer fixup errors. */
1305 	if (unlikely(!ntfs_is_indx_record(ia->magic))) {
1306 		ntfs_error(sb, "Directory index record with vcn 0x%llx is "
1307 				"corrupt.  Corrupt inode 0x%lx.  Run chkdsk.",
1308 				(unsigned long long)ia_pos >>
1309 				ndir->itype.index.vcn_size_bits, vdir->i_ino);
1310 		goto err_out;
1311 	}
1312 	if (unlikely(sle64_to_cpu(ia->index_block_vcn) != (ia_pos &
1313 			~(s64)(ndir->itype.index.block_size - 1)) >>
1314 			ndir->itype.index.vcn_size_bits)) {
1315 		ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is "
1316 				"different from expected VCN (0x%llx). "
1317 				"Directory inode 0x%lx is corrupt or driver "
1318 				"bug. ", (unsigned long long)
1319 				sle64_to_cpu(ia->index_block_vcn),
1320 				(unsigned long long)ia_pos >>
1321 				ndir->itype.index.vcn_size_bits, vdir->i_ino);
1322 		goto err_out;
1323 	}
1324 	if (unlikely(le32_to_cpu(ia->index.allocated_size) + 0x18 !=
1325 			ndir->itype.index.block_size)) {
1326 		ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
1327 				"0x%lx has a size (%u) differing from the "
1328 				"directory specified size (%u). Directory "
1329 				"inode is corrupt or driver bug.",
1330 				(unsigned long long)ia_pos >>
1331 				ndir->itype.index.vcn_size_bits, vdir->i_ino,
1332 				le32_to_cpu(ia->index.allocated_size) + 0x18,
1333 				ndir->itype.index.block_size);
1334 		goto err_out;
1335 	}
1336 	index_end = (u8*)ia + ndir->itype.index.block_size;
1337 	if (unlikely(index_end > kaddr + PAGE_SIZE)) {
1338 		ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
1339 				"0x%lx crosses page boundary. Impossible! "
1340 				"Cannot access! This is probably a bug in the "
1341 				"driver.", (unsigned long long)ia_pos >>
1342 				ndir->itype.index.vcn_size_bits, vdir->i_ino);
1343 		goto err_out;
1344 	}
1345 	ia_start = ia_pos & ~(s64)(ndir->itype.index.block_size - 1);
1346 	index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
1347 	if (unlikely(index_end > (u8*)ia + ndir->itype.index.block_size)) {
1348 		ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of directory "
1349 				"inode 0x%lx exceeds maximum size.",
1350 				(unsigned long long)ia_pos >>
1351 				ndir->itype.index.vcn_size_bits, vdir->i_ino);
1352 		goto err_out;
1353 	}
1354 	/* The first index entry in this index buffer. */
1355 	ie = (INDEX_ENTRY*)((u8*)&ia->index +
1356 			le32_to_cpu(ia->index.entries_offset));
1357 	/*
1358 	 * Loop until we exceed valid memory (corruption case) or until we
1359 	 * reach the last entry or until filldir tells us it has had enough
1360 	 * or signals an error (both covered by the rc test).
1361 	 */
1362 	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
1363 		ntfs_debug("In index allocation, offset 0x%llx.",
1364 				(unsigned long long)ia_start +
1365 				(unsigned long long)((u8*)ie - (u8*)ia));
1366 		/* Bounds checks. */
1367 		if (unlikely((u8*)ie < (u8*)ia || (u8*)ie +
1368 				sizeof(INDEX_ENTRY_HEADER) > index_end ||
1369 				(u8*)ie + le16_to_cpu(ie->key_length) >
1370 				index_end))
1371 			goto err_out;
1372 		/* The last entry cannot contain a name. */
1373 		if (ie->flags & INDEX_ENTRY_END)
1374 			break;
1375 		/* Skip index block entry if continuing previous readdir. */
1376 		if (ia_pos - ia_start > (u8*)ie - (u8*)ia)
1377 			continue;
1378 		/* Advance the position even if going to skip the entry. */
1379 		actor->pos = (u8*)ie - (u8*)ia +
1380 				(sle64_to_cpu(ia->index_block_vcn) <<
1381 				ndir->itype.index.vcn_size_bits) +
1382 				vol->mft_record_size;
1383 		/*
1384 		 * Submit the name to the @filldir callback.  Note,
1385 		 * ntfs_filldir() drops the lock on @ia_page but it retakes it
1386 		 * before returning, unless a non-zero value is returned in
1387 		 * which case the page is left unlocked.
1388 		 */
1389 		rc = ntfs_filldir(vol, ndir, ia_page, ie, name, actor);
1390 		if (rc) {
1391 			/* @ia_page is already unlocked in this case. */
1392 			ntfs_unmap_page(ia_page);
1393 			ntfs_unmap_page(bmp_page);
1394 			iput(bmp_vi);
1395 			goto abort;
1396 		}
1397 	}
1398 	goto find_next_index_buffer;
1399 unm_EOD:
1400 	if (ia_page) {
1401 		unlock_page(ia_page);
1402 		ntfs_unmap_page(ia_page);
1403 	}
1404 	ntfs_unmap_page(bmp_page);
1405 	iput(bmp_vi);
1406 EOD:
1407 	/* We are finished, set fpos to EOD. */
1408 	actor->pos = i_size + vol->mft_record_size;
1409 abort:
1410 	kfree(name);
1411 	return 0;
1412 err_out:
1413 	if (bmp_page) {
1414 		ntfs_unmap_page(bmp_page);
1415 iput_err_out:
1416 		iput(bmp_vi);
1417 	}
1418 	if (ia_page) {
1419 		unlock_page(ia_page);
1420 		ntfs_unmap_page(ia_page);
1421 	}
1422 	kfree(ir);
1423 	kfree(name);
1424 	if (ctx)
1425 		ntfs_attr_put_search_ctx(ctx);
1426 	if (m)
1427 		unmap_mft_record(ndir);
1428 	if (!err)
1429 		err = -EIO;
1430 	ntfs_debug("Failed. Returning error code %i.", -err);
1431 	return err;
1432 }
1433 
1434 /**
1435  * ntfs_dir_open - called when an inode is about to be opened
1436  * @vi:		inode to be opened
1437  * @filp:	file structure describing the inode
1438  *
1439  * Limit directory size to the page cache limit on architectures where unsigned
1440  * long is 32-bits. This is the most we can do for now without overflowing the
1441  * page cache page index. Doing it this way means we don't run into problems
1442  * because of existing too large directories. It would be better to allow the
1443  * user to read the accessible part of the directory but I doubt very much
1444  * anyone is going to hit this check on a 32-bit architecture, so there is no
1445  * point in adding the extra complexity required to support this.
1446  *
1447  * On 64-bit architectures, the check is hopefully optimized away by the
1448  * compiler.
1449  */
1450 static int ntfs_dir_open(struct inode *vi, struct file *filp)
1451 {
1452 	if (sizeof(unsigned long) < 8) {
1453 		if (i_size_read(vi) > MAX_LFS_FILESIZE)
1454 			return -EFBIG;
1455 	}
1456 	return 0;
1457 }
1458 
1459 #ifdef NTFS_RW
1460 
1461 /**
1462  * ntfs_dir_fsync - sync a directory to disk
1463  * @filp:	directory to be synced
1464  * @dentry:	dentry describing the directory to sync
1465  * @datasync:	if non-zero only flush user data and not metadata
1466  *
1467  * Data integrity sync of a directory to disk.  Used for fsync, fdatasync, and
1468  * msync system calls.  This function is based on file.c::ntfs_file_fsync().
1469  *
1470  * Write the mft record and all associated extent mft records as well as the
1471  * $INDEX_ALLOCATION and $BITMAP attributes and then sync the block device.
1472  *
1473  * If @datasync is true, we do not wait on the inode(s) to be written out
1474  * but we always wait on the page cache pages to be written out.
1475  *
1476  * Note: In the past @filp could be NULL so we ignore it as we don't need it
1477  * anyway.
1478  *
1479  * Locking: Caller must hold i_mutex on the inode.
1480  *
1481  * TODO: We should probably also write all attribute/index inodes associated
1482  * with this inode but since we have no simple way of getting to them we ignore
1483  * this problem for now.  We do write the $BITMAP attribute if it is present
1484  * which is the important one for a directory so things are not too bad.
1485  */
1486 static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
1487 			  int datasync)
1488 {
1489 	struct inode *bmp_vi, *vi = filp->f_mapping->host;
1490 	int err, ret;
1491 	ntfs_attr na;
1492 
1493 	ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
1494 
1495 	err = file_write_and_wait_range(filp, start, end);
1496 	if (err)
1497 		return err;
1498 	inode_lock(vi);
1499 
1500 	BUG_ON(!S_ISDIR(vi->i_mode));
1501 	/* If the bitmap attribute inode is in memory sync it, too. */
1502 	na.mft_no = vi->i_ino;
1503 	na.type = AT_BITMAP;
1504 	na.name = I30;
1505 	na.name_len = 4;
1506 	bmp_vi = ilookup5(vi->i_sb, vi->i_ino, (test_t)ntfs_test_inode, &na);
1507 	if (bmp_vi) {
1508  		write_inode_now(bmp_vi, !datasync);
1509 		iput(bmp_vi);
1510 	}
1511 	ret = __ntfs_write_inode(vi, 1);
1512 	write_inode_now(vi, !datasync);
1513 	err = sync_blockdev(vi->i_sb->s_bdev);
1514 	if (unlikely(err && !ret))
1515 		ret = err;
1516 	if (likely(!ret))
1517 		ntfs_debug("Done.");
1518 	else
1519 		ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx.  Error "
1520 				"%u.", datasync ? "data" : "", vi->i_ino, -ret);
1521 	inode_unlock(vi);
1522 	return ret;
1523 }
1524 
1525 #endif /* NTFS_RW */
1526 
1527 const struct file_operations ntfs_dir_ops = {
1528 	.llseek		= generic_file_llseek,	/* Seek inside directory. */
1529 	.read		= generic_read_dir,	/* Return -EISDIR. */
1530 	.iterate	= ntfs_readdir,		/* Read directory contents. */
1531 #ifdef NTFS_RW
1532 	.fsync		= ntfs_dir_fsync,	/* Sync a directory to disk. */
1533 #endif /* NTFS_RW */
1534 	/*.ioctl	= ,*/			/* Perform function on the
1535 						   mounted filesystem. */
1536 	.open		= ntfs_dir_open,	/* Open directory. */
1537 };
1538