xref: /openbmc/linux/fs/ntfs/index.c (revision 367b8112)
1 /*
2  * index.c - NTFS kernel index handling.  Part of the Linux-NTFS project.
3  *
4  * Copyright (c) 2004-2005 Anton Altaparmakov
5  *
6  * This program/include file is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as published
8  * by the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program/include file is distributed in the hope that it will be
12  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
13  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program (in the main directory of the Linux-NTFS
18  * distribution in the file COPYING); if not, write to the Free Software
19  * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  */
21 
22 #include "aops.h"
23 #include "collate.h"
24 #include "debug.h"
25 #include "index.h"
26 #include "ntfs.h"
27 
28 /**
29  * ntfs_index_ctx_get - allocate and initialize a new index context
30  * @idx_ni:	ntfs index inode with which to initialize the context
31  *
32  * Allocate a new index context, initialize it with @idx_ni and return it.
33  * Return NULL if allocation failed.
34  *
35  * Locking:  Caller must hold i_mutex on the index inode.
36  */
37 ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni)
38 {
39 	ntfs_index_context *ictx;
40 
41 	ictx = kmem_cache_alloc(ntfs_index_ctx_cache, GFP_NOFS);
42 	if (ictx)
43 		*ictx = (ntfs_index_context){ .idx_ni = idx_ni };
44 	return ictx;
45 }
46 
47 /**
48  * ntfs_index_ctx_put - release an index context
49  * @ictx:	index context to free
50  *
51  * Release the index context @ictx, releasing all associated resources.
52  *
53  * Locking:  Caller must hold i_mutex on the index inode.
54  */
55 void ntfs_index_ctx_put(ntfs_index_context *ictx)
56 {
57 	if (ictx->entry) {
58 		if (ictx->is_in_root) {
59 			if (ictx->actx)
60 				ntfs_attr_put_search_ctx(ictx->actx);
61 			if (ictx->base_ni)
62 				unmap_mft_record(ictx->base_ni);
63 		} else {
64 			struct page *page = ictx->page;
65 			if (page) {
66 				BUG_ON(!PageLocked(page));
67 				unlock_page(page);
68 				ntfs_unmap_page(page);
69 			}
70 		}
71 	}
72 	kmem_cache_free(ntfs_index_ctx_cache, ictx);
73 	return;
74 }
75 
76 /**
77  * ntfs_index_lookup - find a key in an index and return its index entry
78  * @key:	[IN] key for which to search in the index
79  * @key_len:	[IN] length of @key in bytes
80  * @ictx:	[IN/OUT] context describing the index and the returned entry
81  *
82  * Before calling ntfs_index_lookup(), @ictx must have been obtained from a
83  * call to ntfs_index_ctx_get().
84  *
85  * Look for the @key in the index specified by the index lookup context @ictx.
86  * ntfs_index_lookup() walks the contents of the index looking for the @key.
87  *
88  * If the @key is found in the index, 0 is returned and @ictx is setup to
89  * describe the index entry containing the matching @key.  @ictx->entry is the
90  * index entry and @ictx->data and @ictx->data_len are the index entry data and
91  * its length in bytes, respectively.
92  *
93  * If the @key is not found in the index, -ENOENT is returned and @ictx is
94  * setup to describe the index entry whose key collates immediately after the
95  * search @key, i.e. this is the position in the index at which an index entry
96  * with a key of @key would need to be inserted.
97  *
98  * If an error occurs return the negative error code and @ictx is left
99  * untouched.
100  *
101  * When finished with the entry and its data, call ntfs_index_ctx_put() to free
102  * the context and other associated resources.
103  *
104  * If the index entry was modified, call flush_dcache_index_entry_page()
105  * immediately after the modification and either ntfs_index_entry_mark_dirty()
106  * or ntfs_index_entry_write() before the call to ntfs_index_ctx_put() to
107  * ensure that the changes are written to disk.
108  *
109  * Locking:  - Caller must hold i_mutex on the index inode.
110  *	     - Each page cache page in the index allocation mapping must be
111  *	       locked whilst being accessed otherwise we may find a corrupt
112  *	       page due to it being under ->writepage at the moment which
113  *	       applies the mst protection fixups before writing out and then
114  *	       removes them again after the write is complete after which it
115  *	       unlocks the page.
116  */
117 int ntfs_index_lookup(const void *key, const int key_len,
118 		ntfs_index_context *ictx)
119 {
120 	VCN vcn, old_vcn;
121 	ntfs_inode *idx_ni = ictx->idx_ni;
122 	ntfs_volume *vol = idx_ni->vol;
123 	struct super_block *sb = vol->sb;
124 	ntfs_inode *base_ni = idx_ni->ext.base_ntfs_ino;
125 	MFT_RECORD *m;
126 	INDEX_ROOT *ir;
127 	INDEX_ENTRY *ie;
128 	INDEX_ALLOCATION *ia;
129 	u8 *index_end, *kaddr;
130 	ntfs_attr_search_ctx *actx;
131 	struct address_space *ia_mapping;
132 	struct page *page;
133 	int rc, err = 0;
134 
135 	ntfs_debug("Entering.");
136 	BUG_ON(!NInoAttr(idx_ni));
137 	BUG_ON(idx_ni->type != AT_INDEX_ALLOCATION);
138 	BUG_ON(idx_ni->nr_extents != -1);
139 	BUG_ON(!base_ni);
140 	BUG_ON(!key);
141 	BUG_ON(key_len <= 0);
142 	if (!ntfs_is_collation_rule_supported(
143 			idx_ni->itype.index.collation_rule)) {
144 		ntfs_error(sb, "Index uses unsupported collation rule 0x%x.  "
145 				"Aborting lookup.", le32_to_cpu(
146 				idx_ni->itype.index.collation_rule));
147 		return -EOPNOTSUPP;
148 	}
149 	/* Get hold of the mft record for the index inode. */
150 	m = map_mft_record(base_ni);
151 	if (IS_ERR(m)) {
152 		ntfs_error(sb, "map_mft_record() failed with error code %ld.",
153 				-PTR_ERR(m));
154 		return PTR_ERR(m);
155 	}
156 	actx = ntfs_attr_get_search_ctx(base_ni, m);
157 	if (unlikely(!actx)) {
158 		err = -ENOMEM;
159 		goto err_out;
160 	}
161 	/* Find the index root attribute in the mft record. */
162 	err = ntfs_attr_lookup(AT_INDEX_ROOT, idx_ni->name, idx_ni->name_len,
163 			CASE_SENSITIVE, 0, NULL, 0, actx);
164 	if (unlikely(err)) {
165 		if (err == -ENOENT) {
166 			ntfs_error(sb, "Index root attribute missing in inode "
167 					"0x%lx.", idx_ni->mft_no);
168 			err = -EIO;
169 		}
170 		goto err_out;
171 	}
172 	/* Get to the index root value (it has been verified in read_inode). */
173 	ir = (INDEX_ROOT*)((u8*)actx->attr +
174 			le16_to_cpu(actx->attr->data.resident.value_offset));
175 	index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
176 	/* The first index entry. */
177 	ie = (INDEX_ENTRY*)((u8*)&ir->index +
178 			le32_to_cpu(ir->index.entries_offset));
179 	/*
180 	 * Loop until we exceed valid memory (corruption case) or until we
181 	 * reach the last entry.
182 	 */
183 	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
184 		/* Bounds checks. */
185 		if ((u8*)ie < (u8*)actx->mrec || (u8*)ie +
186 				sizeof(INDEX_ENTRY_HEADER) > index_end ||
187 				(u8*)ie + le16_to_cpu(ie->length) > index_end)
188 			goto idx_err_out;
189 		/*
190 		 * The last entry cannot contain a key.  It can however contain
191 		 * a pointer to a child node in the B+tree so we just break out.
192 		 */
193 		if (ie->flags & INDEX_ENTRY_END)
194 			break;
195 		/* Further bounds checks. */
196 		if ((u32)sizeof(INDEX_ENTRY_HEADER) +
197 				le16_to_cpu(ie->key_length) >
198 				le16_to_cpu(ie->data.vi.data_offset) ||
199 				(u32)le16_to_cpu(ie->data.vi.data_offset) +
200 				le16_to_cpu(ie->data.vi.data_length) >
201 				le16_to_cpu(ie->length))
202 			goto idx_err_out;
203 		/* If the keys match perfectly, we setup @ictx and return 0. */
204 		if ((key_len == le16_to_cpu(ie->key_length)) && !memcmp(key,
205 				&ie->key, key_len)) {
206 ir_done:
207 			ictx->is_in_root = true;
208 			ictx->ir = ir;
209 			ictx->actx = actx;
210 			ictx->base_ni = base_ni;
211 			ictx->ia = NULL;
212 			ictx->page = NULL;
213 done:
214 			ictx->entry = ie;
215 			ictx->data = (u8*)ie +
216 					le16_to_cpu(ie->data.vi.data_offset);
217 			ictx->data_len = le16_to_cpu(ie->data.vi.data_length);
218 			ntfs_debug("Done.");
219 			return err;
220 		}
221 		/*
222 		 * Not a perfect match, need to do full blown collation so we
223 		 * know which way in the B+tree we have to go.
224 		 */
225 		rc = ntfs_collate(vol, idx_ni->itype.index.collation_rule, key,
226 				key_len, &ie->key, le16_to_cpu(ie->key_length));
227 		/*
228 		 * If @key collates before the key of the current entry, there
229 		 * is definitely no such key in this index but we might need to
230 		 * descend into the B+tree so we just break out of the loop.
231 		 */
232 		if (rc == -1)
233 			break;
234 		/*
235 		 * A match should never happen as the memcmp() call should have
236 		 * cought it, but we still treat it correctly.
237 		 */
238 		if (!rc)
239 			goto ir_done;
240 		/* The keys are not equal, continue the search. */
241 	}
242 	/*
243 	 * We have finished with this index without success.  Check for the
244 	 * presence of a child node and if not present setup @ictx and return
245 	 * -ENOENT.
246 	 */
247 	if (!(ie->flags & INDEX_ENTRY_NODE)) {
248 		ntfs_debug("Entry not found.");
249 		err = -ENOENT;
250 		goto ir_done;
251 	} /* Child node present, descend into it. */
252 	/* Consistency check: Verify that an index allocation exists. */
253 	if (!NInoIndexAllocPresent(idx_ni)) {
254 		ntfs_error(sb, "No index allocation attribute but index entry "
255 				"requires one.  Inode 0x%lx is corrupt or "
256 				"driver bug.", idx_ni->mft_no);
257 		goto err_out;
258 	}
259 	/* Get the starting vcn of the index_block holding the child node. */
260 	vcn = sle64_to_cpup((sle64*)((u8*)ie + le16_to_cpu(ie->length) - 8));
261 	ia_mapping = VFS_I(idx_ni)->i_mapping;
262 	/*
263 	 * We are done with the index root and the mft record.  Release them,
264 	 * otherwise we deadlock with ntfs_map_page().
265 	 */
266 	ntfs_attr_put_search_ctx(actx);
267 	unmap_mft_record(base_ni);
268 	m = NULL;
269 	actx = NULL;
270 descend_into_child_node:
271 	/*
272 	 * Convert vcn to index into the index allocation attribute in units
273 	 * of PAGE_CACHE_SIZE and map the page cache page, reading it from
274 	 * disk if necessary.
275 	 */
276 	page = ntfs_map_page(ia_mapping, vcn <<
277 			idx_ni->itype.index.vcn_size_bits >> PAGE_CACHE_SHIFT);
278 	if (IS_ERR(page)) {
279 		ntfs_error(sb, "Failed to map index page, error %ld.",
280 				-PTR_ERR(page));
281 		err = PTR_ERR(page);
282 		goto err_out;
283 	}
284 	lock_page(page);
285 	kaddr = (u8*)page_address(page);
286 fast_descend_into_child_node:
287 	/* Get to the index allocation block. */
288 	ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
289 			idx_ni->itype.index.vcn_size_bits) & ~PAGE_CACHE_MASK));
290 	/* Bounds checks. */
291 	if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) {
292 		ntfs_error(sb, "Out of bounds check failed.  Corrupt inode "
293 				"0x%lx or driver bug.", idx_ni->mft_no);
294 		goto unm_err_out;
295 	}
296 	/* Catch multi sector transfer fixup errors. */
297 	if (unlikely(!ntfs_is_indx_record(ia->magic))) {
298 		ntfs_error(sb, "Index record with vcn 0x%llx is corrupt.  "
299 				"Corrupt inode 0x%lx.  Run chkdsk.",
300 				(long long)vcn, idx_ni->mft_no);
301 		goto unm_err_out;
302 	}
303 	if (sle64_to_cpu(ia->index_block_vcn) != vcn) {
304 		ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is "
305 				"different from expected VCN (0x%llx).  Inode "
306 				"0x%lx is corrupt or driver bug.",
307 				(unsigned long long)
308 				sle64_to_cpu(ia->index_block_vcn),
309 				(unsigned long long)vcn, idx_ni->mft_no);
310 		goto unm_err_out;
311 	}
312 	if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
313 			idx_ni->itype.index.block_size) {
314 		ntfs_error(sb, "Index buffer (VCN 0x%llx) of inode 0x%lx has "
315 				"a size (%u) differing from the index "
316 				"specified size (%u).  Inode is corrupt or "
317 				"driver bug.", (unsigned long long)vcn,
318 				idx_ni->mft_no,
319 				le32_to_cpu(ia->index.allocated_size) + 0x18,
320 				idx_ni->itype.index.block_size);
321 		goto unm_err_out;
322 	}
323 	index_end = (u8*)ia + idx_ni->itype.index.block_size;
324 	if (index_end > kaddr + PAGE_CACHE_SIZE) {
325 		ntfs_error(sb, "Index buffer (VCN 0x%llx) of inode 0x%lx "
326 				"crosses page boundary.  Impossible!  Cannot "
327 				"access!  This is probably a bug in the "
328 				"driver.", (unsigned long long)vcn,
329 				idx_ni->mft_no);
330 		goto unm_err_out;
331 	}
332 	index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
333 	if (index_end > (u8*)ia + idx_ni->itype.index.block_size) {
334 		ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of inode "
335 				"0x%lx exceeds maximum size.",
336 				(unsigned long long)vcn, idx_ni->mft_no);
337 		goto unm_err_out;
338 	}
339 	/* The first index entry. */
340 	ie = (INDEX_ENTRY*)((u8*)&ia->index +
341 			le32_to_cpu(ia->index.entries_offset));
342 	/*
343 	 * Iterate similar to above big loop but applied to index buffer, thus
344 	 * loop until we exceed valid memory (corruption case) or until we
345 	 * reach the last entry.
346 	 */
347 	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
348 		/* Bounds checks. */
349 		if ((u8*)ie < (u8*)ia || (u8*)ie +
350 				sizeof(INDEX_ENTRY_HEADER) > index_end ||
351 				(u8*)ie + le16_to_cpu(ie->length) > index_end) {
352 			ntfs_error(sb, "Index entry out of bounds in inode "
353 					"0x%lx.", idx_ni->mft_no);
354 			goto unm_err_out;
355 		}
356 		/*
357 		 * The last entry cannot contain a key.  It can however contain
358 		 * a pointer to a child node in the B+tree so we just break out.
359 		 */
360 		if (ie->flags & INDEX_ENTRY_END)
361 			break;
362 		/* Further bounds checks. */
363 		if ((u32)sizeof(INDEX_ENTRY_HEADER) +
364 				le16_to_cpu(ie->key_length) >
365 				le16_to_cpu(ie->data.vi.data_offset) ||
366 				(u32)le16_to_cpu(ie->data.vi.data_offset) +
367 				le16_to_cpu(ie->data.vi.data_length) >
368 				le16_to_cpu(ie->length)) {
369 			ntfs_error(sb, "Index entry out of bounds in inode "
370 					"0x%lx.", idx_ni->mft_no);
371 			goto unm_err_out;
372 		}
373 		/* If the keys match perfectly, we setup @ictx and return 0. */
374 		if ((key_len == le16_to_cpu(ie->key_length)) && !memcmp(key,
375 				&ie->key, key_len)) {
376 ia_done:
377 			ictx->is_in_root = false;
378 			ictx->actx = NULL;
379 			ictx->base_ni = NULL;
380 			ictx->ia = ia;
381 			ictx->page = page;
382 			goto done;
383 		}
384 		/*
385 		 * Not a perfect match, need to do full blown collation so we
386 		 * know which way in the B+tree we have to go.
387 		 */
388 		rc = ntfs_collate(vol, idx_ni->itype.index.collation_rule, key,
389 				key_len, &ie->key, le16_to_cpu(ie->key_length));
390 		/*
391 		 * If @key collates before the key of the current entry, there
392 		 * is definitely no such key in this index but we might need to
393 		 * descend into the B+tree so we just break out of the loop.
394 		 */
395 		if (rc == -1)
396 			break;
397 		/*
398 		 * A match should never happen as the memcmp() call should have
399 		 * cought it, but we still treat it correctly.
400 		 */
401 		if (!rc)
402 			goto ia_done;
403 		/* The keys are not equal, continue the search. */
404 	}
405 	/*
406 	 * We have finished with this index buffer without success.  Check for
407 	 * the presence of a child node and if not present return -ENOENT.
408 	 */
409 	if (!(ie->flags & INDEX_ENTRY_NODE)) {
410 		ntfs_debug("Entry not found.");
411 		err = -ENOENT;
412 		goto ia_done;
413 	}
414 	if ((ia->index.flags & NODE_MASK) == LEAF_NODE) {
415 		ntfs_error(sb, "Index entry with child node found in a leaf "
416 				"node in inode 0x%lx.", idx_ni->mft_no);
417 		goto unm_err_out;
418 	}
419 	/* Child node present, descend into it. */
420 	old_vcn = vcn;
421 	vcn = sle64_to_cpup((sle64*)((u8*)ie + le16_to_cpu(ie->length) - 8));
422 	if (vcn >= 0) {
423 		/*
424 		 * If vcn is in the same page cache page as old_vcn we recycle
425 		 * the mapped page.
426 		 */
427 		if (old_vcn << vol->cluster_size_bits >>
428 				PAGE_CACHE_SHIFT == vcn <<
429 				vol->cluster_size_bits >>
430 				PAGE_CACHE_SHIFT)
431 			goto fast_descend_into_child_node;
432 		unlock_page(page);
433 		ntfs_unmap_page(page);
434 		goto descend_into_child_node;
435 	}
436 	ntfs_error(sb, "Negative child node vcn in inode 0x%lx.",
437 			idx_ni->mft_no);
438 unm_err_out:
439 	unlock_page(page);
440 	ntfs_unmap_page(page);
441 err_out:
442 	if (!err)
443 		err = -EIO;
444 	if (actx)
445 		ntfs_attr_put_search_ctx(actx);
446 	if (m)
447 		unmap_mft_record(base_ni);
448 	return err;
449 idx_err_out:
450 	ntfs_error(sb, "Corrupt index.  Aborting lookup.");
451 	goto err_out;
452 }
453