xref: /openbmc/linux/fs/ntfs3/fsntfs.c (revision 25b892b5)
// SPDX-License-Identifier: GPL-2.0
/*
 *
 * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
 *
 */

#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/nls.h>

#include "debug.h"
#include "ntfs.h"
#include "ntfs_fs.h"

// clang-format off
const struct cpu_str NAME_MFT = {
	4, 0, { '$', 'M', 'F', 'T' },
};
const struct cpu_str NAME_MIRROR = {
	8, 0, { '$', 'M', 'F', 'T', 'M', 'i', 'r', 'r' },
};
const struct cpu_str NAME_LOGFILE = {
	8, 0, { '$', 'L', 'o', 'g', 'F', 'i', 'l', 'e' },
};
const struct cpu_str NAME_VOLUME = {
	7, 0, { '$', 'V', 'o', 'l', 'u', 'm', 'e' },
};
const struct cpu_str NAME_ATTRDEF = {
	8, 0, { '$', 'A', 't', 't', 'r', 'D', 'e', 'f' },
};
const struct cpu_str NAME_ROOT = {
	1, 0, { '.' },
};
const struct cpu_str NAME_BITMAP = {
	7, 0, { '$', 'B', 'i', 't', 'm', 'a', 'p' },
};
const struct cpu_str NAME_BOOT = {
	5, 0, { '$', 'B', 'o', 'o', 't' },
};
const struct cpu_str NAME_BADCLUS = {
	8, 0, { '$', 'B', 'a', 'd', 'C', 'l', 'u', 's' },
};
const struct cpu_str NAME_QUOTA = {
	6, 0, { '$', 'Q', 'u', 'o', 't', 'a' },
};
const struct cpu_str NAME_SECURE = {
	7, 0, { '$', 'S', 'e', 'c', 'u', 'r', 'e' },
};
const struct cpu_str NAME_UPCASE = {
	7, 0, { '$', 'U', 'p', 'C', 'a', 's', 'e' },
};
const struct cpu_str NAME_EXTEND = {
	7, 0, { '$', 'E', 'x', 't', 'e', 'n', 'd' },
};
const struct cpu_str NAME_OBJID = {
	6, 0, { '$', 'O', 'b', 'j', 'I', 'd' },
};
const struct cpu_str NAME_REPARSE = {
	8, 0, { '$', 'R', 'e', 'p', 'a', 'r', 's', 'e' },
};
const struct cpu_str NAME_USNJRNL = {
	8, 0, { '$', 'U', 's', 'n', 'J', 'r', 'n', 'l' },
};
const __le16 BAD_NAME[4] = {
	cpu_to_le16('$'), cpu_to_le16('B'), cpu_to_le16('a'), cpu_to_le16('d'),
};
const __le16 I30_NAME[4] = {
	cpu_to_le16('$'), cpu_to_le16('I'), cpu_to_le16('3'), cpu_to_le16('0'),
};
const __le16 SII_NAME[4] = {
	cpu_to_le16('$'), cpu_to_le16('S'), cpu_to_le16('I'), cpu_to_le16('I'),
};
const __le16 SDH_NAME[4] = {
	cpu_to_le16('$'), cpu_to_le16('S'), cpu_to_le16('D'), cpu_to_le16('H'),
};
const __le16 SDS_NAME[4] = {
	cpu_to_le16('$'), cpu_to_le16('S'), cpu_to_le16('D'), cpu_to_le16('S'),
};
const __le16 SO_NAME[2] = {
	cpu_to_le16('$'), cpu_to_le16('O'),
};
const __le16 SQ_NAME[2] = {
	cpu_to_le16('$'), cpu_to_le16('Q'),
};
const __le16 SR_NAME[2] = {
	cpu_to_le16('$'), cpu_to_le16('R'),
};

#ifdef CONFIG_NTFS3_LZX_XPRESS
const __le16 WOF_NAME[17] = {
	cpu_to_le16('W'), cpu_to_le16('o'), cpu_to_le16('f'), cpu_to_le16('C'),
	cpu_to_le16('o'), cpu_to_le16('m'), cpu_to_le16('p'), cpu_to_le16('r'),
	cpu_to_le16('e'), cpu_to_le16('s'), cpu_to_le16('s'), cpu_to_le16('e'),
	cpu_to_le16('d'), cpu_to_le16('D'), cpu_to_le16('a'), cpu_to_le16('t'),
	cpu_to_le16('a'),
};
#endif

// clang-format on

/*
 * ntfs_fix_pre_write - Insert fixups into @rhdr before writing to disk.
 */
bool ntfs_fix_pre_write(struct NTFS_RECORD_HEADER *rhdr, size_t bytes)
{
	u16 *fixup, *ptr;
	u16 sample;
	u16 fo = le16_to_cpu(rhdr->fix_off);
	u16 fn = le16_to_cpu(rhdr->fix_num);

	if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- ||
	    fn * SECTOR_SIZE > bytes) {
		return false;
	}

	/* Get fixup pointer. */
	fixup = Add2Ptr(rhdr, fo);

	if (*fixup >= 0x7FFF)
		*fixup = 1;
	else
		*fixup += 1;

	sample = *fixup;

	ptr = Add2Ptr(rhdr, SECTOR_SIZE - sizeof(short));

	while (fn--) {
		*++fixup = *ptr;
		*ptr = sample;
		ptr += SECTOR_SIZE / sizeof(short);
	}
	return true;
}
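
/*
 * Editorial note (not from the original source): fix_num counts the
 * update-sequence words - one for the sequence number itself plus one
 * per 512-byte sector - so a 1K MFT record has fix_num == 3. After
 * ntfs_fix_pre_write() the last u16 of every sector holds the bumped
 * sequence number, and the displaced words sit in the array at fix_off,
 * ready to be restored by ntfs_fix_post_read() below.
 */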

/*
 * ntfs_fix_post_read - Remove fixups after reading from disk.
 *
 * Return: < 0 if error, 0 if ok, 1 if need to update fixups.
 */
int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes,
		       bool simple)
{
	int ret;
	u16 *fixup, *ptr;
	u16 sample, fo, fn;

	fo = le16_to_cpu(rhdr->fix_off);
	fn = simple ? ((bytes >> SECTOR_SHIFT) + 1)
		    : le16_to_cpu(rhdr->fix_num);

	/* Check errors. */
	if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- ||
	    fn * SECTOR_SIZE > bytes) {
		return -EINVAL; /* Native chkntfs returns ok! */
	}

	/* Get fixup pointer. */
	fixup = Add2Ptr(rhdr, fo);
	sample = *fixup;
	ptr = Add2Ptr(rhdr, SECTOR_SIZE - sizeof(short));
	ret = 0;

	while (fn--) {
		/* Test the current word. */
		if (*ptr != sample) {
			/* Fixup does not match! Is it a serious error? */
			ret = -E_NTFS_FIXUP;
		}

		/* Replace the fixup. */
		*ptr = *++fixup;
		ptr += SECTOR_SIZE / sizeof(short);
	}

	return ret;
}

/*
 * ntfs_extend_init - Load $Extend file.
 */
int ntfs_extend_init(struct ntfs_sb_info *sbi)
{
	int err;
	struct super_block *sb = sbi->sb;
	struct inode *inode, *inode2;
	struct MFT_REF ref;

	if (sbi->volume.major_ver < 3) {
		ntfs_notice(sb, "Skip $Extend 'cause NTFS version");
		return 0;
	}

	ref.low = cpu_to_le32(MFT_REC_EXTEND);
	ref.high = 0;
	ref.seq = cpu_to_le16(MFT_REC_EXTEND);
	inode = ntfs_iget5(sb, &ref, &NAME_EXTEND);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		ntfs_err(sb, "Failed to load $Extend.");
		inode = NULL;
		goto out;
	}

	/* If ntfs_iget5() reads from disk, it never returns a bad inode. */
	if (!S_ISDIR(inode->i_mode)) {
		err = -EINVAL;
		goto out;
	}

	/* Try to find $ObjId. */
	inode2 = dir_search_u(inode, &NAME_OBJID, NULL);
	if (inode2 && !IS_ERR(inode2)) {
		if (is_bad_inode(inode2)) {
			iput(inode2);
		} else {
			sbi->objid.ni = ntfs_i(inode2);
			sbi->objid_no = inode2->i_ino;
		}
	}

	/* Try to find $Quota. */
	inode2 = dir_search_u(inode, &NAME_QUOTA, NULL);
	if (inode2 && !IS_ERR(inode2)) {
		sbi->quota_no = inode2->i_ino;
		iput(inode2);
	}

	/* Try to find $Reparse. */
	inode2 = dir_search_u(inode, &NAME_REPARSE, NULL);
	if (inode2 && !IS_ERR(inode2)) {
		sbi->reparse.ni = ntfs_i(inode2);
		sbi->reparse_no = inode2->i_ino;
	}

	/* Try to find $UsnJrnl. */
	inode2 = dir_search_u(inode, &NAME_USNJRNL, NULL);
	if (inode2 && !IS_ERR(inode2)) {
		sbi->usn_jrnl_no = inode2->i_ino;
		iput(inode2);
	}

	err = 0;
out:
	iput(inode);
	return err;
}

int ntfs_loadlog_and_replay(struct ntfs_inode *ni, struct ntfs_sb_info *sbi)
{
	int err = 0;
	struct super_block *sb = sbi->sb;
	bool initialized = false;
	struct MFT_REF ref;
	struct inode *inode;

	/* Check for 4GB. */
	if (ni->vfs_inode.i_size >= 0x100000000ull) {
		ntfs_err(sb, "\x24LogFile is too big");
		err = -EINVAL;
		goto out;
	}

	sbi->flags |= NTFS_FLAGS_LOG_REPLAYING;

	ref.low = cpu_to_le32(MFT_REC_MFT);
	ref.high = 0;
	ref.seq = cpu_to_le16(1);

	inode = ntfs_iget5(sb, &ref, NULL);

	if (IS_ERR(inode))
		inode = NULL;

	if (!inode) {
		/* Try to use the MFT copy. */
		u64 t64 = sbi->mft.lbo;

		sbi->mft.lbo = sbi->mft.lbo2;
		inode = ntfs_iget5(sb, &ref, NULL);
		sbi->mft.lbo = t64;
		if (IS_ERR(inode))
			inode = NULL;
	}

	if (!inode) {
		err = -EINVAL;
		ntfs_err(sb, "Failed to load $MFT.");
		goto out;
	}

	sbi->mft.ni = ntfs_i(inode);

	/* LogFile must not contain an attribute list. */
	err = ni_load_all_mi(sbi->mft.ni);
	if (!err)
		err = log_replay(ni, &initialized);

	iput(inode);
	sbi->mft.ni = NULL;

	sync_blockdev(sb->s_bdev);
	invalidate_bdev(sb->s_bdev);

	if (sbi->flags & NTFS_FLAGS_NEED_REPLAY) {
		err = 0;
		goto out;
	}

	if (sb_rdonly(sb) || !initialized)
		goto out;

	/* Fill LogFile with -1 if it is initialized. */
	err = ntfs_bio_fill_1(sbi, &ni->file.run);

out:
	sbi->flags &= ~NTFS_FLAGS_LOG_REPLAYING;

	return err;
}

/*
 * ntfs_query_def
 *
 * Return: Current ATTR_DEF_ENTRY for given attribute type.
 */
const struct ATTR_DEF_ENTRY *ntfs_query_def(struct ntfs_sb_info *sbi,
					    enum ATTR_TYPE type)
{
	int type_in = le32_to_cpu(type);
	size_t min_idx = 0;
	size_t max_idx = sbi->def_entries - 1;

	while (min_idx <= max_idx) {
		size_t i = min_idx + ((max_idx - min_idx) >> 1);
		const struct ATTR_DEF_ENTRY *entry = sbi->def_table + i;
		int diff = le32_to_cpu(entry->type) - type_in;

		if (!diff)
			return entry;
		if (diff < 0)
			min_idx = i + 1;
		else if (i)
			max_idx = i - 1;
		else
			return NULL;
	}
	return NULL;
}
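
/*
 * Editorial note: a plain binary search, so it relies on the attribute
 * definition table (sbi->def_table, loaded from $AttrDef) being sorted
 * by ascending attribute type - e.g. a lookup of ATTR_DATA (0x80)
 * halves [0, def_entries) until entry->type matches.
 */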

/*
 * ntfs_look_for_free_space - Look for free space in the bitmap.
 */
int ntfs_look_for_free_space(struct ntfs_sb_info *sbi, CLST lcn, CLST len,
			     CLST *new_lcn, CLST *new_len,
			     enum ALLOCATE_OPT opt)
{
	int err;
	CLST alen = 0;
	struct super_block *sb = sbi->sb;
	size_t alcn, zlen, zeroes, zlcn, zlen2, ztrim, new_zlen;
	struct wnd_bitmap *wnd = &sbi->used.bitmap;

	down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
	if (opt & ALLOCATE_MFT) {
		zlen = wnd_zone_len(wnd);

		if (!zlen) {
			err = ntfs_refresh_zone(sbi);
			if (err)
				goto out;
			zlen = wnd_zone_len(wnd);
		}

		if (!zlen) {
			ntfs_err(sbi->sb, "no free space to extend mft");
			goto out;
		}

		lcn = wnd_zone_bit(wnd);
		alen = zlen > len ? len : zlen;

		wnd_zone_set(wnd, lcn + alen, zlen - alen);

		err = wnd_set_used(wnd, lcn, alen);
		if (err) {
			up_write(&wnd->rw_lock);
			return err;
		}
		alcn = lcn;
		goto out;
	}
	/*
	 * Because cluster 0 is always in use, lcn == 0 means that we should
	 * use the cached value of 'next_free_lcn' to improve performance.
	 */
	if (!lcn)
		lcn = sbi->used.next_free_lcn;

	if (lcn >= wnd->nbits)
		lcn = 0;

	alen = wnd_find(wnd, len, lcn, BITMAP_FIND_MARK_AS_USED, &alcn);
	if (alen)
		goto out;

	/* Try to use clusters from the MFT zone. */
	zlen = wnd_zone_len(wnd);
	zeroes = wnd_zeroes(wnd);

	/* Check for a too-big request. */
	if (len > zeroes + zlen || zlen <= NTFS_MIN_MFT_ZONE)
		goto out;

	/* How many clusters to cut from the zone. */
	zlcn = wnd_zone_bit(wnd);
	zlen2 = zlen >> 1;
	ztrim = len > zlen ? zlen : (len > zlen2 ? len : zlen2);
	new_zlen = zlen - ztrim;

	if (new_zlen < NTFS_MIN_MFT_ZONE) {
		new_zlen = NTFS_MIN_MFT_ZONE;
		if (new_zlen > zlen)
			new_zlen = zlen;
	}

	wnd_zone_set(wnd, zlcn, new_zlen);

	/* Allocate contiguous clusters. */
	alen = wnd_find(wnd, len, 0,
			BITMAP_FIND_MARK_AS_USED | BITMAP_FIND_FULL, &alcn);

out:
	if (alen) {
		err = 0;
		*new_len = alen;
		*new_lcn = alcn;

		ntfs_unmap_meta(sb, alcn, alen);

		/* Set a hint for the next requests. */
		if (!(opt & ALLOCATE_MFT))
			sbi->used.next_free_lcn = alcn + alen;
	} else {
		err = -ENOSPC;
	}

	up_write(&wnd->rw_lock);
	return err;
}
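
/*
 * Worked example of the zone trim above (editorial sketch): with
 * zlen == 1000 and a request of len == 600 (zlen2 == 500), ztrim is
 * 600 and new_zlen becomes 400; for len == 300 only half the zone is
 * trimmed (ztrim == 500). NTFS_MIN_MFT_ZONE is the lower bound either way.
 */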

/*
 * ntfs_extend_mft - Allocate additional MFT records.
 *
 * sbi->mft.bitmap is locked for write.
 *
 * NOTE: recursive call chain:
 *	ntfs_look_free_mft ->
 *	ntfs_extend_mft ->
 *	attr_set_size ->
 *	ni_insert_nonresident ->
 *	ni_insert_attr ->
 *	ni_ins_attr_ext ->
 *	ntfs_look_free_mft ->
 *	ntfs_extend_mft
 *
 * To avoid this recursion, always allocate space for two new MFT records;
 * see attrib.c: "at least two MFT to avoid recursive loop".
 */
static int ntfs_extend_mft(struct ntfs_sb_info *sbi)
{
	int err;
	struct ntfs_inode *ni = sbi->mft.ni;
	size_t new_mft_total;
	u64 new_mft_bytes, new_bitmap_bytes;
	struct ATTRIB *attr;
	struct wnd_bitmap *wnd = &sbi->mft.bitmap;

	new_mft_total = (wnd->nbits + MFT_INCREASE_CHUNK + 127) & (CLST)~127;
	new_mft_bytes = (u64)new_mft_total << sbi->record_bits;

	/* Step 1: Resize $MFT::DATA. */
	down_write(&ni->file.run_lock);
	err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run,
			    new_mft_bytes, NULL, false, &attr);

	if (err) {
		up_write(&ni->file.run_lock);
		goto out;
	}

	attr->nres.valid_size = attr->nres.data_size;
	new_mft_total = le64_to_cpu(attr->nres.alloc_size) >> sbi->record_bits;
	ni->mi.dirty = true;

	/* Step 2: Resize $MFT::BITMAP. */
	new_bitmap_bytes = bitmap_size(new_mft_total);

	err = attr_set_size(ni, ATTR_BITMAP, NULL, 0, &sbi->mft.bitmap.run,
			    new_bitmap_bytes, &new_bitmap_bytes, true, NULL);

	/* Refresh the MFT zone if necessary. */
	down_write_nested(&sbi->used.bitmap.rw_lock, BITMAP_MUTEX_CLUSTERS);

	ntfs_refresh_zone(sbi);

	up_write(&sbi->used.bitmap.rw_lock);
	up_write(&ni->file.run_lock);

	if (err)
		goto out;

	err = wnd_extend(wnd, new_mft_total);

	if (err)
		goto out;

	ntfs_clear_mft_tail(sbi, sbi->mft.used, new_mft_total);

	err = _ni_write_inode(&ni->vfs_inode, 0);
out:
	return err;
}
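
/*
 * Editorial note: the "+ 127) & ~127" rounding above grows the MFT in
 * multiples of 128 records, presumably so that the $MFT::BITMAP resize
 * in step 2 always covers whole 16-byte (128-bit) chunks of the bitmap.
 */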

/*
 * ntfs_look_free_mft - Look for a free MFT record.
 */
int ntfs_look_free_mft(struct ntfs_sb_info *sbi, CLST *rno, bool mft,
		       struct ntfs_inode *ni, struct mft_inode **mi)
{
	int err = 0;
	size_t zbit, zlen, from, to, fr;
	size_t mft_total;
	struct MFT_REF ref;
	struct super_block *sb = sbi->sb;
	struct wnd_bitmap *wnd = &sbi->mft.bitmap;
	u32 ir;

	static_assert(sizeof(sbi->mft.reserved_bitmap) * 8 >=
		      MFT_REC_FREE - MFT_REC_RESERVED);

	if (!mft)
		down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT);

	zlen = wnd_zone_len(wnd);

	/* Always reserve space for MFT. */
	if (zlen) {
		if (mft) {
			zbit = wnd_zone_bit(wnd);
			*rno = zbit;
			wnd_zone_set(wnd, zbit + 1, zlen - 1);
		}
		goto found;
	}

	/* No MFT zone. Find the free MFT record nearest to '0'. */
	if (!wnd_find(wnd, 1, MFT_REC_FREE, 0, &zbit)) {
		/* Resize MFT. */
		mft_total = wnd->nbits;

		err = ntfs_extend_mft(sbi);
		if (!err) {
			zbit = mft_total;
			goto reserve_mft;
		}

		if (!mft || MFT_REC_FREE == sbi->mft.next_reserved)
			goto out;

		err = 0;

		/*
		 * Look for a free record in the reserved area
		 * [MFT_REC_RESERVED, MFT_REC_FREE) == [11, 16).
		 * The MFT bitmap always marks it as used.
		 */
		if (!sbi->mft.reserved_bitmap) {
			/* Once per session create an internal bitmap for 5 bits. */
			sbi->mft.reserved_bitmap = 0xFF;

			ref.high = 0;
			for (ir = MFT_REC_RESERVED; ir < MFT_REC_FREE; ir++) {
				struct inode *i;
				struct ntfs_inode *ni;
				struct MFT_REC *mrec;

				ref.low = cpu_to_le32(ir);
				ref.seq = cpu_to_le16(ir);

				i = ntfs_iget5(sb, &ref, NULL);
				if (IS_ERR(i)) {
next:
					ntfs_notice(
						sb,
						"Invalid reserved record %x",
						ref.low);
					continue;
				}
				if (is_bad_inode(i)) {
					iput(i);
					goto next;
				}

				ni = ntfs_i(i);

				mrec = ni->mi.mrec;

				if (!is_rec_base(mrec))
					goto next;

				if (mrec->hard_links)
					goto next;

				if (!ni_std(ni))
					goto next;

				if (ni_find_attr(ni, NULL, NULL, ATTR_NAME,
						 NULL, 0, NULL, NULL))
					goto next;

				__clear_bit(ir - MFT_REC_RESERVED,
					    &sbi->mft.reserved_bitmap);
			}
		}

		/* Scan 5 bits for zero. Bit 0 == MFT_REC_RESERVED. */
		zbit = find_next_zero_bit(&sbi->mft.reserved_bitmap,
					  MFT_REC_FREE, MFT_REC_RESERVED);
		if (zbit >= MFT_REC_FREE) {
			sbi->mft.next_reserved = MFT_REC_FREE;
			goto out;
		}

		zlen = 1;
		sbi->mft.next_reserved = zbit;
	} else {
reserve_mft:
		zlen = zbit == MFT_REC_FREE ? (MFT_REC_USER - MFT_REC_FREE) : 4;
		if (zbit + zlen > wnd->nbits)
			zlen = wnd->nbits - zbit;

		while (zlen > 1 && !wnd_is_free(wnd, zbit, zlen))
			zlen -= 1;

		/* [zbit, zbit + zlen) will be used for MFT itself. */
		from = sbi->mft.used;
		if (from < zbit)
			from = zbit;
		to = zbit + zlen;
		if (from < to) {
			ntfs_clear_mft_tail(sbi, from, to);
			sbi->mft.used = to;
		}
	}

	if (mft) {
		*rno = zbit;
		zbit += 1;
		zlen -= 1;
	}

	wnd_zone_set(wnd, zbit, zlen);

found:
	if (!mft) {
		/* The request is for a general-purpose record. */
		if (sbi->mft.next_free < MFT_REC_USER)
			sbi->mft.next_free = MFT_REC_USER;

		for (;;) {
			if (sbi->mft.next_free >= sbi->mft.bitmap.nbits) {
			} else if (!wnd_find(wnd, 1, MFT_REC_USER, 0, &fr)) {
				sbi->mft.next_free = sbi->mft.bitmap.nbits;
			} else {
				*rno = fr;
				sbi->mft.next_free = *rno + 1;
				break;
			}

			err = ntfs_extend_mft(sbi);
			if (err)
				goto out;
		}
	}

	if (ni && !ni_add_subrecord(ni, *rno, mi)) {
		err = -ENOMEM;
		goto out;
	}

	/* We have found a record that is not reserved for the next MFT. */
	if (*rno >= MFT_REC_FREE)
		wnd_set_used(wnd, *rno, 1);
	else if (*rno >= MFT_REC_RESERVED && sbi->mft.reserved_bitmap_inited)
		__set_bit(*rno - MFT_REC_RESERVED, &sbi->mft.reserved_bitmap);

out:
	if (!mft)
		up_write(&wnd->rw_lock);

	return err;
}
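
/*
 * Editorial note: records [MFT_REC_RESERVED, MFT_REC_FREE) == [11, 16)
 * are permanently marked used in the on-disk $MFT::BITMAP, so the code
 * above keeps a private in-memory bitmap (sbi->mft.reserved_bitmap) to
 * remember which of those five reserved records can actually be reused.
 */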

/*
 * ntfs_mark_rec_free - Mark record as free.
 */
void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno)
{
	struct wnd_bitmap *wnd = &sbi->mft.bitmap;

	down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT);
	if (rno >= wnd->nbits)
		goto out;

	if (rno >= MFT_REC_FREE) {
		if (!wnd_is_used(wnd, rno, 1))
			ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
		else
			wnd_set_free(wnd, rno, 1);
	} else if (rno >= MFT_REC_RESERVED && sbi->mft.reserved_bitmap_inited) {
		__clear_bit(rno - MFT_REC_RESERVED, &sbi->mft.reserved_bitmap);
	}

	if (rno < wnd_zone_bit(wnd))
		wnd_zone_set(wnd, rno, 1);
	else if (rno < sbi->mft.next_free && rno >= MFT_REC_USER)
		sbi->mft.next_free = rno;

out:
	up_write(&wnd->rw_lock);
}

/*
 * ntfs_clear_mft_tail - Format empty records [from, to).
 *
 * sbi->mft.bitmap is locked for write.
 */
int ntfs_clear_mft_tail(struct ntfs_sb_info *sbi, size_t from, size_t to)
{
	int err;
	u32 rs;
	u64 vbo;
	struct runs_tree *run;
	struct ntfs_inode *ni;

	if (from >= to)
		return 0;

	rs = sbi->record_size;
	ni = sbi->mft.ni;
	run = &ni->file.run;

	down_read(&ni->file.run_lock);
	vbo = (u64)from * rs;
	for (; from < to; from++, vbo += rs) {
		struct ntfs_buffers nb;

		err = ntfs_get_bh(sbi, run, vbo, rs, &nb);
		if (err)
			goto out;

		err = ntfs_write_bh(sbi, &sbi->new_rec->rhdr, &nb, 0);
		nb_put(&nb);
		if (err)
			goto out;
	}

out:
	sbi->mft.used = from;
	up_read(&ni->file.run_lock);
	return err;
}

/*
 * ntfs_refresh_zone - Refresh the MFT zone.
 *
 * sbi->used.bitmap is locked for rw.
 * sbi->mft.bitmap is locked for write.
 * sbi->mft.ni->file.run_lock for write.
 */
int ntfs_refresh_zone(struct ntfs_sb_info *sbi)
{
	CLST zone_limit, zone_max, lcn, vcn, len;
	size_t lcn_s, zlen;
	struct wnd_bitmap *wnd = &sbi->used.bitmap;
	struct ntfs_inode *ni = sbi->mft.ni;

	/* Do not change anything if the MFT zone is already non-empty. */
	if (wnd_zone_len(wnd))
		return 0;

	/*
	 * Compute the MFT zone in two steps.
	 * We would like to allocate 1/8 of the
	 * total clusters for the MFT, but no more than 512 MB.
	 */
	zone_limit = (512 * 1024 * 1024) >> sbi->cluster_bits;
	zone_max = wnd->nbits >> 3;
	if (zone_max > zone_limit)
		zone_max = zone_limit;

	vcn = bytes_to_cluster(sbi,
			       (u64)sbi->mft.bitmap.nbits << sbi->record_bits);

	if (!run_lookup_entry(&ni->file.run, vcn - 1, &lcn, &len, NULL))
		lcn = SPARSE_LCN;

	/* We should always find the last LCN for the MFT. */
	if (lcn == SPARSE_LCN)
		return -EINVAL;

	lcn_s = lcn + 1;

	/* Try to allocate clusters after the last MFT run. */
	zlen = wnd_find(wnd, zone_max, lcn_s, 0, &lcn_s);
	if (!zlen) {
		ntfs_notice(sbi->sb, "MftZone: unavailable");
		return 0;
	}

	/* Truncate a too-large zone. */
	wnd_zone_set(wnd, lcn_s, zlen);

	return 0;
}
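
/*
 * Worked example (editorial): with 4K clusters, zone_limit is
 * (512 MB >> 12) == 131072 clusters, so on any volume whose 1/8 share
 * exceeds that, the MFT zone is capped at 512 MB worth of clusters.
 */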

/*
 * ntfs_update_mftmirr - Update $MFTMirr data.
 */
int ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait)
{
	int err;
	struct super_block *sb = sbi->sb;
	u32 blocksize = sb->s_blocksize;
	sector_t block1, block2;
	u32 bytes;

	if (!(sbi->flags & NTFS_FLAGS_MFTMIRR))
		return 0;

	err = 0;
	bytes = sbi->mft.recs_mirr << sbi->record_bits;
	block1 = sbi->mft.lbo >> sb->s_blocksize_bits;
	block2 = sbi->mft.lbo2 >> sb->s_blocksize_bits;

	for (; bytes >= blocksize; bytes -= blocksize) {
		struct buffer_head *bh1, *bh2;

		bh1 = sb_bread(sb, block1++);
		if (!bh1) {
			err = -EIO;
			goto out;
		}

		bh2 = sb_getblk(sb, block2++);
		if (!bh2) {
			put_bh(bh1);
			err = -EIO;
			goto out;
		}

		if (buffer_locked(bh2))
			__wait_on_buffer(bh2);

		lock_buffer(bh2);
		memcpy(bh2->b_data, bh1->b_data, blocksize);
		set_buffer_uptodate(bh2);
		mark_buffer_dirty(bh2);
		unlock_buffer(bh2);

		put_bh(bh1);
		bh1 = NULL;

		if (wait)
			err = sync_dirty_buffer(bh2);

		put_bh(bh2);
		if (err)
			goto out;
	}

	sbi->flags &= ~NTFS_FLAGS_MFTMIRR;

out:
	return err;
}

/*
 * ntfs_set_state
 *
 * Mount: ntfs_set_state(NTFS_DIRTY_DIRTY)
 * Umount: ntfs_set_state(NTFS_DIRTY_CLEAR)
 * NTFS error: ntfs_set_state(NTFS_DIRTY_ERROR)
 */
int ntfs_set_state(struct ntfs_sb_info *sbi, enum NTFS_DIRTY_FLAGS dirty)
{
	int err;
	struct ATTRIB *attr;
	struct VOLUME_INFO *info;
	struct mft_inode *mi;
	struct ntfs_inode *ni;

	/*
	 * Do not change the state if the fs was real_dirty.
	 * Do not change the state if the fs is already dirty (clear).
	 * Do not change anything if mounted read-only.
	 */
	if (sbi->volume.real_dirty || sb_rdonly(sbi->sb))
		return 0;

	/* Check the cached value. */
	if ((dirty == NTFS_DIRTY_CLEAR ? 0 : VOLUME_FLAG_DIRTY) ==
	    (sbi->volume.flags & VOLUME_FLAG_DIRTY))
		return 0;

	ni = sbi->volume.ni;
	if (!ni)
		return -EINVAL;

	mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_DIRTY);

	attr = ni_find_attr(ni, NULL, NULL, ATTR_VOL_INFO, NULL, 0, NULL, &mi);
	if (!attr) {
		err = -EINVAL;
		goto out;
	}

	info = resident_data_ex(attr, SIZEOF_ATTRIBUTE_VOLUME_INFO);
	if (!info) {
		err = -EINVAL;
		goto out;
	}

	switch (dirty) {
	case NTFS_DIRTY_ERROR:
		ntfs_notice(sbi->sb, "Mark volume as dirty due to NTFS errors");
		sbi->volume.real_dirty = true;
		fallthrough;
	case NTFS_DIRTY_DIRTY:
		info->flags |= VOLUME_FLAG_DIRTY;
		break;
	case NTFS_DIRTY_CLEAR:
		info->flags &= ~VOLUME_FLAG_DIRTY;
		break;
	}
	/* Cache the current volume flags. */
	sbi->volume.flags = info->flags;
	mi->dirty = true;
	err = 0;

out:
	ni_unlock(ni);
	if (err)
		return err;

	mark_inode_dirty(&ni->vfs_inode);
	/* verify(!ntfs_update_mftmirr()); */

	/*
	 * If we used wait=1, sync_inode_metadata waits for the I/O for the
	 * inode to finish. It hangs when media is removed.
	 * So wait=0 is sent down to sync_inode_metadata
	 * and filemap_fdatawrite is used for the data blocks.
	 */
	err = sync_inode_metadata(&ni->vfs_inode, 0);
	if (!err)
		err = filemap_fdatawrite(ni->vfs_inode.i_mapping);

	return err;
}

/*
 * security_hash - Calculate a hash of the security descriptor.
 */
static inline __le32 security_hash(const void *sd, size_t bytes)
{
	u32 hash = 0;
	const __le32 *ptr = sd;

	bytes >>= 2;
	while (bytes--)
		hash = ((hash >> 0x1D) | (hash << 3)) + le32_to_cpu(*ptr++);
	return cpu_to_le32(hash);
}
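
/*
 * Editorial example: the loop above is a rotate-left-by-3 and add over
 * 32-bit little-endian words. Starting from hash == 0, a first word w1
 * gives hash == w1; a second word w2 gives ((w1 >> 29) | (w1 << 3)) + w2,
 * so for w1 == 1 the running hash is 8 + w2.
 */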

int ntfs_sb_read(struct super_block *sb, u64 lbo, size_t bytes, void *buffer)
{
	struct block_device *bdev = sb->s_bdev;
	u32 blocksize = sb->s_blocksize;
	u64 block = lbo >> sb->s_blocksize_bits;
	u32 off = lbo & (blocksize - 1);
	u32 op = blocksize - off;

	for (; bytes; block += 1, off = 0, op = blocksize) {
		struct buffer_head *bh = __bread(bdev, block, blocksize);

		if (!bh)
			return -EIO;

		if (op > bytes)
			op = bytes;

		memcpy(buffer, bh->b_data + off, op);

		put_bh(bh);

		bytes -= op;
		buffer = Add2Ptr(buffer, op);
	}

	return 0;
}
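
/*
 * Editorial example: a 1536-byte read at lbo == 0x600 with 1K blocks
 * starts at block 1 with off == 0x200, so the first pass copies
 * op == blocksize - off == 0x200 bytes; every following block is
 * copied from off == 0 until 'bytes' is exhausted.
 */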

int ntfs_sb_write(struct super_block *sb, u64 lbo, size_t bytes,
		  const void *buf, int wait)
{
	u32 blocksize = sb->s_blocksize;
	struct block_device *bdev = sb->s_bdev;
	sector_t block = lbo >> sb->s_blocksize_bits;
	u32 off = lbo & (blocksize - 1);
	u32 op = blocksize - off;
	struct buffer_head *bh;

	if (!wait && (sb->s_flags & SB_SYNCHRONOUS))
		wait = 1;

	for (; bytes; block += 1, off = 0, op = blocksize) {
		if (op > bytes)
			op = bytes;

		if (op < blocksize) {
			bh = __bread(bdev, block, blocksize);
			if (!bh) {
				ntfs_err(sb, "failed to read block %llx",
					 (u64)block);
				return -EIO;
			}
		} else {
			bh = __getblk(bdev, block, blocksize);
			if (!bh)
				return -ENOMEM;
		}

		if (buffer_locked(bh))
			__wait_on_buffer(bh);

		lock_buffer(bh);
		if (buf) {
			memcpy(bh->b_data + off, buf, op);
			buf = Add2Ptr(buf, op);
		} else {
			memset(bh->b_data + off, -1, op);
		}

		set_buffer_uptodate(bh);
		mark_buffer_dirty(bh);
		unlock_buffer(bh);

		if (wait) {
			int err = sync_dirty_buffer(bh);

			if (err) {
				ntfs_err(
					sb,
					"failed to sync buffer at block %llx, error %d",
					(u64)block, err);
				put_bh(bh);
				return err;
			}
		}

		put_bh(bh);

		bytes -= op;
	}
	return 0;
}

int ntfs_sb_write_run(struct ntfs_sb_info *sbi, const struct runs_tree *run,
		      u64 vbo, const void *buf, size_t bytes)
{
	struct super_block *sb = sbi->sb;
	u8 cluster_bits = sbi->cluster_bits;
	u32 off = vbo & sbi->cluster_mask;
	CLST lcn, clen, vcn = vbo >> cluster_bits, vcn_next;
	u64 lbo, len;
	size_t idx;

	if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx))
		return -ENOENT;

	if (lcn == SPARSE_LCN)
		return -EINVAL;

	lbo = ((u64)lcn << cluster_bits) + off;
	len = ((u64)clen << cluster_bits) - off;

	for (;;) {
		u32 op = len < bytes ? len : bytes;
		int err = ntfs_sb_write(sb, lbo, op, buf, 0);

		if (err)
			return err;

		bytes -= op;
		if (!bytes)
			break;

		vcn_next = vcn + clen;
		if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) ||
		    vcn != vcn_next)
			return -ENOENT;

		if (lcn == SPARSE_LCN)
			return -EINVAL;

		if (buf)
			buf = Add2Ptr(buf, op);

		lbo = ((u64)lcn << cluster_bits);
		len = ((u64)clen << cluster_bits);
	}

	return 0;
}

struct buffer_head *ntfs_bread_run(struct ntfs_sb_info *sbi,
				   const struct runs_tree *run, u64 vbo)
{
	struct super_block *sb = sbi->sb;
	u8 cluster_bits = sbi->cluster_bits;
	CLST lcn;
	u64 lbo;

	if (!run_lookup_entry(run, vbo >> cluster_bits, &lcn, NULL, NULL))
		return ERR_PTR(-ENOENT);

	lbo = ((u64)lcn << cluster_bits) + (vbo & sbi->cluster_mask);

	return ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
}

int ntfs_read_run_nb(struct ntfs_sb_info *sbi, const struct runs_tree *run,
		     u64 vbo, void *buf, u32 bytes, struct ntfs_buffers *nb)
{
	int err;
	struct super_block *sb = sbi->sb;
	u32 blocksize = sb->s_blocksize;
	u8 cluster_bits = sbi->cluster_bits;
	u32 off = vbo & sbi->cluster_mask;
	u32 nbh = 0;
	CLST vcn_next, vcn = vbo >> cluster_bits;
	CLST lcn, clen;
	u64 lbo, len;
	size_t idx;
	struct buffer_head *bh;

	if (!run) {
		/* First reading of $Volume + $MFTMirr + $LogFile goes here. */
		if (vbo > MFT_REC_VOL * sbi->record_size) {
			err = -ENOENT;
			goto out;
		}

		/* Use the boot sector's absolute 'MFTCluster' to read the record. */
		lbo = vbo + sbi->mft.lbo;
		len = sbi->record_size;
	} else if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) {
		err = -ENOENT;
		goto out;
	} else {
		if (lcn == SPARSE_LCN) {
			err = -EINVAL;
			goto out;
		}

		lbo = ((u64)lcn << cluster_bits) + off;
		len = ((u64)clen << cluster_bits) - off;
	}

	off = lbo & (blocksize - 1);
	if (nb) {
		nb->off = off;
		nb->bytes = bytes;
	}

	for (;;) {
		u32 len32 = len >= bytes ? bytes : len;
		sector_t block = lbo >> sb->s_blocksize_bits;

		do {
			u32 op = blocksize - off;

			if (op > len32)
				op = len32;

			bh = ntfs_bread(sb, block);
			if (!bh) {
				err = -EIO;
				goto out;
			}

			if (buf) {
				memcpy(buf, bh->b_data + off, op);
				buf = Add2Ptr(buf, op);
			}

			if (!nb) {
				put_bh(bh);
			} else if (nbh >= ARRAY_SIZE(nb->bh)) {
				err = -EINVAL;
				goto out;
			} else {
				nb->bh[nbh++] = bh;
				nb->nbufs = nbh;
			}

			bytes -= op;
			if (!bytes)
				return 0;
			len32 -= op;
			block += 1;
			off = 0;

		} while (len32);

		vcn_next = vcn + clen;
		if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) ||
		    vcn != vcn_next) {
			err = -ENOENT;
			goto out;
		}

		if (lcn == SPARSE_LCN) {
			err = -EINVAL;
			goto out;
		}

		lbo = ((u64)lcn << cluster_bits);
		len = ((u64)clen << cluster_bits);
	}

out:
	if (!nbh)
		return err;

	while (nbh) {
		put_bh(nb->bh[--nbh]);
		nb->bh[nbh] = NULL;
	}

	nb->nbufs = 0;
	return err;
}

/*
 * ntfs_read_bh
 *
 * Return: < 0 if error, 0 if ok, -E_NTFS_FIXUP if need to update fixups.
 */
int ntfs_read_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo,
		 struct NTFS_RECORD_HEADER *rhdr, u32 bytes,
		 struct ntfs_buffers *nb)
{
	int err = ntfs_read_run_nb(sbi, run, vbo, rhdr, bytes, nb);

	if (err)
		return err;
	return ntfs_fix_post_read(rhdr, nb->bytes, true);
}

int ntfs_get_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo,
		u32 bytes, struct ntfs_buffers *nb)
{
	int err = 0;
	struct super_block *sb = sbi->sb;
	u32 blocksize = sb->s_blocksize;
	u8 cluster_bits = sbi->cluster_bits;
	CLST vcn_next, vcn = vbo >> cluster_bits;
	u32 off;
	u32 nbh = 0;
	CLST lcn, clen;
	u64 lbo, len;
	size_t idx;

	nb->bytes = bytes;

	if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) {
		err = -ENOENT;
		goto out;
	}

	off = vbo & sbi->cluster_mask;
	lbo = ((u64)lcn << cluster_bits) + off;
	len = ((u64)clen << cluster_bits) - off;

	nb->off = off = lbo & (blocksize - 1);

	for (;;) {
		u32 len32 = len < bytes ? len : bytes;
		sector_t block = lbo >> sb->s_blocksize_bits;

		do {
			u32 op;
			struct buffer_head *bh;

			if (nbh >= ARRAY_SIZE(nb->bh)) {
				err = -EINVAL;
				goto out;
			}

			op = blocksize - off;
			if (op > len32)
				op = len32;

			if (op == blocksize) {
				bh = sb_getblk(sb, block);
				if (!bh) {
					err = -ENOMEM;
					goto out;
				}
				if (buffer_locked(bh))
					__wait_on_buffer(bh);
				set_buffer_uptodate(bh);
			} else {
				bh = ntfs_bread(sb, block);
				if (!bh) {
					err = -EIO;
					goto out;
				}
			}

			nb->bh[nbh++] = bh;
			bytes -= op;
			if (!bytes) {
				nb->nbufs = nbh;
				return 0;
			}

			block += 1;
			len32 -= op;
			off = 0;
		} while (len32);

		vcn_next = vcn + clen;
		if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) ||
		    vcn != vcn_next) {
			err = -ENOENT;
			goto out;
		}

		lbo = ((u64)lcn << cluster_bits);
		len = ((u64)clen << cluster_bits);
	}

out:
	while (nbh) {
		put_bh(nb->bh[--nbh]);
		nb->bh[nbh] = NULL;
	}

	nb->nbufs = 0;

	return err;
}

int ntfs_write_bh(struct ntfs_sb_info *sbi, struct NTFS_RECORD_HEADER *rhdr,
		  struct ntfs_buffers *nb, int sync)
{
	int err = 0;
	struct super_block *sb = sbi->sb;
	u32 block_size = sb->s_blocksize;
	u32 bytes = nb->bytes;
	u32 off = nb->off;
	u16 fo = le16_to_cpu(rhdr->fix_off);
	u16 fn = le16_to_cpu(rhdr->fix_num);
	u32 idx;
	__le16 *fixup;
	__le16 sample;

	if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- ||
	    fn * SECTOR_SIZE > bytes) {
		return -EINVAL;
	}

	for (idx = 0; bytes && idx < nb->nbufs; idx += 1, off = 0) {
		u32 op = block_size - off;
		char *bh_data;
		struct buffer_head *bh = nb->bh[idx];
		__le16 *ptr, *end_data;

		if (op > bytes)
			op = bytes;

		if (buffer_locked(bh))
			__wait_on_buffer(bh);

		lock_buffer(nb->bh[idx]);

		bh_data = bh->b_data + off;
		end_data = Add2Ptr(bh_data, op);
		memcpy(bh_data, rhdr, op);

		if (!idx) {
			u16 t16;

			fixup = Add2Ptr(bh_data, fo);
			sample = *fixup;
			t16 = le16_to_cpu(sample);
			if (t16 >= 0x7FFF) {
				sample = *fixup = cpu_to_le16(1);
			} else {
				sample = cpu_to_le16(t16 + 1);
				*fixup = sample;
			}

			*(__le16 *)Add2Ptr(rhdr, fo) = sample;
		}

		ptr = Add2Ptr(bh_data, SECTOR_SIZE - sizeof(short));

		do {
			*++fixup = *ptr;
			*ptr = sample;
			ptr += SECTOR_SIZE / sizeof(short);
		} while (ptr < end_data);

		set_buffer_uptodate(bh);
		mark_buffer_dirty(bh);
		unlock_buffer(bh);

		if (sync) {
			int err2 = sync_dirty_buffer(bh);

			if (!err && err2)
				err = err2;
		}

		bytes -= op;
		rhdr = Add2Ptr(rhdr, op);
	}

	return err;
}
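
/*
 * Editorial note: unlike ntfs_fix_pre_write(), which patches the record
 * in place, the loop above applies fixups to each buffer head while
 * copying @rhdr into it; the in-memory record itself only has its
 * sequence number (the word at fix_off) bumped.
 */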

static inline struct bio *ntfs_alloc_bio(u32 nr_vecs)
{
	struct bio *bio = bio_alloc(GFP_NOFS | __GFP_HIGH, nr_vecs);

	if (!bio && (current->flags & PF_MEMALLOC)) {
		while (!bio && (nr_vecs /= 2))
			bio = bio_alloc(GFP_NOFS | __GFP_HIGH, nr_vecs);
	}
	return bio;
}
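
/*
 * Editorial note: when allocating from the memalloc reserves
 * (PF_MEMALLOC), ntfs_alloc_bio() keeps halving nr_vecs until
 * bio_alloc() succeeds or nr_vecs reaches zero - fewer segments per
 * bio in exchange for forward progress under memory pressure.
 */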

/*
 * ntfs_bio_pages - Read/write pages from/to disk.
 */
int ntfs_bio_pages(struct ntfs_sb_info *sbi, const struct runs_tree *run,
		   struct page **pages, u32 nr_pages, u64 vbo, u32 bytes,
		   u32 op)
{
	int err = 0;
	struct bio *new, *bio = NULL;
	struct super_block *sb = sbi->sb;
	struct block_device *bdev = sb->s_bdev;
	struct page *page;
	u8 cluster_bits = sbi->cluster_bits;
	CLST lcn, clen, vcn, vcn_next;
	u32 add, off, page_idx;
	u64 lbo, len;
	size_t run_idx;
	struct blk_plug plug;

	if (!bytes)
		return 0;

	blk_start_plug(&plug);

	/* Align vbo and bytes to 512-byte boundaries. */
	lbo = (vbo + bytes + 511) & ~511ull;
	vbo = vbo & ~511ull;
	bytes = lbo - vbo;

	vcn = vbo >> cluster_bits;
	if (!run_lookup_entry(run, vcn, &lcn, &clen, &run_idx)) {
		err = -ENOENT;
		goto out;
	}
	off = vbo & sbi->cluster_mask;
	page_idx = 0;
	page = pages[0];

	for (;;) {
		lbo = ((u64)lcn << cluster_bits) + off;
		len = ((u64)clen << cluster_bits) - off;
new_bio:
		new = ntfs_alloc_bio(nr_pages - page_idx);
		if (!new) {
			err = -ENOMEM;
			goto out;
		}
		if (bio) {
			bio_chain(bio, new);
			submit_bio(bio);
		}
		bio = new;
		bio_set_dev(bio, bdev);
		bio->bi_iter.bi_sector = lbo >> 9;
		bio->bi_opf = op;

		while (len) {
			off = vbo & (PAGE_SIZE - 1);
			add = off + len > PAGE_SIZE ? (PAGE_SIZE - off) : len;

			if (bio_add_page(bio, page, add, off) < add)
				goto new_bio;

			if (bytes <= add)
				goto out;
			bytes -= add;
			vbo += add;

			if (add + off == PAGE_SIZE) {
				page_idx += 1;
				if (WARN_ON(page_idx >= nr_pages)) {
					err = -EINVAL;
					goto out;
				}
				page = pages[page_idx];
			}

			if (len <= add)
				break;
			len -= add;
			lbo += add;
		}

		vcn_next = vcn + clen;
		if (!run_get_entry(run, ++run_idx, &vcn, &lcn, &clen) ||
		    vcn != vcn_next) {
			err = -ENOENT;
			goto out;
		}
		off = 0;
	}
out:
	if (bio) {
		if (!err)
			err = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);

	return err;
}

/*
 * ntfs_bio_fill_1 - Helper for ntfs_loadlog_and_replay().
 *
 * Fill the on-disk logfile range with -1;
 * this marks the logfile as empty.
 */
int ntfs_bio_fill_1(struct ntfs_sb_info *sbi, const struct runs_tree *run)
{
	int err = 0;
	struct super_block *sb = sbi->sb;
	struct block_device *bdev = sb->s_bdev;
	u8 cluster_bits = sbi->cluster_bits;
	struct bio *new, *bio = NULL;
	CLST lcn, clen;
	u64 lbo, len;
	size_t run_idx;
	struct page *fill;
	void *kaddr;
	struct blk_plug plug;

	fill = alloc_page(GFP_KERNEL);
	if (!fill)
		return -ENOMEM;

	kaddr = kmap_atomic(fill);
	memset(kaddr, -1, PAGE_SIZE);
	kunmap_atomic(kaddr);
	flush_dcache_page(fill);
	lock_page(fill);

	if (!run_lookup_entry(run, 0, &lcn, &clen, &run_idx)) {
		err = -ENOENT;
		goto out;
	}

	/*
	 * TODO: Try blkdev_issue_write_same.
	 */
	blk_start_plug(&plug);
	do {
		lbo = (u64)lcn << cluster_bits;
		len = (u64)clen << cluster_bits;
new_bio:
		new = ntfs_alloc_bio(BIO_MAX_VECS);
		if (!new) {
			err = -ENOMEM;
			break;
		}
		if (bio) {
			bio_chain(bio, new);
			submit_bio(bio);
		}
		bio = new;
		bio_set_dev(bio, bdev);
		bio->bi_opf = REQ_OP_WRITE;
		bio->bi_iter.bi_sector = lbo >> 9;

		for (;;) {
			u32 add = len > PAGE_SIZE ? PAGE_SIZE : len;

			if (bio_add_page(bio, fill, add, 0) < add)
				goto new_bio;

			lbo += add;
			if (len <= add)
				break;
			len -= add;
		}
	} while (run_get_entry(run, ++run_idx, NULL, &lcn, &clen));

	if (bio) {
		if (!err)
			err = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);
out:
	unlock_page(fill);
	put_page(fill);

	return err;
}

int ntfs_vbo_to_lbo(struct ntfs_sb_info *sbi, const struct runs_tree *run,
		    u64 vbo, u64 *lbo, u64 *bytes)
{
	u32 off;
	CLST lcn, len;
	u8 cluster_bits = sbi->cluster_bits;

	if (!run_lookup_entry(run, vbo >> cluster_bits, &lcn, &len, NULL))
		return -ENOENT;

	off = vbo & sbi->cluster_mask;
	*lbo = lcn == SPARSE_LCN ? -1 : (((u64)lcn << cluster_bits) + off);
	*bytes = ((u64)len << cluster_bits) - off;

	return 0;
}

struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST rno, bool dir)
{
	int err = 0;
	struct super_block *sb = sbi->sb;
	struct inode *inode = new_inode(sb);
	struct ntfs_inode *ni;

	if (!inode)
		return ERR_PTR(-ENOMEM);

	ni = ntfs_i(inode);

	err = mi_format_new(&ni->mi, sbi, rno, dir ? RECORD_FLAG_DIR : 0,
			    false);
	if (err)
		goto out;

	inode->i_ino = rno;
	if (insert_inode_locked(inode) < 0) {
		err = -EIO;
		goto out;
	}

out:
	if (err) {
		iput(inode);
		ni = ERR_PTR(err);
	}
	return ni;
}

/*
 * O:BAG:BAD:(A;OICI;FA;;;WD)
 * Owner S-1-5-32-544 (Administrators)
 * Group S-1-5-32-544 (Administrators)
 * ACE: allow S-1-1-0 (Everyone) with FILE_ALL_ACCESS
 */
const u8 s_default_security[] __aligned(8) = {
	0x01, 0x00, 0x04, 0x80, 0x30, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x1C, 0x00,
	0x01, 0x00, 0x00, 0x00, 0x00, 0x03, 0x14, 0x00, 0xFF, 0x01, 0x1F, 0x00,
	0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
	0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x20, 0x00, 0x00, 0x00,
	0x20, 0x02, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05,
	0x20, 0x00, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00,
};

static_assert(sizeof(s_default_security) == 0x50);

static inline u32 sid_length(const struct SID *sid)
{
	return struct_size(sid, SubAuthority, sid->SubAuthorityCount);
}
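
/*
 * Editorial example: a SID is an 8-byte header plus 4 bytes per
 * sub-authority, so S-1-5-32-544 (Administrators, two sub-authorities)
 * has sid_length() == 16.
 */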

/*
 * is_acl_valid
 *
 * Thanks to Mark Harmstone for the idea.
 */
static bool is_acl_valid(const struct ACL *acl, u32 len)
{
	const struct ACE_HEADER *ace;
	u32 i;
	u16 ace_count, ace_size;

	if (acl->AclRevision != ACL_REVISION &&
	    acl->AclRevision != ACL_REVISION_DS) {
		/*
		 * This value should be ACL_REVISION, unless the ACL contains an
		 * object-specific ACE, in which case this value must be ACL_REVISION_DS.
		 * All ACEs in an ACL must be at the same revision level.
		 */
		return false;
	}

	if (acl->Sbz1)
		return false;

	if (le16_to_cpu(acl->AclSize) > len)
		return false;

	if (acl->Sbz2)
		return false;

	len -= sizeof(struct ACL);
	ace = (struct ACE_HEADER *)&acl[1];
	ace_count = le16_to_cpu(acl->AceCount);

	for (i = 0; i < ace_count; i++) {
		if (len < sizeof(struct ACE_HEADER))
			return false;

		ace_size = le16_to_cpu(ace->AceSize);
		if (len < ace_size)
			return false;

		len -= ace_size;
		ace = Add2Ptr(ace, ace_size);
	}

	return true;
}

bool is_sd_valid(const struct SECURITY_DESCRIPTOR_RELATIVE *sd, u32 len)
{
	u32 sd_owner, sd_group, sd_sacl, sd_dacl;

	if (len < sizeof(struct SECURITY_DESCRIPTOR_RELATIVE))
		return false;

	if (sd->Revision != 1)
		return false;

	if (sd->Sbz1)
		return false;

	if (!(sd->Control & SE_SELF_RELATIVE))
		return false;

	sd_owner = le32_to_cpu(sd->Owner);
	if (sd_owner) {
		const struct SID *owner = Add2Ptr(sd, sd_owner);

		if (sd_owner + offsetof(struct SID, SubAuthority) > len)
			return false;

		if (owner->Revision != 1)
			return false;

		if (sd_owner + sid_length(owner) > len)
			return false;
	}

	sd_group = le32_to_cpu(sd->Group);
	if (sd_group) {
		const struct SID *group = Add2Ptr(sd, sd_group);

		if (sd_group + offsetof(struct SID, SubAuthority) > len)
			return false;

		if (group->Revision != 1)
			return false;

		if (sd_group + sid_length(group) > len)
			return false;
	}

	sd_sacl = le32_to_cpu(sd->Sacl);
	if (sd_sacl) {
		const struct ACL *sacl = Add2Ptr(sd, sd_sacl);

		if (sd_sacl + sizeof(struct ACL) > len)
			return false;

		if (!is_acl_valid(sacl, len - sd_sacl))
			return false;
	}

	sd_dacl = le32_to_cpu(sd->Dacl);
	if (sd_dacl) {
		const struct ACL *dacl = Add2Ptr(sd, sd_dacl);

		if (sd_dacl + sizeof(struct ACL) > len)
			return false;

		if (!is_acl_valid(dacl, len - sd_dacl))
			return false;
	}

	return true;
}

/*
 * ntfs_security_init - Load and parse $Secure.
 */
int ntfs_security_init(struct ntfs_sb_info *sbi)
{
	int err;
	struct super_block *sb = sbi->sb;
	struct inode *inode;
	struct ntfs_inode *ni;
	struct MFT_REF ref;
	struct ATTRIB *attr;
	struct ATTR_LIST_ENTRY *le;
	u64 sds_size;
	size_t off;
	struct NTFS_DE *ne;
	struct NTFS_DE_SII *sii_e;
	struct ntfs_fnd *fnd_sii = NULL;
	const struct INDEX_ROOT *root_sii;
	const struct INDEX_ROOT *root_sdh;
	struct ntfs_index *indx_sdh = &sbi->security.index_sdh;
	struct ntfs_index *indx_sii = &sbi->security.index_sii;

	ref.low = cpu_to_le32(MFT_REC_SECURE);
	ref.high = 0;
	ref.seq = cpu_to_le16(MFT_REC_SECURE);

	inode = ntfs_iget5(sb, &ref, &NAME_SECURE);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		ntfs_err(sb, "Failed to load $Secure.");
		inode = NULL;
		goto out;
	}

	ni = ntfs_i(inode);

	le = NULL;

	attr = ni_find_attr(ni, NULL, &le, ATTR_ROOT, SDH_NAME,
			    ARRAY_SIZE(SDH_NAME), NULL, NULL);
	if (!attr) {
		err = -EINVAL;
		goto out;
	}

	root_sdh = resident_data(attr);
	if (root_sdh->type != ATTR_ZERO ||
	    root_sdh->rule != NTFS_COLLATION_TYPE_SECURITY_HASH) {
		err = -EINVAL;
		goto out;
	}

	err = indx_init(indx_sdh, sbi, attr, INDEX_MUTEX_SDH);
	if (err)
		goto out;

	attr = ni_find_attr(ni, attr, &le, ATTR_ROOT, SII_NAME,
			    ARRAY_SIZE(SII_NAME), NULL, NULL);
	if (!attr) {
		err = -EINVAL;
		goto out;
	}

	root_sii = resident_data(attr);
	if (root_sii->type != ATTR_ZERO ||
	    root_sii->rule != NTFS_COLLATION_TYPE_UINT) {
		err = -EINVAL;
		goto out;
	}

	err = indx_init(indx_sii, sbi, attr, INDEX_MUTEX_SII);
	if (err)
		goto out;

	fnd_sii = fnd_get();
	if (!fnd_sii) {
		err = -ENOMEM;
		goto out;
	}

	sds_size = inode->i_size;

	/* Find the last valid Id. */
	sbi->security.next_id = SECURITY_ID_FIRST;
	/* Always write a new security descriptor at the end of the bucket. */
	sbi->security.next_off =
		ALIGN(sds_size - SecurityDescriptorsBlockSize, 16);

	off = 0;
	ne = NULL;

	for (;;) {
		u32 next_id;

		err = indx_find_raw(indx_sii, ni, root_sii, &ne, &off, fnd_sii);
		if (err || !ne)
			break;

		sii_e = (struct NTFS_DE_SII *)ne;
		if (le16_to_cpu(ne->view.data_size) < SIZEOF_SECURITY_HDR)
			continue;

		next_id = le32_to_cpu(sii_e->sec_id) + 1;
		if (next_id >= sbi->security.next_id)
			sbi->security.next_id = next_id;
	}

	sbi->security.ni = ni;
	inode = NULL;
out:
	iput(inode);
	fnd_put(fnd_sii);

	return err;
}
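
/*
 * Editorial note on next_off above: the SDS stream ends with the mirror
 * copy of its last 256K block, so subtracting SecurityDescriptorsBlockSize
 * from sds_size points back at the tail of the primary copy; ALIGN(.., 16)
 * then gives the 16-byte-aligned position for the next descriptor.
 */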

/*
 * ntfs_get_security_by_id - Read security descriptor by id.
 */
int ntfs_get_security_by_id(struct ntfs_sb_info *sbi, __le32 security_id,
			    struct SECURITY_DESCRIPTOR_RELATIVE **sd,
			    size_t *size)
{
	int err;
	int diff;
	struct ntfs_inode *ni = sbi->security.ni;
	struct ntfs_index *indx = &sbi->security.index_sii;
	void *p = NULL;
	struct NTFS_DE_SII *sii_e;
	struct ntfs_fnd *fnd_sii;
	struct SECURITY_HDR d_security;
	const struct INDEX_ROOT *root_sii;
	u32 t32;

	*sd = NULL;

	mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_SECURITY);

	fnd_sii = fnd_get();
	if (!fnd_sii) {
		err = -ENOMEM;
		goto out;
	}

	root_sii = indx_get_root(indx, ni, NULL, NULL);
	if (!root_sii) {
		err = -EINVAL;
		goto out;
	}

	/* Try to find this security descriptor in the SII indexes. */
	err = indx_find(indx, ni, root_sii, &security_id, sizeof(security_id),
			NULL, &diff, (struct NTFS_DE **)&sii_e, fnd_sii);
	if (err)
		goto out;

	if (diff)
		goto out;

	t32 = le32_to_cpu(sii_e->sec_hdr.size);
	if (t32 < SIZEOF_SECURITY_HDR) {
		err = -EINVAL;
		goto out;
	}

	if (t32 > SIZEOF_SECURITY_HDR + 0x10000) {
		/* Security descriptor looks too big; 0x10000 is an arbitrary upper bound. */
		err = -EFBIG;
		goto out;
	}

	*size = t32 - SIZEOF_SECURITY_HDR;

	p = kmalloc(*size, GFP_NOFS);
	if (!p) {
		err = -ENOMEM;
		goto out;
	}

	err = ntfs_read_run_nb(sbi, &ni->file.run,
			       le64_to_cpu(sii_e->sec_hdr.off), &d_security,
			       sizeof(d_security), NULL);
	if (err)
		goto out;

	if (memcmp(&d_security, &sii_e->sec_hdr, SIZEOF_SECURITY_HDR)) {
		err = -EINVAL;
		goto out;
	}

	err = ntfs_read_run_nb(sbi, &ni->file.run,
			       le64_to_cpu(sii_e->sec_hdr.off) +
				       SIZEOF_SECURITY_HDR,
			       p, *size, NULL);
	if (err)
		goto out;

	*sd = p;
	p = NULL;

out:
	kfree(p);
	fnd_put(fnd_sii);
	ni_unlock(ni);

	return err;
}

/*
 * ntfs_insert_security - Insert security descriptor into $Secure::SDS.
 *
 * SECURITY Descriptor Stream data is organized into chunks of 256K bytes
 * and contains a mirror copy of each security descriptor. When writing
 * a security descriptor at location X, another copy will be written at
 * location (X+256K).
 * When writing a security descriptor that would cross the 256K boundary,
 * the pointer will be advanced by 256K to skip over the mirror portion.
 */
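/*
 * Editorial example: with SecurityDescriptorsBlockSize == 256K, a
 * descriptor written at SDS offset 0x10000 gets its mirror at 0x50000;
 * see the "Zero the gap" logic below for how next_off skips a whole
 * mirror block when a descriptor would straddle a 256K boundary.
 */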
2053 int ntfs_insert_security(struct ntfs_sb_info *sbi,
2054 			 const struct SECURITY_DESCRIPTOR_RELATIVE *sd,
2055 			 u32 size_sd, __le32 *security_id, bool *inserted)
2056 {
2057 	int err, diff;
2058 	struct ntfs_inode *ni = sbi->security.ni;
2059 	struct ntfs_index *indx_sdh = &sbi->security.index_sdh;
2060 	struct ntfs_index *indx_sii = &sbi->security.index_sii;
2061 	struct NTFS_DE_SDH *e;
2062 	struct NTFS_DE_SDH sdh_e;
2063 	struct NTFS_DE_SII sii_e;
2064 	struct SECURITY_HDR *d_security;
2065 	u32 new_sec_size = size_sd + SIZEOF_SECURITY_HDR;
2066 	u32 aligned_sec_size = ALIGN(new_sec_size, 16);
2067 	struct SECURITY_KEY hash_key;
2068 	struct ntfs_fnd *fnd_sdh = NULL;
2069 	const struct INDEX_ROOT *root_sdh;
2070 	const struct INDEX_ROOT *root_sii;
2071 	u64 mirr_off, new_sds_size;
2072 	u32 next, left;
2073 
2074 	static_assert((1 << Log2OfSecurityDescriptorsBlockSize) ==
2075 		      SecurityDescriptorsBlockSize);
2076 
2077 	hash_key.hash = security_hash(sd, size_sd);
2078 	hash_key.sec_id = SECURITY_ID_INVALID;
2079 
2080 	if (inserted)
2081 		*inserted = false;
2082 	*security_id = SECURITY_ID_INVALID;
2083 
2084 	/* Allocate a temporal buffer. */
2085 	d_security = kzalloc(aligned_sec_size, GFP_NOFS);
2086 	if (!d_security)
2087 		return -ENOMEM;
2088 
2089 	mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_SECURITY);
2090 
2091 	fnd_sdh = fnd_get();
2092 	if (!fnd_sdh) {
2093 		err = -ENOMEM;
2094 		goto out;
2095 	}
2096 
2097 	root_sdh = indx_get_root(indx_sdh, ni, NULL, NULL);
2098 	if (!root_sdh) {
2099 		err = -EINVAL;
2100 		goto out;
2101 	}
2102 
2103 	root_sii = indx_get_root(indx_sii, ni, NULL, NULL);
2104 	if (!root_sii) {
2105 		err = -EINVAL;
2106 		goto out;
2107 	}
2108 
2109 	/*
2110 	 * Check if such security already exists.
2111 	 * Use "SDH" and hash -> to get the offset in "SDS".
2112 	 */
2113 	err = indx_find(indx_sdh, ni, root_sdh, &hash_key, sizeof(hash_key),
2114 			&d_security->key.sec_id, &diff, (struct NTFS_DE **)&e,
2115 			fnd_sdh);
2116 	if (err)
2117 		goto out;
2118 
2119 	while (e) {
2120 		if (le32_to_cpu(e->sec_hdr.size) == new_sec_size) {
2121 			err = ntfs_read_run_nb(sbi, &ni->file.run,
2122 					       le64_to_cpu(e->sec_hdr.off),
2123 					       d_security, new_sec_size, NULL);
2124 			if (err)
2125 				goto out;
2126 
2127 			if (le32_to_cpu(d_security->size) == new_sec_size &&
2128 			    d_security->key.hash == hash_key.hash &&
2129 			    !memcmp(d_security + 1, sd, size_sd)) {
2130 				*security_id = d_security->key.sec_id;
2131 				/* Such security already exists. */
2132 				err = 0;
2133 				goto out;
2134 			}
2135 		}
2136 
2137 		err = indx_find_sort(indx_sdh, ni, root_sdh,
2138 				     (struct NTFS_DE **)&e, fnd_sdh);
2139 		if (err)
2140 			goto out;
2141 
2142 		if (!e || e->key.hash != hash_key.hash)
2143 			break;
2144 	}
2145 
2146 	/* Zero unused space. */
2147 	next = sbi->security.next_off & (SecurityDescriptorsBlockSize - 1);
2148 	left = SecurityDescriptorsBlockSize - next;
2149 
2150 	/* Zero gap until SecurityDescriptorsBlockSize. */
2151 	if (left < new_sec_size) {
2152 		/* Zero "left" bytes from sbi->security.next_off. */
2153 		sbi->security.next_off += SecurityDescriptorsBlockSize + left;
2154 	}
2155 
2156 	/* Zero tail of previous security. */
2157 	//used = ni->vfs_inode.i_size & (SecurityDescriptorsBlockSize - 1);
2158 
2159 	/*
2160 	 * Example:
2161 	 * 0x40438 == ni->vfs_inode.i_size
2162 	 * 0x00440 == sbi->security.next_off
2163 	 * need to zero [0x438-0x440)
2164 	 * if (next > used) {
2165 	 *  u32 tozero = next - used;
2166 	 *  zero "tozero" bytes from sbi->security.next_off - tozero
2167 	 */
2168 
2169 	/* Format new security descriptor. */
2170 	d_security->key.hash = hash_key.hash;
2171 	d_security->key.sec_id = cpu_to_le32(sbi->security.next_id);
2172 	d_security->off = cpu_to_le64(sbi->security.next_off);
2173 	d_security->size = cpu_to_le32(new_sec_size);
2174 	memcpy(d_security + 1, sd, size_sd);
2175 
2176 	/* Write main SDS bucket. */
2177 	err = ntfs_sb_write_run(sbi, &ni->file.run, sbi->security.next_off,
2178 				d_security, aligned_sec_size);
2179 
2180 	if (err)
2181 		goto out;
2182 
2183 	mirr_off = sbi->security.next_off + SecurityDescriptorsBlockSize;
2184 	new_sds_size = mirr_off + aligned_sec_size;
2185 
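	/* Extend $SDS if the mirror write would land past the current EOF. */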
2186 	if (new_sds_size > ni->vfs_inode.i_size) {
2187 		err = attr_set_size(ni, ATTR_DATA, SDS_NAME,
2188 				    ARRAY_SIZE(SDS_NAME), &ni->file.run,
2189 				    new_sds_size, &new_sds_size, false, NULL);
2190 		if (err)
2191 			goto out;
2192 	}
2193 
	/* Write the mirror copy of the SDS block. */
2195 	err = ntfs_sb_write_run(sbi, &ni->file.run, mirr_off, d_security,
2196 				aligned_sec_size);
2197 	if (err)
2198 		goto out;
2199 
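	/*
	 * Each descriptor is referenced from two indexes: $SII, keyed by
	 * security id, and $SDH, keyed by (hash, security id) pairs.
	 */
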
2200 	/* Fill SII entry. */
2201 	sii_e.de.view.data_off =
2202 		cpu_to_le16(offsetof(struct NTFS_DE_SII, sec_hdr));
2203 	sii_e.de.view.data_size = cpu_to_le16(SIZEOF_SECURITY_HDR);
2204 	sii_e.de.view.res = 0;
2205 	sii_e.de.size = cpu_to_le16(SIZEOF_SII_DIRENTRY);
2206 	sii_e.de.key_size = cpu_to_le16(sizeof(d_security->key.sec_id));
2207 	sii_e.de.flags = 0;
2208 	sii_e.de.res = 0;
2209 	sii_e.sec_id = d_security->key.sec_id;
2210 	memcpy(&sii_e.sec_hdr, d_security, SIZEOF_SECURITY_HDR);
2211 
2212 	err = indx_insert_entry(indx_sii, ni, &sii_e.de, NULL, NULL, 0);
2213 	if (err)
2214 		goto out;
2215 
2216 	/* Fill SDH entry. */
2217 	sdh_e.de.view.data_off =
2218 		cpu_to_le16(offsetof(struct NTFS_DE_SDH, sec_hdr));
2219 	sdh_e.de.view.data_size = cpu_to_le16(SIZEOF_SECURITY_HDR);
2220 	sdh_e.de.view.res = 0;
2221 	sdh_e.de.size = cpu_to_le16(SIZEOF_SDH_DIRENTRY);
2222 	sdh_e.de.key_size = cpu_to_le16(sizeof(sdh_e.key));
2223 	sdh_e.de.flags = 0;
2224 	sdh_e.de.res = 0;
2225 	sdh_e.key.hash = d_security->key.hash;
2226 	sdh_e.key.sec_id = d_security->key.sec_id;
2227 	memcpy(&sdh_e.sec_hdr, d_security, SIZEOF_SECURITY_HDR);
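	/* $SDH entries end with an "II" marker, as written by Windows. */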
2228 	sdh_e.magic[0] = cpu_to_le16('I');
2229 	sdh_e.magic[1] = cpu_to_le16('I');
2230 
2231 	fnd_clear(fnd_sdh);
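	/*
	 * The non-NULL ctx ((void *)1) is forwarded to the $SDH collation
	 * compare, analogous to its use in ntfs_remove_reparse below
	 * (this reading of the flag is an assumption).
	 */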
2232 	err = indx_insert_entry(indx_sdh, ni, &sdh_e.de, (void *)(size_t)1,
2233 				fnd_sdh, 0);
2234 	if (err)
2235 		goto out;
2236 
2237 	*security_id = d_security->key.sec_id;
2238 	if (inserted)
2239 		*inserted = true;
2240 
2241 	/* Update Id and offset for next descriptor. */
2242 	sbi->security.next_id += 1;
2243 	sbi->security.next_off += aligned_sec_size;
2244 
2245 out:
2246 	fnd_put(fnd_sdh);
2247 	mark_inode_dirty(&ni->vfs_inode);
2248 	ni_unlock(ni);
2249 	kfree(d_security);
2250 
2251 	return err;
2252 }
2253 
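/*
 * Example caller sketch (the names mirror the parameters used above;
 * this is illustrative, not a kernel API contract):
 *
 *	__le32 security_id;
 *	bool inserted;
 *
 *	err = ntfs_insert_security(sbi, sd, size_sd, &security_id,
 *				   &inserted);
 *	if (!err && inserted)
 *		pr_debug("new sec_id %u\n", le32_to_cpu(security_id));
 */
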
2254 /*
2255  * ntfs_reparse_init - Load and parse $Extend/$Reparse.
2256  */
2257 int ntfs_reparse_init(struct ntfs_sb_info *sbi)
2258 {
2259 	int err;
2260 	struct ntfs_inode *ni = sbi->reparse.ni;
2261 	struct ntfs_index *indx = &sbi->reparse.index_r;
2262 	struct ATTRIB *attr;
2263 	struct ATTR_LIST_ENTRY *le;
2264 	const struct INDEX_ROOT *root_r;
2265 
2266 	if (!ni)
2267 		return 0;
2268 
2269 	le = NULL;
2270 	attr = ni_find_attr(ni, NULL, &le, ATTR_ROOT, SR_NAME,
2271 			    ARRAY_SIZE(SR_NAME), NULL, NULL);
2272 	if (!attr) {
2273 		err = -EINVAL;
2274 		goto out;
2275 	}
2276 
2277 	root_r = resident_data(attr);
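	/*
	 * $R is a view index: it indexes no attribute (type is zero) and
	 * collates its keys as a sequence of unsigned ints.
	 */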
2278 	if (root_r->type != ATTR_ZERO ||
2279 	    root_r->rule != NTFS_COLLATION_TYPE_UINTS) {
2280 		err = -EINVAL;
2281 		goto out;
2282 	}
2283 
	err = indx_init(indx, sbi, attr, INDEX_MUTEX_SR);
2287 
2288 out:
2289 	return err;
2290 }
2291 
2292 /*
2293  * ntfs_objid_init - Load and parse $Extend/$ObjId.
2294  */
2295 int ntfs_objid_init(struct ntfs_sb_info *sbi)
2296 {
2297 	int err;
2298 	struct ntfs_inode *ni = sbi->objid.ni;
2299 	struct ntfs_index *indx = &sbi->objid.index_o;
2300 	struct ATTRIB *attr;
2301 	struct ATTR_LIST_ENTRY *le;
2302 	const struct INDEX_ROOT *root;
2303 
2304 	if (!ni)
2305 		return 0;
2306 
2307 	le = NULL;
2308 	attr = ni_find_attr(ni, NULL, &le, ATTR_ROOT, SO_NAME,
2309 			    ARRAY_SIZE(SO_NAME), NULL, NULL);
2310 	if (!attr) {
2311 		err = -EINVAL;
2312 		goto out;
2313 	}
2314 
2315 	root = resident_data(attr);
2316 	if (root->type != ATTR_ZERO ||
2317 	    root->rule != NTFS_COLLATION_TYPE_UINTS) {
2318 		err = -EINVAL;
2319 		goto out;
2320 	}
2321 
	err = indx_init(indx, sbi, attr, INDEX_MUTEX_SO);
2325 
2326 out:
2327 	return err;
2328 }
2329 
2330 int ntfs_objid_remove(struct ntfs_sb_info *sbi, struct GUID *guid)
2331 {
2332 	int err;
2333 	struct ntfs_inode *ni = sbi->objid.ni;
2334 	struct ntfs_index *indx = &sbi->objid.index_o;
2335 
2336 	if (!ni)
2337 		return -EINVAL;
2338 
2339 	mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_OBJID);
2340 
2341 	err = indx_delete_entry(indx, ni, guid, sizeof(*guid), NULL);
2342 
2343 	mark_inode_dirty(&ni->vfs_inode);
2344 	ni_unlock(ni);
2345 
2346 	return err;
2347 }
2348 
2349 int ntfs_insert_reparse(struct ntfs_sb_info *sbi, __le32 rtag,
2350 			const struct MFT_REF *ref)
2351 {
2352 	int err;
2353 	struct ntfs_inode *ni = sbi->reparse.ni;
2354 	struct ntfs_index *indx = &sbi->reparse.index_r;
2355 	struct NTFS_DE_R re;
2356 
2357 	if (!ni)
2358 		return -EINVAL;
2359 
2360 	memset(&re, 0, sizeof(re));
2361 
2362 	re.de.view.data_off = cpu_to_le16(offsetof(struct NTFS_DE_R, zero));
2363 	re.de.size = cpu_to_le16(sizeof(struct NTFS_DE_R));
2364 	re.de.key_size = cpu_to_le16(sizeof(re.key));
2365 
2366 	re.key.ReparseTag = rtag;
2367 	memcpy(&re.key.ref, ref, sizeof(*ref));
2368 
2369 	mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_REPARSE);
2370 
2371 	err = indx_insert_entry(indx, ni, &re.de, NULL, NULL, 0);
2372 
2373 	mark_inode_dirty(&ni->vfs_inode);
2374 	ni_unlock(ni);
2375 
2376 	return err;
2377 }
2378 
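/*
 * Reparse keys are (ReparseTag, MFT_REF) pairs compared as raw
 * unsigned ints (NTFS_COLLATION_TYPE_UINTS, checked in
 * ntfs_reparse_init), so all entries for one tag sort together.
 */
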
2379 int ntfs_remove_reparse(struct ntfs_sb_info *sbi, __le32 rtag,
2380 			const struct MFT_REF *ref)
2381 {
2382 	int err, diff;
2383 	struct ntfs_inode *ni = sbi->reparse.ni;
2384 	struct ntfs_index *indx = &sbi->reparse.index_r;
2385 	struct ntfs_fnd *fnd = NULL;
2386 	struct REPARSE_KEY rkey;
2387 	struct NTFS_DE_R *re;
2388 	struct INDEX_ROOT *root_r;
2389 
2390 	if (!ni)
2391 		return -EINVAL;
2392 
2393 	rkey.ReparseTag = rtag;
2394 	rkey.ref = *ref;
2395 
2396 	mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_REPARSE);
2397 
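	/*
	 * A non-zero tag gives the complete key, so the entry can be
	 * deleted directly. With rtag == 0 the tag is unknown: find the
	 * entry by its MFT reference first to recover the full key.
	 */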
2398 	if (rtag) {
2399 		err = indx_delete_entry(indx, ni, &rkey, sizeof(rkey), NULL);
2400 		goto out1;
2401 	}
2402 
2403 	fnd = fnd_get();
2404 	if (!fnd) {
2405 		err = -ENOMEM;
2406 		goto out1;
2407 	}
2408 
2409 	root_r = indx_get_root(indx, ni, NULL, NULL);
2410 	if (!root_r) {
2411 		err = -EINVAL;
2412 		goto out;
2413 	}
2414 
	/* A ctx of 1 forces the compare to ignore rkey.ReparseTag. */
2416 	err = indx_find(indx, ni, root_r, &rkey, sizeof(rkey), (void *)1, &diff,
2417 			(struct NTFS_DE **)&re, fnd);
2418 	if (err)
2419 		goto out;
2420 
	if (!re) {
		err = -ENOENT;
		goto out;
	}

	if (memcmp(&re->key.ref, ref, sizeof(*ref))) {
		/* Should be impossible; the volume may be corrupt. */
		goto out;
	}
2425 
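	/* Take the complete found key (including the real tag) for the delete. */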
2426 	memcpy(&rkey, &re->key, sizeof(rkey));
2427 
2428 	fnd_put(fnd);
2429 	fnd = NULL;
2430 
	err = indx_delete_entry(indx, ni, &rkey, sizeof(rkey), NULL);
2434 
2435 out:
2436 	fnd_put(fnd);
2437 
2438 out1:
2439 	mark_inode_dirty(&ni->vfs_inode);
2440 	ni_unlock(ni);
2441 
2442 	return err;
2443 }
2444 
2445 static inline void ntfs_unmap_and_discard(struct ntfs_sb_info *sbi, CLST lcn,
2446 					  CLST len)
2447 {
2448 	ntfs_unmap_meta(sbi->sb, lcn, len);
2449 	ntfs_discard(sbi, lcn, len);
2450 }
2451 
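/*
 * mark_as_free_ex - Mark a run of clusters as free in the bitmap.
 *
 * If part of the range is unexpectedly free already, the volume is
 * flagged dirty and only the still-used sub-runs are freed. Worked
 * example: for len == 4 with only the 2nd and 3rd clusters still in
 * use, the loop below frees exactly that two-cluster run.
 */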
2452 void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim)
2453 {
2454 	CLST end, i;
2455 	struct wnd_bitmap *wnd = &sbi->used.bitmap;
2456 
2457 	down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
2458 	if (!wnd_is_used(wnd, lcn, len)) {
2459 		ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
2460 
2461 		end = lcn + len;
2462 		len = 0;
2463 		for (i = lcn; i < end; i++) {
2464 			if (wnd_is_used(wnd, i, 1)) {
2465 				if (!len)
2466 					lcn = i;
2467 				len += 1;
2468 				continue;
2469 			}
2470 
2471 			if (!len)
2472 				continue;
2473 
2474 			if (trim)
2475 				ntfs_unmap_and_discard(sbi, lcn, len);
2476 
2477 			wnd_set_free(wnd, lcn, len);
2478 			len = 0;
2479 		}
2480 
2481 		if (!len)
2482 			goto out;
2483 	}
2484 
2485 	if (trim)
2486 		ntfs_unmap_and_discard(sbi, lcn, len);
2487 	wnd_set_free(wnd, lcn, len);
2488 
2489 out:
2490 	up_write(&wnd->rw_lock);
2491 }
2492 
2493 /*
2494  * run_deallocate - Deallocate clusters.
2495  */
2496 int run_deallocate(struct ntfs_sb_info *sbi, struct runs_tree *run, bool trim)
2497 {
2498 	CLST lcn, len;
2499 	size_t idx = 0;
2500 
2501 	while (run_get_entry(run, idx++, NULL, &lcn, &len)) {
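		/* Sparse fragments occupy no on-disk clusters. */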
2502 		if (lcn == SPARSE_LCN)
2503 			continue;
2504 
2505 		mark_as_free_ex(sbi, lcn, len, trim);
2506 	}
2507 
2508 	return 0;
2509 }
2510