xref: /openbmc/linux/fs/ubifs/dir.c (revision b6dcefde)
1 /* * This file is part of UBIFS.
2  *
3  * Copyright (C) 2006-2008 Nokia Corporation.
4  * Copyright (C) 2006, 2007 University of Szeged, Hungary
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 as published by
8  * the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * this program; if not, write to the Free Software Foundation, Inc., 51
17  * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18  *
19  * Authors: Artem Bityutskiy (Битюцкий Артём)
20  *          Adrian Hunter
21  *          Zoltan Sogor
22  */
23 
24 /*
25  * This file implements directory operations.
26  *
27  * All FS operations in this file allocate budget before writing anything to the
28  * media. If they fail to allocate it, the error is returned. The only
29  * exceptions are 'ubifs_unlink()' and 'ubifs_rmdir()', which keep working even
30  * if they are unable to allocate the budget, because an %-ENOSPC failure on
31  * deletion is not what users usually expect. The UBIFS budgeting subsystem has some
32  * space reserved for these purposes.
33  *
34  * All operations in this file write all inodes which they change straight
35  * away, instead of marking them dirty. For example, 'ubifs_link()' changes
36  * @i_size of the parent inode and writes the parent inode together with the
37  * target inode. This was done to simplify file-system recovery which would
38  * otherwise be very difficult to do. The only exception is rename, which
39  * updates @i_ctime of the re-named inode but does not write that inode
40  * straight away - it only marks it as dirty.
41  */
42 
43 #include "ubifs.h"
44 
45 /**
46  * inherit_flags - inherit flags of the parent inode.
47  * @dir: parent inode
48  * @mode: new inode mode flags
49  *
50  * This is a helper function for 'ubifs_new_inode()' which inherits flag of the
51  * parent directory inode @dir. UBIFS inodes inherit the following flags:
52  * o %UBIFS_COMPR_FL, which is useful to switch compression on/of on
53  *   sub-directory basis;
54  * o %UBIFS_SYNC_FL - useful for the same reasons;
55  * o %UBIFS_DIRSYNC_FL - similar, but relevant only to directories.
56  *
57  * This function returns the inherited flags.
58  */
59 static int inherit_flags(const struct inode *dir, int mode)
60 {
61 	int flags;
62 	const struct ubifs_inode *ui = ubifs_inode(dir);
63 
64 	if (!S_ISDIR(dir->i_mode))
65 		/*
66 		 * The parent is not a directory, which means that an extended
67 		 * attribute inode is being created. No flags.
68 		 */
69 		return 0;
70 
71 	flags = ui->flags & (UBIFS_COMPR_FL | UBIFS_SYNC_FL | UBIFS_DIRSYNC_FL);
72 	if (!S_ISDIR(mode))
73 		/* The "DIRSYNC" flag only applies to directories */
74 		flags &= ~UBIFS_DIRSYNC_FL;
75 	return flags;
76 }
77 
78 /**
79  * ubifs_new_inode - allocate new UBIFS inode object.
80  * @c: UBIFS file-system description object
81  * @dir: parent directory inode
82  * @mode: inode mode flags
83  *
84  * This function finds an unused inode number, allocates new inode and
85  * initializes it. Returns new inode in case of success and an error code in
86  * case of failure.
87  */
88 struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
89 			      int mode)
90 {
91 	struct inode *inode;
92 	struct ubifs_inode *ui;
93 
94 	inode = new_inode(c->vfs_sb);
95 	ui = ubifs_inode(inode);
96 	if (!inode)
97 		return ERR_PTR(-ENOMEM);
98 
99 	/*
100 	 * Set 'S_NOCMTIME' to prevent VFS form updating [mc]time of inodes and
101 	 * marking them dirty in file write path (see 'file_update_time()').
102 	 * UBIFS has to fully control "clean <-> dirty" transitions of inodes
103 	 * to make budgeting work.
104 	 */
105 	inode->i_flags |= (S_NOCMTIME);
106 
107 	inode->i_uid = current_fsuid();
108 	if (dir->i_mode & S_ISGID) {
109 		inode->i_gid = dir->i_gid;
110 		if (S_ISDIR(mode))
111 			mode |= S_ISGID;
112 	} else
113 		inode->i_gid = current_fsgid();
114 	inode->i_mode = mode;
115 	inode->i_mtime = inode->i_atime = inode->i_ctime =
116 			 ubifs_current_time(inode);
117 	inode->i_mapping->nrpages = 0;
118 	/* Disable readahead */
119 	inode->i_mapping->backing_dev_info = &c->bdi;
120 
121 	switch (mode & S_IFMT) {
122 	case S_IFREG:
123 		inode->i_mapping->a_ops = &ubifs_file_address_operations;
124 		inode->i_op = &ubifs_file_inode_operations;
125 		inode->i_fop = &ubifs_file_operations;
126 		break;
127 	case S_IFDIR:
128 		inode->i_op  = &ubifs_dir_inode_operations;
129 		inode->i_fop = &ubifs_dir_operations;
130 		inode->i_size = ui->ui_size = UBIFS_INO_NODE_SZ;
131 		break;
132 	case S_IFLNK:
133 		inode->i_op = &ubifs_symlink_inode_operations;
134 		break;
135 	case S_IFSOCK:
136 	case S_IFIFO:
137 	case S_IFBLK:
138 	case S_IFCHR:
139 		inode->i_op  = &ubifs_file_inode_operations;
140 		break;
141 	default:
142 		BUG();
143 	}
144 
145 	ui->flags = inherit_flags(dir, mode);
146 	ubifs_set_inode_flags(inode);
147 	if (S_ISREG(mode))
148 		ui->compr_type = c->default_compr;
149 	else
150 		ui->compr_type = UBIFS_COMPR_NONE;
151 	ui->synced_i_size = 0;
152 
153 	spin_lock(&c->cnt_lock);
154 	/* Inode number overflow is currently not supported */
155 	if (c->highest_inum >= INUM_WARN_WATERMARK) {
156 		if (c->highest_inum >= INUM_WATERMARK) {
157 			spin_unlock(&c->cnt_lock);
158 			ubifs_err("out of inode numbers");
159 			make_bad_inode(inode);
160 			iput(inode);
161 			return ERR_PTR(-EINVAL);
162 		}
163 		ubifs_warn("running out of inode numbers (current %lu, max %d)",
164 			   (unsigned long)c->highest_inum, INUM_WATERMARK);
165 	}
166 
167 	inode->i_ino = ++c->highest_inum;
168 	/*
169 	 * The creation sequence number remains with this inode for its
170 	 * lifetime. All nodes for this inode have a greater sequence number,
171 	 * and so it is possible to distinguish obsolete nodes belonging to a
172 	 * previous incarnation of the same inode number - for example, for the
173 	 * purpose of rebuilding the index.
174 	 */
175 	ui->creat_sqnum = ++c->max_sqnum;
176 	spin_unlock(&c->cnt_lock);
177 	return inode;
178 }
179 
#ifdef CONFIG_UBIFS_FS_DEBUG

/*
 * Debugging check: make sure the name stored in directory entry node @dent is
 * exactly @nm. The check is done only if %UBIFS_CHK_GEN is set in
 * 'ubifs_chk_flags'. Returns zero if the names match or the check is
 * disabled, and %-EINVAL otherwise.
 */
static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm)
{
	int mismatch;

	if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
		return 0;

	/* The contents are compared only if the lengths are equal */
	mismatch = le16_to_cpu(dent->nlen) != nm->len ||
		   memcmp(dent->name, nm->name, nm->len);
	return mismatch ? -EINVAL : 0;
}

#else

/* Debugging is compiled out, so the check always succeeds */
#define dbg_check_name(dent, nm) 0

#endif
198 
199 static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
200 				   struct nameidata *nd)
201 {
202 	int err;
203 	union ubifs_key key;
204 	struct inode *inode = NULL;
205 	struct ubifs_dent_node *dent;
206 	struct ubifs_info *c = dir->i_sb->s_fs_info;
207 
208 	dbg_gen("'%.*s' in dir ino %lu",
209 		dentry->d_name.len, dentry->d_name.name, dir->i_ino);
210 
211 	if (dentry->d_name.len > UBIFS_MAX_NLEN)
212 		return ERR_PTR(-ENAMETOOLONG);
213 
214 	dent = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
215 	if (!dent)
216 		return ERR_PTR(-ENOMEM);
217 
218 	dent_key_init(c, &key, dir->i_ino, &dentry->d_name);
219 
220 	err = ubifs_tnc_lookup_nm(c, &key, dent, &dentry->d_name);
221 	if (err) {
222 		if (err == -ENOENT) {
223 			dbg_gen("not found");
224 			goto done;
225 		}
226 		goto out;
227 	}
228 
229 	if (dbg_check_name(dent, &dentry->d_name)) {
230 		err = -EINVAL;
231 		goto out;
232 	}
233 
234 	inode = ubifs_iget(dir->i_sb, le64_to_cpu(dent->inum));
235 	if (IS_ERR(inode)) {
236 		/*
237 		 * This should not happen. Probably the file-system needs
238 		 * checking.
239 		 */
240 		err = PTR_ERR(inode);
241 		ubifs_err("dead directory entry '%.*s', error %d",
242 			  dentry->d_name.len, dentry->d_name.name, err);
243 		ubifs_ro_mode(c, err);
244 		goto out;
245 	}
246 
247 done:
248 	kfree(dent);
249 	/*
250 	 * Note, d_splice_alias() would be required instead if we supported
251 	 * NFS.
252 	 */
253 	d_add(dentry, inode);
254 	return NULL;
255 
256 out:
257 	kfree(dent);
258 	return ERR_PTR(err);
259 }
260 
261 static int ubifs_create(struct inode *dir, struct dentry *dentry, int mode,
262 			struct nameidata *nd)
263 {
264 	struct inode *inode;
265 	struct ubifs_info *c = dir->i_sb->s_fs_info;
266 	int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
267 	struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
268 					.dirtied_ino = 1 };
269 	struct ubifs_inode *dir_ui = ubifs_inode(dir);
270 
271 	/*
272 	 * Budget request settings: new inode, new direntry, changing the
273 	 * parent directory inode.
274 	 */
275 
276 	dbg_gen("dent '%.*s', mode %#x in dir ino %lu",
277 		dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
278 
279 	err = ubifs_budget_space(c, &req);
280 	if (err)
281 		return err;
282 
283 	inode = ubifs_new_inode(c, dir, mode);
284 	if (IS_ERR(inode)) {
285 		err = PTR_ERR(inode);
286 		goto out_budg;
287 	}
288 
289 	mutex_lock(&dir_ui->ui_mutex);
290 	dir->i_size += sz_change;
291 	dir_ui->ui_size = dir->i_size;
292 	dir->i_mtime = dir->i_ctime = inode->i_ctime;
293 	err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
294 	if (err)
295 		goto out_cancel;
296 	mutex_unlock(&dir_ui->ui_mutex);
297 
298 	ubifs_release_budget(c, &req);
299 	insert_inode_hash(inode);
300 	d_instantiate(dentry, inode);
301 	return 0;
302 
303 out_cancel:
304 	dir->i_size -= sz_change;
305 	dir_ui->ui_size = dir->i_size;
306 	mutex_unlock(&dir_ui->ui_mutex);
307 	make_bad_inode(inode);
308 	iput(inode);
309 out_budg:
310 	ubifs_release_budget(c, &req);
311 	ubifs_err("cannot create regular file, error %d", err);
312 	return err;
313 }
314 
315 /**
316  * vfs_dent_type - get VFS directory entry type.
317  * @type: UBIFS directory entry type
318  *
319  * This function converts UBIFS directory entry type into VFS directory entry
320  * type.
321  */
322 static unsigned int vfs_dent_type(uint8_t type)
323 {
324 	switch (type) {
325 	case UBIFS_ITYPE_REG:
326 		return DT_REG;
327 	case UBIFS_ITYPE_DIR:
328 		return DT_DIR;
329 	case UBIFS_ITYPE_LNK:
330 		return DT_LNK;
331 	case UBIFS_ITYPE_BLK:
332 		return DT_BLK;
333 	case UBIFS_ITYPE_CHR:
334 		return DT_CHR;
335 	case UBIFS_ITYPE_FIFO:
336 		return DT_FIFO;
337 	case UBIFS_ITYPE_SOCK:
338 		return DT_SOCK;
339 	default:
340 		BUG();
341 	}
342 	return 0;
343 }
344 
345 /*
346  * The classical Unix view for directory is that it is a linear array of
347  * (name, inode number) entries. Linux/VFS assumes this model as well.
348  * Particularly, 'readdir()' call wants us to return a directory entry offset
349  * which later may be used to continue 'readdir()'ing the directory or to
350  * 'seek()' to that specific direntry. Obviously UBIFS does not really fit this
351  * model because directory entries are identified by keys, which may collide.
352  *
353  * UBIFS uses directory entry hash value for directory offsets, so
354  * 'seekdir()'/'telldir()' may not always work because of possible key
355  * collisions. But UBIFS guarantees that consecutive 'readdir()' calls work
356  * properly by means of saving full directory entry name in the private field
357  * of the file description object.
358  *
359  * This means that UBIFS cannot support NFS which requires full
360  * 'seekdir()'/'telldir()' support.
361  */
362 static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
363 {
364 	int err, over = 0;
365 	struct qstr nm;
366 	union ubifs_key key;
367 	struct ubifs_dent_node *dent;
368 	struct inode *dir = file->f_path.dentry->d_inode;
369 	struct ubifs_info *c = dir->i_sb->s_fs_info;
370 
371 	dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos);
372 
373 	if (file->f_pos > UBIFS_S_KEY_HASH_MASK || file->f_pos == 2)
374 		/*
375 		 * The directory was seek'ed to a senseless position or there
376 		 * are no more entries.
377 		 */
378 		return 0;
379 
380 	/* File positions 0 and 1 correspond to "." and ".." */
381 	if (file->f_pos == 0) {
382 		ubifs_assert(!file->private_data);
383 		over = filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR);
384 		if (over)
385 			return 0;
386 		file->f_pos = 1;
387 	}
388 
389 	if (file->f_pos == 1) {
390 		ubifs_assert(!file->private_data);
391 		over = filldir(dirent, "..", 2, 1,
392 			       parent_ino(file->f_path.dentry), DT_DIR);
393 		if (over)
394 			return 0;
395 
396 		/* Find the first entry in TNC and save it */
397 		lowest_dent_key(c, &key, dir->i_ino);
398 		nm.name = NULL;
399 		dent = ubifs_tnc_next_ent(c, &key, &nm);
400 		if (IS_ERR(dent)) {
401 			err = PTR_ERR(dent);
402 			goto out;
403 		}
404 
405 		file->f_pos = key_hash_flash(c, &dent->key);
406 		file->private_data = dent;
407 	}
408 
409 	dent = file->private_data;
410 	if (!dent) {
411 		/*
412 		 * The directory was seek'ed to and is now readdir'ed.
413 		 * Find the entry corresponding to @file->f_pos or the
414 		 * closest one.
415 		 */
416 		dent_key_init_hash(c, &key, dir->i_ino, file->f_pos);
417 		nm.name = NULL;
418 		dent = ubifs_tnc_next_ent(c, &key, &nm);
419 		if (IS_ERR(dent)) {
420 			err = PTR_ERR(dent);
421 			goto out;
422 		}
423 		file->f_pos = key_hash_flash(c, &dent->key);
424 		file->private_data = dent;
425 	}
426 
427 	while (1) {
428 		dbg_gen("feed '%s', ino %llu, new f_pos %#x",
429 			dent->name, (unsigned long long)le64_to_cpu(dent->inum),
430 			key_hash_flash(c, &dent->key));
431 		ubifs_assert(le64_to_cpu(dent->ch.sqnum) >
432 			     ubifs_inode(dir)->creat_sqnum);
433 
434 		nm.len = le16_to_cpu(dent->nlen);
435 		over = filldir(dirent, dent->name, nm.len, file->f_pos,
436 			       le64_to_cpu(dent->inum),
437 			       vfs_dent_type(dent->type));
438 		if (over)
439 			return 0;
440 
441 		/* Switch to the next entry */
442 		key_read(c, &dent->key, &key);
443 		nm.name = dent->name;
444 		dent = ubifs_tnc_next_ent(c, &key, &nm);
445 		if (IS_ERR(dent)) {
446 			err = PTR_ERR(dent);
447 			goto out;
448 		}
449 
450 		kfree(file->private_data);
451 		file->f_pos = key_hash_flash(c, &dent->key);
452 		file->private_data = dent;
453 		cond_resched();
454 	}
455 
456 out:
457 	if (err != -ENOENT) {
458 		ubifs_err("cannot find next direntry, error %d", err);
459 		return err;
460 	}
461 
462 	kfree(file->private_data);
463 	file->private_data = NULL;
464 	file->f_pos = 2;
465 	return 0;
466 }
467 
468 /* If a directory is seeked, we have to free saved readdir() state */
469 static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int origin)
470 {
471 	kfree(file->private_data);
472 	file->private_data = NULL;
473 	return generic_file_llseek(file, offset, origin);
474 }
475 
476 /* Free saved readdir() state when the directory is closed */
477 static int ubifs_dir_release(struct inode *dir, struct file *file)
478 {
479 	kfree(file->private_data);
480 	file->private_data = NULL;
481 	return 0;
482 }
483 
484 /**
485  * lock_2_inodes - a wrapper for locking two UBIFS inodes.
486  * @inode1: first inode
487  * @inode2: second inode
488  *
489  * We do not implement any tricks to guarantee strict lock ordering, because
490  * VFS has already done it for us on the @i_mutex. So this is just a simple
491  * wrapper function.
492  */
493 static void lock_2_inodes(struct inode *inode1, struct inode *inode2)
494 {
495 	mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1);
496 	mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2);
497 }
498 
499 /**
500  * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes.
501  * @inode1: first inode
502  * @inode2: second inode
503  */
504 static void unlock_2_inodes(struct inode *inode1, struct inode *inode2)
505 {
506 	mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
507 	mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
508 }
509 
510 static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
511 		      struct dentry *dentry)
512 {
513 	struct ubifs_info *c = dir->i_sb->s_fs_info;
514 	struct inode *inode = old_dentry->d_inode;
515 	struct ubifs_inode *ui = ubifs_inode(inode);
516 	struct ubifs_inode *dir_ui = ubifs_inode(dir);
517 	int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
518 	struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2,
519 				.dirtied_ino_d = ALIGN(ui->data_len, 8) };
520 
521 	/*
522 	 * Budget request settings: new direntry, changing the target inode,
523 	 * changing the parent inode.
524 	 */
525 
526 	dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu",
527 		dentry->d_name.len, dentry->d_name.name, inode->i_ino,
528 		inode->i_nlink, dir->i_ino);
529 	ubifs_assert(mutex_is_locked(&dir->i_mutex));
530 	ubifs_assert(mutex_is_locked(&inode->i_mutex));
531 
532 	/*
533 	 * Return -ENOENT if we've raced with unlink and i_nlink is 0.  Doing
534 	 * otherwise has the potential to corrupt the orphan inode list.
535 	 *
536 	 * Indeed, consider a scenario when 'vfs_link(dirA/fileA)' and
537 	 * 'vfs_unlink(dirA/fileA, dirB/fileB)' race. 'vfs_link()' does not
538 	 * lock 'dirA->i_mutex', so this is possible. Both of the functions
539 	 * lock 'fileA->i_mutex' though. Suppose 'vfs_unlink()' wins, and takes
540 	 * 'fileA->i_mutex' mutex first. Suppose 'fileA->i_nlink' is 1. In this
541 	 * case 'ubifs_unlink()' will drop the last reference, and put 'inodeA'
542 	 * to the list of orphans. After this, 'vfs_link()' will link
543 	 * 'dirB/fileB' to 'inodeA'. This is a problem because, for example,
544 	 * the subsequent 'vfs_unlink(dirB/fileB)' will add the same inode
545 	 * to the list of orphans.
546 	 */
547 	 if (inode->i_nlink == 0)
548 		 return -ENOENT;
549 
550 	err = dbg_check_synced_i_size(inode);
551 	if (err)
552 		return err;
553 
554 	err = ubifs_budget_space(c, &req);
555 	if (err)
556 		return err;
557 
558 	lock_2_inodes(dir, inode);
559 	inc_nlink(inode);
560 	atomic_inc(&inode->i_count);
561 	inode->i_ctime = ubifs_current_time(inode);
562 	dir->i_size += sz_change;
563 	dir_ui->ui_size = dir->i_size;
564 	dir->i_mtime = dir->i_ctime = inode->i_ctime;
565 	err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
566 	if (err)
567 		goto out_cancel;
568 	unlock_2_inodes(dir, inode);
569 
570 	ubifs_release_budget(c, &req);
571 	d_instantiate(dentry, inode);
572 	return 0;
573 
574 out_cancel:
575 	dir->i_size -= sz_change;
576 	dir_ui->ui_size = dir->i_size;
577 	drop_nlink(inode);
578 	unlock_2_inodes(dir, inode);
579 	ubifs_release_budget(c, &req);
580 	iput(inode);
581 	return err;
582 }
583 
584 static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
585 {
586 	struct ubifs_info *c = dir->i_sb->s_fs_info;
587 	struct inode *inode = dentry->d_inode;
588 	struct ubifs_inode *dir_ui = ubifs_inode(dir);
589 	int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
590 	int err, budgeted = 1;
591 	struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 };
592 
593 	/*
594 	 * Budget request settings: deletion direntry, deletion inode (+1 for
595 	 * @dirtied_ino), changing the parent directory inode. If budgeting
596 	 * fails, go ahead anyway because we have extra space reserved for
597 	 * deletions.
598 	 */
599 
600 	dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu",
601 		dentry->d_name.len, dentry->d_name.name, inode->i_ino,
602 		inode->i_nlink, dir->i_ino);
603 	ubifs_assert(mutex_is_locked(&dir->i_mutex));
604 	ubifs_assert(mutex_is_locked(&inode->i_mutex));
605 	err = dbg_check_synced_i_size(inode);
606 	if (err)
607 		return err;
608 
609 	err = ubifs_budget_space(c, &req);
610 	if (err) {
611 		if (err != -ENOSPC)
612 			return err;
613 		budgeted = 0;
614 	}
615 
616 	lock_2_inodes(dir, inode);
617 	inode->i_ctime = ubifs_current_time(dir);
618 	drop_nlink(inode);
619 	dir->i_size -= sz_change;
620 	dir_ui->ui_size = dir->i_size;
621 	dir->i_mtime = dir->i_ctime = inode->i_ctime;
622 	err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 1, 0);
623 	if (err)
624 		goto out_cancel;
625 	unlock_2_inodes(dir, inode);
626 
627 	if (budgeted)
628 		ubifs_release_budget(c, &req);
629 	else {
630 		/* We've deleted something - clean the "no space" flags */
631 		c->nospace = c->nospace_rp = 0;
632 		smp_wmb();
633 	}
634 	return 0;
635 
636 out_cancel:
637 	dir->i_size += sz_change;
638 	dir_ui->ui_size = dir->i_size;
639 	inc_nlink(inode);
640 	unlock_2_inodes(dir, inode);
641 	if (budgeted)
642 		ubifs_release_budget(c, &req);
643 	return err;
644 }
645 
646 /**
647  * check_dir_empty - check if a directory is empty or not.
648  * @c: UBIFS file-system description object
649  * @dir: VFS inode object of the directory to check
650  *
651  * This function checks if directory @dir is empty. Returns zero if the
652  * directory is empty, %-ENOTEMPTY if it is not, and other negative error codes
653  * in case of of errors.
654  */
655 static int check_dir_empty(struct ubifs_info *c, struct inode *dir)
656 {
657 	struct qstr nm = { .name = NULL };
658 	struct ubifs_dent_node *dent;
659 	union ubifs_key key;
660 	int err;
661 
662 	lowest_dent_key(c, &key, dir->i_ino);
663 	dent = ubifs_tnc_next_ent(c, &key, &nm);
664 	if (IS_ERR(dent)) {
665 		err = PTR_ERR(dent);
666 		if (err == -ENOENT)
667 			err = 0;
668 	} else {
669 		kfree(dent);
670 		err = -ENOTEMPTY;
671 	}
672 	return err;
673 }
674 
675 static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
676 {
677 	struct ubifs_info *c = dir->i_sb->s_fs_info;
678 	struct inode *inode = dentry->d_inode;
679 	int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
680 	int err, budgeted = 1;
681 	struct ubifs_inode *dir_ui = ubifs_inode(dir);
682 	struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 };
683 
684 	/*
685 	 * Budget request settings: deletion direntry, deletion inode and
686 	 * changing the parent inode. If budgeting fails, go ahead anyway
687 	 * because we have extra space reserved for deletions.
688 	 */
689 
690 	dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len,
691 		dentry->d_name.name, inode->i_ino, dir->i_ino);
692 	ubifs_assert(mutex_is_locked(&dir->i_mutex));
693 	ubifs_assert(mutex_is_locked(&inode->i_mutex));
694 	err = check_dir_empty(c, dentry->d_inode);
695 	if (err)
696 		return err;
697 
698 	err = ubifs_budget_space(c, &req);
699 	if (err) {
700 		if (err != -ENOSPC)
701 			return err;
702 		budgeted = 0;
703 	}
704 
705 	lock_2_inodes(dir, inode);
706 	inode->i_ctime = ubifs_current_time(dir);
707 	clear_nlink(inode);
708 	drop_nlink(dir);
709 	dir->i_size -= sz_change;
710 	dir_ui->ui_size = dir->i_size;
711 	dir->i_mtime = dir->i_ctime = inode->i_ctime;
712 	err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 1, 0);
713 	if (err)
714 		goto out_cancel;
715 	unlock_2_inodes(dir, inode);
716 
717 	if (budgeted)
718 		ubifs_release_budget(c, &req);
719 	else {
720 		/* We've deleted something - clean the "no space" flags */
721 		c->nospace = c->nospace_rp = 0;
722 		smp_wmb();
723 	}
724 	return 0;
725 
726 out_cancel:
727 	dir->i_size += sz_change;
728 	dir_ui->ui_size = dir->i_size;
729 	inc_nlink(dir);
730 	inc_nlink(inode);
731 	inc_nlink(inode);
732 	unlock_2_inodes(dir, inode);
733 	if (budgeted)
734 		ubifs_release_budget(c, &req);
735 	return err;
736 }
737 
738 static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
739 {
740 	struct inode *inode;
741 	struct ubifs_inode *dir_ui = ubifs_inode(dir);
742 	struct ubifs_info *c = dir->i_sb->s_fs_info;
743 	int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
744 	struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1 };
745 
746 	/*
747 	 * Budget request settings: new inode, new direntry and changing parent
748 	 * directory inode.
749 	 */
750 
751 	dbg_gen("dent '%.*s', mode %#x in dir ino %lu",
752 		dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
753 
754 	err = ubifs_budget_space(c, &req);
755 	if (err)
756 		return err;
757 
758 	inode = ubifs_new_inode(c, dir, S_IFDIR | mode);
759 	if (IS_ERR(inode)) {
760 		err = PTR_ERR(inode);
761 		goto out_budg;
762 	}
763 
764 	mutex_lock(&dir_ui->ui_mutex);
765 	insert_inode_hash(inode);
766 	inc_nlink(inode);
767 	inc_nlink(dir);
768 	dir->i_size += sz_change;
769 	dir_ui->ui_size = dir->i_size;
770 	dir->i_mtime = dir->i_ctime = inode->i_ctime;
771 	err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
772 	if (err) {
773 		ubifs_err("cannot create directory, error %d", err);
774 		goto out_cancel;
775 	}
776 	mutex_unlock(&dir_ui->ui_mutex);
777 
778 	ubifs_release_budget(c, &req);
779 	d_instantiate(dentry, inode);
780 	return 0;
781 
782 out_cancel:
783 	dir->i_size -= sz_change;
784 	dir_ui->ui_size = dir->i_size;
785 	drop_nlink(dir);
786 	mutex_unlock(&dir_ui->ui_mutex);
787 	make_bad_inode(inode);
788 	iput(inode);
789 out_budg:
790 	ubifs_release_budget(c, &req);
791 	return err;
792 }
793 
794 static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
795 		       int mode, dev_t rdev)
796 {
797 	struct inode *inode;
798 	struct ubifs_inode *ui;
799 	struct ubifs_inode *dir_ui = ubifs_inode(dir);
800 	struct ubifs_info *c = dir->i_sb->s_fs_info;
801 	union ubifs_dev_desc *dev = NULL;
802 	int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
803 	int err, devlen = 0;
804 	struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
805 					.new_ino_d = ALIGN(devlen, 8),
806 					.dirtied_ino = 1 };
807 
808 	/*
809 	 * Budget request settings: new inode, new direntry and changing parent
810 	 * directory inode.
811 	 */
812 
813 	dbg_gen("dent '%.*s' in dir ino %lu",
814 		dentry->d_name.len, dentry->d_name.name, dir->i_ino);
815 
816 	if (!new_valid_dev(rdev))
817 		return -EINVAL;
818 
819 	if (S_ISBLK(mode) || S_ISCHR(mode)) {
820 		dev = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS);
821 		if (!dev)
822 			return -ENOMEM;
823 		devlen = ubifs_encode_dev(dev, rdev);
824 	}
825 
826 	err = ubifs_budget_space(c, &req);
827 	if (err) {
828 		kfree(dev);
829 		return err;
830 	}
831 
832 	inode = ubifs_new_inode(c, dir, mode);
833 	if (IS_ERR(inode)) {
834 		kfree(dev);
835 		err = PTR_ERR(inode);
836 		goto out_budg;
837 	}
838 
839 	init_special_inode(inode, inode->i_mode, rdev);
840 	inode->i_size = ubifs_inode(inode)->ui_size = devlen;
841 	ui = ubifs_inode(inode);
842 	ui->data = dev;
843 	ui->data_len = devlen;
844 
845 	mutex_lock(&dir_ui->ui_mutex);
846 	dir->i_size += sz_change;
847 	dir_ui->ui_size = dir->i_size;
848 	dir->i_mtime = dir->i_ctime = inode->i_ctime;
849 	err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
850 	if (err)
851 		goto out_cancel;
852 	mutex_unlock(&dir_ui->ui_mutex);
853 
854 	ubifs_release_budget(c, &req);
855 	insert_inode_hash(inode);
856 	d_instantiate(dentry, inode);
857 	return 0;
858 
859 out_cancel:
860 	dir->i_size -= sz_change;
861 	dir_ui->ui_size = dir->i_size;
862 	mutex_unlock(&dir_ui->ui_mutex);
863 	make_bad_inode(inode);
864 	iput(inode);
865 out_budg:
866 	ubifs_release_budget(c, &req);
867 	return err;
868 }
869 
870 static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
871 			 const char *symname)
872 {
873 	struct inode *inode;
874 	struct ubifs_inode *ui;
875 	struct ubifs_inode *dir_ui = ubifs_inode(dir);
876 	struct ubifs_info *c = dir->i_sb->s_fs_info;
877 	int err, len = strlen(symname);
878 	int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
879 	struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
880 					.new_ino_d = ALIGN(len, 8),
881 					.dirtied_ino = 1 };
882 
883 	/*
884 	 * Budget request settings: new inode, new direntry and changing parent
885 	 * directory inode.
886 	 */
887 
888 	dbg_gen("dent '%.*s', target '%s' in dir ino %lu", dentry->d_name.len,
889 		dentry->d_name.name, symname, dir->i_ino);
890 
891 	if (len > UBIFS_MAX_INO_DATA)
892 		return -ENAMETOOLONG;
893 
894 	err = ubifs_budget_space(c, &req);
895 	if (err)
896 		return err;
897 
898 	inode = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO);
899 	if (IS_ERR(inode)) {
900 		err = PTR_ERR(inode);
901 		goto out_budg;
902 	}
903 
904 	ui = ubifs_inode(inode);
905 	ui->data = kmalloc(len + 1, GFP_NOFS);
906 	if (!ui->data) {
907 		err = -ENOMEM;
908 		goto out_inode;
909 	}
910 
911 	memcpy(ui->data, symname, len);
912 	((char *)ui->data)[len] = '\0';
913 	/*
914 	 * The terminating zero byte is not written to the flash media and it
915 	 * is put just to make later in-memory string processing simpler. Thus,
916 	 * data length is @len, not @len + %1.
917 	 */
918 	ui->data_len = len;
919 	inode->i_size = ubifs_inode(inode)->ui_size = len;
920 
921 	mutex_lock(&dir_ui->ui_mutex);
922 	dir->i_size += sz_change;
923 	dir_ui->ui_size = dir->i_size;
924 	dir->i_mtime = dir->i_ctime = inode->i_ctime;
925 	err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
926 	if (err)
927 		goto out_cancel;
928 	mutex_unlock(&dir_ui->ui_mutex);
929 
930 	ubifs_release_budget(c, &req);
931 	insert_inode_hash(inode);
932 	d_instantiate(dentry, inode);
933 	return 0;
934 
935 out_cancel:
936 	dir->i_size -= sz_change;
937 	dir_ui->ui_size = dir->i_size;
938 	mutex_unlock(&dir_ui->ui_mutex);
939 out_inode:
940 	make_bad_inode(inode);
941 	iput(inode);
942 out_budg:
943 	ubifs_release_budget(c, &req);
944 	return err;
945 }
946 
947 /**
948  * lock_3_inodes - a wrapper for locking three UBIFS inodes.
949  * @inode1: first inode
950  * @inode2: second inode
951  * @inode3: third inode
952  *
953  * This function is used for 'ubifs_rename()' and @inode1 may be the same as
954  * @inode2 whereas @inode3 may be %NULL.
955  *
956  * We do not implement any tricks to guarantee strict lock ordering, because
957  * VFS has already done it for us on the @i_mutex. So this is just a simple
958  * wrapper function.
959  */
960 static void lock_3_inodes(struct inode *inode1, struct inode *inode2,
961 			  struct inode *inode3)
962 {
963 	mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1);
964 	if (inode2 != inode1)
965 		mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2);
966 	if (inode3)
967 		mutex_lock_nested(&ubifs_inode(inode3)->ui_mutex, WB_MUTEX_3);
968 }
969 
970 /**
971  * unlock_3_inodes - a wrapper for unlocking three UBIFS inodes for rename.
972  * @inode1: first inode
973  * @inode2: second inode
974  * @inode3: third inode
975  */
976 static void unlock_3_inodes(struct inode *inode1, struct inode *inode2,
977 			    struct inode *inode3)
978 {
979 	if (inode3)
980 		mutex_unlock(&ubifs_inode(inode3)->ui_mutex);
981 	if (inode1 != inode2)
982 		mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
983 	mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
984 }
985 
986 static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
987 			struct inode *new_dir, struct dentry *new_dentry)
988 {
989 	struct ubifs_info *c = old_dir->i_sb->s_fs_info;
990 	struct inode *old_inode = old_dentry->d_inode;
991 	struct inode *new_inode = new_dentry->d_inode;
992 	struct ubifs_inode *old_inode_ui = ubifs_inode(old_inode);
993 	int err, release, sync = 0, move = (new_dir != old_dir);
994 	int is_dir = S_ISDIR(old_inode->i_mode);
995 	int unlink = !!new_inode;
996 	int new_sz = CALC_DENT_SIZE(new_dentry->d_name.len);
997 	int old_sz = CALC_DENT_SIZE(old_dentry->d_name.len);
998 	struct ubifs_budget_req req = { .new_dent = 1, .mod_dent = 1,
999 					.dirtied_ino = 3 };
1000 	struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
1001 			.dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
1002 	struct timespec time;
1003 
1004 	/*
1005 	 * Budget request settings: deletion direntry, new direntry, removing
1006 	 * the old inode, and changing old and new parent directory inodes.
1007 	 *
1008 	 * However, this operation also marks the target inode as dirty and
1009 	 * does not write it, so we allocate budget for the target inode
1010 	 * separately.
1011 	 */
1012 
1013 	dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in "
1014 		"dir ino %lu", old_dentry->d_name.len, old_dentry->d_name.name,
1015 		old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len,
1016 		new_dentry->d_name.name, new_dir->i_ino);
1017 	ubifs_assert(mutex_is_locked(&old_dir->i_mutex));
1018 	ubifs_assert(mutex_is_locked(&new_dir->i_mutex));
1019 	if (unlink)
1020 		ubifs_assert(mutex_is_locked(&new_inode->i_mutex));
1021 
1022 
1023 	if (unlink && is_dir) {
1024 		err = check_dir_empty(c, new_inode);
1025 		if (err)
1026 			return err;
1027 	}
1028 
1029 	err = ubifs_budget_space(c, &req);
1030 	if (err)
1031 		return err;
1032 	err = ubifs_budget_space(c, &ino_req);
1033 	if (err) {
1034 		ubifs_release_budget(c, &req);
1035 		return err;
1036 	}
1037 
1038 	lock_3_inodes(old_dir, new_dir, new_inode);
1039 
1040 	/*
1041 	 * Like most other Unix systems, set the @i_ctime for inodes on a
1042 	 * rename.
1043 	 */
1044 	time = ubifs_current_time(old_dir);
1045 	old_inode->i_ctime = time;
1046 
1047 	/* We must adjust parent link count when renaming directories */
1048 	if (is_dir) {
1049 		if (move) {
1050 			/*
1051 			 * @old_dir loses a link because we are moving
1052 			 * @old_inode to a different directory.
1053 			 */
1054 			drop_nlink(old_dir);
1055 			/*
1056 			 * @new_dir only gains a link if we are not also
1057 			 * overwriting an existing directory.
1058 			 */
1059 			if (!unlink)
1060 				inc_nlink(new_dir);
1061 		} else {
1062 			/*
1063 			 * @old_inode is not moving to a different directory,
1064 			 * but @old_dir still loses a link if we are
1065 			 * overwriting an existing directory.
1066 			 */
1067 			if (unlink)
1068 				drop_nlink(old_dir);
1069 		}
1070 	}
1071 
1072 	old_dir->i_size -= old_sz;
1073 	ubifs_inode(old_dir)->ui_size = old_dir->i_size;
1074 	old_dir->i_mtime = old_dir->i_ctime = time;
1075 	new_dir->i_mtime = new_dir->i_ctime = time;
1076 
1077 	/*
1078 	 * And finally, if we unlinked a direntry which happened to have the
1079 	 * same name as the moved direntry, we have to decrement @i_nlink of
1080 	 * the unlinked inode and change its ctime.
1081 	 */
1082 	if (unlink) {
1083 		/*
1084 		 * Directories cannot have hard-links, so if this is a
1085 		 * directory, decrement its @i_nlink twice because an empty
1086 		 * directory has @i_nlink 2.
1087 		 */
1088 		if (is_dir)
1089 			drop_nlink(new_inode);
1090 		new_inode->i_ctime = time;
1091 		drop_nlink(new_inode);
1092 	} else {
1093 		new_dir->i_size += new_sz;
1094 		ubifs_inode(new_dir)->ui_size = new_dir->i_size;
1095 	}
1096 
1097 	/*
1098 	 * Do not ask 'ubifs_jnl_rename()' to flush write-buffer if @old_inode
1099 	 * is dirty, because this will be done later on at the end of
1100 	 * 'ubifs_rename()'.
1101 	 */
1102 	if (IS_SYNC(old_inode)) {
1103 		sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir);
1104 		if (unlink && IS_SYNC(new_inode))
1105 			sync = 1;
1106 	}
1107 	err = ubifs_jnl_rename(c, old_dir, old_dentry, new_dir, new_dentry,
1108 			       sync);
1109 	if (err)
1110 		goto out_cancel;
1111 
1112 	unlock_3_inodes(old_dir, new_dir, new_inode);
1113 	ubifs_release_budget(c, &req);
1114 
1115 	mutex_lock(&old_inode_ui->ui_mutex);
1116 	release = old_inode_ui->dirty;
1117 	mark_inode_dirty_sync(old_inode);
1118 	mutex_unlock(&old_inode_ui->ui_mutex);
1119 
1120 	if (release)
1121 		ubifs_release_budget(c, &ino_req);
1122 	if (IS_SYNC(old_inode))
1123 		err = old_inode->i_sb->s_op->write_inode(old_inode, 1);
1124 	return err;
1125 
1126 out_cancel:
1127 	if (unlink) {
1128 		if (is_dir)
1129 			inc_nlink(new_inode);
1130 		inc_nlink(new_inode);
1131 	} else {
1132 		new_dir->i_size -= new_sz;
1133 		ubifs_inode(new_dir)->ui_size = new_dir->i_size;
1134 	}
1135 	old_dir->i_size += old_sz;
1136 	ubifs_inode(old_dir)->ui_size = old_dir->i_size;
1137 	if (is_dir) {
1138 		if (move) {
1139 			inc_nlink(old_dir);
1140 			if (!unlink)
1141 				drop_nlink(new_dir);
1142 		} else {
1143 			if (unlink)
1144 				inc_nlink(old_dir);
1145 		}
1146 	}
1147 	unlock_3_inodes(old_dir, new_dir, new_inode);
1148 	ubifs_release_budget(c, &ino_req);
1149 	ubifs_release_budget(c, &req);
1150 	return err;
1151 }
1152 
1153 int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1154 		  struct kstat *stat)
1155 {
1156 	loff_t size;
1157 	struct inode *inode = dentry->d_inode;
1158 	struct ubifs_inode *ui = ubifs_inode(inode);
1159 
1160 	mutex_lock(&ui->ui_mutex);
1161 	stat->dev = inode->i_sb->s_dev;
1162 	stat->ino = inode->i_ino;
1163 	stat->mode = inode->i_mode;
1164 	stat->nlink = inode->i_nlink;
1165 	stat->uid = inode->i_uid;
1166 	stat->gid = inode->i_gid;
1167 	stat->rdev = inode->i_rdev;
1168 	stat->atime = inode->i_atime;
1169 	stat->mtime = inode->i_mtime;
1170 	stat->ctime = inode->i_ctime;
1171 	stat->blksize = UBIFS_BLOCK_SIZE;
1172 	stat->size = ui->ui_size;
1173 
1174 	/*
1175 	 * Unfortunately, the 'stat()' system call was designed for block
1176 	 * device based file systems, and it is not appropriate for UBIFS,
1177 	 * because UBIFS does not have notion of "block". For example, it is
1178 	 * difficult to tell how many block a directory takes - it actually
1179 	 * takes less than 300 bytes, but we have to round it to block size,
1180 	 * which introduces large mistake. This makes utilities like 'du' to
1181 	 * report completely senseless numbers. This is the reason why UBIFS
1182 	 * goes the same way as JFFS2 - it reports zero blocks for everything
1183 	 * but regular files, which makes more sense than reporting completely
1184 	 * wrong sizes.
1185 	 */
1186 	if (S_ISREG(inode->i_mode)) {
1187 		size = ui->xattr_size;
1188 		size += stat->size;
1189 		size = ALIGN(size, UBIFS_BLOCK_SIZE);
1190 		/*
1191 		 * Note, user-space expects 512-byte blocks count irrespectively
1192 		 * of what was reported in @stat->size.
1193 		 */
1194 		stat->blocks = size >> 9;
1195 	} else
1196 		stat->blocks = 0;
1197 	mutex_unlock(&ui->ui_mutex);
1198 	return 0;
1199 }
1200 
1201 const struct inode_operations ubifs_dir_inode_operations = {
1202 	.lookup      = ubifs_lookup,
1203 	.create      = ubifs_create,
1204 	.link        = ubifs_link,
1205 	.symlink     = ubifs_symlink,
1206 	.unlink      = ubifs_unlink,
1207 	.mkdir       = ubifs_mkdir,
1208 	.rmdir       = ubifs_rmdir,
1209 	.mknod       = ubifs_mknod,
1210 	.rename      = ubifs_rename,
1211 	.setattr     = ubifs_setattr,
1212 	.getattr     = ubifs_getattr,
1213 #ifdef CONFIG_UBIFS_FS_XATTR
1214 	.setxattr    = ubifs_setxattr,
1215 	.getxattr    = ubifs_getxattr,
1216 	.listxattr   = ubifs_listxattr,
1217 	.removexattr = ubifs_removexattr,
1218 #endif
1219 };
1220 
1221 const struct file_operations ubifs_dir_operations = {
1222 	.llseek         = ubifs_dir_llseek,
1223 	.release        = ubifs_dir_release,
1224 	.read           = generic_read_dir,
1225 	.readdir        = ubifs_readdir,
1226 	.fsync          = ubifs_fsync,
1227 	.unlocked_ioctl = ubifs_ioctl,
1228 #ifdef CONFIG_COMPAT
1229 	.compat_ioctl   = ubifs_compat_ioctl,
1230 #endif
1231 };
1232