1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* -*- mode: c; c-basic-offset: 8; -*- 3 * vim: noexpandtab sw=8 ts=8 sts=0: 4 * 5 * dcache.c 6 * 7 * dentry cache handling code 8 * 9 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 10 */ 11 12 #include <linux/fs.h> 13 #include <linux/types.h> 14 #include <linux/slab.h> 15 #include <linux/namei.h> 16 17 #include <cluster/masklog.h> 18 19 #include "ocfs2.h" 20 21 #include "alloc.h" 22 #include "dcache.h" 23 #include "dlmglue.h" 24 #include "file.h" 25 #include "inode.h" 26 #include "ocfs2_trace.h" 27 28 void ocfs2_dentry_attach_gen(struct dentry *dentry) 29 { 30 unsigned long gen = 31 OCFS2_I(d_inode(dentry->d_parent))->ip_dir_lock_gen; 32 BUG_ON(d_inode(dentry)); 33 dentry->d_fsdata = (void *)gen; 34 } 35 36 37 static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags) 38 { 39 struct inode *inode; 40 int ret = 0; /* if all else fails, just return false */ 41 struct ocfs2_super *osb; 42 43 if (flags & LOOKUP_RCU) 44 return -ECHILD; 45 46 inode = d_inode(dentry); 47 osb = OCFS2_SB(dentry->d_sb); 48 49 trace_ocfs2_dentry_revalidate(dentry, dentry->d_name.len, 50 dentry->d_name.name); 51 52 /* For a negative dentry - 53 * check the generation number of the parent and compare with the 54 * one stored in the inode. 55 */ 56 if (inode == NULL) { 57 unsigned long gen = (unsigned long) dentry->d_fsdata; 58 unsigned long pgen; 59 spin_lock(&dentry->d_lock); 60 pgen = OCFS2_I(d_inode(dentry->d_parent))->ip_dir_lock_gen; 61 spin_unlock(&dentry->d_lock); 62 trace_ocfs2_dentry_revalidate_negative(dentry->d_name.len, 63 dentry->d_name.name, 64 pgen, gen); 65 if (gen != pgen) 66 goto bail; 67 goto valid; 68 } 69 70 BUG_ON(!osb); 71 72 if (inode == osb->root_inode || is_bad_inode(inode)) 73 goto bail; 74 75 spin_lock(&OCFS2_I(inode)->ip_lock); 76 /* did we or someone else delete this inode? */ 77 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { 78 spin_unlock(&OCFS2_I(inode)->ip_lock); 79 trace_ocfs2_dentry_revalidate_delete( 80 (unsigned long long)OCFS2_I(inode)->ip_blkno); 81 goto bail; 82 } 83 spin_unlock(&OCFS2_I(inode)->ip_lock); 84 85 /* 86 * We don't need a cluster lock to test this because once an 87 * inode nlink hits zero, it never goes back. 88 */ 89 if (inode->i_nlink == 0) { 90 trace_ocfs2_dentry_revalidate_orphaned( 91 (unsigned long long)OCFS2_I(inode)->ip_blkno, 92 S_ISDIR(inode->i_mode)); 93 goto bail; 94 } 95 96 /* 97 * If the last lookup failed to create dentry lock, let us 98 * redo it. 99 */ 100 if (!dentry->d_fsdata) { 101 trace_ocfs2_dentry_revalidate_nofsdata( 102 (unsigned long long)OCFS2_I(inode)->ip_blkno); 103 goto bail; 104 } 105 106 valid: 107 ret = 1; 108 109 bail: 110 trace_ocfs2_dentry_revalidate_ret(ret); 111 return ret; 112 } 113 114 static int ocfs2_match_dentry(struct dentry *dentry, 115 u64 parent_blkno, 116 int skip_unhashed) 117 { 118 struct inode *parent; 119 120 /* 121 * ocfs2_lookup() does a d_splice_alias() _before_ attaching 122 * to the lock data, so we skip those here, otherwise 123 * ocfs2_dentry_attach_lock() will get its original dentry 124 * back. 125 */ 126 if (!dentry->d_fsdata) 127 return 0; 128 129 if (!dentry->d_parent) 130 return 0; 131 132 if (skip_unhashed && d_unhashed(dentry)) 133 return 0; 134 135 parent = d_inode(dentry->d_parent); 136 /* Negative parent dentry? */ 137 if (!parent) 138 return 0; 139 140 /* Name is in a different directory. */ 141 if (OCFS2_I(parent)->ip_blkno != parent_blkno) 142 return 0; 143 144 return 1; 145 } 146 147 /* 148 * Walk the inode alias list, and find a dentry which has a given 149 * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it 150 * is looking for a dentry_lock reference. The downconvert thread is 151 * looking to unhash aliases, so we allow it to skip any that already 152 * have that property. 153 */ 154 struct dentry *ocfs2_find_local_alias(struct inode *inode, 155 u64 parent_blkno, 156 int skip_unhashed) 157 { 158 struct dentry *dentry; 159 160 spin_lock(&inode->i_lock); 161 hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { 162 spin_lock(&dentry->d_lock); 163 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { 164 trace_ocfs2_find_local_alias(dentry->d_name.len, 165 dentry->d_name.name); 166 167 dget_dlock(dentry); 168 spin_unlock(&dentry->d_lock); 169 spin_unlock(&inode->i_lock); 170 return dentry; 171 } 172 spin_unlock(&dentry->d_lock); 173 } 174 spin_unlock(&inode->i_lock); 175 return NULL; 176 } 177 178 DEFINE_SPINLOCK(dentry_attach_lock); 179 180 /* 181 * Attach this dentry to a cluster lock. 182 * 183 * Dentry locks cover all links in a given directory to a particular 184 * inode. We do this so that ocfs2 can build a lock name which all 185 * nodes in the cluster can agree on at all times. Shoving full names 186 * in the cluster lock won't work due to size restrictions. Covering 187 * links inside of a directory is a good compromise because it still 188 * allows us to use the parent directory lock to synchronize 189 * operations. 190 * 191 * Call this function with the parent dir semaphore and the parent dir 192 * cluster lock held. 193 * 194 * The dir semaphore will protect us from having to worry about 195 * concurrent processes on our node trying to attach a lock at the 196 * same time. 197 * 198 * The dir cluster lock (held at either PR or EX mode) protects us 199 * from unlink and rename on other nodes. 200 * 201 * A dput() can happen asynchronously due to pruning, so we cover 202 * attaching and detaching the dentry lock with a 203 * dentry_attach_lock. 204 * 205 * A node which has done lookup on a name retains a protected read 206 * lock until final dput. If the user requests and unlink or rename, 207 * the protected read is upgraded to an exclusive lock. Other nodes 208 * who have seen the dentry will then be informed that they need to 209 * downgrade their lock, which will involve d_delete on the 210 * dentry. This happens in ocfs2_dentry_convert_worker(). 211 */ 212 int ocfs2_dentry_attach_lock(struct dentry *dentry, 213 struct inode *inode, 214 u64 parent_blkno) 215 { 216 int ret; 217 struct dentry *alias; 218 struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 219 220 trace_ocfs2_dentry_attach_lock(dentry->d_name.len, dentry->d_name.name, 221 (unsigned long long)parent_blkno, dl); 222 223 /* 224 * Negative dentry. We ignore these for now. 225 * 226 * XXX: Could we can improve ocfs2_dentry_revalidate() by 227 * tracking these? 228 */ 229 if (!inode) 230 return 0; 231 232 if (d_really_is_negative(dentry) && dentry->d_fsdata) { 233 /* Converting a negative dentry to positive 234 Clear dentry->d_fsdata */ 235 dentry->d_fsdata = dl = NULL; 236 } 237 238 if (dl) { 239 mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, 240 " \"%pd\": old parent: %llu, new: %llu\n", 241 dentry, 242 (unsigned long long)parent_blkno, 243 (unsigned long long)dl->dl_parent_blkno); 244 return 0; 245 } 246 247 alias = ocfs2_find_local_alias(inode, parent_blkno, 0); 248 if (alias) { 249 /* 250 * Great, an alias exists, which means we must have a 251 * dentry lock already. We can just grab the lock off 252 * the alias and add it to the list. 253 * 254 * We're depending here on the fact that this dentry 255 * was found and exists in the dcache and so must have 256 * a reference to the dentry_lock because we can't 257 * race creates. Final dput() cannot happen on it 258 * since we have it pinned, so our reference is safe. 259 */ 260 dl = alias->d_fsdata; 261 mlog_bug_on_msg(!dl, "parent %llu, ino %llu\n", 262 (unsigned long long)parent_blkno, 263 (unsigned long long)OCFS2_I(inode)->ip_blkno); 264 265 mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, 266 " \"%pd\": old parent: %llu, new: %llu\n", 267 dentry, 268 (unsigned long long)parent_blkno, 269 (unsigned long long)dl->dl_parent_blkno); 270 271 trace_ocfs2_dentry_attach_lock_found(dl->dl_lockres.l_name, 272 (unsigned long long)parent_blkno, 273 (unsigned long long)OCFS2_I(inode)->ip_blkno); 274 275 goto out_attach; 276 } 277 278 /* 279 * There are no other aliases 280 */ 281 dl = kmalloc(sizeof(*dl), GFP_NOFS); 282 if (!dl) { 283 ret = -ENOMEM; 284 mlog_errno(ret); 285 return ret; 286 } 287 288 dl->dl_count = 0; 289 /* 290 * Does this have to happen below, for all attaches, in case 291 * the struct inode gets blown away by the downconvert thread? 292 */ 293 dl->dl_inode = igrab(inode); 294 dl->dl_parent_blkno = parent_blkno; 295 ocfs2_dentry_lock_res_init(dl, parent_blkno, inode); 296 297 out_attach: 298 spin_lock(&dentry_attach_lock); 299 dentry->d_fsdata = dl; 300 dl->dl_count++; 301 spin_unlock(&dentry_attach_lock); 302 303 /* 304 * This actually gets us our PRMODE level lock. From now on, 305 * we'll have a notification if one of these names is 306 * destroyed on another node. 307 */ 308 ret = ocfs2_dentry_lock(dentry, 0); 309 if (!ret) 310 ocfs2_dentry_unlock(dentry, 0); 311 else 312 mlog_errno(ret); 313 314 /* 315 * In case of error, manually free the allocation and do the iput(). 316 * We need to do this because error here means no d_instantiate(), 317 * which means iput() will not be called during dput(dentry). 318 */ 319 if (ret < 0 && !alias) { 320 ocfs2_lock_res_free(&dl->dl_lockres); 321 BUG_ON(dl->dl_count != 1); 322 spin_lock(&dentry_attach_lock); 323 dentry->d_fsdata = NULL; 324 spin_unlock(&dentry_attach_lock); 325 kfree(dl); 326 iput(inode); 327 } 328 329 dput(alias); 330 331 return ret; 332 } 333 334 /* 335 * ocfs2_dentry_iput() and friends. 336 * 337 * At this point, our particular dentry is detached from the inodes 338 * alias list, so there's no way that the locking code can find it. 339 * 340 * The interesting stuff happens when we determine that our lock needs 341 * to go away because this is the last subdir alias in the 342 * system. This function needs to handle a couple things: 343 * 344 * 1) Synchronizing lock shutdown with the downconvert threads. This 345 * is already handled for us via the lockres release drop function 346 * called in ocfs2_release_dentry_lock() 347 * 348 * 2) A race may occur when we're doing our lock shutdown and 349 * another process wants to create a new dentry lock. Right now we 350 * let them race, which means that for a very short while, this 351 * node might have two locks on a lock resource. This should be a 352 * problem though because one of them is in the process of being 353 * thrown out. 354 */ 355 static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb, 356 struct ocfs2_dentry_lock *dl) 357 { 358 iput(dl->dl_inode); 359 ocfs2_simple_drop_lockres(osb, &dl->dl_lockres); 360 ocfs2_lock_res_free(&dl->dl_lockres); 361 kfree(dl); 362 } 363 364 void ocfs2_dentry_lock_put(struct ocfs2_super *osb, 365 struct ocfs2_dentry_lock *dl) 366 { 367 int unlock = 0; 368 369 BUG_ON(dl->dl_count == 0); 370 371 spin_lock(&dentry_attach_lock); 372 dl->dl_count--; 373 unlock = !dl->dl_count; 374 spin_unlock(&dentry_attach_lock); 375 376 if (unlock) 377 ocfs2_drop_dentry_lock(osb, dl); 378 } 379 380 static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode) 381 { 382 struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 383 384 if (!dl) { 385 /* 386 * No dentry lock is ok if we're disconnected or 387 * unhashed. 388 */ 389 if (!(dentry->d_flags & DCACHE_DISCONNECTED) && 390 !d_unhashed(dentry)) { 391 unsigned long long ino = 0ULL; 392 if (inode) 393 ino = (unsigned long long)OCFS2_I(inode)->ip_blkno; 394 mlog(ML_ERROR, "Dentry is missing cluster lock. " 395 "inode: %llu, d_flags: 0x%x, d_name: %pd\n", 396 ino, dentry->d_flags, dentry); 397 } 398 399 goto out; 400 } 401 402 mlog_bug_on_msg(dl->dl_count == 0, "dentry: %pd, count: %u\n", 403 dentry, dl->dl_count); 404 405 ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl); 406 407 out: 408 iput(inode); 409 } 410 411 /* 412 * d_move(), but keep the locks in sync. 413 * 414 * When we are done, "dentry" will have the parent dir and name of 415 * "target", which will be thrown away. 416 * 417 * We manually update the lock of "dentry" if need be. 418 * 419 * "target" doesn't have it's dentry lock touched - we allow the later 420 * dput() to handle this for us. 421 * 422 * This is called during ocfs2_rename(), while holding parent 423 * directory locks. The dentries have already been deleted on other 424 * nodes via ocfs2_remote_dentry_delete(). 425 * 426 * Normally, the VFS handles the d_move() for the file system, after 427 * the ->rename() callback. OCFS2 wants to handle this internally, so 428 * the new lock can be created atomically with respect to the cluster. 429 */ 430 void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target, 431 struct inode *old_dir, struct inode *new_dir) 432 { 433 int ret; 434 struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb); 435 struct inode *inode = d_inode(dentry); 436 437 /* 438 * Move within the same directory, so the actual lock info won't 439 * change. 440 * 441 * XXX: Is there any advantage to dropping the lock here? 442 */ 443 if (old_dir == new_dir) 444 goto out_move; 445 446 ocfs2_dentry_lock_put(osb, dentry->d_fsdata); 447 448 dentry->d_fsdata = NULL; 449 ret = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(new_dir)->ip_blkno); 450 if (ret) 451 mlog_errno(ret); 452 453 out_move: 454 d_move(dentry, target); 455 } 456 457 const struct dentry_operations ocfs2_dentry_ops = { 458 .d_revalidate = ocfs2_dentry_revalidate, 459 .d_iput = ocfs2_dentry_iput, 460 }; 461