1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* -*- mode: c; c-basic-offset: 8; -*- 3 * vim: noexpandtab sw=8 ts=8 sts=0: 4 * 5 * dcache.c 6 * 7 * dentry cache handling code 8 * 9 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 10 */ 11 12 #include <linux/fs.h> 13 #include <linux/types.h> 14 #include <linux/slab.h> 15 #include <linux/namei.h> 16 17 #include <cluster/masklog.h> 18 19 #include "ocfs2.h" 20 21 #include "alloc.h" 22 #include "dcache.h" 23 #include "dlmglue.h" 24 #include "file.h" 25 #include "inode.h" 26 #include "ocfs2_trace.h" 27 28 void ocfs2_dentry_attach_gen(struct dentry *dentry) 29 { 30 unsigned long gen = 31 OCFS2_I(d_inode(dentry->d_parent))->ip_dir_lock_gen; 32 BUG_ON(d_inode(dentry)); 33 dentry->d_fsdata = (void *)gen; 34 } 35 36 37 static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags) 38 { 39 struct inode *inode; 40 int ret = 0; /* if all else fails, just return false */ 41 struct ocfs2_super *osb; 42 43 if (flags & LOOKUP_RCU) 44 return -ECHILD; 45 46 inode = d_inode(dentry); 47 osb = OCFS2_SB(dentry->d_sb); 48 49 trace_ocfs2_dentry_revalidate(dentry, dentry->d_name.len, 50 dentry->d_name.name); 51 52 /* For a negative dentry - 53 * check the generation number of the parent and compare with the 54 * one stored in the inode. 55 */ 56 if (inode == NULL) { 57 unsigned long gen = (unsigned long) dentry->d_fsdata; 58 unsigned long pgen; 59 spin_lock(&dentry->d_lock); 60 pgen = OCFS2_I(d_inode(dentry->d_parent))->ip_dir_lock_gen; 61 spin_unlock(&dentry->d_lock); 62 trace_ocfs2_dentry_revalidate_negative(dentry->d_name.len, 63 dentry->d_name.name, 64 pgen, gen); 65 if (gen != pgen) 66 goto bail; 67 goto valid; 68 } 69 70 BUG_ON(!osb); 71 72 if (inode == osb->root_inode || is_bad_inode(inode)) 73 goto bail; 74 75 spin_lock(&OCFS2_I(inode)->ip_lock); 76 /* did we or someone else delete this inode? */ 77 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { 78 spin_unlock(&OCFS2_I(inode)->ip_lock); 79 trace_ocfs2_dentry_revalidate_delete( 80 (unsigned long long)OCFS2_I(inode)->ip_blkno); 81 goto bail; 82 } 83 spin_unlock(&OCFS2_I(inode)->ip_lock); 84 85 /* 86 * We don't need a cluster lock to test this because once an 87 * inode nlink hits zero, it never goes back. 88 */ 89 if (inode->i_nlink == 0) { 90 trace_ocfs2_dentry_revalidate_orphaned( 91 (unsigned long long)OCFS2_I(inode)->ip_blkno, 92 S_ISDIR(inode->i_mode)); 93 goto bail; 94 } 95 96 /* 97 * If the last lookup failed to create dentry lock, let us 98 * redo it. 99 */ 100 if (!dentry->d_fsdata) { 101 trace_ocfs2_dentry_revalidate_nofsdata( 102 (unsigned long long)OCFS2_I(inode)->ip_blkno); 103 goto bail; 104 } 105 106 valid: 107 ret = 1; 108 109 bail: 110 trace_ocfs2_dentry_revalidate_ret(ret); 111 return ret; 112 } 113 114 static int ocfs2_match_dentry(struct dentry *dentry, 115 u64 parent_blkno, 116 int skip_unhashed) 117 { 118 struct inode *parent; 119 120 /* 121 * ocfs2_lookup() does a d_splice_alias() _before_ attaching 122 * to the lock data, so we skip those here, otherwise 123 * ocfs2_dentry_attach_lock() will get its original dentry 124 * back. 125 */ 126 if (!dentry->d_fsdata) 127 return 0; 128 129 if (!dentry->d_parent) 130 return 0; 131 132 if (skip_unhashed && d_unhashed(dentry)) 133 return 0; 134 135 parent = d_inode(dentry->d_parent); 136 /* Negative parent dentry? */ 137 if (!parent) 138 return 0; 139 140 /* Name is in a different directory. */ 141 if (OCFS2_I(parent)->ip_blkno != parent_blkno) 142 return 0; 143 144 return 1; 145 } 146 147 /* 148 * Walk the inode alias list, and find a dentry which has a given 149 * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it 150 * is looking for a dentry_lock reference. The downconvert thread is 151 * looking to unhash aliases, so we allow it to skip any that already 152 * have that property. 153 */ 154 struct dentry *ocfs2_find_local_alias(struct inode *inode, 155 u64 parent_blkno, 156 int skip_unhashed) 157 { 158 struct dentry *dentry; 159 160 spin_lock(&inode->i_lock); 161 hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { 162 spin_lock(&dentry->d_lock); 163 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { 164 trace_ocfs2_find_local_alias(dentry->d_name.len, 165 dentry->d_name.name); 166 167 dget_dlock(dentry); 168 spin_unlock(&dentry->d_lock); 169 spin_unlock(&inode->i_lock); 170 return dentry; 171 } 172 spin_unlock(&dentry->d_lock); 173 } 174 spin_unlock(&inode->i_lock); 175 return NULL; 176 } 177 178 DEFINE_SPINLOCK(dentry_attach_lock); 179 180 /* 181 * Attach this dentry to a cluster lock. 182 * 183 * Dentry locks cover all links in a given directory to a particular 184 * inode. We do this so that ocfs2 can build a lock name which all 185 * nodes in the cluster can agree on at all times. Shoving full names 186 * in the cluster lock won't work due to size restrictions. Covering 187 * links inside of a directory is a good compromise because it still 188 * allows us to use the parent directory lock to synchronize 189 * operations. 190 * 191 * Call this function with the parent dir semaphore and the parent dir 192 * cluster lock held. 193 * 194 * The dir semaphore will protect us from having to worry about 195 * concurrent processes on our node trying to attach a lock at the 196 * same time. 197 * 198 * The dir cluster lock (held at either PR or EX mode) protects us 199 * from unlink and rename on other nodes. 200 * 201 * A dput() can happen asynchronously due to pruning, so we cover 202 * attaching and detaching the dentry lock with a 203 * dentry_attach_lock. 204 * 205 * A node which has done lookup on a name retains a protected read 206 * lock until final dput. If the user requests and unlink or rename, 207 * the protected read is upgraded to an exclusive lock. Other nodes 208 * who have seen the dentry will then be informed that they need to 209 * downgrade their lock, which will involve d_delete on the 210 * dentry. This happens in ocfs2_dentry_convert_worker(). 211 */ 212 int ocfs2_dentry_attach_lock(struct dentry *dentry, 213 struct inode *inode, 214 u64 parent_blkno) 215 { 216 int ret; 217 struct dentry *alias; 218 struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 219 220 trace_ocfs2_dentry_attach_lock(dentry->d_name.len, dentry->d_name.name, 221 (unsigned long long)parent_blkno, dl); 222 223 /* 224 * Negative dentry. We ignore these for now. 225 * 226 * XXX: Could we can improve ocfs2_dentry_revalidate() by 227 * tracking these? 228 */ 229 if (!inode) 230 return 0; 231 232 if (d_really_is_negative(dentry) && dentry->d_fsdata) { 233 /* Converting a negative dentry to positive 234 Clear dentry->d_fsdata */ 235 dentry->d_fsdata = dl = NULL; 236 } 237 238 if (dl) { 239 mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, 240 " \"%pd\": old parent: %llu, new: %llu\n", 241 dentry, 242 (unsigned long long)parent_blkno, 243 (unsigned long long)dl->dl_parent_blkno); 244 return 0; 245 } 246 247 alias = ocfs2_find_local_alias(inode, parent_blkno, 0); 248 if (alias) { 249 /* 250 * Great, an alias exists, which means we must have a 251 * dentry lock already. We can just grab the lock off 252 * the alias and add it to the list. 253 * 254 * We're depending here on the fact that this dentry 255 * was found and exists in the dcache and so must have 256 * a reference to the dentry_lock because we can't 257 * race creates. Final dput() cannot happen on it 258 * since we have it pinned, so our reference is safe. 259 */ 260 dl = alias->d_fsdata; 261 mlog_bug_on_msg(!dl, "parent %llu, ino %llu\n", 262 (unsigned long long)parent_blkno, 263 (unsigned long long)OCFS2_I(inode)->ip_blkno); 264 265 mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, 266 " \"%pd\": old parent: %llu, new: %llu\n", 267 dentry, 268 (unsigned long long)parent_blkno, 269 (unsigned long long)dl->dl_parent_blkno); 270 271 trace_ocfs2_dentry_attach_lock_found(dl->dl_lockres.l_name, 272 (unsigned long long)parent_blkno, 273 (unsigned long long)OCFS2_I(inode)->ip_blkno); 274 275 goto out_attach; 276 } 277 278 /* 279 * There are no other aliases 280 */ 281 dl = kmalloc(sizeof(*dl), GFP_NOFS); 282 if (!dl) { 283 ret = -ENOMEM; 284 mlog_errno(ret); 285 return ret; 286 } 287 288 dl->dl_count = 0; 289 /* 290 * Does this have to happen below, for all attaches, in case 291 * the struct inode gets blown away by the downconvert thread? 292 */ 293 dl->dl_inode = igrab(inode); 294 dl->dl_parent_blkno = parent_blkno; 295 ocfs2_dentry_lock_res_init(dl, parent_blkno, inode); 296 297 out_attach: 298 spin_lock(&dentry_attach_lock); 299 if (unlikely(dentry->d_fsdata && !alias)) { 300 /* d_fsdata is set by a racing thread which is doing 301 * the same thing as this thread is doing. Leave the racing 302 * thread going ahead and we return here. 303 */ 304 spin_unlock(&dentry_attach_lock); 305 iput(dl->dl_inode); 306 ocfs2_lock_res_free(&dl->dl_lockres); 307 kfree(dl); 308 return 0; 309 } 310 311 dentry->d_fsdata = dl; 312 dl->dl_count++; 313 spin_unlock(&dentry_attach_lock); 314 315 /* 316 * This actually gets us our PRMODE level lock. From now on, 317 * we'll have a notification if one of these names is 318 * destroyed on another node. 319 */ 320 ret = ocfs2_dentry_lock(dentry, 0); 321 if (!ret) 322 ocfs2_dentry_unlock(dentry, 0); 323 else 324 mlog_errno(ret); 325 326 /* 327 * In case of error, manually free the allocation and do the iput(). 328 * We need to do this because error here means no d_instantiate(), 329 * which means iput() will not be called during dput(dentry). 330 */ 331 if (ret < 0 && !alias) { 332 ocfs2_lock_res_free(&dl->dl_lockres); 333 BUG_ON(dl->dl_count != 1); 334 spin_lock(&dentry_attach_lock); 335 dentry->d_fsdata = NULL; 336 spin_unlock(&dentry_attach_lock); 337 kfree(dl); 338 iput(inode); 339 } 340 341 dput(alias); 342 343 return ret; 344 } 345 346 /* 347 * ocfs2_dentry_iput() and friends. 348 * 349 * At this point, our particular dentry is detached from the inodes 350 * alias list, so there's no way that the locking code can find it. 351 * 352 * The interesting stuff happens when we determine that our lock needs 353 * to go away because this is the last subdir alias in the 354 * system. This function needs to handle a couple things: 355 * 356 * 1) Synchronizing lock shutdown with the downconvert threads. This 357 * is already handled for us via the lockres release drop function 358 * called in ocfs2_release_dentry_lock() 359 * 360 * 2) A race may occur when we're doing our lock shutdown and 361 * another process wants to create a new dentry lock. Right now we 362 * let them race, which means that for a very short while, this 363 * node might have two locks on a lock resource. This should be a 364 * problem though because one of them is in the process of being 365 * thrown out. 366 */ 367 static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb, 368 struct ocfs2_dentry_lock *dl) 369 { 370 iput(dl->dl_inode); 371 ocfs2_simple_drop_lockres(osb, &dl->dl_lockres); 372 ocfs2_lock_res_free(&dl->dl_lockres); 373 kfree(dl); 374 } 375 376 void ocfs2_dentry_lock_put(struct ocfs2_super *osb, 377 struct ocfs2_dentry_lock *dl) 378 { 379 int unlock = 0; 380 381 BUG_ON(dl->dl_count == 0); 382 383 spin_lock(&dentry_attach_lock); 384 dl->dl_count--; 385 unlock = !dl->dl_count; 386 spin_unlock(&dentry_attach_lock); 387 388 if (unlock) 389 ocfs2_drop_dentry_lock(osb, dl); 390 } 391 392 static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode) 393 { 394 struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 395 396 if (!dl) { 397 /* 398 * No dentry lock is ok if we're disconnected or 399 * unhashed. 400 */ 401 if (!(dentry->d_flags & DCACHE_DISCONNECTED) && 402 !d_unhashed(dentry)) { 403 unsigned long long ino = 0ULL; 404 if (inode) 405 ino = (unsigned long long)OCFS2_I(inode)->ip_blkno; 406 mlog(ML_ERROR, "Dentry is missing cluster lock. " 407 "inode: %llu, d_flags: 0x%x, d_name: %pd\n", 408 ino, dentry->d_flags, dentry); 409 } 410 411 goto out; 412 } 413 414 mlog_bug_on_msg(dl->dl_count == 0, "dentry: %pd, count: %u\n", 415 dentry, dl->dl_count); 416 417 ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl); 418 419 out: 420 iput(inode); 421 } 422 423 /* 424 * d_move(), but keep the locks in sync. 425 * 426 * When we are done, "dentry" will have the parent dir and name of 427 * "target", which will be thrown away. 428 * 429 * We manually update the lock of "dentry" if need be. 430 * 431 * "target" doesn't have it's dentry lock touched - we allow the later 432 * dput() to handle this for us. 433 * 434 * This is called during ocfs2_rename(), while holding parent 435 * directory locks. The dentries have already been deleted on other 436 * nodes via ocfs2_remote_dentry_delete(). 437 * 438 * Normally, the VFS handles the d_move() for the file system, after 439 * the ->rename() callback. OCFS2 wants to handle this internally, so 440 * the new lock can be created atomically with respect to the cluster. 441 */ 442 void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target, 443 struct inode *old_dir, struct inode *new_dir) 444 { 445 int ret; 446 struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb); 447 struct inode *inode = d_inode(dentry); 448 449 /* 450 * Move within the same directory, so the actual lock info won't 451 * change. 452 * 453 * XXX: Is there any advantage to dropping the lock here? 454 */ 455 if (old_dir == new_dir) 456 goto out_move; 457 458 ocfs2_dentry_lock_put(osb, dentry->d_fsdata); 459 460 dentry->d_fsdata = NULL; 461 ret = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(new_dir)->ip_blkno); 462 if (ret) 463 mlog_errno(ret); 464 465 out_move: 466 d_move(dentry, target); 467 } 468 469 const struct dentry_operations ocfs2_dentry_ops = { 470 .d_revalidate = ocfs2_dentry_revalidate, 471 .d_iput = ocfs2_dentry_iput, 472 }; 473