/*
 *
 * Copyright (C) 2011 Novell Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 */

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/file.h>
#include <linux/xattr.h>
#include <linux/rbtree.h>
#include <linux/security.h>
#include <linux/cred.h>
#include "overlayfs.h"

struct ovl_cache_entry {
	unsigned int len;
	unsigned int type;
	u64 ino;
	struct list_head l_node;
	struct rb_node node;
	struct ovl_cache_entry *next_maybe_whiteout;
	bool is_whiteout;
	char name[];
};

struct ovl_dir_cache {
	long refcount;
	u64 version;
	struct list_head entries;
};

struct ovl_readdir_data {
	struct dir_context ctx;
	struct dentry *dentry;
	bool is_lowest;
	struct rb_root root;
	struct list_head *list;
	struct list_head middle;
	struct ovl_cache_entry *first_maybe_whiteout;
	int count;
	int err;
	bool d_type_supported;
};

struct ovl_dir_file {
	bool is_real;
	bool is_upper;
	struct ovl_dir_cache *cache;
	struct list_head *cursor;
	struct file *realfile;
	struct file *upperfile;
};

static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
{
	return container_of(n, struct ovl_cache_entry, node);
}

static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
						    const char *name, int len)
{
	struct rb_node *node = root->rb_node;
	int cmp;

	while (node) {
		struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);

		cmp = strncmp(name, p->name, len);
		if (cmp > 0)
			node = p->node.rb_right;
		else if (cmp < 0 || len < p->len)
			node = p->node.rb_left;
		else
			return p;
	}

	return NULL;
}

static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
						   const char *name, int len,
						   u64 ino, unsigned int d_type)
{
	struct ovl_cache_entry *p;
	size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);

	p = kmalloc(size, GFP_KERNEL);
	if (!p)
		return NULL;

	memcpy(p->name, name, len);
	p->name[len] = '\0';
	p->len = len;
	p->type = d_type;
	p->ino = ino;
	p->is_whiteout = false;

	if (d_type == DT_CHR) {
		p->next_maybe_whiteout = rdd->first_maybe_whiteout;
		rdd->first_maybe_whiteout = p;
	}
	return p;
}

static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
				  const char *name, int len, u64 ino,
				  unsigned int d_type)
{
	struct rb_node **newp = &rdd->root.rb_node;
	struct rb_node *parent = NULL;
	struct ovl_cache_entry *p;

	while (*newp) {
		int cmp;
		struct ovl_cache_entry *tmp;

		parent = *newp;
		tmp = ovl_cache_entry_from_node(*newp);
		cmp = strncmp(name, tmp->name, len);
		if (cmp > 0)
			newp = &tmp->node.rb_right;
		else if (cmp < 0 || len < tmp->len)
			newp = &tmp->node.rb_left;
		else
			return 0;
	}

	p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
	if (p == NULL)
		return -ENOMEM;

	list_add_tail(&p->l_node, rdd->list);
	rb_link_node(&p->node, parent, newp);
	rb_insert_color(&p->node, &rdd->root);

	return 0;
}

static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
			   const char *name, int namelen,
			   loff_t offset, u64 ino, unsigned int d_type)
{
	struct ovl_cache_entry *p;

	p = ovl_cache_entry_find(&rdd->root, name, namelen);
	if (p) {
		list_move_tail(&p->l_node, &rdd->middle);
	} else {
		p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
		if (p == NULL)
			rdd->err = -ENOMEM;
		else
			list_add_tail(&p->l_node, &rdd->middle);
	}

	return rdd->err;
}
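/*
 * Cache fill strategy (summary of the helpers above): entries from every
 * layer except the lowest are added through ovl_cache_entry_add_rb(), which
 * keys an rbtree by name so duplicates across layers are dropped.  The
 * lowest layer is filled through ovl_fill_lowest(): a name already present
 * in the rbtree is moved onto the temporary "middle" list, otherwise a new
 * entry is allocated there.  DT_CHR entries are chained on
 * first_maybe_whiteout for a later whiteout check.
 */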
void ovl_cache_free(struct list_head *list)
{
	struct ovl_cache_entry *p;
	struct ovl_cache_entry *n;

	list_for_each_entry_safe(p, n, list, l_node)
		kfree(p);

	INIT_LIST_HEAD(list);
}

static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
{
	struct ovl_dir_cache *cache = od->cache;

	WARN_ON(cache->refcount <= 0);
	cache->refcount--;
	if (!cache->refcount) {
		if (ovl_dir_cache(dentry) == cache)
			ovl_set_dir_cache(dentry, NULL);

		ovl_cache_free(&cache->entries);
		kfree(cache);
	}
}

static int ovl_fill_merge(struct dir_context *ctx, const char *name,
			  int namelen, loff_t offset, u64 ino,
			  unsigned int d_type)
{
	struct ovl_readdir_data *rdd =
		container_of(ctx, struct ovl_readdir_data, ctx);

	rdd->count++;
	if (!rdd->is_lowest)
		return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
	else
		return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
}

static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
{
	int err;
	struct ovl_cache_entry *p;
	struct dentry *dentry;
	const struct cred *old_cred;

	old_cred = ovl_override_creds(rdd->dentry->d_sb);

	err = down_write_killable(&dir->d_inode->i_rwsem);
	if (!err) {
		while (rdd->first_maybe_whiteout) {
			p = rdd->first_maybe_whiteout;
			rdd->first_maybe_whiteout = p->next_maybe_whiteout;
			dentry = lookup_one_len(p->name, dir, p->len);
			if (!IS_ERR(dentry)) {
				p->is_whiteout = ovl_is_whiteout(dentry);
				dput(dentry);
			}
		}
		inode_unlock(dir->d_inode);
	}
	revert_creds(old_cred);

	return err;
}

static inline int ovl_dir_read(struct path *realpath,
			       struct ovl_readdir_data *rdd)
{
	struct file *realfile;
	int err;

	realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
	if (IS_ERR(realfile))
		return PTR_ERR(realfile);

	rdd->first_maybe_whiteout = NULL;
	rdd->ctx.pos = 0;
	do {
		rdd->count = 0;
		rdd->err = 0;
		err = iterate_dir(realfile, &rdd->ctx);
		if (err >= 0)
			err = rdd->err;
	} while (!err && rdd->count);

	if (!err && rdd->first_maybe_whiteout && rdd->dentry)
		err = ovl_check_whiteouts(realpath->dentry, rdd);

	fput(realfile);

	return err;
}

static void ovl_dir_reset(struct file *file)
{
	struct ovl_dir_file *od = file->private_data;
	struct ovl_dir_cache *cache = od->cache;
	struct dentry *dentry = file->f_path.dentry;
	enum ovl_path_type type = ovl_path_type(dentry);

	if (cache && ovl_dentry_version_get(dentry) != cache->version) {
		ovl_cache_put(od, dentry);
		od->cache = NULL;
		od->cursor = NULL;
	}
	WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type));
	if (od->is_real && OVL_TYPE_MERGE(type))
		od->is_real = false;
}

static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
{
	int err;
	struct path realpath;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_merge,
		.dentry = dentry,
		.list = list,
		.root = RB_ROOT,
		.is_lowest = false,
	};
	int idx, next;

	for (idx = 0; idx != -1; idx = next) {
		next = ovl_path_next(idx, dentry, &realpath);

		if (next != -1) {
			err = ovl_dir_read(&realpath, &rdd);
			if (err)
				break;
		} else {
			/*
			 * Insert lowest layer entries before upper ones, this
			 * allows offsets to be reasonably constant
			 */
			list_add(&rdd.middle, rdd.list);
			rdd.is_lowest = true;
			err = ovl_dir_read(&realpath, &rdd);
			list_del(&rdd.middle);
		}
	}
	return err;
}
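/*
 * The merged cache is a flat list of entries, and f_pos for a merged
 * directory is simply an index into that list: ovl_seek_cursor() below
 * walks pos entries from the head.  Splicing the lowest layer entries in
 * ahead of the upper ones (via the "middle" list in ovl_dir_read_merged())
 * keeps those offsets reasonably stable when the cache is rebuilt.
 */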
static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
{
	struct list_head *p;
	loff_t off = 0;

	list_for_each(p, &od->cache->entries) {
		if (off >= pos)
			break;
		off++;
	}
	/* Cursor is safe since the cache is stable */
	od->cursor = p;
}

static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
{
	int res;
	struct ovl_dir_cache *cache;

	cache = ovl_dir_cache(dentry);
	if (cache && ovl_dentry_version_get(dentry) == cache->version) {
		cache->refcount++;
		return cache;
	}
	ovl_set_dir_cache(dentry, NULL);

	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
	if (!cache)
		return ERR_PTR(-ENOMEM);

	cache->refcount = 1;
	INIT_LIST_HEAD(&cache->entries);

	res = ovl_dir_read_merged(dentry, &cache->entries);
	if (res) {
		ovl_cache_free(&cache->entries);
		kfree(cache);
		return ERR_PTR(res);
	}

	cache->version = ovl_dentry_version_get(dentry);
	ovl_set_dir_cache(dentry, cache);

	return cache;
}

static int ovl_iterate(struct file *file, struct dir_context *ctx)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;
	struct ovl_cache_entry *p;

	if (!ctx->pos)
		ovl_dir_reset(file);

	if (od->is_real)
		return iterate_dir(od->realfile, ctx);

	if (!od->cache) {
		struct ovl_dir_cache *cache;

		cache = ovl_cache_get(dentry);
		if (IS_ERR(cache))
			return PTR_ERR(cache);

		od->cache = cache;
		ovl_seek_cursor(od, ctx->pos);
	}

	while (od->cursor != &od->cache->entries) {
		p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
		if (!p->is_whiteout)
			if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
				break;
		od->cursor = p->l_node.next;
		ctx->pos++;
	}
	return 0;
}

static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
{
	loff_t res;
	struct ovl_dir_file *od = file->private_data;

	inode_lock(file_inode(file));
	if (!file->f_pos)
		ovl_dir_reset(file);

	if (od->is_real) {
		res = vfs_llseek(od->realfile, offset, origin);
		file->f_pos = od->realfile->f_pos;
	} else {
		res = -EINVAL;

		switch (origin) {
		case SEEK_CUR:
			offset += file->f_pos;
			break;
		case SEEK_SET:
			break;
		default:
			goto out_unlock;
		}
		if (offset < 0)
			goto out_unlock;

		if (offset != file->f_pos) {
			file->f_pos = offset;
			if (od->cache)
				ovl_seek_cursor(od, offset);
		}
		res = offset;
	}
out_unlock:
	inode_unlock(file_inode(file));

	return res;
}

static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
			 int datasync)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;
	struct file *realfile = od->realfile;

	/*
	 * Need to check if we started out being a lower dir, but got copied up
	 */
	if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) {
		struct inode *inode = file_inode(file);

		realfile = lockless_dereference(od->upperfile);
		if (!realfile) {
			struct path upperpath;

			ovl_path_upper(dentry, &upperpath);
			realfile = ovl_path_open(&upperpath, O_RDONLY);
			smp_mb__before_spinlock();
			inode_lock(inode);
			if (!od->upperfile) {
				if (IS_ERR(realfile)) {
					inode_unlock(inode);
					return PTR_ERR(realfile);
				}
				od->upperfile = realfile;
			} else {
				/* somebody has beaten us to it */
				if (!IS_ERR(realfile))
					fput(realfile);
				realfile = od->upperfile;
			}
			inode_unlock(inode);
		}
	}

	return vfs_fsync_range(realfile, start, end, datasync);
}
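/*
 * od->upperfile is only opened lazily, in ovl_dir_fsync() above, when a
 * directory that was opened on a lower layer has since been copied up;
 * both the real file and (if set) the upper file are dropped in
 * ovl_dir_release() below.
 */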
static int ovl_dir_release(struct inode *inode, struct file *file)
{
	struct ovl_dir_file *od = file->private_data;

	if (od->cache) {
		inode_lock(inode);
		ovl_cache_put(od, file->f_path.dentry);
		inode_unlock(inode);
	}
	fput(od->realfile);
	if (od->upperfile)
		fput(od->upperfile);
	kfree(od);

	return 0;
}

static int ovl_dir_open(struct inode *inode, struct file *file)
{
	struct path realpath;
	struct file *realfile;
	struct ovl_dir_file *od;
	enum ovl_path_type type;

	od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
	if (!od)
		return -ENOMEM;

	type = ovl_path_real(file->f_path.dentry, &realpath);
	realfile = ovl_path_open(&realpath, file->f_flags);
	if (IS_ERR(realfile)) {
		kfree(od);
		return PTR_ERR(realfile);
	}
	od->realfile = realfile;
	od->is_real = !OVL_TYPE_MERGE(type);
	od->is_upper = OVL_TYPE_UPPER(type);
	file->private_data = od;

	return 0;
}

const struct file_operations ovl_dir_operations = {
	.read = generic_read_dir,
	.open = ovl_dir_open,
	.iterate = ovl_iterate,
	.llseek = ovl_dir_llseek,
	.fsync = ovl_dir_fsync,
	.release = ovl_dir_release,
};

int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
{
	int err;
	struct ovl_cache_entry *p;

	err = ovl_dir_read_merged(dentry, list);
	if (err)
		return err;

	err = 0;

	list_for_each_entry(p, list, l_node) {
		if (p->is_whiteout)
			continue;

		if (p->name[0] == '.') {
			if (p->len == 1)
				continue;
			if (p->len == 2 && p->name[1] == '.')
				continue;
		}
		err = -ENOTEMPTY;
		break;
	}

	return err;
}

void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
{
	struct ovl_cache_entry *p;

	inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
	list_for_each_entry(p, list, l_node) {
		struct dentry *dentry;

		if (!p->is_whiteout)
			continue;

		dentry = lookup_one_len(p->name, upper, p->len);
		if (IS_ERR(dentry)) {
			pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
			       upper->d_name.name, p->len, p->name,
			       (int) PTR_ERR(dentry));
			continue;
		}
		if (dentry->d_inode)
			ovl_cleanup(upper->d_inode, dentry);
		dput(dentry);
	}
	inode_unlock(upper->d_inode);
}

static int ovl_check_d_type(struct dir_context *ctx, const char *name,
			    int namelen, loff_t offset, u64 ino,
			    unsigned int d_type)
{
	struct ovl_readdir_data *rdd =
		container_of(ctx, struct ovl_readdir_data, ctx);

	/* Even if d_type is not supported, DT_DIR is returned for . and .. */
	if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
		return 0;

	if (d_type != DT_UNKNOWN)
		rdd->d_type_supported = true;

	return 0;
}

/*
 * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
 * if error is encountered.
 */
int ovl_check_d_type_supported(struct path *realpath)
{
	int err;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_check_d_type,
		.d_type_supported = false,
	};

	err = ovl_dir_read(realpath, &rdd);
	if (err)
		return err;

	return rdd.d_type_supported;
}
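/*
 * Workdir cleanup: ovl_workdir_cleanup() first tries a plain rmdir and only
 * recurses into the directory (via ovl_workdir_cleanup_recurse()) when that
 * fails, e.g. because the directory is not empty; the level counter bounds
 * the recursion depth, and non-directories or entries nested too deep are
 * removed with ovl_cleanup() directly.
 */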
static void ovl_workdir_cleanup_recurse(struct path *path, int level)
{
	int err;
	struct inode *dir = path->dentry->d_inode;
	LIST_HEAD(list);
	struct ovl_cache_entry *p;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_merge,
		.dentry = NULL,
		.list = &list,
		.root = RB_ROOT,
		.is_lowest = false,
	};

	err = ovl_dir_read(path, &rdd);
	if (err)
		goto out;

	inode_lock_nested(dir, I_MUTEX_PARENT);
	list_for_each_entry(p, &list, l_node) {
		struct dentry *dentry;

		if (p->name[0] == '.') {
			if (p->len == 1)
				continue;
			if (p->len == 2 && p->name[1] == '.')
				continue;
		}
		dentry = lookup_one_len(p->name, path->dentry, p->len);
		if (IS_ERR(dentry))
			continue;
		if (dentry->d_inode)
			ovl_workdir_cleanup(dir, path->mnt, dentry, level);
		dput(dentry);
	}
	inode_unlock(dir);
out:
	ovl_cache_free(&list);
}

void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
			 struct dentry *dentry, int level)
{
	int err;

	if (!d_is_dir(dentry) || level > 1) {
		ovl_cleanup(dir, dentry);
		return;
	}

	err = ovl_do_rmdir(dir, dentry);
	if (err) {
		struct path path = { .mnt = mnt, .dentry = dentry };

		inode_unlock(dir);
		ovl_workdir_cleanup_recurse(&path, level + 1);
		inode_lock_nested(dir, I_MUTEX_PARENT);
		ovl_cleanup(dir, dentry);
	}
}