1 /* 2 * 3 * Copyright (C) 2011 Novell Inc. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published by 7 * the Free Software Foundation. 8 */ 9 10 #include <linux/fs.h> 11 #include <linux/slab.h> 12 #include <linux/namei.h> 13 #include <linux/file.h> 14 #include <linux/xattr.h> 15 #include <linux/rbtree.h> 16 #include <linux/security.h> 17 #include <linux/cred.h> 18 #include "overlayfs.h" 19 20 struct ovl_cache_entry { 21 unsigned int len; 22 unsigned int type; 23 u64 ino; 24 struct list_head l_node; 25 struct rb_node node; 26 struct ovl_cache_entry *next_maybe_whiteout; 27 bool is_whiteout; 28 char name[]; 29 }; 30 31 struct ovl_dir_cache { 32 long refcount; 33 u64 version; 34 struct list_head entries; 35 }; 36 37 struct ovl_readdir_data { 38 struct dir_context ctx; 39 bool is_lowest; 40 struct rb_root root; 41 struct list_head *list; 42 struct list_head middle; 43 struct ovl_cache_entry *first_maybe_whiteout; 44 int count; 45 int err; 46 bool d_type_supported; 47 }; 48 49 struct ovl_dir_file { 50 bool is_real; 51 bool is_upper; 52 struct ovl_dir_cache *cache; 53 struct list_head *cursor; 54 struct file *realfile; 55 struct file *upperfile; 56 }; 57 58 static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n) 59 { 60 return container_of(n, struct ovl_cache_entry, node); 61 } 62 63 static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, 64 const char *name, int len) 65 { 66 struct rb_node *node = root->rb_node; 67 int cmp; 68 69 while (node) { 70 struct ovl_cache_entry *p = ovl_cache_entry_from_node(node); 71 72 cmp = strncmp(name, p->name, len); 73 if (cmp > 0) 74 node = p->node.rb_right; 75 else if (cmp < 0 || len < p->len) 76 node = p->node.rb_left; 77 else 78 return p; 79 } 80 81 return NULL; 82 } 83 84 static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd, 85 const char *name, int len, 86 u64 ino, unsigned int d_type) 87 { 88 struct ovl_cache_entry *p; 89 size_t size = offsetof(struct ovl_cache_entry, name[len + 1]); 90 91 p = kmalloc(size, GFP_KERNEL); 92 if (!p) 93 return NULL; 94 95 memcpy(p->name, name, len); 96 p->name[len] = '\0'; 97 p->len = len; 98 p->type = d_type; 99 p->ino = ino; 100 p->is_whiteout = false; 101 102 if (d_type == DT_CHR) { 103 p->next_maybe_whiteout = rdd->first_maybe_whiteout; 104 rdd->first_maybe_whiteout = p; 105 } 106 return p; 107 } 108 109 static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, 110 const char *name, int len, u64 ino, 111 unsigned int d_type) 112 { 113 struct rb_node **newp = &rdd->root.rb_node; 114 struct rb_node *parent = NULL; 115 struct ovl_cache_entry *p; 116 117 while (*newp) { 118 int cmp; 119 struct ovl_cache_entry *tmp; 120 121 parent = *newp; 122 tmp = ovl_cache_entry_from_node(*newp); 123 cmp = strncmp(name, tmp->name, len); 124 if (cmp > 0) 125 newp = &tmp->node.rb_right; 126 else if (cmp < 0 || len < tmp->len) 127 newp = &tmp->node.rb_left; 128 else 129 return 0; 130 } 131 132 p = ovl_cache_entry_new(rdd, name, len, ino, d_type); 133 if (p == NULL) 134 return -ENOMEM; 135 136 list_add_tail(&p->l_node, rdd->list); 137 rb_link_node(&p->node, parent, newp); 138 rb_insert_color(&p->node, &rdd->root); 139 140 return 0; 141 } 142 143 static int ovl_fill_lowest(struct ovl_readdir_data *rdd, 144 const char *name, int namelen, 145 loff_t offset, u64 ino, unsigned int d_type) 146 { 147 struct ovl_cache_entry *p; 148 149 p = ovl_cache_entry_find(&rdd->root, name, namelen); 150 if (p) { 151 list_move_tail(&p->l_node, &rdd->middle); 152 } else { 153 p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type); 154 if (p == NULL) 155 rdd->err = -ENOMEM; 156 else 157 list_add_tail(&p->l_node, &rdd->middle); 158 } 159 160 return rdd->err; 161 } 162 163 void ovl_cache_free(struct list_head *list) 164 { 165 struct ovl_cache_entry *p; 166 struct ovl_cache_entry *n; 167 168 list_for_each_entry_safe(p, n, list, l_node) 169 kfree(p); 170 171 INIT_LIST_HEAD(list); 172 } 173 174 static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry) 175 { 176 struct ovl_dir_cache *cache = od->cache; 177 178 WARN_ON(cache->refcount <= 0); 179 cache->refcount--; 180 if (!cache->refcount) { 181 if (ovl_dir_cache(dentry) == cache) 182 ovl_set_dir_cache(dentry, NULL); 183 184 ovl_cache_free(&cache->entries); 185 kfree(cache); 186 } 187 } 188 189 static int ovl_fill_merge(struct dir_context *ctx, const char *name, 190 int namelen, loff_t offset, u64 ino, 191 unsigned int d_type) 192 { 193 struct ovl_readdir_data *rdd = 194 container_of(ctx, struct ovl_readdir_data, ctx); 195 196 rdd->count++; 197 if (!rdd->is_lowest) 198 return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type); 199 else 200 return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type); 201 } 202 203 static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) 204 { 205 int err; 206 struct ovl_cache_entry *p; 207 struct dentry *dentry; 208 const struct cred *old_cred; 209 struct cred *override_cred; 210 211 override_cred = prepare_creds(); 212 if (!override_cred) 213 return -ENOMEM; 214 215 /* 216 * CAP_DAC_OVERRIDE for lookup 217 */ 218 cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); 219 old_cred = override_creds(override_cred); 220 221 err = mutex_lock_killable(&dir->d_inode->i_mutex); 222 if (!err) { 223 while (rdd->first_maybe_whiteout) { 224 p = rdd->first_maybe_whiteout; 225 rdd->first_maybe_whiteout = p->next_maybe_whiteout; 226 dentry = lookup_one_len(p->name, dir, p->len); 227 if (!IS_ERR(dentry)) { 228 p->is_whiteout = ovl_is_whiteout(dentry); 229 dput(dentry); 230 } 231 } 232 inode_unlock(dir->d_inode); 233 } 234 revert_creds(old_cred); 235 put_cred(override_cred); 236 237 return err; 238 } 239 240 static inline int ovl_dir_read(struct path *realpath, 241 struct ovl_readdir_data *rdd) 242 { 243 struct file *realfile; 244 int err; 245 246 realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY); 247 if (IS_ERR(realfile)) 248 return PTR_ERR(realfile); 249 250 rdd->first_maybe_whiteout = NULL; 251 rdd->ctx.pos = 0; 252 do { 253 rdd->count = 0; 254 rdd->err = 0; 255 err = iterate_dir(realfile, &rdd->ctx); 256 if (err >= 0) 257 err = rdd->err; 258 } while (!err && rdd->count); 259 260 if (!err && rdd->first_maybe_whiteout) 261 err = ovl_check_whiteouts(realpath->dentry, rdd); 262 263 fput(realfile); 264 265 return err; 266 } 267 268 static void ovl_dir_reset(struct file *file) 269 { 270 struct ovl_dir_file *od = file->private_data; 271 struct ovl_dir_cache *cache = od->cache; 272 struct dentry *dentry = file->f_path.dentry; 273 enum ovl_path_type type = ovl_path_type(dentry); 274 275 if (cache && ovl_dentry_version_get(dentry) != cache->version) { 276 ovl_cache_put(od, dentry); 277 od->cache = NULL; 278 od->cursor = NULL; 279 } 280 WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type)); 281 if (od->is_real && OVL_TYPE_MERGE(type)) 282 od->is_real = false; 283 } 284 285 static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) 286 { 287 int err; 288 struct path realpath; 289 struct ovl_readdir_data rdd = { 290 .ctx.actor = ovl_fill_merge, 291 .list = list, 292 .root = RB_ROOT, 293 .is_lowest = false, 294 }; 295 int idx, next; 296 297 for (idx = 0; idx != -1; idx = next) { 298 next = ovl_path_next(idx, dentry, &realpath); 299 300 if (next != -1) { 301 err = ovl_dir_read(&realpath, &rdd); 302 if (err) 303 break; 304 } else { 305 /* 306 * Insert lowest layer entries before upper ones, this 307 * allows offsets to be reasonably constant 308 */ 309 list_add(&rdd.middle, rdd.list); 310 rdd.is_lowest = true; 311 err = ovl_dir_read(&realpath, &rdd); 312 list_del(&rdd.middle); 313 } 314 } 315 return err; 316 } 317 318 static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) 319 { 320 struct list_head *p; 321 loff_t off = 0; 322 323 list_for_each(p, &od->cache->entries) { 324 if (off >= pos) 325 break; 326 off++; 327 } 328 /* Cursor is safe since the cache is stable */ 329 od->cursor = p; 330 } 331 332 static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) 333 { 334 int res; 335 struct ovl_dir_cache *cache; 336 337 cache = ovl_dir_cache(dentry); 338 if (cache && ovl_dentry_version_get(dentry) == cache->version) { 339 cache->refcount++; 340 return cache; 341 } 342 ovl_set_dir_cache(dentry, NULL); 343 344 cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL); 345 if (!cache) 346 return ERR_PTR(-ENOMEM); 347 348 cache->refcount = 1; 349 INIT_LIST_HEAD(&cache->entries); 350 351 res = ovl_dir_read_merged(dentry, &cache->entries); 352 if (res) { 353 ovl_cache_free(&cache->entries); 354 kfree(cache); 355 return ERR_PTR(res); 356 } 357 358 cache->version = ovl_dentry_version_get(dentry); 359 ovl_set_dir_cache(dentry, cache); 360 361 return cache; 362 } 363 364 static int ovl_iterate(struct file *file, struct dir_context *ctx) 365 { 366 struct ovl_dir_file *od = file->private_data; 367 struct dentry *dentry = file->f_path.dentry; 368 struct ovl_cache_entry *p; 369 370 if (!ctx->pos) 371 ovl_dir_reset(file); 372 373 if (od->is_real) 374 return iterate_dir(od->realfile, ctx); 375 376 if (!od->cache) { 377 struct ovl_dir_cache *cache; 378 379 cache = ovl_cache_get(dentry); 380 if (IS_ERR(cache)) 381 return PTR_ERR(cache); 382 383 od->cache = cache; 384 ovl_seek_cursor(od, ctx->pos); 385 } 386 387 while (od->cursor != &od->cache->entries) { 388 p = list_entry(od->cursor, struct ovl_cache_entry, l_node); 389 if (!p->is_whiteout) 390 if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) 391 break; 392 od->cursor = p->l_node.next; 393 ctx->pos++; 394 } 395 return 0; 396 } 397 398 static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin) 399 { 400 loff_t res; 401 struct ovl_dir_file *od = file->private_data; 402 403 inode_lock(file_inode(file)); 404 if (!file->f_pos) 405 ovl_dir_reset(file); 406 407 if (od->is_real) { 408 res = vfs_llseek(od->realfile, offset, origin); 409 file->f_pos = od->realfile->f_pos; 410 } else { 411 res = -EINVAL; 412 413 switch (origin) { 414 case SEEK_CUR: 415 offset += file->f_pos; 416 break; 417 case SEEK_SET: 418 break; 419 default: 420 goto out_unlock; 421 } 422 if (offset < 0) 423 goto out_unlock; 424 425 if (offset != file->f_pos) { 426 file->f_pos = offset; 427 if (od->cache) 428 ovl_seek_cursor(od, offset); 429 } 430 res = offset; 431 } 432 out_unlock: 433 inode_unlock(file_inode(file)); 434 435 return res; 436 } 437 438 static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, 439 int datasync) 440 { 441 struct ovl_dir_file *od = file->private_data; 442 struct dentry *dentry = file->f_path.dentry; 443 struct file *realfile = od->realfile; 444 445 /* 446 * Need to check if we started out being a lower dir, but got copied up 447 */ 448 if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) { 449 struct inode *inode = file_inode(file); 450 451 realfile = lockless_dereference(od->upperfile); 452 if (!realfile) { 453 struct path upperpath; 454 455 ovl_path_upper(dentry, &upperpath); 456 realfile = ovl_path_open(&upperpath, O_RDONLY); 457 smp_mb__before_spinlock(); 458 inode_lock(inode); 459 if (!od->upperfile) { 460 if (IS_ERR(realfile)) { 461 inode_unlock(inode); 462 return PTR_ERR(realfile); 463 } 464 od->upperfile = realfile; 465 } else { 466 /* somebody has beaten us to it */ 467 if (!IS_ERR(realfile)) 468 fput(realfile); 469 realfile = od->upperfile; 470 } 471 inode_unlock(inode); 472 } 473 } 474 475 return vfs_fsync_range(realfile, start, end, datasync); 476 } 477 478 static int ovl_dir_release(struct inode *inode, struct file *file) 479 { 480 struct ovl_dir_file *od = file->private_data; 481 482 if (od->cache) { 483 inode_lock(inode); 484 ovl_cache_put(od, file->f_path.dentry); 485 inode_unlock(inode); 486 } 487 fput(od->realfile); 488 if (od->upperfile) 489 fput(od->upperfile); 490 kfree(od); 491 492 return 0; 493 } 494 495 static int ovl_dir_open(struct inode *inode, struct file *file) 496 { 497 struct path realpath; 498 struct file *realfile; 499 struct ovl_dir_file *od; 500 enum ovl_path_type type; 501 502 od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL); 503 if (!od) 504 return -ENOMEM; 505 506 type = ovl_path_real(file->f_path.dentry, &realpath); 507 realfile = ovl_path_open(&realpath, file->f_flags); 508 if (IS_ERR(realfile)) { 509 kfree(od); 510 return PTR_ERR(realfile); 511 } 512 od->realfile = realfile; 513 od->is_real = !OVL_TYPE_MERGE(type); 514 od->is_upper = OVL_TYPE_UPPER(type); 515 file->private_data = od; 516 517 return 0; 518 } 519 520 const struct file_operations ovl_dir_operations = { 521 .read = generic_read_dir, 522 .open = ovl_dir_open, 523 .iterate = ovl_iterate, 524 .llseek = ovl_dir_llseek, 525 .fsync = ovl_dir_fsync, 526 .release = ovl_dir_release, 527 }; 528 529 int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) 530 { 531 int err; 532 struct ovl_cache_entry *p; 533 534 err = ovl_dir_read_merged(dentry, list); 535 if (err) 536 return err; 537 538 err = 0; 539 540 list_for_each_entry(p, list, l_node) { 541 if (p->is_whiteout) 542 continue; 543 544 if (p->name[0] == '.') { 545 if (p->len == 1) 546 continue; 547 if (p->len == 2 && p->name[1] == '.') 548 continue; 549 } 550 err = -ENOTEMPTY; 551 break; 552 } 553 554 return err; 555 } 556 557 void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) 558 { 559 struct ovl_cache_entry *p; 560 561 inode_lock_nested(upper->d_inode, I_MUTEX_CHILD); 562 list_for_each_entry(p, list, l_node) { 563 struct dentry *dentry; 564 565 if (!p->is_whiteout) 566 continue; 567 568 dentry = lookup_one_len(p->name, upper, p->len); 569 if (IS_ERR(dentry)) { 570 pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n", 571 upper->d_name.name, p->len, p->name, 572 (int) PTR_ERR(dentry)); 573 continue; 574 } 575 if (dentry->d_inode) 576 ovl_cleanup(upper->d_inode, dentry); 577 dput(dentry); 578 } 579 inode_unlock(upper->d_inode); 580 } 581 582 static int ovl_check_d_type(struct dir_context *ctx, const char *name, 583 int namelen, loff_t offset, u64 ino, 584 unsigned int d_type) 585 { 586 struct ovl_readdir_data *rdd = 587 container_of(ctx, struct ovl_readdir_data, ctx); 588 589 /* Even if d_type is not supported, DT_DIR is returned for . and .. */ 590 if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen)) 591 return 0; 592 593 if (d_type != DT_UNKNOWN) 594 rdd->d_type_supported = true; 595 596 return 0; 597 } 598 599 /* 600 * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values 601 * if error is encountered. 602 */ 603 int ovl_check_d_type_supported(struct path *realpath) 604 { 605 int err; 606 struct ovl_readdir_data rdd = { 607 .ctx.actor = ovl_check_d_type, 608 .d_type_supported = false, 609 }; 610 611 err = ovl_dir_read(realpath, &rdd); 612 if (err) 613 return err; 614 615 return rdd.d_type_supported; 616 } 617