1 /* 2 * 3 * Copyright (C) 2011 Novell Inc. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published by 7 * the Free Software Foundation. 8 */ 9 10 #include <linux/fs.h> 11 #include <linux/slab.h> 12 #include <linux/file.h> 13 #include <linux/splice.h> 14 #include <linux/xattr.h> 15 #include <linux/security.h> 16 #include <linux/uaccess.h> 17 #include <linux/sched.h> 18 #include <linux/namei.h> 19 #include "overlayfs.h" 20 21 #define OVL_COPY_UP_CHUNK_SIZE (1 << 20) 22 23 int ovl_copy_xattr(struct dentry *old, struct dentry *new) 24 { 25 ssize_t list_size, size; 26 char *buf, *name, *value; 27 int error; 28 29 if (!old->d_inode->i_op->getxattr || 30 !new->d_inode->i_op->getxattr) 31 return 0; 32 33 list_size = vfs_listxattr(old, NULL, 0); 34 if (list_size <= 0) { 35 if (list_size == -EOPNOTSUPP) 36 return 0; 37 return list_size; 38 } 39 40 buf = kzalloc(list_size, GFP_KERNEL); 41 if (!buf) 42 return -ENOMEM; 43 44 error = -ENOMEM; 45 value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL); 46 if (!value) 47 goto out; 48 49 list_size = vfs_listxattr(old, buf, list_size); 50 if (list_size <= 0) { 51 error = list_size; 52 goto out_free_value; 53 } 54 55 for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { 56 size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX); 57 if (size <= 0) { 58 error = size; 59 goto out_free_value; 60 } 61 error = vfs_setxattr(new, name, value, size, 0); 62 if (error) 63 goto out_free_value; 64 } 65 66 out_free_value: 67 kfree(value); 68 out: 69 kfree(buf); 70 return error; 71 } 72 73 static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) 74 { 75 struct file *old_file; 76 struct file *new_file; 77 loff_t old_pos = 0; 78 loff_t new_pos = 0; 79 int error = 0; 80 81 if (len == 0) 82 return 0; 83 84 old_file = ovl_path_open(old, O_RDONLY); 85 if (IS_ERR(old_file)) 86 return PTR_ERR(old_file); 87 88 new_file = ovl_path_open(new, O_WRONLY); 89 if (IS_ERR(new_file)) { 90 error = PTR_ERR(new_file); 91 goto out_fput; 92 } 93 94 /* FIXME: copy up sparse files efficiently */ 95 while (len) { 96 size_t this_len = OVL_COPY_UP_CHUNK_SIZE; 97 long bytes; 98 99 if (len < this_len) 100 this_len = len; 101 102 if (signal_pending_state(TASK_KILLABLE, current)) { 103 error = -EINTR; 104 break; 105 } 106 107 bytes = do_splice_direct(old_file, &old_pos, 108 new_file, &new_pos, 109 this_len, SPLICE_F_MOVE); 110 if (bytes <= 0) { 111 error = bytes; 112 break; 113 } 114 WARN_ON(old_pos != new_pos); 115 116 len -= bytes; 117 } 118 119 fput(new_file); 120 out_fput: 121 fput(old_file); 122 return error; 123 } 124 125 static char *ovl_read_symlink(struct dentry *realdentry) 126 { 127 int res; 128 char *buf; 129 struct inode *inode = realdentry->d_inode; 130 mm_segment_t old_fs; 131 132 res = -EINVAL; 133 if (!inode->i_op->readlink) 134 goto err; 135 136 res = -ENOMEM; 137 buf = (char *) __get_free_page(GFP_KERNEL); 138 if (!buf) 139 goto err; 140 141 old_fs = get_fs(); 142 set_fs(get_ds()); 143 /* The cast to a user pointer is valid due to the set_fs() */ 144 res = inode->i_op->readlink(realdentry, 145 (char __user *)buf, PAGE_SIZE - 1); 146 set_fs(old_fs); 147 if (res < 0) { 148 free_page((unsigned long) buf); 149 goto err; 150 } 151 buf[res] = '\0'; 152 153 return buf; 154 155 err: 156 return ERR_PTR(res); 157 } 158 159 static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat) 160 { 161 struct iattr attr = { 162 .ia_valid = 163 ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET, 164 .ia_atime = stat->atime, 165 .ia_mtime = stat->mtime, 166 }; 167 168 return notify_change(upperdentry, &attr, NULL); 169 } 170 171 int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) 172 { 173 int err = 0; 174 175 if (!S_ISLNK(stat->mode)) { 176 struct iattr attr = { 177 .ia_valid = ATTR_MODE, 178 .ia_mode = stat->mode, 179 }; 180 err = notify_change(upperdentry, &attr, NULL); 181 } 182 if (!err) { 183 struct iattr attr = { 184 .ia_valid = ATTR_UID | ATTR_GID, 185 .ia_uid = stat->uid, 186 .ia_gid = stat->gid, 187 }; 188 err = notify_change(upperdentry, &attr, NULL); 189 } 190 if (!err) 191 ovl_set_timestamps(upperdentry, stat); 192 193 return err; 194 195 } 196 197 static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, 198 struct dentry *dentry, struct path *lowerpath, 199 struct kstat *stat, struct iattr *attr, 200 const char *link) 201 { 202 struct inode *wdir = workdir->d_inode; 203 struct inode *udir = upperdir->d_inode; 204 struct dentry *newdentry = NULL; 205 struct dentry *upper = NULL; 206 umode_t mode = stat->mode; 207 int err; 208 209 newdentry = ovl_lookup_temp(workdir, dentry); 210 err = PTR_ERR(newdentry); 211 if (IS_ERR(newdentry)) 212 goto out; 213 214 upper = lookup_one_len(dentry->d_name.name, upperdir, 215 dentry->d_name.len); 216 err = PTR_ERR(upper); 217 if (IS_ERR(upper)) 218 goto out1; 219 220 /* Can't properly set mode on creation because of the umask */ 221 stat->mode &= S_IFMT; 222 err = ovl_create_real(wdir, newdentry, stat, link, NULL, true); 223 stat->mode = mode; 224 if (err) 225 goto out2; 226 227 if (S_ISREG(stat->mode)) { 228 struct path upperpath; 229 ovl_path_upper(dentry, &upperpath); 230 BUG_ON(upperpath.dentry != NULL); 231 upperpath.dentry = newdentry; 232 233 err = ovl_copy_up_data(lowerpath, &upperpath, stat->size); 234 if (err) 235 goto out_cleanup; 236 } 237 238 err = ovl_copy_xattr(lowerpath->dentry, newdentry); 239 if (err) 240 goto out_cleanup; 241 242 mutex_lock(&newdentry->d_inode->i_mutex); 243 err = ovl_set_attr(newdentry, stat); 244 if (!err && attr) 245 err = notify_change(newdentry, attr, NULL); 246 mutex_unlock(&newdentry->d_inode->i_mutex); 247 if (err) 248 goto out_cleanup; 249 250 err = ovl_do_rename(wdir, newdentry, udir, upper, 0); 251 if (err) 252 goto out_cleanup; 253 254 ovl_dentry_update(dentry, newdentry); 255 newdentry = NULL; 256 257 /* 258 * Non-directores become opaque when copied up. 259 */ 260 if (!S_ISDIR(stat->mode)) 261 ovl_dentry_set_opaque(dentry, true); 262 out2: 263 dput(upper); 264 out1: 265 dput(newdentry); 266 out: 267 return err; 268 269 out_cleanup: 270 ovl_cleanup(wdir, newdentry); 271 goto out; 272 } 273 274 /* 275 * Copy up a single dentry 276 * 277 * Directory renames only allowed on "pure upper" (already created on 278 * upper filesystem, never copied up). Directories which are on lower or 279 * are merged may not be renamed. For these -EXDEV is returned and 280 * userspace has to deal with it. This means, when copying up a 281 * directory we can rely on it and ancestors being stable. 282 * 283 * Non-directory renames start with copy up of source if necessary. The 284 * actual rename will only proceed once the copy up was successful. Copy 285 * up uses upper parent i_mutex for exclusion. Since rename can change 286 * d_parent it is possible that the copy up will lock the old parent. At 287 * that point the file will have already been copied up anyway. 288 */ 289 int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, 290 struct path *lowerpath, struct kstat *stat, 291 struct iattr *attr) 292 { 293 struct dentry *workdir = ovl_workdir(dentry); 294 int err; 295 struct kstat pstat; 296 struct path parentpath; 297 struct dentry *upperdir; 298 struct dentry *upperdentry; 299 const struct cred *old_cred; 300 struct cred *override_cred; 301 char *link = NULL; 302 303 ovl_path_upper(parent, &parentpath); 304 upperdir = parentpath.dentry; 305 306 err = vfs_getattr(&parentpath, &pstat); 307 if (err) 308 return err; 309 310 if (S_ISLNK(stat->mode)) { 311 link = ovl_read_symlink(lowerpath->dentry); 312 if (IS_ERR(link)) 313 return PTR_ERR(link); 314 } 315 316 err = -ENOMEM; 317 override_cred = prepare_creds(); 318 if (!override_cred) 319 goto out_free_link; 320 321 override_cred->fsuid = stat->uid; 322 override_cred->fsgid = stat->gid; 323 /* 324 * CAP_SYS_ADMIN for copying up extended attributes 325 * CAP_DAC_OVERRIDE for create 326 * CAP_FOWNER for chmod, timestamp update 327 * CAP_FSETID for chmod 328 * CAP_CHOWN for chown 329 * CAP_MKNOD for mknod 330 */ 331 cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); 332 cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); 333 cap_raise(override_cred->cap_effective, CAP_FOWNER); 334 cap_raise(override_cred->cap_effective, CAP_FSETID); 335 cap_raise(override_cred->cap_effective, CAP_CHOWN); 336 cap_raise(override_cred->cap_effective, CAP_MKNOD); 337 old_cred = override_creds(override_cred); 338 339 err = -EIO; 340 if (lock_rename(workdir, upperdir) != NULL) { 341 pr_err("overlayfs: failed to lock workdir+upperdir\n"); 342 goto out_unlock; 343 } 344 upperdentry = ovl_dentry_upper(dentry); 345 if (upperdentry) { 346 unlock_rename(workdir, upperdir); 347 err = 0; 348 /* Raced with another copy-up? Do the setattr here */ 349 if (attr) { 350 mutex_lock(&upperdentry->d_inode->i_mutex); 351 err = notify_change(upperdentry, attr, NULL); 352 mutex_unlock(&upperdentry->d_inode->i_mutex); 353 } 354 goto out_put_cred; 355 } 356 357 err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, 358 stat, attr, link); 359 if (!err) { 360 /* Restore timestamps on parent (best effort) */ 361 ovl_set_timestamps(upperdir, &pstat); 362 } 363 out_unlock: 364 unlock_rename(workdir, upperdir); 365 out_put_cred: 366 revert_creds(old_cred); 367 put_cred(override_cred); 368 369 out_free_link: 370 if (link) 371 free_page((unsigned long) link); 372 373 return err; 374 } 375 376 int ovl_copy_up(struct dentry *dentry) 377 { 378 int err; 379 380 err = 0; 381 while (!err) { 382 struct dentry *next; 383 struct dentry *parent; 384 struct path lowerpath; 385 struct kstat stat; 386 enum ovl_path_type type = ovl_path_type(dentry); 387 388 if (type != OVL_PATH_LOWER) 389 break; 390 391 next = dget(dentry); 392 /* find the topmost dentry not yet copied up */ 393 for (;;) { 394 parent = dget_parent(next); 395 396 type = ovl_path_type(parent); 397 if (type != OVL_PATH_LOWER) 398 break; 399 400 dput(next); 401 next = parent; 402 } 403 404 ovl_path_lower(next, &lowerpath); 405 err = vfs_getattr(&lowerpath, &stat); 406 if (!err) 407 err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL); 408 409 dput(parent); 410 dput(next); 411 } 412 413 return err; 414 } 415