1 /* 2 * 3 * Copyright (C) 2011 Novell Inc. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published by 7 * the Free Software Foundation. 8 */ 9 10 #include <linux/fs.h> 11 #include <linux/slab.h> 12 #include <linux/file.h> 13 #include <linux/splice.h> 14 #include <linux/xattr.h> 15 #include <linux/security.h> 16 #include <linux/uaccess.h> 17 #include <linux/sched.h> 18 #include <linux/namei.h> 19 #include "overlayfs.h" 20 21 #define OVL_COPY_UP_CHUNK_SIZE (1 << 20) 22 23 int ovl_copy_xattr(struct dentry *old, struct dentry *new) 24 { 25 ssize_t list_size, size, value_size = 0; 26 char *buf, *name, *value = NULL; 27 int uninitialized_var(error); 28 29 if (!old->d_inode->i_op->getxattr || 30 !new->d_inode->i_op->getxattr) 31 return 0; 32 33 list_size = vfs_listxattr(old, NULL, 0); 34 if (list_size <= 0) { 35 if (list_size == -EOPNOTSUPP) 36 return 0; 37 return list_size; 38 } 39 40 buf = kzalloc(list_size, GFP_KERNEL); 41 if (!buf) 42 return -ENOMEM; 43 44 list_size = vfs_listxattr(old, buf, list_size); 45 if (list_size <= 0) { 46 error = list_size; 47 goto out; 48 } 49 50 for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { 51 retry: 52 size = vfs_getxattr(old, name, value, value_size); 53 if (size == -ERANGE) 54 size = vfs_getxattr(old, name, NULL, 0); 55 56 if (size < 0) { 57 error = size; 58 break; 59 } 60 61 if (size > value_size) { 62 void *new; 63 64 new = krealloc(value, size, GFP_KERNEL); 65 if (!new) { 66 error = -ENOMEM; 67 break; 68 } 69 value = new; 70 value_size = size; 71 goto retry; 72 } 73 74 error = vfs_setxattr(new, name, value, size, 0); 75 if (error) 76 break; 77 } 78 kfree(value); 79 out: 80 kfree(buf); 81 return error; 82 } 83 84 static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) 85 { 86 struct file *old_file; 87 struct file *new_file; 88 loff_t old_pos = 0; 89 loff_t new_pos = 0; 90 int error = 0; 91 92 if (len == 0) 93 return 0; 94 95 old_file = ovl_path_open(old, O_LARGEFILE | O_RDONLY); 96 if (IS_ERR(old_file)) 97 return PTR_ERR(old_file); 98 99 new_file = ovl_path_open(new, O_LARGEFILE | O_WRONLY); 100 if (IS_ERR(new_file)) { 101 error = PTR_ERR(new_file); 102 goto out_fput; 103 } 104 105 /* FIXME: copy up sparse files efficiently */ 106 while (len) { 107 size_t this_len = OVL_COPY_UP_CHUNK_SIZE; 108 long bytes; 109 110 if (len < this_len) 111 this_len = len; 112 113 if (signal_pending_state(TASK_KILLABLE, current)) { 114 error = -EINTR; 115 break; 116 } 117 118 bytes = do_splice_direct(old_file, &old_pos, 119 new_file, &new_pos, 120 this_len, SPLICE_F_MOVE); 121 if (bytes <= 0) { 122 error = bytes; 123 break; 124 } 125 WARN_ON(old_pos != new_pos); 126 127 len -= bytes; 128 } 129 130 fput(new_file); 131 out_fput: 132 fput(old_file); 133 return error; 134 } 135 136 static char *ovl_read_symlink(struct dentry *realdentry) 137 { 138 int res; 139 char *buf; 140 struct inode *inode = realdentry->d_inode; 141 mm_segment_t old_fs; 142 143 res = -EINVAL; 144 if (!inode->i_op->readlink) 145 goto err; 146 147 res = -ENOMEM; 148 buf = (char *) __get_free_page(GFP_KERNEL); 149 if (!buf) 150 goto err; 151 152 old_fs = get_fs(); 153 set_fs(get_ds()); 154 /* The cast to a user pointer is valid due to the set_fs() */ 155 res = inode->i_op->readlink(realdentry, 156 (char __user *)buf, PAGE_SIZE - 1); 157 set_fs(old_fs); 158 if (res < 0) { 159 free_page((unsigned long) buf); 160 goto err; 161 } 162 buf[res] = '\0'; 163 164 return buf; 165 166 err: 167 return ERR_PTR(res); 168 } 169 170 static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat) 171 { 172 struct iattr attr = { 173 .ia_valid = 174 ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET, 175 .ia_atime = stat->atime, 176 .ia_mtime = stat->mtime, 177 }; 178 179 return notify_change(upperdentry, &attr, NULL); 180 } 181 182 int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) 183 { 184 int err = 0; 185 186 if (!S_ISLNK(stat->mode)) { 187 struct iattr attr = { 188 .ia_valid = ATTR_MODE, 189 .ia_mode = stat->mode, 190 }; 191 err = notify_change(upperdentry, &attr, NULL); 192 } 193 if (!err) { 194 struct iattr attr = { 195 .ia_valid = ATTR_UID | ATTR_GID, 196 .ia_uid = stat->uid, 197 .ia_gid = stat->gid, 198 }; 199 err = notify_change(upperdentry, &attr, NULL); 200 } 201 if (!err) 202 ovl_set_timestamps(upperdentry, stat); 203 204 return err; 205 } 206 207 static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, 208 struct dentry *dentry, struct path *lowerpath, 209 struct kstat *stat, const char *link) 210 { 211 struct inode *wdir = workdir->d_inode; 212 struct inode *udir = upperdir->d_inode; 213 struct dentry *newdentry = NULL; 214 struct dentry *upper = NULL; 215 umode_t mode = stat->mode; 216 int err; 217 218 newdentry = ovl_lookup_temp(workdir, dentry); 219 err = PTR_ERR(newdentry); 220 if (IS_ERR(newdentry)) 221 goto out; 222 223 upper = lookup_one_len(dentry->d_name.name, upperdir, 224 dentry->d_name.len); 225 err = PTR_ERR(upper); 226 if (IS_ERR(upper)) 227 goto out1; 228 229 /* Can't properly set mode on creation because of the umask */ 230 stat->mode &= S_IFMT; 231 err = ovl_create_real(wdir, newdentry, stat, link, NULL, true); 232 stat->mode = mode; 233 if (err) 234 goto out2; 235 236 if (S_ISREG(stat->mode)) { 237 struct path upperpath; 238 ovl_path_upper(dentry, &upperpath); 239 BUG_ON(upperpath.dentry != NULL); 240 upperpath.dentry = newdentry; 241 242 err = ovl_copy_up_data(lowerpath, &upperpath, stat->size); 243 if (err) 244 goto out_cleanup; 245 } 246 247 err = ovl_copy_xattr(lowerpath->dentry, newdentry); 248 if (err) 249 goto out_cleanup; 250 251 inode_lock(newdentry->d_inode); 252 err = ovl_set_attr(newdentry, stat); 253 inode_unlock(newdentry->d_inode); 254 if (err) 255 goto out_cleanup; 256 257 err = ovl_do_rename(wdir, newdentry, udir, upper, 0); 258 if (err) 259 goto out_cleanup; 260 261 ovl_dentry_update(dentry, newdentry); 262 newdentry = NULL; 263 264 /* 265 * Non-directores become opaque when copied up. 266 */ 267 if (!S_ISDIR(stat->mode)) 268 ovl_dentry_set_opaque(dentry, true); 269 out2: 270 dput(upper); 271 out1: 272 dput(newdentry); 273 out: 274 return err; 275 276 out_cleanup: 277 ovl_cleanup(wdir, newdentry); 278 goto out2; 279 } 280 281 /* 282 * Copy up a single dentry 283 * 284 * Directory renames only allowed on "pure upper" (already created on 285 * upper filesystem, never copied up). Directories which are on lower or 286 * are merged may not be renamed. For these -EXDEV is returned and 287 * userspace has to deal with it. This means, when copying up a 288 * directory we can rely on it and ancestors being stable. 289 * 290 * Non-directory renames start with copy up of source if necessary. The 291 * actual rename will only proceed once the copy up was successful. Copy 292 * up uses upper parent i_mutex for exclusion. Since rename can change 293 * d_parent it is possible that the copy up will lock the old parent. At 294 * that point the file will have already been copied up anyway. 295 */ 296 int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, 297 struct path *lowerpath, struct kstat *stat) 298 { 299 struct dentry *workdir = ovl_workdir(dentry); 300 int err; 301 struct kstat pstat; 302 struct path parentpath; 303 struct dentry *upperdir; 304 struct dentry *upperdentry; 305 const struct cred *old_cred; 306 struct cred *override_cred; 307 char *link = NULL; 308 309 if (WARN_ON(!workdir)) 310 return -EROFS; 311 312 ovl_path_upper(parent, &parentpath); 313 upperdir = parentpath.dentry; 314 315 err = vfs_getattr(&parentpath, &pstat); 316 if (err) 317 return err; 318 319 if (S_ISLNK(stat->mode)) { 320 link = ovl_read_symlink(lowerpath->dentry); 321 if (IS_ERR(link)) 322 return PTR_ERR(link); 323 } 324 325 err = -ENOMEM; 326 override_cred = prepare_creds(); 327 if (!override_cred) 328 goto out_free_link; 329 330 override_cred->fsuid = stat->uid; 331 override_cred->fsgid = stat->gid; 332 /* 333 * CAP_SYS_ADMIN for copying up extended attributes 334 * CAP_DAC_OVERRIDE for create 335 * CAP_FOWNER for chmod, timestamp update 336 * CAP_FSETID for chmod 337 * CAP_CHOWN for chown 338 * CAP_MKNOD for mknod 339 */ 340 cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); 341 cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); 342 cap_raise(override_cred->cap_effective, CAP_FOWNER); 343 cap_raise(override_cred->cap_effective, CAP_FSETID); 344 cap_raise(override_cred->cap_effective, CAP_CHOWN); 345 cap_raise(override_cred->cap_effective, CAP_MKNOD); 346 old_cred = override_creds(override_cred); 347 348 err = -EIO; 349 if (lock_rename(workdir, upperdir) != NULL) { 350 pr_err("overlayfs: failed to lock workdir+upperdir\n"); 351 goto out_unlock; 352 } 353 upperdentry = ovl_dentry_upper(dentry); 354 if (upperdentry) { 355 /* Raced with another copy-up? Nothing to do, then... */ 356 err = 0; 357 goto out_unlock; 358 } 359 360 err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, 361 stat, link); 362 if (!err) { 363 /* Restore timestamps on parent (best effort) */ 364 ovl_set_timestamps(upperdir, &pstat); 365 } 366 out_unlock: 367 unlock_rename(workdir, upperdir); 368 revert_creds(old_cred); 369 put_cred(override_cred); 370 371 out_free_link: 372 if (link) 373 free_page((unsigned long) link); 374 375 return err; 376 } 377 378 int ovl_copy_up(struct dentry *dentry) 379 { 380 int err; 381 382 err = 0; 383 while (!err) { 384 struct dentry *next; 385 struct dentry *parent; 386 struct path lowerpath; 387 struct kstat stat; 388 enum ovl_path_type type = ovl_path_type(dentry); 389 390 if (OVL_TYPE_UPPER(type)) 391 break; 392 393 next = dget(dentry); 394 /* find the topmost dentry not yet copied up */ 395 for (;;) { 396 parent = dget_parent(next); 397 398 type = ovl_path_type(parent); 399 if (OVL_TYPE_UPPER(type)) 400 break; 401 402 dput(next); 403 next = parent; 404 } 405 406 ovl_path_lower(next, &lowerpath); 407 err = vfs_getattr(&lowerpath, &stat); 408 if (!err) 409 err = ovl_copy_up_one(parent, next, &lowerpath, &stat); 410 411 dput(parent); 412 dput(next); 413 } 414 415 return err; 416 } 417