1 /* 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 4 * 5 * This copyrighted material is made available to anyone wishing to use, 6 * modify, copy, or redistribute it subject to the terms and conditions 7 * of the GNU General Public License version 2. 8 */ 9 10 #include <linux/sched.h> 11 #include <linux/slab.h> 12 #include <linux/spinlock.h> 13 #include <linux/completion.h> 14 #include <linux/buffer_head.h> 15 #include <linux/posix_acl.h> 16 #include <linux/sort.h> 17 #include <linux/gfs2_ondisk.h> 18 #include <linux/crc32.h> 19 #include <linux/lm_interface.h> 20 #include <linux/security.h> 21 #include <linux/time.h> 22 23 #include "gfs2.h" 24 #include "incore.h" 25 #include "acl.h" 26 #include "bmap.h" 27 #include "dir.h" 28 #include "eattr.h" 29 #include "glock.h" 30 #include "glops.h" 31 #include "inode.h" 32 #include "log.h" 33 #include "meta_io.h" 34 #include "ops_address.h" 35 #include "ops_inode.h" 36 #include "quota.h" 37 #include "rgrp.h" 38 #include "trans.h" 39 #include "util.h" 40 41 struct gfs2_inum_range_host { 42 u64 ir_start; 43 u64 ir_length; 44 }; 45 46 static int iget_test(struct inode *inode, void *opaque) 47 { 48 struct gfs2_inode *ip = GFS2_I(inode); 49 u64 *no_addr = opaque; 50 51 if (ip->i_no_addr == *no_addr && test_bit(GIF_USER, &ip->i_flags)) 52 return 1; 53 54 return 0; 55 } 56 57 static int iget_set(struct inode *inode, void *opaque) 58 { 59 struct gfs2_inode *ip = GFS2_I(inode); 60 u64 *no_addr = opaque; 61 62 inode->i_ino = (unsigned long)*no_addr; 63 ip->i_no_addr = *no_addr; 64 set_bit(GIF_USER, &ip->i_flags); 65 return 0; 66 } 67 68 struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) 69 { 70 unsigned long hash = (unsigned long)no_addr; 71 return ilookup5(sb, hash, iget_test, &no_addr); 72 } 73 74 static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) 75 { 76 unsigned long hash = (unsigned long)no_addr; 77 return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); 78 } 79 80 struct gfs2_skip_data { 81 u64 no_addr; 82 int skipped; 83 }; 84 85 static int iget_skip_test(struct inode *inode, void *opaque) 86 { 87 struct gfs2_inode *ip = GFS2_I(inode); 88 struct gfs2_skip_data *data = opaque; 89 90 if (ip->i_no_addr == data->no_addr && test_bit(GIF_USER, &ip->i_flags)){ 91 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){ 92 data->skipped = 1; 93 return 0; 94 } 95 return 1; 96 } 97 return 0; 98 } 99 100 static int iget_skip_set(struct inode *inode, void *opaque) 101 { 102 struct gfs2_inode *ip = GFS2_I(inode); 103 struct gfs2_skip_data *data = opaque; 104 105 if (data->skipped) 106 return 1; 107 inode->i_ino = (unsigned long)(data->no_addr); 108 ip->i_no_addr = data->no_addr; 109 set_bit(GIF_USER, &ip->i_flags); 110 return 0; 111 } 112 113 static struct inode *gfs2_iget_skip(struct super_block *sb, 114 u64 no_addr) 115 { 116 struct gfs2_skip_data data; 117 unsigned long hash = (unsigned long)no_addr; 118 119 data.no_addr = no_addr; 120 data.skipped = 0; 121 return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data); 122 } 123 124 /** 125 * GFS2 lookup code fills in vfs inode contents based on info obtained 126 * from directory entry inside gfs2_inode_lookup(). This has caused issues 127 * with NFS code path since its get_dentry routine doesn't have the relevant 128 * directory entry when gfs2_inode_lookup() is invoked. Part of the code 129 * segment inside gfs2_inode_lookup code needs to get moved around. 130 * 131 * Clean up I_LOCK and I_NEW as well. 132 **/ 133 134 void gfs2_set_iop(struct inode *inode) 135 { 136 struct gfs2_sbd *sdp = GFS2_SB(inode); 137 umode_t mode = inode->i_mode; 138 139 if (S_ISREG(mode)) { 140 inode->i_op = &gfs2_file_iops; 141 if (sdp->sd_args.ar_localflocks) 142 inode->i_fop = &gfs2_file_fops_nolock; 143 else 144 inode->i_fop = &gfs2_file_fops; 145 } else if (S_ISDIR(mode)) { 146 inode->i_op = &gfs2_dir_iops; 147 if (sdp->sd_args.ar_localflocks) 148 inode->i_fop = &gfs2_dir_fops_nolock; 149 else 150 inode->i_fop = &gfs2_dir_fops; 151 } else if (S_ISLNK(mode)) { 152 inode->i_op = &gfs2_symlink_iops; 153 } else { 154 inode->i_op = &gfs2_file_iops; 155 init_special_inode(inode, inode->i_mode, inode->i_rdev); 156 } 157 158 unlock_new_inode(inode); 159 } 160 161 /** 162 * gfs2_inode_lookup - Lookup an inode 163 * @sb: The super block 164 * @no_addr: The inode number 165 * @type: The type of the inode 166 * @skip_freeing: set this not return an inode if it is currently being freed. 167 * 168 * Returns: A VFS inode, or an error 169 */ 170 171 struct inode *gfs2_inode_lookup(struct super_block *sb, 172 unsigned int type, 173 u64 no_addr, 174 u64 no_formal_ino, int skip_freeing) 175 { 176 struct inode *inode; 177 struct gfs2_inode *ip; 178 struct gfs2_glock *io_gl; 179 int error; 180 181 if (skip_freeing) 182 inode = gfs2_iget_skip(sb, no_addr); 183 else 184 inode = gfs2_iget(sb, no_addr); 185 ip = GFS2_I(inode); 186 187 if (!inode) 188 return ERR_PTR(-ENOBUFS); 189 190 if (inode->i_state & I_NEW) { 191 struct gfs2_sbd *sdp = GFS2_SB(inode); 192 ip->i_no_formal_ino = no_formal_ino; 193 194 error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); 195 if (unlikely(error)) 196 goto fail; 197 ip->i_gl->gl_object = ip; 198 199 error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); 200 if (unlikely(error)) 201 goto fail_put; 202 203 set_bit(GIF_INVALID, &ip->i_flags); 204 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); 205 if (unlikely(error)) 206 goto fail_iopen; 207 ip->i_iopen_gh.gh_gl->gl_object = ip; 208 209 gfs2_glock_put(io_gl); 210 211 if ((type == DT_UNKNOWN) && (no_formal_ino == 0)) 212 goto gfs2_nfsbypass; 213 214 inode->i_mode = DT2IF(type); 215 216 /* 217 * We must read the inode in order to work out its type in 218 * this case. Note that this doesn't happen often as we normally 219 * know the type beforehand. This code path only occurs during 220 * unlinked inode recovery (where it is safe to do this glock, 221 * which is not true in the general case). 222 */ 223 if (type == DT_UNKNOWN) { 224 struct gfs2_holder gh; 225 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 226 if (unlikely(error)) 227 goto fail_glock; 228 /* Inode is now uptodate */ 229 gfs2_glock_dq_uninit(&gh); 230 } 231 232 gfs2_set_iop(inode); 233 } 234 235 gfs2_nfsbypass: 236 return inode; 237 fail_glock: 238 gfs2_glock_dq(&ip->i_iopen_gh); 239 fail_iopen: 240 gfs2_glock_put(io_gl); 241 fail_put: 242 ip->i_gl->gl_object = NULL; 243 gfs2_glock_put(ip->i_gl); 244 fail: 245 iget_failed(inode); 246 return ERR_PTR(error); 247 } 248 249 static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) 250 { 251 struct gfs2_dinode_host *di = &ip->i_di; 252 const struct gfs2_dinode *str = buf; 253 struct timespec atime; 254 u16 height, depth; 255 256 if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) 257 goto corrupt; 258 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); 259 ip->i_inode.i_mode = be32_to_cpu(str->di_mode); 260 ip->i_inode.i_rdev = 0; 261 switch (ip->i_inode.i_mode & S_IFMT) { 262 case S_IFBLK: 263 case S_IFCHR: 264 ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major), 265 be32_to_cpu(str->di_minor)); 266 break; 267 }; 268 269 ip->i_inode.i_uid = be32_to_cpu(str->di_uid); 270 ip->i_inode.i_gid = be32_to_cpu(str->di_gid); 271 /* 272 * We will need to review setting the nlink count here in the 273 * light of the forthcoming ro bind mount work. This is a reminder 274 * to do that. 275 */ 276 ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); 277 di->di_size = be64_to_cpu(str->di_size); 278 i_size_write(&ip->i_inode, di->di_size); 279 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); 280 atime.tv_sec = be64_to_cpu(str->di_atime); 281 atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); 282 if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0) 283 ip->i_inode.i_atime = atime; 284 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); 285 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); 286 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); 287 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); 288 289 ip->i_goal = be64_to_cpu(str->di_goal_meta); 290 di->di_generation = be64_to_cpu(str->di_generation); 291 292 di->di_flags = be32_to_cpu(str->di_flags); 293 gfs2_set_inode_flags(&ip->i_inode); 294 height = be16_to_cpu(str->di_height); 295 if (unlikely(height > GFS2_MAX_META_HEIGHT)) 296 goto corrupt; 297 ip->i_height = (u8)height; 298 299 depth = be16_to_cpu(str->di_depth); 300 if (unlikely(depth > GFS2_DIR_MAX_DEPTH)) 301 goto corrupt; 302 ip->i_depth = (u8)depth; 303 di->di_entries = be32_to_cpu(str->di_entries); 304 305 di->di_eattr = be64_to_cpu(str->di_eattr); 306 if (S_ISREG(ip->i_inode.i_mode)) 307 gfs2_set_aops(&ip->i_inode); 308 309 return 0; 310 corrupt: 311 if (gfs2_consist_inode(ip)) 312 gfs2_dinode_print(ip); 313 return -EIO; 314 } 315 316 /** 317 * gfs2_inode_refresh - Refresh the incore copy of the dinode 318 * @ip: The GFS2 inode 319 * 320 * Returns: errno 321 */ 322 323 int gfs2_inode_refresh(struct gfs2_inode *ip) 324 { 325 struct buffer_head *dibh; 326 int error; 327 328 error = gfs2_meta_inode_buffer(ip, &dibh); 329 if (error) 330 return error; 331 332 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) { 333 brelse(dibh); 334 return -EIO; 335 } 336 337 error = gfs2_dinode_in(ip, dibh->b_data); 338 brelse(dibh); 339 clear_bit(GIF_INVALID, &ip->i_flags); 340 341 return error; 342 } 343 344 int gfs2_dinode_dealloc(struct gfs2_inode *ip) 345 { 346 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 347 struct gfs2_alloc *al; 348 struct gfs2_rgrpd *rgd; 349 int error; 350 351 if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { 352 if (gfs2_consist_inode(ip)) 353 gfs2_dinode_print(ip); 354 return -EIO; 355 } 356 357 al = gfs2_alloc_get(ip); 358 if (!al) 359 return -ENOMEM; 360 361 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 362 if (error) 363 goto out; 364 365 error = gfs2_rindex_hold(sdp, &al->al_ri_gh); 366 if (error) 367 goto out_qs; 368 369 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); 370 if (!rgd) { 371 gfs2_consist_inode(ip); 372 error = -EIO; 373 goto out_rindex_relse; 374 } 375 376 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, 377 &al->al_rgd_gh); 378 if (error) 379 goto out_rindex_relse; 380 381 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 1); 382 if (error) 383 goto out_rg_gunlock; 384 385 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); 386 set_bit(GLF_LFLUSH, &ip->i_gl->gl_flags); 387 388 gfs2_free_di(rgd, ip); 389 390 gfs2_trans_end(sdp); 391 clear_bit(GLF_STICKY, &ip->i_gl->gl_flags); 392 393 out_rg_gunlock: 394 gfs2_glock_dq_uninit(&al->al_rgd_gh); 395 out_rindex_relse: 396 gfs2_glock_dq_uninit(&al->al_ri_gh); 397 out_qs: 398 gfs2_quota_unhold(ip); 399 out: 400 gfs2_alloc_put(ip); 401 return error; 402 } 403 404 /** 405 * gfs2_change_nlink - Change nlink count on inode 406 * @ip: The GFS2 inode 407 * @diff: The change in the nlink count required 408 * 409 * Returns: errno 410 */ 411 int gfs2_change_nlink(struct gfs2_inode *ip, int diff) 412 { 413 struct buffer_head *dibh; 414 u32 nlink; 415 int error; 416 417 BUG_ON(diff != 1 && diff != -1); 418 nlink = ip->i_inode.i_nlink + diff; 419 420 /* If we are reducing the nlink count, but the new value ends up being 421 bigger than the old one, we must have underflowed. */ 422 if (diff < 0 && nlink > ip->i_inode.i_nlink) { 423 if (gfs2_consist_inode(ip)) 424 gfs2_dinode_print(ip); 425 return -EIO; 426 } 427 428 error = gfs2_meta_inode_buffer(ip, &dibh); 429 if (error) 430 return error; 431 432 if (diff > 0) 433 inc_nlink(&ip->i_inode); 434 else 435 drop_nlink(&ip->i_inode); 436 437 ip->i_inode.i_ctime = CURRENT_TIME; 438 439 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 440 gfs2_dinode_out(ip, dibh->b_data); 441 brelse(dibh); 442 mark_inode_dirty(&ip->i_inode); 443 444 if (ip->i_inode.i_nlink == 0) 445 gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */ 446 447 return error; 448 } 449 450 struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) 451 { 452 struct qstr qstr; 453 struct inode *inode; 454 gfs2_str2qstr(&qstr, name); 455 inode = gfs2_lookupi(dip, &qstr, 1); 456 /* gfs2_lookupi has inconsistent callers: vfs 457 * related routines expect NULL for no entry found, 458 * gfs2_lookup_simple callers expect ENOENT 459 * and do not check for NULL. 460 */ 461 if (inode == NULL) 462 return ERR_PTR(-ENOENT); 463 else 464 return inode; 465 } 466 467 468 /** 469 * gfs2_lookupi - Look up a filename in a directory and return its inode 470 * @d_gh: An initialized holder for the directory glock 471 * @name: The name of the inode to look for 472 * @is_root: If 1, ignore the caller's permissions 473 * @i_gh: An uninitialized holder for the new inode glock 474 * 475 * This can be called via the VFS filldir function when NFS is doing 476 * a readdirplus and the inode which its intending to stat isn't 477 * already in cache. In this case we must not take the directory glock 478 * again, since the readdir call will have already taken that lock. 479 * 480 * Returns: errno 481 */ 482 483 struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, 484 int is_root) 485 { 486 struct super_block *sb = dir->i_sb; 487 struct gfs2_inode *dip = GFS2_I(dir); 488 struct gfs2_holder d_gh; 489 int error = 0; 490 struct inode *inode = NULL; 491 int unlock = 0; 492 493 if (!name->len || name->len > GFS2_FNAMESIZE) 494 return ERR_PTR(-ENAMETOOLONG); 495 496 if ((name->len == 1 && memcmp(name->name, ".", 1) == 0) || 497 (name->len == 2 && memcmp(name->name, "..", 2) == 0 && 498 dir == sb->s_root->d_inode)) { 499 igrab(dir); 500 return dir; 501 } 502 503 if (gfs2_glock_is_locked_by_me(dip->i_gl) == NULL) { 504 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); 505 if (error) 506 return ERR_PTR(error); 507 unlock = 1; 508 } 509 510 if (!is_root) { 511 error = gfs2_permission(dir, MAY_EXEC); 512 if (error) 513 goto out; 514 } 515 516 inode = gfs2_dir_search(dir, name); 517 if (IS_ERR(inode)) 518 error = PTR_ERR(inode); 519 out: 520 if (unlock) 521 gfs2_glock_dq_uninit(&d_gh); 522 if (error == -ENOENT) 523 return NULL; 524 return inode ? inode : ERR_PTR(error); 525 } 526 527 static void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf) 528 { 529 const struct gfs2_inum_range *str = buf; 530 531 ir->ir_start = be64_to_cpu(str->ir_start); 532 ir->ir_length = be64_to_cpu(str->ir_length); 533 } 534 535 static void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf) 536 { 537 struct gfs2_inum_range *str = buf; 538 539 str->ir_start = cpu_to_be64(ir->ir_start); 540 str->ir_length = cpu_to_be64(ir->ir_length); 541 } 542 543 static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino) 544 { 545 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode); 546 struct buffer_head *bh; 547 struct gfs2_inum_range_host ir; 548 int error; 549 550 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 551 if (error) 552 return error; 553 mutex_lock(&sdp->sd_inum_mutex); 554 555 error = gfs2_meta_inode_buffer(ip, &bh); 556 if (error) { 557 mutex_unlock(&sdp->sd_inum_mutex); 558 gfs2_trans_end(sdp); 559 return error; 560 } 561 562 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode)); 563 564 if (ir.ir_length) { 565 *formal_ino = ir.ir_start++; 566 ir.ir_length--; 567 gfs2_trans_add_bh(ip->i_gl, bh, 1); 568 gfs2_inum_range_out(&ir, 569 bh->b_data + sizeof(struct gfs2_dinode)); 570 brelse(bh); 571 mutex_unlock(&sdp->sd_inum_mutex); 572 gfs2_trans_end(sdp); 573 return 0; 574 } 575 576 brelse(bh); 577 578 mutex_unlock(&sdp->sd_inum_mutex); 579 gfs2_trans_end(sdp); 580 581 return 1; 582 } 583 584 static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino) 585 { 586 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode); 587 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_inum_inode); 588 struct gfs2_holder gh; 589 struct buffer_head *bh; 590 struct gfs2_inum_range_host ir; 591 int error; 592 593 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 594 if (error) 595 return error; 596 597 error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0); 598 if (error) 599 goto out; 600 mutex_lock(&sdp->sd_inum_mutex); 601 602 error = gfs2_meta_inode_buffer(ip, &bh); 603 if (error) 604 goto out_end_trans; 605 606 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode)); 607 608 if (!ir.ir_length) { 609 struct buffer_head *m_bh; 610 u64 x, y; 611 __be64 z; 612 613 error = gfs2_meta_inode_buffer(m_ip, &m_bh); 614 if (error) 615 goto out_brelse; 616 617 z = *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)); 618 x = y = be64_to_cpu(z); 619 ir.ir_start = x; 620 ir.ir_length = GFS2_INUM_QUANTUM; 621 x += GFS2_INUM_QUANTUM; 622 if (x < y) 623 gfs2_consist_inode(m_ip); 624 z = cpu_to_be64(x); 625 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1); 626 *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = z; 627 628 brelse(m_bh); 629 } 630 631 *formal_ino = ir.ir_start++; 632 ir.ir_length--; 633 634 gfs2_trans_add_bh(ip->i_gl, bh, 1); 635 gfs2_inum_range_out(&ir, bh->b_data + sizeof(struct gfs2_dinode)); 636 637 out_brelse: 638 brelse(bh); 639 out_end_trans: 640 mutex_unlock(&sdp->sd_inum_mutex); 641 gfs2_trans_end(sdp); 642 out: 643 gfs2_glock_dq_uninit(&gh); 644 return error; 645 } 646 647 static int pick_formal_ino(struct gfs2_sbd *sdp, u64 *inum) 648 { 649 int error; 650 651 error = pick_formal_ino_1(sdp, inum); 652 if (error <= 0) 653 return error; 654 655 error = pick_formal_ino_2(sdp, inum); 656 657 return error; 658 } 659 660 /** 661 * create_ok - OK to create a new on-disk inode here? 662 * @dip: Directory in which dinode is to be created 663 * @name: Name of new dinode 664 * @mode: 665 * 666 * Returns: errno 667 */ 668 669 static int create_ok(struct gfs2_inode *dip, const struct qstr *name, 670 unsigned int mode) 671 { 672 int error; 673 674 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); 675 if (error) 676 return error; 677 678 /* Don't create entries in an unlinked directory */ 679 if (!dip->i_inode.i_nlink) 680 return -EPERM; 681 682 error = gfs2_dir_check(&dip->i_inode, name, NULL); 683 switch (error) { 684 case -ENOENT: 685 error = 0; 686 break; 687 case 0: 688 return -EEXIST; 689 default: 690 return error; 691 } 692 693 if (dip->i_di.di_entries == (u32)-1) 694 return -EFBIG; 695 if (S_ISDIR(mode) && dip->i_inode.i_nlink == (u32)-1) 696 return -EMLINK; 697 698 return 0; 699 } 700 701 static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode, 702 unsigned int *uid, unsigned int *gid) 703 { 704 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && 705 (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) { 706 if (S_ISDIR(*mode)) 707 *mode |= S_ISUID; 708 else if (dip->i_inode.i_uid != current->fsuid) 709 *mode &= ~07111; 710 *uid = dip->i_inode.i_uid; 711 } else 712 *uid = current->fsuid; 713 714 if (dip->i_inode.i_mode & S_ISGID) { 715 if (S_ISDIR(*mode)) 716 *mode |= S_ISGID; 717 *gid = dip->i_inode.i_gid; 718 } else 719 *gid = current->fsgid; 720 } 721 722 static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) 723 { 724 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 725 int error; 726 727 if (gfs2_alloc_get(dip) == NULL) 728 return -ENOMEM; 729 730 dip->i_alloc->al_requested = RES_DINODE; 731 error = gfs2_inplace_reserve(dip); 732 if (error) 733 goto out; 734 735 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS, 0); 736 if (error) 737 goto out_ipreserv; 738 739 *no_addr = gfs2_alloc_di(dip, generation); 740 741 gfs2_trans_end(sdp); 742 743 out_ipreserv: 744 gfs2_inplace_release(dip); 745 out: 746 gfs2_alloc_put(dip); 747 return error; 748 } 749 750 /** 751 * init_dinode - Fill in a new dinode structure 752 * @dip: the directory this inode is being created in 753 * @gl: The glock covering the new inode 754 * @inum: the inode number 755 * @mode: the file permissions 756 * @uid: 757 * @gid: 758 * 759 */ 760 761 static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 762 const struct gfs2_inum_host *inum, unsigned int mode, 763 unsigned int uid, unsigned int gid, 764 const u64 *generation, dev_t dev, struct buffer_head **bhp) 765 { 766 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 767 struct gfs2_dinode *di; 768 struct buffer_head *dibh; 769 struct timespec tv = CURRENT_TIME; 770 771 dibh = gfs2_meta_new(gl, inum->no_addr); 772 gfs2_trans_add_bh(gl, dibh, 1); 773 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); 774 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 775 di = (struct gfs2_dinode *)dibh->b_data; 776 777 di->di_num.no_formal_ino = cpu_to_be64(inum->no_formal_ino); 778 di->di_num.no_addr = cpu_to_be64(inum->no_addr); 779 di->di_mode = cpu_to_be32(mode); 780 di->di_uid = cpu_to_be32(uid); 781 di->di_gid = cpu_to_be32(gid); 782 di->di_nlink = 0; 783 di->di_size = 0; 784 di->di_blocks = cpu_to_be64(1); 785 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); 786 di->di_major = cpu_to_be32(MAJOR(dev)); 787 di->di_minor = cpu_to_be32(MINOR(dev)); 788 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); 789 di->di_generation = cpu_to_be64(*generation); 790 di->di_flags = 0; 791 792 if (S_ISREG(mode)) { 793 if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) || 794 gfs2_tune_get(sdp, gt_new_files_jdata)) 795 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); 796 } else if (S_ISDIR(mode)) { 797 di->di_flags |= cpu_to_be32(dip->i_di.di_flags & 798 GFS2_DIF_INHERIT_JDATA); 799 } 800 801 di->__pad1 = 0; 802 di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0); 803 di->di_height = 0; 804 di->__pad2 = 0; 805 di->__pad3 = 0; 806 di->di_depth = 0; 807 di->di_entries = 0; 808 memset(&di->__pad4, 0, sizeof(di->__pad4)); 809 di->di_eattr = 0; 810 di->di_atime_nsec = cpu_to_be32(tv.tv_nsec); 811 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec); 812 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); 813 memset(&di->di_reserved, 0, sizeof(di->di_reserved)); 814 815 set_buffer_uptodate(dibh); 816 817 *bhp = dibh; 818 } 819 820 static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 821 unsigned int mode, const struct gfs2_inum_host *inum, 822 const u64 *generation, dev_t dev, struct buffer_head **bhp) 823 { 824 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 825 unsigned int uid, gid; 826 int error; 827 828 munge_mode_uid_gid(dip, &mode, &uid, &gid); 829 if (!gfs2_alloc_get(dip)) 830 return -ENOMEM; 831 832 error = gfs2_quota_lock(dip, uid, gid); 833 if (error) 834 goto out; 835 836 error = gfs2_quota_check(dip, uid, gid); 837 if (error) 838 goto out_quota; 839 840 error = gfs2_trans_begin(sdp, RES_DINODE + RES_QUOTA, 0); 841 if (error) 842 goto out_quota; 843 844 init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp); 845 gfs2_quota_change(dip, +1, uid, gid); 846 gfs2_trans_end(sdp); 847 848 out_quota: 849 gfs2_quota_unlock(dip); 850 out: 851 gfs2_alloc_put(dip); 852 return error; 853 } 854 855 static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, 856 struct gfs2_inode *ip) 857 { 858 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 859 struct gfs2_alloc *al; 860 int alloc_required; 861 struct buffer_head *dibh; 862 int error; 863 864 al = gfs2_alloc_get(dip); 865 if (!al) 866 return -ENOMEM; 867 868 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 869 if (error) 870 goto fail; 871 872 error = alloc_required = gfs2_diradd_alloc_required(&dip->i_inode, name); 873 if (alloc_required < 0) 874 goto fail_quota_locks; 875 if (alloc_required) { 876 error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid); 877 if (error) 878 goto fail_quota_locks; 879 880 al->al_requested = sdp->sd_max_dirres; 881 882 error = gfs2_inplace_reserve(dip); 883 if (error) 884 goto fail_quota_locks; 885 886 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + 887 al->al_rgd->rd_length + 888 2 * RES_DINODE + 889 RES_STATFS + RES_QUOTA, 0); 890 if (error) 891 goto fail_ipreserv; 892 } else { 893 error = gfs2_trans_begin(sdp, RES_LEAF + 2 * RES_DINODE, 0); 894 if (error) 895 goto fail_quota_locks; 896 } 897 898 error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode)); 899 if (error) 900 goto fail_end_trans; 901 902 error = gfs2_meta_inode_buffer(ip, &dibh); 903 if (error) 904 goto fail_end_trans; 905 ip->i_inode.i_nlink = 1; 906 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 907 gfs2_dinode_out(ip, dibh->b_data); 908 brelse(dibh); 909 return 0; 910 911 fail_end_trans: 912 gfs2_trans_end(sdp); 913 914 fail_ipreserv: 915 if (dip->i_alloc->al_rgd) 916 gfs2_inplace_release(dip); 917 918 fail_quota_locks: 919 gfs2_quota_unlock(dip); 920 921 fail: 922 gfs2_alloc_put(dip); 923 return error; 924 } 925 926 static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip) 927 { 928 int err; 929 size_t len; 930 void *value; 931 char *name; 932 struct gfs2_ea_request er; 933 934 err = security_inode_init_security(&ip->i_inode, &dip->i_inode, 935 &name, &value, &len); 936 937 if (err) { 938 if (err == -EOPNOTSUPP) 939 return 0; 940 return err; 941 } 942 943 memset(&er, 0, sizeof(struct gfs2_ea_request)); 944 945 er.er_type = GFS2_EATYPE_SECURITY; 946 er.er_name = name; 947 er.er_data = value; 948 er.er_name_len = strlen(name); 949 er.er_data_len = len; 950 951 err = gfs2_ea_set_i(ip, &er); 952 953 kfree(value); 954 kfree(name); 955 956 return err; 957 } 958 959 /** 960 * gfs2_createi - Create a new inode 961 * @ghs: An array of two holders 962 * @name: The name of the new file 963 * @mode: the permissions on the new inode 964 * 965 * @ghs[0] is an initialized holder for the directory 966 * @ghs[1] is the holder for the inode lock 967 * 968 * If the return value is not NULL, the glocks on both the directory and the new 969 * file are held. A transaction has been started and an inplace reservation 970 * is held, as well. 971 * 972 * Returns: An inode 973 */ 974 975 struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, 976 unsigned int mode, dev_t dev) 977 { 978 struct inode *inode = NULL; 979 struct gfs2_inode *dip = ghs->gh_gl->gl_object; 980 struct inode *dir = &dip->i_inode; 981 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 982 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; 983 int error; 984 u64 generation; 985 struct buffer_head *bh = NULL; 986 987 if (!name->len || name->len > GFS2_FNAMESIZE) 988 return ERR_PTR(-ENAMETOOLONG); 989 990 gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs); 991 error = gfs2_glock_nq(ghs); 992 if (error) 993 goto fail; 994 995 error = create_ok(dip, name, mode); 996 if (error) 997 goto fail_gunlock; 998 999 error = pick_formal_ino(sdp, &inum.no_formal_ino); 1000 if (error) 1001 goto fail_gunlock; 1002 1003 error = alloc_dinode(dip, &inum.no_addr, &generation); 1004 if (error) 1005 goto fail_gunlock; 1006 1007 error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops, 1008 LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); 1009 if (error) 1010 goto fail_gunlock; 1011 1012 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh); 1013 if (error) 1014 goto fail_gunlock2; 1015 1016 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), 1017 inum.no_addr, 1018 inum.no_formal_ino, 0); 1019 if (IS_ERR(inode)) 1020 goto fail_gunlock2; 1021 1022 error = gfs2_inode_refresh(GFS2_I(inode)); 1023 if (error) 1024 goto fail_gunlock2; 1025 1026 error = gfs2_acl_create(dip, GFS2_I(inode)); 1027 if (error) 1028 goto fail_gunlock2; 1029 1030 error = gfs2_security_init(dip, GFS2_I(inode)); 1031 if (error) 1032 goto fail_gunlock2; 1033 1034 error = link_dinode(dip, name, GFS2_I(inode)); 1035 if (error) 1036 goto fail_gunlock2; 1037 1038 if (bh) 1039 brelse(bh); 1040 return inode; 1041 1042 fail_gunlock2: 1043 gfs2_glock_dq_uninit(ghs + 1); 1044 if (inode && !IS_ERR(inode)) 1045 iput(inode); 1046 fail_gunlock: 1047 gfs2_glock_dq(ghs); 1048 fail: 1049 if (bh) 1050 brelse(bh); 1051 return ERR_PTR(error); 1052 } 1053 1054 /** 1055 * gfs2_rmdiri - Remove a directory 1056 * @dip: The parent directory of the directory to be removed 1057 * @name: The name of the directory to be removed 1058 * @ip: The GFS2 inode of the directory to be removed 1059 * 1060 * Assumes Glocks on dip and ip are held 1061 * 1062 * Returns: errno 1063 */ 1064 1065 int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, 1066 struct gfs2_inode *ip) 1067 { 1068 struct qstr dotname; 1069 int error; 1070 1071 if (ip->i_di.di_entries != 2) { 1072 if (gfs2_consist_inode(ip)) 1073 gfs2_dinode_print(ip); 1074 return -EIO; 1075 } 1076 1077 error = gfs2_dir_del(dip, name); 1078 if (error) 1079 return error; 1080 1081 error = gfs2_change_nlink(dip, -1); 1082 if (error) 1083 return error; 1084 1085 gfs2_str2qstr(&dotname, "."); 1086 error = gfs2_dir_del(ip, &dotname); 1087 if (error) 1088 return error; 1089 1090 gfs2_str2qstr(&dotname, ".."); 1091 error = gfs2_dir_del(ip, &dotname); 1092 if (error) 1093 return error; 1094 1095 /* It looks odd, but it really should be done twice */ 1096 error = gfs2_change_nlink(ip, -1); 1097 if (error) 1098 return error; 1099 1100 error = gfs2_change_nlink(ip, -1); 1101 if (error) 1102 return error; 1103 1104 return error; 1105 } 1106 1107 /* 1108 * gfs2_unlink_ok - check to see that a inode is still in a directory 1109 * @dip: the directory 1110 * @name: the name of the file 1111 * @ip: the inode 1112 * 1113 * Assumes that the lock on (at least) @dip is held. 1114 * 1115 * Returns: 0 if the parent/child relationship is correct, errno if it isn't 1116 */ 1117 1118 int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, 1119 const struct gfs2_inode *ip) 1120 { 1121 int error; 1122 1123 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) 1124 return -EPERM; 1125 1126 if ((dip->i_inode.i_mode & S_ISVTX) && 1127 dip->i_inode.i_uid != current->fsuid && 1128 ip->i_inode.i_uid != current->fsuid && !capable(CAP_FOWNER)) 1129 return -EPERM; 1130 1131 if (IS_APPEND(&dip->i_inode)) 1132 return -EPERM; 1133 1134 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); 1135 if (error) 1136 return error; 1137 1138 error = gfs2_dir_check(&dip->i_inode, name, ip); 1139 if (error) 1140 return error; 1141 1142 return 0; 1143 } 1144 1145 /** 1146 * gfs2_readlinki - return the contents of a symlink 1147 * @ip: the symlink's inode 1148 * @buf: a pointer to the buffer to be filled 1149 * @len: a pointer to the length of @buf 1150 * 1151 * If @buf is too small, a piece of memory is kmalloc()ed and needs 1152 * to be freed by the caller. 1153 * 1154 * Returns: errno 1155 */ 1156 1157 int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) 1158 { 1159 struct gfs2_holder i_gh; 1160 struct buffer_head *dibh; 1161 unsigned int x; 1162 int error; 1163 1164 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); 1165 error = gfs2_glock_nq(&i_gh); 1166 if (error) { 1167 gfs2_holder_uninit(&i_gh); 1168 return error; 1169 } 1170 1171 if (!ip->i_di.di_size) { 1172 gfs2_consist_inode(ip); 1173 error = -EIO; 1174 goto out; 1175 } 1176 1177 error = gfs2_meta_inode_buffer(ip, &dibh); 1178 if (error) 1179 goto out; 1180 1181 x = ip->i_di.di_size + 1; 1182 if (x > *len) { 1183 *buf = kmalloc(x, GFP_NOFS); 1184 if (!*buf) { 1185 error = -ENOMEM; 1186 goto out_brelse; 1187 } 1188 } 1189 1190 memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x); 1191 *len = x; 1192 1193 out_brelse: 1194 brelse(dibh); 1195 out: 1196 gfs2_glock_dq_uninit(&i_gh); 1197 return error; 1198 } 1199 1200 static int 1201 __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) 1202 { 1203 struct buffer_head *dibh; 1204 int error; 1205 1206 error = gfs2_meta_inode_buffer(ip, &dibh); 1207 if (!error) { 1208 error = inode_setattr(&ip->i_inode, attr); 1209 gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); 1210 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1211 gfs2_dinode_out(ip, dibh->b_data); 1212 brelse(dibh); 1213 } 1214 return error; 1215 } 1216 1217 /** 1218 * gfs2_setattr_simple - 1219 * @ip: 1220 * @attr: 1221 * 1222 * Called with a reference on the vnode. 1223 * 1224 * Returns: errno 1225 */ 1226 1227 int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) 1228 { 1229 int error; 1230 1231 if (current->journal_info) 1232 return __gfs2_setattr_simple(ip, attr); 1233 1234 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE, 0); 1235 if (error) 1236 return error; 1237 1238 error = __gfs2_setattr_simple(ip, attr); 1239 gfs2_trans_end(GFS2_SB(&ip->i_inode)); 1240 return error; 1241 } 1242 1243 void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) 1244 { 1245 const struct gfs2_dinode_host *di = &ip->i_di; 1246 struct gfs2_dinode *str = buf; 1247 1248 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 1249 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); 1250 str->di_header.__pad0 = 0; 1251 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); 1252 str->di_header.__pad1 = 0; 1253 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); 1254 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); 1255 str->di_mode = cpu_to_be32(ip->i_inode.i_mode); 1256 str->di_uid = cpu_to_be32(ip->i_inode.i_uid); 1257 str->di_gid = cpu_to_be32(ip->i_inode.i_gid); 1258 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); 1259 str->di_size = cpu_to_be64(di->di_size); 1260 str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); 1261 str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); 1262 str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); 1263 str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); 1264 1265 str->di_goal_meta = cpu_to_be64(ip->i_goal); 1266 str->di_goal_data = cpu_to_be64(ip->i_goal); 1267 str->di_generation = cpu_to_be64(di->di_generation); 1268 1269 str->di_flags = cpu_to_be32(di->di_flags); 1270 str->di_height = cpu_to_be16(ip->i_height); 1271 str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && 1272 !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ? 1273 GFS2_FORMAT_DE : 0); 1274 str->di_depth = cpu_to_be16(ip->i_depth); 1275 str->di_entries = cpu_to_be32(di->di_entries); 1276 1277 str->di_eattr = cpu_to_be64(di->di_eattr); 1278 str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); 1279 str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec); 1280 str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec); 1281 } 1282 1283 void gfs2_dinode_print(const struct gfs2_inode *ip) 1284 { 1285 const struct gfs2_dinode_host *di = &ip->i_di; 1286 1287 printk(KERN_INFO " no_formal_ino = %llu\n", 1288 (unsigned long long)ip->i_no_formal_ino); 1289 printk(KERN_INFO " no_addr = %llu\n", 1290 (unsigned long long)ip->i_no_addr); 1291 printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size); 1292 printk(KERN_INFO " blocks = %llu\n", 1293 (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode)); 1294 printk(KERN_INFO " i_goal = %llu\n", 1295 (unsigned long long)ip->i_goal); 1296 printk(KERN_INFO " di_flags = 0x%.8X\n", di->di_flags); 1297 printk(KERN_INFO " i_height = %u\n", ip->i_height); 1298 printk(KERN_INFO " i_depth = %u\n", ip->i_depth); 1299 printk(KERN_INFO " di_entries = %u\n", di->di_entries); 1300 printk(KERN_INFO " di_eattr = %llu\n", 1301 (unsigned long long)di->di_eattr); 1302 } 1303 1304