1 /* 2 * hugetlbpage-backed filesystem. Based on ramfs. 3 * 4 * William Irwin, 2002 5 * 6 * Copyright (C) 2002 Linus Torvalds. 7 */ 8 9 #include <linux/module.h> 10 #include <linux/thread_info.h> 11 #include <asm/current.h> 12 #include <linux/sched.h> /* remove ASAP */ 13 #include <linux/fs.h> 14 #include <linux/mount.h> 15 #include <linux/file.h> 16 #include <linux/kernel.h> 17 #include <linux/writeback.h> 18 #include <linux/pagemap.h> 19 #include <linux/highmem.h> 20 #include <linux/init.h> 21 #include <linux/string.h> 22 #include <linux/capability.h> 23 #include <linux/ctype.h> 24 #include <linux/backing-dev.h> 25 #include <linux/hugetlb.h> 26 #include <linux/pagevec.h> 27 #include <linux/parser.h> 28 #include <linux/mman.h> 29 #include <linux/quotaops.h> 30 #include <linux/slab.h> 31 #include <linux/dnotify.h> 32 #include <linux/statfs.h> 33 #include <linux/security.h> 34 35 #include <asm/uaccess.h> 36 37 /* some random number */ 38 #define HUGETLBFS_MAGIC 0x958458f6 39 40 static const struct super_operations hugetlbfs_ops; 41 static const struct address_space_operations hugetlbfs_aops; 42 const struct file_operations hugetlbfs_file_operations; 43 static const struct inode_operations hugetlbfs_dir_inode_operations; 44 static const struct inode_operations hugetlbfs_inode_operations; 45 46 static struct backing_dev_info hugetlbfs_backing_dev_info = { 47 .ra_pages = 0, /* No readahead */ 48 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 49 }; 50 51 int sysctl_hugetlb_shm_group; 52 53 enum { 54 Opt_size, Opt_nr_inodes, 55 Opt_mode, Opt_uid, Opt_gid, 56 Opt_err, 57 }; 58 59 static match_table_t tokens = { 60 {Opt_size, "size=%s"}, 61 {Opt_nr_inodes, "nr_inodes=%s"}, 62 {Opt_mode, "mode=%o"}, 63 {Opt_uid, "uid=%u"}, 64 {Opt_gid, "gid=%u"}, 65 {Opt_err, NULL}, 66 }; 67 68 static void huge_pagevec_release(struct pagevec *pvec) 69 { 70 int i; 71 72 for (i = 0; i < pagevec_count(pvec); ++i) 73 put_page(pvec->pages[i]); 74 75 pagevec_reinit(pvec); 76 } 77 78 static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) 79 { 80 struct inode *inode = file->f_path.dentry->d_inode; 81 loff_t len, vma_len; 82 int ret; 83 84 /* 85 * vma alignment has already been checked by prepare_hugepage_range. 86 * If you add any error returns here, do so after setting VM_HUGETLB, 87 * so is_vm_hugetlb_page tests below unmap_region go the right way 88 * when do_mmap_pgoff unwinds (may be important on powerpc and ia64). 89 */ 90 vma->vm_flags |= VM_HUGETLB | VM_RESERVED; 91 vma->vm_ops = &hugetlb_vm_ops; 92 93 vma_len = (loff_t)(vma->vm_end - vma->vm_start); 94 95 mutex_lock(&inode->i_mutex); 96 file_accessed(file); 97 98 ret = -ENOMEM; 99 len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); 100 101 if (vma->vm_flags & VM_MAYSHARE && 102 hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT), 103 len >> HPAGE_SHIFT)) 104 goto out; 105 106 ret = 0; 107 hugetlb_prefault_arch_hook(vma->vm_mm); 108 if (vma->vm_flags & VM_WRITE && inode->i_size < len) 109 inode->i_size = len; 110 out: 111 mutex_unlock(&inode->i_mutex); 112 113 return ret; 114 } 115 116 /* 117 * Called under down_write(mmap_sem). 118 */ 119 120 #ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA 121 static unsigned long 122 hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 123 unsigned long len, unsigned long pgoff, unsigned long flags) 124 { 125 struct mm_struct *mm = current->mm; 126 struct vm_area_struct *vma; 127 unsigned long start_addr; 128 129 if (len & ~HPAGE_MASK) 130 return -EINVAL; 131 if (len > TASK_SIZE) 132 return -ENOMEM; 133 134 if (flags & MAP_FIXED) { 135 if (prepare_hugepage_range(addr, len, pgoff)) 136 return -EINVAL; 137 return addr; 138 } 139 140 if (addr) { 141 addr = ALIGN(addr, HPAGE_SIZE); 142 vma = find_vma(mm, addr); 143 if (TASK_SIZE - len >= addr && 144 (!vma || addr + len <= vma->vm_start)) 145 return addr; 146 } 147 148 start_addr = mm->free_area_cache; 149 150 if (len <= mm->cached_hole_size) 151 start_addr = TASK_UNMAPPED_BASE; 152 153 full_search: 154 addr = ALIGN(start_addr, HPAGE_SIZE); 155 156 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { 157 /* At this point: (!vma || addr < vma->vm_end). */ 158 if (TASK_SIZE - len < addr) { 159 /* 160 * Start a new search - just in case we missed 161 * some holes. 162 */ 163 if (start_addr != TASK_UNMAPPED_BASE) { 164 start_addr = TASK_UNMAPPED_BASE; 165 goto full_search; 166 } 167 return -ENOMEM; 168 } 169 170 if (!vma || addr + len <= vma->vm_start) 171 return addr; 172 addr = ALIGN(vma->vm_end, HPAGE_SIZE); 173 } 174 } 175 #endif 176 177 /* 178 * Read a page. Again trivial. If it didn't already exist 179 * in the page cache, it is zero-filled. 180 */ 181 static int hugetlbfs_readpage(struct file *file, struct page * page) 182 { 183 unlock_page(page); 184 return -EINVAL; 185 } 186 187 static int hugetlbfs_prepare_write(struct file *file, 188 struct page *page, unsigned offset, unsigned to) 189 { 190 return -EINVAL; 191 } 192 193 static int hugetlbfs_commit_write(struct file *file, 194 struct page *page, unsigned offset, unsigned to) 195 { 196 return -EINVAL; 197 } 198 199 static void truncate_huge_page(struct page *page) 200 { 201 cancel_dirty_page(page, /* No IO accounting for huge pages? */0); 202 ClearPageUptodate(page); 203 remove_from_page_cache(page); 204 put_page(page); 205 } 206 207 static void truncate_hugepages(struct inode *inode, loff_t lstart) 208 { 209 struct address_space *mapping = &inode->i_data; 210 const pgoff_t start = lstart >> HPAGE_SHIFT; 211 struct pagevec pvec; 212 pgoff_t next; 213 int i, freed = 0; 214 215 pagevec_init(&pvec, 0); 216 next = start; 217 while (1) { 218 if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 219 if (next == start) 220 break; 221 next = start; 222 continue; 223 } 224 225 for (i = 0; i < pagevec_count(&pvec); ++i) { 226 struct page *page = pvec.pages[i]; 227 228 lock_page(page); 229 if (page->index > next) 230 next = page->index; 231 ++next; 232 truncate_huge_page(page); 233 unlock_page(page); 234 hugetlb_put_quota(mapping); 235 freed++; 236 } 237 huge_pagevec_release(&pvec); 238 } 239 BUG_ON(!lstart && mapping->nrpages); 240 hugetlb_unreserve_pages(inode, start, freed); 241 } 242 243 static void hugetlbfs_delete_inode(struct inode *inode) 244 { 245 truncate_hugepages(inode, 0); 246 clear_inode(inode); 247 } 248 249 static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock) 250 { 251 struct super_block *sb = inode->i_sb; 252 253 if (!hlist_unhashed(&inode->i_hash)) { 254 if (!(inode->i_state & (I_DIRTY|I_LOCK))) 255 list_move(&inode->i_list, &inode_unused); 256 inodes_stat.nr_unused++; 257 if (!sb || (sb->s_flags & MS_ACTIVE)) { 258 spin_unlock(&inode_lock); 259 return; 260 } 261 inode->i_state |= I_WILL_FREE; 262 spin_unlock(&inode_lock); 263 /* 264 * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK 265 * in our backing_dev_info. 266 */ 267 write_inode_now(inode, 1); 268 spin_lock(&inode_lock); 269 inode->i_state &= ~I_WILL_FREE; 270 inodes_stat.nr_unused--; 271 hlist_del_init(&inode->i_hash); 272 } 273 list_del_init(&inode->i_list); 274 list_del_init(&inode->i_sb_list); 275 inode->i_state |= I_FREEING; 276 inodes_stat.nr_inodes--; 277 spin_unlock(&inode_lock); 278 truncate_hugepages(inode, 0); 279 clear_inode(inode); 280 destroy_inode(inode); 281 } 282 283 static void hugetlbfs_drop_inode(struct inode *inode) 284 { 285 if (!inode->i_nlink) 286 generic_delete_inode(inode); 287 else 288 hugetlbfs_forget_inode(inode); 289 } 290 291 static inline void 292 hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff) 293 { 294 struct vm_area_struct *vma; 295 struct prio_tree_iter iter; 296 297 vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) { 298 unsigned long v_offset; 299 300 /* 301 * Can the expression below overflow on 32-bit arches? 302 * No, because the prio_tree returns us only those vmas 303 * which overlap the truncated area starting at pgoff, 304 * and no vma on a 32-bit arch can span beyond the 4GB. 305 */ 306 if (vma->vm_pgoff < pgoff) 307 v_offset = (pgoff - vma->vm_pgoff) << PAGE_SHIFT; 308 else 309 v_offset = 0; 310 311 __unmap_hugepage_range(vma, 312 vma->vm_start + v_offset, vma->vm_end); 313 } 314 } 315 316 /* 317 * Expanding truncates are not allowed. 318 */ 319 static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) 320 { 321 pgoff_t pgoff; 322 struct address_space *mapping = inode->i_mapping; 323 324 if (offset > inode->i_size) 325 return -EINVAL; 326 327 BUG_ON(offset & ~HPAGE_MASK); 328 pgoff = offset >> PAGE_SHIFT; 329 330 inode->i_size = offset; 331 spin_lock(&mapping->i_mmap_lock); 332 if (!prio_tree_empty(&mapping->i_mmap)) 333 hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); 334 spin_unlock(&mapping->i_mmap_lock); 335 truncate_hugepages(inode, offset); 336 return 0; 337 } 338 339 static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) 340 { 341 struct inode *inode = dentry->d_inode; 342 int error; 343 unsigned int ia_valid = attr->ia_valid; 344 345 BUG_ON(!inode); 346 347 error = inode_change_ok(inode, attr); 348 if (error) 349 goto out; 350 351 if (ia_valid & ATTR_SIZE) { 352 error = -EINVAL; 353 if (!(attr->ia_size & ~HPAGE_MASK)) 354 error = hugetlb_vmtruncate(inode, attr->ia_size); 355 if (error) 356 goto out; 357 attr->ia_valid &= ~ATTR_SIZE; 358 } 359 error = inode_setattr(inode, attr); 360 out: 361 return error; 362 } 363 364 static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, 365 gid_t gid, int mode, dev_t dev) 366 { 367 struct inode *inode; 368 369 inode = new_inode(sb); 370 if (inode) { 371 struct hugetlbfs_inode_info *info; 372 inode->i_mode = mode; 373 inode->i_uid = uid; 374 inode->i_gid = gid; 375 inode->i_blocks = 0; 376 inode->i_mapping->a_ops = &hugetlbfs_aops; 377 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; 378 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 379 INIT_LIST_HEAD(&inode->i_mapping->private_list); 380 info = HUGETLBFS_I(inode); 381 mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL); 382 switch (mode & S_IFMT) { 383 default: 384 init_special_inode(inode, mode, dev); 385 break; 386 case S_IFREG: 387 inode->i_op = &hugetlbfs_inode_operations; 388 inode->i_fop = &hugetlbfs_file_operations; 389 break; 390 case S_IFDIR: 391 inode->i_op = &hugetlbfs_dir_inode_operations; 392 inode->i_fop = &simple_dir_operations; 393 394 /* directory inodes start off with i_nlink == 2 (for "." entry) */ 395 inc_nlink(inode); 396 break; 397 case S_IFLNK: 398 inode->i_op = &page_symlink_inode_operations; 399 break; 400 } 401 } 402 return inode; 403 } 404 405 /* 406 * File creation. Allocate an inode, and we're done.. 407 */ 408 static int hugetlbfs_mknod(struct inode *dir, 409 struct dentry *dentry, int mode, dev_t dev) 410 { 411 struct inode *inode; 412 int error = -ENOSPC; 413 gid_t gid; 414 415 if (dir->i_mode & S_ISGID) { 416 gid = dir->i_gid; 417 if (S_ISDIR(mode)) 418 mode |= S_ISGID; 419 } else { 420 gid = current->fsgid; 421 } 422 inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid, gid, mode, dev); 423 if (inode) { 424 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 425 d_instantiate(dentry, inode); 426 dget(dentry); /* Extra count - pin the dentry in core */ 427 error = 0; 428 } 429 return error; 430 } 431 432 static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 433 { 434 int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0); 435 if (!retval) 436 inc_nlink(dir); 437 return retval; 438 } 439 440 static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) 441 { 442 return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0); 443 } 444 445 static int hugetlbfs_symlink(struct inode *dir, 446 struct dentry *dentry, const char *symname) 447 { 448 struct inode *inode; 449 int error = -ENOSPC; 450 gid_t gid; 451 452 if (dir->i_mode & S_ISGID) 453 gid = dir->i_gid; 454 else 455 gid = current->fsgid; 456 457 inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid, 458 gid, S_IFLNK|S_IRWXUGO, 0); 459 if (inode) { 460 int l = strlen(symname)+1; 461 error = page_symlink(inode, symname, l); 462 if (!error) { 463 d_instantiate(dentry, inode); 464 dget(dentry); 465 } else 466 iput(inode); 467 } 468 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 469 470 return error; 471 } 472 473 /* 474 * mark the head page dirty 475 */ 476 static int hugetlbfs_set_page_dirty(struct page *page) 477 { 478 struct page *head = compound_head(page); 479 480 SetPageDirty(head); 481 return 0; 482 } 483 484 static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) 485 { 486 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); 487 488 buf->f_type = HUGETLBFS_MAGIC; 489 buf->f_bsize = HPAGE_SIZE; 490 if (sbinfo) { 491 spin_lock(&sbinfo->stat_lock); 492 /* If no limits set, just report 0 for max/free/used 493 * blocks, like simple_statfs() */ 494 if (sbinfo->max_blocks >= 0) { 495 buf->f_blocks = sbinfo->max_blocks; 496 buf->f_bavail = buf->f_bfree = sbinfo->free_blocks; 497 buf->f_files = sbinfo->max_inodes; 498 buf->f_ffree = sbinfo->free_inodes; 499 } 500 spin_unlock(&sbinfo->stat_lock); 501 } 502 buf->f_namelen = NAME_MAX; 503 return 0; 504 } 505 506 static void hugetlbfs_put_super(struct super_block *sb) 507 { 508 struct hugetlbfs_sb_info *sbi = HUGETLBFS_SB(sb); 509 510 if (sbi) { 511 sb->s_fs_info = NULL; 512 kfree(sbi); 513 } 514 } 515 516 static inline int hugetlbfs_dec_free_inodes(struct hugetlbfs_sb_info *sbinfo) 517 { 518 if (sbinfo->free_inodes >= 0) { 519 spin_lock(&sbinfo->stat_lock); 520 if (unlikely(!sbinfo->free_inodes)) { 521 spin_unlock(&sbinfo->stat_lock); 522 return 0; 523 } 524 sbinfo->free_inodes--; 525 spin_unlock(&sbinfo->stat_lock); 526 } 527 528 return 1; 529 } 530 531 static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo) 532 { 533 if (sbinfo->free_inodes >= 0) { 534 spin_lock(&sbinfo->stat_lock); 535 sbinfo->free_inodes++; 536 spin_unlock(&sbinfo->stat_lock); 537 } 538 } 539 540 541 static struct kmem_cache *hugetlbfs_inode_cachep; 542 543 static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) 544 { 545 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb); 546 struct hugetlbfs_inode_info *p; 547 548 if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo))) 549 return NULL; 550 p = kmem_cache_alloc(hugetlbfs_inode_cachep, GFP_KERNEL); 551 if (unlikely(!p)) { 552 hugetlbfs_inc_free_inodes(sbinfo); 553 return NULL; 554 } 555 return &p->vfs_inode; 556 } 557 558 static void hugetlbfs_destroy_inode(struct inode *inode) 559 { 560 hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb)); 561 mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); 562 kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); 563 } 564 565 static const struct address_space_operations hugetlbfs_aops = { 566 .readpage = hugetlbfs_readpage, 567 .prepare_write = hugetlbfs_prepare_write, 568 .commit_write = hugetlbfs_commit_write, 569 .set_page_dirty = hugetlbfs_set_page_dirty, 570 }; 571 572 573 static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) 574 { 575 struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; 576 577 inode_init_once(&ei->vfs_inode); 578 } 579 580 const struct file_operations hugetlbfs_file_operations = { 581 .mmap = hugetlbfs_file_mmap, 582 .fsync = simple_sync_file, 583 .get_unmapped_area = hugetlb_get_unmapped_area, 584 }; 585 586 static const struct inode_operations hugetlbfs_dir_inode_operations = { 587 .create = hugetlbfs_create, 588 .lookup = simple_lookup, 589 .link = simple_link, 590 .unlink = simple_unlink, 591 .symlink = hugetlbfs_symlink, 592 .mkdir = hugetlbfs_mkdir, 593 .rmdir = simple_rmdir, 594 .mknod = hugetlbfs_mknod, 595 .rename = simple_rename, 596 .setattr = hugetlbfs_setattr, 597 }; 598 599 static const struct inode_operations hugetlbfs_inode_operations = { 600 .setattr = hugetlbfs_setattr, 601 }; 602 603 static const struct super_operations hugetlbfs_ops = { 604 .alloc_inode = hugetlbfs_alloc_inode, 605 .destroy_inode = hugetlbfs_destroy_inode, 606 .statfs = hugetlbfs_statfs, 607 .delete_inode = hugetlbfs_delete_inode, 608 .drop_inode = hugetlbfs_drop_inode, 609 .put_super = hugetlbfs_put_super, 610 }; 611 612 static int 613 hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) 614 { 615 char *p, *rest; 616 substring_t args[MAX_OPT_ARGS]; 617 int option; 618 619 if (!options) 620 return 0; 621 622 while ((p = strsep(&options, ",")) != NULL) { 623 int token; 624 if (!*p) 625 continue; 626 627 token = match_token(p, tokens, args); 628 switch (token) { 629 case Opt_uid: 630 if (match_int(&args[0], &option)) 631 goto bad_val; 632 pconfig->uid = option; 633 break; 634 635 case Opt_gid: 636 if (match_int(&args[0], &option)) 637 goto bad_val; 638 pconfig->gid = option; 639 break; 640 641 case Opt_mode: 642 if (match_octal(&args[0], &option)) 643 goto bad_val; 644 pconfig->mode = option & 0777U; 645 break; 646 647 case Opt_size: { 648 unsigned long long size; 649 /* memparse() will accept a K/M/G without a digit */ 650 if (!isdigit(*args[0].from)) 651 goto bad_val; 652 size = memparse(args[0].from, &rest); 653 if (*rest == '%') { 654 size <<= HPAGE_SHIFT; 655 size *= max_huge_pages; 656 do_div(size, 100); 657 } 658 pconfig->nr_blocks = (size >> HPAGE_SHIFT); 659 break; 660 } 661 662 case Opt_nr_inodes: 663 /* memparse() will accept a K/M/G without a digit */ 664 if (!isdigit(*args[0].from)) 665 goto bad_val; 666 pconfig->nr_inodes = memparse(args[0].from, &rest); 667 break; 668 669 default: 670 printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n", 671 p); 672 return -EINVAL; 673 break; 674 } 675 } 676 return 0; 677 678 bad_val: 679 printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n", 680 args[0].from, p); 681 return 1; 682 } 683 684 static int 685 hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) 686 { 687 struct inode * inode; 688 struct dentry * root; 689 int ret; 690 struct hugetlbfs_config config; 691 struct hugetlbfs_sb_info *sbinfo; 692 693 config.nr_blocks = -1; /* No limit on size by default */ 694 config.nr_inodes = -1; /* No limit on number of inodes by default */ 695 config.uid = current->fsuid; 696 config.gid = current->fsgid; 697 config.mode = 0755; 698 ret = hugetlbfs_parse_options(data, &config); 699 if (ret) 700 return ret; 701 702 sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL); 703 if (!sbinfo) 704 return -ENOMEM; 705 sb->s_fs_info = sbinfo; 706 spin_lock_init(&sbinfo->stat_lock); 707 sbinfo->max_blocks = config.nr_blocks; 708 sbinfo->free_blocks = config.nr_blocks; 709 sbinfo->max_inodes = config.nr_inodes; 710 sbinfo->free_inodes = config.nr_inodes; 711 sb->s_maxbytes = MAX_LFS_FILESIZE; 712 sb->s_blocksize = HPAGE_SIZE; 713 sb->s_blocksize_bits = HPAGE_SHIFT; 714 sb->s_magic = HUGETLBFS_MAGIC; 715 sb->s_op = &hugetlbfs_ops; 716 sb->s_time_gran = 1; 717 inode = hugetlbfs_get_inode(sb, config.uid, config.gid, 718 S_IFDIR | config.mode, 0); 719 if (!inode) 720 goto out_free; 721 722 root = d_alloc_root(inode); 723 if (!root) { 724 iput(inode); 725 goto out_free; 726 } 727 sb->s_root = root; 728 return 0; 729 out_free: 730 kfree(sbinfo); 731 return -ENOMEM; 732 } 733 734 int hugetlb_get_quota(struct address_space *mapping) 735 { 736 int ret = 0; 737 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); 738 739 if (sbinfo->free_blocks > -1) { 740 spin_lock(&sbinfo->stat_lock); 741 if (sbinfo->free_blocks > 0) 742 sbinfo->free_blocks--; 743 else 744 ret = -ENOMEM; 745 spin_unlock(&sbinfo->stat_lock); 746 } 747 748 return ret; 749 } 750 751 void hugetlb_put_quota(struct address_space *mapping) 752 { 753 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); 754 755 if (sbinfo->free_blocks > -1) { 756 spin_lock(&sbinfo->stat_lock); 757 sbinfo->free_blocks++; 758 spin_unlock(&sbinfo->stat_lock); 759 } 760 } 761 762 static int hugetlbfs_get_sb(struct file_system_type *fs_type, 763 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 764 { 765 return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt); 766 } 767 768 static struct file_system_type hugetlbfs_fs_type = { 769 .name = "hugetlbfs", 770 .get_sb = hugetlbfs_get_sb, 771 .kill_sb = kill_litter_super, 772 }; 773 774 static struct vfsmount *hugetlbfs_vfsmount; 775 776 static int can_do_hugetlb_shm(void) 777 { 778 return likely(capable(CAP_IPC_LOCK) || 779 in_group_p(sysctl_hugetlb_shm_group) || 780 can_do_mlock()); 781 } 782 783 struct file *hugetlb_file_setup(const char *name, size_t size) 784 { 785 int error = -ENOMEM; 786 struct file *file; 787 struct inode *inode; 788 struct dentry *dentry, *root; 789 struct qstr quick_string; 790 791 if (!hugetlbfs_vfsmount) 792 return ERR_PTR(-ENOENT); 793 794 if (!can_do_hugetlb_shm()) 795 return ERR_PTR(-EPERM); 796 797 if (!user_shm_lock(size, current->user)) 798 return ERR_PTR(-ENOMEM); 799 800 root = hugetlbfs_vfsmount->mnt_root; 801 quick_string.name = name; 802 quick_string.len = strlen(quick_string.name); 803 quick_string.hash = 0; 804 dentry = d_alloc(root, &quick_string); 805 if (!dentry) 806 goto out_shm_unlock; 807 808 error = -ENFILE; 809 file = get_empty_filp(); 810 if (!file) 811 goto out_dentry; 812 813 error = -ENOSPC; 814 inode = hugetlbfs_get_inode(root->d_sb, current->fsuid, 815 current->fsgid, S_IFREG | S_IRWXUGO, 0); 816 if (!inode) 817 goto out_file; 818 819 error = -ENOMEM; 820 if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT)) 821 goto out_inode; 822 823 d_instantiate(dentry, inode); 824 inode->i_size = size; 825 inode->i_nlink = 0; 826 file->f_path.mnt = mntget(hugetlbfs_vfsmount); 827 file->f_path.dentry = dentry; 828 file->f_mapping = inode->i_mapping; 829 file->f_op = &hugetlbfs_file_operations; 830 file->f_mode = FMODE_WRITE | FMODE_READ; 831 return file; 832 833 out_inode: 834 iput(inode); 835 out_file: 836 put_filp(file); 837 out_dentry: 838 dput(dentry); 839 out_shm_unlock: 840 user_shm_unlock(size, current->user); 841 return ERR_PTR(error); 842 } 843 844 static int __init init_hugetlbfs_fs(void) 845 { 846 int error; 847 struct vfsmount *vfsmount; 848 849 hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache", 850 sizeof(struct hugetlbfs_inode_info), 851 0, 0, init_once); 852 if (hugetlbfs_inode_cachep == NULL) 853 return -ENOMEM; 854 855 error = register_filesystem(&hugetlbfs_fs_type); 856 if (error) 857 goto out; 858 859 vfsmount = kern_mount(&hugetlbfs_fs_type); 860 861 if (!IS_ERR(vfsmount)) { 862 hugetlbfs_vfsmount = vfsmount; 863 return 0; 864 } 865 866 error = PTR_ERR(vfsmount); 867 868 out: 869 if (error) 870 kmem_cache_destroy(hugetlbfs_inode_cachep); 871 return error; 872 } 873 874 static void __exit exit_hugetlbfs_fs(void) 875 { 876 kmem_cache_destroy(hugetlbfs_inode_cachep); 877 unregister_filesystem(&hugetlbfs_fs_type); 878 } 879 880 module_init(init_hugetlbfs_fs) 881 module_exit(exit_hugetlbfs_fs) 882 883 MODULE_LICENSE("GPL"); 884