1 /* 2 * hugetlbpage-backed filesystem. Based on ramfs. 3 * 4 * William Irwin, 2002 5 * 6 * Copyright (C) 2002 Linus Torvalds. 7 */ 8 9 #include <linux/module.h> 10 #include <linux/thread_info.h> 11 #include <asm/current.h> 12 #include <linux/sched.h> /* remove ASAP */ 13 #include <linux/fs.h> 14 #include <linux/mount.h> 15 #include <linux/file.h> 16 #include <linux/writeback.h> 17 #include <linux/pagemap.h> 18 #include <linux/highmem.h> 19 #include <linux/init.h> 20 #include <linux/string.h> 21 #include <linux/capability.h> 22 #include <linux/backing-dev.h> 23 #include <linux/hugetlb.h> 24 #include <linux/pagevec.h> 25 #include <linux/mman.h> 26 #include <linux/quotaops.h> 27 #include <linux/slab.h> 28 #include <linux/dnotify.h> 29 #include <linux/statfs.h> 30 #include <linux/security.h> 31 32 #include <asm/uaccess.h> 33 34 /* some random number */ 35 #define HUGETLBFS_MAGIC 0x958458f6 36 37 static const struct super_operations hugetlbfs_ops; 38 static const struct address_space_operations hugetlbfs_aops; 39 const struct file_operations hugetlbfs_file_operations; 40 static const struct inode_operations hugetlbfs_dir_inode_operations; 41 static const struct inode_operations hugetlbfs_inode_operations; 42 43 static struct backing_dev_info hugetlbfs_backing_dev_info = { 44 .ra_pages = 0, /* No readahead */ 45 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 46 }; 47 48 int sysctl_hugetlb_shm_group; 49 50 static void huge_pagevec_release(struct pagevec *pvec) 51 { 52 int i; 53 54 for (i = 0; i < pagevec_count(pvec); ++i) 55 put_page(pvec->pages[i]); 56 57 pagevec_reinit(pvec); 58 } 59 60 static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) 61 { 62 struct inode *inode = file->f_path.dentry->d_inode; 63 loff_t len, vma_len; 64 int ret; 65 66 /* 67 * vma alignment has already been checked by prepare_hugepage_range. 68 * If you add any error returns here, do so after setting VM_HUGETLB, 69 * so is_vm_hugetlb_page tests below unmap_region go the right way 70 * when do_mmap_pgoff unwinds (may be important on powerpc and ia64). 71 */ 72 vma->vm_flags |= VM_HUGETLB | VM_RESERVED; 73 vma->vm_ops = &hugetlb_vm_ops; 74 75 vma_len = (loff_t)(vma->vm_end - vma->vm_start); 76 77 mutex_lock(&inode->i_mutex); 78 file_accessed(file); 79 80 ret = -ENOMEM; 81 len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); 82 83 if (vma->vm_flags & VM_MAYSHARE && 84 hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT), 85 len >> HPAGE_SHIFT)) 86 goto out; 87 88 ret = 0; 89 hugetlb_prefault_arch_hook(vma->vm_mm); 90 if (vma->vm_flags & VM_WRITE && inode->i_size < len) 91 inode->i_size = len; 92 out: 93 mutex_unlock(&inode->i_mutex); 94 95 return ret; 96 } 97 98 /* 99 * Called under down_write(mmap_sem). 100 */ 101 102 #ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA 103 static unsigned long 104 hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 105 unsigned long len, unsigned long pgoff, unsigned long flags) 106 { 107 struct mm_struct *mm = current->mm; 108 struct vm_area_struct *vma; 109 unsigned long start_addr; 110 111 if (len & ~HPAGE_MASK) 112 return -EINVAL; 113 if (len > TASK_SIZE) 114 return -ENOMEM; 115 116 if (flags & MAP_FIXED) { 117 if (prepare_hugepage_range(addr, len, pgoff)) 118 return -EINVAL; 119 return addr; 120 } 121 122 if (addr) { 123 addr = ALIGN(addr, HPAGE_SIZE); 124 vma = find_vma(mm, addr); 125 if (TASK_SIZE - len >= addr && 126 (!vma || addr + len <= vma->vm_start)) 127 return addr; 128 } 129 130 start_addr = mm->free_area_cache; 131 132 if (len <= mm->cached_hole_size) 133 start_addr = TASK_UNMAPPED_BASE; 134 135 full_search: 136 addr = ALIGN(start_addr, HPAGE_SIZE); 137 138 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { 139 /* At this point: (!vma || addr < vma->vm_end). */ 140 if (TASK_SIZE - len < addr) { 141 /* 142 * Start a new search - just in case we missed 143 * some holes. 144 */ 145 if (start_addr != TASK_UNMAPPED_BASE) { 146 start_addr = TASK_UNMAPPED_BASE; 147 goto full_search; 148 } 149 return -ENOMEM; 150 } 151 152 if (!vma || addr + len <= vma->vm_start) 153 return addr; 154 addr = ALIGN(vma->vm_end, HPAGE_SIZE); 155 } 156 } 157 #endif 158 159 /* 160 * Read a page. Again trivial. If it didn't already exist 161 * in the page cache, it is zero-filled. 162 */ 163 static int hugetlbfs_readpage(struct file *file, struct page * page) 164 { 165 unlock_page(page); 166 return -EINVAL; 167 } 168 169 static int hugetlbfs_prepare_write(struct file *file, 170 struct page *page, unsigned offset, unsigned to) 171 { 172 return -EINVAL; 173 } 174 175 static int hugetlbfs_commit_write(struct file *file, 176 struct page *page, unsigned offset, unsigned to) 177 { 178 return -EINVAL; 179 } 180 181 static void truncate_huge_page(struct page *page) 182 { 183 cancel_dirty_page(page, /* No IO accounting for huge pages? */0); 184 ClearPageUptodate(page); 185 remove_from_page_cache(page); 186 put_page(page); 187 } 188 189 static void truncate_hugepages(struct inode *inode, loff_t lstart) 190 { 191 struct address_space *mapping = &inode->i_data; 192 const pgoff_t start = lstart >> HPAGE_SHIFT; 193 struct pagevec pvec; 194 pgoff_t next; 195 int i, freed = 0; 196 197 pagevec_init(&pvec, 0); 198 next = start; 199 while (1) { 200 if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 201 if (next == start) 202 break; 203 next = start; 204 continue; 205 } 206 207 for (i = 0; i < pagevec_count(&pvec); ++i) { 208 struct page *page = pvec.pages[i]; 209 210 lock_page(page); 211 if (page->index > next) 212 next = page->index; 213 ++next; 214 truncate_huge_page(page); 215 unlock_page(page); 216 hugetlb_put_quota(mapping); 217 freed++; 218 } 219 huge_pagevec_release(&pvec); 220 } 221 BUG_ON(!lstart && mapping->nrpages); 222 hugetlb_unreserve_pages(inode, start, freed); 223 } 224 225 static void hugetlbfs_delete_inode(struct inode *inode) 226 { 227 truncate_hugepages(inode, 0); 228 clear_inode(inode); 229 } 230 231 static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock) 232 { 233 struct super_block *sb = inode->i_sb; 234 235 if (!hlist_unhashed(&inode->i_hash)) { 236 if (!(inode->i_state & (I_DIRTY|I_LOCK))) 237 list_move(&inode->i_list, &inode_unused); 238 inodes_stat.nr_unused++; 239 if (!sb || (sb->s_flags & MS_ACTIVE)) { 240 spin_unlock(&inode_lock); 241 return; 242 } 243 inode->i_state |= I_WILL_FREE; 244 spin_unlock(&inode_lock); 245 /* 246 * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK 247 * in our backing_dev_info. 248 */ 249 write_inode_now(inode, 1); 250 spin_lock(&inode_lock); 251 inode->i_state &= ~I_WILL_FREE; 252 inodes_stat.nr_unused--; 253 hlist_del_init(&inode->i_hash); 254 } 255 list_del_init(&inode->i_list); 256 list_del_init(&inode->i_sb_list); 257 inode->i_state |= I_FREEING; 258 inodes_stat.nr_inodes--; 259 spin_unlock(&inode_lock); 260 truncate_hugepages(inode, 0); 261 clear_inode(inode); 262 destroy_inode(inode); 263 } 264 265 static void hugetlbfs_drop_inode(struct inode *inode) 266 { 267 if (!inode->i_nlink) 268 generic_delete_inode(inode); 269 else 270 hugetlbfs_forget_inode(inode); 271 } 272 273 static inline void 274 hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff) 275 { 276 struct vm_area_struct *vma; 277 struct prio_tree_iter iter; 278 279 vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) { 280 unsigned long v_offset; 281 282 /* 283 * Can the expression below overflow on 32-bit arches? 284 * No, because the prio_tree returns us only those vmas 285 * which overlap the truncated area starting at pgoff, 286 * and no vma on a 32-bit arch can span beyond the 4GB. 287 */ 288 if (vma->vm_pgoff < pgoff) 289 v_offset = (pgoff - vma->vm_pgoff) << PAGE_SHIFT; 290 else 291 v_offset = 0; 292 293 __unmap_hugepage_range(vma, 294 vma->vm_start + v_offset, vma->vm_end); 295 } 296 } 297 298 /* 299 * Expanding truncates are not allowed. 300 */ 301 static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) 302 { 303 pgoff_t pgoff; 304 struct address_space *mapping = inode->i_mapping; 305 306 if (offset > inode->i_size) 307 return -EINVAL; 308 309 BUG_ON(offset & ~HPAGE_MASK); 310 pgoff = offset >> PAGE_SHIFT; 311 312 inode->i_size = offset; 313 spin_lock(&mapping->i_mmap_lock); 314 if (!prio_tree_empty(&mapping->i_mmap)) 315 hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); 316 spin_unlock(&mapping->i_mmap_lock); 317 truncate_hugepages(inode, offset); 318 return 0; 319 } 320 321 static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) 322 { 323 struct inode *inode = dentry->d_inode; 324 int error; 325 unsigned int ia_valid = attr->ia_valid; 326 327 BUG_ON(!inode); 328 329 error = inode_change_ok(inode, attr); 330 if (error) 331 goto out; 332 333 if (ia_valid & ATTR_SIZE) { 334 error = -EINVAL; 335 if (!(attr->ia_size & ~HPAGE_MASK)) 336 error = hugetlb_vmtruncate(inode, attr->ia_size); 337 if (error) 338 goto out; 339 attr->ia_valid &= ~ATTR_SIZE; 340 } 341 error = inode_setattr(inode, attr); 342 out: 343 return error; 344 } 345 346 static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, 347 gid_t gid, int mode, dev_t dev) 348 { 349 struct inode *inode; 350 351 inode = new_inode(sb); 352 if (inode) { 353 struct hugetlbfs_inode_info *info; 354 inode->i_mode = mode; 355 inode->i_uid = uid; 356 inode->i_gid = gid; 357 inode->i_blocks = 0; 358 inode->i_mapping->a_ops = &hugetlbfs_aops; 359 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; 360 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 361 INIT_LIST_HEAD(&inode->i_mapping->private_list); 362 info = HUGETLBFS_I(inode); 363 mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL); 364 switch (mode & S_IFMT) { 365 default: 366 init_special_inode(inode, mode, dev); 367 break; 368 case S_IFREG: 369 inode->i_op = &hugetlbfs_inode_operations; 370 inode->i_fop = &hugetlbfs_file_operations; 371 break; 372 case S_IFDIR: 373 inode->i_op = &hugetlbfs_dir_inode_operations; 374 inode->i_fop = &simple_dir_operations; 375 376 /* directory inodes start off with i_nlink == 2 (for "." entry) */ 377 inc_nlink(inode); 378 break; 379 case S_IFLNK: 380 inode->i_op = &page_symlink_inode_operations; 381 break; 382 } 383 } 384 return inode; 385 } 386 387 /* 388 * File creation. Allocate an inode, and we're done.. 389 */ 390 static int hugetlbfs_mknod(struct inode *dir, 391 struct dentry *dentry, int mode, dev_t dev) 392 { 393 struct inode *inode; 394 int error = -ENOSPC; 395 gid_t gid; 396 397 if (dir->i_mode & S_ISGID) { 398 gid = dir->i_gid; 399 if (S_ISDIR(mode)) 400 mode |= S_ISGID; 401 } else { 402 gid = current->fsgid; 403 } 404 inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid, gid, mode, dev); 405 if (inode) { 406 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 407 d_instantiate(dentry, inode); 408 dget(dentry); /* Extra count - pin the dentry in core */ 409 error = 0; 410 } 411 return error; 412 } 413 414 static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 415 { 416 int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0); 417 if (!retval) 418 inc_nlink(dir); 419 return retval; 420 } 421 422 static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) 423 { 424 return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0); 425 } 426 427 static int hugetlbfs_symlink(struct inode *dir, 428 struct dentry *dentry, const char *symname) 429 { 430 struct inode *inode; 431 int error = -ENOSPC; 432 gid_t gid; 433 434 if (dir->i_mode & S_ISGID) 435 gid = dir->i_gid; 436 else 437 gid = current->fsgid; 438 439 inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid, 440 gid, S_IFLNK|S_IRWXUGO, 0); 441 if (inode) { 442 int l = strlen(symname)+1; 443 error = page_symlink(inode, symname, l); 444 if (!error) { 445 d_instantiate(dentry, inode); 446 dget(dentry); 447 } else 448 iput(inode); 449 } 450 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 451 452 return error; 453 } 454 455 /* 456 * mark the head page dirty 457 */ 458 static int hugetlbfs_set_page_dirty(struct page *page) 459 { 460 struct page *head = compound_head(page); 461 462 SetPageDirty(head); 463 return 0; 464 } 465 466 static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) 467 { 468 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); 469 470 buf->f_type = HUGETLBFS_MAGIC; 471 buf->f_bsize = HPAGE_SIZE; 472 if (sbinfo) { 473 spin_lock(&sbinfo->stat_lock); 474 /* If no limits set, just report 0 for max/free/used 475 * blocks, like simple_statfs() */ 476 if (sbinfo->max_blocks >= 0) { 477 buf->f_blocks = sbinfo->max_blocks; 478 buf->f_bavail = buf->f_bfree = sbinfo->free_blocks; 479 buf->f_files = sbinfo->max_inodes; 480 buf->f_ffree = sbinfo->free_inodes; 481 } 482 spin_unlock(&sbinfo->stat_lock); 483 } 484 buf->f_namelen = NAME_MAX; 485 return 0; 486 } 487 488 static void hugetlbfs_put_super(struct super_block *sb) 489 { 490 struct hugetlbfs_sb_info *sbi = HUGETLBFS_SB(sb); 491 492 if (sbi) { 493 sb->s_fs_info = NULL; 494 kfree(sbi); 495 } 496 } 497 498 static inline int hugetlbfs_dec_free_inodes(struct hugetlbfs_sb_info *sbinfo) 499 { 500 if (sbinfo->free_inodes >= 0) { 501 spin_lock(&sbinfo->stat_lock); 502 if (unlikely(!sbinfo->free_inodes)) { 503 spin_unlock(&sbinfo->stat_lock); 504 return 0; 505 } 506 sbinfo->free_inodes--; 507 spin_unlock(&sbinfo->stat_lock); 508 } 509 510 return 1; 511 } 512 513 static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo) 514 { 515 if (sbinfo->free_inodes >= 0) { 516 spin_lock(&sbinfo->stat_lock); 517 sbinfo->free_inodes++; 518 spin_unlock(&sbinfo->stat_lock); 519 } 520 } 521 522 523 static struct kmem_cache *hugetlbfs_inode_cachep; 524 525 static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) 526 { 527 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb); 528 struct hugetlbfs_inode_info *p; 529 530 if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo))) 531 return NULL; 532 p = kmem_cache_alloc(hugetlbfs_inode_cachep, GFP_KERNEL); 533 if (unlikely(!p)) { 534 hugetlbfs_inc_free_inodes(sbinfo); 535 return NULL; 536 } 537 return &p->vfs_inode; 538 } 539 540 static void hugetlbfs_destroy_inode(struct inode *inode) 541 { 542 hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb)); 543 mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); 544 kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); 545 } 546 547 static const struct address_space_operations hugetlbfs_aops = { 548 .readpage = hugetlbfs_readpage, 549 .prepare_write = hugetlbfs_prepare_write, 550 .commit_write = hugetlbfs_commit_write, 551 .set_page_dirty = hugetlbfs_set_page_dirty, 552 }; 553 554 555 static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) 556 { 557 struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; 558 559 inode_init_once(&ei->vfs_inode); 560 } 561 562 const struct file_operations hugetlbfs_file_operations = { 563 .mmap = hugetlbfs_file_mmap, 564 .fsync = simple_sync_file, 565 .get_unmapped_area = hugetlb_get_unmapped_area, 566 }; 567 568 static const struct inode_operations hugetlbfs_dir_inode_operations = { 569 .create = hugetlbfs_create, 570 .lookup = simple_lookup, 571 .link = simple_link, 572 .unlink = simple_unlink, 573 .symlink = hugetlbfs_symlink, 574 .mkdir = hugetlbfs_mkdir, 575 .rmdir = simple_rmdir, 576 .mknod = hugetlbfs_mknod, 577 .rename = simple_rename, 578 .setattr = hugetlbfs_setattr, 579 }; 580 581 static const struct inode_operations hugetlbfs_inode_operations = { 582 .setattr = hugetlbfs_setattr, 583 }; 584 585 static const struct super_operations hugetlbfs_ops = { 586 .alloc_inode = hugetlbfs_alloc_inode, 587 .destroy_inode = hugetlbfs_destroy_inode, 588 .statfs = hugetlbfs_statfs, 589 .delete_inode = hugetlbfs_delete_inode, 590 .drop_inode = hugetlbfs_drop_inode, 591 .put_super = hugetlbfs_put_super, 592 }; 593 594 static int 595 hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) 596 { 597 char *opt, *value, *rest; 598 599 if (!options) 600 return 0; 601 while ((opt = strsep(&options, ",")) != NULL) { 602 if (!*opt) 603 continue; 604 605 value = strchr(opt, '='); 606 if (!value || !*value) 607 return -EINVAL; 608 else 609 *value++ = '\0'; 610 611 if (!strcmp(opt, "uid")) 612 pconfig->uid = simple_strtoul(value, &value, 0); 613 else if (!strcmp(opt, "gid")) 614 pconfig->gid = simple_strtoul(value, &value, 0); 615 else if (!strcmp(opt, "mode")) 616 pconfig->mode = simple_strtoul(value,&value,0) & 0777U; 617 else if (!strcmp(opt, "size")) { 618 unsigned long long size = memparse(value, &rest); 619 if (*rest == '%') { 620 size <<= HPAGE_SHIFT; 621 size *= max_huge_pages; 622 do_div(size, 100); 623 rest++; 624 } 625 pconfig->nr_blocks = (size >> HPAGE_SHIFT); 626 value = rest; 627 } else if (!strcmp(opt,"nr_inodes")) { 628 pconfig->nr_inodes = memparse(value, &rest); 629 value = rest; 630 } else 631 return -EINVAL; 632 633 if (*value) 634 return -EINVAL; 635 } 636 return 0; 637 } 638 639 static int 640 hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) 641 { 642 struct inode * inode; 643 struct dentry * root; 644 int ret; 645 struct hugetlbfs_config config; 646 struct hugetlbfs_sb_info *sbinfo; 647 648 config.nr_blocks = -1; /* No limit on size by default */ 649 config.nr_inodes = -1; /* No limit on number of inodes by default */ 650 config.uid = current->fsuid; 651 config.gid = current->fsgid; 652 config.mode = 0755; 653 ret = hugetlbfs_parse_options(data, &config); 654 655 if (ret) 656 return ret; 657 658 sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL); 659 if (!sbinfo) 660 return -ENOMEM; 661 sb->s_fs_info = sbinfo; 662 spin_lock_init(&sbinfo->stat_lock); 663 sbinfo->max_blocks = config.nr_blocks; 664 sbinfo->free_blocks = config.nr_blocks; 665 sbinfo->max_inodes = config.nr_inodes; 666 sbinfo->free_inodes = config.nr_inodes; 667 sb->s_maxbytes = MAX_LFS_FILESIZE; 668 sb->s_blocksize = HPAGE_SIZE; 669 sb->s_blocksize_bits = HPAGE_SHIFT; 670 sb->s_magic = HUGETLBFS_MAGIC; 671 sb->s_op = &hugetlbfs_ops; 672 sb->s_time_gran = 1; 673 inode = hugetlbfs_get_inode(sb, config.uid, config.gid, 674 S_IFDIR | config.mode, 0); 675 if (!inode) 676 goto out_free; 677 678 root = d_alloc_root(inode); 679 if (!root) { 680 iput(inode); 681 goto out_free; 682 } 683 sb->s_root = root; 684 return 0; 685 out_free: 686 kfree(sbinfo); 687 return -ENOMEM; 688 } 689 690 int hugetlb_get_quota(struct address_space *mapping) 691 { 692 int ret = 0; 693 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); 694 695 if (sbinfo->free_blocks > -1) { 696 spin_lock(&sbinfo->stat_lock); 697 if (sbinfo->free_blocks > 0) 698 sbinfo->free_blocks--; 699 else 700 ret = -ENOMEM; 701 spin_unlock(&sbinfo->stat_lock); 702 } 703 704 return ret; 705 } 706 707 void hugetlb_put_quota(struct address_space *mapping) 708 { 709 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); 710 711 if (sbinfo->free_blocks > -1) { 712 spin_lock(&sbinfo->stat_lock); 713 sbinfo->free_blocks++; 714 spin_unlock(&sbinfo->stat_lock); 715 } 716 } 717 718 static int hugetlbfs_get_sb(struct file_system_type *fs_type, 719 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 720 { 721 return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt); 722 } 723 724 static struct file_system_type hugetlbfs_fs_type = { 725 .name = "hugetlbfs", 726 .get_sb = hugetlbfs_get_sb, 727 .kill_sb = kill_litter_super, 728 }; 729 730 static struct vfsmount *hugetlbfs_vfsmount; 731 732 static int can_do_hugetlb_shm(void) 733 { 734 return likely(capable(CAP_IPC_LOCK) || 735 in_group_p(sysctl_hugetlb_shm_group) || 736 can_do_mlock()); 737 } 738 739 struct file *hugetlb_file_setup(const char *name, size_t size) 740 { 741 int error = -ENOMEM; 742 struct file *file; 743 struct inode *inode; 744 struct dentry *dentry, *root; 745 struct qstr quick_string; 746 747 if (!hugetlbfs_vfsmount) 748 return ERR_PTR(-ENOENT); 749 750 if (!can_do_hugetlb_shm()) 751 return ERR_PTR(-EPERM); 752 753 if (!user_shm_lock(size, current->user)) 754 return ERR_PTR(-ENOMEM); 755 756 root = hugetlbfs_vfsmount->mnt_root; 757 quick_string.name = name; 758 quick_string.len = strlen(quick_string.name); 759 quick_string.hash = 0; 760 dentry = d_alloc(root, &quick_string); 761 if (!dentry) 762 goto out_shm_unlock; 763 764 error = -ENFILE; 765 file = get_empty_filp(); 766 if (!file) 767 goto out_dentry; 768 769 error = -ENOSPC; 770 inode = hugetlbfs_get_inode(root->d_sb, current->fsuid, 771 current->fsgid, S_IFREG | S_IRWXUGO, 0); 772 if (!inode) 773 goto out_file; 774 775 error = -ENOMEM; 776 if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT)) 777 goto out_inode; 778 779 d_instantiate(dentry, inode); 780 inode->i_size = size; 781 inode->i_nlink = 0; 782 file->f_path.mnt = mntget(hugetlbfs_vfsmount); 783 file->f_path.dentry = dentry; 784 file->f_mapping = inode->i_mapping; 785 file->f_op = &hugetlbfs_file_operations; 786 file->f_mode = FMODE_WRITE | FMODE_READ; 787 return file; 788 789 out_inode: 790 iput(inode); 791 out_file: 792 put_filp(file); 793 out_dentry: 794 dput(dentry); 795 out_shm_unlock: 796 user_shm_unlock(size, current->user); 797 return ERR_PTR(error); 798 } 799 800 static int __init init_hugetlbfs_fs(void) 801 { 802 int error; 803 struct vfsmount *vfsmount; 804 805 hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache", 806 sizeof(struct hugetlbfs_inode_info), 807 0, 0, init_once, NULL); 808 if (hugetlbfs_inode_cachep == NULL) 809 return -ENOMEM; 810 811 error = register_filesystem(&hugetlbfs_fs_type); 812 if (error) 813 goto out; 814 815 vfsmount = kern_mount(&hugetlbfs_fs_type); 816 817 if (!IS_ERR(vfsmount)) { 818 hugetlbfs_vfsmount = vfsmount; 819 return 0; 820 } 821 822 error = PTR_ERR(vfsmount); 823 824 out: 825 if (error) 826 kmem_cache_destroy(hugetlbfs_inode_cachep); 827 return error; 828 } 829 830 static void __exit exit_hugetlbfs_fs(void) 831 { 832 kmem_cache_destroy(hugetlbfs_inode_cachep); 833 unregister_filesystem(&hugetlbfs_fs_type); 834 } 835 836 module_init(init_hugetlbfs_fs) 837 module_exit(exit_hugetlbfs_fs) 838 839 MODULE_LICENSE("GPL"); 840