/*
 * hugetlbpage-backed filesystem.  Based on ramfs.
 *
 * William Irwin, 2002
 *
 * Copyright (C) 2002 Linus Torvalds.
 */

#include <linux/module.h>
#include <linux/thread_info.h>
#include <asm/current.h>
#include <linux/sched.h>		/* remove ASAP */
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/hugetlb.h>
#include <linux/pagevec.h>
#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/dnotify.h>
#include <linux/statfs.h>
#include <linux/security.h>

#include <asm/uaccess.h>

/* some random number */
#define HUGETLBFS_MAGIC	0x958458f6

static struct super_operations hugetlbfs_ops;
static struct address_space_operations hugetlbfs_aops;
struct file_operations hugetlbfs_file_operations;
static struct inode_operations hugetlbfs_dir_inode_operations;
static struct inode_operations hugetlbfs_inode_operations;

static struct backing_dev_info hugetlbfs_backing_dev_info = {
	.ra_pages	= 0,	/* No readahead */
	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
};

int sysctl_hugetlb_shm_group;

static void huge_pagevec_release(struct pagevec *pvec)
{
	int i;

	for (i = 0; i < pagevec_count(pvec); ++i)
		put_page(pvec->pages[i]);

	pagevec_reinit(pvec);
}
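/*
 * Illustrative arithmetic for huge_pages_needed() below (assuming 2MB
 * huge pages, i.e. HPAGE_SHIFT == 21): an 8MB VMA covers
 * (8MB >> HPAGE_SHIFT) == 4 huge pages.  Each page of the VMA already
 * found in the page cache reduces that count by one, and the remainder
 * is returned in bytes so it can be fed to is_hugepage_mem_enough().
 */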
/*
 * huge_pages_needed tries to determine the number of new huge pages that
 * will be required to fully populate this VMA.  This will be equal to
 * the size of the VMA in huge pages minus the number of huge pages
 * (covered by this VMA) that are found in the page cache.
 *
 * Result is in bytes to be compatible with is_hugepage_mem_enough()
 */
static unsigned long
huge_pages_needed(struct address_space *mapping, struct vm_area_struct *vma)
{
	int i;
	struct pagevec pvec;
	unsigned long start = vma->vm_start;
	unsigned long end = vma->vm_end;
	unsigned long hugepages = (end - start) >> HPAGE_SHIFT;
	pgoff_t next = vma->vm_pgoff;
	pgoff_t endpg = next + ((end - start) >> PAGE_SHIFT);

	pagevec_init(&pvec, 0);
	while (next < endpg) {
		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE))
			break;
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			if (page->index > next)
				next = page->index;
			if (page->index >= endpg)
				break;
			next++;
			hugepages--;
		}
		huge_pagevec_release(&pvec);
	}
	return hugepages << HPAGE_SHIFT;
}

static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct inode *inode = file->f_dentry->d_inode;
	struct address_space *mapping = inode->i_mapping;
	unsigned long bytes;
	loff_t len, vma_len;
	int ret;

	if (vma->vm_pgoff & (HPAGE_SIZE / PAGE_SIZE - 1))
		return -EINVAL;

	if (vma->vm_start & ~HPAGE_MASK)
		return -EINVAL;

	if (vma->vm_end & ~HPAGE_MASK)
		return -EINVAL;

	if (vma->vm_end - vma->vm_start < HPAGE_SIZE)
		return -EINVAL;

	bytes = huge_pages_needed(mapping, vma);
	if (!is_hugepage_mem_enough(bytes))
		return -ENOMEM;

	vma_len = (loff_t)(vma->vm_end - vma->vm_start);

	mutex_lock(&inode->i_mutex);
	file_accessed(file);
	vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
	vma->vm_ops = &hugetlb_vm_ops;

	ret = -ENOMEM;
	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
	if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size)
		goto out;

	ret = 0;
	hugetlb_prefault_arch_hook(vma->vm_mm);
	if (inode->i_size < len)
		inode->i_size = len;
out:
	mutex_unlock(&inode->i_mutex);

	return ret;
}
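/*
 * Userspace sketch (illustrative only; "/mnt/huge" and the 2MB huge
 * page size are assumptions): the offset and length passed to mmap()
 * must be multiples of the huge page size, or hugetlbfs_file_mmap()
 * above returns -EINVAL.
 *
 *	int fd = open("/mnt/huge/buf", O_CREAT | O_RDWR, 0644);
 *	void *p = mmap(NULL, 4 * 2048 * 1024, PROT_READ | PROT_WRITE,
 *		       MAP_SHARED, fd, 0);
 */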
/*
 * Called under down_write(mmap_sem).
 */

#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags);
#else
static unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long start_addr;

	if (len & ~HPAGE_MASK)
		return -EINVAL;
	if (len > TASK_SIZE)
		return -ENOMEM;

	if (addr) {
		addr = ALIGN(addr, HPAGE_SIZE);
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			return addr;
	}

	start_addr = mm->free_area_cache;

	if (len <= mm->cached_hole_size)
		start_addr = TASK_UNMAPPED_BASE;

full_search:
	addr = ALIGN(start_addr, HPAGE_SIZE);

	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
		/* At this point:  (!vma || addr < vma->vm_end). */
		if (TASK_SIZE - len < addr) {
			/*
			 * Start a new search - just in case we missed
			 * some holes.
			 */
			if (start_addr != TASK_UNMAPPED_BASE) {
				start_addr = TASK_UNMAPPED_BASE;
				goto full_search;
			}
			return -ENOMEM;
		}

		if (!vma || addr + len <= vma->vm_start)
			return addr;
		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
	}
}
#endif

/*
 * Read a page.  Trivially rejected: hugetlbfs pages are only ever
 * populated via mmap(), so just unlock the page and fail.
 */
static int hugetlbfs_readpage(struct file *file, struct page *page)
{
	unlock_page(page);
	return -EINVAL;
}

static int hugetlbfs_prepare_write(struct file *file,
			struct page *page, unsigned offset, unsigned to)
{
	return -EINVAL;
}

static int hugetlbfs_commit_write(struct file *file,
			struct page *page, unsigned offset, unsigned to)
{
	return -EINVAL;
}

static void truncate_huge_page(struct page *page)
{
	clear_page_dirty(page);
	ClearPageUptodate(page);
	remove_from_page_cache(page);
	put_page(page);
}

static void truncate_hugepages(struct address_space *mapping, loff_t lstart)
{
	const pgoff_t start = lstart >> HPAGE_SHIFT;
	struct pagevec pvec;
	pgoff_t next;
	int i;

	pagevec_init(&pvec, 0);
	next = start;
	while (1) {
		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
			if (next == start)
				break;
			next = start;
			continue;
		}

		for (i = 0; i < pagevec_count(&pvec); ++i) {
			struct page *page = pvec.pages[i];

			lock_page(page);
			if (page->index > next)
				next = page->index;
			++next;
			truncate_huge_page(page);
			unlock_page(page);
			hugetlb_put_quota(mapping);
		}
		huge_pagevec_release(&pvec);
	}
	BUG_ON(!lstart && mapping->nrpages);
}

static void hugetlbfs_delete_inode(struct inode *inode)
{
	if (inode->i_data.nrpages)
		truncate_hugepages(&inode->i_data, 0);
	clear_inode(inode);
}

static void hugetlbfs_forget_inode(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;

	if (!hlist_unhashed(&inode->i_hash)) {
		if (!(inode->i_state & (I_DIRTY|I_LOCK)))
			list_move(&inode->i_list, &inode_unused);
		inodes_stat.nr_unused++;
		if (!sb || (sb->s_flags & MS_ACTIVE)) {
			spin_unlock(&inode_lock);
			return;
		}
		inode->i_state |= I_WILL_FREE;
		spin_unlock(&inode_lock);
		/*
		 * write_inode_now() is a no-op, as we set
		 * BDI_CAP_NO_WRITEBACK in our backing_dev_info.
		 */
		write_inode_now(inode, 1);
		spin_lock(&inode_lock);
		inode->i_state &= ~I_WILL_FREE;
		inodes_stat.nr_unused--;
		hlist_del_init(&inode->i_hash);
	}
	list_del_init(&inode->i_list);
	list_del_init(&inode->i_sb_list);
	inode->i_state |= I_FREEING;
	inodes_stat.nr_inodes--;
	spin_unlock(&inode_lock);
	if (inode->i_data.nrpages)
		truncate_hugepages(&inode->i_data, 0);
	clear_inode(inode);
	destroy_inode(inode);
}

static void hugetlbfs_drop_inode(struct inode *inode)
{
	if (!inode->i_nlink)
		generic_delete_inode(inode);
	else
		hugetlbfs_forget_inode(inode);
}
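/*
 * Worked example for the offset conversion in hugetlb_vmtruncate_list()
 * below (illustrative, assuming HPAGE_SHIFT == 21 and PAGE_SHIFT == 12):
 * a VMA with vm_pgoff == 1024 begins at huge-page index
 * 1024 >> (21 - 12) == 2.  Truncating the file at h_pgoff == 5 yields
 * v_offset == (5 - 2) << 21, so unmapping starts 3 huge pages into that
 * VMA; a VMA starting at or past the truncation point is unmapped in
 * full (v_offset == 0).
 */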
/*
 * h_pgoff is in HPAGE_SIZE units.
 * vma->vm_pgoff is in PAGE_SIZE units.
 */
static inline void
hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
{
	struct vm_area_struct *vma;
	struct prio_tree_iter iter;

	vma_prio_tree_foreach(vma, &iter, root, h_pgoff, ULONG_MAX) {
		unsigned long h_vm_pgoff;
		unsigned long v_offset;

		h_vm_pgoff = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT);
		v_offset = (h_pgoff - h_vm_pgoff) << HPAGE_SHIFT;
		/*
		 * Is this VMA fully outside the truncation point?
		 */
		if (h_vm_pgoff >= h_pgoff)
			v_offset = 0;

		unmap_hugepage_range(vma,
				vma->vm_start + v_offset, vma->vm_end);
	}
}

/*
 * Expanding truncates are not allowed.
 */
static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
{
	unsigned long pgoff;
	struct address_space *mapping = inode->i_mapping;

	if (offset > inode->i_size)
		return -EINVAL;

	BUG_ON(offset & ~HPAGE_MASK);
	pgoff = offset >> HPAGE_SHIFT;

	inode->i_size = offset;
	spin_lock(&mapping->i_mmap_lock);
	if (!prio_tree_empty(&mapping->i_mmap))
		hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
	spin_unlock(&mapping->i_mmap_lock);
	truncate_hugepages(mapping, offset);
	return 0;
}

static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	int error;
	unsigned int ia_valid = attr->ia_valid;

	BUG_ON(!inode);

	error = inode_change_ok(inode, attr);
	if (error)
		goto out;

	if (ia_valid & ATTR_SIZE) {
		error = -EINVAL;
		if (!(attr->ia_size & ~HPAGE_MASK))
			error = hugetlb_vmtruncate(inode, attr->ia_size);
		if (error)
			goto out;
		attr->ia_valid &= ~ATTR_SIZE;
	}
	error = inode_setattr(inode, attr);
out:
	return error;
}

static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
					gid_t gid, int mode, dev_t dev)
{
	struct inode *inode;

	inode = new_inode(sb);
	if (inode) {
		struct hugetlbfs_inode_info *info;
		inode->i_mode = mode;
		inode->i_uid = uid;
		inode->i_gid = gid;
		inode->i_blksize = HPAGE_SIZE;
		inode->i_blocks = 0;
		inode->i_mapping->a_ops = &hugetlbfs_aops;
		inode->i_mapping->backing_dev_info = &hugetlbfs_backing_dev_info;
		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
		info = HUGETLBFS_I(inode);
		mpol_shared_policy_init(&info->policy);
		switch (mode & S_IFMT) {
		default:
			init_special_inode(inode, mode, dev);
			break;
		case S_IFREG:
			inode->i_op = &hugetlbfs_inode_operations;
			inode->i_fop = &hugetlbfs_file_operations;
			break;
		case S_IFDIR:
			inode->i_op = &hugetlbfs_dir_inode_operations;
			inode->i_fop = &simple_dir_operations;

			/* directory inodes start off with i_nlink == 2 (for "." entry) */
			inode->i_nlink++;
			break;
		case S_IFLNK:
			inode->i_op = &page_symlink_inode_operations;
			break;
		}
	}
	return inode;
}
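/*
 * Note on the setgid handling in hugetlbfs_mknod() below: as is
 * conventional, a new inode created in a setgid directory inherits the
 * directory's group, and a new subdirectory additionally inherits the
 * setgid bit itself.
 */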
/*
 * File creation. Allocate an inode, and we're done..
 */
static int hugetlbfs_mknod(struct inode *dir,
			struct dentry *dentry, int mode, dev_t dev)
{
	struct inode *inode;
	int error = -ENOSPC;
	gid_t gid;

	if (dir->i_mode & S_ISGID) {
		gid = dir->i_gid;
		if (S_ISDIR(mode))
			mode |= S_ISGID;
	} else {
		gid = current->fsgid;
	}
	inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid, gid, mode, dev);
	if (inode) {
		dir->i_ctime = dir->i_mtime = CURRENT_TIME;
		d_instantiate(dentry, inode);
		dget(dentry);	/* Extra count - pin the dentry in core */
		error = 0;
	}
	return error;
}

static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
	int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0);
	if (!retval)
		dir->i_nlink++;
	return retval;
}

static int hugetlbfs_create(struct inode *dir, struct dentry *dentry,
			int mode, struct nameidata *nd)
{
	return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0);
}

static int hugetlbfs_symlink(struct inode *dir,
			struct dentry *dentry, const char *symname)
{
	struct inode *inode;
	int error = -ENOSPC;
	gid_t gid;

	if (dir->i_mode & S_ISGID)
		gid = dir->i_gid;
	else
		gid = current->fsgid;

	inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid,
					gid, S_IFLNK|S_IRWXUGO, 0);
	if (inode) {
		int l = strlen(symname)+1;
		error = page_symlink(inode, symname, l);
		if (!error) {
			d_instantiate(dentry, inode);
			dget(dentry);
		} else
			iput(inode);
	}
	dir->i_ctime = dir->i_mtime = CURRENT_TIME;

	return error;
}

/*
 * For direct-IO reads into hugetlb pages
 */
static int hugetlbfs_set_page_dirty(struct page *page)
{
	return 0;
}

static int hugetlbfs_statfs(struct super_block *sb, struct kstatfs *buf)
{
	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb);

	buf->f_type = HUGETLBFS_MAGIC;
	buf->f_bsize = HPAGE_SIZE;
	if (sbinfo) {
		spin_lock(&sbinfo->stat_lock);
		/* If no limits set, just report 0 for max/free/used
		 * blocks, like simple_statfs() */
		if (sbinfo->max_blocks >= 0) {
			buf->f_blocks = sbinfo->max_blocks;
			buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
			buf->f_files = sbinfo->max_inodes;
			buf->f_ffree = sbinfo->free_inodes;
		}
		spin_unlock(&sbinfo->stat_lock);
	}
	buf->f_namelen = NAME_MAX;
	return 0;
}

static void hugetlbfs_put_super(struct super_block *sb)
{
	struct hugetlbfs_sb_info *sbi = HUGETLBFS_SB(sb);

	if (sbi) {
		sb->s_fs_info = NULL;
		kfree(sbi);
	}
}

static inline int hugetlbfs_dec_free_inodes(struct hugetlbfs_sb_info *sbinfo)
{
	if (sbinfo->free_inodes >= 0) {
		spin_lock(&sbinfo->stat_lock);
		if (unlikely(!sbinfo->free_inodes)) {
			spin_unlock(&sbinfo->stat_lock);
			return 0;
		}
		sbinfo->free_inodes--;
		spin_unlock(&sbinfo->stat_lock);
	}

	return 1;
}

static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo)
{
	if (sbinfo->free_inodes >= 0) {
		spin_lock(&sbinfo->stat_lock);
		sbinfo->free_inodes++;
		spin_unlock(&sbinfo->stat_lock);
	}
}
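/*
 * free_inodes is -1 when the filesystem was mounted without an
 * nr_inodes= limit, making the accounting helpers above no-ops.
 * hugetlbfs_alloc_inode() below charges one inode up front and returns
 * the charge if the slab allocation fails.
 */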
static kmem_cache_t *hugetlbfs_inode_cachep;

static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
{
	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb);
	struct hugetlbfs_inode_info *p;

	if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo)))
		return NULL;
	p = kmem_cache_alloc(hugetlbfs_inode_cachep, SLAB_KERNEL);
	if (unlikely(!p)) {
		hugetlbfs_inc_free_inodes(sbinfo);
		return NULL;
	}
	return &p->vfs_inode;
}

static void hugetlbfs_destroy_inode(struct inode *inode)
{
	hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
	mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
	kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
}

static struct address_space_operations hugetlbfs_aops = {
	.readpage	= hugetlbfs_readpage,
	.prepare_write	= hugetlbfs_prepare_write,
	.commit_write	= hugetlbfs_commit_write,
	.set_page_dirty	= hugetlbfs_set_page_dirty,
};

static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
{
	struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR)
		inode_init_once(&ei->vfs_inode);
}

struct file_operations hugetlbfs_file_operations = {
	.mmap			= hugetlbfs_file_mmap,
	.fsync			= simple_sync_file,
	.get_unmapped_area	= hugetlb_get_unmapped_area,
};

static struct inode_operations hugetlbfs_dir_inode_operations = {
	.create		= hugetlbfs_create,
	.lookup		= simple_lookup,
	.link		= simple_link,
	.unlink		= simple_unlink,
	.symlink	= hugetlbfs_symlink,
	.mkdir		= hugetlbfs_mkdir,
	.rmdir		= simple_rmdir,
	.mknod		= hugetlbfs_mknod,
	.rename		= simple_rename,
	.setattr	= hugetlbfs_setattr,
};

static struct inode_operations hugetlbfs_inode_operations = {
	.setattr	= hugetlbfs_setattr,
};

static struct super_operations hugetlbfs_ops = {
	.alloc_inode	= hugetlbfs_alloc_inode,
	.destroy_inode	= hugetlbfs_destroy_inode,
	.statfs		= hugetlbfs_statfs,
	.delete_inode	= hugetlbfs_delete_inode,
	.drop_inode	= hugetlbfs_drop_inode,
	.put_super	= hugetlbfs_put_super,
};

static int
hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
{
	char *opt, *value, *rest;

	if (!options)
		return 0;
	while ((opt = strsep(&options, ",")) != NULL) {
		if (!*opt)
			continue;

		value = strchr(opt, '=');
		if (!value || !*value)
			return -EINVAL;
		else
			*value++ = '\0';

		if (!strcmp(opt, "uid"))
			pconfig->uid = simple_strtoul(value, &value, 0);
		else if (!strcmp(opt, "gid"))
			pconfig->gid = simple_strtoul(value, &value, 0);
		else if (!strcmp(opt, "mode"))
			pconfig->mode = simple_strtoul(value, &value, 0) & 0777U;
		else if (!strcmp(opt, "size")) {
			unsigned long long size = memparse(value, &rest);
			if (*rest == '%') {
				size <<= HPAGE_SHIFT;
				size *= max_huge_pages;
				do_div(size, 100);
				rest++;
			}
			size &= HPAGE_MASK;
			pconfig->nr_blocks = (size >> HPAGE_SHIFT);
			value = rest;
		} else if (!strcmp(opt, "nr_inodes")) {
			pconfig->nr_inodes = memparse(value, &rest);
			value = rest;
		} else
			return -EINVAL;

		if (*value)
			return -EINVAL;
	}
	return 0;
}
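/*
 * Example mount invocation exercising hugetlbfs_parse_options() above
 * (illustrative; the mount point is an assumption):
 *
 *	mount -t hugetlbfs -o uid=100,gid=100,mode=0700,size=50%,nr_inodes=16 \
 *		none /mnt/huge
 *
 * "size" is parsed with memparse(), so it accepts k/m/g suffixes, or a
 * trailing '%' meaning a percentage of max_huge_pages; either way it is
 * rounded down to a whole number of huge pages.
 */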
static int
hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct inode *inode;
	struct dentry *root;
	int ret;
	struct hugetlbfs_config config;
	struct hugetlbfs_sb_info *sbinfo;

	config.nr_blocks = -1;	/* No limit on size by default */
	config.nr_inodes = -1;	/* No limit on number of inodes by default */
	config.uid = current->fsuid;
	config.gid = current->fsgid;
	config.mode = 0755;
	ret = hugetlbfs_parse_options(data, &config);

	if (ret)
		return ret;

	sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL);
	if (!sbinfo)
		return -ENOMEM;
	sb->s_fs_info = sbinfo;
	spin_lock_init(&sbinfo->stat_lock);
	sbinfo->max_blocks = config.nr_blocks;
	sbinfo->free_blocks = config.nr_blocks;
	sbinfo->max_inodes = config.nr_inodes;
	sbinfo->free_inodes = config.nr_inodes;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_blocksize = HPAGE_SIZE;
	sb->s_blocksize_bits = HPAGE_SHIFT;
	sb->s_magic = HUGETLBFS_MAGIC;
	sb->s_op = &hugetlbfs_ops;
	sb->s_time_gran = 1;
	inode = hugetlbfs_get_inode(sb, config.uid, config.gid,
					S_IFDIR | config.mode, 0);
	if (!inode)
		goto out_free;

	root = d_alloc_root(inode);
	if (!root) {
		iput(inode);
		goto out_free;
	}
	sb->s_root = root;
	return 0;
out_free:
	kfree(sbinfo);
	return -ENOMEM;
}

int hugetlb_get_quota(struct address_space *mapping)
{
	int ret = 0;
	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);

	if (sbinfo->free_blocks > -1) {
		spin_lock(&sbinfo->stat_lock);
		if (sbinfo->free_blocks > 0)
			sbinfo->free_blocks--;
		else
			ret = -ENOMEM;
		spin_unlock(&sbinfo->stat_lock);
	}

	return ret;
}

void hugetlb_put_quota(struct address_space *mapping)
{
	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);

	if (sbinfo->free_blocks > -1) {
		spin_lock(&sbinfo->stat_lock);
		sbinfo->free_blocks++;
		spin_unlock(&sbinfo->stat_lock);
	}
}

static struct super_block *hugetlbfs_get_sb(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super);
}

static struct file_system_type hugetlbfs_fs_type = {
	.name		= "hugetlbfs",
	.get_sb		= hugetlbfs_get_sb,
	.kill_sb	= kill_litter_super,
};

static struct vfsmount *hugetlbfs_vfsmount;

/*
 * Return the next identifier for a shm file
 */
static unsigned long hugetlbfs_counter(void)
{
	static DEFINE_SPINLOCK(lock);
	static unsigned long counter;
	unsigned long ret;

	spin_lock(&lock);
	ret = ++counter;
	spin_unlock(&lock);
	return ret;
}

static int can_do_hugetlb_shm(void)
{
	return likely(capable(CAP_IPC_LOCK) ||
			in_group_p(sysctl_hugetlb_shm_group) ||
			can_do_mlock());
}
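/*
 * hugetlb_zero_setup() below backs SysV shared memory segments created
 * with shmget(..., SHM_HUGETLB) by an unlinked file on the internal
 * hugetlbfs mount: the inode is instantiated with i_nlink == 0, so its
 * pages are released as soon as the last reference to the file goes
 * away.
 */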
struct file *hugetlb_zero_setup(size_t size)
{
	int error = -ENOMEM;
	struct file *file;
	struct inode *inode;
	struct dentry *dentry, *root;
	struct qstr quick_string;
	char buf[16];

	if (!can_do_hugetlb_shm())
		return ERR_PTR(-EPERM);

	if (!is_hugepage_mem_enough(size))
		return ERR_PTR(-ENOMEM);

	if (!user_shm_lock(size, current->user))
		return ERR_PTR(-ENOMEM);

	root = hugetlbfs_vfsmount->mnt_root;
	snprintf(buf, 16, "%lu", hugetlbfs_counter());
	quick_string.name = buf;
	quick_string.len = strlen(quick_string.name);
	quick_string.hash = 0;
	dentry = d_alloc(root, &quick_string);
	if (!dentry)
		goto out_shm_unlock;

	error = -ENFILE;
	file = get_empty_filp();
	if (!file)
		goto out_dentry;

	error = -ENOSPC;
	inode = hugetlbfs_get_inode(root->d_sb, current->fsuid,
				current->fsgid, S_IFREG | S_IRWXUGO, 0);
	if (!inode)
		goto out_file;

	d_instantiate(dentry, inode);
	inode->i_size = size;
	inode->i_nlink = 0;
	file->f_vfsmnt = mntget(hugetlbfs_vfsmount);
	file->f_dentry = dentry;
	file->f_mapping = inode->i_mapping;
	file->f_op = &hugetlbfs_file_operations;
	file->f_mode = FMODE_WRITE | FMODE_READ;
	return file;

out_file:
	put_filp(file);
out_dentry:
	dput(dentry);
out_shm_unlock:
	user_shm_unlock(size, current->user);
	return ERR_PTR(error);
}

static int __init init_hugetlbfs_fs(void)
{
	int error;
	struct vfsmount *vfsmount;

	hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache",
					sizeof(struct hugetlbfs_inode_info),
					0, 0, init_once, NULL);
	if (hugetlbfs_inode_cachep == NULL)
		return -ENOMEM;

	error = register_filesystem(&hugetlbfs_fs_type);
	if (error)
		goto out;

	vfsmount = kern_mount(&hugetlbfs_fs_type);

	if (!IS_ERR(vfsmount)) {
		hugetlbfs_vfsmount = vfsmount;
		return 0;
	}

	error = PTR_ERR(vfsmount);

out:
	if (error)
		kmem_cache_destroy(hugetlbfs_inode_cachep);
	return error;
}

static void __exit exit_hugetlbfs_fs(void)
{
	kmem_cache_destroy(hugetlbfs_inode_cachep);
	unregister_filesystem(&hugetlbfs_fs_type);
}

module_init(init_hugetlbfs_fs)
module_exit(exit_hugetlbfs_fs)

MODULE_LICENSE("GPL");