Lines Matching +full:chg +full:- +full:status

1 // SPDX-License-Identifier: GPL-2.0-only
60 return cma_pages_valid(hugetlb_cma[folio_nid(folio)], &folio->page,
104 if (spool->count)
106 if (spool->max_hpages != -1)
107 return spool->used_hpages == 0;
108 if (spool->min_hpages != -1)
109 return spool->rsv_hpages == spool->min_hpages;
117 spin_unlock_irqrestore(&spool->lock, irq_flags);
123 if (spool->min_hpages != -1)
124 hugetlb_acct_memory(spool->hstate,
125 -spool->min_hpages);
139 spin_lock_init(&spool->lock);
140 spool->count = 1;
141 spool->max_hpages = max_hpages;
142 spool->hstate = h;
143 spool->min_hpages = min_hpages;
145 if (min_hpages != -1 && hugetlb_acct_memory(h, min_hpages)) {
149 spool->rsv_hpages = min_hpages;
158 spin_lock_irqsave(&spool->lock, flags);
159 BUG_ON(!spool->count);
160 spool->count--;
166 * Return -ENOMEM if there are not enough resources to satisfy the
180 spin_lock_irq(&spool->lock);
182 if (spool->max_hpages != -1) { /* maximum size accounting */
183 if ((spool->used_hpages + delta) <= spool->max_hpages)
184 spool->used_hpages += delta;
186 ret = -ENOMEM;
192 if (spool->min_hpages != -1 && spool->rsv_hpages) {
193 if (delta > spool->rsv_hpages) {
198 ret = delta - spool->rsv_hpages;
199 spool->rsv_hpages = 0;
202 spool->rsv_hpages -= delta;
207 spin_unlock_irq(&spool->lock);
226 spin_lock_irqsave(&spool->lock, flags);
228 if (spool->max_hpages != -1) /* maximum size accounting */
229 spool->used_hpages -= delta;
232 if (spool->min_hpages != -1 && spool->used_hpages < spool->min_hpages) {
233 if (spool->rsv_hpages + delta <= spool->min_hpages)
236 ret = spool->rsv_hpages + delta - spool->min_hpages;
238 spool->rsv_hpages += delta;
239 if (spool->rsv_hpages > spool->min_hpages)
240 spool->rsv_hpages = spool->min_hpages;
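
The hugepage_subpool_get_pages()/hugepage_subpool_put_pages() lines above implement min/max accounting for a hugetlbfs subpool: a get charges used_hpages against max_hpages and satisfies as much of the request as it can from rsv_hpages, returning only the remainder that still has to be charged to the global pool; a put refills rsv_hpages back up to min_hpages and returns how many pages may really be released. A minimal user-space model of that arithmetic (invented names, no locking, no hstate handling) is sketched below:

#include <errno.h>

/* Simplified stand-in for struct hugepage_subpool: only the counters
 * the excerpt above manipulates. */
struct spool_model {
	long max_hpages;	/* -1 means "no maximum" */
	long min_hpages;	/* -1 means "no minimum" */
	long used_hpages;	/* pages handed out so far */
	long rsv_hpages;	/* pages still held back for the minimum */
};

/* Take delta pages from the pool.  Returns how many of them must still
 * be charged to the global pool, or -ENOMEM if max_hpages would be
 * exceeded. */
static long spool_model_get(struct spool_model *s, long delta)
{
	long ret = delta;

	if (s->max_hpages != -1) {			/* maximum size accounting */
		if (s->used_hpages + delta > s->max_hpages)
			return -ENOMEM;
		s->used_hpages += delta;
	}

	if (s->min_hpages != -1 && s->rsv_hpages) {	/* minimum size accounting */
		if (delta > s->rsv_hpages) {
			ret = delta - s->rsv_hpages;	/* reserve covers only part */
			s->rsv_hpages = 0;
		} else {
			ret = 0;			/* fully covered by the reserve */
			s->rsv_hpages -= delta;
		}
	}
	return ret;
}

/* Give delta pages back.  Returns how many pages the caller may release
 * from the global pool once the minimum reserve is topped back up. */
static long spool_model_put(struct spool_model *s, long delta)
{
	long ret = delta;

	if (s->max_hpages != -1)			/* maximum size accounting */
		s->used_hpages -= delta;

	if (s->min_hpages != -1 && s->used_hpages < s->min_hpages) {
		if (s->rsv_hpages + delta <= s->min_hpages)
			ret = 0;
		else
			ret = s->rsv_hpages + delta - s->min_hpages;

		s->rsv_hpages += delta;
		if (s->rsv_hpages > s->min_hpages)
			s->rsv_hpages = s->min_hpages;
	}
	return ret;
}

In the kernel both helpers run under spool->lock and their return values feed hugetlb_acct_memory(), which adjusts the global reservation count.
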
254 return HUGETLBFS_SB(inode->i_sb)->spool;
259 return subpool_inode(file_inode(vma->vm_file));
268 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
270 down_read(&vma_lock->rw_sema);
274 down_read(&resv_map->rw_sema);
281 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
283 up_read(&vma_lock->rw_sema);
287 up_read(&resv_map->rw_sema);
294 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
296 down_write(&vma_lock->rw_sema);
300 down_write(&resv_map->rw_sema);
307 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
309 up_write(&vma_lock->rw_sema);
313 up_write(&resv_map->rw_sema);
321 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
323 return down_write_trylock(&vma_lock->rw_sema);
327 return down_write_trylock(&resv_map->rw_sema);
336 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
338 lockdep_assert_held(&vma_lock->rw_sema);
342 lockdep_assert_held(&resv_map->rw_sema);
356 struct vm_area_struct *vma = vma_lock->vma;
361 * Semaphore synchronizes access to vma_lock->vma field.
363 vma_lock->vma = NULL;
364 vma->vm_private_data = NULL;
365 up_write(&vma_lock->rw_sema);
366 kref_put(&vma_lock->refs, hugetlb_vma_lock_release);
372 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
379 up_write(&resv_map->rw_sema);
391 if (vma->vm_private_data) {
392 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
394 down_write(&vma_lock->rw_sema);
404 if (!vma || !(vma->vm_flags & VM_MAYSHARE))
407 /* Should never get here with non-NULL vm_private_data */
408 if (vma->vm_private_data)
427 kref_init(&vma_lock->refs);
428 init_rwsem(&vma_lock->rw_sema);
429 vma_lock->vma = vma;
430 vma->vm_private_data = vma_lock;
441 VM_BUG_ON(resv->region_cache_count <= 0);
443 resv->region_cache_count--;
444 nrg = list_first_entry(&resv->region_cache, struct file_region, link);
445 list_del(&nrg->link);
447 nrg->from = from;
448 nrg->to = to;
457 nrg->reservation_counter = rg->reservation_counter;
458 nrg->css = rg->css;
459 if (rg->css)
460 css_get(rg->css);
472 nrg->reservation_counter =
473 &h_cg->rsvd_hugepage[hstate_index(h)];
474 nrg->css = &h_cg->css;
476 * The caller will hold exactly one h_cg->css reference for the
481 * exactly one h_cg->css reference, we should do css_get for
485 css_get(&h_cg->css);
486 if (!resv->pages_per_hpage)
487 resv->pages_per_hpage = pages_per_huge_page(h);
491 VM_BUG_ON(resv->pages_per_hpage != pages_per_huge_page(h));
493 nrg->reservation_counter = NULL;
494 nrg->css = NULL;
502 if (rg->css)
503 css_put(rg->css);
511 return rg->reservation_counter == org->reservation_counter &&
512 rg->css == org->css;
524 if (&prg->link != &resv->regions && prg->to == rg->from &&
526 prg->to = rg->to;
528 list_del(&rg->link);
536 if (&nrg->link != &resv->regions && nrg->from == rg->to &&
538 nrg->from = rg->from;
540 list_del(&rg->link);
556 list_add(&nrg->link, rg);
561 return to - from;
565 * Must be called with resv->lock held.
577 struct list_head *head = &resv->regions;
586 * [last_accounted_offset, iter->from), at every iteration, with some
591 if (iter->from < f) {
595 if (iter->to > last_accounted_offset)
596 last_accounted_offset = iter->to;
603 if (iter->from >= t) {
604 rg = iter->link.prev;
608 /* Add an entry for last_accounted_offset -> iter->from, and
611 if (iter->from > last_accounted_offset)
612 add += hugetlb_resv_map_add(resv, iter->link.prev,
614 iter->from, h, h_cg,
617 last_accounted_offset = iter->to;
624 rg = head->prev;
632 /* Must be called with resv->lock acquired. Will drop lock to allocate entries.
636 __must_hold(&resv->lock)
653 while (resv->region_cache_count <
654 (resv->adds_in_progress + regions_needed)) {
655 to_allocate = resv->adds_in_progress + regions_needed -
656 resv->region_cache_count;
662 VM_BUG_ON(resv->region_cache_count < resv->adds_in_progress);
664 spin_unlock(&resv->lock);
669 list_add(&trg->link, &allocated_regions);
672 spin_lock(&resv->lock);
674 list_splice(&allocated_regions, &resv->region_cache);
675 resv->region_cache_count += to_allocate;
682 list_del(&rg->link);
685 return -ENOMEM;
700 * this operation and we were not able to allocate, it returns -ENOMEM.
711 spin_lock(&resv->lock);
728 resv->region_cache_count <
729 resv->adds_in_progress +
730 (actual_regions_needed - in_regions_needed)) {
734 VM_BUG_ON(t - f <= 1);
737 resv, actual_regions_needed - in_regions_needed)) {
738 return -ENOMEM;
746 resv->adds_in_progress -= in_regions_needed;
748 spin_unlock(&resv->lock);
764 * resv->adds_in_progress. This value needs to be provided to a follow up call
769 * zero. -ENOMEM is returned if a new file_region structure or cache entry
775 long chg = 0;
777 spin_lock(&resv->lock);
780 chg = add_reservation_in_range(resv, f, t, NULL, NULL,
787 return -ENOMEM;
789 resv->adds_in_progress += *out_regions_needed;
791 spin_unlock(&resv->lock);
792 return chg;
811 spin_lock(&resv->lock);
812 VM_BUG_ON(!resv->region_cache_count);
813 resv->adds_in_progress -= regions_needed;
814 spin_unlock(&resv->lock);
826 * be allocated. If the allocation fails, -ENOMEM will be returned.
828 * a region and possibly return -ENOMEM. Callers specifying
829 * t == LONG_MAX do not need to check for -ENOMEM error.
833 struct list_head *head = &resv->regions;
839 spin_lock(&resv->lock);
848 if (rg->to <= f && (rg->to != rg->from || rg->to != f))
851 if (rg->from >= t)
854 if (f > rg->from && t < rg->to) { /* Must split region */
860 resv->region_cache_count > resv->adds_in_progress) {
861 nrg = list_first_entry(&resv->region_cache,
864 list_del(&nrg->link);
865 resv->region_cache_count--;
869 spin_unlock(&resv->lock);
872 return -ENOMEM;
876 del += t - f;
878 resv, rg, t - f, false);
881 nrg->from = t;
882 nrg->to = rg->to;
886 INIT_LIST_HEAD(&nrg->link);
889 rg->to = f;
891 list_add(&nrg->link, &rg->link);
896 if (f <= rg->from && t >= rg->to) { /* Remove entire region */
897 del += rg->to - rg->from;
899 rg->to - rg->from, true);
900 list_del(&rg->link);
905 if (f <= rg->from) { /* Trim beginning of region */
907 t - rg->from, false);
909 del += t - rg->from;
910 rg->from = t;
913 rg->to - f, false);
915 del += rg->to - f;
916 rg->to = f;
920 spin_unlock(&resv->lock);
960 struct list_head *head = &resv->regions;
962 long chg = 0;
964 spin_lock(&resv->lock);
970 if (rg->to <= f)
972 if (rg->from >= t)
975 seg_from = max(rg->from, f);
976 seg_to = min(rg->to, t);
978 chg += seg_to - seg_from;
980 spin_unlock(&resv->lock);
982 return chg;
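
The region_count() lines just above (ending at "return chg;" on line 982) walk the sorted resv->regions list and add up how many pages of [f, t) are already covered by existing file_region entries. A stand-alone sketch of the same overlap arithmetic, over an array instead of the kernel's linked list (names invented here):

/* One reserved range [from, to) in huge-page units, mirroring the
 * from/to fields of struct file_region used above. */
struct region_model {
	long from;
	long to;
};

/* Count how many pages in [f, t) the sorted, non-overlapping regions
 * already cover -- the quantity the excerpt accumulates in 'chg'. */
static long count_covered(const struct region_model *rg, int nr, long f, long t)
{
	long chg = 0;
	int i;

	for (i = 0; i < nr; i++) {
		long seg_from, seg_to;

		if (rg[i].to <= f)	/* region ends at or before [f, t) */
			continue;
		if (rg[i].from >= t)	/* region starts at or after [f, t); list is sorted */
			break;

		seg_from = rg[i].from > f ? rg[i].from : f;
		seg_to = rg[i].to < t ? rg[i].to : t;
		chg += seg_to - seg_from;
	}
	return chg;
}

For regions {[0, 4), [10, 12)} and a query of [2, 11) this returns 3. The region_chg()/add_reservation_in_range() lines above compute the complementary figure, (t - f) minus the covered pages, which is the chg the caller still has to reserve.
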
992 return ((address - vma->vm_start) >> huge_page_shift(h)) +
993 (vma->vm_pgoff >> huge_page_order(h));
1004 * vma_kernel_pagesize - Page size granularity for this VMA.
1014 if (vma->vm_ops && vma->vm_ops->pagesize)
1015 return vma->vm_ops->pagesize(vma);
1023 * architectures where it differs, an architecture-specific 'strong'
1061 return (unsigned long)vma->vm_private_data;
1067 vma->vm_private_data = (void *)value;
1077 resv_map->reservation_counter = NULL;
1078 resv_map->pages_per_hpage = 0;
1079 resv_map->css = NULL;
1081 resv_map->reservation_counter =
1082 &h_cg->rsvd_hugepage[hstate_index(h)];
1083 resv_map->pages_per_hpage = pages_per_huge_page(h);
1084 resv_map->css = &h_cg->css;
1100 kref_init(&resv_map->refs);
1101 spin_lock_init(&resv_map->lock);
1102 INIT_LIST_HEAD(&resv_map->regions);
1103 init_rwsem(&resv_map->rw_sema);
1105 resv_map->adds_in_progress = 0;
1109 * re-initialized to the proper values, to indicate that hugetlb cgroup
1110 * reservations are to be un-charged from here.
1114 INIT_LIST_HEAD(&resv_map->region_cache);
1115 list_add(&rg->link, &resv_map->region_cache);
1116 resv_map->region_cache_count = 1;
1124 struct list_head *head = &resv_map->region_cache;
1132 list_del(&rg->link);
1136 VM_BUG_ON(resv_map->adds_in_progress);
1148 * The VERY common case is inode->mapping == &inode->i_data but,
1151 return (struct resv_map *)(&inode->i_data)->private_data;
1157 if (vma->vm_flags & VM_MAYSHARE) {
1158 struct address_space *mapping = vma->vm_file->f_mapping;
1159 struct inode *inode = mapping->host;
1172 VM_BUG_ON_VMA(vma->vm_flags & VM_MAYSHARE, vma);
1180 VM_BUG_ON_VMA(vma->vm_flags & VM_MAYSHARE, vma);
1194 return !(vma->vm_flags & VM_MAYSHARE) &&
1204 * - For shared mappings this is a per-vma semaphore that may be
1210 * - For MAP_PRIVATE mappings, this is the reserve map which does
1212 * not guaranteed to succeed, even if read-only.
1214 if (vma->vm_flags & VM_MAYSHARE) {
1215 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
1217 if (vma_lock && vma_lock->vma != vma)
1218 vma->vm_private_data = NULL;
1220 vma->vm_private_data = NULL;
1225 * Called with mm->mmap_lock writer semaphore held.
1248 kref_put(&reservations->refs, resv_map_release);
1255 static bool vma_has_reserves(struct vm_area_struct *vma, long chg)
1257 if (vma->vm_flags & VM_NORESERVE) {
1259 * This address is already reserved by another process (chg == 0),
1265 * properly, so add work-around here.
1267 if (vma->vm_flags & VM_MAYSHARE && chg == 0)
1274 if (vma->vm_flags & VM_MAYSHARE) {
1280 * use. This situation is indicated if chg != 0.
1282 if (chg)
1296 * Examine the value of chg to determine if reserves
1298 * Very Subtle - The value of chg comes from a previous
1303 * account. Therefore, the meaning of chg is the same
1308 if (chg)
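
The vma_has_reserves() fragment above (the comments around "Very Subtle - The value of chg comes from a previous ...") boils down to a small decision on the chg value returned earlier by vma_needs_reservation(). A condensed, stand-alone restatement of that decision, with flag arguments standing in for the vma->vm_flags tests and the HPAGE_RESV_OWNER check (names invented here):

/* Hypothetical condensation of the quoted logic: may a fault at this
 * index consume a reserved huge page? */
static int has_reserves_model(int vm_noreserve, int vm_mayshare,
			      int resv_owner, long chg)
{
	if (vm_noreserve)
		return vm_mayshare && chg == 0;	/* reserve was made by another task */
	if (vm_mayshare)
		return chg == 0;	/* chg != 0: hole punched, no reserve left */
	if (resv_owner)
		return chg == 0;	/* private owner: reserve exists iff chg == 0 */
	return 0;			/* private non-owner never has reserves */
}

In every branch chg == 0 means "a reservation exists for this page index and may be consumed", which is the point of the "the meaning of chg is the same" comment above.
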
1324 list_move(&folio->lru, &h->hugepage_freelists[nid]);
1325 h->free_huge_pages++;
1326 h->free_huge_pages_node[nid]++;
1334 bool pin = !!(current->flags & PF_MEMALLOC_PIN);
1337 list_for_each_entry(folio, &h->hugepage_freelists[nid], lru) {
1344 list_move(&folio->lru, &h->hugepage_activelist);
1347 h->free_huge_pages--;
1348 h->free_huge_pages_node[nid]--;
1393 return h->free_huge_pages - h->resv_huge_pages;
1399 long chg)
1412 if (!vma_has_reserves(vma, chg) && !available_huge_pages(h))
1434 if (folio && !avoid_reserve && vma_has_reserves(vma, chg)) {
1436 h->resv_huge_pages--;
1449 * nodes_allowed previously, so h->next_node_to_{alloc|free} might
1481 nid = get_valid_node_allowed(h->next_nid_to_alloc, nodes_allowed);
1482 h->next_nid_to_alloc = next_node_allowed(nid, nodes_allowed);
1488 * helper for remove_pool_huge_page() - return the previously saved
1499 nid = get_valid_node_allowed(h->next_nid_to_free, nodes_allowed);
1500 h->next_nid_to_free = next_node_allowed(nid, nodes_allowed);
1509 nr_nodes--)
1515 nr_nodes--)
1517 /* used to demote non-gigantic_huge pages as well */
1525 atomic_set(&folio->_entire_mapcount, 0);
1526 atomic_set(&folio->_nr_pages_mapped, 0);
1527 atomic_set(&folio->_pincount, 0);
1531 p->flags &= ~PAGE_FLAGS_CHECK_AT_FREE;
1532 p->mapping = NULL;
1563 if (cma_release(hugetlb_cma[nid], &folio->page, 1 << order))
1659 list_del(&folio->lru);
1662 h->free_huge_pages--;
1663 h->free_huge_pages_node[nid]--;
1666 h->surplus_huge_pages--;
1667 h->surplus_huge_pages_node[nid]--;
1685 h->nr_huge_pages--;
1686 h->nr_huge_pages_node[nid]--;
1711 INIT_LIST_HEAD(&folio->lru);
1712 h->nr_huge_pages++;
1713 h->nr_huge_pages_node[nid]++;
1716 h->surplus_huge_pages++;
1717 h->surplus_huge_pages_node[nid]++;
1743 arch_clear_hugepage_flags(&folio->page);
1760 if (hugetlb_vmemmap_restore(h, &folio->page)) {
1790 * Non-gigantic pages demoted from CMA allocated gigantic pages
1798 INIT_LIST_HEAD(&folio->_deferred_list);
1799 __free_pages(&folio->page, huge_page_order(h));
1810 * freed and frees them one-by-one. As the page->mapping pointer is going
1828 node = node->next;
1829 page->mapping = NULL;
1866 if (llist_add((struct llist_node *)&folio->mapping, &hpage_freelist))
1910 __ClearPageAnonExclusive(&folio->page);
1911 folio->mapping = NULL;
1941 h->resv_huge_pages++;
1947 } else if (h->surplus_huge_pages_node[nid]) {
1953 arch_clear_hugepage_flags(&folio->page);
1965 h->nr_huge_pages++;
1966 h->nr_huge_pages_node[nid]++;
1971 hugetlb_vmemmap_optimize(h, &folio->page);
1972 INIT_LIST_HEAD(&folio->lru);
2000 * boot, it's safer to be consistent with the not-gigantic
2038 set_compound_head(p, &folio->page);
2043 atomic_set(&folio->_entire_mapcount, -1);
2044 atomic_set(&folio->_nr_pages_mapped, 0);
2045 atomic_set(&folio->_pincount, 0);
2103 compound_idx = page_to_pfn(page) - page_to_pfn(page_head);
2105 compound_idx = page - page_head;
2199 * Free pages and try again - ONCE!
2258 if ((!acct_surplus || h->surplus_huge_pages_node[node]) &&
2259 !list_empty(&h->hugepage_freelists[node])) {
2260 page = list_entry(h->hugepage_freelists[node].next,
2273 * nothing for in-use hugepages and non-hugepages.
2276 * -ENOMEM: failed to allocate vmemmap pages to free the freed hugepages
2280 * -EBUSY: failed to dissolve free hugepages or the hugepage is in-use
2287 int rc = -EBUSY;
2315 * Theoretically, we should return -EBUSY when we
2326 h->max_huge_pages--;
2337 rc = hugetlb_vmemmap_restore(h, &folio->page);
2343 h->max_huge_pages++;
2399 if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages)
2415 if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
2422 h->surplus_huge_pages++;
2423 h->surplus_huge_pages_node[folio_nid(folio)]++;
2531 if (mpol->mode == MPOL_BIND &&
2533 cpuset_nodemask_valid_mems_allowed(&mpol->nodes)))
2534 return &mpol->nodes;
2556 needed = (h->resv_huge_pages + delta) - h->free_huge_pages;
2558 h->resv_huge_pages += delta;
2564 ret = -ENOMEM;
2581 list_add(&folio->lru, &surplus_list);
2591 needed = (h->resv_huge_pages + delta) -
2592 (h->free_huge_pages + allocated);
2612 h->resv_huge_pages += delta;
2617 if ((--needed) < 0)
2653 h->resv_huge_pages -= unused_resv_pages;
2660 * by pre-allocated pages. Only free surplus pages.
2662 nr_pages = min(unused_resv_pages, h->surplus_huge_pages);
2670 * on-line nodes with memory and will handle the hstate accounting.
2672 while (nr_pages--) {
2677 list_add(&page->lru, &page_list);
2756 if (vma->vm_flags & VM_MAYSHARE) {
2766 if (vma->vm_flags & VM_MAYSHARE) {
2779 if (vma->vm_flags & VM_MAYSHARE || mode == VMA_DEL_RESV)
2788 * Subtle - The reserve map for private mappings has the
2908 if (!(vma->vm_flags & VM_MAYSHARE))
2927 * alloc_and_dissolve_hugetlb_folio - Allocate a new folio and dissolve
2951 return -ENOMEM;
2966 * Fail with -EBUSY if not possible.
2970 ret = isolated ? 0 : -EBUSY;
3021 int ret = -EBUSY;
3039 * alloc_contig_range and them. Return -ENOMEM as this has the effect
3043 return -ENOMEM;
3073 return ERR_PTR(-ENOMEM);
3086 return ERR_PTR(-ENOSPC);
3130 h->resv_huge_pages--;
3132 list_add(&folio->lru, &h->hugepage_activelist);
3164 hugetlb_acct_memory(h, -rsv_adjust);
3184 return ERR_PTR(-ENOSPC);
3219 INIT_LIST_HEAD(&m->list);
3220 list_add(&m->list, &huge_boot_pages);
3221 m->hstate = h;
3236 struct hstate *h = m->hstate;
3252 * other side-effects, like CommitLimit going negative.
3263 for (i = 0; i < h->max_huge_pages_node[nid]; ++i) {
3279 if (i == h->max_huge_pages_node[nid])
3284 h->max_huge_pages_node[nid], buf, nid, i);
3285 h->max_huge_pages -= (h->max_huge_pages_node[nid] - i);
3286 h->max_huge_pages_node[nid] = i;
3303 if (h->max_huge_pages_node[i] > 0) {
3315 * Bit mask controlling how hard we retry per-node allocations.
3327 /* bit mask controlling how hard we retry per-node allocations */
3331 for (i = 0; i < h->max_huge_pages; ++i) {
3341 if (i < h->max_huge_pages) {
3346 h->max_huge_pages, buf, i);
3347 h->max_huge_pages = i;
3363 * h->demote_order is initially 0.
3364 * - We can not demote gigantic pages if runtime freeing
3366 * - If CMA allocation is possible, we can not demote
3371 if (hugetlb_cma_size && h->order <= HUGETLB_PAGE_ORDER)
3376 if (h2->order < h->order &&
3377 h2->order > h->demote_order)
3378 h->demote_order = h2->order;
3391 pr_info("HugeTLB: registered %s page size, pre-allocated %ld pages\n",
3392 buf, h->free_huge_pages);
3414 struct list_head *freel = &h->hugepage_freelists[i];
3416 if (count >= h->nr_huge_pages)
3421 list_add(&page->lru, &page_list);
3438 * Increment or decrement surplus_huge_pages. Keep node-specific counters
3439 * balanced by operating on them in a round-robin fashion.
3448 VM_BUG_ON(delta != -1 && delta != 1);
3452 if (h->surplus_huge_pages_node[node])
3457 if (h->surplus_huge_pages_node[node] <
3458 h->nr_huge_pages_node[node])
3465 h->surplus_huge_pages += delta;
3466 h->surplus_huge_pages_node[node] += delta;
3470 #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
3480 * Bit mask controlling how hard we retry per-node allocations.
3487 return -ENOMEM;
3493 mutex_lock(&h->resize_lock);
3506 count += h->nr_huge_pages - h->nr_huge_pages_node[nid];
3527 mutex_unlock(&h->resize_lock);
3529 return -EINVAL;
3545 while (h->surplus_huge_pages && count > persistent_huge_pages(h)) {
3546 if (!adjust_pool_surplus(h, nodes_allowed, -1))
3567 /* Bail for signals. Probably ctrl-c from user */
3587 min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages;
3599 list_add(&page->lru, &page_list);
3612 h->max_huge_pages = persistent_huge_pages(h);
3614 mutex_unlock(&h->resize_lock);
3629 target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order);
3634 rc = hugetlb_vmemmap_restore(h, &folio->page);
3654 * Note that we already hold h->resize_lock. To prevent deadlock,
3657 mutex_lock(&target_hstate->resize_lock);
3664 target_hstate->order);
3666 prep_compound_page(subpage, target_hstate->order);
3671 mutex_unlock(&target_hstate->resize_lock);
3679 h->max_huge_pages--;
3680 target_hstate->max_huge_pages +=
3695 if (!h->demote_order) {
3697 return -EINVAL; /* internal error */
3701 list_for_each_entry(folio, &h->hugepage_freelists[node], lru) {
3710 * Return -EBUSY so that caller will not retry.
3712 return -EBUSY;
3752 nr_huge_pages = h->nr_huge_pages;
3754 nr_huge_pages = h->nr_huge_pages_node[nid];
3767 return -EINVAL;
3825 * hstate attribute for optionally mempolicy-based constraint on persistent
3848 return sysfs_emit(buf, "%lu\n", h->nr_overcommit_huge_pages);
3859 return -EINVAL;
3866 h->nr_overcommit_huge_pages = input;
3882 free_huge_pages = h->free_huge_pages;
3884 free_huge_pages = h->free_huge_pages_node[nid];
3894 return sysfs_emit(buf, "%lu\n", h->resv_huge_pages);
3907 surplus_huge_pages = h->surplus_huge_pages;
3909 surplus_huge_pages = h->surplus_huge_pages_node[nid];
3938 mutex_lock(&h->resize_lock);
3947 nr_available = h->free_huge_pages_node[nid];
3949 nr_available = h->free_huge_pages;
3950 nr_available -= h->resv_huge_pages;
3958 nr_demote--;
3962 mutex_unlock(&h->resize_lock);
3974 unsigned long demote_size = (PAGE_SIZE << h->demote_order) / SZ_1K;
3991 return -EINVAL;
3992 demote_order = demote_hstate->order;
3994 return -EINVAL;
3998 if (demote_order >= h->order)
3999 return -EINVAL;
4002 mutex_lock(&h->resize_lock);
4003 h->demote_order = demote_order;
4004 mutex_unlock(&h->resize_lock);
4043 hstate_kobjs[hi] = kobject_create_and_add(h->name, parent);
4045 return -ENOMEM;
4054 if (h->demote_order) {
4058 pr_warn("HugeTLB unable to create demote interfaces for %s\n", h->name);
4073 * node_hstate/s - associate per node hstate attributes, via their kobjects,
4100 * kobj_to_node_hstate - lookup global hstate for node device hstate attr kobj.
4101 * Returns node id via non-NULL nidp.
4111 if (nhs->hstate_kobjs[i] == kobj) {
4124 * No-op if no hstate attributes attached.
4129 struct node_hstate *nhs = &node_hstates[node->dev.id];
4131 if (!nhs->hugepages_kobj)
4136 struct kobject *hstate_kobj = nhs->hstate_kobjs[idx];
4140 if (h->demote_order)
4144 nhs->hstate_kobjs[idx] = NULL;
4147 kobject_put(nhs->hugepages_kobj);
4148 nhs->hugepages_kobj = NULL;
4154 * No-op if attributes already registered.
4159 struct node_hstate *nhs = &node_hstates[node->dev.id];
4165 if (nhs->hugepages_kobj)
4168 nhs->hugepages_kobj = kobject_create_and_add("hugepages",
4169 &node->dev.kobj);
4170 if (!nhs->hugepages_kobj)
4174 err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj,
4175 nhs->hstate_kobjs,
4179 h->name, node->dev.id);
4188 * devices of nodes that have memory. All on-line nodes should have
4204 *nidp = -1;
4233 pr_err("HugeTLB: Unable to add hstate %s", h->name);
4257 pr_warn("HugeTLB: huge pages not supported, ignoring associated command-line parameters\n");
4338 __mutex_init(&h->resize_lock, "resize mutex", &h->resize_key);
4339 h->order = order;
4340 h->mask = ~(huge_page_size(h) - 1);
4342 INIT_LIST_HEAD(&h->hugepage_freelists[i]);
4343 INIT_LIST_HEAD(&h->hugepage_activelist);
4344 h->next_nid_to_alloc = first_memory_node;
4345 h->next_nid_to_free = first_memory_node;
4346 snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
4364 parsed_hstate->max_huge_pages = 0;
4365 memset(parsed_hstate->max_huge_pages_node, 0,
4366 sizeof(parsed_hstate->max_huge_pages_node));
4401 mhp = &parsed_hstate->max_huge_pages;
4428 parsed_hstate->max_huge_pages_node[node] = tmp;
4507 hugetlb_add_hstate(ilog2(size) - PAGE_SHIFT);
4535 hugetlb_add_hstate(ilog2(size) - PAGE_SHIFT);
4566 unsigned int *array = h->free_huge_pages_node;
4600 unsigned long tmp = h->max_huge_pages;
4604 return -EOPNOTSUPP;
4643 return -EOPNOTSUPP;
4645 tmp = h->nr_overcommit_huge_pages;
4648 return -EINVAL;
4657 h->nr_overcommit_huge_pages = tmp;
4713 unsigned long count = h->nr_huge_pages;
4725 h->free_huge_pages,
4726 h->resv_huge_pages,
4727 h->surplus_huge_pages,
4745 nid, h->nr_huge_pages_node[nid],
4746 nid, h->free_huge_pages_node[nid],
4747 nid, h->surplus_huge_pages_node[nid]);
4760 h->nr_huge_pages_node[nid],
4761 h->free_huge_pages_node[nid],
4762 h->surplus_huge_pages_node[nid],
4769 K(atomic_long_read(&mm->hugetlb_usage)));
4779 nr_total_pages += h->nr_huge_pages * pages_per_huge_page(h);
4785 int ret = -ENOMEM;
4826 return_unused_surplus_pages(h, (unsigned long) -delta);
4848 kref_get(&resv->refs);
4857 if (vma->vm_flags & VM_MAYSHARE) {
4858 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
4861 if (vma_lock->vma != vma) {
4862 vma->vm_private_data = NULL;
4885 start = vma_hugecache_offset(h, vma, vma->vm_start);
4886 end = vma_hugecache_offset(h, vma, vma->vm_end);
4888 reserve = (end - start) - region_count(resv, start, end);
4896 hugetlb_acct_memory(h, -gbl_reserve);
4899 kref_put(&resv->refs, resv_map_release);
4905 return -EINVAL;
4908 * PMD sharing is only possible for PUD_SIZE-aligned address ranges
4921 if (floor >= vma->vm_start && ceil <= vma->vm_end)
4935 * handle_mm_fault() to try to instantiate regular-sized pages in the
4968 vma->vm_page_prot)));
4971 vma->vm_page_prot));
4974 entry = arch_make_huge_pte(entry, shift, vma->vm_flags);
5019 pte_t newpte = make_huge_pte(vma, &new_folio->page, 1);
5025 set_huge_pte_at(vma->vm_mm, addr, ptep, newpte, sz);
5026 hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
5037 bool cow = is_cow_mapping(src_vma->vm_flags);
5047 src_vma->vm_start,
5048 src_vma->vm_end);
5051 raw_write_seqcount_begin(&src->write_protect_seq);
5063 for (addr = src_vma->vm_start; addr < src_vma->vm_end; addr += sz) {
5072 ret = -ENOMEM;
5140 * When pre-allocating the page or copying data, we
5145 page_dup_file_rmap(&pte_folio->page, true);
5146 } else if (page_try_dup_anon_rmap(&pte_folio->page,
5211 raw_write_seqcount_end(&src->write_protect_seq);
5225 struct mm_struct *mm = vma->vm_mm;
5253 struct address_space *mapping = vma->vm_file->f_mapping;
5255 struct mm_struct *mm = vma->vm_mm;
5303 flush_hugetlb_tlb_range(vma, old_end - len, old_end);
5308 return len + old_addr - old_end;
5315 struct mm_struct *mm = vma->vm_mm;
5367 * If the pte was wr-protected by uffd-wp in any of the
5369 * drop the uffd-wp bit in this zap, then replace the
5406 /* Leave a uffd-wp pte marker if needed */
5445 if (!vma->vm_file) /* hugetlbfs_file_mmap error */
5450 if (vma->vm_file)
5451 i_mmap_lock_write(vma->vm_file->f_mapping);
5457 zap_flags_t zap_flags = details ? details->zap_flags : 0;
5459 if (!vma->vm_file) /* hugetlbfs_file_mmap error */
5477 if (vma->vm_file)
5478 i_mmap_unlock_write(vma->vm_file->f_mapping);
5488 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm,
5492 tlb_gather_mmu(&tlb, vma->vm_mm);
5519 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) +
5520 vma->vm_pgoff;
5521 mapping = vma->vm_file->f_mapping;
5529 vma_interval_tree_foreach(iter_vma, &mapping->i_mmap, pgoff, pgoff) {
5539 if (iter_vma->vm_flags & VM_MAYSHARE)
5545 * areas. This is because a future no-page fault on this VMA
5577 * Never handle CoW for uffd-wp protected pages. It should be only
5578 * handled when the uffd-wp protection is removed.
5582 * uffd-wp bit first.
5588 * hugetlb does not support FOLL_FORCE-style write faults that keep the
5591 if (WARN_ON_ONCE(!unshare && !(vma->vm_flags & VM_WRITE)))
5595 if (vma->vm_flags & VM_MAYSHARE) {
5606 * If no-one else is actually using this page, we're the exclusive
5610 if (!PageAnonExclusive(&old_folio->page))
5611 page_move_anon_rmap(&old_folio->page, vma);
5619 PageAnonExclusive(&old_folio->page), &old_folio->page);
5652 struct address_space *mapping = vma->vm_file->f_mapping;
5671 unmap_ref_private(mm, vma, &old_folio->page, haddr);
5681 * race occurs while re-acquiring page table
5718 pte_t newpte = make_huge_pte(vma, &new_folio->page, !unshare);
5722 page_remove_rmap(&old_folio->page, vma, true);
5756 struct address_space *mapping = vma->vm_file->f_mapping;
5770 struct inode *inode = mapping->host;
5785 * by non-hugetlbfs specific code paths.
5789 spin_lock(&inode->i_lock);
5790 inode->i_blocks += blocks_per_huge_page(h);
5791 spin_unlock(&inode->i_lock);
5872 current->pid);
5883 size = i_size_read(mapping->host) >> huge_page_shift(h);
5890 * without pgtable lock, we need to re-test under
5893 * either changed or during-changing ptes and retry
5935 clear_huge_page(&folio->page, address, pages_per_huge_page(h));
5939 if (vma->vm_flags & VM_MAYSHARE) {
5943 * err can't be -EEXIST which implies someone
5995 if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
6013 page_dup_file_rmap(&folio->page, true);
6014 new_pte = make_huge_pte(vma, &folio->page, ((vma->vm_flags & VM_WRITE)
6015 && (vma->vm_flags & VM_SHARED)));
6017 * If this pte was previously wr-protected, keep it wr-protected even
6025 if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
6068 return hash & (num_fault_mutexes - 1);
6107 mapping = vma->vm_file->f_mapping;
6182 !(vma->vm_flags & VM_MAYSHARE) && !huge_pte_write(entry)) {
6201 /* Handle userfault-wp first, before trying to lock more pages */
6286 struct mm_struct *dst_mm = dst_vma->vm_mm;
6290 struct address_space *mapping = dst_vma->vm_file->f_mapping;
6293 int vm_shared = dst_vma->vm_flags & VM_SHARED;
6296 int ret = -ENOMEM;
6307 return -EEXIST;
6314 /* No need to invalidate - it was non-present before */
6322 ret = -EFAULT;
6329 * a non-missing case. Return -EEXIST.
6333 ret = -EEXIST;
6339 ret = -ENOMEM;
6348 ret = -ENOENT;
6360 ret = -ENOMEM;
6374 ret = -EEXIST;
6382 ret = -ENOMEM;
6404 size = i_size_read(mapping->host) >> huge_page_shift(h);
6405 ret = -EFAULT;
6423 ret = -EIO;
6429 * registered, we firstly wr-protect a none pte which has no page cache
6432 ret = -EEXIST;
6437 page_dup_file_rmap(&folio->page, true);
6442 * For either: (1) CONTINUE on a non-shared VMA, or (2) UFFDIO_COPY
6448 writable = dst_vma->vm_flags & VM_WRITE;
6450 _dst_pte = make_huge_pte(dst_vma, &folio->page, writable);
6467 /* No need to invalidate - it was non-present before */
6495 struct mm_struct *mm = vma->vm_mm;
6520 page = ERR_PTR(-EMLINK);
6528 * Note that page may be a sub-page, and with vmemmap
6543 *page_mask = (1U << huge_page_order(h)) - 1;
6556 page = ERR_PTR(-EFAULT);
6565 struct mm_struct *mm = vma->vm_mm;
6591 i_mmap_lock_write(vma->vm_file->f_mapping);
6602 * Userfaultfd wr-protect requires pgtable
6603 * pre-allocations to install pte markers.
6607 pages = -ENOMEM;
6614 * When uffd-wp is enabled on the vma, unshare
6658 /* Safe to modify directly (non-present->none). */
6666 pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
6676 /* Safe to modify directly (none->non-present). */
6701 i_mmap_unlock_write(vma->vm_file->f_mapping);
6705 return pages > 0 ? (pages << h->order) : pages;
6714 long chg = -1, add = -1;
6744 * to reserve the full area even if read-only as mprotect() may be
6745 * called to make the mapping read-write. Assume !vma is a shm mapping
6747 if (!vma || vma->vm_flags & VM_MAYSHARE) {
6755 chg = region_chg(resv_map, from, to, &regions_needed);
6762 chg = to - from;
6768 if (chg < 0)
6772 chg * pages_per_huge_page(h), &h_cg) < 0)
6775 if (vma && !(vma->vm_flags & VM_MAYSHARE) && h_cg) {
6787 gbl_reserve = hugepage_subpool_get_pages(spool, chg);
6803 * the reservation was consumed. Private mappings are per-VMA and
6809 if (!vma || vma->vm_flags & VM_MAYSHARE) {
6813 hugetlb_acct_memory(h, -gbl_reserve);
6815 } else if (unlikely(chg > add)) {
6827 * reference to h_cg->css. See comment below for detail.
6831 (chg - add) * pages_per_huge_page(h), h_cg);
6834 chg - add);
6835 hugetlb_acct_memory(h, -rsv_adjust);
6839 * h_cg->css. So we should release the reference held
6849 /* put back original number of pages, chg */
6850 (void)hugepage_subpool_put_pages(spool, chg);
6853 chg * pages_per_huge_page(h), h_cg);
6856 if (!vma || vma->vm_flags & VM_MAYSHARE)
6860 if (chg >= 0 && add < 0)
6863 kref_put(&resv_map->refs, resv_map_release);
6874 long chg = 0;
6883 chg = region_del(resv_map, start, end);
6889 if (chg < 0)
6890 return chg;
6893 spin_lock(&inode->i_lock);
6894 inode->i_blocks -= (blocks_per_huge_page(h) * freed);
6895 spin_unlock(&inode->i_lock);
6901 * Note that !resv_map implies freed == 0. So (chg - freed)
6904 gbl_reserve = hugepage_subpool_put_pages(spool, (chg - freed));
6905 hugetlb_acct_memory(h, -gbl_reserve);
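
The hugetlb_reserve_pages()/hugetlb_unreserve_pages() lines above tie the pieces together at mmap and truncate time: region_chg() reports chg, the subpool absorbs what its reserve can, hugetlb_acct_memory() charges the remainder (gbl_reserve) globally, and if region_add() later commits fewer pages than chg the excess is pushed back through hugepage_subpool_put_pages() and a negative hugetlb_acct_memory() call. A deliberately tiny numeric walk-through of that arithmetic (numbers invented; the subpool is assumed to have neither min= nor max= set, so it passes deltas straight through):

#include <stdio.h>

int main(void)
{
	long chg = 8;	/* pages region_chg() reported as not yet reserved  */
	long add = 6;	/* pages region_add() later committed; a racing task
			 * reserved 2 pages of the range in between         */
	long global = 0;

	global += chg;			/* hugetlb_acct_memory(h, gbl_reserve) */
	printf("charged %ld pages\n", global);

	if (add < chg) {
		global -= chg - add;	/* hugetlb_acct_memory(h, -rsv_adjust) */
		printf("returned %ld pages, %ld remain reserved\n",
		       chg - add, global);
	}
	return 0;
}

With a min= subpool, part of chg would instead be absorbed by rsv_hpages, as in the earlier subpool sketch.
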
6915 unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
6916 svma->vm_start;
6921 unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED_MASK;
6922 unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED_MASK;
6933 !svma->vm_private_data)
6951 if (!(vma->vm_flags & VM_MAYSHARE))
6953 if (!vma->vm_private_data) /* vma lock required for sharing */
6968 unsigned long v_start = ALIGN(vma->vm_start, PUD_SIZE),
6969 v_end = ALIGN_DOWN(vma->vm_end, PUD_SIZE);
6975 if (!(vma->vm_flags & VM_MAYSHARE) || !(v_end > v_start) ||
6992 * pud has to be populated inside the same i_mmap_rwsem section - otherwise
6999 struct address_space *mapping = vma->vm_file->f_mapping;
7000 pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
7001 vma->vm_pgoff;
7008 vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
7026 spin_lock(&mm->page_table_lock);
7034 spin_unlock(&mm->page_table_lock);
7057 i_mmap_assert_write_locked(vma->vm_file->f_mapping);
7131 * huge_pte_offset() - Walk the page table to resolve the hugepage
7156 /* must be pud huge, non-present or none */
7163 /* must be pmd huge, non-present or none */
7169 * page in a page table page mapping size. Used to skip non-present
7179 return P4D_SIZE - PUD_SIZE;
7181 return PUD_SIZE - PMD_SIZE;
7193 return PUD_SIZE - PMD_SIZE;
7216 list_move_tail(&folio->lru, list);
7235 ret = -EBUSY;
7256 list_move_tail(&folio->lru, &(folio_hstate(folio))->hugepage_activelist);
7266 set_page_owner_migrate_reason(&new_folio->page, reason);
7272 * the temporary status.
7274 * Also note that we have to transfer the per-node surplus state
7276 * the per-node's.
7287 * There is no need to transfer the per-node surplus state
7293 if (h->surplus_huge_pages_node[old_nid]) {
7294 h->surplus_huge_pages_node[old_nid]--;
7295 h->surplus_huge_pages_node[new_nid]++;
7307 struct mm_struct *mm = vma->vm_mm;
7313 if (!(vma->vm_flags & VM_MAYSHARE))
7328 i_mmap_lock_write(vma->vm_file->f_mapping);
7338 i_mmap_unlock_write(vma->vm_file->f_mapping);
7353 hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
7354 ALIGN_DOWN(vma->vm_end, PUD_SIZE));
7416 hugetlb_cma_size -= hugetlb_cma_size_in_node[nid];
7424 hugetlb_cma_size -= hugetlb_cma_size_in_node[nid];
7463 size = min(per_node, hugetlb_cma_size - reserved);