Lines Matching "non-disruptive" (mm/vmalloc.c)

1 // SPDX-License-Identifier: GPL-2.0-only
5 * SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
50 #include "pgalloc-track.h"
53 static unsigned int __ro_after_init ioremap_max_page_shift = BITS_PER_LONG - 1;
104 return -ENOMEM; in vmap_pte_range()
136 if ((end - addr) != PMD_SIZE) in vmap_try_huge_pmd()
160 return -ENOMEM; in vmap_pmd_range()
171 return -ENOMEM; in vmap_pmd_range()
172 } while (pmd++, phys_addr += (next - addr), addr = next, addr != end); in vmap_pmd_range()
186 if ((end - addr) != PUD_SIZE) in vmap_try_huge_pud()
210 return -ENOMEM; in vmap_pud_range()
222 return -ENOMEM; in vmap_pud_range()
223 } while (pud++, phys_addr += (next - addr), addr = next, addr != end); in vmap_pud_range()
237 if ((end - addr) != P4D_SIZE) in vmap_try_huge_p4d()
261 return -ENOMEM; in vmap_p4d_range()
273 return -ENOMEM; in vmap_p4d_range()
274 } while (p4d++, phys_addr += (next - addr), addr = next, addr != end); in vmap_p4d_range()
299 } while (pgd++, phys_addr += (next - addr), addr = next, addr != end); in vmap_range_noflush()
409 * or be re-mapped for something else, if TLB flushes are being delayed or
443 * vunmap_range - unmap kernel virtual addresses
445 * @end: end of the VM area to unmap (non-inclusive)
448 * caches. Any subsequent access to the address before it has been re-mapped
471 return -ENOMEM; in vmap_pages_pte_range()
476 return -EBUSY; in vmap_pages_pte_range()
478 return -ENOMEM; in vmap_pages_pte_range()
480 return -EINVAL; in vmap_pages_pte_range()
498 return -ENOMEM; in vmap_pages_pmd_range()
502 return -ENOMEM; in vmap_pages_pmd_range()
516 return -ENOMEM; in vmap_pages_pud_range()
520 return -ENOMEM; in vmap_pages_pud_range()
534 return -ENOMEM; in vmap_pages_p4d_range()
538 return -ENOMEM; in vmap_pages_p4d_range()
582 unsigned int i, nr = (end - addr) >> PAGE_SHIFT; in __vmap_pages_range_noflush()
590 for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) { in __vmap_pages_range_noflush()
617 * vmap_pages_range - map pages to a kernel virtual address
619 * @end: end of the VM area to map (non-inclusive)
626 * 0 on success, -errno on failure.
641 * ARM, x86-64 and sparc64 put modules in a special place, in is_vmalloc_or_module_addr()
716 * Map a vmalloc()-space virtual address to the physical page frame number.
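A minimal usage sketch (editorial, not a line from this file): translating a vmalloc() address into its backing page and PFN with vmalloc_to_page()/vmalloc_to_pfn(). "buf" is a hypothetical buffer.

	void *buf = vmalloc(4 * PAGE_SIZE);

	if (buf) {
		struct page *page = vmalloc_to_page(buf);		/* first backing page */
		unsigned long pfn = vmalloc_to_pfn(buf + PAGE_SIZE);	/* PFN of the second page */

		pr_info("page=%p pfn=%lx\n", page, pfn);
		vfree(buf);
	}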
757 * This augmented red-black tree represents the free vmap space.
758 * All vmap_area objects in this tree are sorted by va->va_start
763 * of its sub-tree, right or left. Therefore it is possible to
778 return (va->va_end - va->va_start); in va_size()
787 return va ? va->subtree_max_size : 0; in get_subtree_max_size()
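As a restatement of the augmentation invariant (editorial sketch, not file content): every node caches the largest free-block size found anywhere in its subtree, so a lookup can skip an entire subtree whose cached maximum is too small. recompute_max() is an illustrative name; it mirrors compute_subtree_max_size() shown further down in this listing.

	static unsigned long
	recompute_max(struct vmap_area *va)	/* illustrative name */
	{
		return max3(va_size(va),
			get_subtree_max_size(va->rb_node.rb_left),
			get_subtree_max_size(va->rb_node.rb_right));
	}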
817 if (tmp->va_end > addr) { in find_vmap_area_exceed_addr()
819 if (tmp->va_start <= addr) in find_vmap_area_exceed_addr()
822 n = n->rb_left; in find_vmap_area_exceed_addr()
824 n = n->rb_right; in find_vmap_area_exceed_addr()
832 struct rb_node *n = root->rb_node; in __find_vmap_area()
840 if (addr < va->va_start) in __find_vmap_area()
841 n = n->rb_left; in __find_vmap_area()
842 else if (addr >= va->va_end) in __find_vmap_area()
843 n = n->rb_right; in __find_vmap_area()
868 link = &root->rb_node; in find_va_links()
880 * it link, where the new va->rb_node will be attached to. in find_va_links()
890 if (va->va_end <= tmp_va->va_start) in find_va_links()
891 link = &(*link)->rb_left; in find_va_links()
892 else if (va->va_start >= tmp_va->va_end) in find_va_links()
893 link = &(*link)->rb_right; in find_va_links()
895 WARN(1, "vmalloc bug: 0x%lx-0x%lx overlaps with 0x%lx-0x%lx\n", in find_va_links()
896 va->va_start, va->va_end, tmp_va->va_start, tmp_va->va_end); in find_va_links()
902 *parent = &tmp_va->rb_node; in find_va_links()
913 * The red-black tree where we try to find VA neighbors in get_va_next_sibling()
920 list = &rb_entry(parent, struct vmap_area, rb_node)->list; in get_va_next_sibling()
921 return (&parent->rb_right == link ? list->next : list); in get_va_next_sibling()
934 head = &rb_entry(parent, struct vmap_area, rb_node)->list; in __link_va()
935 if (&parent->rb_right != link) in __link_va()
936 head = head->prev; in __link_va()
939 /* Insert to the rb-tree */ in __link_va()
940 rb_link_node(&va->rb_node, parent, link); in __link_va()
944 * to the tree. We do not set va->subtree_max_size to in __link_va()
953 rb_insert_augmented(&va->rb_node, in __link_va()
955 va->subtree_max_size = 0; in __link_va()
957 rb_insert_color(&va->rb_node, root); in __link_va()
960 /* Address-sort this list */ in __link_va()
961 list_add(&va->list, head); in __link_va()
983 if (WARN_ON(RB_EMPTY_NODE(&va->rb_node))) in __unlink_va()
987 rb_erase_augmented(&va->rb_node, in __unlink_va()
990 rb_erase(&va->rb_node, root); in __unlink_va()
992 list_del_init(&va->list); in __unlink_va()
993 RB_CLEAR_NODE(&va->rb_node); in __unlink_va()
1016 get_subtree_max_size(va->rb_node.rb_left), in compute_subtree_max_size()
1017 get_subtree_max_size(va->rb_node.rb_right)); in compute_subtree_max_size()
1028 if (computed_size != va->subtree_max_size) in augment_tree_propagate_check()
1030 va_size(va), va->subtree_max_size); in augment_tree_propagate_check()
1042 * - After VA has been inserted into the tree (free path);
1043 * - After VA has been shrunk (allocation path);
1044 * - After VA has been increased (merging path).
1050 * 4--8
1054 * 2--2 8--8
1060 * node becomes 4--6.
1070 free_vmap_area_rb_augment_cb_propagate(&va->rb_node, NULL); in augment_tree_propagate_from()
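A hedged sketch of the bottom-up update described above (the real code delegates this to the rb-tree augment callbacks): after a node's own range changes, walk toward the root recomputing the cached maximum and stop as soon as a node is already consistent. propagate_up() is an illustrative name, not a helper in this file.

	static void propagate_up(struct vmap_area *va)
	{
		while (va) {
			unsigned long max_size = compute_subtree_max_size(va);

			if (va->subtree_max_size == max_size)
				break;	/* ancestors are already consistent */

			va->subtree_max_size = max_size;
			va = rb_entry_safe(rb_parent(&va->rb_node),
					   struct vmap_area, rb_node);
		}
	}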
1109 * Merge de-allocated chunk of VA memory with previous
1147 * |<------VA------>|<-----Next----->| in __merge_or_add_vmap_area()
1153 if (sibling->va_start == va->va_end) { in __merge_or_add_vmap_area()
1154 sibling->va_start = va->va_start; in __merge_or_add_vmap_area()
1168 * |<-----Prev----->|<------VA------>| in __merge_or_add_vmap_area()
1172 if (next->prev != head) { in __merge_or_add_vmap_area()
1173 sibling = list_entry(next->prev, struct vmap_area, list); in __merge_or_add_vmap_area()
1174 if (sibling->va_end == va->va_start) { in __merge_or_add_vmap_area()
1185 sibling->va_end = va->va_end; in __merge_or_add_vmap_area()
1227 if (va->va_start > vstart) in is_within_this_va()
1228 nva_start_addr = ALIGN(va->va_start, align); in is_within_this_va()
1237 return (nva_start_addr + size <= va->va_end); in is_within_this_va()
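A worked example of the check above, with made-up numbers:

	free VA [0x1010, 0x3000), vstart = 0x1000, align = 0x100, size = 0x800:
	va_start > vstart, so nva_start_addr = ALIGN(0x1010, 0x100) = 0x1100,
	and 0x1100 + 0x800 = 0x1900 <= 0x3000, so this VA can satisfy the request.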
1256 node = root->rb_node; in find_vmap_lowest_match()
1259 length = adjust_search_size ? size + align - 1 : size; in find_vmap_lowest_match()
1264 if (get_subtree_max_size(node->rb_left) >= length && in find_vmap_lowest_match()
1265 vstart < va->va_start) { in find_vmap_lowest_match()
1266 node = node->rb_left; in find_vmap_lowest_match()
1273 * sub-tree if it does not have a free block that is in find_vmap_lowest_match()
1276 if (get_subtree_max_size(node->rb_right) >= length) { in find_vmap_lowest_match()
1277 node = node->rb_right; in find_vmap_lowest_match()
1282 * OK. We roll back and find the first right sub-tree, in find_vmap_lowest_match()
1292 if (get_subtree_max_size(node->rb_right) >= length && in find_vmap_lowest_match()
1293 vstart <= va->va_start) { in find_vmap_lowest_match()
1297 * to enter same sub-tree after it has already been checked in find_vmap_lowest_match()
1300 vstart = va->va_start + 1; in find_vmap_lowest_match()
1301 node = node->rb_right; in find_vmap_lowest_match()
1365 if (nva_start_addr < va->va_start || in classify_va_fit_type()
1366 nva_start_addr + size > va->va_end) in classify_va_fit_type()
1370 if (va->va_start == nva_start_addr) { in classify_va_fit_type()
1371 if (va->va_end == nva_start_addr + size) in classify_va_fit_type()
1375 } else if (va->va_end == nva_start_addr + size) { in classify_va_fit_type()
1398 * |---------------| in adjust_va_to_fit_type()
1408 * |-------|-------| in adjust_va_to_fit_type()
1410 va->va_start += size; in adjust_va_to_fit_type()
1417 * |-------|-------| in adjust_va_to_fit_type()
1419 va->va_end = nva_start_addr; in adjust_va_to_fit_type()
1426 * |---|-------|---| in adjust_va_to_fit_type()
1431 * For percpu allocator we do not do any pre-allocation in adjust_va_to_fit_type()
1457 return -1; in adjust_va_to_fit_type()
1463 lva->va_start = va->va_start; in adjust_va_to_fit_type()
1464 lva->va_end = nva_start_addr; in adjust_va_to_fit_type()
1469 va->va_start = nva_start_addr + size; in adjust_va_to_fit_type()
1471 return -1; in adjust_va_to_fit_type()
1478 insert_vmap_area_augment(lva, &va->rb_node, root, head); in adjust_va_to_fit_type()
1507 if (align <= PAGE_SIZE || (align > PAGE_SIZE && (vend - vstart) == size)) in __alloc_vmap_area()
1514 if (va->va_start > vstart) in __alloc_vmap_area()
1515 nva_start_addr = ALIGN(va->va_start, align); in __alloc_vmap_area()
1565 * We do it in non-atomic context, thus it allows us to use more in preload_this_cpu_lock()
1595 return ERR_PTR(-EINVAL); in alloc_vmap_area()
1598 return ERR_PTR(-EBUSY); in alloc_vmap_area()
1605 return ERR_PTR(-ENOMEM); in alloc_vmap_area()
1611 kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask); in alloc_vmap_area()
1628 va->va_start = addr; in alloc_vmap_area()
1629 va->va_end = addr + size; in alloc_vmap_area()
1630 va->vm = NULL; in alloc_vmap_area()
1631 va->flags = va_flags; in alloc_vmap_area()
1637 BUG_ON(!IS_ALIGNED(va->va_start, align)); in alloc_vmap_area()
1638 BUG_ON(va->va_start < vstart); in alloc_vmap_area()
1639 BUG_ON(va->va_end > vend); in alloc_vmap_area()
1669 return ERR_PTR(-EBUSY); in alloc_vmap_area()
1718 /* for per-CPU blocks */
1722 * Purges all lazily-freed vmap areas.
1743 struct vmap_area, list)->va_start); in __purge_vmap_area_lazy()
1747 struct vmap_area, list)->va_end); in __purge_vmap_area_lazy()
1754 unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT; in __purge_vmap_area_lazy()
1755 unsigned long orig_start = va->va_start; in __purge_vmap_area_lazy()
1756 unsigned long orig_end = va->va_end; in __purge_vmap_area_lazy()
1759 * Finally insert or merge lazily-freed area. It is in __purge_vmap_area_lazy()
1771 va->va_start, va->va_end); in __purge_vmap_area_lazy()
1820 unsigned long va_start = va->va_start; in free_vmap_area_noflush()
1823 if (WARN_ON_ONCE(!list_empty(&va->list))) in free_vmap_area_noflush()
1826 nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >> in free_vmap_area_noflush()
1849 flush_cache_vunmap(va->va_start, va->va_end); in free_unmap_vmap_area()
1850 vunmap_range_noflush(va->va_start, va->va_end); in free_unmap_vmap_area()
1852 flush_tlb_kernel_range(va->va_start, va->va_end); in free_unmap_vmap_area()
1889 * to #define VMALLOC_SPACE (VMALLOC_END-VMALLOC_START). Guess
1913 * regular operations: Purge if vb->free is less than 1/4 of the capacity.
1918 #define VMAP_BLOCK 0x2 /* mark out the vmap_block sub-type */
1927 * be allocated. If it is an issue, we can use rb-tree
1952 * A per-cpu vmap_block_queue is used in both ways, to serialize
1955 * overload it, since we already have the per-cpu array which is
1969 * |------|------|------|------|------|------|...<vmap address space>
1972 * - CPU_1 invokes vm_unmap_ram(6), 6 belongs to CPU0 zone, thus
1973 * it accesses: CPU0/INDEX0 -> vmap_blocks -> xa_lock;
1975 * - CPU_2 invokes vm_unmap_ram(11), 11 belongs to CPU1 zone, thus
1976 * it accesses: CPU1/INDEX1 -> vmap_blocks -> xa_lock;
1978 * - CPU_0 invokes vm_unmap_ram(20), 20 belongs to CPU2 zone, thus
1979 * it accesses: CPU2/INDEX2 -> vmap_blocks -> xa_lock.
1992 * if an index points to it, which is nr_cpu_ids - 1. in addr_to_vb_xa()
2009 addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1); in addr_to_vb_idx()
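A simplified, hedged sketch of the per-CPU hashing described above (the real addr_to_vb_xa() additionally copes with holes in the possible-CPU mask); vb_xa_for() is an illustrative name:

	static struct xarray *vb_xa_for(unsigned long addr)
	{
		/* VMAP_BLOCK_SIZE-sized zone i is owned by CPU (i % nr_cpu_ids) */
		unsigned int index = (addr / VMAP_BLOCK_SIZE) % nr_cpu_ids;

		return &per_cpu(vmap_block_queue, index).vmap_blocks;
	}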
2024 * new_vmap_block - allocates new vmap_block and occupies 2^order pages in this
2029 * Return: virtual address in a newly allocated block or ERR_PTR(-errno)
2046 return ERR_PTR(-ENOMEM); in new_vmap_block()
2057 vaddr = vmap_block_vaddr(va->va_start, 0); in new_vmap_block()
2058 spin_lock_init(&vb->lock); in new_vmap_block()
2059 vb->va = va; in new_vmap_block()
2062 bitmap_zero(vb->used_map, VMAP_BBMAP_BITS); in new_vmap_block()
2063 vb->free = VMAP_BBMAP_BITS - (1UL << order); in new_vmap_block()
2064 vb->dirty = 0; in new_vmap_block()
2065 vb->dirty_min = VMAP_BBMAP_BITS; in new_vmap_block()
2066 vb->dirty_max = 0; in new_vmap_block()
2067 bitmap_set(vb->used_map, 0, (1UL << order)); in new_vmap_block()
2068 INIT_LIST_HEAD(&vb->free_list); in new_vmap_block()
2069 vb->cpu = raw_smp_processor_id(); in new_vmap_block()
2071 xa = addr_to_vb_xa(va->va_start); in new_vmap_block()
2072 vb_idx = addr_to_vb_idx(va->va_start); in new_vmap_block()
2081 * rather than vb->cpu due to task migration, which in new_vmap_block()
2086 vbq = per_cpu_ptr(&vmap_block_queue, vb->cpu); in new_vmap_block()
2087 spin_lock(&vbq->lock); in new_vmap_block()
2088 list_add_tail_rcu(&vb->free_list, &vbq->free); in new_vmap_block()
2089 spin_unlock(&vbq->lock); in new_vmap_block()
2099 xa = addr_to_vb_xa(vb->va->va_start); in free_vmap_block()
2100 tmp = xa_erase(xa, addr_to_vb_idx(vb->va->va_start)); in free_vmap_block()
2104 unlink_va(vb->va, &vmap_area_root); in free_vmap_block()
2107 free_vmap_area_noflush(vb->va); in free_vmap_block()
2114 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, vb->cpu); in purge_fragmented_block()
2116 if (vb->free + vb->dirty != VMAP_BBMAP_BITS || in purge_fragmented_block()
2117 vb->dirty == VMAP_BBMAP_BITS) in purge_fragmented_block()
2121 if (!(force_purge || vb->free < VMAP_PURGE_THRESHOLD)) in purge_fragmented_block()
2125 WRITE_ONCE(vb->free, 0); in purge_fragmented_block()
2127 WRITE_ONCE(vb->dirty, VMAP_BBMAP_BITS); in purge_fragmented_block()
2128 vb->dirty_min = 0; in purge_fragmented_block()
2129 vb->dirty_max = VMAP_BBMAP_BITS; in purge_fragmented_block()
2130 spin_lock(&vbq->lock); in purge_fragmented_block()
2131 list_del_rcu(&vb->free_list); in purge_fragmented_block()
2132 spin_unlock(&vbq->lock); in purge_fragmented_block()
2133 list_add_tail(&vb->purge, purge_list); in purge_fragmented_block()
2142 list_del(&vb->purge); in free_purged_blocks()
2154 list_for_each_entry_rcu(vb, &vbq->free, free_list) { in purge_fragmented_blocks()
2155 unsigned long free = READ_ONCE(vb->free); in purge_fragmented_blocks()
2156 unsigned long dirty = READ_ONCE(vb->dirty); in purge_fragmented_blocks()
2162 spin_lock(&vb->lock); in purge_fragmented_blocks()
2164 spin_unlock(&vb->lock); in purge_fragmented_blocks()
2199 list_for_each_entry_rcu(vb, &vbq->free, free_list) { in vb_alloc()
2202 if (READ_ONCE(vb->free) < (1UL << order)) in vb_alloc()
2205 spin_lock(&vb->lock); in vb_alloc()
2206 if (vb->free < (1UL << order)) { in vb_alloc()
2207 spin_unlock(&vb->lock); in vb_alloc()
2211 pages_off = VMAP_BBMAP_BITS - vb->free; in vb_alloc()
2212 vaddr = vmap_block_vaddr(vb->va->va_start, pages_off); in vb_alloc()
2213 WRITE_ONCE(vb->free, vb->free - (1UL << order)); in vb_alloc()
2214 bitmap_set(vb->used_map, pages_off, (1UL << order)); in vb_alloc()
2215 if (vb->free == 0) { in vb_alloc()
2216 spin_lock(&vbq->lock); in vb_alloc()
2217 list_del_rcu(&vb->free_list); in vb_alloc()
2218 spin_unlock(&vbq->lock); in vb_alloc()
2221 spin_unlock(&vb->lock); in vb_alloc()
2247 offset = (addr & (VMAP_BLOCK_SIZE - 1)) >> PAGE_SHIFT; in vb_free()
2252 spin_lock(&vb->lock); in vb_free()
2253 bitmap_clear(vb->used_map, offset, (1UL << order)); in vb_free()
2254 spin_unlock(&vb->lock); in vb_free()
2261 spin_lock(&vb->lock); in vb_free()
2264 vb->dirty_min = min(vb->dirty_min, offset); in vb_free()
2265 vb->dirty_max = max(vb->dirty_max, offset + (1UL << order)); in vb_free()
2267 WRITE_ONCE(vb->dirty, vb->dirty + (1UL << order)); in vb_free()
2268 if (vb->dirty == VMAP_BBMAP_BITS) { in vb_free()
2269 BUG_ON(vb->free); in vb_free()
2270 spin_unlock(&vb->lock); in vb_free()
2273 spin_unlock(&vb->lock); in vb_free()
2292 xa_for_each(&vbq->vmap_blocks, idx, vb) { in _vm_unmap_aliases()
2293 spin_lock(&vb->lock); in _vm_unmap_aliases()
2301 vb->dirty_max && vb->dirty != VMAP_BBMAP_BITS) { in _vm_unmap_aliases()
2302 unsigned long va_start = vb->va->va_start; in _vm_unmap_aliases()
2305 s = va_start + (vb->dirty_min << PAGE_SHIFT); in _vm_unmap_aliases()
2306 e = va_start + (vb->dirty_max << PAGE_SHIFT); in _vm_unmap_aliases()
2312 vb->dirty_min = VMAP_BBMAP_BITS; in _vm_unmap_aliases()
2313 vb->dirty_max = 0; in _vm_unmap_aliases()
2317 spin_unlock(&vb->lock); in _vm_unmap_aliases()
2329 * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
2351 * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
2379 debug_check_no_locks_freed((void *)va->va_start, in vm_unmap_ram()
2380 (va->va_end - va->va_start)); in vm_unmap_ram()
2386 * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
2392 * faster than vmap so it's good. But if you mix long-life and short-life
2395 * the end. Please use this function for short-lived objects.
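A hedged usage sketch (not part of this file): mapping a short-lived page array with vm_map_ram() and tearing it down with vm_unmap_ram(). "pages", "nr" and "src" are assumed to be supplied by the caller.

	void *addr = vm_map_ram(pages, nr, NUMA_NO_NODE);

	if (addr) {
		memcpy(addr, src, nr * PAGE_SIZE);	/* transient use of the mapping */
		vm_unmap_ram(addr, nr);
	}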
2418 addr = va->va_start; in vm_map_ram()
2430 * With hardware tag-based KASAN, marking is skipped for in vm_map_ram()
2431 * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). in vm_map_ram()
2444 return vm->page_order; in vm_area_page_order()
2453 vm->page_order = order; in set_vm_area_page_order()
2460 * vm_area_add_early - add vmap area early during boot
2464 * vmalloc_init() is called. @vm->addr, @vm->size, and @vm->flags
2474 for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) { in vm_area_add_early()
2475 if (tmp->addr >= vm->addr) { in vm_area_add_early()
2476 BUG_ON(tmp->addr < vm->addr + vm->size); in vm_area_add_early()
2479 BUG_ON(tmp->addr + tmp->size > vm->addr); in vm_area_add_early()
2481 vm->next = *p; in vm_area_add_early()
2486 * vm_area_register_early - register vmap area early during boot
2491 * vmalloc_init() is called. @vm->size and @vm->flags should contain
2493 * vm->addr contains the allocated address.
2504 for (p = &vmlist; (cur = *p) != NULL; p = &cur->next) { in vm_area_register_early()
2505 if ((unsigned long)cur->addr - addr >= vm->size) in vm_area_register_early()
2507 addr = ALIGN((unsigned long)cur->addr + cur->size, align); in vm_area_register_early()
2510 BUG_ON(addr > VMALLOC_END - vm->size); in vm_area_register_early()
2511 vm->addr = (void *)addr; in vm_area_register_early()
2512 vm->next = *p; in vm_area_register_early()
2514 kasan_populate_early_vm_area_shadow(vm->addr, vm->size); in vm_area_register_early()
2525 * -|-----|.....|-----|-----|-----|.....|- in vmap_init_free_space()
2527 * |<--------------------------------->| in vmap_init_free_space()
2530 if (busy->va_start - vmap_start > 0) { in vmap_init_free_space()
2533 free->va_start = vmap_start; in vmap_init_free_space()
2534 free->va_end = busy->va_start; in vmap_init_free_space()
2542 vmap_start = busy->va_end; in vmap_init_free_space()
2545 if (vmap_end - vmap_start > 0) { in vmap_init_free_space()
2548 free->va_start = vmap_start; in vmap_init_free_space()
2549 free->va_end = vmap_end; in vmap_init_free_space()
2561 vm->flags = flags; in setup_vmalloc_vm_locked()
2562 vm->addr = (void *)va->va_start; in setup_vmalloc_vm_locked()
2563 vm->size = va->va_end - va->va_start; in setup_vmalloc_vm_locked()
2564 vm->caller = caller; in setup_vmalloc_vm_locked()
2565 va->vm = vm; in setup_vmalloc_vm_locked()
2584 vm->flags &= ~VM_UNINITIALIZED; in clear_vm_uninitialized_flag()
2621 * Mark pages for non-VM_ALLOC mappings as accessible. Do it now as a in __get_vm_area_node()
2622 * best-effort approach, as they can be mapped outside of vmalloc code. in __get_vm_area_node()
2625 * With hardware tag-based KASAN, marking is skipped for in __get_vm_area_node()
2626 * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). in __get_vm_area_node()
2629 area->addr = kasan_unpoison_vmalloc(area->addr, requested_size, in __get_vm_area_node()
2644 * get_vm_area - reserve a contiguous kernel virtual area
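A hedged sketch for get_vm_area(): reserving a chunk of vmalloc address space without backing pages, as an ioremap-style user might, and releasing it again with free_vm_area(). The 1 MiB size is arbitrary.

	struct vm_struct *area = get_vm_area(SZ_1M, VM_IOREMAP);

	if (area) {
		/* [area->addr, area->addr + area->size) is now reserved */
		free_vm_area(area);
	}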
2671 * find_vm_area - find a continuous kernel virtual area
2688 return va->vm; in find_vm_area()
2692 * remove_vm_area - find and remove a continuous kernel virtual area
2713 if (!va || !va->vm) in remove_vm_area()
2715 vm = va->vm; in remove_vm_area()
2717 debug_check_no_locks_freed(vm->addr, get_vm_area_size(vm)); in remove_vm_area()
2718 debug_check_no_obj_freed(vm->addr, get_vm_area_size(vm)); in remove_vm_area()
2720 kasan_poison_vmalloc(vm->addr, get_vm_area_size(vm)); in remove_vm_area()
2732 for (i = 0; i < area->nr_pages; i++) in set_area_direct_map()
2733 if (page_address(area->pages[i])) in set_area_direct_map()
2734 set_direct_map(area->pages[i]); in set_area_direct_map()
2751 for (i = 0; i < area->nr_pages; i += 1U << page_order) { in vm_reset_perms()
2752 unsigned long addr = (unsigned long)page_address(area->pages[i]); in vm_reset_perms()
2779 llist_for_each_safe(llnode, t, llist_del_all(&p->list)) in delayed_vfree_work()
2784 * vfree_atomic - release memory allocated by vmalloc()
2803 if (addr && llist_add((struct llist_node *)addr, &p->list)) in vfree_atomic()
2804 schedule_work(&p->wq); in vfree_atomic()
2808 * vfree - Release memory allocated by vmalloc()
2822 * conventions for vfree() arch-dependent would be a really bad idea).
2848 if (unlikely(vm->flags & VM_FLUSH_RESET_PERMS)) in vfree()
2850 for (i = 0; i < vm->nr_pages; i++) { in vfree()
2851 struct page *page = vm->pages[i]; in vfree()
2854 if (!(vm->flags & VM_MAP_PUT_PAGES)) in vfree()
2855 mod_memcg_page_state(page, MEMCG_VMALLOC, -1); in vfree()
2857 * High-order allocs for huge vmallocs are split, so in vfree()
2858 * can be freed as an array of order-0 allocations in vfree()
2863 if (!(vm->flags & VM_MAP_PUT_PAGES)) in vfree()
2864 atomic_long_sub(vm->nr_pages, &nr_vmalloc_pages); in vfree()
2865 kvfree(vm->pages); in vfree()
2871 * vunmap - release virtual mapping obtained by vmap()
2899 * vmap - map an array of pages into virtually contiguous space
2902 * @flags: vm_area->flags
2940 addr = (unsigned long)area->addr; in vmap()
2943 vunmap(area->addr); in vmap()
2948 area->pages = pages; in vmap()
2949 area->nr_pages = count; in vmap()
2951 return area->addr; in vmap()
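A hedged usage sketch for vmap(): gluing an existing page array ("pages"/"nr", assumed allocated by the caller) into one virtually contiguous kernel mapping, later released with vunmap().

	void *addr = vmap(pages, nr, VM_MAP, PAGE_KERNEL);

	if (addr) {
		/* ... use the linear view of the pages ... */
		vunmap(addr);
	}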
2965 unsigned long pfn = data->pfns[data->idx]; in vmap_pfn_apply()
2969 return -EINVAL; in vmap_pfn_apply()
2971 ptent = pte_mkspecial(pfn_pte(pfn, data->prot)); in vmap_pfn_apply()
2974 data->idx++; in vmap_pfn_apply()
2979 * vmap_pfn - map an array of PFNs into virtually contiguous space
2996 if (apply_to_page_range(&init_mm, (unsigned long)area->addr, in vmap_pfn()
3002 flush_cache_vmap((unsigned long)area->addr, in vmap_pfn()
3003 (unsigned long)area->addr + count * PAGE_SIZE); in vmap_pfn()
3005 return area->addr; in vmap_pfn()
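A hedged sketch for vmap_pfn(): mapping a caller-provided PFN array with no struct page backing (e.g. device memory). "pfns" and "nr" are assumptions, and the non-cached protection is only one possible choice.

	void *addr = vmap_pfn(pfns, nr, pgprot_noncached(PAGE_KERNEL));

	if (addr) {
		/* ... access the mapping ... */
		vunmap(addr);
	}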
3021 * For order-0 pages we make use of bulk allocator, if in vm_area_alloc_pages()
3034 * A maximum allowed request is hard-coded and is 100 in vm_area_alloc_pages()
3036 * long preemption off scenario in the bulk-allocator in vm_area_alloc_pages()
3039 nr_pages_request = min(100U, nr_pages - nr_allocated); in vm_area_alloc_pages()
3069 * potentially dangerous (premature OOM, disruptive reclaim in vm_area_alloc_pages()
3075 /* High-order pages or fallback path if "bulk" fails. */ in vm_area_alloc_pages()
3090 * small-page vmallocs). Some drivers do their own refcounting in vm_area_alloc_pages()
3091 * on vmalloc_to_page() pages, some use page->mapping, in vm_area_alloc_pages()
3092 * page->lru, etc. in vm_area_alloc_pages()
3098 * Careful, we allocate and map page-order pages, but in vm_area_alloc_pages()
3118 unsigned long addr = (unsigned long)area->addr; in __vmalloc_area_node()
3133 area->pages = __vmalloc_node(array_size, 1, nested_gfp, node, in __vmalloc_area_node()
3134 area->caller); in __vmalloc_area_node()
3136 area->pages = kmalloc_node(array_size, nested_gfp, node); in __vmalloc_area_node()
3139 if (!area->pages) { in __vmalloc_area_node()
3147 set_vm_area_page_order(area, page_shift - PAGE_SHIFT); in __vmalloc_area_node()
3150 area->nr_pages = vm_area_alloc_pages(gfp_mask | __GFP_NOWARN, in __vmalloc_area_node()
3151 node, page_order, nr_small_pages, area->pages); in __vmalloc_area_node()
3153 atomic_long_add(area->nr_pages, &nr_vmalloc_pages); in __vmalloc_area_node()
3157 for (i = 0; i < area->nr_pages; i++) in __vmalloc_area_node()
3158 mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1); in __vmalloc_area_node()
3165 if (area->nr_pages != nr_small_pages) { in __vmalloc_area_node()
3168 * also:- in __vmalloc_area_node()
3170 * - a pending fatal signal in __vmalloc_area_node()
3171 * - insufficient huge page-order pages in __vmalloc_area_node()
3173 * Since we always retry allocations at order-0 in the huge page in __vmalloc_area_node()
3179 area->nr_pages * PAGE_SIZE); in __vmalloc_area_node()
3193 ret = vmap_pages_range(addr, addr + size, prot, area->pages, in __vmalloc_area_node()
3207 area->nr_pages * PAGE_SIZE); in __vmalloc_area_node()
3211 return area->addr; in __vmalloc_area_node()
3214 vfree(area->addr); in __vmalloc_area_node()
3219 * __vmalloc_node_range - allocate virtually contiguous memory
3340 * Tag-based KASAN modes only assign tags to normal non-executable in __vmalloc_node_range()
3348 area->addr = kasan_unpoison_vmalloc(area->addr, real_size, kasan_flags); in __vmalloc_node_range()
3361 return area->addr; in __vmalloc_node_range()
3375 * __vmalloc_node - allocate virtually contiguous memory
3385 * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL
3416 * vmalloc - allocate virtually contiguous memory
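The classic pairing, as a hedged sketch ("nents" and the element type are made up): vmalloc() for a large, virtually contiguous but not necessarily physically contiguous allocation, released with vfree().

	struct entry *tbl = vmalloc(array_size(nents, sizeof(*tbl)));

	if (!tbl)
		return -ENOMEM;
	/* ... populate and use tbl ... */
	vfree(tbl);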
3435 * vmalloc_huge - allocate virtually contiguous memory, allow huge pages
3455 * vzalloc - allocate virtually contiguous memory with zero fill
3475 * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
3493 * vmalloc_node - allocate memory on a specific node
3513 * vzalloc_node - allocate memory on a specific node with zero fill
3543 * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
3559 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
3590 remains -= copied; in zero_iter()
3596 return count - remains; in zero_iter()
3616 length = PAGE_SIZE - offset; in aligned_vread_iter()
3635 remains -= copied; in aligned_vread_iter()
3641 return count - remains; in aligned_vread_iter()
3678 spin_lock(&vb->lock); in vmap_ram_vread_iter()
3679 if (bitmap_empty(vb->used_map, VMAP_BBMAP_BITS)) { in vmap_ram_vread_iter()
3680 spin_unlock(&vb->lock); in vmap_ram_vread_iter()
3684 for_each_set_bitrange(rs, re, vb->used_map, VMAP_BBMAP_BITS) { in vmap_ram_vread_iter()
3690 start = vmap_block_vaddr(vb->va->va_start, rs); in vmap_ram_vread_iter()
3693 size_t to_zero = min_t(size_t, start - addr, remains); in vmap_ram_vread_iter()
3697 remains -= zeroed; in vmap_ram_vread_iter()
3705 n = ((re - rs + 1) << PAGE_SHIFT) - offset; in vmap_ram_vread_iter()
3712 remains -= copied; in vmap_ram_vread_iter()
3718 spin_unlock(&vb->lock); in vmap_ram_vread_iter()
3721 /* zero-fill the left dirty or free regions */ in vmap_ram_vread_iter()
3722 return count - remains + zero_iter(iter, remains); in vmap_ram_vread_iter()
3725 spin_unlock(&vb->lock); in vmap_ram_vread_iter()
3726 return count - remains; in vmap_ram_vread_iter()
3730 * vread_iter() - read vmalloc area in a safe way to an iterator.
3738 * proper area of @buf. If there are memory holes, they'll be zero-filled.
3764 count = -(unsigned long) addr; in vread_iter()
3774 if ((unsigned long)addr + remains <= va->va_start) in vread_iter()
3783 vm = va->vm; in vread_iter()
3784 flags = va->flags & VMAP_FLAGS_MASK; in vread_iter()
3786 * VMAP_BLOCK indicates a sub-type of vm_map_ram area, need in vread_iter()
3794 if (vm && (vm->flags & VM_UNINITIALIZED)) in vread_iter()
3800 vaddr = (char *) va->va_start; in vread_iter()
3807 size_t to_zero = min_t(size_t, vaddr - addr, remains); in vread_iter()
3811 remains -= zeroed; in vread_iter()
3817 n = vaddr + size - addr; in vread_iter()
3823 else if (!(vm->flags & VM_IOREMAP)) in vread_iter()
3829 remains -= copied; in vread_iter()
3837 /* zero-fill memory holes */ in vread_iter()
3838 return count - remains + zero_iter(iter, remains); in vread_iter()
3843 return count - remains; in vread_iter()
3847 * remap_vmalloc_range_partial - map vmalloc pages to userspace
3854 * Returns: 0 for success, -Exxx on failure
3872 return -EINVAL; in remap_vmalloc_range_partial()
3877 return -EINVAL; in remap_vmalloc_range_partial()
3881 return -EINVAL; in remap_vmalloc_range_partial()
3883 if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT))) in remap_vmalloc_range_partial()
3884 return -EINVAL; in remap_vmalloc_range_partial()
3888 return -EINVAL; in remap_vmalloc_range_partial()
3901 size -= PAGE_SIZE; in remap_vmalloc_range_partial()
3910 * remap_vmalloc_range - map vmalloc pages to userspace
3915 * Returns: 0 for success, -Exxx on failure
3926 return remap_vmalloc_range_partial(vma, vma->vm_start, in remap_vmalloc_range()
3928 vma->vm_end - vma->vm_start); in remap_vmalloc_range()
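A hedged sketch of the usual caller: a file_operations ->mmap() handler exposing a buffer that was allocated elsewhere with vmalloc_user() (so it carries VM_USERMAP). "buf" and foo_mmap() are hypothetical.

	static int foo_mmap(struct file *file, struct vm_area_struct *vma)
	{
		return remap_vmalloc_range(vma, buf, vma->vm_pgoff);
	}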
3935 ret = remove_vm_area(area->addr); in free_vm_area()
3948 * pvm_find_va_enclose_addr - find the vmap_area @addr belongs to
3953 * i.e. va->va_start < addr && va->va_end < addr or NULL
3967 if (tmp->va_start <= addr) { in pvm_find_va_enclose_addr()
3969 if (tmp->va_end >= addr) in pvm_find_va_enclose_addr()
3972 n = n->rb_right; in pvm_find_va_enclose_addr()
3974 n = n->rb_left; in pvm_find_va_enclose_addr()
3982 * pvm_determine_end_from_reverse - find the highest aligned address
3985 * in - the VA we start the search (reverse order);
3986 * out - the VA with the highest aligned end address.
3994 unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); in pvm_determine_end_from_reverse()
4000 addr = min((*va)->va_end & ~(align - 1), vmalloc_end); in pvm_determine_end_from_reverse()
4001 if ((*va)->va_start < addr) in pvm_determine_end_from_reverse()
4010 * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
4027 * does everything top-down and scans free blocks from the end looking
4038 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); in pcpu_get_vm_areas()
4068 if (vmalloc_end - vmalloc_start < last_end) { in pcpu_get_vm_areas()
4087 /* start scanning - we scan from the top, begin with the last area */ in pcpu_get_vm_areas()
4093 base = pvm_determine_end_from_reverse(&va, align) - end; in pcpu_get_vm_areas()
4113 if (base + end > va->va_end) { in pcpu_get_vm_areas()
4114 base = pvm_determine_end_from_reverse(&va, align) - end; in pcpu_get_vm_areas()
4122 if (base + start < va->va_start) { in pcpu_get_vm_areas()
4123 va = node_to_va(rb_prev(&va->rb_node)); in pcpu_get_vm_areas()
4124 base = pvm_determine_end_from_reverse(&va, align) - end; in pcpu_get_vm_areas()
4133 area = (area + nr_vms - 1) % nr_vms; in pcpu_get_vm_areas()
4163 va->va_start = start; in pcpu_get_vm_areas()
4164 va->va_end = start + size; in pcpu_get_vm_areas()
4171 if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area])) in pcpu_get_vm_areas()
4186 * Mark allocated areas as accessible. Do it now as a best-effort in pcpu_get_vm_areas()
4188 * With hardware tag-based KASAN, marking is skipped for in pcpu_get_vm_areas()
4189 * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). in pcpu_get_vm_areas()
4192 vms[area]->addr = kasan_unpoison_vmalloc(vms[area]->addr, in pcpu_get_vm_areas()
4193 vms[area]->size, KASAN_VMALLOC_PROT_NORMAL); in pcpu_get_vm_areas()
4205 while (area--) { in pcpu_get_vm_areas()
4206 orig_start = vas[area]->va_start; in pcpu_get_vm_areas()
4207 orig_end = vas[area]->va_end; in pcpu_get_vm_areas()
4212 va->va_start, va->va_end); in pcpu_get_vm_areas()
4256 orig_start = vas[area]->va_start; in pcpu_get_vm_areas()
4257 orig_end = vas[area]->va_end; in pcpu_get_vm_areas()
4262 va->va_start, va->va_end); in pcpu_get_vm_areas()
4273 * pcpu_free_vm_areas - free vmalloc areas for percpu allocator
4307 vm = va->vm; in vmalloc_dump_obj()
4312 addr = (unsigned long)vm->addr; in vmalloc_dump_obj()
4313 caller = vm->caller; in vmalloc_dump_obj()
4314 nr_pages = vm->nr_pages; in vmalloc_dump_obj()
4316 pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n", in vmalloc_dump_obj()
4349 unsigned int nr, *counters = m->private; in show_numa_info()
4355 if (v->flags & VM_UNINITIALIZED) in show_numa_info()
4362 for (nr = 0; nr < v->nr_pages; nr += step) in show_numa_info()
4363 counters[page_to_nid(v->pages[nr])] += step; in show_numa_info()
4376 seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n", in show_purge_info()
4377 (void *)va->va_start, (void *)va->va_end, in show_purge_info()
4378 va->va_end - va->va_start); in show_purge_info()
4390 if (!va->vm) { in s_show()
4391 if (va->flags & VMAP_RAM) in s_show()
4392 seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n", in s_show()
4393 (void *)va->va_start, (void *)va->va_end, in s_show()
4394 va->va_end - va->va_start); in s_show()
4399 v = va->vm; in s_show()
4401 seq_printf(m, "0x%pK-0x%pK %7ld", in s_show()
4402 v->addr, v->addr + v->size, v->size); in s_show()
4404 if (v->caller) in s_show()
4405 seq_printf(m, " %pS", v->caller); in s_show()
4407 if (v->nr_pages) in s_show()
4408 seq_printf(m, " pages=%d", v->nr_pages); in s_show()
4410 if (v->phys_addr) in s_show()
4411 seq_printf(m, " phys=%pa", &v->phys_addr); in s_show()
4413 if (v->flags & VM_IOREMAP) in s_show()
4416 if (v->flags & VM_ALLOC) in s_show()
4419 if (v->flags & VM_MAP) in s_show()
4422 if (v->flags & VM_USERMAP) in s_show()
4425 if (v->flags & VM_DMA_COHERENT) in s_show()
4426 seq_puts(m, " dma-coherent"); in s_show()
4428 if (is_vmalloc_addr(v->pages)) in s_show()
4438 if (list_is_last(&va->list, &vmap_area_list)) in s_show()
4481 spin_lock_init(&vbq->lock); in vmalloc_init()
4482 INIT_LIST_HEAD(&vbq->free); in vmalloc_init()
4484 init_llist_head(&p->list); in vmalloc_init()
4485 INIT_WORK(&p->wq, delayed_vfree_work); in vmalloc_init()
4486 xa_init(&vbq->vmap_blocks); in vmalloc_init()
4490 for (tmp = vmlist; tmp; tmp = tmp->next) { in vmalloc_init()
4495 va->va_start = (unsigned long)tmp->addr; in vmalloc_init()
4496 va->va_end = va->va_start + tmp->size; in vmalloc_init()
4497 va->vm = tmp; in vmalloc_init()