1 /* 2 * mm/percpu-vm.c - vmalloc area based chunk allocation 3 * 4 * Copyright (C) 2010 SUSE Linux Products GmbH 5 * Copyright (C) 2010 Tejun Heo <tj@kernel.org> 6 * 7 * This file is released under the GPLv2. 8 * 9 * Chunks are mapped into vmalloc areas and populated page by page. 10 * This is the default chunk allocator. 11 */ 12 13 static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, 14 unsigned int cpu, int page_idx) 15 { 16 /* must not be used on pre-mapped chunk */ 17 WARN_ON(chunk->immutable); 18 19 return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx)); 20 } 21 22 /** 23 * pcpu_get_pages_and_bitmap - get temp pages array and bitmap 24 * @chunk: chunk of interest 25 * @bitmapp: output parameter for bitmap 26 * @may_alloc: may allocate the array 27 * 28 * Returns pointer to array of pointers to struct page and bitmap, 29 * both of which can be indexed with pcpu_page_idx(). The returned 30 * array is cleared to zero and *@bitmapp is copied from 31 * @chunk->populated. Note that there is only one array and bitmap 32 * and access exclusion is the caller's responsibility. 33 * 34 * CONTEXT: 35 * pcpu_alloc_mutex and does GFP_KERNEL allocation if @may_alloc. 36 * Otherwise, don't care. 37 * 38 * RETURNS: 39 * Pointer to temp pages array on success, NULL on failure. 40 */ 41 static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk, 42 unsigned long **bitmapp, 43 bool may_alloc) 44 { 45 static struct page **pages; 46 static unsigned long *bitmap; 47 size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]); 48 size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) * 49 sizeof(unsigned long); 50 51 if (!pages || !bitmap) { 52 if (may_alloc && !pages) 53 pages = pcpu_mem_alloc(pages_size); 54 if (may_alloc && !bitmap) 55 bitmap = pcpu_mem_alloc(bitmap_size); 56 if (!pages || !bitmap) 57 return NULL; 58 } 59 60 memset(pages, 0, pages_size); 61 bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages); 62 63 *bitmapp = bitmap; 64 return pages; 65 } 66 67 /** 68 * pcpu_free_pages - free pages which were allocated for @chunk 69 * @chunk: chunk pages were allocated for 70 * @pages: array of pages to be freed, indexed by pcpu_page_idx() 71 * @populated: populated bitmap 72 * @page_start: page index of the first page to be freed 73 * @page_end: page index of the last page to be freed + 1 74 * 75 * Free pages [@page_start and @page_end) in @pages for all units. 76 * The pages were allocated for @chunk. 77 */ 78 static void pcpu_free_pages(struct pcpu_chunk *chunk, 79 struct page **pages, unsigned long *populated, 80 int page_start, int page_end) 81 { 82 unsigned int cpu; 83 int i; 84 85 for_each_possible_cpu(cpu) { 86 for (i = page_start; i < page_end; i++) { 87 struct page *page = pages[pcpu_page_idx(cpu, i)]; 88 89 if (page) 90 __free_page(page); 91 } 92 } 93 } 94 95 /** 96 * pcpu_alloc_pages - allocates pages for @chunk 97 * @chunk: target chunk 98 * @pages: array to put the allocated pages into, indexed by pcpu_page_idx() 99 * @populated: populated bitmap 100 * @page_start: page index of the first page to be allocated 101 * @page_end: page index of the last page to be allocated + 1 102 * 103 * Allocate pages [@page_start,@page_end) into @pages for all units. 104 * The allocation is for @chunk. Percpu core doesn't care about the 105 * content of @pages and will pass it verbatim to pcpu_map_pages(). 106 */ 107 static int pcpu_alloc_pages(struct pcpu_chunk *chunk, 108 struct page **pages, unsigned long *populated, 109 int page_start, int page_end) 110 { 111 const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; 112 unsigned int cpu; 113 int i; 114 115 for_each_possible_cpu(cpu) { 116 for (i = page_start; i < page_end; i++) { 117 struct page **pagep = &pages[pcpu_page_idx(cpu, i)]; 118 119 *pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0); 120 if (!*pagep) { 121 pcpu_free_pages(chunk, pages, populated, 122 page_start, page_end); 123 return -ENOMEM; 124 } 125 } 126 } 127 return 0; 128 } 129 130 /** 131 * pcpu_pre_unmap_flush - flush cache prior to unmapping 132 * @chunk: chunk the regions to be flushed belongs to 133 * @page_start: page index of the first page to be flushed 134 * @page_end: page index of the last page to be flushed + 1 135 * 136 * Pages in [@page_start,@page_end) of @chunk are about to be 137 * unmapped. Flush cache. As each flushing trial can be very 138 * expensive, issue flush on the whole region at once rather than 139 * doing it for each cpu. This could be an overkill but is more 140 * scalable. 141 */ 142 static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk, 143 int page_start, int page_end) 144 { 145 flush_cache_vunmap( 146 pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), 147 pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); 148 } 149 150 static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) 151 { 152 unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT); 153 } 154 155 /** 156 * pcpu_unmap_pages - unmap pages out of a pcpu_chunk 157 * @chunk: chunk of interest 158 * @pages: pages array which can be used to pass information to free 159 * @populated: populated bitmap 160 * @page_start: page index of the first page to unmap 161 * @page_end: page index of the last page to unmap + 1 162 * 163 * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. 164 * Corresponding elements in @pages were cleared by the caller and can 165 * be used to carry information to pcpu_free_pages() which will be 166 * called after all unmaps are finished. The caller should call 167 * proper pre/post flush functions. 168 */ 169 static void pcpu_unmap_pages(struct pcpu_chunk *chunk, 170 struct page **pages, unsigned long *populated, 171 int page_start, int page_end) 172 { 173 unsigned int cpu; 174 int i; 175 176 for_each_possible_cpu(cpu) { 177 for (i = page_start; i < page_end; i++) { 178 struct page *page; 179 180 page = pcpu_chunk_page(chunk, cpu, i); 181 WARN_ON(!page); 182 pages[pcpu_page_idx(cpu, i)] = page; 183 } 184 __pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start), 185 page_end - page_start); 186 } 187 188 for (i = page_start; i < page_end; i++) 189 __clear_bit(i, populated); 190 } 191 192 /** 193 * pcpu_post_unmap_tlb_flush - flush TLB after unmapping 194 * @chunk: pcpu_chunk the regions to be flushed belong to 195 * @page_start: page index of the first page to be flushed 196 * @page_end: page index of the last page to be flushed + 1 197 * 198 * Pages [@page_start,@page_end) of @chunk have been unmapped. Flush 199 * TLB for the regions. This can be skipped if the area is to be 200 * returned to vmalloc as vmalloc will handle TLB flushing lazily. 201 * 202 * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once 203 * for the whole region. 204 */ 205 static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, 206 int page_start, int page_end) 207 { 208 flush_tlb_kernel_range( 209 pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), 210 pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); 211 } 212 213 static int __pcpu_map_pages(unsigned long addr, struct page **pages, 214 int nr_pages) 215 { 216 return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT, 217 PAGE_KERNEL, pages); 218 } 219 220 /** 221 * pcpu_map_pages - map pages into a pcpu_chunk 222 * @chunk: chunk of interest 223 * @pages: pages array containing pages to be mapped 224 * @populated: populated bitmap 225 * @page_start: page index of the first page to map 226 * @page_end: page index of the last page to map + 1 227 * 228 * For each cpu, map pages [@page_start,@page_end) into @chunk. The 229 * caller is responsible for calling pcpu_post_map_flush() after all 230 * mappings are complete. 231 * 232 * This function is responsible for setting corresponding bits in 233 * @chunk->populated bitmap and whatever is necessary for reverse 234 * lookup (addr -> chunk). 235 */ 236 static int pcpu_map_pages(struct pcpu_chunk *chunk, 237 struct page **pages, unsigned long *populated, 238 int page_start, int page_end) 239 { 240 unsigned int cpu, tcpu; 241 int i, err; 242 243 for_each_possible_cpu(cpu) { 244 err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start), 245 &pages[pcpu_page_idx(cpu, page_start)], 246 page_end - page_start); 247 if (err < 0) 248 goto err; 249 } 250 251 /* mapping successful, link chunk and mark populated */ 252 for (i = page_start; i < page_end; i++) { 253 for_each_possible_cpu(cpu) 254 pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)], 255 chunk); 256 __set_bit(i, populated); 257 } 258 259 return 0; 260 261 err: 262 for_each_possible_cpu(tcpu) { 263 if (tcpu == cpu) 264 break; 265 __pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start), 266 page_end - page_start); 267 } 268 return err; 269 } 270 271 /** 272 * pcpu_post_map_flush - flush cache after mapping 273 * @chunk: pcpu_chunk the regions to be flushed belong to 274 * @page_start: page index of the first page to be flushed 275 * @page_end: page index of the last page to be flushed + 1 276 * 277 * Pages [@page_start,@page_end) of @chunk have been mapped. Flush 278 * cache. 279 * 280 * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once 281 * for the whole region. 282 */ 283 static void pcpu_post_map_flush(struct pcpu_chunk *chunk, 284 int page_start, int page_end) 285 { 286 flush_cache_vmap( 287 pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), 288 pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); 289 } 290 291 /** 292 * pcpu_populate_chunk - populate and map an area of a pcpu_chunk 293 * @chunk: chunk of interest 294 * @off: offset to the area to populate 295 * @size: size of the area to populate in bytes 296 * 297 * For each cpu, populate and map pages [@page_start,@page_end) into 298 * @chunk. The area is cleared on return. 299 * 300 * CONTEXT: 301 * pcpu_alloc_mutex, does GFP_KERNEL allocation. 302 */ 303 static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) 304 { 305 int page_start = PFN_DOWN(off); 306 int page_end = PFN_UP(off + size); 307 int free_end = page_start, unmap_end = page_start; 308 struct page **pages; 309 unsigned long *populated; 310 unsigned int cpu; 311 int rs, re, rc; 312 313 /* quick path, check whether all pages are already there */ 314 rs = page_start; 315 pcpu_next_pop(chunk, &rs, &re, page_end); 316 if (rs == page_start && re == page_end) 317 goto clear; 318 319 /* need to allocate and map pages, this chunk can't be immutable */ 320 WARN_ON(chunk->immutable); 321 322 pages = pcpu_get_pages_and_bitmap(chunk, &populated, true); 323 if (!pages) 324 return -ENOMEM; 325 326 /* alloc and map */ 327 pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { 328 rc = pcpu_alloc_pages(chunk, pages, populated, rs, re); 329 if (rc) 330 goto err_free; 331 free_end = re; 332 } 333 334 pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { 335 rc = pcpu_map_pages(chunk, pages, populated, rs, re); 336 if (rc) 337 goto err_unmap; 338 unmap_end = re; 339 } 340 pcpu_post_map_flush(chunk, page_start, page_end); 341 342 /* commit new bitmap */ 343 bitmap_copy(chunk->populated, populated, pcpu_unit_pages); 344 clear: 345 for_each_possible_cpu(cpu) 346 memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); 347 return 0; 348 349 err_unmap: 350 pcpu_pre_unmap_flush(chunk, page_start, unmap_end); 351 pcpu_for_each_unpop_region(chunk, rs, re, page_start, unmap_end) 352 pcpu_unmap_pages(chunk, pages, populated, rs, re); 353 pcpu_post_unmap_tlb_flush(chunk, page_start, unmap_end); 354 err_free: 355 pcpu_for_each_unpop_region(chunk, rs, re, page_start, free_end) 356 pcpu_free_pages(chunk, pages, populated, rs, re); 357 return rc; 358 } 359 360 /** 361 * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk 362 * @chunk: chunk to depopulate 363 * @off: offset to the area to depopulate 364 * @size: size of the area to depopulate in bytes 365 * @flush: whether to flush cache and tlb or not 366 * 367 * For each cpu, depopulate and unmap pages [@page_start,@page_end) 368 * from @chunk. If @flush is true, vcache is flushed before unmapping 369 * and tlb after. 370 * 371 * CONTEXT: 372 * pcpu_alloc_mutex. 373 */ 374 static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size) 375 { 376 int page_start = PFN_DOWN(off); 377 int page_end = PFN_UP(off + size); 378 struct page **pages; 379 unsigned long *populated; 380 int rs, re; 381 382 /* quick path, check whether it's empty already */ 383 rs = page_start; 384 pcpu_next_unpop(chunk, &rs, &re, page_end); 385 if (rs == page_start && re == page_end) 386 return; 387 388 /* immutable chunks can't be depopulated */ 389 WARN_ON(chunk->immutable); 390 391 /* 392 * If control reaches here, there must have been at least one 393 * successful population attempt so the temp pages array must 394 * be available now. 395 */ 396 pages = pcpu_get_pages_and_bitmap(chunk, &populated, false); 397 BUG_ON(!pages); 398 399 /* unmap and free */ 400 pcpu_pre_unmap_flush(chunk, page_start, page_end); 401 402 pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) 403 pcpu_unmap_pages(chunk, pages, populated, rs, re); 404 405 /* no need to flush tlb, vmalloc will handle it lazily */ 406 407 pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) 408 pcpu_free_pages(chunk, pages, populated, rs, re); 409 410 /* commit new bitmap */ 411 bitmap_copy(chunk->populated, populated, pcpu_unit_pages); 412 } 413 414 static struct pcpu_chunk *pcpu_create_chunk(void) 415 { 416 struct pcpu_chunk *chunk; 417 struct vm_struct **vms; 418 419 chunk = pcpu_alloc_chunk(); 420 if (!chunk) 421 return NULL; 422 423 vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes, 424 pcpu_nr_groups, pcpu_atom_size); 425 if (!vms) { 426 pcpu_free_chunk(chunk); 427 return NULL; 428 } 429 430 chunk->data = vms; 431 chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0]; 432 return chunk; 433 } 434 435 static void pcpu_destroy_chunk(struct pcpu_chunk *chunk) 436 { 437 if (chunk && chunk->data) 438 pcpu_free_vm_areas(chunk->data, pcpu_nr_groups); 439 pcpu_free_chunk(chunk); 440 } 441 442 static struct page *pcpu_addr_to_page(void *addr) 443 { 444 return vmalloc_to_page(addr); 445 } 446 447 static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai) 448 { 449 /* no extra restriction */ 450 return 0; 451 } 452