/*
 * mm/percpu-vm.c - vmalloc area based chunk allocation
 *
 * Copyright (C) 2010		SUSE Linux Products GmbH
 * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2.
 *
 * Chunks are mapped into vmalloc areas and populated page by page.
 * This is the default chunk allocator.
 */

static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
				    unsigned int cpu, int page_idx)
{
	/* must not be used on pre-mapped chunk */
	WARN_ON(chunk->immutable);

	return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx));
}

/**
 * pcpu_get_pages_and_bitmap - get temp pages array and bitmap
 * @chunk: chunk of interest
 * @bitmapp: output parameter for bitmap
 * @may_alloc: may allocate the array
 *
 * Returns pointer to array of pointers to struct page and bitmap,
 * both of which can be indexed with pcpu_page_idx().  The returned
 * array is cleared to zero and *@bitmapp is copied from
 * @chunk->populated.  Note that there is only one array and bitmap
 * and access exclusion is the caller's responsibility.
 *
 * CONTEXT:
 * pcpu_alloc_mutex and does GFP_KERNEL allocation if @may_alloc.
 * Otherwise, don't care.
 *
 * RETURNS:
 * Pointer to temp pages array on success, NULL on failure.
 */
static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk,
					       unsigned long **bitmapp,
					       bool may_alloc)
{
	static struct page **pages;
	static unsigned long *bitmap;
	size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]);
	size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) *
			     sizeof(unsigned long);

	if (!pages || !bitmap) {
		if (may_alloc && !pages)
			pages = pcpu_mem_zalloc(pages_size);
		if (may_alloc && !bitmap)
			bitmap = pcpu_mem_zalloc(bitmap_size);
		if (!pages || !bitmap)
			return NULL;
	}

	/* hand the array out zeroed as promised above; stale entries
	 * from a previous user would confuse pcpu_free_pages() */
	memset(pages, 0, pages_size);
	bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages);

	*bitmapp = bitmap;
	return pages;
}

/**
 * pcpu_free_pages - free pages which were allocated for @chunk
 * @chunk: chunk pages were allocated for
 * @pages: array of pages to be freed, indexed by pcpu_page_idx()
 * @populated: populated bitmap
 * @page_start: page index of the first page to be freed
 * @page_end: page index of the last page to be freed + 1
 *
 * Free pages [@page_start,@page_end) in @pages for all units.
 * The pages were allocated for @chunk.
 */
static void pcpu_free_pages(struct pcpu_chunk *chunk,
			    struct page **pages, unsigned long *populated,
			    int page_start, int page_end)
{
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page *page = pages[pcpu_page_idx(cpu, i)];

			if (page)
				__free_page(page);
		}
	}
}
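/*
 * Illustrative sketch, not code that is compiled here: the temp
 * @pages array is a flat [unit][page] matrix addressed through
 * pcpu_page_idx(), which mm/percpu.c defines (at the time of writing)
 * as pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx.  With the
 * hypothetical values pcpu_unit_pages = 8 and pcpu_unit_map[cpu] = 2,
 * page 3 of that cpu's unit lives at
 *
 *	pcpu_page_idx(cpu, 3) == 2 * 8 + 3 == 19
 *
 * so walking one cpu's slots in [page_start,page_end) is simply:
 *
 *	for (i = page_start; i < page_end; i++) {
 *		struct page *page = pages[pcpu_page_idx(cpu, i)];
 *
 *		if (page)
 *			__free_page(page);
 *	}
 *
 * A NULL slot means no page was allocated for that index, which is
 * why pcpu_free_pages() above tolerates NULL entries.
 */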
/**
 * pcpu_alloc_pages - allocate pages for @chunk
 * @chunk: target chunk
 * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
 * @populated: populated bitmap
 * @page_start: page index of the first page to be allocated
 * @page_end: page index of the last page to be allocated + 1
 *
 * Allocate pages [@page_start,@page_end) into @pages for all units.
 * The allocation is for @chunk.  Percpu core doesn't care about the
 * content of @pages and will pass it verbatim to pcpu_map_pages().
 */
static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
			    struct page **pages, unsigned long *populated,
			    int page_start, int page_end)
{
	const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD;
	unsigned int cpu, tcpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page **pagep = &pages[pcpu_page_idx(cpu, i)];

			*pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0);
			if (!*pagep)
				goto err;
		}
	}
	return 0;

err:
	/* unwind the partially allocated unit of the failing @cpu ... */
	while (--i >= page_start)
		__free_page(pages[pcpu_page_idx(cpu, i)]);

	/* ... and every fully allocated unit before it */
	for_each_possible_cpu(tcpu) {
		if (tcpu == cpu)
			break;
		for (i = page_start; i < page_end; i++)
			__free_page(pages[pcpu_page_idx(tcpu, i)]);
	}
	return -ENOMEM;
}

/**
 * pcpu_pre_unmap_flush - flush cache prior to unmapping
 * @chunk: chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages in [@page_start,@page_end) of @chunk are about to be
 * unmapped.  Flush cache.  As each flushing trial can be very
 * expensive, issue flush on the whole region at once rather than
 * doing it for each cpu.  This could be overkill but is more
 * scalable.
 */
static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
				 int page_start, int page_end)
{
	flush_cache_vunmap(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
{
	unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT);
}

/**
 * pcpu_unmap_pages - unmap pages out of a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array which can be used to pass information to free
 * @populated: populated bitmap
 * @page_start: page index of the first page to unmap
 * @page_end: page index of the last page to unmap + 1
 *
 * For each cpu, unmap pages [@page_start,@page_end) out of @chunk.
 * Corresponding elements in @pages were cleared by the caller and can
 * be used to carry information to pcpu_free_pages() which will be
 * called after all unmaps are finished.  The caller should call
 * proper pre/post flush functions.
 */
static void pcpu_unmap_pages(struct pcpu_chunk *chunk,
			     struct page **pages, unsigned long *populated,
			     int page_start, int page_end)
{
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page *page;

			page = pcpu_chunk_page(chunk, cpu, i);
			WARN_ON(!page);
			pages[pcpu_page_idx(cpu, i)] = page;
		}
		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start),
				   page_end - page_start);
	}

	bitmap_clear(populated, page_start, page_end - page_start);
}
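/*
 * A minimal sketch of the caller-side protocol pcpu_unmap_pages()
 * expects (pcpu_depopulate_chunk() below is the in-tree user): flush
 * the cache for the whole range first, unmap region by region, then
 * flush the TLB, or skip the TLB flush if the range is handed back to
 * vmalloc which flushes lazily:
 *
 *	pcpu_pre_unmap_flush(chunk, page_start, page_end);
 *	pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end)
 *		pcpu_unmap_pages(chunk, pages, populated, rs, re);
 *	pcpu_post_unmap_tlb_flush(chunk, page_start, page_end);
 */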
/**
 * pcpu_post_unmap_tlb_flush - flush TLB after unmapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start,@page_end) of @chunk have been unmapped.  Flush
 * TLB for the regions.  This can be skipped if the area is to be
 * returned to vmalloc as vmalloc will handle TLB flushing lazily.
 *
 * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once
 * for the whole region.
 */
static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
				      int page_start, int page_end)
{
	flush_tlb_kernel_range(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

static int __pcpu_map_pages(unsigned long addr, struct page **pages,
			    int nr_pages)
{
	return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT,
					PAGE_KERNEL, pages);
}

/**
 * pcpu_map_pages - map pages into a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array containing pages to be mapped
 * @populated: populated bitmap
 * @page_start: page index of the first page to map
 * @page_end: page index of the last page to map + 1
 *
 * For each cpu, map pages [@page_start,@page_end) into @chunk.  The
 * caller is responsible for calling pcpu_post_map_flush() after all
 * mappings are complete.
 *
 * This function is responsible for setting the corresponding bits in
 * @chunk->populated bitmap and whatever is necessary for reverse
 * lookup (addr -> chunk).
 */
static int pcpu_map_pages(struct pcpu_chunk *chunk,
			  struct page **pages, unsigned long *populated,
			  int page_start, int page_end)
{
	unsigned int cpu, tcpu;
	int i, err;

	for_each_possible_cpu(cpu) {
		err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start),
				       &pages[pcpu_page_idx(cpu, page_start)],
				       page_end - page_start);
		if (err < 0)
			goto err;
	}

	/* mapping successful, link chunk and mark populated */
	for (i = page_start; i < page_end; i++) {
		for_each_possible_cpu(cpu)
			pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)],
					    chunk);
		__set_bit(i, populated);
	}

	return 0;

err:
	/* unmap every unit mapped before the failing @cpu */
	for_each_possible_cpu(tcpu) {
		if (tcpu == cpu)
			break;
		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start),
				   page_end - page_start);
	}
	pcpu_post_unmap_tlb_flush(chunk, page_start, page_end);
	return err;
}

/**
 * pcpu_post_map_flush - flush cache after mapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start,@page_end) of @chunk have been mapped.  Flush
 * cache.
 *
 * As with pcpu_pre_unmap_flush(), cache flushing is done at once
 * for the whole region.
 */
static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
				int page_start, int page_end)
{
	flush_cache_vmap(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}
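/*
 * The mapping-side counterpart, sketched the way pcpu_populate_chunk()
 * below uses it and with the error unwinding omitted: allocate backing
 * pages for every unpopulated region, map them, then flush the cache
 * once for the whole range:
 *
 *	pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end)
 *		rc = pcpu_alloc_pages(chunk, pages, populated, rs, re);
 *	pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end)
 *		rc = pcpu_map_pages(chunk, pages, populated, rs, re);
 *	pcpu_post_map_flush(chunk, page_start, page_end);
 */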
/**
 * pcpu_populate_chunk - populate and map an area of a pcpu_chunk
 * @chunk: chunk of interest
 * @off: offset to the area to populate
 * @size: size of the area to populate in bytes
 *
 * For each cpu, populate and map pages [@page_start,@page_end) into
 * @chunk.  The area is cleared on return.
 *
 * CONTEXT:
 * pcpu_alloc_mutex, does GFP_KERNEL allocation.
 */
static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size)
{
	int page_start = PFN_DOWN(off);
	int page_end = PFN_UP(off + size);
	int free_end = page_start, unmap_end = page_start;
	struct page **pages;
	unsigned long *populated;
	unsigned int cpu;
	int rs, re, rc;

	/* quick path, check whether all pages are already there */
	rs = page_start;
	pcpu_next_pop(chunk, &rs, &re, page_end);
	if (rs == page_start && re == page_end)
		goto clear;

	/* need to allocate and map pages, this chunk can't be immutable */
	WARN_ON(chunk->immutable);

	pages = pcpu_get_pages_and_bitmap(chunk, &populated, true);
	if (!pages)
		return -ENOMEM;

	/* alloc and map */
	pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) {
		rc = pcpu_alloc_pages(chunk, pages, populated, rs, re);
		if (rc)
			goto err_free;
		free_end = re;
	}

	pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) {
		rc = pcpu_map_pages(chunk, pages, populated, rs, re);
		if (rc)
			goto err_unmap;
		unmap_end = re;
	}
	pcpu_post_map_flush(chunk, page_start, page_end);

	/* commit new bitmap */
	bitmap_copy(chunk->populated, populated, pcpu_unit_pages);
clear:
	for_each_possible_cpu(cpu)
		memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);
	return 0;

err_unmap:
	pcpu_pre_unmap_flush(chunk, page_start, unmap_end);
	pcpu_for_each_unpop_region(chunk, rs, re, page_start, unmap_end)
		pcpu_unmap_pages(chunk, pages, populated, rs, re);
	pcpu_post_unmap_tlb_flush(chunk, page_start, unmap_end);
err_free:
	pcpu_for_each_unpop_region(chunk, rs, re, page_start, free_end)
		pcpu_free_pages(chunk, pages, populated, rs, re);
	return rc;
}
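/*
 * Worked example of the offset-to-page-range conversion above,
 * assuming 4KiB pages: off = 3072 and size = 6144 cover bytes
 * [3072, 9216), so page_start = PFN_DOWN(3072) = 0 and
 * page_end = PFN_UP(9216) = 3.  Pages [0, 3) get populated even
 * though the area only partially overlaps pages 0 and 2, as whole
 * pages are the unit of population.
 */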
/**
 * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk
 * @chunk: chunk to depopulate
 * @off: offset to the area to depopulate
 * @size: size of the area to depopulate in bytes
 *
 * For each cpu, depopulate and unmap pages [@page_start,@page_end)
 * from @chunk.  The vcache is flushed before unmapping; the TLB flush
 * is left to vmalloc's lazy flushing.
 *
 * CONTEXT:
 * pcpu_alloc_mutex.
 */
static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size)
{
	int page_start = PFN_DOWN(off);
	int page_end = PFN_UP(off + size);
	struct page **pages;
	unsigned long *populated;
	int rs, re;

	/* quick path, check whether it's empty already */
	rs = page_start;
	pcpu_next_unpop(chunk, &rs, &re, page_end);
	if (rs == page_start && re == page_end)
		return;

	/* immutable chunks can't be depopulated */
	WARN_ON(chunk->immutable);

	/*
	 * If control reaches here, there must have been at least one
	 * successful population attempt so the temp pages array must
	 * be available now.
	 */
	pages = pcpu_get_pages_and_bitmap(chunk, &populated, false);
	BUG_ON(!pages);

	/* unmap and free */
	pcpu_pre_unmap_flush(chunk, page_start, page_end);

	pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end)
		pcpu_unmap_pages(chunk, pages, populated, rs, re);

	/* no need to flush tlb, vmalloc will handle it lazily */

	pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end)
		pcpu_free_pages(chunk, pages, populated, rs, re);

	/* commit new bitmap */
	bitmap_copy(chunk->populated, populated, pcpu_unit_pages);
}

static struct pcpu_chunk *pcpu_create_chunk(void)
{
	struct pcpu_chunk *chunk;
	struct vm_struct **vms;

	chunk = pcpu_alloc_chunk();
	if (!chunk)
		return NULL;

	vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes,
				pcpu_nr_groups, pcpu_atom_size);
	if (!vms) {
		pcpu_free_chunk(chunk);
		return NULL;
	}

	chunk->data = vms;
	chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0];
	return chunk;
}

static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
{
	if (chunk && chunk->data)
		pcpu_free_vm_areas(chunk->data, pcpu_nr_groups);
	pcpu_free_chunk(chunk);
}

static struct page *pcpu_addr_to_page(void *addr)
{
	return vmalloc_to_page(addr);
}

static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai)
{
	/* no extra restriction */
	return 0;
}
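/*
 * A sketch of how this allocator is consumed, assuming the usual
 * build setup: this file is not compiled standalone; mm/percpu.c
 * #includes either percpu-vm.c or percpu-km.c to provide the chunk
 * ops, and the percpu core then drives them roughly as:
 *
 *	chunk = pcpu_create_chunk();
 *	if (chunk && !pcpu_populate_chunk(chunk, off, size))
 *		... area at @off is now backed and zeroed for each cpu ...
 *	pcpu_depopulate_chunk(chunk, off, size);
 *	pcpu_destroy_chunk(chunk);
 */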