1 /* 2 * mm/percpu-vm.c - vmalloc area based chunk allocation 3 * 4 * Copyright (C) 2010 SUSE Linux Products GmbH 5 * Copyright (C) 2010 Tejun Heo <tj@kernel.org> 6 * 7 * This file is released under the GPLv2. 8 * 9 * Chunks are mapped into vmalloc areas and populated page by page. 10 * This is the default chunk allocator. 11 */ 12 13 static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, 14 unsigned int cpu, int page_idx) 15 { 16 /* must not be used on pre-mapped chunk */ 17 WARN_ON(chunk->immutable); 18 19 return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx)); 20 } 21 22 /** 23 * pcpu_get_pages_and_bitmap - get temp pages array and bitmap 24 * @chunk: chunk of interest 25 * @bitmapp: output parameter for bitmap 26 * @may_alloc: may allocate the array 27 * 28 * Returns pointer to array of pointers to struct page and bitmap, 29 * both of which can be indexed with pcpu_page_idx(). The returned 30 * array is cleared to zero and *@bitmapp is copied from 31 * @chunk->populated. Note that there is only one array and bitmap 32 * and access exclusion is the caller's responsibility. 33 * 34 * CONTEXT: 35 * pcpu_alloc_mutex and does GFP_KERNEL allocation if @may_alloc. 36 * Otherwise, don't care. 37 * 38 * RETURNS: 39 * Pointer to temp pages array on success, NULL on failure. 40 */ 41 static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk, 42 unsigned long **bitmapp, 43 bool may_alloc) 44 { 45 static struct page **pages; 46 static unsigned long *bitmap; 47 size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]); 48 size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) * 49 sizeof(unsigned long); 50 51 if (!pages || !bitmap) { 52 if (may_alloc && !pages) 53 pages = pcpu_mem_zalloc(pages_size); 54 if (may_alloc && !bitmap) 55 bitmap = pcpu_mem_zalloc(bitmap_size); 56 if (!pages || !bitmap) 57 return NULL; 58 } 59 60 bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages); 61 62 *bitmapp = bitmap; 63 return pages; 64 } 65 66 /** 67 * pcpu_free_pages - free pages which were allocated for @chunk 68 * @chunk: chunk pages were allocated for 69 * @pages: array of pages to be freed, indexed by pcpu_page_idx() 70 * @populated: populated bitmap 71 * @page_start: page index of the first page to be freed 72 * @page_end: page index of the last page to be freed + 1 73 * 74 * Free pages [@page_start and @page_end) in @pages for all units. 75 * The pages were allocated for @chunk. 76 */ 77 static void pcpu_free_pages(struct pcpu_chunk *chunk, 78 struct page **pages, unsigned long *populated, 79 int page_start, int page_end) 80 { 81 unsigned int cpu; 82 int i; 83 84 for_each_possible_cpu(cpu) { 85 for (i = page_start; i < page_end; i++) { 86 struct page *page = pages[pcpu_page_idx(cpu, i)]; 87 88 if (page) 89 __free_page(page); 90 } 91 } 92 } 93 94 /** 95 * pcpu_alloc_pages - allocates pages for @chunk 96 * @chunk: target chunk 97 * @pages: array to put the allocated pages into, indexed by pcpu_page_idx() 98 * @populated: populated bitmap 99 * @page_start: page index of the first page to be allocated 100 * @page_end: page index of the last page to be allocated + 1 101 * 102 * Allocate pages [@page_start,@page_end) into @pages for all units. 103 * The allocation is for @chunk. Percpu core doesn't care about the 104 * content of @pages and will pass it verbatim to pcpu_map_pages(). 105 */ 106 static int pcpu_alloc_pages(struct pcpu_chunk *chunk, 107 struct page **pages, unsigned long *populated, 108 int page_start, int page_end) 109 { 110 const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; 111 unsigned int cpu; 112 int i; 113 114 for_each_possible_cpu(cpu) { 115 for (i = page_start; i < page_end; i++) { 116 struct page **pagep = &pages[pcpu_page_idx(cpu, i)]; 117 118 *pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0); 119 if (!*pagep) { 120 pcpu_free_pages(chunk, pages, populated, 121 page_start, page_end); 122 return -ENOMEM; 123 } 124 } 125 } 126 return 0; 127 } 128 129 /** 130 * pcpu_pre_unmap_flush - flush cache prior to unmapping 131 * @chunk: chunk the regions to be flushed belongs to 132 * @page_start: page index of the first page to be flushed 133 * @page_end: page index of the last page to be flushed + 1 134 * 135 * Pages in [@page_start,@page_end) of @chunk are about to be 136 * unmapped. Flush cache. As each flushing trial can be very 137 * expensive, issue flush on the whole region at once rather than 138 * doing it for each cpu. This could be an overkill but is more 139 * scalable. 140 */ 141 static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk, 142 int page_start, int page_end) 143 { 144 flush_cache_vunmap( 145 pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), 146 pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); 147 } 148 149 static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) 150 { 151 unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT); 152 } 153 154 /** 155 * pcpu_unmap_pages - unmap pages out of a pcpu_chunk 156 * @chunk: chunk of interest 157 * @pages: pages array which can be used to pass information to free 158 * @populated: populated bitmap 159 * @page_start: page index of the first page to unmap 160 * @page_end: page index of the last page to unmap + 1 161 * 162 * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. 163 * Corresponding elements in @pages were cleared by the caller and can 164 * be used to carry information to pcpu_free_pages() which will be 165 * called after all unmaps are finished. The caller should call 166 * proper pre/post flush functions. 167 */ 168 static void pcpu_unmap_pages(struct pcpu_chunk *chunk, 169 struct page **pages, unsigned long *populated, 170 int page_start, int page_end) 171 { 172 unsigned int cpu; 173 int i; 174 175 for_each_possible_cpu(cpu) { 176 for (i = page_start; i < page_end; i++) { 177 struct page *page; 178 179 page = pcpu_chunk_page(chunk, cpu, i); 180 WARN_ON(!page); 181 pages[pcpu_page_idx(cpu, i)] = page; 182 } 183 __pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start), 184 page_end - page_start); 185 } 186 187 bitmap_clear(populated, page_start, page_end - page_start); 188 } 189 190 /** 191 * pcpu_post_unmap_tlb_flush - flush TLB after unmapping 192 * @chunk: pcpu_chunk the regions to be flushed belong to 193 * @page_start: page index of the first page to be flushed 194 * @page_end: page index of the last page to be flushed + 1 195 * 196 * Pages [@page_start,@page_end) of @chunk have been unmapped. Flush 197 * TLB for the regions. This can be skipped if the area is to be 198 * returned to vmalloc as vmalloc will handle TLB flushing lazily. 199 * 200 * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once 201 * for the whole region. 202 */ 203 static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, 204 int page_start, int page_end) 205 { 206 flush_tlb_kernel_range( 207 pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), 208 pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); 209 } 210 211 static int __pcpu_map_pages(unsigned long addr, struct page **pages, 212 int nr_pages) 213 { 214 return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT, 215 PAGE_KERNEL, pages); 216 } 217 218 /** 219 * pcpu_map_pages - map pages into a pcpu_chunk 220 * @chunk: chunk of interest 221 * @pages: pages array containing pages to be mapped 222 * @populated: populated bitmap 223 * @page_start: page index of the first page to map 224 * @page_end: page index of the last page to map + 1 225 * 226 * For each cpu, map pages [@page_start,@page_end) into @chunk. The 227 * caller is responsible for calling pcpu_post_map_flush() after all 228 * mappings are complete. 229 * 230 * This function is responsible for setting corresponding bits in 231 * @chunk->populated bitmap and whatever is necessary for reverse 232 * lookup (addr -> chunk). 233 */ 234 static int pcpu_map_pages(struct pcpu_chunk *chunk, 235 struct page **pages, unsigned long *populated, 236 int page_start, int page_end) 237 { 238 unsigned int cpu, tcpu; 239 int i, err; 240 241 for_each_possible_cpu(cpu) { 242 err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start), 243 &pages[pcpu_page_idx(cpu, page_start)], 244 page_end - page_start); 245 if (err < 0) 246 goto err; 247 } 248 249 /* mapping successful, link chunk and mark populated */ 250 for (i = page_start; i < page_end; i++) { 251 for_each_possible_cpu(cpu) 252 pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)], 253 chunk); 254 __set_bit(i, populated); 255 } 256 257 return 0; 258 259 err: 260 for_each_possible_cpu(tcpu) { 261 if (tcpu == cpu) 262 break; 263 __pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start), 264 page_end - page_start); 265 } 266 return err; 267 } 268 269 /** 270 * pcpu_post_map_flush - flush cache after mapping 271 * @chunk: pcpu_chunk the regions to be flushed belong to 272 * @page_start: page index of the first page to be flushed 273 * @page_end: page index of the last page to be flushed + 1 274 * 275 * Pages [@page_start,@page_end) of @chunk have been mapped. Flush 276 * cache. 277 * 278 * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once 279 * for the whole region. 280 */ 281 static void pcpu_post_map_flush(struct pcpu_chunk *chunk, 282 int page_start, int page_end) 283 { 284 flush_cache_vmap( 285 pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), 286 pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); 287 } 288 289 /** 290 * pcpu_populate_chunk - populate and map an area of a pcpu_chunk 291 * @chunk: chunk of interest 292 * @off: offset to the area to populate 293 * @size: size of the area to populate in bytes 294 * 295 * For each cpu, populate and map pages [@page_start,@page_end) into 296 * @chunk. The area is cleared on return. 297 * 298 * CONTEXT: 299 * pcpu_alloc_mutex, does GFP_KERNEL allocation. 300 */ 301 static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) 302 { 303 int page_start = PFN_DOWN(off); 304 int page_end = PFN_UP(off + size); 305 int free_end = page_start, unmap_end = page_start; 306 struct page **pages; 307 unsigned long *populated; 308 unsigned int cpu; 309 int rs, re, rc; 310 311 /* quick path, check whether all pages are already there */ 312 rs = page_start; 313 pcpu_next_pop(chunk, &rs, &re, page_end); 314 if (rs == page_start && re == page_end) 315 goto clear; 316 317 /* need to allocate and map pages, this chunk can't be immutable */ 318 WARN_ON(chunk->immutable); 319 320 pages = pcpu_get_pages_and_bitmap(chunk, &populated, true); 321 if (!pages) 322 return -ENOMEM; 323 324 /* alloc and map */ 325 pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { 326 rc = pcpu_alloc_pages(chunk, pages, populated, rs, re); 327 if (rc) 328 goto err_free; 329 free_end = re; 330 } 331 332 pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { 333 rc = pcpu_map_pages(chunk, pages, populated, rs, re); 334 if (rc) 335 goto err_unmap; 336 unmap_end = re; 337 } 338 pcpu_post_map_flush(chunk, page_start, page_end); 339 340 /* commit new bitmap */ 341 bitmap_copy(chunk->populated, populated, pcpu_unit_pages); 342 clear: 343 for_each_possible_cpu(cpu) 344 memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); 345 return 0; 346 347 err_unmap: 348 pcpu_pre_unmap_flush(chunk, page_start, unmap_end); 349 pcpu_for_each_unpop_region(chunk, rs, re, page_start, unmap_end) 350 pcpu_unmap_pages(chunk, pages, populated, rs, re); 351 pcpu_post_unmap_tlb_flush(chunk, page_start, unmap_end); 352 err_free: 353 pcpu_for_each_unpop_region(chunk, rs, re, page_start, free_end) 354 pcpu_free_pages(chunk, pages, populated, rs, re); 355 return rc; 356 } 357 358 /** 359 * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk 360 * @chunk: chunk to depopulate 361 * @off: offset to the area to depopulate 362 * @size: size of the area to depopulate in bytes 363 * 364 * For each cpu, depopulate and unmap pages [@page_start,@page_end) 365 * from @chunk. If @flush is true, vcache is flushed before unmapping 366 * and tlb after. 367 * 368 * CONTEXT: 369 * pcpu_alloc_mutex. 370 */ 371 static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size) 372 { 373 int page_start = PFN_DOWN(off); 374 int page_end = PFN_UP(off + size); 375 struct page **pages; 376 unsigned long *populated; 377 int rs, re; 378 379 /* quick path, check whether it's empty already */ 380 rs = page_start; 381 pcpu_next_unpop(chunk, &rs, &re, page_end); 382 if (rs == page_start && re == page_end) 383 return; 384 385 /* immutable chunks can't be depopulated */ 386 WARN_ON(chunk->immutable); 387 388 /* 389 * If control reaches here, there must have been at least one 390 * successful population attempt so the temp pages array must 391 * be available now. 392 */ 393 pages = pcpu_get_pages_and_bitmap(chunk, &populated, false); 394 BUG_ON(!pages); 395 396 /* unmap and free */ 397 pcpu_pre_unmap_flush(chunk, page_start, page_end); 398 399 pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) 400 pcpu_unmap_pages(chunk, pages, populated, rs, re); 401 402 /* no need to flush tlb, vmalloc will handle it lazily */ 403 404 pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) 405 pcpu_free_pages(chunk, pages, populated, rs, re); 406 407 /* commit new bitmap */ 408 bitmap_copy(chunk->populated, populated, pcpu_unit_pages); 409 } 410 411 static struct pcpu_chunk *pcpu_create_chunk(void) 412 { 413 struct pcpu_chunk *chunk; 414 struct vm_struct **vms; 415 416 chunk = pcpu_alloc_chunk(); 417 if (!chunk) 418 return NULL; 419 420 vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes, 421 pcpu_nr_groups, pcpu_atom_size); 422 if (!vms) { 423 pcpu_free_chunk(chunk); 424 return NULL; 425 } 426 427 chunk->data = vms; 428 chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0]; 429 return chunk; 430 } 431 432 static void pcpu_destroy_chunk(struct pcpu_chunk *chunk) 433 { 434 if (chunk && chunk->data) 435 pcpu_free_vm_areas(chunk->data, pcpu_nr_groups); 436 pcpu_free_chunk(chunk); 437 } 438 439 static struct page *pcpu_addr_to_page(void *addr) 440 { 441 return vmalloc_to_page(addr); 442 } 443 444 static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai) 445 { 446 /* no extra restriction */ 447 return 0; 448 } 449