1 /* 2 * Procedures for maintaining information about logical memory blocks. 3 * 4 * Peter Bergner, IBM Corp. June 2001. 5 * Copyright (C) 2001 Peter Bergner. 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 10 * 2 of the License, or (at your option) any later version. 11 */ 12 13 #include <linux/kernel.h> 14 #include <linux/slab.h> 15 #include <linux/init.h> 16 #include <linux/bitops.h> 17 #include <linux/poison.h> 18 #include <linux/pfn.h> 19 #include <linux/debugfs.h> 20 #include <linux/kmemleak.h> 21 #include <linux/seq_file.h> 22 #include <linux/memblock.h> 23 24 #include <asm/sections.h> 25 #include <linux/io.h> 26 27 #include "internal.h" 28 29 #define INIT_MEMBLOCK_REGIONS 128 30 #define INIT_PHYSMEM_REGIONS 4 31 32 #ifndef INIT_MEMBLOCK_RESERVED_REGIONS 33 # define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS 34 #endif 35 36 /** 37 * DOC: memblock overview 38 * 39 * Memblock is a method of managing memory regions during the early 40 * boot period when the usual kernel memory allocators are not up and 41 * running. 42 * 43 * Memblock views the system memory as collections of contiguous 44 * regions. There are several types of these collections: 45 * 46 * * ``memory`` - describes the physical memory available to the 47 * kernel; this may differ from the actual physical memory installed 48 * in the system, for instance when the memory is restricted with 49 * ``mem=`` command line parameter 50 * * ``reserved`` - describes the regions that were allocated 51 * * ``physmap`` - describes the actual physical memory regardless of 52 * the possible restrictions; the ``physmap`` type is only available 53 * on some architectures. 54 * 55 * Each region is represented by :c:type:`struct memblock_region` that 56 * defines the region extents, its attributes and NUMA node id on NUMA 57 * systems. 
Every memory type is described by the :c:type:`struct 58 * memblock_type` which contains an array of memory regions along with 59 * the allocator metadata. The memory types are nicely wrapped with 60 * :c:type:`struct memblock`. This structure is statically initialized 61 * at build time. The region arrays for the "memory" and "reserved" 62 * types are initially sized to %INIT_MEMBLOCK_REGIONS and for the 63 * "physmap" type to %INIT_PHYSMEM_REGIONS. 64 * The :c:func:`memblock_allow_resize` enables automatic resizing of 65 * the region arrays during addition of new regions. This feature 66 * should be used with care so that memory allocated for the region 67 * array will not overlap with areas that should be reserved, for 68 * example initrd. 69 * 70 * The early architecture setup should tell memblock what the physical 71 * memory layout is by using :c:func:`memblock_add` or 72 * :c:func:`memblock_add_node` functions. The first function does not 73 * assign the region to a NUMA node and it is appropriate for UMA 74 * systems. Yet, it is possible to use it on NUMA systems as well and 75 * assign the region to a NUMA node later in the setup process using 76 * :c:func:`memblock_set_node`. The :c:func:`memblock_add_node` 77 * performs such an assignment directly. 78 * 79 * Once memblock is set up the memory can be allocated using either 80 * memblock or bootmem APIs. 81 * 82 * As the system boot progresses, the architecture specific 83 * :c:func:`mem_init` function frees all the memory to the buddy page 84 * allocator. 85 * 86 * If an architecture enables %CONFIG_ARCH_DISCARD_MEMBLOCK, the 87 * memblock data structures will be discarded after the system 88 * initialization completes. 
89 */ 90 91 #ifndef CONFIG_NEED_MULTIPLE_NODES 92 struct pglist_data __refdata contig_page_data; 93 EXPORT_SYMBOL(contig_page_data); 94 #endif 95 96 unsigned long max_low_pfn; 97 unsigned long min_low_pfn; 98 unsigned long max_pfn; 99 unsigned long long max_possible_pfn; 100 101 static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; 102 static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock; 103 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP 104 static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock; 105 #endif 106 107 struct memblock memblock __initdata_memblock = { 108 .memory.regions = memblock_memory_init_regions, 109 .memory.cnt = 1, /* empty dummy entry */ 110 .memory.max = INIT_MEMBLOCK_REGIONS, 111 .memory.name = "memory", 112 113 .reserved.regions = memblock_reserved_init_regions, 114 .reserved.cnt = 1, /* empty dummy entry */ 115 .reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS, 116 .reserved.name = "reserved", 117 118 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP 119 .physmem.regions = memblock_physmem_init_regions, 120 .physmem.cnt = 1, /* empty dummy entry */ 121 .physmem.max = INIT_PHYSMEM_REGIONS, 122 .physmem.name = "physmem", 123 #endif 124 125 .bottom_up = false, 126 .current_limit = MEMBLOCK_ALLOC_ANYWHERE, 127 }; 128 129 int memblock_debug __initdata_memblock; 130 static bool system_has_some_mirror __initdata_memblock = false; 131 static int memblock_can_resize __initdata_memblock; 132 static int memblock_memory_in_slab __initdata_memblock = 0; 133 static int memblock_reserved_in_slab __initdata_memblock = 0; 134 135 enum memblock_flags __init_memblock choose_memblock_flags(void) 136 { 137 return system_has_some_mirror ? 
MEMBLOCK_MIRROR : MEMBLOCK_NONE; 138 } 139 140 /* adjust *@size so that (@base + *@size) doesn't overflow, return new size */ 141 static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) 142 { 143 return *size = min(*size, PHYS_ADDR_MAX - base); 144 } 145 146 /* 147 * Address comparison utilities 148 */ 149 static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, 150 phys_addr_t base2, phys_addr_t size2) 151 { 152 return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); 153 } 154 155 bool __init_memblock memblock_overlaps_region(struct memblock_type *type, 156 phys_addr_t base, phys_addr_t size) 157 { 158 unsigned long i; 159 160 for (i = 0; i < type->cnt; i++) 161 if (memblock_addrs_overlap(base, size, type->regions[i].base, 162 type->regions[i].size)) 163 break; 164 return i < type->cnt; 165 } 166 167 /** 168 * __memblock_find_range_bottom_up - find free area utility in bottom-up 169 * @start: start of candidate range 170 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or 171 * %MEMBLOCK_ALLOC_ACCESSIBLE 172 * @size: size of free area to find 173 * @align: alignment of free area to find 174 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 175 * @flags: pick from blocks based on memory attributes 176 * 177 * Utility called from memblock_find_in_range_node(), find free area bottom-up. 178 * 179 * Return: 180 * Found address on success, 0 on failure. 
181 */ 182 static phys_addr_t __init_memblock 183 __memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end, 184 phys_addr_t size, phys_addr_t align, int nid, 185 enum memblock_flags flags) 186 { 187 phys_addr_t this_start, this_end, cand; 188 u64 i; 189 190 for_each_free_mem_range(i, nid, flags, &this_start, &this_end, NULL) { 191 this_start = clamp(this_start, start, end); 192 this_end = clamp(this_end, start, end); 193 194 cand = round_up(this_start, align); 195 if (cand < this_end && this_end - cand >= size) 196 return cand; 197 } 198 199 return 0; 200 } 201 202 /** 203 * __memblock_find_range_top_down - find free area utility, in top-down 204 * @start: start of candidate range 205 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or 206 * %MEMBLOCK_ALLOC_ACCESSIBLE 207 * @size: size of free area to find 208 * @align: alignment of free area to find 209 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 210 * @flags: pick from blocks based on memory attributes 211 * 212 * Utility called from memblock_find_in_range_node(), find free area top-down. 213 * 214 * Return: 215 * Found address on success, 0 on failure. 
216 */ 217 static phys_addr_t __init_memblock 218 __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end, 219 phys_addr_t size, phys_addr_t align, int nid, 220 enum memblock_flags flags) 221 { 222 phys_addr_t this_start, this_end, cand; 223 u64 i; 224 225 for_each_free_mem_range_reverse(i, nid, flags, &this_start, &this_end, 226 NULL) { 227 this_start = clamp(this_start, start, end); 228 this_end = clamp(this_end, start, end); 229 230 if (this_end < size) 231 continue; 232 233 cand = round_down(this_end - size, align); 234 if (cand >= this_start) 235 return cand; 236 } 237 238 return 0; 239 } 240 241 /** 242 * memblock_find_in_range_node - find free area in given range and node 243 * @size: size of free area to find 244 * @align: alignment of free area to find 245 * @start: start of candidate range 246 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or 247 * %MEMBLOCK_ALLOC_ACCESSIBLE 248 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 249 * @flags: pick from blocks based on memory attributes 250 * 251 * Find @size free area aligned to @align in the specified range and node. 252 * 253 * When allocation direction is bottom-up, the @start should be greater 254 * than the end of the kernel image. Otherwise, it will be trimmed. The 255 * reason is that we want the bottom-up allocation just near the kernel 256 * image so it is highly likely that the allocated memory and the kernel 257 * will reside in the same node. 258 * 259 * If bottom-up allocation failed, will try to allocate memory top-down. 260 * 261 * Return: 262 * Found address on success, 0 on failure. 
263 */ 264 phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, 265 phys_addr_t align, phys_addr_t start, 266 phys_addr_t end, int nid, 267 enum memblock_flags flags) 268 { 269 phys_addr_t kernel_end, ret; 270 271 /* pump up @end */ 272 if (end == MEMBLOCK_ALLOC_ACCESSIBLE || 273 end == MEMBLOCK_ALLOC_KASAN) 274 end = memblock.current_limit; 275 276 /* avoid allocating the first page */ 277 start = max_t(phys_addr_t, start, PAGE_SIZE); 278 end = max(start, end); 279 kernel_end = __pa_symbol(_end); 280 281 /* 282 * try bottom-up allocation only when bottom-up mode 283 * is set and @end is above the kernel image. 284 */ 285 if (memblock_bottom_up() && end > kernel_end) { 286 phys_addr_t bottom_up_start; 287 288 /* make sure we will allocate above the kernel */ 289 bottom_up_start = max(start, kernel_end); 290 291 /* ok, try bottom-up allocation first */ 292 ret = __memblock_find_range_bottom_up(bottom_up_start, end, 293 size, align, nid, flags); 294 if (ret) 295 return ret; 296 297 /* 298 * we always limit bottom-up allocation above the kernel, 299 * but top-down allocation doesn't have the limit, so 300 * retrying top-down allocation may succeed when bottom-up 301 * allocation failed. 302 * 303 * bottom-up allocation is expected to be fail very rarely, 304 * so we use WARN_ONCE() here to see the stack trace if 305 * fail happens. 306 */ 307 WARN_ONCE(IS_ENABLED(CONFIG_MEMORY_HOTREMOVE), 308 "memblock: bottom-up allocation failed, memory hotremove may be affected\n"); 309 } 310 311 return __memblock_find_range_top_down(start, end, size, align, nid, 312 flags); 313 } 314 315 /** 316 * memblock_find_in_range - find free area in given range 317 * @start: start of candidate range 318 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or 319 * %MEMBLOCK_ALLOC_ACCESSIBLE 320 * @size: size of free area to find 321 * @align: alignment of free area to find 322 * 323 * Find @size free area aligned to @align in the specified range. 
324 * 325 * Return: 326 * Found address on success, 0 on failure. 327 */ 328 phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start, 329 phys_addr_t end, phys_addr_t size, 330 phys_addr_t align) 331 { 332 phys_addr_t ret; 333 enum memblock_flags flags = choose_memblock_flags(); 334 335 again: 336 ret = memblock_find_in_range_node(size, align, start, end, 337 NUMA_NO_NODE, flags); 338 339 if (!ret && (flags & MEMBLOCK_MIRROR)) { 340 pr_warn("Could not allocate %pap bytes of mirrored memory\n", 341 &size); 342 flags &= ~MEMBLOCK_MIRROR; 343 goto again; 344 } 345 346 return ret; 347 } 348 349 static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) 350 { 351 type->total_size -= type->regions[r].size; 352 memmove(&type->regions[r], &type->regions[r + 1], 353 (type->cnt - (r + 1)) * sizeof(type->regions[r])); 354 type->cnt--; 355 356 /* Special case for empty arrays */ 357 if (type->cnt == 0) { 358 WARN_ON(type->total_size != 0); 359 type->cnt = 1; 360 type->regions[0].base = 0; 361 type->regions[0].size = 0; 362 type->regions[0].flags = 0; 363 memblock_set_region_node(&type->regions[0], MAX_NUMNODES); 364 } 365 } 366 367 #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK 368 /** 369 * memblock_discard - discard memory and reserved arrays if they were allocated 370 */ 371 void __init memblock_discard(void) 372 { 373 phys_addr_t addr, size; 374 375 if (memblock.reserved.regions != memblock_reserved_init_regions) { 376 addr = __pa(memblock.reserved.regions); 377 size = PAGE_ALIGN(sizeof(struct memblock_region) * 378 memblock.reserved.max); 379 __memblock_free_late(addr, size); 380 } 381 382 if (memblock.memory.regions != memblock_memory_init_regions) { 383 addr = __pa(memblock.memory.regions); 384 size = PAGE_ALIGN(sizeof(struct memblock_region) * 385 memblock.memory.max); 386 __memblock_free_late(addr, size); 387 } 388 } 389 #endif 390 391 /** 392 * memblock_double_array - double the size of the memblock regions array 393 * @type: 
memblock type of the regions array being doubled 394 * @new_area_start: starting address of memory range to avoid overlap with 395 * @new_area_size: size of memory range to avoid overlap with 396 * 397 * Double the size of the @type regions array. If memblock is being used to 398 * allocate memory for a new reserved regions array and there is a previously 399 * allocated memory range [@new_area_start, @new_area_start + @new_area_size] 400 * waiting to be reserved, ensure the memory used by the new array does 401 * not overlap. 402 * 403 * Return: 404 * 0 on success, -1 on failure. 405 */ 406 static int __init_memblock memblock_double_array(struct memblock_type *type, 407 phys_addr_t new_area_start, 408 phys_addr_t new_area_size) 409 { 410 struct memblock_region *new_array, *old_array; 411 phys_addr_t old_alloc_size, new_alloc_size; 412 phys_addr_t old_size, new_size, addr, new_end; 413 int use_slab = slab_is_available(); 414 int *in_slab; 415 416 /* We don't allow resizing until we know about the reserved regions 417 * of memory that aren't suitable for allocation 418 */ 419 if (!memblock_can_resize) 420 return -1; 421 422 /* Calculate new doubled size */ 423 old_size = type->max * sizeof(struct memblock_region); 424 new_size = old_size << 1; 425 /* 426 * We need to allocated new one align to PAGE_SIZE, 427 * so we can free them completely later. 428 */ 429 old_alloc_size = PAGE_ALIGN(old_size); 430 new_alloc_size = PAGE_ALIGN(new_size); 431 432 /* Retrieve the slab flag */ 433 if (type == &memblock.memory) 434 in_slab = &memblock_memory_in_slab; 435 else 436 in_slab = &memblock_reserved_in_slab; 437 438 /* Try to find some space for it. 439 * 440 * WARNING: We assume that either slab_is_available() and we use it or 441 * we use MEMBLOCK for allocations. 
That means that this is unsafe to 442 * use when bootmem is currently active (unless bootmem itself is 443 * implemented on top of MEMBLOCK which isn't the case yet) 444 * 445 * This should however not be an issue for now, as we currently only 446 * call into MEMBLOCK while it's still active, or much later when slab 447 * is active for memory hotplug operations 448 */ 449 if (use_slab) { 450 new_array = kmalloc(new_size, GFP_KERNEL); 451 addr = new_array ? __pa(new_array) : 0; 452 } else { 453 /* only exclude range when trying to double reserved.regions */ 454 if (type != &memblock.reserved) 455 new_area_start = new_area_size = 0; 456 457 addr = memblock_find_in_range(new_area_start + new_area_size, 458 memblock.current_limit, 459 new_alloc_size, PAGE_SIZE); 460 if (!addr && new_area_size) 461 addr = memblock_find_in_range(0, 462 min(new_area_start, memblock.current_limit), 463 new_alloc_size, PAGE_SIZE); 464 465 new_array = addr ? __va(addr) : NULL; 466 } 467 if (!addr) { 468 pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", 469 type->name, type->max, type->max * 2); 470 return -1; 471 } 472 473 new_end = addr + new_size - 1; 474 memblock_dbg("memblock: %s is doubled to %ld at [%pa-%pa]", 475 type->name, type->max * 2, &addr, &new_end); 476 477 /* 478 * Found space, we now need to move the array over before we add the 479 * reserved region since it may be our reserved array itself that is 480 * full. 481 */ 482 memcpy(new_array, type->regions, old_size); 483 memset(new_array + type->max, 0, old_size); 484 old_array = type->regions; 485 type->regions = new_array; 486 type->max <<= 1; 487 488 /* Free old array. We needn't free it if the array is the static one */ 489 if (*in_slab) 490 kfree(old_array); 491 else if (old_array != memblock_memory_init_regions && 492 old_array != memblock_reserved_init_regions) 493 memblock_free(__pa(old_array), old_alloc_size); 494 495 /* 496 * Reserve the new array if that comes from the memblock. 
Otherwise, we 497 * needn't do it 498 */ 499 if (!use_slab) 500 BUG_ON(memblock_reserve(addr, new_alloc_size)); 501 502 /* Update slab flag */ 503 *in_slab = use_slab; 504 505 return 0; 506 } 507 508 /** 509 * memblock_merge_regions - merge neighboring compatible regions 510 * @type: memblock type to scan 511 * 512 * Scan @type and merge neighboring compatible regions. 513 */ 514 static void __init_memblock memblock_merge_regions(struct memblock_type *type) 515 { 516 int i = 0; 517 518 /* cnt never goes below 1 */ 519 while (i < type->cnt - 1) { 520 struct memblock_region *this = &type->regions[i]; 521 struct memblock_region *next = &type->regions[i + 1]; 522 523 if (this->base + this->size != next->base || 524 memblock_get_region_node(this) != 525 memblock_get_region_node(next) || 526 this->flags != next->flags) { 527 BUG_ON(this->base + this->size > next->base); 528 i++; 529 continue; 530 } 531 532 this->size += next->size; 533 /* move forward from next + 1, index of which is i + 2 */ 534 memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next)); 535 type->cnt--; 536 } 537 } 538 539 /** 540 * memblock_insert_region - insert new memblock region 541 * @type: memblock type to insert into 542 * @idx: index for the insertion point 543 * @base: base address of the new region 544 * @size: size of the new region 545 * @nid: node id of the new region 546 * @flags: flags of the new region 547 * 548 * Insert new memblock region [@base, @base + @size) into @type at @idx. 549 * @type must already have extra room to accommodate the new region. 
550 */ 551 static void __init_memblock memblock_insert_region(struct memblock_type *type, 552 int idx, phys_addr_t base, 553 phys_addr_t size, 554 int nid, 555 enum memblock_flags flags) 556 { 557 struct memblock_region *rgn = &type->regions[idx]; 558 559 BUG_ON(type->cnt >= type->max); 560 memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn)); 561 rgn->base = base; 562 rgn->size = size; 563 rgn->flags = flags; 564 memblock_set_region_node(rgn, nid); 565 type->cnt++; 566 type->total_size += size; 567 } 568 569 /** 570 * memblock_add_range - add new memblock region 571 * @type: memblock type to add new region into 572 * @base: base address of the new region 573 * @size: size of the new region 574 * @nid: nid of the new region 575 * @flags: flags of the new region 576 * 577 * Add new memblock region [@base, @base + @size) into @type. The new region 578 * is allowed to overlap with existing ones - overlaps don't affect already 579 * existing regions. @type is guaranteed to be minimal (all neighbouring 580 * compatible regions are merged) after the addition. 581 * 582 * Return: 583 * 0 on success, -errno on failure. 584 */ 585 int __init_memblock memblock_add_range(struct memblock_type *type, 586 phys_addr_t base, phys_addr_t size, 587 int nid, enum memblock_flags flags) 588 { 589 bool insert = false; 590 phys_addr_t obase = base; 591 phys_addr_t end = base + memblock_cap_size(base, &size); 592 int idx, nr_new; 593 struct memblock_region *rgn; 594 595 if (!size) 596 return 0; 597 598 /* special case for empty array */ 599 if (type->regions[0].size == 0) { 600 WARN_ON(type->cnt != 1 || type->total_size); 601 type->regions[0].base = base; 602 type->regions[0].size = size; 603 type->regions[0].flags = flags; 604 memblock_set_region_node(&type->regions[0], nid); 605 type->total_size = size; 606 return 0; 607 } 608 repeat: 609 /* 610 * The following is executed twice. Once with %false @insert and 611 * then with %true. 
The first counts the number of regions needed 612 * to accommodate the new area. The second actually inserts them. 613 */ 614 base = obase; 615 nr_new = 0; 616 617 for_each_memblock_type(idx, type, rgn) { 618 phys_addr_t rbase = rgn->base; 619 phys_addr_t rend = rbase + rgn->size; 620 621 if (rbase >= end) 622 break; 623 if (rend <= base) 624 continue; 625 /* 626 * @rgn overlaps. If it separates the lower part of new 627 * area, insert that portion. 628 */ 629 if (rbase > base) { 630 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP 631 WARN_ON(nid != memblock_get_region_node(rgn)); 632 #endif 633 WARN_ON(flags != rgn->flags); 634 nr_new++; 635 if (insert) 636 memblock_insert_region(type, idx++, base, 637 rbase - base, nid, 638 flags); 639 } 640 /* area below @rend is dealt with, forget about it */ 641 base = min(rend, end); 642 } 643 644 /* insert the remaining portion */ 645 if (base < end) { 646 nr_new++; 647 if (insert) 648 memblock_insert_region(type, idx, base, end - base, 649 nid, flags); 650 } 651 652 if (!nr_new) 653 return 0; 654 655 /* 656 * If this was the first round, resize array and repeat for actual 657 * insertions; otherwise, merge and return. 658 */ 659 if (!insert) { 660 while (type->cnt + nr_new > type->max) 661 if (memblock_double_array(type, obase, size) < 0) 662 return -ENOMEM; 663 insert = true; 664 goto repeat; 665 } else { 666 memblock_merge_regions(type); 667 return 0; 668 } 669 } 670 671 /** 672 * memblock_add_node - add new memblock region within a NUMA node 673 * @base: base address of the new region 674 * @size: size of the new region 675 * @nid: nid of the new region 676 * 677 * Add new memblock region [@base, @base + @size) to the "memory" 678 * type. See memblock_add_range() description for mode details 679 * 680 * Return: 681 * 0 on success, -errno on failure. 
682 */ 683 int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size, 684 int nid) 685 { 686 return memblock_add_range(&memblock.memory, base, size, nid, 0); 687 } 688 689 /** 690 * memblock_add - add new memblock region 691 * @base: base address of the new region 692 * @size: size of the new region 693 * 694 * Add new memblock region [@base, @base + @size) to the "memory" 695 * type. See memblock_add_range() description for mode details 696 * 697 * Return: 698 * 0 on success, -errno on failure. 699 */ 700 int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) 701 { 702 phys_addr_t end = base + size - 1; 703 704 memblock_dbg("memblock_add: [%pa-%pa] %pF\n", 705 &base, &end, (void *)_RET_IP_); 706 707 return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0); 708 } 709 710 /** 711 * memblock_isolate_range - isolate given range into disjoint memblocks 712 * @type: memblock type to isolate range for 713 * @base: base of range to isolate 714 * @size: size of range to isolate 715 * @start_rgn: out parameter for the start of isolated region 716 * @end_rgn: out parameter for the end of isolated region 717 * 718 * Walk @type and ensure that regions don't cross the boundaries defined by 719 * [@base, @base + @size). Crossing regions are split at the boundaries, 720 * which may create at most two more regions. The index of the first 721 * region inside the range is returned in *@start_rgn and end in *@end_rgn. 722 * 723 * Return: 724 * 0 on success, -errno on failure. 
725 */ 726 static int __init_memblock memblock_isolate_range(struct memblock_type *type, 727 phys_addr_t base, phys_addr_t size, 728 int *start_rgn, int *end_rgn) 729 { 730 phys_addr_t end = base + memblock_cap_size(base, &size); 731 int idx; 732 struct memblock_region *rgn; 733 734 *start_rgn = *end_rgn = 0; 735 736 if (!size) 737 return 0; 738 739 /* we'll create at most two more regions */ 740 while (type->cnt + 2 > type->max) 741 if (memblock_double_array(type, base, size) < 0) 742 return -ENOMEM; 743 744 for_each_memblock_type(idx, type, rgn) { 745 phys_addr_t rbase = rgn->base; 746 phys_addr_t rend = rbase + rgn->size; 747 748 if (rbase >= end) 749 break; 750 if (rend <= base) 751 continue; 752 753 if (rbase < base) { 754 /* 755 * @rgn intersects from below. Split and continue 756 * to process the next region - the new top half. 757 */ 758 rgn->base = base; 759 rgn->size -= base - rbase; 760 type->total_size -= base - rbase; 761 memblock_insert_region(type, idx, rbase, base - rbase, 762 memblock_get_region_node(rgn), 763 rgn->flags); 764 } else if (rend > end) { 765 /* 766 * @rgn intersects from above. Split and redo the 767 * current region - the new bottom half. 
768 */ 769 rgn->base = end; 770 rgn->size -= end - rbase; 771 type->total_size -= end - rbase; 772 memblock_insert_region(type, idx--, rbase, end - rbase, 773 memblock_get_region_node(rgn), 774 rgn->flags); 775 } else { 776 /* @rgn is fully contained, record it */ 777 if (!*end_rgn) 778 *start_rgn = idx; 779 *end_rgn = idx + 1; 780 } 781 } 782 783 return 0; 784 } 785 786 static int __init_memblock memblock_remove_range(struct memblock_type *type, 787 phys_addr_t base, phys_addr_t size) 788 { 789 int start_rgn, end_rgn; 790 int i, ret; 791 792 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); 793 if (ret) 794 return ret; 795 796 for (i = end_rgn - 1; i >= start_rgn; i--) 797 memblock_remove_region(type, i); 798 return 0; 799 } 800 801 int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) 802 { 803 phys_addr_t end = base + size - 1; 804 805 memblock_dbg("memblock_remove: [%pa-%pa] %pS\n", 806 &base, &end, (void *)_RET_IP_); 807 808 return memblock_remove_range(&memblock.memory, base, size); 809 } 810 811 /** 812 * memblock_free - free boot memory block 813 * @base: phys starting address of the boot memory block 814 * @size: size of the boot memory block in bytes 815 * 816 * Free boot memory block previously allocated by memblock_alloc_xx() API. 817 * The freeing memory will not be released to the buddy allocator. 
818 */ 819 int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size) 820 { 821 phys_addr_t end = base + size - 1; 822 823 memblock_dbg(" memblock_free: [%pa-%pa] %pF\n", 824 &base, &end, (void *)_RET_IP_); 825 826 kmemleak_free_part_phys(base, size); 827 return memblock_remove_range(&memblock.reserved, base, size); 828 } 829 830 int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) 831 { 832 phys_addr_t end = base + size - 1; 833 834 memblock_dbg("memblock_reserve: [%pa-%pa] %pF\n", 835 &base, &end, (void *)_RET_IP_); 836 837 return memblock_add_range(&memblock.reserved, base, size, MAX_NUMNODES, 0); 838 } 839 840 /** 841 * memblock_setclr_flag - set or clear flag for a memory region 842 * @base: base address of the region 843 * @size: size of the region 844 * @set: set or clear the flag 845 * @flag: the flag to udpate 846 * 847 * This function isolates region [@base, @base + @size), and sets/clears flag 848 * 849 * Return: 0 on success, -errno on failure. 850 */ 851 static int __init_memblock memblock_setclr_flag(phys_addr_t base, 852 phys_addr_t size, int set, int flag) 853 { 854 struct memblock_type *type = &memblock.memory; 855 int i, ret, start_rgn, end_rgn; 856 857 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); 858 if (ret) 859 return ret; 860 861 for (i = start_rgn; i < end_rgn; i++) 862 if (set) 863 memblock_set_region_flags(&type->regions[i], flag); 864 else 865 memblock_clear_region_flags(&type->regions[i], flag); 866 867 memblock_merge_regions(type); 868 return 0; 869 } 870 871 /** 872 * memblock_mark_hotplug - Mark hotpluggable memory with flag MEMBLOCK_HOTPLUG. 873 * @base: the base phys addr of the region 874 * @size: the size of the region 875 * 876 * Return: 0 on success, -errno on failure. 
877 */ 878 int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size) 879 { 880 return memblock_setclr_flag(base, size, 1, MEMBLOCK_HOTPLUG); 881 } 882 883 /** 884 * memblock_clear_hotplug - Clear flag MEMBLOCK_HOTPLUG for a specified region. 885 * @base: the base phys addr of the region 886 * @size: the size of the region 887 * 888 * Return: 0 on success, -errno on failure. 889 */ 890 int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size) 891 { 892 return memblock_setclr_flag(base, size, 0, MEMBLOCK_HOTPLUG); 893 } 894 895 /** 896 * memblock_mark_mirror - Mark mirrored memory with flag MEMBLOCK_MIRROR. 897 * @base: the base phys addr of the region 898 * @size: the size of the region 899 * 900 * Return: 0 on success, -errno on failure. 901 */ 902 int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size) 903 { 904 system_has_some_mirror = true; 905 906 return memblock_setclr_flag(base, size, 1, MEMBLOCK_MIRROR); 907 } 908 909 /** 910 * memblock_mark_nomap - Mark a memory region with flag MEMBLOCK_NOMAP. 911 * @base: the base phys addr of the region 912 * @size: the size of the region 913 * 914 * Return: 0 on success, -errno on failure. 915 */ 916 int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size) 917 { 918 return memblock_setclr_flag(base, size, 1, MEMBLOCK_NOMAP); 919 } 920 921 /** 922 * memblock_clear_nomap - Clear flag MEMBLOCK_NOMAP for a specified region. 923 * @base: the base phys addr of the region 924 * @size: the size of the region 925 * 926 * Return: 0 on success, -errno on failure. 
927 */ 928 int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size) 929 { 930 return memblock_setclr_flag(base, size, 0, MEMBLOCK_NOMAP); 931 } 932 933 /** 934 * __next_reserved_mem_region - next function for for_each_reserved_region() 935 * @idx: pointer to u64 loop variable 936 * @out_start: ptr to phys_addr_t for start address of the region, can be %NULL 937 * @out_end: ptr to phys_addr_t for end address of the region, can be %NULL 938 * 939 * Iterate over all reserved memory regions. 940 */ 941 void __init_memblock __next_reserved_mem_region(u64 *idx, 942 phys_addr_t *out_start, 943 phys_addr_t *out_end) 944 { 945 struct memblock_type *type = &memblock.reserved; 946 947 if (*idx < type->cnt) { 948 struct memblock_region *r = &type->regions[*idx]; 949 phys_addr_t base = r->base; 950 phys_addr_t size = r->size; 951 952 if (out_start) 953 *out_start = base; 954 if (out_end) 955 *out_end = base + size - 1; 956 957 *idx += 1; 958 return; 959 } 960 961 /* signal end of iteration */ 962 *idx = ULLONG_MAX; 963 } 964 965 /** 966 * __next__mem_range - next function for for_each_free_mem_range() etc. 967 * @idx: pointer to u64 loop variable 968 * @nid: node selector, %NUMA_NO_NODE for all nodes 969 * @flags: pick from blocks based on memory attributes 970 * @type_a: pointer to memblock_type from where the range is taken 971 * @type_b: pointer to memblock_type which excludes memory from being taken 972 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL 973 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL 974 * @out_nid: ptr to int for nid of the range, can be %NULL 975 * 976 * Find the first area from *@idx which matches @nid, fill the out 977 * parameters, and update *@idx for the next iteration. The lower 32bit of 978 * *@idx contains index into type_a and the upper 32bit indexes the 979 * areas before each region in type_b. 
For example, if type_b regions 980 * look like the following, 981 * 982 * 0:[0-16), 1:[32-48), 2:[128-130) 983 * 984 * The upper 32bit indexes the following regions. 985 * 986 * 0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX) 987 * 988 * As both region arrays are sorted, the function advances the two indices 989 * in lockstep and returns each intersection. 990 */ 991 void __init_memblock __next_mem_range(u64 *idx, int nid, 992 enum memblock_flags flags, 993 struct memblock_type *type_a, 994 struct memblock_type *type_b, 995 phys_addr_t *out_start, 996 phys_addr_t *out_end, int *out_nid) 997 { 998 int idx_a = *idx & 0xffffffff; 999 int idx_b = *idx >> 32; 1000 1001 if (WARN_ONCE(nid == MAX_NUMNODES, 1002 "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) 1003 nid = NUMA_NO_NODE; 1004 1005 for (; idx_a < type_a->cnt; idx_a++) { 1006 struct memblock_region *m = &type_a->regions[idx_a]; 1007 1008 phys_addr_t m_start = m->base; 1009 phys_addr_t m_end = m->base + m->size; 1010 int m_nid = memblock_get_region_node(m); 1011 1012 /* only memory regions are associated with nodes, check it */ 1013 if (nid != NUMA_NO_NODE && nid != m_nid) 1014 continue; 1015 1016 /* skip hotpluggable memory regions if needed */ 1017 if (movable_node_is_enabled() && memblock_is_hotpluggable(m)) 1018 continue; 1019 1020 /* if we want mirror memory skip non-mirror memory regions */ 1021 if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m)) 1022 continue; 1023 1024 /* skip nomap memory unless we were asked for it explicitly */ 1025 if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m)) 1026 continue; 1027 1028 if (!type_b) { 1029 if (out_start) 1030 *out_start = m_start; 1031 if (out_end) 1032 *out_end = m_end; 1033 if (out_nid) 1034 *out_nid = m_nid; 1035 idx_a++; 1036 *idx = (u32)idx_a | (u64)idx_b << 32; 1037 return; 1038 } 1039 1040 /* scan areas before each reservation */ 1041 for (; idx_b < type_b->cnt + 1; idx_b++) { 1042 struct memblock_region *r; 1043 phys_addr_t r_start; 
1044 phys_addr_t r_end; 1045 1046 r = &type_b->regions[idx_b]; 1047 r_start = idx_b ? r[-1].base + r[-1].size : 0; 1048 r_end = idx_b < type_b->cnt ? 1049 r->base : PHYS_ADDR_MAX; 1050 1051 /* 1052 * if idx_b advanced past idx_a, 1053 * break out to advance idx_a 1054 */ 1055 if (r_start >= m_end) 1056 break; 1057 /* if the two regions intersect, we're done */ 1058 if (m_start < r_end) { 1059 if (out_start) 1060 *out_start = 1061 max(m_start, r_start); 1062 if (out_end) 1063 *out_end = min(m_end, r_end); 1064 if (out_nid) 1065 *out_nid = m_nid; 1066 /* 1067 * The region which ends first is 1068 * advanced for the next iteration. 1069 */ 1070 if (m_end <= r_end) 1071 idx_a++; 1072 else 1073 idx_b++; 1074 *idx = (u32)idx_a | (u64)idx_b << 32; 1075 return; 1076 } 1077 } 1078 } 1079 1080 /* signal end of iteration */ 1081 *idx = ULLONG_MAX; 1082 } 1083 1084 /** 1085 * __next_mem_range_rev - generic next function for for_each_*_range_rev() 1086 * 1087 * @idx: pointer to u64 loop variable 1088 * @nid: node selector, %NUMA_NO_NODE for all nodes 1089 * @flags: pick from blocks based on memory attributes 1090 * @type_a: pointer to memblock_type from where the range is taken 1091 * @type_b: pointer to memblock_type which excludes memory from being taken 1092 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL 1093 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL 1094 * @out_nid: ptr to int for nid of the range, can be %NULL 1095 * 1096 * Finds the next range from type_a which is not marked as unsuitable 1097 * in type_b. 1098 * 1099 * Reverse of __next_mem_range(). 
1100 */ 1101 void __init_memblock __next_mem_range_rev(u64 *idx, int nid, 1102 enum memblock_flags flags, 1103 struct memblock_type *type_a, 1104 struct memblock_type *type_b, 1105 phys_addr_t *out_start, 1106 phys_addr_t *out_end, int *out_nid) 1107 { 1108 int idx_a = *idx & 0xffffffff; 1109 int idx_b = *idx >> 32; 1110 1111 if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) 1112 nid = NUMA_NO_NODE; 1113 1114 if (*idx == (u64)ULLONG_MAX) { 1115 idx_a = type_a->cnt - 1; 1116 if (type_b != NULL) 1117 idx_b = type_b->cnt; 1118 else 1119 idx_b = 0; 1120 } 1121 1122 for (; idx_a >= 0; idx_a--) { 1123 struct memblock_region *m = &type_a->regions[idx_a]; 1124 1125 phys_addr_t m_start = m->base; 1126 phys_addr_t m_end = m->base + m->size; 1127 int m_nid = memblock_get_region_node(m); 1128 1129 /* only memory regions are associated with nodes, check it */ 1130 if (nid != NUMA_NO_NODE && nid != m_nid) 1131 continue; 1132 1133 /* skip hotpluggable memory regions if needed */ 1134 if (movable_node_is_enabled() && memblock_is_hotpluggable(m)) 1135 continue; 1136 1137 /* if we want mirror memory skip non-mirror memory regions */ 1138 if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m)) 1139 continue; 1140 1141 /* skip nomap memory unless we were asked for it explicitly */ 1142 if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m)) 1143 continue; 1144 1145 if (!type_b) { 1146 if (out_start) 1147 *out_start = m_start; 1148 if (out_end) 1149 *out_end = m_end; 1150 if (out_nid) 1151 *out_nid = m_nid; 1152 idx_a--; 1153 *idx = (u32)idx_a | (u64)idx_b << 32; 1154 return; 1155 } 1156 1157 /* scan areas before each reservation */ 1158 for (; idx_b >= 0; idx_b--) { 1159 struct memblock_region *r; 1160 phys_addr_t r_start; 1161 phys_addr_t r_end; 1162 1163 r = &type_b->regions[idx_b]; 1164 r_start = idx_b ? r[-1].base + r[-1].size : 0; 1165 r_end = idx_b < type_b->cnt ? 
1166 r->base : PHYS_ADDR_MAX; 1167 /* 1168 * if idx_b advanced past idx_a, 1169 * break out to advance idx_a 1170 */ 1171 1172 if (r_end <= m_start) 1173 break; 1174 /* if the two regions intersect, we're done */ 1175 if (m_end > r_start) { 1176 if (out_start) 1177 *out_start = max(m_start, r_start); 1178 if (out_end) 1179 *out_end = min(m_end, r_end); 1180 if (out_nid) 1181 *out_nid = m_nid; 1182 if (m_start >= r_start) 1183 idx_a--; 1184 else 1185 idx_b--; 1186 *idx = (u32)idx_a | (u64)idx_b << 32; 1187 return; 1188 } 1189 } 1190 } 1191 /* signal end of iteration */ 1192 *idx = ULLONG_MAX; 1193 } 1194 1195 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP 1196 /* 1197 * Common iterator interface used to define for_each_mem_pfn_range(). 1198 */ 1199 void __init_memblock __next_mem_pfn_range(int *idx, int nid, 1200 unsigned long *out_start_pfn, 1201 unsigned long *out_end_pfn, int *out_nid) 1202 { 1203 struct memblock_type *type = &memblock.memory; 1204 struct memblock_region *r; 1205 1206 while (++*idx < type->cnt) { 1207 r = &type->regions[*idx]; 1208 1209 if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size)) 1210 continue; 1211 if (nid == MAX_NUMNODES || nid == r->nid) 1212 break; 1213 } 1214 if (*idx >= type->cnt) { 1215 *idx = -1; 1216 return; 1217 } 1218 1219 if (out_start_pfn) 1220 *out_start_pfn = PFN_UP(r->base); 1221 if (out_end_pfn) 1222 *out_end_pfn = PFN_DOWN(r->base + r->size); 1223 if (out_nid) 1224 *out_nid = r->nid; 1225 } 1226 1227 /** 1228 * memblock_set_node - set node ID on memblock regions 1229 * @base: base of area to set node ID for 1230 * @size: size of area to set node ID for 1231 * @type: memblock type to set node ID for 1232 * @nid: node ID to set 1233 * 1234 * Set the nid of memblock @type regions in [@base, @base + @size) to @nid. 1235 * Regions which cross the area boundaries are split as necessary. 1236 * 1237 * Return: 1238 * 0 on success, -errno on failure. 
1239 */ 1240 int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, 1241 struct memblock_type *type, int nid) 1242 { 1243 int start_rgn, end_rgn; 1244 int i, ret; 1245 1246 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); 1247 if (ret) 1248 return ret; 1249 1250 for (i = start_rgn; i < end_rgn; i++) 1251 memblock_set_region_node(&type->regions[i], nid); 1252 1253 memblock_merge_regions(type); 1254 return 0; 1255 } 1256 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ 1257 1258 static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, 1259 phys_addr_t align, phys_addr_t start, 1260 phys_addr_t end, int nid, 1261 enum memblock_flags flags) 1262 { 1263 phys_addr_t found; 1264 1265 if (!align) { 1266 /* Can't use WARNs this early in boot on powerpc */ 1267 dump_stack(); 1268 align = SMP_CACHE_BYTES; 1269 } 1270 1271 found = memblock_find_in_range_node(size, align, start, end, nid, 1272 flags); 1273 if (found && !memblock_reserve(found, size)) { 1274 /* 1275 * The min_count is set to 0 so that memblock allocations are 1276 * never reported as leaks. 
1277 */ 1278 kmemleak_alloc_phys(found, size, 0, 0); 1279 return found; 1280 } 1281 return 0; 1282 } 1283 1284 phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align, 1285 phys_addr_t start, phys_addr_t end, 1286 enum memblock_flags flags) 1287 { 1288 return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE, 1289 flags); 1290 } 1291 1292 phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size, 1293 phys_addr_t align, phys_addr_t max_addr, 1294 int nid, enum memblock_flags flags) 1295 { 1296 return memblock_alloc_range_nid(size, align, 0, max_addr, nid, flags); 1297 } 1298 1299 phys_addr_t __init memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) 1300 { 1301 enum memblock_flags flags = choose_memblock_flags(); 1302 phys_addr_t ret; 1303 1304 again: 1305 ret = memblock_alloc_base_nid(size, align, MEMBLOCK_ALLOC_ACCESSIBLE, 1306 nid, flags); 1307 1308 if (!ret && (flags & MEMBLOCK_MIRROR)) { 1309 flags &= ~MEMBLOCK_MIRROR; 1310 goto again; 1311 } 1312 return ret; 1313 } 1314 1315 phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) 1316 { 1317 return memblock_alloc_base_nid(size, align, max_addr, NUMA_NO_NODE, 1318 MEMBLOCK_NONE); 1319 } 1320 1321 phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) 1322 { 1323 phys_addr_t alloc; 1324 1325 alloc = __memblock_alloc_base(size, align, max_addr); 1326 1327 if (alloc == 0) 1328 panic("ERROR: Failed to allocate %pa bytes below %pa.\n", 1329 &size, &max_addr); 1330 1331 return alloc; 1332 } 1333 1334 phys_addr_t __init memblock_phys_alloc(phys_addr_t size, phys_addr_t align) 1335 { 1336 return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); 1337 } 1338 1339 phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) 1340 { 1341 phys_addr_t res = memblock_phys_alloc_nid(size, align, nid); 1342 1343 if (res) 1344 return res; 1345 
return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); 1346 } 1347 1348 /** 1349 * memblock_alloc_internal - allocate boot memory block 1350 * @size: size of memory block to be allocated in bytes 1351 * @align: alignment of the region and block's size 1352 * @min_addr: the lower bound of the memory region to allocate (phys address) 1353 * @max_addr: the upper bound of the memory region to allocate (phys address) 1354 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 1355 * 1356 * The @min_addr limit is dropped if it can not be satisfied and the allocation 1357 * will fall back to memory below @min_addr. Also, allocation may fall back 1358 * to any node in the system if the specified node can not 1359 * hold the requested memory. 1360 * 1361 * The allocation is performed from memory region limited by 1362 * memblock.current_limit if @max_addr == %MEMBLOCK_ALLOC_ACCESSIBLE. 1363 * 1364 * The phys address of allocated boot memory block is converted to virtual and 1365 * allocated memory is reset to 0. 1366 * 1367 * In addition, function sets the min_count to 0 using kmemleak_alloc for 1368 * allocated boot memory block, so that it is never reported as leaks. 1369 * 1370 * Return: 1371 * Virtual address of allocated memory block on success, NULL on failure. 1372 */ 1373 static void * __init memblock_alloc_internal( 1374 phys_addr_t size, phys_addr_t align, 1375 phys_addr_t min_addr, phys_addr_t max_addr, 1376 int nid) 1377 { 1378 phys_addr_t alloc; 1379 void *ptr; 1380 enum memblock_flags flags = choose_memblock_flags(); 1381 1382 if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. 
Use NUMA_NO_NODE instead\n")) 1383 nid = NUMA_NO_NODE; 1384 1385 /* 1386 * Detect any accidental use of these APIs after slab is ready, as at 1387 * this moment memblock may be deinitialized already and its 1388 * internal data may be destroyed (after execution of memblock_free_all) 1389 */ 1390 if (WARN_ON_ONCE(slab_is_available())) 1391 return kzalloc_node(size, GFP_NOWAIT, nid); 1392 1393 if (!align) { 1394 dump_stack(); 1395 align = SMP_CACHE_BYTES; 1396 } 1397 1398 if (max_addr > memblock.current_limit) 1399 max_addr = memblock.current_limit; 1400 again: 1401 alloc = memblock_find_in_range_node(size, align, min_addr, max_addr, 1402 nid, flags); 1403 if (alloc && !memblock_reserve(alloc, size)) 1404 goto done; 1405 1406 if (nid != NUMA_NO_NODE) { 1407 alloc = memblock_find_in_range_node(size, align, min_addr, 1408 max_addr, NUMA_NO_NODE, 1409 flags); 1410 if (alloc && !memblock_reserve(alloc, size)) 1411 goto done; 1412 } 1413 1414 if (min_addr) { 1415 min_addr = 0; 1416 goto again; 1417 } 1418 1419 if (flags & MEMBLOCK_MIRROR) { 1420 flags &= ~MEMBLOCK_MIRROR; 1421 pr_warn("Could not allocate %pap bytes of mirrored memory\n", 1422 &size); 1423 goto again; 1424 } 1425 1426 return NULL; 1427 done: 1428 ptr = phys_to_virt(alloc); 1429 1430 /* Skip kmemleak for kasan_init() due to high volume. */ 1431 if (max_addr != MEMBLOCK_ALLOC_KASAN) 1432 /* 1433 * The min_count is set to 0 so that bootmem allocated 1434 * blocks are never reported as leaks. This is because many 1435 * of these blocks are only referred via the physical 1436 * address which is not looked up by kmemleak. 
1437 */ 1438 kmemleak_alloc(ptr, size, 0, 0); 1439 1440 return ptr; 1441 } 1442 1443 /** 1444 * memblock_alloc_try_nid_raw - allocate boot memory block without zeroing 1445 * memory and without panicking 1446 * @size: size of memory block to be allocated in bytes 1447 * @align: alignment of the region and block's size 1448 * @min_addr: the lower bound of the memory region from where the allocation 1449 * is preferred (phys address) 1450 * @max_addr: the upper bound of the memory region from where the allocation 1451 * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to 1452 * allocate only from memory limited by memblock.current_limit value 1453 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 1454 * 1455 * Public function, provides additional debug information (including caller 1456 * info), if enabled. Does not zero allocated memory, does not panic if request 1457 * cannot be satisfied. 1458 * 1459 * Return: 1460 * Virtual address of allocated memory block on success, NULL on failure. 
1461 */ 1462 void * __init memblock_alloc_try_nid_raw( 1463 phys_addr_t size, phys_addr_t align, 1464 phys_addr_t min_addr, phys_addr_t max_addr, 1465 int nid) 1466 { 1467 void *ptr; 1468 1469 memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pF\n", 1470 __func__, (u64)size, (u64)align, nid, &min_addr, 1471 &max_addr, (void *)_RET_IP_); 1472 1473 ptr = memblock_alloc_internal(size, align, 1474 min_addr, max_addr, nid); 1475 if (ptr && size > 0) 1476 page_init_poison(ptr, size); 1477 1478 return ptr; 1479 } 1480 1481 /** 1482 * memblock_alloc_try_nid_nopanic - allocate boot memory block 1483 * @size: size of memory block to be allocated in bytes 1484 * @align: alignment of the region and block's size 1485 * @min_addr: the lower bound of the memory region from where the allocation 1486 * is preferred (phys address) 1487 * @max_addr: the upper bound of the memory region from where the allocation 1488 * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to 1489 * allocate only from memory limited by memblock.current_limit value 1490 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 1491 * 1492 * Public function, provides additional debug information (including caller 1493 * info), if enabled. This function zeroes the allocated memory. 1494 * 1495 * Return: 1496 * Virtual address of allocated memory block on success, NULL on failure. 
1497 */ 1498 void * __init memblock_alloc_try_nid_nopanic( 1499 phys_addr_t size, phys_addr_t align, 1500 phys_addr_t min_addr, phys_addr_t max_addr, 1501 int nid) 1502 { 1503 void *ptr; 1504 1505 memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pF\n", 1506 __func__, (u64)size, (u64)align, nid, &min_addr, 1507 &max_addr, (void *)_RET_IP_); 1508 1509 ptr = memblock_alloc_internal(size, align, 1510 min_addr, max_addr, nid); 1511 if (ptr) 1512 memset(ptr, 0, size); 1513 return ptr; 1514 } 1515 1516 /** 1517 * memblock_alloc_try_nid - allocate boot memory block with panicking 1518 * @size: size of memory block to be allocated in bytes 1519 * @align: alignment of the region and block's size 1520 * @min_addr: the lower bound of the memory region from where the allocation 1521 * is preferred (phys address) 1522 * @max_addr: the upper bound of the memory region from where the allocation 1523 * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to 1524 * allocate only from memory limited by memblock.current_limit value 1525 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node 1526 * 1527 * Public panicking version of memblock_alloc_try_nid_nopanic() 1528 * which provides debug information (including caller info), if enabled, 1529 * and panics if the request can not be satisfied. 1530 * 1531 * Return: 1532 * Virtual address of allocated memory block on success, NULL on failure. 
1533 */ 1534 void * __init memblock_alloc_try_nid( 1535 phys_addr_t size, phys_addr_t align, 1536 phys_addr_t min_addr, phys_addr_t max_addr, 1537 int nid) 1538 { 1539 void *ptr; 1540 1541 memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pF\n", 1542 __func__, (u64)size, (u64)align, nid, &min_addr, 1543 &max_addr, (void *)_RET_IP_); 1544 ptr = memblock_alloc_internal(size, align, 1545 min_addr, max_addr, nid); 1546 if (ptr) { 1547 memset(ptr, 0, size); 1548 return ptr; 1549 } 1550 1551 panic("%s: Failed to allocate %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa\n", 1552 __func__, (u64)size, (u64)align, nid, &min_addr, &max_addr); 1553 return NULL; 1554 } 1555 1556 /** 1557 * __memblock_free_late - free bootmem block pages directly to buddy allocator 1558 * @base: phys starting address of the boot memory block 1559 * @size: size of the boot memory block in bytes 1560 * 1561 * This is only useful when the bootmem allocator has already been torn 1562 * down, but we are still initializing the system. Pages are released directly 1563 * to the buddy allocator, no bootmem metadata is updated because it is gone. 
1564 */ 1565 void __init __memblock_free_late(phys_addr_t base, phys_addr_t size) 1566 { 1567 phys_addr_t cursor, end; 1568 1569 end = base + size - 1; 1570 memblock_dbg("%s: [%pa-%pa] %pF\n", 1571 __func__, &base, &end, (void *)_RET_IP_); 1572 kmemleak_free_part_phys(base, size); 1573 cursor = PFN_UP(base); 1574 end = PFN_DOWN(base + size); 1575 1576 for (; cursor < end; cursor++) { 1577 memblock_free_pages(pfn_to_page(cursor), cursor, 0); 1578 totalram_pages_inc(); 1579 } 1580 } 1581 1582 /* 1583 * Remaining API functions 1584 */ 1585 1586 phys_addr_t __init_memblock memblock_phys_mem_size(void) 1587 { 1588 return memblock.memory.total_size; 1589 } 1590 1591 phys_addr_t __init_memblock memblock_reserved_size(void) 1592 { 1593 return memblock.reserved.total_size; 1594 } 1595 1596 phys_addr_t __init memblock_mem_size(unsigned long limit_pfn) 1597 { 1598 unsigned long pages = 0; 1599 struct memblock_region *r; 1600 unsigned long start_pfn, end_pfn; 1601 1602 for_each_memblock(memory, r) { 1603 start_pfn = memblock_region_memory_base_pfn(r); 1604 end_pfn = memblock_region_memory_end_pfn(r); 1605 start_pfn = min_t(unsigned long, start_pfn, limit_pfn); 1606 end_pfn = min_t(unsigned long, end_pfn, limit_pfn); 1607 pages += end_pfn - start_pfn; 1608 } 1609 1610 return PFN_PHYS(pages); 1611 } 1612 1613 /* lowest address */ 1614 phys_addr_t __init_memblock memblock_start_of_DRAM(void) 1615 { 1616 return memblock.memory.regions[0].base; 1617 } 1618 1619 phys_addr_t __init_memblock memblock_end_of_DRAM(void) 1620 { 1621 int idx = memblock.memory.cnt - 1; 1622 1623 return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size); 1624 } 1625 1626 static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit) 1627 { 1628 phys_addr_t max_addr = PHYS_ADDR_MAX; 1629 struct memblock_region *r; 1630 1631 /* 1632 * translate the memory @limit size into the max address within one of 1633 * the memory memblock regions, if the @limit exceeds the total size 1634 * 
of those regions, max_addr will keep original value PHYS_ADDR_MAX 1635 */ 1636 for_each_memblock(memory, r) { 1637 if (limit <= r->size) { 1638 max_addr = r->base + limit; 1639 break; 1640 } 1641 limit -= r->size; 1642 } 1643 1644 return max_addr; 1645 } 1646 1647 void __init memblock_enforce_memory_limit(phys_addr_t limit) 1648 { 1649 phys_addr_t max_addr = PHYS_ADDR_MAX; 1650 1651 if (!limit) 1652 return; 1653 1654 max_addr = __find_max_addr(limit); 1655 1656 /* @limit exceeds the total size of the memory, do nothing */ 1657 if (max_addr == PHYS_ADDR_MAX) 1658 return; 1659 1660 /* truncate both memory and reserved regions */ 1661 memblock_remove_range(&memblock.memory, max_addr, 1662 PHYS_ADDR_MAX); 1663 memblock_remove_range(&memblock.reserved, max_addr, 1664 PHYS_ADDR_MAX); 1665 } 1666 1667 void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size) 1668 { 1669 int start_rgn, end_rgn; 1670 int i, ret; 1671 1672 if (!size) 1673 return; 1674 1675 ret = memblock_isolate_range(&memblock.memory, base, size, 1676 &start_rgn, &end_rgn); 1677 if (ret) 1678 return; 1679 1680 /* remove all the MAP regions */ 1681 for (i = memblock.memory.cnt - 1; i >= end_rgn; i--) 1682 if (!memblock_is_nomap(&memblock.memory.regions[i])) 1683 memblock_remove_region(&memblock.memory, i); 1684 1685 for (i = start_rgn - 1; i >= 0; i--) 1686 if (!memblock_is_nomap(&memblock.memory.regions[i])) 1687 memblock_remove_region(&memblock.memory, i); 1688 1689 /* truncate the reserved regions */ 1690 memblock_remove_range(&memblock.reserved, 0, base); 1691 memblock_remove_range(&memblock.reserved, 1692 base + size, PHYS_ADDR_MAX); 1693 } 1694 1695 void __init memblock_mem_limit_remove_map(phys_addr_t limit) 1696 { 1697 phys_addr_t max_addr; 1698 1699 if (!limit) 1700 return; 1701 1702 max_addr = __find_max_addr(limit); 1703 1704 /* @limit exceeds the total size of the memory, do nothing */ 1705 if (max_addr == PHYS_ADDR_MAX) 1706 return; 1707 1708 memblock_cap_memory_range(0, 
max_addr); 1709 } 1710 1711 static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr) 1712 { 1713 unsigned int left = 0, right = type->cnt; 1714 1715 do { 1716 unsigned int mid = (right + left) / 2; 1717 1718 if (addr < type->regions[mid].base) 1719 right = mid; 1720 else if (addr >= (type->regions[mid].base + 1721 type->regions[mid].size)) 1722 left = mid + 1; 1723 else 1724 return mid; 1725 } while (left < right); 1726 return -1; 1727 } 1728 1729 bool __init_memblock memblock_is_reserved(phys_addr_t addr) 1730 { 1731 return memblock_search(&memblock.reserved, addr) != -1; 1732 } 1733 1734 bool __init_memblock memblock_is_memory(phys_addr_t addr) 1735 { 1736 return memblock_search(&memblock.memory, addr) != -1; 1737 } 1738 1739 bool __init_memblock memblock_is_map_memory(phys_addr_t addr) 1740 { 1741 int i = memblock_search(&memblock.memory, addr); 1742 1743 if (i == -1) 1744 return false; 1745 return !memblock_is_nomap(&memblock.memory.regions[i]); 1746 } 1747 1748 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP 1749 int __init_memblock memblock_search_pfn_nid(unsigned long pfn, 1750 unsigned long *start_pfn, unsigned long *end_pfn) 1751 { 1752 struct memblock_type *type = &memblock.memory; 1753 int mid = memblock_search(type, PFN_PHYS(pfn)); 1754 1755 if (mid == -1) 1756 return -1; 1757 1758 *start_pfn = PFN_DOWN(type->regions[mid].base); 1759 *end_pfn = PFN_DOWN(type->regions[mid].base + type->regions[mid].size); 1760 1761 return type->regions[mid].nid; 1762 } 1763 #endif 1764 1765 /** 1766 * memblock_is_region_memory - check if a region is a subset of memory 1767 * @base: base of region to check 1768 * @size: size of region to check 1769 * 1770 * Check if the region [@base, @base + @size) is a subset of a memory block. 
1771 * 1772 * Return: 1773 * 0 if false, non-zero if true 1774 */ 1775 bool __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size) 1776 { 1777 int idx = memblock_search(&memblock.memory, base); 1778 phys_addr_t end = base + memblock_cap_size(base, &size); 1779 1780 if (idx == -1) 1781 return false; 1782 return (memblock.memory.regions[idx].base + 1783 memblock.memory.regions[idx].size) >= end; 1784 } 1785 1786 /** 1787 * memblock_is_region_reserved - check if a region intersects reserved memory 1788 * @base: base of region to check 1789 * @size: size of region to check 1790 * 1791 * Check if the region [@base, @base + @size) intersects a reserved 1792 * memory block. 1793 * 1794 * Return: 1795 * True if they intersect, false if not. 1796 */ 1797 bool __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) 1798 { 1799 memblock_cap_size(base, &size); 1800 return memblock_overlaps_region(&memblock.reserved, base, size); 1801 } 1802 1803 void __init_memblock memblock_trim_memory(phys_addr_t align) 1804 { 1805 phys_addr_t start, end, orig_start, orig_end; 1806 struct memblock_region *r; 1807 1808 for_each_memblock(memory, r) { 1809 orig_start = r->base; 1810 orig_end = r->base + r->size; 1811 start = round_up(orig_start, align); 1812 end = round_down(orig_end, align); 1813 1814 if (start == orig_start && end == orig_end) 1815 continue; 1816 1817 if (start < end) { 1818 r->base = start; 1819 r->size = end - start; 1820 } else { 1821 memblock_remove_region(&memblock.memory, 1822 r - memblock.memory.regions); 1823 r--; 1824 } 1825 } 1826 } 1827 1828 void __init_memblock memblock_set_current_limit(phys_addr_t limit) 1829 { 1830 memblock.current_limit = limit; 1831 } 1832 1833 phys_addr_t __init_memblock memblock_get_current_limit(void) 1834 { 1835 return memblock.current_limit; 1836 } 1837 1838 static void __init_memblock memblock_dump(struct memblock_type *type) 1839 { 1840 phys_addr_t base, end, size; 1841 enum memblock_flags 
flags; 1842 int idx; 1843 struct memblock_region *rgn; 1844 1845 pr_info(" %s.cnt = 0x%lx\n", type->name, type->cnt); 1846 1847 for_each_memblock_type(idx, type, rgn) { 1848 char nid_buf[32] = ""; 1849 1850 base = rgn->base; 1851 size = rgn->size; 1852 end = base + size - 1; 1853 flags = rgn->flags; 1854 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP 1855 if (memblock_get_region_node(rgn) != MAX_NUMNODES) 1856 snprintf(nid_buf, sizeof(nid_buf), " on node %d", 1857 memblock_get_region_node(rgn)); 1858 #endif 1859 pr_info(" %s[%#x]\t[%pa-%pa], %pa bytes%s flags: %#x\n", 1860 type->name, idx, &base, &end, &size, nid_buf, flags); 1861 } 1862 } 1863 1864 void __init_memblock __memblock_dump_all(void) 1865 { 1866 pr_info("MEMBLOCK configuration:\n"); 1867 pr_info(" memory size = %pa reserved size = %pa\n", 1868 &memblock.memory.total_size, 1869 &memblock.reserved.total_size); 1870 1871 memblock_dump(&memblock.memory); 1872 memblock_dump(&memblock.reserved); 1873 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP 1874 memblock_dump(&memblock.physmem); 1875 #endif 1876 } 1877 1878 void __init memblock_allow_resize(void) 1879 { 1880 memblock_can_resize = 1; 1881 } 1882 1883 static int __init early_memblock(char *p) 1884 { 1885 if (p && strstr(p, "debug")) 1886 memblock_debug = 1; 1887 return 0; 1888 } 1889 early_param("memblock", early_memblock); 1890 1891 static void __init __free_pages_memory(unsigned long start, unsigned long end) 1892 { 1893 int order; 1894 1895 while (start < end) { 1896 order = min(MAX_ORDER - 1UL, __ffs(start)); 1897 1898 while (start + (1UL << order) > end) 1899 order--; 1900 1901 memblock_free_pages(pfn_to_page(start), start, order); 1902 1903 start += (1UL << order); 1904 } 1905 } 1906 1907 static unsigned long __init __free_memory_core(phys_addr_t start, 1908 phys_addr_t end) 1909 { 1910 unsigned long start_pfn = PFN_UP(start); 1911 unsigned long end_pfn = min_t(unsigned long, 1912 PFN_DOWN(end), max_low_pfn); 1913 1914 if (start_pfn >= end_pfn) 1915 return 0; 1916 1917 
__free_pages_memory(start_pfn, end_pfn); 1918 1919 return end_pfn - start_pfn; 1920 } 1921 1922 static unsigned long __init free_low_memory_core_early(void) 1923 { 1924 unsigned long count = 0; 1925 phys_addr_t start, end; 1926 u64 i; 1927 1928 memblock_clear_hotplug(0, -1); 1929 1930 for_each_reserved_mem_region(i, &start, &end) 1931 reserve_bootmem_region(start, end); 1932 1933 /* 1934 * We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id 1935 * because in some case like Node0 doesn't have RAM installed 1936 * low ram will be on Node1 1937 */ 1938 for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, 1939 NULL) 1940 count += __free_memory_core(start, end); 1941 1942 return count; 1943 } 1944 1945 static int reset_managed_pages_done __initdata; 1946 1947 void reset_node_managed_pages(pg_data_t *pgdat) 1948 { 1949 struct zone *z; 1950 1951 for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) 1952 atomic_long_set(&z->managed_pages, 0); 1953 } 1954 1955 void __init reset_all_zones_managed_pages(void) 1956 { 1957 struct pglist_data *pgdat; 1958 1959 if (reset_managed_pages_done) 1960 return; 1961 1962 for_each_online_pgdat(pgdat) 1963 reset_node_managed_pages(pgdat); 1964 1965 reset_managed_pages_done = 1; 1966 } 1967 1968 /** 1969 * memblock_free_all - release free pages to the buddy allocator 1970 * 1971 * Return: the number of pages actually released. 
1972 */ 1973 unsigned long __init memblock_free_all(void) 1974 { 1975 unsigned long pages; 1976 1977 reset_all_zones_managed_pages(); 1978 1979 pages = free_low_memory_core_early(); 1980 totalram_pages_add(pages); 1981 1982 return pages; 1983 } 1984 1985 #if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_ARCH_DISCARD_MEMBLOCK) 1986 1987 static int memblock_debug_show(struct seq_file *m, void *private) 1988 { 1989 struct memblock_type *type = m->private; 1990 struct memblock_region *reg; 1991 int i; 1992 phys_addr_t end; 1993 1994 for (i = 0; i < type->cnt; i++) { 1995 reg = &type->regions[i]; 1996 end = reg->base + reg->size - 1; 1997 1998 seq_printf(m, "%4d: ", i); 1999 seq_printf(m, "%pa..%pa\n", ®->base, &end); 2000 } 2001 return 0; 2002 } 2003 DEFINE_SHOW_ATTRIBUTE(memblock_debug); 2004 2005 static int __init memblock_init_debugfs(void) 2006 { 2007 struct dentry *root = debugfs_create_dir("memblock", NULL); 2008 2009 debugfs_create_file("memory", 0444, root, 2010 &memblock.memory, &memblock_debug_fops); 2011 debugfs_create_file("reserved", 0444, root, 2012 &memblock.reserved, &memblock_debug_fops); 2013 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP 2014 debugfs_create_file("physmem", 0444, root, 2015 &memblock.physmem, &memblock_debug_fops); 2016 #endif 2017 2018 return 0; 2019 } 2020 __initcall(memblock_init_debugfs); 2021 2022 #endif /* CONFIG_DEBUG_FS */ 2023