1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Memory subsystem support 4 * 5 * Written by Matt Tolentino <matthew.e.tolentino@intel.com> 6 * Dave Hansen <haveblue@us.ibm.com> 7 * 8 * This file provides the necessary infrastructure to represent 9 * a SPARSEMEM-memory-model system's physical memory in /sysfs. 10 * All arch-independent code that assumes MEMORY_HOTPLUG requires 11 * SPARSEMEM should be contained here, or in mm/memory_hotplug.c. 12 */ 13 14 #include <linux/module.h> 15 #include <linux/init.h> 16 #include <linux/topology.h> 17 #include <linux/capability.h> 18 #include <linux/device.h> 19 #include <linux/memory.h> 20 #include <linux/memory_hotplug.h> 21 #include <linux/mm.h> 22 #include <linux/stat.h> 23 #include <linux/slab.h> 24 25 #include <linux/atomic.h> 26 #include <linux/uaccess.h> 27 28 #define MEMORY_CLASS_NAME "memory" 29 30 #define to_memory_block(dev) container_of(dev, struct memory_block, dev) 31 32 static int sections_per_block; 33 34 static inline unsigned long base_memory_block_id(unsigned long section_nr) 35 { 36 return section_nr / sections_per_block; 37 } 38 39 static inline unsigned long pfn_to_block_id(unsigned long pfn) 40 { 41 return base_memory_block_id(pfn_to_section_nr(pfn)); 42 } 43 44 static inline unsigned long phys_to_block_id(unsigned long phys) 45 { 46 return pfn_to_block_id(PFN_DOWN(phys)); 47 } 48 49 static int memory_subsys_online(struct device *dev); 50 static int memory_subsys_offline(struct device *dev); 51 52 static struct bus_type memory_subsys = { 53 .name = MEMORY_CLASS_NAME, 54 .dev_name = MEMORY_CLASS_NAME, 55 .online = memory_subsys_online, 56 .offline = memory_subsys_offline, 57 }; 58 59 static BLOCKING_NOTIFIER_HEAD(memory_chain); 60 61 int register_memory_notifier(struct notifier_block *nb) 62 { 63 return blocking_notifier_chain_register(&memory_chain, nb); 64 } 65 EXPORT_SYMBOL(register_memory_notifier); 66 67 void unregister_memory_notifier(struct notifier_block *nb) 68 { 69 blocking_notifier_chain_unregister(&memory_chain, nb); 70 } 71 EXPORT_SYMBOL(unregister_memory_notifier); 72 73 static void memory_block_release(struct device *dev) 74 { 75 struct memory_block *mem = to_memory_block(dev); 76 77 kfree(mem); 78 } 79 80 unsigned long __weak memory_block_size_bytes(void) 81 { 82 return MIN_MEMORY_BLOCK_SIZE; 83 } 84 EXPORT_SYMBOL_GPL(memory_block_size_bytes); 85 86 /* 87 * Show the first physical section index (number) of this memory block. 88 */ 89 static ssize_t phys_index_show(struct device *dev, 90 struct device_attribute *attr, char *buf) 91 { 92 struct memory_block *mem = to_memory_block(dev); 93 unsigned long phys_index; 94 95 phys_index = mem->start_section_nr / sections_per_block; 96 return sprintf(buf, "%08lx\n", phys_index); 97 } 98 99 /* 100 * Legacy interface that we cannot remove. Always indicate "removable" 101 * with CONFIG_MEMORY_HOTREMOVE - bad heuristic. 102 */ 103 static ssize_t removable_show(struct device *dev, struct device_attribute *attr, 104 char *buf) 105 { 106 return sprintf(buf, "%d\n", (int)IS_ENABLED(CONFIG_MEMORY_HOTREMOVE)); 107 } 108 109 /* 110 * online, offline, going offline, etc. 111 */ 112 static ssize_t state_show(struct device *dev, struct device_attribute *attr, 113 char *buf) 114 { 115 struct memory_block *mem = to_memory_block(dev); 116 ssize_t len = 0; 117 118 /* 119 * We can probably put these states in a nice little array 120 * so that they're not open-coded 121 */ 122 switch (mem->state) { 123 case MEM_ONLINE: 124 len = sprintf(buf, "online\n"); 125 break; 126 case MEM_OFFLINE: 127 len = sprintf(buf, "offline\n"); 128 break; 129 case MEM_GOING_OFFLINE: 130 len = sprintf(buf, "going-offline\n"); 131 break; 132 default: 133 len = sprintf(buf, "ERROR-UNKNOWN-%ld\n", 134 mem->state); 135 WARN_ON(1); 136 break; 137 } 138 139 return len; 140 } 141 142 int memory_notify(unsigned long val, void *v) 143 { 144 return blocking_notifier_call_chain(&memory_chain, val, v); 145 } 146 147 /* 148 * The probe routines leave the pages uninitialized, just as the bootmem code 149 * does. Make sure we do not access them, but instead use only information from 150 * within sections. 151 */ 152 static bool pages_correctly_probed(unsigned long start_pfn) 153 { 154 unsigned long section_nr = pfn_to_section_nr(start_pfn); 155 unsigned long section_nr_end = section_nr + sections_per_block; 156 unsigned long pfn = start_pfn; 157 158 /* 159 * memmap between sections is not contiguous except with 160 * SPARSEMEM_VMEMMAP. We lookup the page once per section 161 * and assume memmap is contiguous within each section 162 */ 163 for (; section_nr < section_nr_end; section_nr++) { 164 if (WARN_ON_ONCE(!pfn_valid(pfn))) 165 return false; 166 167 if (!present_section_nr(section_nr)) { 168 pr_warn("section %ld pfn[%lx, %lx) not present\n", 169 section_nr, pfn, pfn + PAGES_PER_SECTION); 170 return false; 171 } else if (!valid_section_nr(section_nr)) { 172 pr_warn("section %ld pfn[%lx, %lx) no valid memmap\n", 173 section_nr, pfn, pfn + PAGES_PER_SECTION); 174 return false; 175 } else if (online_section_nr(section_nr)) { 176 pr_warn("section %ld pfn[%lx, %lx) is already online\n", 177 section_nr, pfn, pfn + PAGES_PER_SECTION); 178 return false; 179 } 180 pfn += PAGES_PER_SECTION; 181 } 182 183 return true; 184 } 185 186 /* 187 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is 188 * OK to have direct references to sparsemem variables in here. 189 */ 190 static int 191 memory_block_action(unsigned long start_section_nr, unsigned long action, 192 int online_type, int nid) 193 { 194 unsigned long start_pfn; 195 unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; 196 int ret; 197 198 start_pfn = section_nr_to_pfn(start_section_nr); 199 200 switch (action) { 201 case MEM_ONLINE: 202 if (!pages_correctly_probed(start_pfn)) 203 return -EBUSY; 204 205 ret = online_pages(start_pfn, nr_pages, online_type, nid); 206 break; 207 case MEM_OFFLINE: 208 ret = offline_pages(start_pfn, nr_pages); 209 break; 210 default: 211 WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: " 212 "%ld\n", __func__, start_section_nr, action, action); 213 ret = -EINVAL; 214 } 215 216 return ret; 217 } 218 219 static int memory_block_change_state(struct memory_block *mem, 220 unsigned long to_state, unsigned long from_state_req) 221 { 222 int ret = 0; 223 224 if (mem->state != from_state_req) 225 return -EINVAL; 226 227 if (to_state == MEM_OFFLINE) 228 mem->state = MEM_GOING_OFFLINE; 229 230 ret = memory_block_action(mem->start_section_nr, to_state, 231 mem->online_type, mem->nid); 232 233 mem->state = ret ? from_state_req : to_state; 234 235 return ret; 236 } 237 238 /* The device lock serializes operations on memory_subsys_[online|offline] */ 239 static int memory_subsys_online(struct device *dev) 240 { 241 struct memory_block *mem = to_memory_block(dev); 242 int ret; 243 244 if (mem->state == MEM_ONLINE) 245 return 0; 246 247 /* 248 * If we are called from state_store(), online_type will be 249 * set >= 0 Otherwise we were called from the device online 250 * attribute and need to set the online_type. 251 */ 252 if (mem->online_type < 0) 253 mem->online_type = MMOP_ONLINE_KEEP; 254 255 ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); 256 257 /* clear online_type */ 258 mem->online_type = -1; 259 260 return ret; 261 } 262 263 static int memory_subsys_offline(struct device *dev) 264 { 265 struct memory_block *mem = to_memory_block(dev); 266 267 if (mem->state == MEM_OFFLINE) 268 return 0; 269 270 /* Can't offline block with non-present sections */ 271 if (mem->section_count != sections_per_block) 272 return -EINVAL; 273 274 return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE); 275 } 276 277 static ssize_t state_store(struct device *dev, struct device_attribute *attr, 278 const char *buf, size_t count) 279 { 280 struct memory_block *mem = to_memory_block(dev); 281 int ret, online_type; 282 283 ret = lock_device_hotplug_sysfs(); 284 if (ret) 285 return ret; 286 287 if (sysfs_streq(buf, "online_kernel")) 288 online_type = MMOP_ONLINE_KERNEL; 289 else if (sysfs_streq(buf, "online_movable")) 290 online_type = MMOP_ONLINE_MOVABLE; 291 else if (sysfs_streq(buf, "online")) 292 online_type = MMOP_ONLINE_KEEP; 293 else if (sysfs_streq(buf, "offline")) 294 online_type = MMOP_OFFLINE; 295 else { 296 ret = -EINVAL; 297 goto err; 298 } 299 300 switch (online_type) { 301 case MMOP_ONLINE_KERNEL: 302 case MMOP_ONLINE_MOVABLE: 303 case MMOP_ONLINE_KEEP: 304 /* mem->online_type is protected by device_hotplug_lock */ 305 mem->online_type = online_type; 306 ret = device_online(&mem->dev); 307 break; 308 case MMOP_OFFLINE: 309 ret = device_offline(&mem->dev); 310 break; 311 default: 312 ret = -EINVAL; /* should never happen */ 313 } 314 315 err: 316 unlock_device_hotplug(); 317 318 if (ret < 0) 319 return ret; 320 if (ret) 321 return -EINVAL; 322 323 return count; 324 } 325 326 /* 327 * phys_device is a bad name for this. What I really want 328 * is a way to differentiate between memory ranges that 329 * are part of physical devices that constitute 330 * a complete removable unit or fru. 331 * i.e. do these ranges belong to the same physical device, 332 * s.t. if I offline all of these sections I can then 333 * remove the physical device? 334 */ 335 static ssize_t phys_device_show(struct device *dev, 336 struct device_attribute *attr, char *buf) 337 { 338 struct memory_block *mem = to_memory_block(dev); 339 return sprintf(buf, "%d\n", mem->phys_device); 340 } 341 342 #ifdef CONFIG_MEMORY_HOTREMOVE 343 static void print_allowed_zone(char *buf, int nid, unsigned long start_pfn, 344 unsigned long nr_pages, int online_type, 345 struct zone *default_zone) 346 { 347 struct zone *zone; 348 349 zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages); 350 if (zone != default_zone) { 351 strcat(buf, " "); 352 strcat(buf, zone->name); 353 } 354 } 355 356 static ssize_t valid_zones_show(struct device *dev, 357 struct device_attribute *attr, char *buf) 358 { 359 struct memory_block *mem = to_memory_block(dev); 360 unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); 361 unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; 362 struct zone *default_zone; 363 int nid; 364 365 /* 366 * Check the existing zone. Make sure that we do that only on the 367 * online nodes otherwise the page_zone is not reliable 368 */ 369 if (mem->state == MEM_ONLINE) { 370 /* 371 * The block contains more than one zone can not be offlined. 372 * This can happen e.g. for ZONE_DMA and ZONE_DMA32 373 */ 374 default_zone = test_pages_in_a_zone(start_pfn, 375 start_pfn + nr_pages); 376 if (!default_zone) 377 return sprintf(buf, "none\n"); 378 strcat(buf, default_zone->name); 379 goto out; 380 } 381 382 nid = mem->nid; 383 default_zone = zone_for_pfn_range(MMOP_ONLINE_KEEP, nid, start_pfn, nr_pages); 384 strcat(buf, default_zone->name); 385 386 print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_KERNEL, 387 default_zone); 388 print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_MOVABLE, 389 default_zone); 390 out: 391 strcat(buf, "\n"); 392 393 return strlen(buf); 394 } 395 static DEVICE_ATTR_RO(valid_zones); 396 #endif 397 398 static DEVICE_ATTR_RO(phys_index); 399 static DEVICE_ATTR_RW(state); 400 static DEVICE_ATTR_RO(phys_device); 401 static DEVICE_ATTR_RO(removable); 402 403 /* 404 * Show the memory block size (shared by all memory blocks). 405 */ 406 static ssize_t block_size_bytes_show(struct device *dev, 407 struct device_attribute *attr, char *buf) 408 { 409 return sprintf(buf, "%lx\n", memory_block_size_bytes()); 410 } 411 412 static DEVICE_ATTR_RO(block_size_bytes); 413 414 /* 415 * Memory auto online policy. 416 */ 417 418 static ssize_t auto_online_blocks_show(struct device *dev, 419 struct device_attribute *attr, char *buf) 420 { 421 if (memhp_auto_online) 422 return sprintf(buf, "online\n"); 423 else 424 return sprintf(buf, "offline\n"); 425 } 426 427 static ssize_t auto_online_blocks_store(struct device *dev, 428 struct device_attribute *attr, 429 const char *buf, size_t count) 430 { 431 if (sysfs_streq(buf, "online")) 432 memhp_auto_online = true; 433 else if (sysfs_streq(buf, "offline")) 434 memhp_auto_online = false; 435 else 436 return -EINVAL; 437 438 return count; 439 } 440 441 static DEVICE_ATTR_RW(auto_online_blocks); 442 443 /* 444 * Some architectures will have custom drivers to do this, and 445 * will not need to do it from userspace. The fake hot-add code 446 * as well as ppc64 will do all of their discovery in userspace 447 * and will require this interface. 448 */ 449 #ifdef CONFIG_ARCH_MEMORY_PROBE 450 static ssize_t probe_store(struct device *dev, struct device_attribute *attr, 451 const char *buf, size_t count) 452 { 453 u64 phys_addr; 454 int nid, ret; 455 unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block; 456 457 ret = kstrtoull(buf, 0, &phys_addr); 458 if (ret) 459 return ret; 460 461 if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1)) 462 return -EINVAL; 463 464 ret = lock_device_hotplug_sysfs(); 465 if (ret) 466 return ret; 467 468 nid = memory_add_physaddr_to_nid(phys_addr); 469 ret = __add_memory(nid, phys_addr, 470 MIN_MEMORY_BLOCK_SIZE * sections_per_block); 471 472 if (ret) 473 goto out; 474 475 ret = count; 476 out: 477 unlock_device_hotplug(); 478 return ret; 479 } 480 481 static DEVICE_ATTR_WO(probe); 482 #endif 483 484 #ifdef CONFIG_MEMORY_FAILURE 485 /* 486 * Support for offlining pages of memory 487 */ 488 489 /* Soft offline a page */ 490 static ssize_t soft_offline_page_store(struct device *dev, 491 struct device_attribute *attr, 492 const char *buf, size_t count) 493 { 494 int ret; 495 u64 pfn; 496 if (!capable(CAP_SYS_ADMIN)) 497 return -EPERM; 498 if (kstrtoull(buf, 0, &pfn) < 0) 499 return -EINVAL; 500 pfn >>= PAGE_SHIFT; 501 ret = soft_offline_page(pfn, 0); 502 return ret == 0 ? count : ret; 503 } 504 505 /* Forcibly offline a page, including killing processes. */ 506 static ssize_t hard_offline_page_store(struct device *dev, 507 struct device_attribute *attr, 508 const char *buf, size_t count) 509 { 510 int ret; 511 u64 pfn; 512 if (!capable(CAP_SYS_ADMIN)) 513 return -EPERM; 514 if (kstrtoull(buf, 0, &pfn) < 0) 515 return -EINVAL; 516 pfn >>= PAGE_SHIFT; 517 ret = memory_failure(pfn, 0); 518 return ret ? ret : count; 519 } 520 521 static DEVICE_ATTR_WO(soft_offline_page); 522 static DEVICE_ATTR_WO(hard_offline_page); 523 #endif 524 525 /* 526 * Note that phys_device is optional. It is here to allow for 527 * differentiation between which *physical* devices each 528 * section belongs to... 529 */ 530 int __weak arch_get_memory_phys_device(unsigned long start_pfn) 531 { 532 return 0; 533 } 534 535 /* A reference for the returned memory block device is acquired. */ 536 static struct memory_block *find_memory_block_by_id(unsigned long block_id) 537 { 538 struct device *dev; 539 540 dev = subsys_find_device_by_id(&memory_subsys, block_id, NULL); 541 return dev ? to_memory_block(dev) : NULL; 542 } 543 544 /* 545 * For now, we have a linear search to go find the appropriate 546 * memory_block corresponding to a particular phys_index. If 547 * this gets to be a real problem, we can always use a radix 548 * tree or something here. 549 * 550 * This could be made generic for all device subsystems. 551 */ 552 struct memory_block *find_memory_block(struct mem_section *section) 553 { 554 unsigned long block_id = base_memory_block_id(__section_nr(section)); 555 556 return find_memory_block_by_id(block_id); 557 } 558 559 static struct attribute *memory_memblk_attrs[] = { 560 &dev_attr_phys_index.attr, 561 &dev_attr_state.attr, 562 &dev_attr_phys_device.attr, 563 &dev_attr_removable.attr, 564 #ifdef CONFIG_MEMORY_HOTREMOVE 565 &dev_attr_valid_zones.attr, 566 #endif 567 NULL 568 }; 569 570 static struct attribute_group memory_memblk_attr_group = { 571 .attrs = memory_memblk_attrs, 572 }; 573 574 static const struct attribute_group *memory_memblk_attr_groups[] = { 575 &memory_memblk_attr_group, 576 NULL, 577 }; 578 579 /* 580 * register_memory - Setup a sysfs device for a memory block 581 */ 582 static 583 int register_memory(struct memory_block *memory) 584 { 585 int ret; 586 587 memory->dev.bus = &memory_subsys; 588 memory->dev.id = memory->start_section_nr / sections_per_block; 589 memory->dev.release = memory_block_release; 590 memory->dev.groups = memory_memblk_attr_groups; 591 memory->dev.offline = memory->state == MEM_OFFLINE; 592 593 ret = device_register(&memory->dev); 594 if (ret) 595 put_device(&memory->dev); 596 597 return ret; 598 } 599 600 static int init_memory_block(struct memory_block **memory, 601 unsigned long block_id, unsigned long state) 602 { 603 struct memory_block *mem; 604 unsigned long start_pfn; 605 int ret = 0; 606 607 mem = find_memory_block_by_id(block_id); 608 if (mem) { 609 put_device(&mem->dev); 610 return -EEXIST; 611 } 612 mem = kzalloc(sizeof(*mem), GFP_KERNEL); 613 if (!mem) 614 return -ENOMEM; 615 616 mem->start_section_nr = block_id * sections_per_block; 617 mem->state = state; 618 start_pfn = section_nr_to_pfn(mem->start_section_nr); 619 mem->phys_device = arch_get_memory_phys_device(start_pfn); 620 mem->nid = NUMA_NO_NODE; 621 622 ret = register_memory(mem); 623 624 *memory = mem; 625 return ret; 626 } 627 628 static int add_memory_block(unsigned long base_section_nr) 629 { 630 int ret, section_count = 0; 631 struct memory_block *mem; 632 unsigned long nr; 633 634 for (nr = base_section_nr; nr < base_section_nr + sections_per_block; 635 nr++) 636 if (present_section_nr(nr)) 637 section_count++; 638 639 if (section_count == 0) 640 return 0; 641 ret = init_memory_block(&mem, base_memory_block_id(base_section_nr), 642 MEM_ONLINE); 643 if (ret) 644 return ret; 645 mem->section_count = section_count; 646 return 0; 647 } 648 649 static void unregister_memory(struct memory_block *memory) 650 { 651 if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys)) 652 return; 653 654 /* drop the ref. we got via find_memory_block() */ 655 put_device(&memory->dev); 656 device_unregister(&memory->dev); 657 } 658 659 /* 660 * Create memory block devices for the given memory area. Start and size 661 * have to be aligned to memory block granularity. Memory block devices 662 * will be initialized as offline. 663 * 664 * Called under device_hotplug_lock. 665 */ 666 int create_memory_block_devices(unsigned long start, unsigned long size) 667 { 668 const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); 669 unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); 670 struct memory_block *mem; 671 unsigned long block_id; 672 int ret = 0; 673 674 if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) || 675 !IS_ALIGNED(size, memory_block_size_bytes()))) 676 return -EINVAL; 677 678 for (block_id = start_block_id; block_id != end_block_id; block_id++) { 679 ret = init_memory_block(&mem, block_id, MEM_OFFLINE); 680 if (ret) 681 break; 682 mem->section_count = sections_per_block; 683 } 684 if (ret) { 685 end_block_id = block_id; 686 for (block_id = start_block_id; block_id != end_block_id; 687 block_id++) { 688 mem = find_memory_block_by_id(block_id); 689 if (WARN_ON_ONCE(!mem)) 690 continue; 691 mem->section_count = 0; 692 unregister_memory(mem); 693 } 694 } 695 return ret; 696 } 697 698 /* 699 * Remove memory block devices for the given memory area. Start and size 700 * have to be aligned to memory block granularity. Memory block devices 701 * have to be offline. 702 * 703 * Called under device_hotplug_lock. 704 */ 705 void remove_memory_block_devices(unsigned long start, unsigned long size) 706 { 707 const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); 708 const unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); 709 struct memory_block *mem; 710 unsigned long block_id; 711 712 if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) || 713 !IS_ALIGNED(size, memory_block_size_bytes()))) 714 return; 715 716 for (block_id = start_block_id; block_id != end_block_id; block_id++) { 717 mem = find_memory_block_by_id(block_id); 718 if (WARN_ON_ONCE(!mem)) 719 continue; 720 mem->section_count = 0; 721 unregister_memory_block_under_nodes(mem); 722 unregister_memory(mem); 723 } 724 } 725 726 /* return true if the memory block is offlined, otherwise, return false */ 727 bool is_memblock_offlined(struct memory_block *mem) 728 { 729 return mem->state == MEM_OFFLINE; 730 } 731 732 static struct attribute *memory_root_attrs[] = { 733 #ifdef CONFIG_ARCH_MEMORY_PROBE 734 &dev_attr_probe.attr, 735 #endif 736 737 #ifdef CONFIG_MEMORY_FAILURE 738 &dev_attr_soft_offline_page.attr, 739 &dev_attr_hard_offline_page.attr, 740 #endif 741 742 &dev_attr_block_size_bytes.attr, 743 &dev_attr_auto_online_blocks.attr, 744 NULL 745 }; 746 747 static struct attribute_group memory_root_attr_group = { 748 .attrs = memory_root_attrs, 749 }; 750 751 static const struct attribute_group *memory_root_attr_groups[] = { 752 &memory_root_attr_group, 753 NULL, 754 }; 755 756 /* 757 * Initialize the sysfs support for memory devices. At the time this function 758 * is called, we cannot have concurrent creation/deletion of memory block 759 * devices, the device_hotplug_lock is not needed. 760 */ 761 void __init memory_dev_init(void) 762 { 763 int ret; 764 unsigned long block_sz, nr; 765 766 /* Validate the configured memory block size */ 767 block_sz = memory_block_size_bytes(); 768 if (!is_power_of_2(block_sz) || block_sz < MIN_MEMORY_BLOCK_SIZE) 769 panic("Memory block size not suitable: 0x%lx\n", block_sz); 770 sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; 771 772 ret = subsys_system_register(&memory_subsys, memory_root_attr_groups); 773 if (ret) 774 panic("%s() failed to register subsystem: %d\n", __func__, ret); 775 776 /* 777 * Create entries for memory sections that were found 778 * during boot and have been initialized 779 */ 780 for (nr = 0; nr <= __highest_present_section_nr; 781 nr += sections_per_block) { 782 ret = add_memory_block(nr); 783 if (ret) 784 panic("%s() failed to add memory block: %d\n", __func__, 785 ret); 786 } 787 } 788 789 /** 790 * walk_memory_blocks - walk through all present memory blocks overlapped 791 * by the range [start, start + size) 792 * 793 * @start: start address of the memory range 794 * @size: size of the memory range 795 * @arg: argument passed to func 796 * @func: callback for each memory section walked 797 * 798 * This function walks through all present memory blocks overlapped by the 799 * range [start, start + size), calling func on each memory block. 800 * 801 * In case func() returns an error, walking is aborted and the error is 802 * returned. 803 */ 804 int walk_memory_blocks(unsigned long start, unsigned long size, 805 void *arg, walk_memory_blocks_func_t func) 806 { 807 const unsigned long start_block_id = phys_to_block_id(start); 808 const unsigned long end_block_id = phys_to_block_id(start + size - 1); 809 struct memory_block *mem; 810 unsigned long block_id; 811 int ret = 0; 812 813 if (!size) 814 return 0; 815 816 for (block_id = start_block_id; block_id <= end_block_id; block_id++) { 817 mem = find_memory_block_by_id(block_id); 818 if (!mem) 819 continue; 820 821 ret = func(mem, arg); 822 put_device(&mem->dev); 823 if (ret) 824 break; 825 } 826 return ret; 827 } 828 829 struct for_each_memory_block_cb_data { 830 walk_memory_blocks_func_t func; 831 void *arg; 832 }; 833 834 static int for_each_memory_block_cb(struct device *dev, void *data) 835 { 836 struct memory_block *mem = to_memory_block(dev); 837 struct for_each_memory_block_cb_data *cb_data = data; 838 839 return cb_data->func(mem, cb_data->arg); 840 } 841 842 /** 843 * for_each_memory_block - walk through all present memory blocks 844 * 845 * @arg: argument passed to func 846 * @func: callback for each memory block walked 847 * 848 * This function walks through all present memory blocks, calling func on 849 * each memory block. 850 * 851 * In case func() returns an error, walking is aborted and the error is 852 * returned. 853 */ 854 int for_each_memory_block(void *arg, walk_memory_blocks_func_t func) 855 { 856 struct for_each_memory_block_cb_data cb_data = { 857 .func = func, 858 .arg = arg, 859 }; 860 861 return bus_for_each_dev(&memory_subsys, NULL, &cb_data, 862 for_each_memory_block_cb); 863 } 864