1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Memory subsystem support 4 * 5 * Written by Matt Tolentino <matthew.e.tolentino@intel.com> 6 * Dave Hansen <haveblue@us.ibm.com> 7 * 8 * This file provides the necessary infrastructure to represent 9 * a SPARSEMEM-memory-model system's physical memory in /sysfs. 10 * All arch-independent code that assumes MEMORY_HOTPLUG requires 11 * SPARSEMEM should be contained here, or in mm/memory_hotplug.c. 12 */ 13 14 #include <linux/module.h> 15 #include <linux/init.h> 16 #include <linux/topology.h> 17 #include <linux/capability.h> 18 #include <linux/device.h> 19 #include <linux/memory.h> 20 #include <linux/memory_hotplug.h> 21 #include <linux/mm.h> 22 #include <linux/stat.h> 23 #include <linux/slab.h> 24 25 #include <linux/atomic.h> 26 #include <linux/uaccess.h> 27 28 #define MEMORY_CLASS_NAME "memory" 29 30 #define to_memory_block(dev) container_of(dev, struct memory_block, dev) 31 32 static int sections_per_block; 33 34 static inline unsigned long base_memory_block_id(unsigned long section_nr) 35 { 36 return section_nr / sections_per_block; 37 } 38 39 static inline unsigned long pfn_to_block_id(unsigned long pfn) 40 { 41 return base_memory_block_id(pfn_to_section_nr(pfn)); 42 } 43 44 static inline unsigned long phys_to_block_id(unsigned long phys) 45 { 46 return pfn_to_block_id(PFN_DOWN(phys)); 47 } 48 49 static int memory_subsys_online(struct device *dev); 50 static int memory_subsys_offline(struct device *dev); 51 52 static struct bus_type memory_subsys = { 53 .name = MEMORY_CLASS_NAME, 54 .dev_name = MEMORY_CLASS_NAME, 55 .online = memory_subsys_online, 56 .offline = memory_subsys_offline, 57 }; 58 59 static BLOCKING_NOTIFIER_HEAD(memory_chain); 60 61 int register_memory_notifier(struct notifier_block *nb) 62 { 63 return blocking_notifier_chain_register(&memory_chain, nb); 64 } 65 EXPORT_SYMBOL(register_memory_notifier); 66 67 void unregister_memory_notifier(struct notifier_block *nb) 68 { 69 blocking_notifier_chain_unregister(&memory_chain, nb); 70 } 71 EXPORT_SYMBOL(unregister_memory_notifier); 72 73 static ATOMIC_NOTIFIER_HEAD(memory_isolate_chain); 74 75 int register_memory_isolate_notifier(struct notifier_block *nb) 76 { 77 return atomic_notifier_chain_register(&memory_isolate_chain, nb); 78 } 79 EXPORT_SYMBOL(register_memory_isolate_notifier); 80 81 void unregister_memory_isolate_notifier(struct notifier_block *nb) 82 { 83 atomic_notifier_chain_unregister(&memory_isolate_chain, nb); 84 } 85 EXPORT_SYMBOL(unregister_memory_isolate_notifier); 86 87 static void memory_block_release(struct device *dev) 88 { 89 struct memory_block *mem = to_memory_block(dev); 90 91 kfree(mem); 92 } 93 94 unsigned long __weak memory_block_size_bytes(void) 95 { 96 return MIN_MEMORY_BLOCK_SIZE; 97 } 98 EXPORT_SYMBOL_GPL(memory_block_size_bytes); 99 100 /* 101 * Show the first physical section index (number) of this memory block. 102 */ 103 static ssize_t phys_index_show(struct device *dev, 104 struct device_attribute *attr, char *buf) 105 { 106 struct memory_block *mem = to_memory_block(dev); 107 unsigned long phys_index; 108 109 phys_index = mem->start_section_nr / sections_per_block; 110 return sprintf(buf, "%08lx\n", phys_index); 111 } 112 113 /* 114 * Show whether the memory block is likely to be offlineable (or is already 115 * offline). Once offline, the memory block could be removed. The return 116 * value does, however, not indicate that there is a way to remove the 117 * memory block. 118 */ 119 static ssize_t removable_show(struct device *dev, struct device_attribute *attr, 120 char *buf) 121 { 122 struct memory_block *mem = to_memory_block(dev); 123 unsigned long pfn; 124 int ret = 1, i; 125 126 if (mem->state != MEM_ONLINE) 127 goto out; 128 129 for (i = 0; i < sections_per_block; i++) { 130 if (!present_section_nr(mem->start_section_nr + i)) 131 continue; 132 pfn = section_nr_to_pfn(mem->start_section_nr + i); 133 ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION); 134 } 135 136 out: 137 return sprintf(buf, "%d\n", ret); 138 } 139 140 /* 141 * online, offline, going offline, etc. 142 */ 143 static ssize_t state_show(struct device *dev, struct device_attribute *attr, 144 char *buf) 145 { 146 struct memory_block *mem = to_memory_block(dev); 147 ssize_t len = 0; 148 149 /* 150 * We can probably put these states in a nice little array 151 * so that they're not open-coded 152 */ 153 switch (mem->state) { 154 case MEM_ONLINE: 155 len = sprintf(buf, "online\n"); 156 break; 157 case MEM_OFFLINE: 158 len = sprintf(buf, "offline\n"); 159 break; 160 case MEM_GOING_OFFLINE: 161 len = sprintf(buf, "going-offline\n"); 162 break; 163 default: 164 len = sprintf(buf, "ERROR-UNKNOWN-%ld\n", 165 mem->state); 166 WARN_ON(1); 167 break; 168 } 169 170 return len; 171 } 172 173 int memory_notify(unsigned long val, void *v) 174 { 175 return blocking_notifier_call_chain(&memory_chain, val, v); 176 } 177 178 int memory_isolate_notify(unsigned long val, void *v) 179 { 180 return atomic_notifier_call_chain(&memory_isolate_chain, val, v); 181 } 182 183 /* 184 * The probe routines leave the pages uninitialized, just as the bootmem code 185 * does. Make sure we do not access them, but instead use only information from 186 * within sections. 187 */ 188 static bool pages_correctly_probed(unsigned long start_pfn) 189 { 190 unsigned long section_nr = pfn_to_section_nr(start_pfn); 191 unsigned long section_nr_end = section_nr + sections_per_block; 192 unsigned long pfn = start_pfn; 193 194 /* 195 * memmap between sections is not contiguous except with 196 * SPARSEMEM_VMEMMAP. We lookup the page once per section 197 * and assume memmap is contiguous within each section 198 */ 199 for (; section_nr < section_nr_end; section_nr++) { 200 if (WARN_ON_ONCE(!pfn_valid(pfn))) 201 return false; 202 203 if (!present_section_nr(section_nr)) { 204 pr_warn("section %ld pfn[%lx, %lx) not present\n", 205 section_nr, pfn, pfn + PAGES_PER_SECTION); 206 return false; 207 } else if (!valid_section_nr(section_nr)) { 208 pr_warn("section %ld pfn[%lx, %lx) no valid memmap\n", 209 section_nr, pfn, pfn + PAGES_PER_SECTION); 210 return false; 211 } else if (online_section_nr(section_nr)) { 212 pr_warn("section %ld pfn[%lx, %lx) is already online\n", 213 section_nr, pfn, pfn + PAGES_PER_SECTION); 214 return false; 215 } 216 pfn += PAGES_PER_SECTION; 217 } 218 219 return true; 220 } 221 222 /* 223 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is 224 * OK to have direct references to sparsemem variables in here. 225 */ 226 static int 227 memory_block_action(unsigned long start_section_nr, unsigned long action, 228 int online_type) 229 { 230 unsigned long start_pfn; 231 unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; 232 int ret; 233 234 start_pfn = section_nr_to_pfn(start_section_nr); 235 236 switch (action) { 237 case MEM_ONLINE: 238 if (!pages_correctly_probed(start_pfn)) 239 return -EBUSY; 240 241 ret = online_pages(start_pfn, nr_pages, online_type); 242 break; 243 case MEM_OFFLINE: 244 ret = offline_pages(start_pfn, nr_pages); 245 break; 246 default: 247 WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: " 248 "%ld\n", __func__, start_section_nr, action, action); 249 ret = -EINVAL; 250 } 251 252 return ret; 253 } 254 255 static int memory_block_change_state(struct memory_block *mem, 256 unsigned long to_state, unsigned long from_state_req) 257 { 258 int ret = 0; 259 260 if (mem->state != from_state_req) 261 return -EINVAL; 262 263 if (to_state == MEM_OFFLINE) 264 mem->state = MEM_GOING_OFFLINE; 265 266 ret = memory_block_action(mem->start_section_nr, to_state, 267 mem->online_type); 268 269 mem->state = ret ? from_state_req : to_state; 270 271 return ret; 272 } 273 274 /* The device lock serializes operations on memory_subsys_[online|offline] */ 275 static int memory_subsys_online(struct device *dev) 276 { 277 struct memory_block *mem = to_memory_block(dev); 278 int ret; 279 280 if (mem->state == MEM_ONLINE) 281 return 0; 282 283 /* 284 * If we are called from state_store(), online_type will be 285 * set >= 0 Otherwise we were called from the device online 286 * attribute and need to set the online_type. 287 */ 288 if (mem->online_type < 0) 289 mem->online_type = MMOP_ONLINE_KEEP; 290 291 ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); 292 293 /* clear online_type */ 294 mem->online_type = -1; 295 296 return ret; 297 } 298 299 static int memory_subsys_offline(struct device *dev) 300 { 301 struct memory_block *mem = to_memory_block(dev); 302 303 if (mem->state == MEM_OFFLINE) 304 return 0; 305 306 /* Can't offline block with non-present sections */ 307 if (mem->section_count != sections_per_block) 308 return -EINVAL; 309 310 return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE); 311 } 312 313 static ssize_t state_store(struct device *dev, struct device_attribute *attr, 314 const char *buf, size_t count) 315 { 316 struct memory_block *mem = to_memory_block(dev); 317 int ret, online_type; 318 319 ret = lock_device_hotplug_sysfs(); 320 if (ret) 321 return ret; 322 323 if (sysfs_streq(buf, "online_kernel")) 324 online_type = MMOP_ONLINE_KERNEL; 325 else if (sysfs_streq(buf, "online_movable")) 326 online_type = MMOP_ONLINE_MOVABLE; 327 else if (sysfs_streq(buf, "online")) 328 online_type = MMOP_ONLINE_KEEP; 329 else if (sysfs_streq(buf, "offline")) 330 online_type = MMOP_OFFLINE; 331 else { 332 ret = -EINVAL; 333 goto err; 334 } 335 336 switch (online_type) { 337 case MMOP_ONLINE_KERNEL: 338 case MMOP_ONLINE_MOVABLE: 339 case MMOP_ONLINE_KEEP: 340 /* mem->online_type is protected by device_hotplug_lock */ 341 mem->online_type = online_type; 342 ret = device_online(&mem->dev); 343 break; 344 case MMOP_OFFLINE: 345 ret = device_offline(&mem->dev); 346 break; 347 default: 348 ret = -EINVAL; /* should never happen */ 349 } 350 351 err: 352 unlock_device_hotplug(); 353 354 if (ret < 0) 355 return ret; 356 if (ret) 357 return -EINVAL; 358 359 return count; 360 } 361 362 /* 363 * phys_device is a bad name for this. What I really want 364 * is a way to differentiate between memory ranges that 365 * are part of physical devices that constitute 366 * a complete removable unit or fru. 367 * i.e. do these ranges belong to the same physical device, 368 * s.t. if I offline all of these sections I can then 369 * remove the physical device? 370 */ 371 static ssize_t phys_device_show(struct device *dev, 372 struct device_attribute *attr, char *buf) 373 { 374 struct memory_block *mem = to_memory_block(dev); 375 return sprintf(buf, "%d\n", mem->phys_device); 376 } 377 378 #ifdef CONFIG_MEMORY_HOTREMOVE 379 static void print_allowed_zone(char *buf, int nid, unsigned long start_pfn, 380 unsigned long nr_pages, int online_type, 381 struct zone *default_zone) 382 { 383 struct zone *zone; 384 385 zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages); 386 if (zone != default_zone) { 387 strcat(buf, " "); 388 strcat(buf, zone->name); 389 } 390 } 391 392 static ssize_t valid_zones_show(struct device *dev, 393 struct device_attribute *attr, char *buf) 394 { 395 struct memory_block *mem = to_memory_block(dev); 396 unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); 397 unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; 398 unsigned long valid_start_pfn, valid_end_pfn; 399 struct zone *default_zone; 400 int nid; 401 402 /* 403 * Check the existing zone. Make sure that we do that only on the 404 * online nodes otherwise the page_zone is not reliable 405 */ 406 if (mem->state == MEM_ONLINE) { 407 /* 408 * The block contains more than one zone can not be offlined. 409 * This can happen e.g. for ZONE_DMA and ZONE_DMA32 410 */ 411 if (!test_pages_in_a_zone(start_pfn, start_pfn + nr_pages, 412 &valid_start_pfn, &valid_end_pfn)) 413 return sprintf(buf, "none\n"); 414 start_pfn = valid_start_pfn; 415 strcat(buf, page_zone(pfn_to_page(start_pfn))->name); 416 goto out; 417 } 418 419 nid = mem->nid; 420 default_zone = zone_for_pfn_range(MMOP_ONLINE_KEEP, nid, start_pfn, nr_pages); 421 strcat(buf, default_zone->name); 422 423 print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_KERNEL, 424 default_zone); 425 print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_MOVABLE, 426 default_zone); 427 out: 428 strcat(buf, "\n"); 429 430 return strlen(buf); 431 } 432 static DEVICE_ATTR_RO(valid_zones); 433 #endif 434 435 static DEVICE_ATTR_RO(phys_index); 436 static DEVICE_ATTR_RW(state); 437 static DEVICE_ATTR_RO(phys_device); 438 static DEVICE_ATTR_RO(removable); 439 440 /* 441 * Show the memory block size (shared by all memory blocks). 442 */ 443 static ssize_t block_size_bytes_show(struct device *dev, 444 struct device_attribute *attr, char *buf) 445 { 446 return sprintf(buf, "%lx\n", memory_block_size_bytes()); 447 } 448 449 static DEVICE_ATTR_RO(block_size_bytes); 450 451 /* 452 * Memory auto online policy. 453 */ 454 455 static ssize_t auto_online_blocks_show(struct device *dev, 456 struct device_attribute *attr, char *buf) 457 { 458 if (memhp_auto_online) 459 return sprintf(buf, "online\n"); 460 else 461 return sprintf(buf, "offline\n"); 462 } 463 464 static ssize_t auto_online_blocks_store(struct device *dev, 465 struct device_attribute *attr, 466 const char *buf, size_t count) 467 { 468 if (sysfs_streq(buf, "online")) 469 memhp_auto_online = true; 470 else if (sysfs_streq(buf, "offline")) 471 memhp_auto_online = false; 472 else 473 return -EINVAL; 474 475 return count; 476 } 477 478 static DEVICE_ATTR_RW(auto_online_blocks); 479 480 /* 481 * Some architectures will have custom drivers to do this, and 482 * will not need to do it from userspace. The fake hot-add code 483 * as well as ppc64 will do all of their discovery in userspace 484 * and will require this interface. 485 */ 486 #ifdef CONFIG_ARCH_MEMORY_PROBE 487 static ssize_t probe_store(struct device *dev, struct device_attribute *attr, 488 const char *buf, size_t count) 489 { 490 u64 phys_addr; 491 int nid, ret; 492 unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block; 493 494 ret = kstrtoull(buf, 0, &phys_addr); 495 if (ret) 496 return ret; 497 498 if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1)) 499 return -EINVAL; 500 501 ret = lock_device_hotplug_sysfs(); 502 if (ret) 503 return ret; 504 505 nid = memory_add_physaddr_to_nid(phys_addr); 506 ret = __add_memory(nid, phys_addr, 507 MIN_MEMORY_BLOCK_SIZE * sections_per_block); 508 509 if (ret) 510 goto out; 511 512 ret = count; 513 out: 514 unlock_device_hotplug(); 515 return ret; 516 } 517 518 static DEVICE_ATTR_WO(probe); 519 #endif 520 521 #ifdef CONFIG_MEMORY_FAILURE 522 /* 523 * Support for offlining pages of memory 524 */ 525 526 /* Soft offline a page */ 527 static ssize_t soft_offline_page_store(struct device *dev, 528 struct device_attribute *attr, 529 const char *buf, size_t count) 530 { 531 int ret; 532 u64 pfn; 533 if (!capable(CAP_SYS_ADMIN)) 534 return -EPERM; 535 if (kstrtoull(buf, 0, &pfn) < 0) 536 return -EINVAL; 537 pfn >>= PAGE_SHIFT; 538 ret = soft_offline_page(pfn, 0); 539 return ret == 0 ? count : ret; 540 } 541 542 /* Forcibly offline a page, including killing processes. */ 543 static ssize_t hard_offline_page_store(struct device *dev, 544 struct device_attribute *attr, 545 const char *buf, size_t count) 546 { 547 int ret; 548 u64 pfn; 549 if (!capable(CAP_SYS_ADMIN)) 550 return -EPERM; 551 if (kstrtoull(buf, 0, &pfn) < 0) 552 return -EINVAL; 553 pfn >>= PAGE_SHIFT; 554 ret = memory_failure(pfn, 0); 555 return ret ? ret : count; 556 } 557 558 static DEVICE_ATTR_WO(soft_offline_page); 559 static DEVICE_ATTR_WO(hard_offline_page); 560 #endif 561 562 /* 563 * Note that phys_device is optional. It is here to allow for 564 * differentiation between which *physical* devices each 565 * section belongs to... 566 */ 567 int __weak arch_get_memory_phys_device(unsigned long start_pfn) 568 { 569 return 0; 570 } 571 572 /* A reference for the returned memory block device is acquired. */ 573 static struct memory_block *find_memory_block_by_id(unsigned long block_id) 574 { 575 struct device *dev; 576 577 dev = subsys_find_device_by_id(&memory_subsys, block_id, NULL); 578 return dev ? to_memory_block(dev) : NULL; 579 } 580 581 /* 582 * For now, we have a linear search to go find the appropriate 583 * memory_block corresponding to a particular phys_index. If 584 * this gets to be a real problem, we can always use a radix 585 * tree or something here. 586 * 587 * This could be made generic for all device subsystems. 588 */ 589 struct memory_block *find_memory_block(struct mem_section *section) 590 { 591 unsigned long block_id = base_memory_block_id(__section_nr(section)); 592 593 return find_memory_block_by_id(block_id); 594 } 595 596 static struct attribute *memory_memblk_attrs[] = { 597 &dev_attr_phys_index.attr, 598 &dev_attr_state.attr, 599 &dev_attr_phys_device.attr, 600 &dev_attr_removable.attr, 601 #ifdef CONFIG_MEMORY_HOTREMOVE 602 &dev_attr_valid_zones.attr, 603 #endif 604 NULL 605 }; 606 607 static struct attribute_group memory_memblk_attr_group = { 608 .attrs = memory_memblk_attrs, 609 }; 610 611 static const struct attribute_group *memory_memblk_attr_groups[] = { 612 &memory_memblk_attr_group, 613 NULL, 614 }; 615 616 /* 617 * register_memory - Setup a sysfs device for a memory block 618 */ 619 static 620 int register_memory(struct memory_block *memory) 621 { 622 int ret; 623 624 memory->dev.bus = &memory_subsys; 625 memory->dev.id = memory->start_section_nr / sections_per_block; 626 memory->dev.release = memory_block_release; 627 memory->dev.groups = memory_memblk_attr_groups; 628 memory->dev.offline = memory->state == MEM_OFFLINE; 629 630 ret = device_register(&memory->dev); 631 if (ret) 632 put_device(&memory->dev); 633 634 return ret; 635 } 636 637 static int init_memory_block(struct memory_block **memory, 638 unsigned long block_id, unsigned long state) 639 { 640 struct memory_block *mem; 641 unsigned long start_pfn; 642 int ret = 0; 643 644 mem = find_memory_block_by_id(block_id); 645 if (mem) { 646 put_device(&mem->dev); 647 return -EEXIST; 648 } 649 mem = kzalloc(sizeof(*mem), GFP_KERNEL); 650 if (!mem) 651 return -ENOMEM; 652 653 mem->start_section_nr = block_id * sections_per_block; 654 mem->state = state; 655 start_pfn = section_nr_to_pfn(mem->start_section_nr); 656 mem->phys_device = arch_get_memory_phys_device(start_pfn); 657 mem->nid = NUMA_NO_NODE; 658 659 ret = register_memory(mem); 660 661 *memory = mem; 662 return ret; 663 } 664 665 static int add_memory_block(unsigned long base_section_nr) 666 { 667 int ret, section_count = 0; 668 struct memory_block *mem; 669 unsigned long nr; 670 671 for (nr = base_section_nr; nr < base_section_nr + sections_per_block; 672 nr++) 673 if (present_section_nr(nr)) 674 section_count++; 675 676 if (section_count == 0) 677 return 0; 678 ret = init_memory_block(&mem, base_memory_block_id(base_section_nr), 679 MEM_ONLINE); 680 if (ret) 681 return ret; 682 mem->section_count = section_count; 683 return 0; 684 } 685 686 static void unregister_memory(struct memory_block *memory) 687 { 688 if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys)) 689 return; 690 691 /* drop the ref. we got via find_memory_block() */ 692 put_device(&memory->dev); 693 device_unregister(&memory->dev); 694 } 695 696 /* 697 * Create memory block devices for the given memory area. Start and size 698 * have to be aligned to memory block granularity. Memory block devices 699 * will be initialized as offline. 700 * 701 * Called under device_hotplug_lock. 702 */ 703 int create_memory_block_devices(unsigned long start, unsigned long size) 704 { 705 const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); 706 unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); 707 struct memory_block *mem; 708 unsigned long block_id; 709 int ret = 0; 710 711 if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) || 712 !IS_ALIGNED(size, memory_block_size_bytes()))) 713 return -EINVAL; 714 715 for (block_id = start_block_id; block_id != end_block_id; block_id++) { 716 ret = init_memory_block(&mem, block_id, MEM_OFFLINE); 717 if (ret) 718 break; 719 mem->section_count = sections_per_block; 720 } 721 if (ret) { 722 end_block_id = block_id; 723 for (block_id = start_block_id; block_id != end_block_id; 724 block_id++) { 725 mem = find_memory_block_by_id(block_id); 726 if (WARN_ON_ONCE(!mem)) 727 continue; 728 mem->section_count = 0; 729 unregister_memory(mem); 730 } 731 } 732 return ret; 733 } 734 735 /* 736 * Remove memory block devices for the given memory area. Start and size 737 * have to be aligned to memory block granularity. Memory block devices 738 * have to be offline. 739 * 740 * Called under device_hotplug_lock. 741 */ 742 void remove_memory_block_devices(unsigned long start, unsigned long size) 743 { 744 const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); 745 const unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); 746 struct memory_block *mem; 747 unsigned long block_id; 748 749 if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) || 750 !IS_ALIGNED(size, memory_block_size_bytes()))) 751 return; 752 753 for (block_id = start_block_id; block_id != end_block_id; block_id++) { 754 mem = find_memory_block_by_id(block_id); 755 if (WARN_ON_ONCE(!mem)) 756 continue; 757 mem->section_count = 0; 758 unregister_memory_block_under_nodes(mem); 759 unregister_memory(mem); 760 } 761 } 762 763 /* return true if the memory block is offlined, otherwise, return false */ 764 bool is_memblock_offlined(struct memory_block *mem) 765 { 766 return mem->state == MEM_OFFLINE; 767 } 768 769 static struct attribute *memory_root_attrs[] = { 770 #ifdef CONFIG_ARCH_MEMORY_PROBE 771 &dev_attr_probe.attr, 772 #endif 773 774 #ifdef CONFIG_MEMORY_FAILURE 775 &dev_attr_soft_offline_page.attr, 776 &dev_attr_hard_offline_page.attr, 777 #endif 778 779 &dev_attr_block_size_bytes.attr, 780 &dev_attr_auto_online_blocks.attr, 781 NULL 782 }; 783 784 static struct attribute_group memory_root_attr_group = { 785 .attrs = memory_root_attrs, 786 }; 787 788 static const struct attribute_group *memory_root_attr_groups[] = { 789 &memory_root_attr_group, 790 NULL, 791 }; 792 793 /* 794 * Initialize the sysfs support for memory devices. At the time this function 795 * is called, we cannot have concurrent creation/deletion of memory block 796 * devices, the device_hotplug_lock is not needed. 797 */ 798 void __init memory_dev_init(void) 799 { 800 int ret; 801 unsigned long block_sz, nr; 802 803 /* Validate the configured memory block size */ 804 block_sz = memory_block_size_bytes(); 805 if (!is_power_of_2(block_sz) || block_sz < MIN_MEMORY_BLOCK_SIZE) 806 panic("Memory block size not suitable: 0x%lx\n", block_sz); 807 sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; 808 809 ret = subsys_system_register(&memory_subsys, memory_root_attr_groups); 810 if (ret) 811 panic("%s() failed to register subsystem: %d\n", __func__, ret); 812 813 /* 814 * Create entries for memory sections that were found 815 * during boot and have been initialized 816 */ 817 for (nr = 0; nr <= __highest_present_section_nr; 818 nr += sections_per_block) { 819 ret = add_memory_block(nr); 820 if (ret) 821 panic("%s() failed to add memory block: %d\n", __func__, 822 ret); 823 } 824 } 825 826 /** 827 * walk_memory_blocks - walk through all present memory blocks overlapped 828 * by the range [start, start + size) 829 * 830 * @start: start address of the memory range 831 * @size: size of the memory range 832 * @arg: argument passed to func 833 * @func: callback for each memory section walked 834 * 835 * This function walks through all present memory blocks overlapped by the 836 * range [start, start + size), calling func on each memory block. 837 * 838 * In case func() returns an error, walking is aborted and the error is 839 * returned. 840 */ 841 int walk_memory_blocks(unsigned long start, unsigned long size, 842 void *arg, walk_memory_blocks_func_t func) 843 { 844 const unsigned long start_block_id = phys_to_block_id(start); 845 const unsigned long end_block_id = phys_to_block_id(start + size - 1); 846 struct memory_block *mem; 847 unsigned long block_id; 848 int ret = 0; 849 850 if (!size) 851 return 0; 852 853 for (block_id = start_block_id; block_id <= end_block_id; block_id++) { 854 mem = find_memory_block_by_id(block_id); 855 if (!mem) 856 continue; 857 858 ret = func(mem, arg); 859 put_device(&mem->dev); 860 if (ret) 861 break; 862 } 863 return ret; 864 } 865 866 struct for_each_memory_block_cb_data { 867 walk_memory_blocks_func_t func; 868 void *arg; 869 }; 870 871 static int for_each_memory_block_cb(struct device *dev, void *data) 872 { 873 struct memory_block *mem = to_memory_block(dev); 874 struct for_each_memory_block_cb_data *cb_data = data; 875 876 return cb_data->func(mem, cb_data->arg); 877 } 878 879 /** 880 * for_each_memory_block - walk through all present memory blocks 881 * 882 * @arg: argument passed to func 883 * @func: callback for each memory block walked 884 * 885 * This function walks through all present memory blocks, calling func on 886 * each memory block. 887 * 888 * In case func() returns an error, walking is aborted and the error is 889 * returned. 890 */ 891 int for_each_memory_block(void *arg, walk_memory_blocks_func_t func) 892 { 893 struct for_each_memory_block_cb_data cb_data = { 894 .func = func, 895 .arg = arg, 896 }; 897 898 return bus_for_each_dev(&memory_subsys, NULL, &cb_data, 899 for_each_memory_block_cb); 900 } 901