1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Memory subsystem support 4 * 5 * Written by Matt Tolentino <matthew.e.tolentino@intel.com> 6 * Dave Hansen <haveblue@us.ibm.com> 7 * 8 * This file provides the necessary infrastructure to represent 9 * a SPARSEMEM-memory-model system's physical memory in /sysfs. 10 * All arch-independent code that assumes MEMORY_HOTPLUG requires 11 * SPARSEMEM should be contained here, or in mm/memory_hotplug.c. 12 */ 13 14 #include <linux/module.h> 15 #include <linux/init.h> 16 #include <linux/topology.h> 17 #include <linux/capability.h> 18 #include <linux/device.h> 19 #include <linux/memory.h> 20 #include <linux/memory_hotplug.h> 21 #include <linux/mm.h> 22 #include <linux/mutex.h> 23 #include <linux/stat.h> 24 #include <linux/slab.h> 25 26 #include <linux/atomic.h> 27 #include <linux/uaccess.h> 28 29 static DEFINE_MUTEX(mem_sysfs_mutex); 30 31 #define MEMORY_CLASS_NAME "memory" 32 33 #define to_memory_block(dev) container_of(dev, struct memory_block, dev) 34 35 static int sections_per_block; 36 37 static inline unsigned long base_memory_block_id(unsigned long section_nr) 38 { 39 return section_nr / sections_per_block; 40 } 41 42 static inline unsigned long pfn_to_block_id(unsigned long pfn) 43 { 44 return base_memory_block_id(pfn_to_section_nr(pfn)); 45 } 46 47 static inline unsigned long phys_to_block_id(unsigned long phys) 48 { 49 return pfn_to_block_id(PFN_DOWN(phys)); 50 } 51 52 static int memory_subsys_online(struct device *dev); 53 static int memory_subsys_offline(struct device *dev); 54 55 static struct bus_type memory_subsys = { 56 .name = MEMORY_CLASS_NAME, 57 .dev_name = MEMORY_CLASS_NAME, 58 .online = memory_subsys_online, 59 .offline = memory_subsys_offline, 60 }; 61 62 static BLOCKING_NOTIFIER_HEAD(memory_chain); 63 64 int register_memory_notifier(struct notifier_block *nb) 65 { 66 return blocking_notifier_chain_register(&memory_chain, nb); 67 } 68 EXPORT_SYMBOL(register_memory_notifier); 69 70 void unregister_memory_notifier(struct notifier_block *nb) 71 { 72 blocking_notifier_chain_unregister(&memory_chain, nb); 73 } 74 EXPORT_SYMBOL(unregister_memory_notifier); 75 76 static ATOMIC_NOTIFIER_HEAD(memory_isolate_chain); 77 78 int register_memory_isolate_notifier(struct notifier_block *nb) 79 { 80 return atomic_notifier_chain_register(&memory_isolate_chain, nb); 81 } 82 EXPORT_SYMBOL(register_memory_isolate_notifier); 83 84 void unregister_memory_isolate_notifier(struct notifier_block *nb) 85 { 86 atomic_notifier_chain_unregister(&memory_isolate_chain, nb); 87 } 88 EXPORT_SYMBOL(unregister_memory_isolate_notifier); 89 90 static void memory_block_release(struct device *dev) 91 { 92 struct memory_block *mem = to_memory_block(dev); 93 94 kfree(mem); 95 } 96 97 unsigned long __weak memory_block_size_bytes(void) 98 { 99 return MIN_MEMORY_BLOCK_SIZE; 100 } 101 EXPORT_SYMBOL_GPL(memory_block_size_bytes); 102 103 /* 104 * Show the first physical section index (number) of this memory block. 105 */ 106 static ssize_t phys_index_show(struct device *dev, 107 struct device_attribute *attr, char *buf) 108 { 109 struct memory_block *mem = to_memory_block(dev); 110 unsigned long phys_index; 111 112 phys_index = mem->start_section_nr / sections_per_block; 113 return sprintf(buf, "%08lx\n", phys_index); 114 } 115 116 /* 117 * Show whether the memory block is likely to be offlineable (or is already 118 * offline). Once offline, the memory block could be removed. The return 119 * value does, however, not indicate that there is a way to remove the 120 * memory block. 121 */ 122 static ssize_t removable_show(struct device *dev, struct device_attribute *attr, 123 char *buf) 124 { 125 struct memory_block *mem = to_memory_block(dev); 126 unsigned long pfn; 127 int ret = 1, i; 128 129 if (mem->state != MEM_ONLINE) 130 goto out; 131 132 for (i = 0; i < sections_per_block; i++) { 133 if (!present_section_nr(mem->start_section_nr + i)) 134 continue; 135 pfn = section_nr_to_pfn(mem->start_section_nr + i); 136 ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION); 137 } 138 139 out: 140 return sprintf(buf, "%d\n", ret); 141 } 142 143 /* 144 * online, offline, going offline, etc. 145 */ 146 static ssize_t state_show(struct device *dev, struct device_attribute *attr, 147 char *buf) 148 { 149 struct memory_block *mem = to_memory_block(dev); 150 ssize_t len = 0; 151 152 /* 153 * We can probably put these states in a nice little array 154 * so that they're not open-coded 155 */ 156 switch (mem->state) { 157 case MEM_ONLINE: 158 len = sprintf(buf, "online\n"); 159 break; 160 case MEM_OFFLINE: 161 len = sprintf(buf, "offline\n"); 162 break; 163 case MEM_GOING_OFFLINE: 164 len = sprintf(buf, "going-offline\n"); 165 break; 166 default: 167 len = sprintf(buf, "ERROR-UNKNOWN-%ld\n", 168 mem->state); 169 WARN_ON(1); 170 break; 171 } 172 173 return len; 174 } 175 176 int memory_notify(unsigned long val, void *v) 177 { 178 return blocking_notifier_call_chain(&memory_chain, val, v); 179 } 180 181 int memory_isolate_notify(unsigned long val, void *v) 182 { 183 return atomic_notifier_call_chain(&memory_isolate_chain, val, v); 184 } 185 186 /* 187 * The probe routines leave the pages uninitialized, just as the bootmem code 188 * does. Make sure we do not access them, but instead use only information from 189 * within sections. 190 */ 191 static bool pages_correctly_probed(unsigned long start_pfn) 192 { 193 unsigned long section_nr = pfn_to_section_nr(start_pfn); 194 unsigned long section_nr_end = section_nr + sections_per_block; 195 unsigned long pfn = start_pfn; 196 197 /* 198 * memmap between sections is not contiguous except with 199 * SPARSEMEM_VMEMMAP. We lookup the page once per section 200 * and assume memmap is contiguous within each section 201 */ 202 for (; section_nr < section_nr_end; section_nr++) { 203 if (WARN_ON_ONCE(!pfn_valid(pfn))) 204 return false; 205 206 if (!present_section_nr(section_nr)) { 207 pr_warn("section %ld pfn[%lx, %lx) not present\n", 208 section_nr, pfn, pfn + PAGES_PER_SECTION); 209 return false; 210 } else if (!valid_section_nr(section_nr)) { 211 pr_warn("section %ld pfn[%lx, %lx) no valid memmap\n", 212 section_nr, pfn, pfn + PAGES_PER_SECTION); 213 return false; 214 } else if (online_section_nr(section_nr)) { 215 pr_warn("section %ld pfn[%lx, %lx) is already online\n", 216 section_nr, pfn, pfn + PAGES_PER_SECTION); 217 return false; 218 } 219 pfn += PAGES_PER_SECTION; 220 } 221 222 return true; 223 } 224 225 /* 226 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is 227 * OK to have direct references to sparsemem variables in here. 228 */ 229 static int 230 memory_block_action(unsigned long start_section_nr, unsigned long action, 231 int online_type) 232 { 233 unsigned long start_pfn; 234 unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; 235 int ret; 236 237 start_pfn = section_nr_to_pfn(start_section_nr); 238 239 switch (action) { 240 case MEM_ONLINE: 241 if (!pages_correctly_probed(start_pfn)) 242 return -EBUSY; 243 244 ret = online_pages(start_pfn, nr_pages, online_type); 245 break; 246 case MEM_OFFLINE: 247 ret = offline_pages(start_pfn, nr_pages); 248 break; 249 default: 250 WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: " 251 "%ld\n", __func__, start_section_nr, action, action); 252 ret = -EINVAL; 253 } 254 255 return ret; 256 } 257 258 static int memory_block_change_state(struct memory_block *mem, 259 unsigned long to_state, unsigned long from_state_req) 260 { 261 int ret = 0; 262 263 if (mem->state != from_state_req) 264 return -EINVAL; 265 266 if (to_state == MEM_OFFLINE) 267 mem->state = MEM_GOING_OFFLINE; 268 269 ret = memory_block_action(mem->start_section_nr, to_state, 270 mem->online_type); 271 272 mem->state = ret ? from_state_req : to_state; 273 274 return ret; 275 } 276 277 /* The device lock serializes operations on memory_subsys_[online|offline] */ 278 static int memory_subsys_online(struct device *dev) 279 { 280 struct memory_block *mem = to_memory_block(dev); 281 int ret; 282 283 if (mem->state == MEM_ONLINE) 284 return 0; 285 286 /* 287 * If we are called from state_store(), online_type will be 288 * set >= 0 Otherwise we were called from the device online 289 * attribute and need to set the online_type. 290 */ 291 if (mem->online_type < 0) 292 mem->online_type = MMOP_ONLINE_KEEP; 293 294 ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); 295 296 /* clear online_type */ 297 mem->online_type = -1; 298 299 return ret; 300 } 301 302 static int memory_subsys_offline(struct device *dev) 303 { 304 struct memory_block *mem = to_memory_block(dev); 305 306 if (mem->state == MEM_OFFLINE) 307 return 0; 308 309 /* Can't offline block with non-present sections */ 310 if (mem->section_count != sections_per_block) 311 return -EINVAL; 312 313 return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE); 314 } 315 316 static ssize_t state_store(struct device *dev, struct device_attribute *attr, 317 const char *buf, size_t count) 318 { 319 struct memory_block *mem = to_memory_block(dev); 320 int ret, online_type; 321 322 ret = lock_device_hotplug_sysfs(); 323 if (ret) 324 return ret; 325 326 if (sysfs_streq(buf, "online_kernel")) 327 online_type = MMOP_ONLINE_KERNEL; 328 else if (sysfs_streq(buf, "online_movable")) 329 online_type = MMOP_ONLINE_MOVABLE; 330 else if (sysfs_streq(buf, "online")) 331 online_type = MMOP_ONLINE_KEEP; 332 else if (sysfs_streq(buf, "offline")) 333 online_type = MMOP_OFFLINE; 334 else { 335 ret = -EINVAL; 336 goto err; 337 } 338 339 switch (online_type) { 340 case MMOP_ONLINE_KERNEL: 341 case MMOP_ONLINE_MOVABLE: 342 case MMOP_ONLINE_KEEP: 343 /* mem->online_type is protected by device_hotplug_lock */ 344 mem->online_type = online_type; 345 ret = device_online(&mem->dev); 346 break; 347 case MMOP_OFFLINE: 348 ret = device_offline(&mem->dev); 349 break; 350 default: 351 ret = -EINVAL; /* should never happen */ 352 } 353 354 err: 355 unlock_device_hotplug(); 356 357 if (ret < 0) 358 return ret; 359 if (ret) 360 return -EINVAL; 361 362 return count; 363 } 364 365 /* 366 * phys_device is a bad name for this. What I really want 367 * is a way to differentiate between memory ranges that 368 * are part of physical devices that constitute 369 * a complete removable unit or fru. 370 * i.e. do these ranges belong to the same physical device, 371 * s.t. if I offline all of these sections I can then 372 * remove the physical device? 373 */ 374 static ssize_t phys_device_show(struct device *dev, 375 struct device_attribute *attr, char *buf) 376 { 377 struct memory_block *mem = to_memory_block(dev); 378 return sprintf(buf, "%d\n", mem->phys_device); 379 } 380 381 #ifdef CONFIG_MEMORY_HOTREMOVE 382 static void print_allowed_zone(char *buf, int nid, unsigned long start_pfn, 383 unsigned long nr_pages, int online_type, 384 struct zone *default_zone) 385 { 386 struct zone *zone; 387 388 zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages); 389 if (zone != default_zone) { 390 strcat(buf, " "); 391 strcat(buf, zone->name); 392 } 393 } 394 395 static ssize_t valid_zones_show(struct device *dev, 396 struct device_attribute *attr, char *buf) 397 { 398 struct memory_block *mem = to_memory_block(dev); 399 unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); 400 unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; 401 unsigned long valid_start_pfn, valid_end_pfn; 402 struct zone *default_zone; 403 int nid; 404 405 /* 406 * Check the existing zone. Make sure that we do that only on the 407 * online nodes otherwise the page_zone is not reliable 408 */ 409 if (mem->state == MEM_ONLINE) { 410 /* 411 * The block contains more than one zone can not be offlined. 412 * This can happen e.g. for ZONE_DMA and ZONE_DMA32 413 */ 414 if (!test_pages_in_a_zone(start_pfn, start_pfn + nr_pages, 415 &valid_start_pfn, &valid_end_pfn)) 416 return sprintf(buf, "none\n"); 417 start_pfn = valid_start_pfn; 418 strcat(buf, page_zone(pfn_to_page(start_pfn))->name); 419 goto out; 420 } 421 422 nid = mem->nid; 423 default_zone = zone_for_pfn_range(MMOP_ONLINE_KEEP, nid, start_pfn, nr_pages); 424 strcat(buf, default_zone->name); 425 426 print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_KERNEL, 427 default_zone); 428 print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_MOVABLE, 429 default_zone); 430 out: 431 strcat(buf, "\n"); 432 433 return strlen(buf); 434 } 435 static DEVICE_ATTR_RO(valid_zones); 436 #endif 437 438 static DEVICE_ATTR_RO(phys_index); 439 static DEVICE_ATTR_RW(state); 440 static DEVICE_ATTR_RO(phys_device); 441 static DEVICE_ATTR_RO(removable); 442 443 /* 444 * Show the memory block size (shared by all memory blocks). 445 */ 446 static ssize_t block_size_bytes_show(struct device *dev, 447 struct device_attribute *attr, char *buf) 448 { 449 return sprintf(buf, "%lx\n", memory_block_size_bytes()); 450 } 451 452 static DEVICE_ATTR_RO(block_size_bytes); 453 454 /* 455 * Memory auto online policy. 456 */ 457 458 static ssize_t auto_online_blocks_show(struct device *dev, 459 struct device_attribute *attr, char *buf) 460 { 461 if (memhp_auto_online) 462 return sprintf(buf, "online\n"); 463 else 464 return sprintf(buf, "offline\n"); 465 } 466 467 static ssize_t auto_online_blocks_store(struct device *dev, 468 struct device_attribute *attr, 469 const char *buf, size_t count) 470 { 471 if (sysfs_streq(buf, "online")) 472 memhp_auto_online = true; 473 else if (sysfs_streq(buf, "offline")) 474 memhp_auto_online = false; 475 else 476 return -EINVAL; 477 478 return count; 479 } 480 481 static DEVICE_ATTR_RW(auto_online_blocks); 482 483 /* 484 * Some architectures will have custom drivers to do this, and 485 * will not need to do it from userspace. The fake hot-add code 486 * as well as ppc64 will do all of their discovery in userspace 487 * and will require this interface. 488 */ 489 #ifdef CONFIG_ARCH_MEMORY_PROBE 490 static ssize_t probe_store(struct device *dev, struct device_attribute *attr, 491 const char *buf, size_t count) 492 { 493 u64 phys_addr; 494 int nid, ret; 495 unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block; 496 497 ret = kstrtoull(buf, 0, &phys_addr); 498 if (ret) 499 return ret; 500 501 if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1)) 502 return -EINVAL; 503 504 ret = lock_device_hotplug_sysfs(); 505 if (ret) 506 return ret; 507 508 nid = memory_add_physaddr_to_nid(phys_addr); 509 ret = __add_memory(nid, phys_addr, 510 MIN_MEMORY_BLOCK_SIZE * sections_per_block); 511 512 if (ret) 513 goto out; 514 515 ret = count; 516 out: 517 unlock_device_hotplug(); 518 return ret; 519 } 520 521 static DEVICE_ATTR_WO(probe); 522 #endif 523 524 #ifdef CONFIG_MEMORY_FAILURE 525 /* 526 * Support for offlining pages of memory 527 */ 528 529 /* Soft offline a page */ 530 static ssize_t soft_offline_page_store(struct device *dev, 531 struct device_attribute *attr, 532 const char *buf, size_t count) 533 { 534 int ret; 535 u64 pfn; 536 if (!capable(CAP_SYS_ADMIN)) 537 return -EPERM; 538 if (kstrtoull(buf, 0, &pfn) < 0) 539 return -EINVAL; 540 pfn >>= PAGE_SHIFT; 541 if (!pfn_valid(pfn)) 542 return -ENXIO; 543 /* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */ 544 if (!pfn_to_online_page(pfn)) 545 return -EIO; 546 ret = soft_offline_page(pfn_to_page(pfn), 0); 547 return ret == 0 ? count : ret; 548 } 549 550 /* Forcibly offline a page, including killing processes. */ 551 static ssize_t hard_offline_page_store(struct device *dev, 552 struct device_attribute *attr, 553 const char *buf, size_t count) 554 { 555 int ret; 556 u64 pfn; 557 if (!capable(CAP_SYS_ADMIN)) 558 return -EPERM; 559 if (kstrtoull(buf, 0, &pfn) < 0) 560 return -EINVAL; 561 pfn >>= PAGE_SHIFT; 562 ret = memory_failure(pfn, 0); 563 return ret ? ret : count; 564 } 565 566 static DEVICE_ATTR_WO(soft_offline_page); 567 static DEVICE_ATTR_WO(hard_offline_page); 568 #endif 569 570 /* 571 * Note that phys_device is optional. It is here to allow for 572 * differentiation between which *physical* devices each 573 * section belongs to... 574 */ 575 int __weak arch_get_memory_phys_device(unsigned long start_pfn) 576 { 577 return 0; 578 } 579 580 /* A reference for the returned memory block device is acquired. */ 581 static struct memory_block *find_memory_block_by_id(unsigned long block_id) 582 { 583 struct device *dev; 584 585 dev = subsys_find_device_by_id(&memory_subsys, block_id, NULL); 586 return dev ? to_memory_block(dev) : NULL; 587 } 588 589 /* 590 * For now, we have a linear search to go find the appropriate 591 * memory_block corresponding to a particular phys_index. If 592 * this gets to be a real problem, we can always use a radix 593 * tree or something here. 594 * 595 * This could be made generic for all device subsystems. 596 */ 597 struct memory_block *find_memory_block(struct mem_section *section) 598 { 599 unsigned long block_id = base_memory_block_id(__section_nr(section)); 600 601 return find_memory_block_by_id(block_id); 602 } 603 604 static struct attribute *memory_memblk_attrs[] = { 605 &dev_attr_phys_index.attr, 606 &dev_attr_state.attr, 607 &dev_attr_phys_device.attr, 608 &dev_attr_removable.attr, 609 #ifdef CONFIG_MEMORY_HOTREMOVE 610 &dev_attr_valid_zones.attr, 611 #endif 612 NULL 613 }; 614 615 static struct attribute_group memory_memblk_attr_group = { 616 .attrs = memory_memblk_attrs, 617 }; 618 619 static const struct attribute_group *memory_memblk_attr_groups[] = { 620 &memory_memblk_attr_group, 621 NULL, 622 }; 623 624 /* 625 * register_memory - Setup a sysfs device for a memory block 626 */ 627 static 628 int register_memory(struct memory_block *memory) 629 { 630 int ret; 631 632 memory->dev.bus = &memory_subsys; 633 memory->dev.id = memory->start_section_nr / sections_per_block; 634 memory->dev.release = memory_block_release; 635 memory->dev.groups = memory_memblk_attr_groups; 636 memory->dev.offline = memory->state == MEM_OFFLINE; 637 638 ret = device_register(&memory->dev); 639 if (ret) 640 put_device(&memory->dev); 641 642 return ret; 643 } 644 645 static int init_memory_block(struct memory_block **memory, 646 unsigned long block_id, unsigned long state) 647 { 648 struct memory_block *mem; 649 unsigned long start_pfn; 650 int ret = 0; 651 652 mem = find_memory_block_by_id(block_id); 653 if (mem) { 654 put_device(&mem->dev); 655 return -EEXIST; 656 } 657 mem = kzalloc(sizeof(*mem), GFP_KERNEL); 658 if (!mem) 659 return -ENOMEM; 660 661 mem->start_section_nr = block_id * sections_per_block; 662 mem->state = state; 663 start_pfn = section_nr_to_pfn(mem->start_section_nr); 664 mem->phys_device = arch_get_memory_phys_device(start_pfn); 665 mem->nid = NUMA_NO_NODE; 666 667 ret = register_memory(mem); 668 669 *memory = mem; 670 return ret; 671 } 672 673 static int add_memory_block(unsigned long base_section_nr) 674 { 675 int ret, section_count = 0; 676 struct memory_block *mem; 677 unsigned long nr; 678 679 for (nr = base_section_nr; nr < base_section_nr + sections_per_block; 680 nr++) 681 if (present_section_nr(nr)) 682 section_count++; 683 684 if (section_count == 0) 685 return 0; 686 ret = init_memory_block(&mem, base_memory_block_id(base_section_nr), 687 MEM_ONLINE); 688 if (ret) 689 return ret; 690 mem->section_count = section_count; 691 return 0; 692 } 693 694 static void unregister_memory(struct memory_block *memory) 695 { 696 if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys)) 697 return; 698 699 /* drop the ref. we got via find_memory_block() */ 700 put_device(&memory->dev); 701 device_unregister(&memory->dev); 702 } 703 704 /* 705 * Create memory block devices for the given memory area. Start and size 706 * have to be aligned to memory block granularity. Memory block devices 707 * will be initialized as offline. 708 */ 709 int create_memory_block_devices(unsigned long start, unsigned long size) 710 { 711 const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); 712 unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); 713 struct memory_block *mem; 714 unsigned long block_id; 715 int ret = 0; 716 717 if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) || 718 !IS_ALIGNED(size, memory_block_size_bytes()))) 719 return -EINVAL; 720 721 mutex_lock(&mem_sysfs_mutex); 722 for (block_id = start_block_id; block_id != end_block_id; block_id++) { 723 ret = init_memory_block(&mem, block_id, MEM_OFFLINE); 724 if (ret) 725 break; 726 mem->section_count = sections_per_block; 727 } 728 if (ret) { 729 end_block_id = block_id; 730 for (block_id = start_block_id; block_id != end_block_id; 731 block_id++) { 732 mem = find_memory_block_by_id(block_id); 733 mem->section_count = 0; 734 unregister_memory(mem); 735 } 736 } 737 mutex_unlock(&mem_sysfs_mutex); 738 return ret; 739 } 740 741 /* 742 * Remove memory block devices for the given memory area. Start and size 743 * have to be aligned to memory block granularity. Memory block devices 744 * have to be offline. 745 */ 746 void remove_memory_block_devices(unsigned long start, unsigned long size) 747 { 748 const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); 749 const unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); 750 struct memory_block *mem; 751 unsigned long block_id; 752 753 if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) || 754 !IS_ALIGNED(size, memory_block_size_bytes()))) 755 return; 756 757 mutex_lock(&mem_sysfs_mutex); 758 for (block_id = start_block_id; block_id != end_block_id; block_id++) { 759 mem = find_memory_block_by_id(block_id); 760 if (WARN_ON_ONCE(!mem)) 761 continue; 762 mem->section_count = 0; 763 unregister_memory_block_under_nodes(mem); 764 unregister_memory(mem); 765 } 766 mutex_unlock(&mem_sysfs_mutex); 767 } 768 769 /* return true if the memory block is offlined, otherwise, return false */ 770 bool is_memblock_offlined(struct memory_block *mem) 771 { 772 return mem->state == MEM_OFFLINE; 773 } 774 775 static struct attribute *memory_root_attrs[] = { 776 #ifdef CONFIG_ARCH_MEMORY_PROBE 777 &dev_attr_probe.attr, 778 #endif 779 780 #ifdef CONFIG_MEMORY_FAILURE 781 &dev_attr_soft_offline_page.attr, 782 &dev_attr_hard_offline_page.attr, 783 #endif 784 785 &dev_attr_block_size_bytes.attr, 786 &dev_attr_auto_online_blocks.attr, 787 NULL 788 }; 789 790 static struct attribute_group memory_root_attr_group = { 791 .attrs = memory_root_attrs, 792 }; 793 794 static const struct attribute_group *memory_root_attr_groups[] = { 795 &memory_root_attr_group, 796 NULL, 797 }; 798 799 /* 800 * Initialize the sysfs support for memory devices... 801 */ 802 void __init memory_dev_init(void) 803 { 804 int ret; 805 int err; 806 unsigned long block_sz, nr; 807 808 /* Validate the configured memory block size */ 809 block_sz = memory_block_size_bytes(); 810 if (!is_power_of_2(block_sz) || block_sz < MIN_MEMORY_BLOCK_SIZE) 811 panic("Memory block size not suitable: 0x%lx\n", block_sz); 812 sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; 813 814 ret = subsys_system_register(&memory_subsys, memory_root_attr_groups); 815 if (ret) 816 goto out; 817 818 /* 819 * Create entries for memory sections that were found 820 * during boot and have been initialized 821 */ 822 mutex_lock(&mem_sysfs_mutex); 823 for (nr = 0; nr <= __highest_present_section_nr; 824 nr += sections_per_block) { 825 err = add_memory_block(nr); 826 if (!ret) 827 ret = err; 828 } 829 mutex_unlock(&mem_sysfs_mutex); 830 831 out: 832 if (ret) 833 panic("%s() failed: %d\n", __func__, ret); 834 } 835 836 /** 837 * walk_memory_blocks - walk through all present memory blocks overlapped 838 * by the range [start, start + size) 839 * 840 * @start: start address of the memory range 841 * @size: size of the memory range 842 * @arg: argument passed to func 843 * @func: callback for each memory section walked 844 * 845 * This function walks through all present memory blocks overlapped by the 846 * range [start, start + size), calling func on each memory block. 847 * 848 * In case func() returns an error, walking is aborted and the error is 849 * returned. 850 */ 851 int walk_memory_blocks(unsigned long start, unsigned long size, 852 void *arg, walk_memory_blocks_func_t func) 853 { 854 const unsigned long start_block_id = phys_to_block_id(start); 855 const unsigned long end_block_id = phys_to_block_id(start + size - 1); 856 struct memory_block *mem; 857 unsigned long block_id; 858 int ret = 0; 859 860 if (!size) 861 return 0; 862 863 for (block_id = start_block_id; block_id <= end_block_id; block_id++) { 864 mem = find_memory_block_by_id(block_id); 865 if (!mem) 866 continue; 867 868 ret = func(mem, arg); 869 put_device(&mem->dev); 870 if (ret) 871 break; 872 } 873 return ret; 874 } 875