1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Memory subsystem support 4 * 5 * Written by Matt Tolentino <matthew.e.tolentino@intel.com> 6 * Dave Hansen <haveblue@us.ibm.com> 7 * 8 * This file provides the necessary infrastructure to represent 9 * a SPARSEMEM-memory-model system's physical memory in /sysfs. 10 * All arch-independent code that assumes MEMORY_HOTPLUG requires 11 * SPARSEMEM should be contained here, or in mm/memory_hotplug.c. 12 */ 13 14 #include <linux/module.h> 15 #include <linux/init.h> 16 #include <linux/topology.h> 17 #include <linux/capability.h> 18 #include <linux/device.h> 19 #include <linux/memory.h> 20 #include <linux/memory_hotplug.h> 21 #include <linux/mm.h> 22 #include <linux/mutex.h> 23 #include <linux/stat.h> 24 #include <linux/slab.h> 25 26 #include <linux/atomic.h> 27 #include <linux/uaccess.h> 28 29 static DEFINE_MUTEX(mem_sysfs_mutex); 30 31 #define MEMORY_CLASS_NAME "memory" 32 33 #define to_memory_block(dev) container_of(dev, struct memory_block, dev) 34 35 static int sections_per_block; 36 37 static inline unsigned long base_memory_block_id(unsigned long section_nr) 38 { 39 return section_nr / sections_per_block; 40 } 41 42 static inline unsigned long pfn_to_block_id(unsigned long pfn) 43 { 44 return base_memory_block_id(pfn_to_section_nr(pfn)); 45 } 46 47 static inline unsigned long phys_to_block_id(unsigned long phys) 48 { 49 return pfn_to_block_id(PFN_DOWN(phys)); 50 } 51 52 static int memory_subsys_online(struct device *dev); 53 static int memory_subsys_offline(struct device *dev); 54 55 static struct bus_type memory_subsys = { 56 .name = MEMORY_CLASS_NAME, 57 .dev_name = MEMORY_CLASS_NAME, 58 .online = memory_subsys_online, 59 .offline = memory_subsys_offline, 60 }; 61 62 static BLOCKING_NOTIFIER_HEAD(memory_chain); 63 64 int register_memory_notifier(struct notifier_block *nb) 65 { 66 return blocking_notifier_chain_register(&memory_chain, nb); 67 } 68 EXPORT_SYMBOL(register_memory_notifier); 69 70 void unregister_memory_notifier(struct notifier_block *nb) 71 { 72 blocking_notifier_chain_unregister(&memory_chain, nb); 73 } 74 EXPORT_SYMBOL(unregister_memory_notifier); 75 76 static ATOMIC_NOTIFIER_HEAD(memory_isolate_chain); 77 78 int register_memory_isolate_notifier(struct notifier_block *nb) 79 { 80 return atomic_notifier_chain_register(&memory_isolate_chain, nb); 81 } 82 EXPORT_SYMBOL(register_memory_isolate_notifier); 83 84 void unregister_memory_isolate_notifier(struct notifier_block *nb) 85 { 86 atomic_notifier_chain_unregister(&memory_isolate_chain, nb); 87 } 88 EXPORT_SYMBOL(unregister_memory_isolate_notifier); 89 90 static void memory_block_release(struct device *dev) 91 { 92 struct memory_block *mem = to_memory_block(dev); 93 94 kfree(mem); 95 } 96 97 unsigned long __weak memory_block_size_bytes(void) 98 { 99 return MIN_MEMORY_BLOCK_SIZE; 100 } 101 EXPORT_SYMBOL_GPL(memory_block_size_bytes); 102 103 static unsigned long get_memory_block_size(void) 104 { 105 unsigned long block_sz; 106 107 block_sz = memory_block_size_bytes(); 108 109 /* Validate blk_sz is a power of 2 and not less than section size */ 110 if ((block_sz & (block_sz - 1)) || (block_sz < MIN_MEMORY_BLOCK_SIZE)) { 111 WARN_ON(1); 112 block_sz = MIN_MEMORY_BLOCK_SIZE; 113 } 114 115 return block_sz; 116 } 117 118 /* 119 * use this as the physical section index that this memsection 120 * uses. 121 */ 122 123 static ssize_t phys_index_show(struct device *dev, 124 struct device_attribute *attr, char *buf) 125 { 126 struct memory_block *mem = to_memory_block(dev); 127 unsigned long phys_index; 128 129 phys_index = mem->start_section_nr / sections_per_block; 130 return sprintf(buf, "%08lx\n", phys_index); 131 } 132 133 /* 134 * Show whether the section of memory is likely to be hot-removable 135 */ 136 static ssize_t removable_show(struct device *dev, struct device_attribute *attr, 137 char *buf) 138 { 139 struct memory_block *mem = to_memory_block(dev); 140 unsigned long pfn; 141 int ret = 1, i; 142 143 if (mem->state != MEM_ONLINE) 144 goto out; 145 146 for (i = 0; i < sections_per_block; i++) { 147 if (!present_section_nr(mem->start_section_nr + i)) 148 continue; 149 pfn = section_nr_to_pfn(mem->start_section_nr + i); 150 ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION); 151 } 152 153 out: 154 return sprintf(buf, "%d\n", ret); 155 } 156 157 /* 158 * online, offline, going offline, etc. 159 */ 160 static ssize_t state_show(struct device *dev, struct device_attribute *attr, 161 char *buf) 162 { 163 struct memory_block *mem = to_memory_block(dev); 164 ssize_t len = 0; 165 166 /* 167 * We can probably put these states in a nice little array 168 * so that they're not open-coded 169 */ 170 switch (mem->state) { 171 case MEM_ONLINE: 172 len = sprintf(buf, "online\n"); 173 break; 174 case MEM_OFFLINE: 175 len = sprintf(buf, "offline\n"); 176 break; 177 case MEM_GOING_OFFLINE: 178 len = sprintf(buf, "going-offline\n"); 179 break; 180 default: 181 len = sprintf(buf, "ERROR-UNKNOWN-%ld\n", 182 mem->state); 183 WARN_ON(1); 184 break; 185 } 186 187 return len; 188 } 189 190 int memory_notify(unsigned long val, void *v) 191 { 192 return blocking_notifier_call_chain(&memory_chain, val, v); 193 } 194 195 int memory_isolate_notify(unsigned long val, void *v) 196 { 197 return atomic_notifier_call_chain(&memory_isolate_chain, val, v); 198 } 199 200 /* 201 * The probe routines leave the pages uninitialized, just as the bootmem code 202 * does. Make sure we do not access them, but instead use only information from 203 * within sections. 204 */ 205 static bool pages_correctly_probed(unsigned long start_pfn) 206 { 207 unsigned long section_nr = pfn_to_section_nr(start_pfn); 208 unsigned long section_nr_end = section_nr + sections_per_block; 209 unsigned long pfn = start_pfn; 210 211 /* 212 * memmap between sections is not contiguous except with 213 * SPARSEMEM_VMEMMAP. We lookup the page once per section 214 * and assume memmap is contiguous within each section 215 */ 216 for (; section_nr < section_nr_end; section_nr++) { 217 if (WARN_ON_ONCE(!pfn_valid(pfn))) 218 return false; 219 220 if (!present_section_nr(section_nr)) { 221 pr_warn("section %ld pfn[%lx, %lx) not present\n", 222 section_nr, pfn, pfn + PAGES_PER_SECTION); 223 return false; 224 } else if (!valid_section_nr(section_nr)) { 225 pr_warn("section %ld pfn[%lx, %lx) no valid memmap\n", 226 section_nr, pfn, pfn + PAGES_PER_SECTION); 227 return false; 228 } else if (online_section_nr(section_nr)) { 229 pr_warn("section %ld pfn[%lx, %lx) is already online\n", 230 section_nr, pfn, pfn + PAGES_PER_SECTION); 231 return false; 232 } 233 pfn += PAGES_PER_SECTION; 234 } 235 236 return true; 237 } 238 239 /* 240 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is 241 * OK to have direct references to sparsemem variables in here. 242 */ 243 static int 244 memory_block_action(unsigned long start_section_nr, unsigned long action, 245 int online_type) 246 { 247 unsigned long start_pfn; 248 unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; 249 int ret; 250 251 start_pfn = section_nr_to_pfn(start_section_nr); 252 253 switch (action) { 254 case MEM_ONLINE: 255 if (!pages_correctly_probed(start_pfn)) 256 return -EBUSY; 257 258 ret = online_pages(start_pfn, nr_pages, online_type); 259 break; 260 case MEM_OFFLINE: 261 ret = offline_pages(start_pfn, nr_pages); 262 break; 263 default: 264 WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: " 265 "%ld\n", __func__, start_section_nr, action, action); 266 ret = -EINVAL; 267 } 268 269 return ret; 270 } 271 272 static int memory_block_change_state(struct memory_block *mem, 273 unsigned long to_state, unsigned long from_state_req) 274 { 275 int ret = 0; 276 277 if (mem->state != from_state_req) 278 return -EINVAL; 279 280 if (to_state == MEM_OFFLINE) 281 mem->state = MEM_GOING_OFFLINE; 282 283 ret = memory_block_action(mem->start_section_nr, to_state, 284 mem->online_type); 285 286 mem->state = ret ? from_state_req : to_state; 287 288 return ret; 289 } 290 291 /* The device lock serializes operations on memory_subsys_[online|offline] */ 292 static int memory_subsys_online(struct device *dev) 293 { 294 struct memory_block *mem = to_memory_block(dev); 295 int ret; 296 297 if (mem->state == MEM_ONLINE) 298 return 0; 299 300 /* 301 * If we are called from state_store(), online_type will be 302 * set >= 0 Otherwise we were called from the device online 303 * attribute and need to set the online_type. 304 */ 305 if (mem->online_type < 0) 306 mem->online_type = MMOP_ONLINE_KEEP; 307 308 ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); 309 310 /* clear online_type */ 311 mem->online_type = -1; 312 313 return ret; 314 } 315 316 static int memory_subsys_offline(struct device *dev) 317 { 318 struct memory_block *mem = to_memory_block(dev); 319 320 if (mem->state == MEM_OFFLINE) 321 return 0; 322 323 /* Can't offline block with non-present sections */ 324 if (mem->section_count != sections_per_block) 325 return -EINVAL; 326 327 return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE); 328 } 329 330 static ssize_t state_store(struct device *dev, struct device_attribute *attr, 331 const char *buf, size_t count) 332 { 333 struct memory_block *mem = to_memory_block(dev); 334 int ret, online_type; 335 336 ret = lock_device_hotplug_sysfs(); 337 if (ret) 338 return ret; 339 340 if (sysfs_streq(buf, "online_kernel")) 341 online_type = MMOP_ONLINE_KERNEL; 342 else if (sysfs_streq(buf, "online_movable")) 343 online_type = MMOP_ONLINE_MOVABLE; 344 else if (sysfs_streq(buf, "online")) 345 online_type = MMOP_ONLINE_KEEP; 346 else if (sysfs_streq(buf, "offline")) 347 online_type = MMOP_OFFLINE; 348 else { 349 ret = -EINVAL; 350 goto err; 351 } 352 353 switch (online_type) { 354 case MMOP_ONLINE_KERNEL: 355 case MMOP_ONLINE_MOVABLE: 356 case MMOP_ONLINE_KEEP: 357 /* mem->online_type is protected by device_hotplug_lock */ 358 mem->online_type = online_type; 359 ret = device_online(&mem->dev); 360 break; 361 case MMOP_OFFLINE: 362 ret = device_offline(&mem->dev); 363 break; 364 default: 365 ret = -EINVAL; /* should never happen */ 366 } 367 368 err: 369 unlock_device_hotplug(); 370 371 if (ret < 0) 372 return ret; 373 if (ret) 374 return -EINVAL; 375 376 return count; 377 } 378 379 /* 380 * phys_device is a bad name for this. What I really want 381 * is a way to differentiate between memory ranges that 382 * are part of physical devices that constitute 383 * a complete removable unit or fru. 384 * i.e. do these ranges belong to the same physical device, 385 * s.t. if I offline all of these sections I can then 386 * remove the physical device? 387 */ 388 static ssize_t phys_device_show(struct device *dev, 389 struct device_attribute *attr, char *buf) 390 { 391 struct memory_block *mem = to_memory_block(dev); 392 return sprintf(buf, "%d\n", mem->phys_device); 393 } 394 395 #ifdef CONFIG_MEMORY_HOTREMOVE 396 static void print_allowed_zone(char *buf, int nid, unsigned long start_pfn, 397 unsigned long nr_pages, int online_type, 398 struct zone *default_zone) 399 { 400 struct zone *zone; 401 402 zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages); 403 if (zone != default_zone) { 404 strcat(buf, " "); 405 strcat(buf, zone->name); 406 } 407 } 408 409 static ssize_t valid_zones_show(struct device *dev, 410 struct device_attribute *attr, char *buf) 411 { 412 struct memory_block *mem = to_memory_block(dev); 413 unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); 414 unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; 415 unsigned long valid_start_pfn, valid_end_pfn; 416 struct zone *default_zone; 417 int nid; 418 419 /* 420 * Check the existing zone. Make sure that we do that only on the 421 * online nodes otherwise the page_zone is not reliable 422 */ 423 if (mem->state == MEM_ONLINE) { 424 /* 425 * The block contains more than one zone can not be offlined. 426 * This can happen e.g. for ZONE_DMA and ZONE_DMA32 427 */ 428 if (!test_pages_in_a_zone(start_pfn, start_pfn + nr_pages, 429 &valid_start_pfn, &valid_end_pfn)) 430 return sprintf(buf, "none\n"); 431 start_pfn = valid_start_pfn; 432 strcat(buf, page_zone(pfn_to_page(start_pfn))->name); 433 goto out; 434 } 435 436 nid = mem->nid; 437 default_zone = zone_for_pfn_range(MMOP_ONLINE_KEEP, nid, start_pfn, nr_pages); 438 strcat(buf, default_zone->name); 439 440 print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_KERNEL, 441 default_zone); 442 print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_MOVABLE, 443 default_zone); 444 out: 445 strcat(buf, "\n"); 446 447 return strlen(buf); 448 } 449 static DEVICE_ATTR_RO(valid_zones); 450 #endif 451 452 static DEVICE_ATTR_RO(phys_index); 453 static DEVICE_ATTR_RW(state); 454 static DEVICE_ATTR_RO(phys_device); 455 static DEVICE_ATTR_RO(removable); 456 457 /* 458 * Block size attribute stuff 459 */ 460 static ssize_t block_size_bytes_show(struct device *dev, 461 struct device_attribute *attr, char *buf) 462 { 463 return sprintf(buf, "%lx\n", get_memory_block_size()); 464 } 465 466 static DEVICE_ATTR_RO(block_size_bytes); 467 468 /* 469 * Memory auto online policy. 470 */ 471 472 static ssize_t auto_online_blocks_show(struct device *dev, 473 struct device_attribute *attr, char *buf) 474 { 475 if (memhp_auto_online) 476 return sprintf(buf, "online\n"); 477 else 478 return sprintf(buf, "offline\n"); 479 } 480 481 static ssize_t auto_online_blocks_store(struct device *dev, 482 struct device_attribute *attr, 483 const char *buf, size_t count) 484 { 485 if (sysfs_streq(buf, "online")) 486 memhp_auto_online = true; 487 else if (sysfs_streq(buf, "offline")) 488 memhp_auto_online = false; 489 else 490 return -EINVAL; 491 492 return count; 493 } 494 495 static DEVICE_ATTR_RW(auto_online_blocks); 496 497 /* 498 * Some architectures will have custom drivers to do this, and 499 * will not need to do it from userspace. The fake hot-add code 500 * as well as ppc64 will do all of their discovery in userspace 501 * and will require this interface. 502 */ 503 #ifdef CONFIG_ARCH_MEMORY_PROBE 504 static ssize_t probe_store(struct device *dev, struct device_attribute *attr, 505 const char *buf, size_t count) 506 { 507 u64 phys_addr; 508 int nid, ret; 509 unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block; 510 511 ret = kstrtoull(buf, 0, &phys_addr); 512 if (ret) 513 return ret; 514 515 if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1)) 516 return -EINVAL; 517 518 ret = lock_device_hotplug_sysfs(); 519 if (ret) 520 return ret; 521 522 nid = memory_add_physaddr_to_nid(phys_addr); 523 ret = __add_memory(nid, phys_addr, 524 MIN_MEMORY_BLOCK_SIZE * sections_per_block); 525 526 if (ret) 527 goto out; 528 529 ret = count; 530 out: 531 unlock_device_hotplug(); 532 return ret; 533 } 534 535 static DEVICE_ATTR_WO(probe); 536 #endif 537 538 #ifdef CONFIG_MEMORY_FAILURE 539 /* 540 * Support for offlining pages of memory 541 */ 542 543 /* Soft offline a page */ 544 static ssize_t soft_offline_page_store(struct device *dev, 545 struct device_attribute *attr, 546 const char *buf, size_t count) 547 { 548 int ret; 549 u64 pfn; 550 if (!capable(CAP_SYS_ADMIN)) 551 return -EPERM; 552 if (kstrtoull(buf, 0, &pfn) < 0) 553 return -EINVAL; 554 pfn >>= PAGE_SHIFT; 555 if (!pfn_valid(pfn)) 556 return -ENXIO; 557 ret = soft_offline_page(pfn_to_page(pfn), 0); 558 return ret == 0 ? count : ret; 559 } 560 561 /* Forcibly offline a page, including killing processes. */ 562 static ssize_t hard_offline_page_store(struct device *dev, 563 struct device_attribute *attr, 564 const char *buf, size_t count) 565 { 566 int ret; 567 u64 pfn; 568 if (!capable(CAP_SYS_ADMIN)) 569 return -EPERM; 570 if (kstrtoull(buf, 0, &pfn) < 0) 571 return -EINVAL; 572 pfn >>= PAGE_SHIFT; 573 ret = memory_failure(pfn, 0); 574 return ret ? ret : count; 575 } 576 577 static DEVICE_ATTR_WO(soft_offline_page); 578 static DEVICE_ATTR_WO(hard_offline_page); 579 #endif 580 581 /* 582 * Note that phys_device is optional. It is here to allow for 583 * differentiation between which *physical* devices each 584 * section belongs to... 585 */ 586 int __weak arch_get_memory_phys_device(unsigned long start_pfn) 587 { 588 return 0; 589 } 590 591 /* A reference for the returned memory block device is acquired. */ 592 static struct memory_block *find_memory_block_by_id(unsigned long block_id) 593 { 594 struct device *dev; 595 596 dev = subsys_find_device_by_id(&memory_subsys, block_id, NULL); 597 return dev ? to_memory_block(dev) : NULL; 598 } 599 600 /* 601 * For now, we have a linear search to go find the appropriate 602 * memory_block corresponding to a particular phys_index. If 603 * this gets to be a real problem, we can always use a radix 604 * tree or something here. 605 * 606 * This could be made generic for all device subsystems. 607 */ 608 struct memory_block *find_memory_block(struct mem_section *section) 609 { 610 unsigned long block_id = base_memory_block_id(__section_nr(section)); 611 612 return find_memory_block_by_id(block_id); 613 } 614 615 static struct attribute *memory_memblk_attrs[] = { 616 &dev_attr_phys_index.attr, 617 &dev_attr_state.attr, 618 &dev_attr_phys_device.attr, 619 &dev_attr_removable.attr, 620 #ifdef CONFIG_MEMORY_HOTREMOVE 621 &dev_attr_valid_zones.attr, 622 #endif 623 NULL 624 }; 625 626 static struct attribute_group memory_memblk_attr_group = { 627 .attrs = memory_memblk_attrs, 628 }; 629 630 static const struct attribute_group *memory_memblk_attr_groups[] = { 631 &memory_memblk_attr_group, 632 NULL, 633 }; 634 635 /* 636 * register_memory - Setup a sysfs device for a memory block 637 */ 638 static 639 int register_memory(struct memory_block *memory) 640 { 641 int ret; 642 643 memory->dev.bus = &memory_subsys; 644 memory->dev.id = memory->start_section_nr / sections_per_block; 645 memory->dev.release = memory_block_release; 646 memory->dev.groups = memory_memblk_attr_groups; 647 memory->dev.offline = memory->state == MEM_OFFLINE; 648 649 ret = device_register(&memory->dev); 650 if (ret) 651 put_device(&memory->dev); 652 653 return ret; 654 } 655 656 static int init_memory_block(struct memory_block **memory, 657 unsigned long block_id, unsigned long state) 658 { 659 struct memory_block *mem; 660 unsigned long start_pfn; 661 int ret = 0; 662 663 mem = find_memory_block_by_id(block_id); 664 if (mem) { 665 put_device(&mem->dev); 666 return -EEXIST; 667 } 668 mem = kzalloc(sizeof(*mem), GFP_KERNEL); 669 if (!mem) 670 return -ENOMEM; 671 672 mem->start_section_nr = block_id * sections_per_block; 673 mem->end_section_nr = mem->start_section_nr + sections_per_block - 1; 674 mem->state = state; 675 start_pfn = section_nr_to_pfn(mem->start_section_nr); 676 mem->phys_device = arch_get_memory_phys_device(start_pfn); 677 678 ret = register_memory(mem); 679 680 *memory = mem; 681 return ret; 682 } 683 684 static int add_memory_block(unsigned long base_section_nr) 685 { 686 int ret, section_count = 0; 687 struct memory_block *mem; 688 unsigned long nr; 689 690 for (nr = base_section_nr; nr < base_section_nr + sections_per_block; 691 nr++) 692 if (present_section_nr(nr)) 693 section_count++; 694 695 if (section_count == 0) 696 return 0; 697 ret = init_memory_block(&mem, base_memory_block_id(base_section_nr), 698 MEM_ONLINE); 699 if (ret) 700 return ret; 701 mem->section_count = section_count; 702 return 0; 703 } 704 705 static void unregister_memory(struct memory_block *memory) 706 { 707 if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys)) 708 return; 709 710 /* drop the ref. we got via find_memory_block() */ 711 put_device(&memory->dev); 712 device_unregister(&memory->dev); 713 } 714 715 /* 716 * Create memory block devices for the given memory area. Start and size 717 * have to be aligned to memory block granularity. Memory block devices 718 * will be initialized as offline. 719 */ 720 int create_memory_block_devices(unsigned long start, unsigned long size) 721 { 722 const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); 723 unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); 724 struct memory_block *mem; 725 unsigned long block_id; 726 int ret = 0; 727 728 if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) || 729 !IS_ALIGNED(size, memory_block_size_bytes()))) 730 return -EINVAL; 731 732 mutex_lock(&mem_sysfs_mutex); 733 for (block_id = start_block_id; block_id != end_block_id; block_id++) { 734 ret = init_memory_block(&mem, block_id, MEM_OFFLINE); 735 if (ret) 736 break; 737 mem->section_count = sections_per_block; 738 } 739 if (ret) { 740 end_block_id = block_id; 741 for (block_id = start_block_id; block_id != end_block_id; 742 block_id++) { 743 mem = find_memory_block_by_id(block_id); 744 mem->section_count = 0; 745 unregister_memory(mem); 746 } 747 } 748 mutex_unlock(&mem_sysfs_mutex); 749 return ret; 750 } 751 752 /* 753 * Remove memory block devices for the given memory area. Start and size 754 * have to be aligned to memory block granularity. Memory block devices 755 * have to be offline. 756 */ 757 void remove_memory_block_devices(unsigned long start, unsigned long size) 758 { 759 const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); 760 const unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); 761 struct memory_block *mem; 762 unsigned long block_id; 763 764 if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) || 765 !IS_ALIGNED(size, memory_block_size_bytes()))) 766 return; 767 768 mutex_lock(&mem_sysfs_mutex); 769 for (block_id = start_block_id; block_id != end_block_id; block_id++) { 770 mem = find_memory_block_by_id(block_id); 771 if (WARN_ON_ONCE(!mem)) 772 continue; 773 mem->section_count = 0; 774 unregister_memory_block_under_nodes(mem); 775 unregister_memory(mem); 776 } 777 mutex_unlock(&mem_sysfs_mutex); 778 } 779 780 /* return true if the memory block is offlined, otherwise, return false */ 781 bool is_memblock_offlined(struct memory_block *mem) 782 { 783 return mem->state == MEM_OFFLINE; 784 } 785 786 static struct attribute *memory_root_attrs[] = { 787 #ifdef CONFIG_ARCH_MEMORY_PROBE 788 &dev_attr_probe.attr, 789 #endif 790 791 #ifdef CONFIG_MEMORY_FAILURE 792 &dev_attr_soft_offline_page.attr, 793 &dev_attr_hard_offline_page.attr, 794 #endif 795 796 &dev_attr_block_size_bytes.attr, 797 &dev_attr_auto_online_blocks.attr, 798 NULL 799 }; 800 801 static struct attribute_group memory_root_attr_group = { 802 .attrs = memory_root_attrs, 803 }; 804 805 static const struct attribute_group *memory_root_attr_groups[] = { 806 &memory_root_attr_group, 807 NULL, 808 }; 809 810 /* 811 * Initialize the sysfs support for memory devices... 812 */ 813 int __init memory_dev_init(void) 814 { 815 int ret; 816 int err; 817 unsigned long block_sz, nr; 818 819 ret = subsys_system_register(&memory_subsys, memory_root_attr_groups); 820 if (ret) 821 goto out; 822 823 block_sz = get_memory_block_size(); 824 sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; 825 826 /* 827 * Create entries for memory sections that were found 828 * during boot and have been initialized 829 */ 830 mutex_lock(&mem_sysfs_mutex); 831 for (nr = 0; nr <= __highest_present_section_nr; 832 nr += sections_per_block) { 833 err = add_memory_block(nr); 834 if (!ret) 835 ret = err; 836 } 837 mutex_unlock(&mem_sysfs_mutex); 838 839 out: 840 if (ret) 841 printk(KERN_ERR "%s() failed: %d\n", __func__, ret); 842 return ret; 843 } 844 845 /** 846 * walk_memory_blocks - walk through all present memory blocks overlapped 847 * by the range [start, start + size) 848 * 849 * @start: start address of the memory range 850 * @size: size of the memory range 851 * @arg: argument passed to func 852 * @func: callback for each memory section walked 853 * 854 * This function walks through all present memory blocks overlapped by the 855 * range [start, start + size), calling func on each memory block. 856 * 857 * In case func() returns an error, walking is aborted and the error is 858 * returned. 859 */ 860 int walk_memory_blocks(unsigned long start, unsigned long size, 861 void *arg, walk_memory_blocks_func_t func) 862 { 863 const unsigned long start_block_id = phys_to_block_id(start); 864 const unsigned long end_block_id = phys_to_block_id(start + size - 1); 865 struct memory_block *mem; 866 unsigned long block_id; 867 int ret = 0; 868 869 if (!size) 870 return 0; 871 872 for (block_id = start_block_id; block_id <= end_block_id; block_id++) { 873 mem = find_memory_block_by_id(block_id); 874 if (!mem) 875 continue; 876 877 ret = func(mem, arg); 878 put_device(&mem->dev); 879 if (ret) 880 break; 881 } 882 return ret; 883 } 884