xref: /openbmc/linux/drivers/base/memory.c (revision 6c33a6f4)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Memory subsystem support
4  *
5  * Written by Matt Tolentino <matthew.e.tolentino@intel.com>
6  *            Dave Hansen <haveblue@us.ibm.com>
7  *
8  * This file provides the necessary infrastructure to represent
9  * a SPARSEMEM-memory-model system's physical memory in /sysfs.
10  * All arch-independent code that assumes MEMORY_HOTPLUG requires
11  * SPARSEMEM should be contained here, or in mm/memory_hotplug.c.
12  */
13 
14 #include <linux/module.h>
15 #include <linux/init.h>
16 #include <linux/topology.h>
17 #include <linux/capability.h>
18 #include <linux/device.h>
19 #include <linux/memory.h>
20 #include <linux/memory_hotplug.h>
21 #include <linux/mm.h>
22 #include <linux/stat.h>
23 #include <linux/slab.h>
24 
25 #include <linux/atomic.h>
26 #include <linux/uaccess.h>
27 
/* Name given to the memory bus and used as the device name prefix. */
#define MEMORY_CLASS_NAME	"memory"

/*
 * Map a struct device embedded in a struct memory_block back to its block.
 *
 * The macro parameter must not be spelled "dev": the preprocessor would then
 * also substitute the member name passed to container_of(), and the macro
 * would only work for arguments that happen to be written exactly as "dev".
 */
#define to_memory_block(device) \
	container_of((device), struct memory_block, dev)

/* Number of sections in one memory block; set by memory_dev_init(). */
static int sections_per_block;
33 
34 static inline unsigned long base_memory_block_id(unsigned long section_nr)
35 {
36 	return section_nr / sections_per_block;
37 }
38 
/* Translate a page frame number into the id of its memory block. */
static inline unsigned long pfn_to_block_id(unsigned long pfn)
{
	const unsigned long section_nr = pfn_to_section_nr(pfn);

	return base_memory_block_id(section_nr);
}
43 
/* Translate a physical address into the id of its memory block. */
static inline unsigned long phys_to_block_id(unsigned long phys)
{
	const unsigned long pfn = PFN_DOWN(phys);

	return pfn_to_block_id(pfn);
}
48 
49 static int memory_subsys_online(struct device *dev);
50 static int memory_subsys_offline(struct device *dev);
51 
52 static struct bus_type memory_subsys = {
53 	.name = MEMORY_CLASS_NAME,
54 	.dev_name = MEMORY_CLASS_NAME,
55 	.online = memory_subsys_online,
56 	.offline = memory_subsys_offline,
57 };
58 
59 static BLOCKING_NOTIFIER_HEAD(memory_chain);
60 
61 int register_memory_notifier(struct notifier_block *nb)
62 {
63 	return blocking_notifier_chain_register(&memory_chain, nb);
64 }
65 EXPORT_SYMBOL(register_memory_notifier);
66 
67 void unregister_memory_notifier(struct notifier_block *nb)
68 {
69 	blocking_notifier_chain_unregister(&memory_chain, nb);
70 }
71 EXPORT_SYMBOL(unregister_memory_notifier);
72 
/*
 * Device release callback (installed by register_memory()): frees the
 * memory_block that embeds @dev.
 */
static void memory_block_release(struct device *dev)
{
	kfree(to_memory_block(dev));
}
79 
80 unsigned long __weak memory_block_size_bytes(void)
81 {
82 	return MIN_MEMORY_BLOCK_SIZE;
83 }
84 EXPORT_SYMBOL_GPL(memory_block_size_bytes);
85 
86 /*
87  * Show the first physical section index (number) of this memory block.
88  */
89 static ssize_t phys_index_show(struct device *dev,
90 			       struct device_attribute *attr, char *buf)
91 {
92 	struct memory_block *mem = to_memory_block(dev);
93 	unsigned long phys_index;
94 
95 	phys_index = mem->start_section_nr / sections_per_block;
96 	return sprintf(buf, "%08lx\n", phys_index);
97 }
98 
99 /*
100  * Legacy interface that we cannot remove. Always indicate "removable"
101  * with CONFIG_MEMORY_HOTREMOVE - bad heuristic.
102  */
103 static ssize_t removable_show(struct device *dev, struct device_attribute *attr,
104 			      char *buf)
105 {
106 	return sprintf(buf, "%d\n", (int)IS_ENABLED(CONFIG_MEMORY_HOTREMOVE));
107 }
108 
109 /*
110  * online, offline, going offline, etc.
111  */
112 static ssize_t state_show(struct device *dev, struct device_attribute *attr,
113 			  char *buf)
114 {
115 	struct memory_block *mem = to_memory_block(dev);
116 	ssize_t len = 0;
117 
118 	/*
119 	 * We can probably put these states in a nice little array
120 	 * so that they're not open-coded
121 	 */
122 	switch (mem->state) {
123 	case MEM_ONLINE:
124 		len = sprintf(buf, "online\n");
125 		break;
126 	case MEM_OFFLINE:
127 		len = sprintf(buf, "offline\n");
128 		break;
129 	case MEM_GOING_OFFLINE:
130 		len = sprintf(buf, "going-offline\n");
131 		break;
132 	default:
133 		len = sprintf(buf, "ERROR-UNKNOWN-%ld\n",
134 				mem->state);
135 		WARN_ON(1);
136 		break;
137 	}
138 
139 	return len;
140 }
141 
142 int memory_notify(unsigned long val, void *v)
143 {
144 	return blocking_notifier_call_chain(&memory_chain, val, v);
145 }
146 
147 /*
148  * The probe routines leave the pages uninitialized, just as the bootmem code
149  * does. Make sure we do not access them, but instead use only information from
150  * within sections.
151  */
152 static bool pages_correctly_probed(unsigned long start_pfn)
153 {
154 	unsigned long section_nr = pfn_to_section_nr(start_pfn);
155 	unsigned long section_nr_end = section_nr + sections_per_block;
156 	unsigned long pfn = start_pfn;
157 
158 	/*
159 	 * memmap between sections is not contiguous except with
160 	 * SPARSEMEM_VMEMMAP. We lookup the page once per section
161 	 * and assume memmap is contiguous within each section
162 	 */
163 	for (; section_nr < section_nr_end; section_nr++) {
164 		if (WARN_ON_ONCE(!pfn_valid(pfn)))
165 			return false;
166 
167 		if (!present_section_nr(section_nr)) {
168 			pr_warn("section %ld pfn[%lx, %lx) not present\n",
169 				section_nr, pfn, pfn + PAGES_PER_SECTION);
170 			return false;
171 		} else if (!valid_section_nr(section_nr)) {
172 			pr_warn("section %ld pfn[%lx, %lx) no valid memmap\n",
173 				section_nr, pfn, pfn + PAGES_PER_SECTION);
174 			return false;
175 		} else if (online_section_nr(section_nr)) {
176 			pr_warn("section %ld pfn[%lx, %lx) is already online\n",
177 				section_nr, pfn, pfn + PAGES_PER_SECTION);
178 			return false;
179 		}
180 		pfn += PAGES_PER_SECTION;
181 	}
182 
183 	return true;
184 }
185 
186 /*
187  * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
188  * OK to have direct references to sparsemem variables in here.
189  */
190 static int
191 memory_block_action(unsigned long start_section_nr, unsigned long action,
192 		    int online_type, int nid)
193 {
194 	unsigned long start_pfn;
195 	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
196 	int ret;
197 
198 	start_pfn = section_nr_to_pfn(start_section_nr);
199 
200 	switch (action) {
201 	case MEM_ONLINE:
202 		if (!pages_correctly_probed(start_pfn))
203 			return -EBUSY;
204 
205 		ret = online_pages(start_pfn, nr_pages, online_type, nid);
206 		break;
207 	case MEM_OFFLINE:
208 		ret = offline_pages(start_pfn, nr_pages);
209 		break;
210 	default:
211 		WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: "
212 		     "%ld\n", __func__, start_section_nr, action, action);
213 		ret = -EINVAL;
214 	}
215 
216 	return ret;
217 }
218 
219 static int memory_block_change_state(struct memory_block *mem,
220 		unsigned long to_state, unsigned long from_state_req)
221 {
222 	int ret = 0;
223 
224 	if (mem->state != from_state_req)
225 		return -EINVAL;
226 
227 	if (to_state == MEM_OFFLINE)
228 		mem->state = MEM_GOING_OFFLINE;
229 
230 	ret = memory_block_action(mem->start_section_nr, to_state,
231 				  mem->online_type, mem->nid);
232 
233 	mem->state = ret ? from_state_req : to_state;
234 
235 	return ret;
236 }
237 
238 /* The device lock serializes operations on memory_subsys_[online|offline] */
239 static int memory_subsys_online(struct device *dev)
240 {
241 	struct memory_block *mem = to_memory_block(dev);
242 	int ret;
243 
244 	if (mem->state == MEM_ONLINE)
245 		return 0;
246 
247 	/*
248 	 * If we are called from state_store(), online_type will be
249 	 * set >= 0 Otherwise we were called from the device online
250 	 * attribute and need to set the online_type.
251 	 */
252 	if (mem->online_type < 0)
253 		mem->online_type = MMOP_ONLINE_KEEP;
254 
255 	ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
256 
257 	/* clear online_type */
258 	mem->online_type = -1;
259 
260 	return ret;
261 }
262 
263 static int memory_subsys_offline(struct device *dev)
264 {
265 	struct memory_block *mem = to_memory_block(dev);
266 
267 	if (mem->state == MEM_OFFLINE)
268 		return 0;
269 
270 	/* Can't offline block with non-present sections */
271 	if (mem->section_count != sections_per_block)
272 		return -EINVAL;
273 
274 	return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
275 }
276 
277 static ssize_t state_store(struct device *dev, struct device_attribute *attr,
278 			   const char *buf, size_t count)
279 {
280 	struct memory_block *mem = to_memory_block(dev);
281 	int ret, online_type;
282 
283 	ret = lock_device_hotplug_sysfs();
284 	if (ret)
285 		return ret;
286 
287 	if (sysfs_streq(buf, "online_kernel"))
288 		online_type = MMOP_ONLINE_KERNEL;
289 	else if (sysfs_streq(buf, "online_movable"))
290 		online_type = MMOP_ONLINE_MOVABLE;
291 	else if (sysfs_streq(buf, "online"))
292 		online_type = MMOP_ONLINE_KEEP;
293 	else if (sysfs_streq(buf, "offline"))
294 		online_type = MMOP_OFFLINE;
295 	else {
296 		ret = -EINVAL;
297 		goto err;
298 	}
299 
300 	switch (online_type) {
301 	case MMOP_ONLINE_KERNEL:
302 	case MMOP_ONLINE_MOVABLE:
303 	case MMOP_ONLINE_KEEP:
304 		/* mem->online_type is protected by device_hotplug_lock */
305 		mem->online_type = online_type;
306 		ret = device_online(&mem->dev);
307 		break;
308 	case MMOP_OFFLINE:
309 		ret = device_offline(&mem->dev);
310 		break;
311 	default:
312 		ret = -EINVAL; /* should never happen */
313 	}
314 
315 err:
316 	unlock_device_hotplug();
317 
318 	if (ret < 0)
319 		return ret;
320 	if (ret)
321 		return -EINVAL;
322 
323 	return count;
324 }
325 
326 /*
327  * phys_device is a bad name for this.  What I really want
328  * is a way to differentiate between memory ranges that
329  * are part of physical devices that constitute
330  * a complete removable unit or fru.
331  * i.e. do these ranges belong to the same physical device,
332  * s.t. if I offline all of these sections I can then
333  * remove the physical device?
334  */
335 static ssize_t phys_device_show(struct device *dev,
336 				struct device_attribute *attr, char *buf)
337 {
338 	struct memory_block *mem = to_memory_block(dev);
339 	return sprintf(buf, "%d\n", mem->phys_device);
340 }
341 
342 #ifdef CONFIG_MEMORY_HOTREMOVE
343 static void print_allowed_zone(char *buf, int nid, unsigned long start_pfn,
344 		unsigned long nr_pages, int online_type,
345 		struct zone *default_zone)
346 {
347 	struct zone *zone;
348 
349 	zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages);
350 	if (zone != default_zone) {
351 		strcat(buf, " ");
352 		strcat(buf, zone->name);
353 	}
354 }
355 
356 static ssize_t valid_zones_show(struct device *dev,
357 				struct device_attribute *attr, char *buf)
358 {
359 	struct memory_block *mem = to_memory_block(dev);
360 	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
361 	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
362 	struct zone *default_zone;
363 	int nid;
364 
365 	/*
366 	 * Check the existing zone. Make sure that we do that only on the
367 	 * online nodes otherwise the page_zone is not reliable
368 	 */
369 	if (mem->state == MEM_ONLINE) {
370 		/*
371 		 * The block contains more than one zone can not be offlined.
372 		 * This can happen e.g. for ZONE_DMA and ZONE_DMA32
373 		 */
374 		default_zone = test_pages_in_a_zone(start_pfn,
375 						    start_pfn + nr_pages);
376 		if (!default_zone)
377 			return sprintf(buf, "none\n");
378 		strcat(buf, default_zone->name);
379 		goto out;
380 	}
381 
382 	nid = mem->nid;
383 	default_zone = zone_for_pfn_range(MMOP_ONLINE_KEEP, nid, start_pfn, nr_pages);
384 	strcat(buf, default_zone->name);
385 
386 	print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_KERNEL,
387 			default_zone);
388 	print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_MOVABLE,
389 			default_zone);
390 out:
391 	strcat(buf, "\n");
392 
393 	return strlen(buf);
394 }
395 static DEVICE_ATTR_RO(valid_zones);
396 #endif
397 
/*
 * Per-block device attributes, listed in memory_memblk_attrs[] below.
 * "state" is the only writable one.
 */
static DEVICE_ATTR_RW(state);
static DEVICE_ATTR_RO(phys_index);
static DEVICE_ATTR_RO(phys_device);
static DEVICE_ATTR_RO(removable);
402 
403 /*
404  * Show the memory block size (shared by all memory blocks).
405  */
406 static ssize_t block_size_bytes_show(struct device *dev,
407 				     struct device_attribute *attr, char *buf)
408 {
409 	return sprintf(buf, "%lx\n", memory_block_size_bytes());
410 }
411 
412 static DEVICE_ATTR_RO(block_size_bytes);
413 
414 /*
415  * Memory auto online policy.
416  */
417 
418 static ssize_t auto_online_blocks_show(struct device *dev,
419 				       struct device_attribute *attr, char *buf)
420 {
421 	if (memhp_auto_online)
422 		return sprintf(buf, "online\n");
423 	else
424 		return sprintf(buf, "offline\n");
425 }
426 
427 static ssize_t auto_online_blocks_store(struct device *dev,
428 					struct device_attribute *attr,
429 					const char *buf, size_t count)
430 {
431 	if (sysfs_streq(buf, "online"))
432 		memhp_auto_online = true;
433 	else if (sysfs_streq(buf, "offline"))
434 		memhp_auto_online = false;
435 	else
436 		return -EINVAL;
437 
438 	return count;
439 }
440 
441 static DEVICE_ATTR_RW(auto_online_blocks);
442 
443 /*
444  * Some architectures will have custom drivers to do this, and
445  * will not need to do it from userspace.  The fake hot-add code
446  * as well as ppc64 will do all of their discovery in userspace
447  * and will require this interface.
448  */
449 #ifdef CONFIG_ARCH_MEMORY_PROBE
450 static ssize_t probe_store(struct device *dev, struct device_attribute *attr,
451 			   const char *buf, size_t count)
452 {
453 	u64 phys_addr;
454 	int nid, ret;
455 	unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block;
456 
457 	ret = kstrtoull(buf, 0, &phys_addr);
458 	if (ret)
459 		return ret;
460 
461 	if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
462 		return -EINVAL;
463 
464 	ret = lock_device_hotplug_sysfs();
465 	if (ret)
466 		return ret;
467 
468 	nid = memory_add_physaddr_to_nid(phys_addr);
469 	ret = __add_memory(nid, phys_addr,
470 			   MIN_MEMORY_BLOCK_SIZE * sections_per_block);
471 
472 	if (ret)
473 		goto out;
474 
475 	ret = count;
476 out:
477 	unlock_device_hotplug();
478 	return ret;
479 }
480 
481 static DEVICE_ATTR_WO(probe);
482 #endif
483 
484 #ifdef CONFIG_MEMORY_FAILURE
485 /*
486  * Support for offlining pages of memory
487  */
488 
489 /* Soft offline a page */
490 static ssize_t soft_offline_page_store(struct device *dev,
491 				       struct device_attribute *attr,
492 				       const char *buf, size_t count)
493 {
494 	int ret;
495 	u64 pfn;
496 	if (!capable(CAP_SYS_ADMIN))
497 		return -EPERM;
498 	if (kstrtoull(buf, 0, &pfn) < 0)
499 		return -EINVAL;
500 	pfn >>= PAGE_SHIFT;
501 	ret = soft_offline_page(pfn, 0);
502 	return ret == 0 ? count : ret;
503 }
504 
505 /* Forcibly offline a page, including killing processes. */
506 static ssize_t hard_offline_page_store(struct device *dev,
507 				       struct device_attribute *attr,
508 				       const char *buf, size_t count)
509 {
510 	int ret;
511 	u64 pfn;
512 	if (!capable(CAP_SYS_ADMIN))
513 		return -EPERM;
514 	if (kstrtoull(buf, 0, &pfn) < 0)
515 		return -EINVAL;
516 	pfn >>= PAGE_SHIFT;
517 	ret = memory_failure(pfn, 0);
518 	return ret ? ret : count;
519 }
520 
521 static DEVICE_ATTR_WO(soft_offline_page);
522 static DEVICE_ATTR_WO(hard_offline_page);
523 #endif
524 
525 /*
526  * Note that phys_device is optional.  It is here to allow for
527  * differentiation between which *physical* devices each
528  * section belongs to...
529  */
530 int __weak arch_get_memory_phys_device(unsigned long start_pfn)
531 {
532 	return 0;
533 }
534 
535 /* A reference for the returned memory block device is acquired. */
536 static struct memory_block *find_memory_block_by_id(unsigned long block_id)
537 {
538 	struct device *dev;
539 
540 	dev = subsys_find_device_by_id(&memory_subsys, block_id, NULL);
541 	return dev ? to_memory_block(dev) : NULL;
542 }
543 
/*
 * Find the memory block that contains @section.
 *
 * For now, we have a linear search to go find the appropriate
 * memory_block corresponding to a particular phys_index. If
 * this gets to be a real problem, we can always use a radix
 * tree or something here.
 *
 * This could be made generic for all device subsystems.
 *
 * As with find_memory_block_by_id(), a reference on the returned block's
 * device is held for the caller; NULL is returned if there is no such block.
 */
struct memory_block *find_memory_block(struct mem_section *section)
{
	const unsigned long section_nr = __section_nr(section);

	return find_memory_block_by_id(base_memory_block_id(section_nr));
}
558 
559 static struct attribute *memory_memblk_attrs[] = {
560 	&dev_attr_phys_index.attr,
561 	&dev_attr_state.attr,
562 	&dev_attr_phys_device.attr,
563 	&dev_attr_removable.attr,
564 #ifdef CONFIG_MEMORY_HOTREMOVE
565 	&dev_attr_valid_zones.attr,
566 #endif
567 	NULL
568 };
569 
570 static struct attribute_group memory_memblk_attr_group = {
571 	.attrs = memory_memblk_attrs,
572 };
573 
574 static const struct attribute_group *memory_memblk_attr_groups[] = {
575 	&memory_memblk_attr_group,
576 	NULL,
577 };
578 
579 /*
580  * register_memory - Setup a sysfs device for a memory block
581  */
582 static
583 int register_memory(struct memory_block *memory)
584 {
585 	int ret;
586 
587 	memory->dev.bus = &memory_subsys;
588 	memory->dev.id = memory->start_section_nr / sections_per_block;
589 	memory->dev.release = memory_block_release;
590 	memory->dev.groups = memory_memblk_attr_groups;
591 	memory->dev.offline = memory->state == MEM_OFFLINE;
592 
593 	ret = device_register(&memory->dev);
594 	if (ret)
595 		put_device(&memory->dev);
596 
597 	return ret;
598 }
599 
600 static int init_memory_block(struct memory_block **memory,
601 			     unsigned long block_id, unsigned long state)
602 {
603 	struct memory_block *mem;
604 	unsigned long start_pfn;
605 	int ret = 0;
606 
607 	mem = find_memory_block_by_id(block_id);
608 	if (mem) {
609 		put_device(&mem->dev);
610 		return -EEXIST;
611 	}
612 	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
613 	if (!mem)
614 		return -ENOMEM;
615 
616 	mem->start_section_nr = block_id * sections_per_block;
617 	mem->state = state;
618 	start_pfn = section_nr_to_pfn(mem->start_section_nr);
619 	mem->phys_device = arch_get_memory_phys_device(start_pfn);
620 	mem->nid = NUMA_NO_NODE;
621 
622 	ret = register_memory(mem);
623 
624 	*memory = mem;
625 	return ret;
626 }
627 
628 static int add_memory_block(unsigned long base_section_nr)
629 {
630 	int ret, section_count = 0;
631 	struct memory_block *mem;
632 	unsigned long nr;
633 
634 	for (nr = base_section_nr; nr < base_section_nr + sections_per_block;
635 	     nr++)
636 		if (present_section_nr(nr))
637 			section_count++;
638 
639 	if (section_count == 0)
640 		return 0;
641 	ret = init_memory_block(&mem, base_memory_block_id(base_section_nr),
642 				MEM_ONLINE);
643 	if (ret)
644 		return ret;
645 	mem->section_count = section_count;
646 	return 0;
647 }
648 
649 static void unregister_memory(struct memory_block *memory)
650 {
651 	if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
652 		return;
653 
654 	/* drop the ref. we got via find_memory_block() */
655 	put_device(&memory->dev);
656 	device_unregister(&memory->dev);
657 }
658 
659 /*
660  * Create memory block devices for the given memory area. Start and size
661  * have to be aligned to memory block granularity. Memory block devices
662  * will be initialized as offline.
663  *
664  * Called under device_hotplug_lock.
665  */
666 int create_memory_block_devices(unsigned long start, unsigned long size)
667 {
668 	const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
669 	unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
670 	struct memory_block *mem;
671 	unsigned long block_id;
672 	int ret = 0;
673 
674 	if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
675 			 !IS_ALIGNED(size, memory_block_size_bytes())))
676 		return -EINVAL;
677 
678 	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
679 		ret = init_memory_block(&mem, block_id, MEM_OFFLINE);
680 		if (ret)
681 			break;
682 		mem->section_count = sections_per_block;
683 	}
684 	if (ret) {
685 		end_block_id = block_id;
686 		for (block_id = start_block_id; block_id != end_block_id;
687 		     block_id++) {
688 			mem = find_memory_block_by_id(block_id);
689 			if (WARN_ON_ONCE(!mem))
690 				continue;
691 			mem->section_count = 0;
692 			unregister_memory(mem);
693 		}
694 	}
695 	return ret;
696 }
697 
698 /*
699  * Remove memory block devices for the given memory area. Start and size
700  * have to be aligned to memory block granularity. Memory block devices
701  * have to be offline.
702  *
703  * Called under device_hotplug_lock.
704  */
705 void remove_memory_block_devices(unsigned long start, unsigned long size)
706 {
707 	const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
708 	const unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
709 	struct memory_block *mem;
710 	unsigned long block_id;
711 
712 	if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
713 			 !IS_ALIGNED(size, memory_block_size_bytes())))
714 		return;
715 
716 	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
717 		mem = find_memory_block_by_id(block_id);
718 		if (WARN_ON_ONCE(!mem))
719 			continue;
720 		mem->section_count = 0;
721 		unregister_memory_block_under_nodes(mem);
722 		unregister_memory(mem);
723 	}
724 }
725 
726 /* return true if the memory block is offlined, otherwise, return false */
727 bool is_memblock_offlined(struct memory_block *mem)
728 {
729 	return mem->state == MEM_OFFLINE;
730 }
731 
732 static struct attribute *memory_root_attrs[] = {
733 #ifdef CONFIG_ARCH_MEMORY_PROBE
734 	&dev_attr_probe.attr,
735 #endif
736 
737 #ifdef CONFIG_MEMORY_FAILURE
738 	&dev_attr_soft_offline_page.attr,
739 	&dev_attr_hard_offline_page.attr,
740 #endif
741 
742 	&dev_attr_block_size_bytes.attr,
743 	&dev_attr_auto_online_blocks.attr,
744 	NULL
745 };
746 
747 static struct attribute_group memory_root_attr_group = {
748 	.attrs = memory_root_attrs,
749 };
750 
751 static const struct attribute_group *memory_root_attr_groups[] = {
752 	&memory_root_attr_group,
753 	NULL,
754 };
755 
756 /*
757  * Initialize the sysfs support for memory devices. At the time this function
758  * is called, we cannot have concurrent creation/deletion of memory block
759  * devices, the device_hotplug_lock is not needed.
760  */
761 void __init memory_dev_init(void)
762 {
763 	int ret;
764 	unsigned long block_sz, nr;
765 
766 	/* Validate the configured memory block size */
767 	block_sz = memory_block_size_bytes();
768 	if (!is_power_of_2(block_sz) || block_sz < MIN_MEMORY_BLOCK_SIZE)
769 		panic("Memory block size not suitable: 0x%lx\n", block_sz);
770 	sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
771 
772 	ret = subsys_system_register(&memory_subsys, memory_root_attr_groups);
773 	if (ret)
774 		panic("%s() failed to register subsystem: %d\n", __func__, ret);
775 
776 	/*
777 	 * Create entries for memory sections that were found
778 	 * during boot and have been initialized
779 	 */
780 	for (nr = 0; nr <= __highest_present_section_nr;
781 	     nr += sections_per_block) {
782 		ret = add_memory_block(nr);
783 		if (ret)
784 			panic("%s() failed to add memory block: %d\n", __func__,
785 			      ret);
786 	}
787 }
788 
789 /**
790  * walk_memory_blocks - walk through all present memory blocks overlapped
791  *			by the range [start, start + size)
792  *
793  * @start: start address of the memory range
794  * @size: size of the memory range
795  * @arg: argument passed to func
796  * @func: callback for each memory section walked
797  *
798  * This function walks through all present memory blocks overlapped by the
799  * range [start, start + size), calling func on each memory block.
800  *
801  * In case func() returns an error, walking is aborted and the error is
802  * returned.
803  */
804 int walk_memory_blocks(unsigned long start, unsigned long size,
805 		       void *arg, walk_memory_blocks_func_t func)
806 {
807 	const unsigned long start_block_id = phys_to_block_id(start);
808 	const unsigned long end_block_id = phys_to_block_id(start + size - 1);
809 	struct memory_block *mem;
810 	unsigned long block_id;
811 	int ret = 0;
812 
813 	if (!size)
814 		return 0;
815 
816 	for (block_id = start_block_id; block_id <= end_block_id; block_id++) {
817 		mem = find_memory_block_by_id(block_id);
818 		if (!mem)
819 			continue;
820 
821 		ret = func(mem, arg);
822 		put_device(&mem->dev);
823 		if (ret)
824 			break;
825 	}
826 	return ret;
827 }
828 
829 struct for_each_memory_block_cb_data {
830 	walk_memory_blocks_func_t func;
831 	void *arg;
832 };
833 
834 static int for_each_memory_block_cb(struct device *dev, void *data)
835 {
836 	struct memory_block *mem = to_memory_block(dev);
837 	struct for_each_memory_block_cb_data *cb_data = data;
838 
839 	return cb_data->func(mem, cb_data->arg);
840 }
841 
842 /**
843  * for_each_memory_block - walk through all present memory blocks
844  *
845  * @arg: argument passed to func
846  * @func: callback for each memory block walked
847  *
848  * This function walks through all present memory blocks, calling func on
849  * each memory block.
850  *
851  * In case func() returns an error, walking is aborted and the error is
852  * returned.
853  */
854 int for_each_memory_block(void *arg, walk_memory_blocks_func_t func)
855 {
856 	struct for_each_memory_block_cb_data cb_data = {
857 		.func = func,
858 		.arg = arg,
859 	};
860 
861 	return bus_for_each_dev(&memory_subsys, NULL, &cb_data,
862 				for_each_memory_block_cb);
863 }
864