1 /* 2 * drivers/base/memory.c - basic Memory class support 3 * 4 * Written by Matt Tolentino <matthew.e.tolentino@intel.com> 5 * Dave Hansen <haveblue@us.ibm.com> 6 * 7 * This file provides the necessary infrastructure to represent 8 * a SPARSEMEM-memory-model system's physical memory in /sysfs. 9 * All arch-independent code that assumes MEMORY_HOTPLUG requires 10 * SPARSEMEM should be contained here, or in mm/memory_hotplug.c. 11 */ 12 13 #include <linux/sysdev.h> 14 #include <linux/module.h> 15 #include <linux/init.h> 16 #include <linux/topology.h> 17 #include <linux/capability.h> 18 #include <linux/device.h> 19 #include <linux/memory.h> 20 #include <linux/kobject.h> 21 #include <linux/memory_hotplug.h> 22 #include <linux/mm.h> 23 #include <asm/atomic.h> 24 #include <asm/uaccess.h> 25 26 #define MEMORY_CLASS_NAME "memory" 27 28 static struct sysdev_class memory_sysdev_class = { 29 .name = MEMORY_CLASS_NAME, 30 }; 31 32 static const char *memory_uevent_name(struct kset *kset, struct kobject *kobj) 33 { 34 return MEMORY_CLASS_NAME; 35 } 36 37 static int memory_uevent(struct kset *kset, struct kobject *obj, struct kobj_uevent_env *env) 38 { 39 int retval = 0; 40 41 return retval; 42 } 43 44 static struct kset_uevent_ops memory_uevent_ops = { 45 .name = memory_uevent_name, 46 .uevent = memory_uevent, 47 }; 48 49 static BLOCKING_NOTIFIER_HEAD(memory_chain); 50 51 int register_memory_notifier(struct notifier_block *nb) 52 { 53 return blocking_notifier_chain_register(&memory_chain, nb); 54 } 55 56 void unregister_memory_notifier(struct notifier_block *nb) 57 { 58 blocking_notifier_chain_unregister(&memory_chain, nb); 59 } 60 61 /* 62 * register_memory - Setup a sysfs device for a memory block 63 */ 64 int register_memory(struct memory_block *memory, struct mem_section *section, 65 struct node *root) 66 { 67 int error; 68 69 memory->sysdev.cls = &memory_sysdev_class; 70 memory->sysdev.id = __section_nr(section); 71 72 error = sysdev_register(&memory->sysdev); 73 74 if (root && !error) 75 error = sysfs_create_link(&root->sysdev.kobj, 76 &memory->sysdev.kobj, 77 kobject_name(&memory->sysdev.kobj)); 78 79 return error; 80 } 81 82 static void 83 unregister_memory(struct memory_block *memory, struct mem_section *section, 84 struct node *root) 85 { 86 BUG_ON(memory->sysdev.cls != &memory_sysdev_class); 87 BUG_ON(memory->sysdev.id != __section_nr(section)); 88 89 sysdev_unregister(&memory->sysdev); 90 if (root) 91 sysfs_remove_link(&root->sysdev.kobj, 92 kobject_name(&memory->sysdev.kobj)); 93 } 94 95 /* 96 * use this as the physical section index that this memsection 97 * uses. 98 */ 99 100 static ssize_t show_mem_phys_index(struct sys_device *dev, char *buf) 101 { 102 struct memory_block *mem = 103 container_of(dev, struct memory_block, sysdev); 104 return sprintf(buf, "%08lx\n", mem->phys_index); 105 } 106 107 /* 108 * online, offline, going offline, etc. 109 */ 110 static ssize_t show_mem_state(struct sys_device *dev, char *buf) 111 { 112 struct memory_block *mem = 113 container_of(dev, struct memory_block, sysdev); 114 ssize_t len = 0; 115 116 /* 117 * We can probably put these states in a nice little array 118 * so that they're not open-coded 119 */ 120 switch (mem->state) { 121 case MEM_ONLINE: 122 len = sprintf(buf, "online\n"); 123 break; 124 case MEM_OFFLINE: 125 len = sprintf(buf, "offline\n"); 126 break; 127 case MEM_GOING_OFFLINE: 128 len = sprintf(buf, "going-offline\n"); 129 break; 130 default: 131 len = sprintf(buf, "ERROR-UNKNOWN-%ld\n", 132 mem->state); 133 WARN_ON(1); 134 break; 135 } 136 137 return len; 138 } 139 140 int memory_notify(unsigned long val, void *v) 141 { 142 return blocking_notifier_call_chain(&memory_chain, val, v); 143 } 144 145 /* 146 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is 147 * OK to have direct references to sparsemem variables in here. 148 */ 149 static int 150 memory_block_action(struct memory_block *mem, unsigned long action) 151 { 152 int i; 153 unsigned long psection; 154 unsigned long start_pfn, start_paddr; 155 struct page *first_page; 156 int ret; 157 int old_state = mem->state; 158 159 psection = mem->phys_index; 160 first_page = pfn_to_page(psection << PFN_SECTION_SHIFT); 161 162 /* 163 * The probe routines leave the pages reserved, just 164 * as the bootmem code does. Make sure they're still 165 * that way. 166 */ 167 if (action == MEM_ONLINE) { 168 for (i = 0; i < PAGES_PER_SECTION; i++) { 169 if (PageReserved(first_page+i)) 170 continue; 171 172 printk(KERN_WARNING "section number %ld page number %d " 173 "not reserved, was it already online? \n", 174 psection, i); 175 return -EBUSY; 176 } 177 } 178 179 switch (action) { 180 case MEM_ONLINE: 181 start_pfn = page_to_pfn(first_page); 182 ret = online_pages(start_pfn, PAGES_PER_SECTION); 183 break; 184 case MEM_OFFLINE: 185 mem->state = MEM_GOING_OFFLINE; 186 start_paddr = page_to_pfn(first_page) << PAGE_SHIFT; 187 ret = remove_memory(start_paddr, 188 PAGES_PER_SECTION << PAGE_SHIFT); 189 if (ret) { 190 mem->state = old_state; 191 break; 192 } 193 break; 194 default: 195 printk(KERN_WARNING "%s(%p, %ld) unknown action: %ld\n", 196 __FUNCTION__, mem, action, action); 197 WARN_ON(1); 198 ret = -EINVAL; 199 } 200 201 return ret; 202 } 203 204 static int memory_block_change_state(struct memory_block *mem, 205 unsigned long to_state, unsigned long from_state_req) 206 { 207 int ret = 0; 208 down(&mem->state_sem); 209 210 if (mem->state != from_state_req) { 211 ret = -EINVAL; 212 goto out; 213 } 214 215 ret = memory_block_action(mem, to_state); 216 if (!ret) 217 mem->state = to_state; 218 219 out: 220 up(&mem->state_sem); 221 return ret; 222 } 223 224 static ssize_t 225 store_mem_state(struct sys_device *dev, const char *buf, size_t count) 226 { 227 struct memory_block *mem; 228 unsigned int phys_section_nr; 229 int ret = -EINVAL; 230 231 mem = container_of(dev, struct memory_block, sysdev); 232 phys_section_nr = mem->phys_index; 233 234 if (!present_section_nr(phys_section_nr)) 235 goto out; 236 237 if (!strncmp(buf, "online", min((int)count, 6))) 238 ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); 239 else if(!strncmp(buf, "offline", min((int)count, 7))) 240 ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE); 241 out: 242 if (ret) 243 return ret; 244 return count; 245 } 246 247 /* 248 * phys_device is a bad name for this. What I really want 249 * is a way to differentiate between memory ranges that 250 * are part of physical devices that constitute 251 * a complete removable unit or fru. 252 * i.e. do these ranges belong to the same physical device, 253 * s.t. if I offline all of these sections I can then 254 * remove the physical device? 255 */ 256 static ssize_t show_phys_device(struct sys_device *dev, char *buf) 257 { 258 struct memory_block *mem = 259 container_of(dev, struct memory_block, sysdev); 260 return sprintf(buf, "%d\n", mem->phys_device); 261 } 262 263 static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL); 264 static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state); 265 static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL); 266 267 #define mem_create_simple_file(mem, attr_name) \ 268 sysdev_create_file(&mem->sysdev, &attr_##attr_name) 269 #define mem_remove_simple_file(mem, attr_name) \ 270 sysdev_remove_file(&mem->sysdev, &attr_##attr_name) 271 272 /* 273 * Block size attribute stuff 274 */ 275 static ssize_t 276 print_block_size(struct class *class, char *buf) 277 { 278 return sprintf(buf, "%lx\n", (unsigned long)PAGES_PER_SECTION * PAGE_SIZE); 279 } 280 281 static CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL); 282 283 static int block_size_init(void) 284 { 285 return sysfs_create_file(&memory_sysdev_class.kset.kobj, 286 &class_attr_block_size_bytes.attr); 287 } 288 289 /* 290 * Some architectures will have custom drivers to do this, and 291 * will not need to do it from userspace. The fake hot-add code 292 * as well as ppc64 will do all of their discovery in userspace 293 * and will require this interface. 294 */ 295 #ifdef CONFIG_ARCH_MEMORY_PROBE 296 static ssize_t 297 memory_probe_store(struct class *class, const char *buf, size_t count) 298 { 299 u64 phys_addr; 300 int nid; 301 int ret; 302 303 phys_addr = simple_strtoull(buf, NULL, 0); 304 305 nid = memory_add_physaddr_to_nid(phys_addr); 306 ret = add_memory(nid, phys_addr, PAGES_PER_SECTION << PAGE_SHIFT); 307 308 if (ret) 309 count = ret; 310 311 return count; 312 } 313 static CLASS_ATTR(probe, 0700, NULL, memory_probe_store); 314 315 static int memory_probe_init(void) 316 { 317 return sysfs_create_file(&memory_sysdev_class.kset.kobj, 318 &class_attr_probe.attr); 319 } 320 #else 321 static inline int memory_probe_init(void) 322 { 323 return 0; 324 } 325 #endif 326 327 /* 328 * Note that phys_device is optional. It is here to allow for 329 * differentiation between which *physical* devices each 330 * section belongs to... 331 */ 332 333 static int add_memory_block(unsigned long node_id, struct mem_section *section, 334 unsigned long state, int phys_device) 335 { 336 struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL); 337 int ret = 0; 338 339 if (!mem) 340 return -ENOMEM; 341 342 mem->phys_index = __section_nr(section); 343 mem->state = state; 344 init_MUTEX(&mem->state_sem); 345 mem->phys_device = phys_device; 346 347 ret = register_memory(mem, section, NULL); 348 if (!ret) 349 ret = mem_create_simple_file(mem, phys_index); 350 if (!ret) 351 ret = mem_create_simple_file(mem, state); 352 if (!ret) 353 ret = mem_create_simple_file(mem, phys_device); 354 355 return ret; 356 } 357 358 /* 359 * For now, we have a linear search to go find the appropriate 360 * memory_block corresponding to a particular phys_index. If 361 * this gets to be a real problem, we can always use a radix 362 * tree or something here. 363 * 364 * This could be made generic for all sysdev classes. 365 */ 366 static struct memory_block *find_memory_block(struct mem_section *section) 367 { 368 struct kobject *kobj; 369 struct sys_device *sysdev; 370 struct memory_block *mem; 371 char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1]; 372 373 /* 374 * This only works because we know that section == sysdev->id 375 * slightly redundant with sysdev_register() 376 */ 377 sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, __section_nr(section)); 378 379 kobj = kset_find_obj(&memory_sysdev_class.kset, name); 380 if (!kobj) 381 return NULL; 382 383 sysdev = container_of(kobj, struct sys_device, kobj); 384 mem = container_of(sysdev, struct memory_block, sysdev); 385 386 return mem; 387 } 388 389 int remove_memory_block(unsigned long node_id, struct mem_section *section, 390 int phys_device) 391 { 392 struct memory_block *mem; 393 394 mem = find_memory_block(section); 395 mem_remove_simple_file(mem, phys_index); 396 mem_remove_simple_file(mem, state); 397 mem_remove_simple_file(mem, phys_device); 398 unregister_memory(mem, section, NULL); 399 400 return 0; 401 } 402 403 /* 404 * need an interface for the VM to add new memory regions, 405 * but without onlining it. 406 */ 407 int register_new_memory(struct mem_section *section) 408 { 409 return add_memory_block(0, section, MEM_OFFLINE, 0); 410 } 411 412 int unregister_memory_section(struct mem_section *section) 413 { 414 if (!present_section(section)) 415 return -EINVAL; 416 417 return remove_memory_block(0, section, 0); 418 } 419 420 /* 421 * Initialize the sysfs support for memory devices... 422 */ 423 int __init memory_dev_init(void) 424 { 425 unsigned int i; 426 int ret; 427 int err; 428 429 memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops; 430 ret = sysdev_class_register(&memory_sysdev_class); 431 if (ret) 432 goto out; 433 434 /* 435 * Create entries for memory sections that were found 436 * during boot and have been initialized 437 */ 438 for (i = 0; i < NR_MEM_SECTIONS; i++) { 439 if (!present_section_nr(i)) 440 continue; 441 err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 0); 442 if (!ret) 443 ret = err; 444 } 445 446 err = memory_probe_init(); 447 if (!ret) 448 ret = err; 449 err = block_size_init(); 450 if (!ret) 451 ret = err; 452 out: 453 if (ret) 454 printk(KERN_ERR "%s() failed: %d\n", __FUNCTION__, ret); 455 return ret; 456 } 457