1 /* 2 * drivers/base/memory.c - basic Memory class support 3 * 4 * Written by Matt Tolentino <matthew.e.tolentino@intel.com> 5 * Dave Hansen <haveblue@us.ibm.com> 6 * 7 * This file provides the necessary infrastructure to represent 8 * a SPARSEMEM-memory-model system's physical memory in /sysfs. 9 * All arch-independent code that assumes MEMORY_HOTPLUG requires 10 * SPARSEMEM should be contained here, or in mm/memory_hotplug.c. 11 */ 12 13 #include <linux/sysdev.h> 14 #include <linux/module.h> 15 #include <linux/init.h> 16 #include <linux/topology.h> 17 #include <linux/capability.h> 18 #include <linux/device.h> 19 #include <linux/memory.h> 20 #include <linux/kobject.h> 21 #include <linux/memory_hotplug.h> 22 #include <linux/mm.h> 23 #include <asm/atomic.h> 24 #include <asm/uaccess.h> 25 26 #define MEMORY_CLASS_NAME "memory" 27 28 static struct sysdev_class memory_sysdev_class = { 29 set_kset_name(MEMORY_CLASS_NAME), 30 }; 31 32 static const char *memory_uevent_name(struct kset *kset, struct kobject *kobj) 33 { 34 return MEMORY_CLASS_NAME; 35 } 36 37 static int memory_uevent(struct kset *kset, struct kobject *obj, struct kobj_uevent_env *env) 38 { 39 int retval = 0; 40 41 return retval; 42 } 43 44 static struct kset_uevent_ops memory_uevent_ops = { 45 .name = memory_uevent_name, 46 .uevent = memory_uevent, 47 }; 48 49 static BLOCKING_NOTIFIER_HEAD(memory_chain); 50 51 int register_memory_notifier(struct notifier_block *nb) 52 { 53 return blocking_notifier_chain_register(&memory_chain, nb); 54 } 55 56 void unregister_memory_notifier(struct notifier_block *nb) 57 { 58 blocking_notifier_chain_unregister(&memory_chain, nb); 59 } 60 61 /* 62 * register_memory - Setup a sysfs device for a memory block 63 */ 64 int register_memory(struct memory_block *memory, struct mem_section *section, 65 struct node *root) 66 { 67 int error; 68 69 memory->sysdev.cls = &memory_sysdev_class; 70 memory->sysdev.id = __section_nr(section); 71 72 error = sysdev_register(&memory->sysdev); 73 74 if (root && !error) 75 error = sysfs_create_link(&root->sysdev.kobj, 76 &memory->sysdev.kobj, 77 kobject_name(&memory->sysdev.kobj)); 78 79 return error; 80 } 81 82 static void 83 unregister_memory(struct memory_block *memory, struct mem_section *section, 84 struct node *root) 85 { 86 BUG_ON(memory->sysdev.cls != &memory_sysdev_class); 87 BUG_ON(memory->sysdev.id != __section_nr(section)); 88 89 sysdev_unregister(&memory->sysdev); 90 if (root) 91 sysfs_remove_link(&root->sysdev.kobj, 92 kobject_name(&memory->sysdev.kobj)); 93 } 94 95 /* 96 * use this as the physical section index that this memsection 97 * uses. 98 */ 99 100 static ssize_t show_mem_phys_index(struct sys_device *dev, char *buf) 101 { 102 struct memory_block *mem = 103 container_of(dev, struct memory_block, sysdev); 104 return sprintf(buf, "%08lx\n", mem->phys_index); 105 } 106 107 /* 108 * online, offline, going offline, etc. 109 */ 110 static ssize_t show_mem_state(struct sys_device *dev, char *buf) 111 { 112 struct memory_block *mem = 113 container_of(dev, struct memory_block, sysdev); 114 ssize_t len = 0; 115 116 /* 117 * We can probably put these states in a nice little array 118 * so that they're not open-coded 119 */ 120 switch (mem->state) { 121 case MEM_ONLINE: 122 len = sprintf(buf, "online\n"); 123 break; 124 case MEM_OFFLINE: 125 len = sprintf(buf, "offline\n"); 126 break; 127 case MEM_GOING_OFFLINE: 128 len = sprintf(buf, "going-offline\n"); 129 break; 130 default: 131 len = sprintf(buf, "ERROR-UNKNOWN-%ld\n", 132 mem->state); 133 WARN_ON(1); 134 break; 135 } 136 137 return len; 138 } 139 140 static inline int memory_notify(unsigned long val, void *v) 141 { 142 return blocking_notifier_call_chain(&memory_chain, val, v); 143 } 144 145 /* 146 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is 147 * OK to have direct references to sparsemem variables in here. 148 */ 149 static int 150 memory_block_action(struct memory_block *mem, unsigned long action) 151 { 152 int i; 153 unsigned long psection; 154 unsigned long start_pfn, start_paddr; 155 struct page *first_page; 156 int ret; 157 int old_state = mem->state; 158 159 psection = mem->phys_index; 160 first_page = pfn_to_page(psection << PFN_SECTION_SHIFT); 161 162 /* 163 * The probe routines leave the pages reserved, just 164 * as the bootmem code does. Make sure they're still 165 * that way. 166 */ 167 if (action == MEM_ONLINE) { 168 for (i = 0; i < PAGES_PER_SECTION; i++) { 169 if (PageReserved(first_page+i)) 170 continue; 171 172 printk(KERN_WARNING "section number %ld page number %d " 173 "not reserved, was it already online? \n", 174 psection, i); 175 return -EBUSY; 176 } 177 } 178 179 switch (action) { 180 case MEM_ONLINE: 181 start_pfn = page_to_pfn(first_page); 182 ret = online_pages(start_pfn, PAGES_PER_SECTION); 183 break; 184 case MEM_OFFLINE: 185 mem->state = MEM_GOING_OFFLINE; 186 memory_notify(MEM_GOING_OFFLINE, NULL); 187 start_paddr = page_to_pfn(first_page) << PAGE_SHIFT; 188 ret = remove_memory(start_paddr, 189 PAGES_PER_SECTION << PAGE_SHIFT); 190 if (ret) { 191 mem->state = old_state; 192 break; 193 } 194 memory_notify(MEM_MAPPING_INVALID, NULL); 195 break; 196 default: 197 printk(KERN_WARNING "%s(%p, %ld) unknown action: %ld\n", 198 __FUNCTION__, mem, action, action); 199 WARN_ON(1); 200 ret = -EINVAL; 201 } 202 /* 203 * For now, only notify on successful memory operations 204 */ 205 if (!ret) 206 memory_notify(action, NULL); 207 208 return ret; 209 } 210 211 static int memory_block_change_state(struct memory_block *mem, 212 unsigned long to_state, unsigned long from_state_req) 213 { 214 int ret = 0; 215 down(&mem->state_sem); 216 217 if (mem->state != from_state_req) { 218 ret = -EINVAL; 219 goto out; 220 } 221 222 ret = memory_block_action(mem, to_state); 223 if (!ret) 224 mem->state = to_state; 225 226 out: 227 up(&mem->state_sem); 228 return ret; 229 } 230 231 static ssize_t 232 store_mem_state(struct sys_device *dev, const char *buf, size_t count) 233 { 234 struct memory_block *mem; 235 unsigned int phys_section_nr; 236 int ret = -EINVAL; 237 238 mem = container_of(dev, struct memory_block, sysdev); 239 phys_section_nr = mem->phys_index; 240 241 if (!present_section_nr(phys_section_nr)) 242 goto out; 243 244 if (!strncmp(buf, "online", min((int)count, 6))) 245 ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); 246 else if(!strncmp(buf, "offline", min((int)count, 7))) 247 ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE); 248 out: 249 if (ret) 250 return ret; 251 return count; 252 } 253 254 /* 255 * phys_device is a bad name for this. What I really want 256 * is a way to differentiate between memory ranges that 257 * are part of physical devices that constitute 258 * a complete removable unit or fru. 259 * i.e. do these ranges belong to the same physical device, 260 * s.t. if I offline all of these sections I can then 261 * remove the physical device? 262 */ 263 static ssize_t show_phys_device(struct sys_device *dev, char *buf) 264 { 265 struct memory_block *mem = 266 container_of(dev, struct memory_block, sysdev); 267 return sprintf(buf, "%d\n", mem->phys_device); 268 } 269 270 static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL); 271 static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state); 272 static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL); 273 274 #define mem_create_simple_file(mem, attr_name) \ 275 sysdev_create_file(&mem->sysdev, &attr_##attr_name) 276 #define mem_remove_simple_file(mem, attr_name) \ 277 sysdev_remove_file(&mem->sysdev, &attr_##attr_name) 278 279 /* 280 * Block size attribute stuff 281 */ 282 static ssize_t 283 print_block_size(struct class *class, char *buf) 284 { 285 return sprintf(buf, "%lx\n", (unsigned long)PAGES_PER_SECTION * PAGE_SIZE); 286 } 287 288 static CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL); 289 290 static int block_size_init(void) 291 { 292 return sysfs_create_file(&memory_sysdev_class.kset.kobj, 293 &class_attr_block_size_bytes.attr); 294 } 295 296 /* 297 * Some architectures will have custom drivers to do this, and 298 * will not need to do it from userspace. The fake hot-add code 299 * as well as ppc64 will do all of their discovery in userspace 300 * and will require this interface. 301 */ 302 #ifdef CONFIG_ARCH_MEMORY_PROBE 303 static ssize_t 304 memory_probe_store(struct class *class, const char *buf, size_t count) 305 { 306 u64 phys_addr; 307 int nid; 308 int ret; 309 310 phys_addr = simple_strtoull(buf, NULL, 0); 311 312 nid = memory_add_physaddr_to_nid(phys_addr); 313 ret = add_memory(nid, phys_addr, PAGES_PER_SECTION << PAGE_SHIFT); 314 315 if (ret) 316 count = ret; 317 318 return count; 319 } 320 static CLASS_ATTR(probe, 0700, NULL, memory_probe_store); 321 322 static int memory_probe_init(void) 323 { 324 return sysfs_create_file(&memory_sysdev_class.kset.kobj, 325 &class_attr_probe.attr); 326 } 327 #else 328 static inline int memory_probe_init(void) 329 { 330 return 0; 331 } 332 #endif 333 334 /* 335 * Note that phys_device is optional. It is here to allow for 336 * differentiation between which *physical* devices each 337 * section belongs to... 338 */ 339 340 static int add_memory_block(unsigned long node_id, struct mem_section *section, 341 unsigned long state, int phys_device) 342 { 343 struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL); 344 int ret = 0; 345 346 if (!mem) 347 return -ENOMEM; 348 349 mem->phys_index = __section_nr(section); 350 mem->state = state; 351 init_MUTEX(&mem->state_sem); 352 mem->phys_device = phys_device; 353 354 ret = register_memory(mem, section, NULL); 355 if (!ret) 356 ret = mem_create_simple_file(mem, phys_index); 357 if (!ret) 358 ret = mem_create_simple_file(mem, state); 359 if (!ret) 360 ret = mem_create_simple_file(mem, phys_device); 361 362 return ret; 363 } 364 365 /* 366 * For now, we have a linear search to go find the appropriate 367 * memory_block corresponding to a particular phys_index. If 368 * this gets to be a real problem, we can always use a radix 369 * tree or something here. 370 * 371 * This could be made generic for all sysdev classes. 372 */ 373 static struct memory_block *find_memory_block(struct mem_section *section) 374 { 375 struct kobject *kobj; 376 struct sys_device *sysdev; 377 struct memory_block *mem; 378 char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1]; 379 380 /* 381 * This only works because we know that section == sysdev->id 382 * slightly redundant with sysdev_register() 383 */ 384 sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, __section_nr(section)); 385 386 kobj = kset_find_obj(&memory_sysdev_class.kset, name); 387 if (!kobj) 388 return NULL; 389 390 sysdev = container_of(kobj, struct sys_device, kobj); 391 mem = container_of(sysdev, struct memory_block, sysdev); 392 393 return mem; 394 } 395 396 int remove_memory_block(unsigned long node_id, struct mem_section *section, 397 int phys_device) 398 { 399 struct memory_block *mem; 400 401 mem = find_memory_block(section); 402 mem_remove_simple_file(mem, phys_index); 403 mem_remove_simple_file(mem, state); 404 mem_remove_simple_file(mem, phys_device); 405 unregister_memory(mem, section, NULL); 406 407 return 0; 408 } 409 410 /* 411 * need an interface for the VM to add new memory regions, 412 * but without onlining it. 413 */ 414 int register_new_memory(struct mem_section *section) 415 { 416 return add_memory_block(0, section, MEM_OFFLINE, 0); 417 } 418 419 int unregister_memory_section(struct mem_section *section) 420 { 421 if (!present_section(section)) 422 return -EINVAL; 423 424 return remove_memory_block(0, section, 0); 425 } 426 427 /* 428 * Initialize the sysfs support for memory devices... 429 */ 430 int __init memory_dev_init(void) 431 { 432 unsigned int i; 433 int ret; 434 int err; 435 436 memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops; 437 ret = sysdev_class_register(&memory_sysdev_class); 438 if (ret) 439 goto out; 440 441 /* 442 * Create entries for memory sections that were found 443 * during boot and have been initialized 444 */ 445 for (i = 0; i < NR_MEM_SECTIONS; i++) { 446 if (!present_section_nr(i)) 447 continue; 448 err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 0); 449 if (!ret) 450 ret = err; 451 } 452 453 err = memory_probe_init(); 454 if (!ret) 455 ret = err; 456 err = block_size_init(); 457 if (!ret) 458 ret = err; 459 out: 460 if (ret) 461 printk(KERN_ERR "%s() failed: %d\n", __FUNCTION__, ret); 462 return ret; 463 } 464