1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Collaborative memory management interface. 4 * 5 * Copyright (C) 2008 IBM Corporation 6 * Author(s): Brian King (brking@linux.vnet.ibm.com), 7 */ 8 9 #include <linux/ctype.h> 10 #include <linux/delay.h> 11 #include <linux/errno.h> 12 #include <linux/fs.h> 13 #include <linux/gfp.h> 14 #include <linux/kthread.h> 15 #include <linux/module.h> 16 #include <linux/oom.h> 17 #include <linux/reboot.h> 18 #include <linux/sched.h> 19 #include <linux/stringify.h> 20 #include <linux/swap.h> 21 #include <linux/device.h> 22 #include <linux/mount.h> 23 #include <linux/pseudo_fs.h> 24 #include <linux/magic.h> 25 #include <linux/balloon_compaction.h> 26 #include <asm/firmware.h> 27 #include <asm/hvcall.h> 28 #include <asm/mmu.h> 29 #include <linux/uaccess.h> 30 #include <linux/memory.h> 31 #include <asm/plpar_wrappers.h> 32 33 #include "pseries.h" 34 35 #define CMM_DRIVER_VERSION "1.0.0" 36 #define CMM_DEFAULT_DELAY 1 37 #define CMM_HOTPLUG_DELAY 5 38 #define CMM_DEBUG 0 39 #define CMM_DISABLE 0 40 #define CMM_OOM_KB 1024 41 #define CMM_MIN_MEM_MB 256 42 #define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10)) 43 #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) 44 45 #define CMM_MEM_HOTPLUG_PRI 1 46 47 static unsigned int delay = CMM_DEFAULT_DELAY; 48 static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY; 49 static unsigned int oom_kb = CMM_OOM_KB; 50 static unsigned int cmm_debug = CMM_DEBUG; 51 static unsigned int cmm_disabled = CMM_DISABLE; 52 static unsigned long min_mem_mb = CMM_MIN_MEM_MB; 53 static bool __read_mostly simulate; 54 static unsigned long simulate_loan_target_kb; 55 static struct device cmm_dev; 56 57 MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>"); 58 MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager"); 59 MODULE_LICENSE("GPL"); 60 MODULE_VERSION(CMM_DRIVER_VERSION); 61 62 module_param_named(delay, delay, uint, 0644); 63 MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. " 64 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]"); 65 module_param_named(hotplug_delay, hotplug_delay, uint, 0644); 66 MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove " 67 "before loaning resumes. " 68 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]"); 69 module_param_named(oom_kb, oom_kb, uint, 0644); 70 MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. " 71 "[Default=" __stringify(CMM_OOM_KB) "]"); 72 module_param_named(min_mem_mb, min_mem_mb, ulong, 0644); 73 MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. " 74 "[Default=" __stringify(CMM_MIN_MEM_MB) "]"); 75 module_param_named(debug, cmm_debug, uint, 0644); 76 MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. " 77 "[Default=" __stringify(CMM_DEBUG) "]"); 78 module_param_named(simulate, simulate, bool, 0444); 79 MODULE_PARM_DESC(simulate, "Enable simulation mode (no communication with hw)."); 80 81 #define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); } 82 83 static atomic_long_t loaned_pages; 84 static unsigned long loaned_pages_target; 85 static unsigned long oom_freed_pages; 86 87 static DEFINE_MUTEX(hotplug_mutex); 88 static int hotplug_occurred; /* protected by the hotplug mutex */ 89 90 static struct task_struct *cmm_thread_ptr; 91 static struct balloon_dev_info b_dev_info; 92 93 static long plpar_page_set_loaned(struct page *page) 94 { 95 const unsigned long vpa = page_to_phys(page); 96 unsigned long cmo_page_sz = cmo_get_page_size(); 97 long rc = 0; 98 int i; 99 100 if (unlikely(simulate)) 101 return 0; 102 103 for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) 104 rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0); 105 106 for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) 107 plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, 108 vpa + i - cmo_page_sz, 0); 109 110 return rc; 111 } 112 113 static long plpar_page_set_active(struct page *page) 114 { 115 const unsigned long vpa = page_to_phys(page); 116 unsigned long cmo_page_sz = cmo_get_page_size(); 117 long rc = 0; 118 int i; 119 120 if (unlikely(simulate)) 121 return 0; 122 123 for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) 124 rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0); 125 126 for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) 127 plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, 128 vpa + i - cmo_page_sz, 0); 129 130 return rc; 131 } 132 133 /** 134 * cmm_alloc_pages - Allocate pages and mark them as loaned 135 * @nr: number of pages to allocate 136 * 137 * Return value: 138 * number of pages requested to be allocated which were not 139 **/ 140 static long cmm_alloc_pages(long nr) 141 { 142 struct page *page; 143 long rc; 144 145 cmm_dbg("Begin request for %ld pages\n", nr); 146 147 while (nr) { 148 /* Exit if a hotplug operation is in progress or occurred */ 149 if (mutex_trylock(&hotplug_mutex)) { 150 if (hotplug_occurred) { 151 mutex_unlock(&hotplug_mutex); 152 break; 153 } 154 mutex_unlock(&hotplug_mutex); 155 } else { 156 break; 157 } 158 159 page = balloon_page_alloc(); 160 if (!page) 161 break; 162 rc = plpar_page_set_loaned(page); 163 if (rc) { 164 pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc); 165 __free_page(page); 166 break; 167 } 168 169 balloon_page_enqueue(&b_dev_info, page); 170 atomic_long_inc(&loaned_pages); 171 adjust_managed_page_count(page, -1); 172 nr--; 173 } 174 175 cmm_dbg("End request with %ld pages unfulfilled\n", nr); 176 return nr; 177 } 178 179 /** 180 * cmm_free_pages - Free pages and mark them as active 181 * @nr: number of pages to free 182 * 183 * Return value: 184 * number of pages requested to be freed which were not 185 **/ 186 static long cmm_free_pages(long nr) 187 { 188 struct page *page; 189 190 cmm_dbg("Begin free of %ld pages.\n", nr); 191 while (nr) { 192 page = balloon_page_dequeue(&b_dev_info); 193 if (!page) 194 break; 195 plpar_page_set_active(page); 196 adjust_managed_page_count(page, 1); 197 __free_page(page); 198 atomic_long_dec(&loaned_pages); 199 nr--; 200 } 201 cmm_dbg("End request with %ld pages unfulfilled\n", nr); 202 return nr; 203 } 204 205 /** 206 * cmm_oom_notify - OOM notifier 207 * @self: notifier block struct 208 * @dummy: not used 209 * @parm: returned - number of pages freed 210 * 211 * Return value: 212 * NOTIFY_OK 213 **/ 214 static int cmm_oom_notify(struct notifier_block *self, 215 unsigned long dummy, void *parm) 216 { 217 unsigned long *freed = parm; 218 long nr = KB2PAGES(oom_kb); 219 220 cmm_dbg("OOM processing started\n"); 221 nr = cmm_free_pages(nr); 222 loaned_pages_target = atomic_long_read(&loaned_pages); 223 *freed += KB2PAGES(oom_kb) - nr; 224 oom_freed_pages += KB2PAGES(oom_kb) - nr; 225 cmm_dbg("OOM processing complete\n"); 226 return NOTIFY_OK; 227 } 228 229 /** 230 * cmm_get_mpp - Read memory performance parameters 231 * 232 * Makes hcall to query the current page loan request from the hypervisor. 233 * 234 * Return value: 235 * nothing 236 **/ 237 static void cmm_get_mpp(void) 238 { 239 const long __loaned_pages = atomic_long_read(&loaned_pages); 240 const long total_pages = totalram_pages() + __loaned_pages; 241 int rc; 242 struct hvcall_mpp_data mpp_data; 243 signed long active_pages_target, page_loan_request, target; 244 signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE; 245 246 if (likely(!simulate)) { 247 rc = h_get_mpp(&mpp_data); 248 if (rc != H_SUCCESS) 249 return; 250 page_loan_request = div_s64((s64)mpp_data.loan_request, 251 PAGE_SIZE); 252 target = page_loan_request + __loaned_pages; 253 } else { 254 target = KB2PAGES(simulate_loan_target_kb); 255 page_loan_request = target - __loaned_pages; 256 } 257 258 if (target < 0 || total_pages < min_mem_pages) 259 target = 0; 260 261 if (target > oom_freed_pages) 262 target -= oom_freed_pages; 263 else 264 target = 0; 265 266 active_pages_target = total_pages - target; 267 268 if (min_mem_pages > active_pages_target) 269 target = total_pages - min_mem_pages; 270 271 if (target < 0) 272 target = 0; 273 274 loaned_pages_target = target; 275 276 cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n", 277 page_loan_request, __loaned_pages, loaned_pages_target, 278 oom_freed_pages, totalram_pages()); 279 } 280 281 static struct notifier_block cmm_oom_nb = { 282 .notifier_call = cmm_oom_notify 283 }; 284 285 /** 286 * cmm_thread - CMM task thread 287 * @dummy: not used 288 * 289 * Return value: 290 * 0 291 **/ 292 static int cmm_thread(void *dummy) 293 { 294 unsigned long timeleft; 295 long __loaned_pages; 296 297 while (1) { 298 timeleft = msleep_interruptible(delay * 1000); 299 300 if (kthread_should_stop() || timeleft) 301 break; 302 303 if (mutex_trylock(&hotplug_mutex)) { 304 if (hotplug_occurred) { 305 hotplug_occurred = 0; 306 mutex_unlock(&hotplug_mutex); 307 cmm_dbg("Hotplug operation has occurred, " 308 "loaning activity suspended " 309 "for %d seconds.\n", 310 hotplug_delay); 311 timeleft = msleep_interruptible(hotplug_delay * 312 1000); 313 if (kthread_should_stop() || timeleft) 314 break; 315 continue; 316 } 317 mutex_unlock(&hotplug_mutex); 318 } else { 319 cmm_dbg("Hotplug operation in progress, activity " 320 "suspended\n"); 321 continue; 322 } 323 324 cmm_get_mpp(); 325 326 __loaned_pages = atomic_long_read(&loaned_pages); 327 if (loaned_pages_target > __loaned_pages) { 328 if (cmm_alloc_pages(loaned_pages_target - __loaned_pages)) 329 loaned_pages_target = __loaned_pages; 330 } else if (loaned_pages_target < __loaned_pages) 331 cmm_free_pages(__loaned_pages - loaned_pages_target); 332 } 333 return 0; 334 } 335 336 #define CMM_SHOW(name, format, args...) \ 337 static ssize_t show_##name(struct device *dev, \ 338 struct device_attribute *attr, \ 339 char *buf) \ 340 { \ 341 return sprintf(buf, format, ##args); \ 342 } \ 343 static DEVICE_ATTR(name, 0444, show_##name, NULL) 344 345 CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(atomic_long_read(&loaned_pages))); 346 CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target)); 347 348 static ssize_t show_oom_pages(struct device *dev, 349 struct device_attribute *attr, char *buf) 350 { 351 return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages)); 352 } 353 354 static ssize_t store_oom_pages(struct device *dev, 355 struct device_attribute *attr, 356 const char *buf, size_t count) 357 { 358 unsigned long val = simple_strtoul (buf, NULL, 10); 359 360 if (!capable(CAP_SYS_ADMIN)) 361 return -EPERM; 362 if (val != 0) 363 return -EBADMSG; 364 365 oom_freed_pages = 0; 366 return count; 367 } 368 369 static DEVICE_ATTR(oom_freed_kb, 0644, 370 show_oom_pages, store_oom_pages); 371 372 static struct device_attribute *cmm_attrs[] = { 373 &dev_attr_loaned_kb, 374 &dev_attr_loaned_target_kb, 375 &dev_attr_oom_freed_kb, 376 }; 377 378 static DEVICE_ULONG_ATTR(simulate_loan_target_kb, 0644, 379 simulate_loan_target_kb); 380 381 static struct bus_type cmm_subsys = { 382 .name = "cmm", 383 .dev_name = "cmm", 384 }; 385 386 static void cmm_release_device(struct device *dev) 387 { 388 } 389 390 /** 391 * cmm_sysfs_register - Register with sysfs 392 * 393 * Return value: 394 * 0 on success / other on failure 395 **/ 396 static int cmm_sysfs_register(struct device *dev) 397 { 398 int i, rc; 399 400 if ((rc = subsys_system_register(&cmm_subsys, NULL))) 401 return rc; 402 403 dev->id = 0; 404 dev->bus = &cmm_subsys; 405 dev->release = cmm_release_device; 406 407 if ((rc = device_register(dev))) 408 goto subsys_unregister; 409 410 for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) { 411 if ((rc = device_create_file(dev, cmm_attrs[i]))) 412 goto fail; 413 } 414 415 if (!simulate) 416 return 0; 417 rc = device_create_file(dev, &dev_attr_simulate_loan_target_kb.attr); 418 if (rc) 419 goto fail; 420 return 0; 421 422 fail: 423 while (--i >= 0) 424 device_remove_file(dev, cmm_attrs[i]); 425 device_unregister(dev); 426 subsys_unregister: 427 bus_unregister(&cmm_subsys); 428 return rc; 429 } 430 431 /** 432 * cmm_unregister_sysfs - Unregister from sysfs 433 * 434 **/ 435 static void cmm_unregister_sysfs(struct device *dev) 436 { 437 int i; 438 439 for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) 440 device_remove_file(dev, cmm_attrs[i]); 441 device_unregister(dev); 442 bus_unregister(&cmm_subsys); 443 } 444 445 /** 446 * cmm_reboot_notifier - Make sure pages are not still marked as "loaned" 447 * 448 **/ 449 static int cmm_reboot_notifier(struct notifier_block *nb, 450 unsigned long action, void *unused) 451 { 452 if (action == SYS_RESTART) { 453 if (cmm_thread_ptr) 454 kthread_stop(cmm_thread_ptr); 455 cmm_thread_ptr = NULL; 456 cmm_free_pages(atomic_long_read(&loaned_pages)); 457 } 458 return NOTIFY_DONE; 459 } 460 461 static struct notifier_block cmm_reboot_nb = { 462 .notifier_call = cmm_reboot_notifier, 463 }; 464 465 /** 466 * cmm_memory_cb - Handle memory hotplug notifier calls 467 * @self: notifier block struct 468 * @action: action to take 469 * @arg: struct memory_notify data for handler 470 * 471 * Return value: 472 * NOTIFY_OK or notifier error based on subfunction return value 473 * 474 **/ 475 static int cmm_memory_cb(struct notifier_block *self, 476 unsigned long action, void *arg) 477 { 478 switch (action) { 479 case MEM_GOING_OFFLINE: 480 mutex_lock(&hotplug_mutex); 481 hotplug_occurred = 1; 482 break; 483 case MEM_OFFLINE: 484 case MEM_CANCEL_OFFLINE: 485 mutex_unlock(&hotplug_mutex); 486 cmm_dbg("Memory offline operation complete.\n"); 487 break; 488 case MEM_GOING_ONLINE: 489 case MEM_ONLINE: 490 case MEM_CANCEL_ONLINE: 491 break; 492 } 493 494 return NOTIFY_OK; 495 } 496 497 static struct notifier_block cmm_mem_nb = { 498 .notifier_call = cmm_memory_cb, 499 .priority = CMM_MEM_HOTPLUG_PRI 500 }; 501 502 #ifdef CONFIG_BALLOON_COMPACTION 503 static struct vfsmount *balloon_mnt; 504 505 static int cmm_init_fs_context(struct fs_context *fc) 506 { 507 return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM; 508 } 509 510 static struct file_system_type balloon_fs = { 511 .name = "ppc-cmm", 512 .init_fs_context = cmm_init_fs_context, 513 .kill_sb = kill_anon_super, 514 }; 515 516 static int cmm_migratepage(struct balloon_dev_info *b_dev_info, 517 struct page *newpage, struct page *page, 518 enum migrate_mode mode) 519 { 520 unsigned long flags; 521 522 /* 523 * loan/"inflate" the newpage first. 524 * 525 * We might race against the cmm_thread who might discover after our 526 * loan request that another page is to be unloaned. However, once 527 * the cmm_thread runs again later, this error will automatically 528 * be corrected. 529 */ 530 if (plpar_page_set_loaned(newpage)) { 531 /* Unlikely, but possible. Tell the caller not to retry now. */ 532 pr_err_ratelimited("%s: Cannot set page to loaned.", __func__); 533 return -EBUSY; 534 } 535 536 /* balloon page list reference */ 537 get_page(newpage); 538 539 /* 540 * When we migrate a page to a different zone, we have to fixup the 541 * count of both involved zones as we adjusted the managed page count 542 * when inflating. 543 */ 544 if (page_zone(page) != page_zone(newpage)) { 545 adjust_managed_page_count(page, 1); 546 adjust_managed_page_count(newpage, -1); 547 } 548 549 spin_lock_irqsave(&b_dev_info->pages_lock, flags); 550 balloon_page_insert(b_dev_info, newpage); 551 balloon_page_delete(page); 552 b_dev_info->isolated_pages--; 553 spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); 554 555 /* 556 * activate/"deflate" the old page. We ignore any errors just like the 557 * other callers. 558 */ 559 plpar_page_set_active(page); 560 561 /* balloon page list reference */ 562 put_page(page); 563 564 return MIGRATEPAGE_SUCCESS; 565 } 566 567 static int cmm_balloon_compaction_init(void) 568 { 569 int rc; 570 571 balloon_devinfo_init(&b_dev_info); 572 b_dev_info.migratepage = cmm_migratepage; 573 574 balloon_mnt = kern_mount(&balloon_fs); 575 if (IS_ERR(balloon_mnt)) { 576 rc = PTR_ERR(balloon_mnt); 577 balloon_mnt = NULL; 578 return rc; 579 } 580 581 b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); 582 if (IS_ERR(b_dev_info.inode)) { 583 rc = PTR_ERR(b_dev_info.inode); 584 b_dev_info.inode = NULL; 585 kern_unmount(balloon_mnt); 586 balloon_mnt = NULL; 587 return rc; 588 } 589 590 b_dev_info.inode->i_mapping->a_ops = &balloon_aops; 591 return 0; 592 } 593 static void cmm_balloon_compaction_deinit(void) 594 { 595 if (b_dev_info.inode) 596 iput(b_dev_info.inode); 597 b_dev_info.inode = NULL; 598 kern_unmount(balloon_mnt); 599 balloon_mnt = NULL; 600 } 601 #else /* CONFIG_BALLOON_COMPACTION */ 602 static int cmm_balloon_compaction_init(void) 603 { 604 return 0; 605 } 606 607 static void cmm_balloon_compaction_deinit(void) 608 { 609 } 610 #endif /* CONFIG_BALLOON_COMPACTION */ 611 612 /** 613 * cmm_init - Module initialization 614 * 615 * Return value: 616 * 0 on success / other on failure 617 **/ 618 static int cmm_init(void) 619 { 620 int rc; 621 622 if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate) 623 return -EOPNOTSUPP; 624 625 rc = cmm_balloon_compaction_init(); 626 if (rc) 627 return rc; 628 629 rc = register_oom_notifier(&cmm_oom_nb); 630 if (rc < 0) 631 goto out_balloon_compaction; 632 633 if ((rc = register_reboot_notifier(&cmm_reboot_nb))) 634 goto out_oom_notifier; 635 636 if ((rc = cmm_sysfs_register(&cmm_dev))) 637 goto out_reboot_notifier; 638 639 rc = register_memory_notifier(&cmm_mem_nb); 640 if (rc) 641 goto out_unregister_notifier; 642 643 if (cmm_disabled) 644 return 0; 645 646 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); 647 if (IS_ERR(cmm_thread_ptr)) { 648 rc = PTR_ERR(cmm_thread_ptr); 649 goto out_unregister_notifier; 650 } 651 652 return 0; 653 out_unregister_notifier: 654 unregister_memory_notifier(&cmm_mem_nb); 655 cmm_unregister_sysfs(&cmm_dev); 656 out_reboot_notifier: 657 unregister_reboot_notifier(&cmm_reboot_nb); 658 out_oom_notifier: 659 unregister_oom_notifier(&cmm_oom_nb); 660 out_balloon_compaction: 661 cmm_balloon_compaction_deinit(); 662 return rc; 663 } 664 665 /** 666 * cmm_exit - Module exit 667 * 668 * Return value: 669 * nothing 670 **/ 671 static void cmm_exit(void) 672 { 673 if (cmm_thread_ptr) 674 kthread_stop(cmm_thread_ptr); 675 unregister_oom_notifier(&cmm_oom_nb); 676 unregister_reboot_notifier(&cmm_reboot_nb); 677 unregister_memory_notifier(&cmm_mem_nb); 678 cmm_free_pages(atomic_long_read(&loaned_pages)); 679 cmm_unregister_sysfs(&cmm_dev); 680 cmm_balloon_compaction_deinit(); 681 } 682 683 /** 684 * cmm_set_disable - Disable/Enable CMM 685 * 686 * Return value: 687 * 0 on success / other on failure 688 **/ 689 static int cmm_set_disable(const char *val, const struct kernel_param *kp) 690 { 691 int disable = simple_strtoul(val, NULL, 10); 692 693 if (disable != 0 && disable != 1) 694 return -EINVAL; 695 696 if (disable && !cmm_disabled) { 697 if (cmm_thread_ptr) 698 kthread_stop(cmm_thread_ptr); 699 cmm_thread_ptr = NULL; 700 cmm_free_pages(atomic_long_read(&loaned_pages)); 701 } else if (!disable && cmm_disabled) { 702 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); 703 if (IS_ERR(cmm_thread_ptr)) 704 return PTR_ERR(cmm_thread_ptr); 705 } 706 707 cmm_disabled = disable; 708 return 0; 709 } 710 711 module_param_call(disable, cmm_set_disable, param_get_uint, 712 &cmm_disabled, 0644); 713 MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. " 714 "[Default=" __stringify(CMM_DISABLE) "]"); 715 716 module_init(cmm_init); 717 module_exit(cmm_exit); 718