1 /* 2 * Collaborative memory management interface. 3 * 4 * Copyright (C) 2008 IBM Corporation 5 * Author(s): Brian King (brking@linux.vnet.ibm.com), 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 * 21 */ 22 23 #include <linux/ctype.h> 24 #include <linux/delay.h> 25 #include <linux/errno.h> 26 #include <linux/fs.h> 27 #include <linux/init.h> 28 #include <linux/kthread.h> 29 #include <linux/module.h> 30 #include <linux/oom.h> 31 #include <linux/reboot.h> 32 #include <linux/sched.h> 33 #include <linux/stringify.h> 34 #include <linux/swap.h> 35 #include <linux/sysdev.h> 36 #include <asm/firmware.h> 37 #include <asm/hvcall.h> 38 #include <asm/mmu.h> 39 #include <asm/pgalloc.h> 40 #include <asm/uaccess.h> 41 42 #include "plpar_wrappers.h" 43 44 #define CMM_DRIVER_VERSION "1.0.0" 45 #define CMM_DEFAULT_DELAY 1 46 #define CMM_DEBUG 0 47 #define CMM_DISABLE 0 48 #define CMM_OOM_KB 1024 49 #define CMM_MIN_MEM_MB 256 50 #define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10)) 51 #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) 52 53 static unsigned int delay = CMM_DEFAULT_DELAY; 54 static unsigned int oom_kb = CMM_OOM_KB; 55 static unsigned int cmm_debug = CMM_DEBUG; 56 static unsigned int cmm_disabled = CMM_DISABLE; 57 static unsigned long min_mem_mb = CMM_MIN_MEM_MB; 58 static struct sys_device cmm_sysdev; 59 60 MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>"); 61 MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager"); 62 MODULE_LICENSE("GPL"); 63 MODULE_VERSION(CMM_DRIVER_VERSION); 64 65 module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR); 66 MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. " 67 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]"); 68 module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR); 69 MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. " 70 "[Default=" __stringify(CMM_OOM_KB) "]"); 71 module_param_named(min_mem_mb, min_mem_mb, ulong, S_IRUGO | S_IWUSR); 72 MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. " 73 "[Default=" __stringify(CMM_MIN_MEM_MB) "]"); 74 module_param_named(debug, cmm_debug, uint, S_IRUGO | S_IWUSR); 75 MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. " 76 "[Default=" __stringify(CMM_DEBUG) "]"); 77 78 #define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long)) 79 80 #define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); } 81 82 struct cmm_page_array { 83 struct cmm_page_array *next; 84 unsigned long index; 85 unsigned long page[CMM_NR_PAGES]; 86 }; 87 88 static unsigned long loaned_pages; 89 static unsigned long loaned_pages_target; 90 static unsigned long oom_freed_pages; 91 92 static struct cmm_page_array *cmm_page_list; 93 static DEFINE_SPINLOCK(cmm_lock); 94 95 static struct task_struct *cmm_thread_ptr; 96 97 /** 98 * cmm_alloc_pages - Allocate pages and mark them as loaned 99 * @nr: number of pages to allocate 100 * 101 * Return value: 102 * number of pages requested to be allocated which were not 103 **/ 104 static long cmm_alloc_pages(long nr) 105 { 106 struct cmm_page_array *pa, *npa; 107 unsigned long addr; 108 long rc; 109 110 cmm_dbg("Begin request for %ld pages\n", nr); 111 112 while (nr) { 113 addr = __get_free_page(GFP_NOIO | __GFP_NOWARN | 114 __GFP_NORETRY | __GFP_NOMEMALLOC); 115 if (!addr) 116 break; 117 spin_lock(&cmm_lock); 118 pa = cmm_page_list; 119 if (!pa || pa->index >= CMM_NR_PAGES) { 120 /* Need a new page for the page list. */ 121 spin_unlock(&cmm_lock); 122 npa = (struct cmm_page_array *)__get_free_page(GFP_NOIO | __GFP_NOWARN | 123 __GFP_NORETRY | __GFP_NOMEMALLOC); 124 if (!npa) { 125 pr_info("%s: Can not allocate new page list\n", __func__); 126 free_page(addr); 127 break; 128 } 129 spin_lock(&cmm_lock); 130 pa = cmm_page_list; 131 132 if (!pa || pa->index >= CMM_NR_PAGES) { 133 npa->next = pa; 134 npa->index = 0; 135 pa = npa; 136 cmm_page_list = pa; 137 } else 138 free_page((unsigned long) npa); 139 } 140 141 if ((rc = plpar_page_set_loaned(__pa(addr)))) { 142 pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc); 143 spin_unlock(&cmm_lock); 144 free_page(addr); 145 break; 146 } 147 148 pa->page[pa->index++] = addr; 149 loaned_pages++; 150 totalram_pages--; 151 spin_unlock(&cmm_lock); 152 nr--; 153 } 154 155 cmm_dbg("End request with %ld pages unfulfilled\n", nr); 156 return nr; 157 } 158 159 /** 160 * cmm_free_pages - Free pages and mark them as active 161 * @nr: number of pages to free 162 * 163 * Return value: 164 * number of pages requested to be freed which were not 165 **/ 166 static long cmm_free_pages(long nr) 167 { 168 struct cmm_page_array *pa; 169 unsigned long addr; 170 171 cmm_dbg("Begin free of %ld pages.\n", nr); 172 spin_lock(&cmm_lock); 173 pa = cmm_page_list; 174 while (nr) { 175 if (!pa || pa->index <= 0) 176 break; 177 addr = pa->page[--pa->index]; 178 179 if (pa->index == 0) { 180 pa = pa->next; 181 free_page((unsigned long) cmm_page_list); 182 cmm_page_list = pa; 183 } 184 185 plpar_page_set_active(__pa(addr)); 186 free_page(addr); 187 loaned_pages--; 188 nr--; 189 totalram_pages++; 190 } 191 spin_unlock(&cmm_lock); 192 cmm_dbg("End request with %ld pages unfulfilled\n", nr); 193 return nr; 194 } 195 196 /** 197 * cmm_oom_notify - OOM notifier 198 * @self: notifier block struct 199 * @dummy: not used 200 * @parm: returned - number of pages freed 201 * 202 * Return value: 203 * NOTIFY_OK 204 **/ 205 static int cmm_oom_notify(struct notifier_block *self, 206 unsigned long dummy, void *parm) 207 { 208 unsigned long *freed = parm; 209 long nr = KB2PAGES(oom_kb); 210 211 cmm_dbg("OOM processing started\n"); 212 nr = cmm_free_pages(nr); 213 loaned_pages_target = loaned_pages; 214 *freed += KB2PAGES(oom_kb) - nr; 215 oom_freed_pages += KB2PAGES(oom_kb) - nr; 216 cmm_dbg("OOM processing complete\n"); 217 return NOTIFY_OK; 218 } 219 220 /** 221 * cmm_get_mpp - Read memory performance parameters 222 * 223 * Makes hcall to query the current page loan request from the hypervisor. 224 * 225 * Return value: 226 * nothing 227 **/ 228 static void cmm_get_mpp(void) 229 { 230 int rc; 231 struct hvcall_mpp_data mpp_data; 232 unsigned long active_pages_target; 233 signed long page_loan_request; 234 235 rc = h_get_mpp(&mpp_data); 236 237 if (rc != H_SUCCESS) 238 return; 239 240 page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE); 241 loaned_pages_target = page_loan_request + loaned_pages; 242 if (loaned_pages_target > oom_freed_pages) 243 loaned_pages_target -= oom_freed_pages; 244 else 245 loaned_pages_target = 0; 246 247 active_pages_target = totalram_pages + loaned_pages - loaned_pages_target; 248 249 if ((min_mem_mb * 1024 * 1024) > (active_pages_target * PAGE_SIZE)) 250 loaned_pages_target = totalram_pages + loaned_pages - 251 ((min_mem_mb * 1024 * 1024) / PAGE_SIZE); 252 253 cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n", 254 page_loan_request, loaned_pages, loaned_pages_target, 255 oom_freed_pages, totalram_pages); 256 } 257 258 static struct notifier_block cmm_oom_nb = { 259 .notifier_call = cmm_oom_notify 260 }; 261 262 /** 263 * cmm_thread - CMM task thread 264 * @dummy: not used 265 * 266 * Return value: 267 * 0 268 **/ 269 static int cmm_thread(void *dummy) 270 { 271 unsigned long timeleft; 272 273 while (1) { 274 timeleft = msleep_interruptible(delay * 1000); 275 276 if (kthread_should_stop() || timeleft) { 277 loaned_pages_target = loaned_pages; 278 break; 279 } 280 281 cmm_get_mpp(); 282 283 if (loaned_pages_target > loaned_pages) { 284 if (cmm_alloc_pages(loaned_pages_target - loaned_pages)) 285 loaned_pages_target = loaned_pages; 286 } else if (loaned_pages_target < loaned_pages) 287 cmm_free_pages(loaned_pages - loaned_pages_target); 288 } 289 return 0; 290 } 291 292 #define CMM_SHOW(name, format, args...) \ 293 static ssize_t show_##name(struct sys_device *dev, \ 294 struct sysdev_attribute *attr, \ 295 char *buf) \ 296 { \ 297 return sprintf(buf, format, ##args); \ 298 } \ 299 static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) 300 301 CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages)); 302 CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target)); 303 304 static ssize_t show_oom_pages(struct sys_device *dev, 305 struct sysdev_attribute *attr, char *buf) 306 { 307 return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages)); 308 } 309 310 static ssize_t store_oom_pages(struct sys_device *dev, 311 struct sysdev_attribute *attr, 312 const char *buf, size_t count) 313 { 314 unsigned long val = simple_strtoul (buf, NULL, 10); 315 316 if (!capable(CAP_SYS_ADMIN)) 317 return -EPERM; 318 if (val != 0) 319 return -EBADMSG; 320 321 oom_freed_pages = 0; 322 return count; 323 } 324 325 static SYSDEV_ATTR(oom_freed_kb, S_IWUSR| S_IRUGO, 326 show_oom_pages, store_oom_pages); 327 328 static struct sysdev_attribute *cmm_attrs[] = { 329 &attr_loaned_kb, 330 &attr_loaned_target_kb, 331 &attr_oom_freed_kb, 332 }; 333 334 static struct sysdev_class cmm_sysdev_class = { 335 .name = "cmm", 336 }; 337 338 /** 339 * cmm_sysfs_register - Register with sysfs 340 * 341 * Return value: 342 * 0 on success / other on failure 343 **/ 344 static int cmm_sysfs_register(struct sys_device *sysdev) 345 { 346 int i, rc; 347 348 if ((rc = sysdev_class_register(&cmm_sysdev_class))) 349 return rc; 350 351 sysdev->id = 0; 352 sysdev->cls = &cmm_sysdev_class; 353 354 if ((rc = sysdev_register(sysdev))) 355 goto class_unregister; 356 357 for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) { 358 if ((rc = sysdev_create_file(sysdev, cmm_attrs[i]))) 359 goto fail; 360 } 361 362 return 0; 363 364 fail: 365 while (--i >= 0) 366 sysdev_remove_file(sysdev, cmm_attrs[i]); 367 sysdev_unregister(sysdev); 368 class_unregister: 369 sysdev_class_unregister(&cmm_sysdev_class); 370 return rc; 371 } 372 373 /** 374 * cmm_unregister_sysfs - Unregister from sysfs 375 * 376 **/ 377 static void cmm_unregister_sysfs(struct sys_device *sysdev) 378 { 379 int i; 380 381 for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) 382 sysdev_remove_file(sysdev, cmm_attrs[i]); 383 sysdev_unregister(sysdev); 384 sysdev_class_unregister(&cmm_sysdev_class); 385 } 386 387 /** 388 * cmm_reboot_notifier - Make sure pages are not still marked as "loaned" 389 * 390 **/ 391 static int cmm_reboot_notifier(struct notifier_block *nb, 392 unsigned long action, void *unused) 393 { 394 if (action == SYS_RESTART) { 395 if (cmm_thread_ptr) 396 kthread_stop(cmm_thread_ptr); 397 cmm_thread_ptr = NULL; 398 cmm_free_pages(loaned_pages); 399 } 400 return NOTIFY_DONE; 401 } 402 403 static struct notifier_block cmm_reboot_nb = { 404 .notifier_call = cmm_reboot_notifier, 405 }; 406 407 /** 408 * cmm_init - Module initialization 409 * 410 * Return value: 411 * 0 on success / other on failure 412 **/ 413 static int cmm_init(void) 414 { 415 int rc = -ENOMEM; 416 417 if (!firmware_has_feature(FW_FEATURE_CMO)) 418 return -EOPNOTSUPP; 419 420 if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0) 421 return rc; 422 423 if ((rc = register_reboot_notifier(&cmm_reboot_nb))) 424 goto out_oom_notifier; 425 426 if ((rc = cmm_sysfs_register(&cmm_sysdev))) 427 goto out_reboot_notifier; 428 429 if (cmm_disabled) 430 return rc; 431 432 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); 433 if (IS_ERR(cmm_thread_ptr)) { 434 rc = PTR_ERR(cmm_thread_ptr); 435 goto out_unregister_sysfs; 436 } 437 438 return rc; 439 440 out_unregister_sysfs: 441 cmm_unregister_sysfs(&cmm_sysdev); 442 out_reboot_notifier: 443 unregister_reboot_notifier(&cmm_reboot_nb); 444 out_oom_notifier: 445 unregister_oom_notifier(&cmm_oom_nb); 446 return rc; 447 } 448 449 /** 450 * cmm_exit - Module exit 451 * 452 * Return value: 453 * nothing 454 **/ 455 static void cmm_exit(void) 456 { 457 if (cmm_thread_ptr) 458 kthread_stop(cmm_thread_ptr); 459 unregister_oom_notifier(&cmm_oom_nb); 460 unregister_reboot_notifier(&cmm_reboot_nb); 461 cmm_free_pages(loaned_pages); 462 cmm_unregister_sysfs(&cmm_sysdev); 463 } 464 465 /** 466 * cmm_set_disable - Disable/Enable CMM 467 * 468 * Return value: 469 * 0 on success / other on failure 470 **/ 471 static int cmm_set_disable(const char *val, struct kernel_param *kp) 472 { 473 int disable = simple_strtoul(val, NULL, 10); 474 475 if (disable != 0 && disable != 1) 476 return -EINVAL; 477 478 if (disable && !cmm_disabled) { 479 if (cmm_thread_ptr) 480 kthread_stop(cmm_thread_ptr); 481 cmm_thread_ptr = NULL; 482 cmm_free_pages(loaned_pages); 483 } else if (!disable && cmm_disabled) { 484 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); 485 if (IS_ERR(cmm_thread_ptr)) 486 return PTR_ERR(cmm_thread_ptr); 487 } 488 489 cmm_disabled = disable; 490 return 0; 491 } 492 493 module_param_call(disable, cmm_set_disable, param_get_uint, 494 &cmm_disabled, S_IRUGO | S_IWUSR); 495 MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. " 496 "[Default=" __stringify(CMM_DISABLE) "]"); 497 498 module_init(cmm_init); 499 module_exit(cmm_exit); 500