xref: /openbmc/linux/arch/powerpc/platforms/pseries/cmm.c (revision e4dca7b7aa08b22893c45485d222b5807c1375ae)
184af458bSBrian King /*
284af458bSBrian King  * Collaborative memory management interface.
384af458bSBrian King  *
484af458bSBrian King  * Copyright (C) 2008 IBM Corporation
584af458bSBrian King  * Author(s): Brian King (brking@linux.vnet.ibm.com),
684af458bSBrian King  *
784af458bSBrian King  * This program is free software; you can redistribute it and/or modify
884af458bSBrian King  * it under the terms of the GNU General Public License as published by
984af458bSBrian King  * the Free Software Foundation; either version 2 of the License, or
1084af458bSBrian King  * (at your option) any later version.
1184af458bSBrian King  *
1284af458bSBrian King  * This program is distributed in the hope that it will be useful,
1384af458bSBrian King  * but WITHOUT ANY WARRANTY; without even the implied warranty of
1484af458bSBrian King  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1584af458bSBrian King  * GNU General Public License for more details.
1684af458bSBrian King  *
1784af458bSBrian King  * You should have received a copy of the GNU General Public License
1884af458bSBrian King  * along with this program; if not, write to the Free Software
1984af458bSBrian King  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
2084af458bSBrian King  *
2184af458bSBrian King  */
2284af458bSBrian King 
2384af458bSBrian King #include <linux/ctype.h>
2484af458bSBrian King #include <linux/delay.h>
2584af458bSBrian King #include <linux/errno.h>
2684af458bSBrian King #include <linux/fs.h>
275a0e3ad6STejun Heo #include <linux/gfp.h>
2884af458bSBrian King #include <linux/kthread.h>
2984af458bSBrian King #include <linux/module.h>
3084af458bSBrian King #include <linux/oom.h>
31fecba962SBrian King #include <linux/reboot.h>
3284af458bSBrian King #include <linux/sched.h>
3384af458bSBrian King #include <linux/stringify.h>
3484af458bSBrian King #include <linux/swap.h>
356c9d2909SKay Sievers #include <linux/device.h>
3684af458bSBrian King #include <asm/firmware.h>
3784af458bSBrian King #include <asm/hvcall.h>
3884af458bSBrian King #include <asm/mmu.h>
3984af458bSBrian King #include <asm/pgalloc.h>
407c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
4114b8a76bSRobert Jennings #include <linux/memory.h>
42212bebb4SDeepthi Dharwar #include <asm/plpar_wrappers.h>
4384af458bSBrian King 
448f272a5dSMichael Ellerman #include "pseries.h"
458f272a5dSMichael Ellerman 
/* Version string handed to MODULE_VERSION(). */
#define CMM_DRIVER_VERSION	"1.0.0"

/* Compile-time defaults for the tunables defined further down. */
#define CMM_DEFAULT_DELAY	1	/* seconds between hypervisor polls */
#define CMM_HOTPLUG_DELAY	5	/* seconds loaning pauses after hotplug remove */
#define CMM_DEBUG			0	/* debug logging disabled */
#define CMM_DISABLE		0	/* initial value of cmm_disabled */
#define CMM_OOM_KB		1024	/* KB to free on an OOM notification */
#define CMM_MIN_MEM_MB		256	/* MB that is never ballooned */

/* Convert a size in KB to a number of pages, and back. */
#define KB2PAGES(_kb)		((_kb) >> (PAGE_SHIFT - 10))
#define PAGES2KB(_pages)	((_pages) << (PAGE_SHIFT - 10))

/*
 * The priority level tries to ensure that this notifier is called as
 * late as possible to reduce thrashing in the shared memory pool.
 */
#define CMM_MEM_HOTPLUG_PRI	1
#define CMM_MEM_ISOLATE_PRI	15
6184af458bSBrian King 
6284af458bSBrian King static unsigned int delay = CMM_DEFAULT_DELAY;
6314b8a76bSRobert Jennings static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
6484af458bSBrian King static unsigned int oom_kb = CMM_OOM_KB;
6584af458bSBrian King static unsigned int cmm_debug = CMM_DEBUG;
6684af458bSBrian King static unsigned int cmm_disabled = CMM_DISABLE;
6784af458bSBrian King static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
686c9d2909SKay Sievers static struct device cmm_dev;
6984af458bSBrian King 
7084af458bSBrian King MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
7184af458bSBrian King MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
7284af458bSBrian King MODULE_LICENSE("GPL");
7384af458bSBrian King MODULE_VERSION(CMM_DRIVER_VERSION);
7484af458bSBrian King 
7584af458bSBrian King module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
7684af458bSBrian King MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
7784af458bSBrian King 		 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
7814b8a76bSRobert Jennings module_param_named(hotplug_delay, hotplug_delay, uint, S_IRUGO | S_IWUSR);
79b0b5a765SWei Yongjun MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove "
8014b8a76bSRobert Jennings 		 "before loaning resumes. "
8114b8a76bSRobert Jennings 		 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
8284af458bSBrian King module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
8384af458bSBrian King MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
8484af458bSBrian King 		 "[Default=" __stringify(CMM_OOM_KB) "]");
8584af458bSBrian King module_param_named(min_mem_mb, min_mem_mb, ulong, S_IRUGO | S_IWUSR);
8684af458bSBrian King MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
8784af458bSBrian King 		 "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
8884af458bSBrian King module_param_named(debug, cmm_debug, uint, S_IRUGO | S_IWUSR);
8984af458bSBrian King MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
9084af458bSBrian King 		 "[Default=" __stringify(CMM_DEBUG) "]");
9184af458bSBrian King 
/*
 * Number of page addresses that fit in one struct cmm_page_array: a page
 * minus room for one pointer and one unsigned long.
 */
#define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long))

/*
 * Print a "cmm: "-prefixed KERN_INFO message when cmm_debug is non-zero.
 *
 * The body is wrapped in do { } while (0) so that "cmm_dbg(...);" is
 * exactly one statement and stays correct inside an unbraced if/else.
 */
#define cmm_dbg(...)						\
	do {							\
		if (cmm_debug)					\
			printk(KERN_INFO "cmm: "__VA_ARGS__);	\
	} while (0)
9584af458bSBrian King 
9684af458bSBrian King struct cmm_page_array {
9784af458bSBrian King 	struct cmm_page_array *next;
9884af458bSBrian King 	unsigned long index;
9984af458bSBrian King 	unsigned long page[CMM_NR_PAGES];
10084af458bSBrian King };
10184af458bSBrian King 
/* Loan accounting, all counted in pages. */
static unsigned long loaned_pages;		/* currently loaned */
static unsigned long loaned_pages_target;	/* what cmm_thread works towards */
static unsigned long oom_freed_pages;		/* released by the OOM notifier */

/* Head of the loaned-page list; the functions below walk it under cmm_lock. */
static struct cmm_page_array *cmm_page_list;
static DEFINE_SPINLOCK(cmm_lock);

/* Held from MEM_GOING_OFFLINE until MEM_OFFLINE / MEM_CANCEL_OFFLINE. */
static DEFINE_MUTEX(hotplug_mutex);
static int hotplug_occurred; /* protected by the hotplug mutex */

/* The kernel thread running cmm_thread(); cleared by the reboot notifier. */
static struct task_struct *cmm_thread_ptr;
11384af458bSBrian King 
1148f272a5dSMichael Ellerman static long plpar_page_set_loaned(unsigned long vpa)
1158f272a5dSMichael Ellerman {
1168f272a5dSMichael Ellerman 	unsigned long cmo_page_sz = cmo_get_page_size();
1178f272a5dSMichael Ellerman 	long rc = 0;
1188f272a5dSMichael Ellerman 	int i;
1198f272a5dSMichael Ellerman 
1208f272a5dSMichael Ellerman 	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
1218f272a5dSMichael Ellerman 		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0);
1228f272a5dSMichael Ellerman 
1238f272a5dSMichael Ellerman 	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
1248f272a5dSMichael Ellerman 		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE,
1258f272a5dSMichael Ellerman 				   vpa + i - cmo_page_sz, 0);
1268f272a5dSMichael Ellerman 
1278f272a5dSMichael Ellerman 	return rc;
1288f272a5dSMichael Ellerman }
1298f272a5dSMichael Ellerman 
1308f272a5dSMichael Ellerman static long plpar_page_set_active(unsigned long vpa)
1318f272a5dSMichael Ellerman {
1328f272a5dSMichael Ellerman 	unsigned long cmo_page_sz = cmo_get_page_size();
1338f272a5dSMichael Ellerman 	long rc = 0;
1348f272a5dSMichael Ellerman 	int i;
1358f272a5dSMichael Ellerman 
1368f272a5dSMichael Ellerman 	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
1378f272a5dSMichael Ellerman 		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0);
1388f272a5dSMichael Ellerman 
1398f272a5dSMichael Ellerman 	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
1408f272a5dSMichael Ellerman 		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED,
1418f272a5dSMichael Ellerman 				   vpa + i - cmo_page_sz, 0);
1428f272a5dSMichael Ellerman 
1438f272a5dSMichael Ellerman 	return rc;
1448f272a5dSMichael Ellerman }
1458f272a5dSMichael Ellerman 
14684af458bSBrian King /**
14784af458bSBrian King  * cmm_alloc_pages - Allocate pages and mark them as loaned
14884af458bSBrian King  * @nr:	number of pages to allocate
14984af458bSBrian King  *
15084af458bSBrian King  * Return value:
15184af458bSBrian King  * 	number of pages requested to be allocated which were not
15284af458bSBrian King  **/
15384af458bSBrian King static long cmm_alloc_pages(long nr)
15484af458bSBrian King {
15584af458bSBrian King 	struct cmm_page_array *pa, *npa;
15684af458bSBrian King 	unsigned long addr;
15784af458bSBrian King 	long rc;
15884af458bSBrian King 
15984af458bSBrian King 	cmm_dbg("Begin request for %ld pages\n", nr);
16084af458bSBrian King 
16184af458bSBrian King 	while (nr) {
16214b8a76bSRobert Jennings 		/* Exit if a hotplug operation is in progress or occurred */
16314b8a76bSRobert Jennings 		if (mutex_trylock(&hotplug_mutex)) {
16414b8a76bSRobert Jennings 			if (hotplug_occurred) {
16514b8a76bSRobert Jennings 				mutex_unlock(&hotplug_mutex);
16614b8a76bSRobert Jennings 				break;
16714b8a76bSRobert Jennings 			}
16814b8a76bSRobert Jennings 			mutex_unlock(&hotplug_mutex);
16914b8a76bSRobert Jennings 		} else {
17014b8a76bSRobert Jennings 			break;
17114b8a76bSRobert Jennings 		}
17214b8a76bSRobert Jennings 
17384af458bSBrian King 		addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
17484af458bSBrian King 				       __GFP_NORETRY | __GFP_NOMEMALLOC);
17584af458bSBrian King 		if (!addr)
17684af458bSBrian King 			break;
17784af458bSBrian King 		spin_lock(&cmm_lock);
17884af458bSBrian King 		pa = cmm_page_list;
17984af458bSBrian King 		if (!pa || pa->index >= CMM_NR_PAGES) {
18084af458bSBrian King 			/* Need a new page for the page list. */
18184af458bSBrian King 			spin_unlock(&cmm_lock);
18214b8a76bSRobert Jennings 			npa = (struct cmm_page_array *)__get_free_page(
18314b8a76bSRobert Jennings 					GFP_NOIO | __GFP_NOWARN |
18484af458bSBrian King 					__GFP_NORETRY | __GFP_NOMEMALLOC);
18584af458bSBrian King 			if (!npa) {
1865df72bf3SHarvey Harrison 				pr_info("%s: Can not allocate new page list\n", __func__);
18784af458bSBrian King 				free_page(addr);
18884af458bSBrian King 				break;
18984af458bSBrian King 			}
19084af458bSBrian King 			spin_lock(&cmm_lock);
19184af458bSBrian King 			pa = cmm_page_list;
19284af458bSBrian King 
19384af458bSBrian King 			if (!pa || pa->index >= CMM_NR_PAGES) {
19484af458bSBrian King 				npa->next = pa;
19584af458bSBrian King 				npa->index = 0;
19684af458bSBrian King 				pa = npa;
19784af458bSBrian King 				cmm_page_list = pa;
19884af458bSBrian King 			} else
19984af458bSBrian King 				free_page((unsigned long) npa);
20084af458bSBrian King 		}
20184af458bSBrian King 
20284af458bSBrian King 		if ((rc = plpar_page_set_loaned(__pa(addr)))) {
2035df72bf3SHarvey Harrison 			pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc);
20484af458bSBrian King 			spin_unlock(&cmm_lock);
20584af458bSBrian King 			free_page(addr);
20684af458bSBrian King 			break;
20784af458bSBrian King 		}
20884af458bSBrian King 
20984af458bSBrian King 		pa->page[pa->index++] = addr;
21084af458bSBrian King 		loaned_pages++;
21184af458bSBrian King 		totalram_pages--;
21284af458bSBrian King 		spin_unlock(&cmm_lock);
21384af458bSBrian King 		nr--;
21484af458bSBrian King 	}
21584af458bSBrian King 
21684af458bSBrian King 	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
21784af458bSBrian King 	return nr;
21884af458bSBrian King }
21984af458bSBrian King 
22084af458bSBrian King /**
22184af458bSBrian King  * cmm_free_pages - Free pages and mark them as active
22284af458bSBrian King  * @nr:	number of pages to free
22384af458bSBrian King  *
22484af458bSBrian King  * Return value:
22584af458bSBrian King  * 	number of pages requested to be freed which were not
22684af458bSBrian King  **/
22784af458bSBrian King static long cmm_free_pages(long nr)
22884af458bSBrian King {
22984af458bSBrian King 	struct cmm_page_array *pa;
23084af458bSBrian King 	unsigned long addr;
23184af458bSBrian King 
23284af458bSBrian King 	cmm_dbg("Begin free of %ld pages.\n", nr);
23384af458bSBrian King 	spin_lock(&cmm_lock);
23484af458bSBrian King 	pa = cmm_page_list;
23584af458bSBrian King 	while (nr) {
23684af458bSBrian King 		if (!pa || pa->index <= 0)
23784af458bSBrian King 			break;
23884af458bSBrian King 		addr = pa->page[--pa->index];
23984af458bSBrian King 
24084af458bSBrian King 		if (pa->index == 0) {
24184af458bSBrian King 			pa = pa->next;
24284af458bSBrian King 			free_page((unsigned long) cmm_page_list);
24384af458bSBrian King 			cmm_page_list = pa;
24484af458bSBrian King 		}
24584af458bSBrian King 
24684af458bSBrian King 		plpar_page_set_active(__pa(addr));
24784af458bSBrian King 		free_page(addr);
24884af458bSBrian King 		loaned_pages--;
24984af458bSBrian King 		nr--;
25084af458bSBrian King 		totalram_pages++;
25184af458bSBrian King 	}
25284af458bSBrian King 	spin_unlock(&cmm_lock);
25384af458bSBrian King 	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
25484af458bSBrian King 	return nr;
25584af458bSBrian King }
25684af458bSBrian King 
25784af458bSBrian King /**
25884af458bSBrian King  * cmm_oom_notify - OOM notifier
25984af458bSBrian King  * @self:	notifier block struct
26084af458bSBrian King  * @dummy:	not used
26184af458bSBrian King  * @parm:	returned - number of pages freed
26284af458bSBrian King  *
26384af458bSBrian King  * Return value:
26484af458bSBrian King  * 	NOTIFY_OK
26584af458bSBrian King  **/
26684af458bSBrian King static int cmm_oom_notify(struct notifier_block *self,
26784af458bSBrian King 			  unsigned long dummy, void *parm)
26884af458bSBrian King {
26984af458bSBrian King 	unsigned long *freed = parm;
27084af458bSBrian King 	long nr = KB2PAGES(oom_kb);
27184af458bSBrian King 
27284af458bSBrian King 	cmm_dbg("OOM processing started\n");
27384af458bSBrian King 	nr = cmm_free_pages(nr);
27484af458bSBrian King 	loaned_pages_target = loaned_pages;
27584af458bSBrian King 	*freed += KB2PAGES(oom_kb) - nr;
27684af458bSBrian King 	oom_freed_pages += KB2PAGES(oom_kb) - nr;
27784af458bSBrian King 	cmm_dbg("OOM processing complete\n");
27884af458bSBrian King 	return NOTIFY_OK;
27984af458bSBrian King }
28084af458bSBrian King 
28184af458bSBrian King /**
28284af458bSBrian King  * cmm_get_mpp - Read memory performance parameters
28384af458bSBrian King  *
28484af458bSBrian King  * Makes hcall to query the current page loan request from the hypervisor.
28584af458bSBrian King  *
28684af458bSBrian King  * Return value:
28784af458bSBrian King  * 	nothing
28884af458bSBrian King  **/
28984af458bSBrian King static void cmm_get_mpp(void)
29084af458bSBrian King {
29184af458bSBrian King 	int rc;
29284af458bSBrian King 	struct hvcall_mpp_data mpp_data;
2938be8cf5bSBrian King 	signed long active_pages_target, page_loan_request, target;
2948be8cf5bSBrian King 	signed long total_pages = totalram_pages + loaned_pages;
2958be8cf5bSBrian King 	signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;
29684af458bSBrian King 
29784af458bSBrian King 	rc = h_get_mpp(&mpp_data);
29884af458bSBrian King 
29984af458bSBrian King 	if (rc != H_SUCCESS)
30084af458bSBrian King 		return;
30184af458bSBrian King 
30284af458bSBrian King 	page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
3038be8cf5bSBrian King 	target = page_loan_request + (signed long)loaned_pages;
3048be8cf5bSBrian King 
3058be8cf5bSBrian King 	if (target < 0 || total_pages < min_mem_pages)
3068be8cf5bSBrian King 		target = 0;
3078be8cf5bSBrian King 
3088be8cf5bSBrian King 	if (target > oom_freed_pages)
3098be8cf5bSBrian King 		target -= oom_freed_pages;
31084af458bSBrian King 	else
3118be8cf5bSBrian King 		target = 0;
31284af458bSBrian King 
3138be8cf5bSBrian King 	active_pages_target = total_pages - target;
31484af458bSBrian King 
3158be8cf5bSBrian King 	if (min_mem_pages > active_pages_target)
3168be8cf5bSBrian King 		target = total_pages - min_mem_pages;
3178be8cf5bSBrian King 
3188be8cf5bSBrian King 	if (target < 0)
3198be8cf5bSBrian King 		target = 0;
3208be8cf5bSBrian King 
3218be8cf5bSBrian King 	loaned_pages_target = target;
32284af458bSBrian King 
32384af458bSBrian King 	cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
32484af458bSBrian King 		page_loan_request, loaned_pages, loaned_pages_target,
32584af458bSBrian King 		oom_freed_pages, totalram_pages);
32684af458bSBrian King }
32784af458bSBrian King 
32884af458bSBrian King static struct notifier_block cmm_oom_nb = {
32984af458bSBrian King 	.notifier_call = cmm_oom_notify
33084af458bSBrian King };
33184af458bSBrian King 
33284af458bSBrian King /**
33384af458bSBrian King  * cmm_thread - CMM task thread
33484af458bSBrian King  * @dummy:	not used
33584af458bSBrian King  *
33684af458bSBrian King  * Return value:
33784af458bSBrian King  * 	0
33884af458bSBrian King  **/
33984af458bSBrian King static int cmm_thread(void *dummy)
34084af458bSBrian King {
34184af458bSBrian King 	unsigned long timeleft;
34284af458bSBrian King 
34384af458bSBrian King 	while (1) {
34484af458bSBrian King 		timeleft = msleep_interruptible(delay * 1000);
34584af458bSBrian King 
34614b8a76bSRobert Jennings 		if (kthread_should_stop() || timeleft)
34784af458bSBrian King 			break;
34814b8a76bSRobert Jennings 
34914b8a76bSRobert Jennings 		if (mutex_trylock(&hotplug_mutex)) {
35014b8a76bSRobert Jennings 			if (hotplug_occurred) {
35114b8a76bSRobert Jennings 				hotplug_occurred = 0;
35214b8a76bSRobert Jennings 				mutex_unlock(&hotplug_mutex);
35314b8a76bSRobert Jennings 				cmm_dbg("Hotplug operation has occurred, "
35414b8a76bSRobert Jennings 						"loaning activity suspended "
35514b8a76bSRobert Jennings 						"for %d seconds.\n",
35614b8a76bSRobert Jennings 						hotplug_delay);
35714b8a76bSRobert Jennings 				timeleft = msleep_interruptible(hotplug_delay *
35814b8a76bSRobert Jennings 						1000);
35914b8a76bSRobert Jennings 				if (kthread_should_stop() || timeleft)
36014b8a76bSRobert Jennings 					break;
36114b8a76bSRobert Jennings 				continue;
36214b8a76bSRobert Jennings 			}
36314b8a76bSRobert Jennings 			mutex_unlock(&hotplug_mutex);
36414b8a76bSRobert Jennings 		} else {
36514b8a76bSRobert Jennings 			cmm_dbg("Hotplug operation in progress, activity "
36614b8a76bSRobert Jennings 					"suspended\n");
36714b8a76bSRobert Jennings 			continue;
36884af458bSBrian King 		}
36984af458bSBrian King 
37084af458bSBrian King 		cmm_get_mpp();
37184af458bSBrian King 
37284af458bSBrian King 		if (loaned_pages_target > loaned_pages) {
37384af458bSBrian King 			if (cmm_alloc_pages(loaned_pages_target - loaned_pages))
37484af458bSBrian King 				loaned_pages_target = loaned_pages;
37584af458bSBrian King 		} else if (loaned_pages_target < loaned_pages)
37684af458bSBrian King 			cmm_free_pages(loaned_pages - loaned_pages_target);
37784af458bSBrian King 	}
37884af458bSBrian King 	return 0;
37984af458bSBrian King }
38084af458bSBrian King 
38184af458bSBrian King #define CMM_SHOW(name, format, args...)			\
3826c9d2909SKay Sievers 	static ssize_t show_##name(struct device *dev,	\
3836c9d2909SKay Sievers 				   struct device_attribute *attr,	\
3843cee67f7SStephen Rothwell 				   char *buf)			\
38584af458bSBrian King 	{							\
38684af458bSBrian King 		return sprintf(buf, format, ##args);		\
38784af458bSBrian King 	}							\
3886c9d2909SKay Sievers 	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
38984af458bSBrian King 
39084af458bSBrian King CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
39184af458bSBrian King CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
39284af458bSBrian King 
3936c9d2909SKay Sievers static ssize_t show_oom_pages(struct device *dev,
3946c9d2909SKay Sievers 			      struct device_attribute *attr, char *buf)
39584af458bSBrian King {
39684af458bSBrian King 	return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages));
39784af458bSBrian King }
39884af458bSBrian King 
3996c9d2909SKay Sievers static ssize_t store_oom_pages(struct device *dev,
4006c9d2909SKay Sievers 			       struct device_attribute *attr,
40184af458bSBrian King 			       const char *buf, size_t count)
40284af458bSBrian King {
40384af458bSBrian King 	unsigned long val = simple_strtoul (buf, NULL, 10);
40484af458bSBrian King 
40584af458bSBrian King 	if (!capable(CAP_SYS_ADMIN))
40684af458bSBrian King 		return -EPERM;
40784af458bSBrian King 	if (val != 0)
40884af458bSBrian King 		return -EBADMSG;
40984af458bSBrian King 
41084af458bSBrian King 	oom_freed_pages = 0;
41184af458bSBrian King 	return count;
41284af458bSBrian King }
41384af458bSBrian King 
4146c9d2909SKay Sievers static DEVICE_ATTR(oom_freed_kb, S_IWUSR | S_IRUGO,
41584af458bSBrian King 		   show_oom_pages, store_oom_pages);
41684af458bSBrian King 
4176c9d2909SKay Sievers static struct device_attribute *cmm_attrs[] = {
4186c9d2909SKay Sievers 	&dev_attr_loaned_kb,
4196c9d2909SKay Sievers 	&dev_attr_loaned_target_kb,
4206c9d2909SKay Sievers 	&dev_attr_oom_freed_kb,
42184af458bSBrian King };
42284af458bSBrian King 
4236c9d2909SKay Sievers static struct bus_type cmm_subsys = {
42484af458bSBrian King 	.name = "cmm",
4256c9d2909SKay Sievers 	.dev_name = "cmm",
42684af458bSBrian King };
42784af458bSBrian King 
42884af458bSBrian King /**
42984af458bSBrian King  * cmm_sysfs_register - Register with sysfs
43084af458bSBrian King  *
43184af458bSBrian King  * Return value:
43284af458bSBrian King  * 	0 on success / other on failure
43384af458bSBrian King  **/
4346c9d2909SKay Sievers static int cmm_sysfs_register(struct device *dev)
43584af458bSBrian King {
43684af458bSBrian King 	int i, rc;
43784af458bSBrian King 
4386c9d2909SKay Sievers 	if ((rc = subsys_system_register(&cmm_subsys, NULL)))
43984af458bSBrian King 		return rc;
44084af458bSBrian King 
4416c9d2909SKay Sievers 	dev->id = 0;
4426c9d2909SKay Sievers 	dev->bus = &cmm_subsys;
44384af458bSBrian King 
4446c9d2909SKay Sievers 	if ((rc = device_register(dev)))
4456c9d2909SKay Sievers 		goto subsys_unregister;
44684af458bSBrian King 
44784af458bSBrian King 	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
4486c9d2909SKay Sievers 		if ((rc = device_create_file(dev, cmm_attrs[i])))
44984af458bSBrian King 			goto fail;
45084af458bSBrian King 	}
45184af458bSBrian King 
45284af458bSBrian King 	return 0;
45384af458bSBrian King 
45484af458bSBrian King fail:
45584af458bSBrian King 	while (--i >= 0)
4566c9d2909SKay Sievers 		device_remove_file(dev, cmm_attrs[i]);
4576c9d2909SKay Sievers 	device_unregister(dev);
4586c9d2909SKay Sievers subsys_unregister:
4596c9d2909SKay Sievers 	bus_unregister(&cmm_subsys);
46084af458bSBrian King 	return rc;
46184af458bSBrian King }
46284af458bSBrian King 
46384af458bSBrian King /**
46484af458bSBrian King  * cmm_unregister_sysfs - Unregister from sysfs
46584af458bSBrian King  *
46684af458bSBrian King  **/
4676c9d2909SKay Sievers static void cmm_unregister_sysfs(struct device *dev)
46884af458bSBrian King {
46984af458bSBrian King 	int i;
47084af458bSBrian King 
47184af458bSBrian King 	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++)
4726c9d2909SKay Sievers 		device_remove_file(dev, cmm_attrs[i]);
4736c9d2909SKay Sievers 	device_unregister(dev);
4746c9d2909SKay Sievers 	bus_unregister(&cmm_subsys);
47584af458bSBrian King }
47684af458bSBrian King 
47784af458bSBrian King /**
478fecba962SBrian King  * cmm_reboot_notifier - Make sure pages are not still marked as "loaned"
479fecba962SBrian King  *
480fecba962SBrian King  **/
481fecba962SBrian King static int cmm_reboot_notifier(struct notifier_block *nb,
482fecba962SBrian King 			       unsigned long action, void *unused)
483fecba962SBrian King {
484fecba962SBrian King 	if (action == SYS_RESTART) {
485fecba962SBrian King 		if (cmm_thread_ptr)
486fecba962SBrian King 			kthread_stop(cmm_thread_ptr);
487fecba962SBrian King 		cmm_thread_ptr = NULL;
488fecba962SBrian King 		cmm_free_pages(loaned_pages);
489fecba962SBrian King 	}
490fecba962SBrian King 	return NOTIFY_DONE;
491fecba962SBrian King }
492fecba962SBrian King 
493fecba962SBrian King static struct notifier_block cmm_reboot_nb = {
494fecba962SBrian King 	.notifier_call = cmm_reboot_notifier,
495fecba962SBrian King };
496fecba962SBrian King 
497fecba962SBrian King /**
49814b8a76bSRobert Jennings  * cmm_count_pages - Count the number of pages loaned in a particular range.
49914b8a76bSRobert Jennings  *
50014b8a76bSRobert Jennings  * @arg: memory_isolate_notify structure with address range and count
50114b8a76bSRobert Jennings  *
50214b8a76bSRobert Jennings  * Return value:
50314b8a76bSRobert Jennings  *      0 on success
50414b8a76bSRobert Jennings  **/
50514b8a76bSRobert Jennings static unsigned long cmm_count_pages(void *arg)
50614b8a76bSRobert Jennings {
50714b8a76bSRobert Jennings 	struct memory_isolate_notify *marg = arg;
50814b8a76bSRobert Jennings 	struct cmm_page_array *pa;
50914b8a76bSRobert Jennings 	unsigned long start = (unsigned long)pfn_to_kaddr(marg->start_pfn);
51014b8a76bSRobert Jennings 	unsigned long end = start + (marg->nr_pages << PAGE_SHIFT);
51114b8a76bSRobert Jennings 	unsigned long idx;
51214b8a76bSRobert Jennings 
51314b8a76bSRobert Jennings 	spin_lock(&cmm_lock);
51414b8a76bSRobert Jennings 	pa = cmm_page_list;
51514b8a76bSRobert Jennings 	while (pa) {
51614b8a76bSRobert Jennings 		if ((unsigned long)pa >= start && (unsigned long)pa < end)
51714b8a76bSRobert Jennings 			marg->pages_found++;
51814b8a76bSRobert Jennings 		for (idx = 0; idx < pa->index; idx++)
51914b8a76bSRobert Jennings 			if (pa->page[idx] >= start && pa->page[idx] < end)
52014b8a76bSRobert Jennings 				marg->pages_found++;
52114b8a76bSRobert Jennings 		pa = pa->next;
52214b8a76bSRobert Jennings 	}
52314b8a76bSRobert Jennings 	spin_unlock(&cmm_lock);
52414b8a76bSRobert Jennings 	return 0;
52514b8a76bSRobert Jennings }
52614b8a76bSRobert Jennings 
52714b8a76bSRobert Jennings /**
52814b8a76bSRobert Jennings  * cmm_memory_isolate_cb - Handle memory isolation notifier calls
52914b8a76bSRobert Jennings  * @self:	notifier block struct
53014b8a76bSRobert Jennings  * @action:	action to take
53114b8a76bSRobert Jennings  * @arg:	struct memory_isolate_notify data for handler
53214b8a76bSRobert Jennings  *
53314b8a76bSRobert Jennings  * Return value:
53414b8a76bSRobert Jennings  *	NOTIFY_OK or notifier error based on subfunction return value
53514b8a76bSRobert Jennings  **/
53614b8a76bSRobert Jennings static int cmm_memory_isolate_cb(struct notifier_block *self,
53714b8a76bSRobert Jennings 				 unsigned long action, void *arg)
53814b8a76bSRobert Jennings {
53914b8a76bSRobert Jennings 	int ret = 0;
54014b8a76bSRobert Jennings 
54114b8a76bSRobert Jennings 	if (action == MEM_ISOLATE_COUNT)
54214b8a76bSRobert Jennings 		ret = cmm_count_pages(arg);
54314b8a76bSRobert Jennings 
5447e26065dSPrarit Bhargava 	return notifier_from_errno(ret);
54514b8a76bSRobert Jennings }
54614b8a76bSRobert Jennings 
54714b8a76bSRobert Jennings static struct notifier_block cmm_mem_isolate_nb = {
54814b8a76bSRobert Jennings 	.notifier_call = cmm_memory_isolate_cb,
54914b8a76bSRobert Jennings 	.priority = CMM_MEM_ISOLATE_PRI
55014b8a76bSRobert Jennings };
55114b8a76bSRobert Jennings 
55214b8a76bSRobert Jennings /**
55314b8a76bSRobert Jennings  * cmm_mem_going_offline - Unloan pages where memory is to be removed
55414b8a76bSRobert Jennings  * @arg: memory_notify structure with page range to be offlined
55514b8a76bSRobert Jennings  *
55614b8a76bSRobert Jennings  * Return value:
55714b8a76bSRobert Jennings  *	0 on success
55814b8a76bSRobert Jennings  **/
55914b8a76bSRobert Jennings static int cmm_mem_going_offline(void *arg)
56014b8a76bSRobert Jennings {
56114b8a76bSRobert Jennings 	struct memory_notify *marg = arg;
56214b8a76bSRobert Jennings 	unsigned long start_page = (unsigned long)pfn_to_kaddr(marg->start_pfn);
56314b8a76bSRobert Jennings 	unsigned long end_page = start_page + (marg->nr_pages << PAGE_SHIFT);
56414b8a76bSRobert Jennings 	struct cmm_page_array *pa_curr, *pa_last, *npa;
56514b8a76bSRobert Jennings 	unsigned long idx;
56614b8a76bSRobert Jennings 	unsigned long freed = 0;
56714b8a76bSRobert Jennings 
56814b8a76bSRobert Jennings 	cmm_dbg("Memory going offline, searching 0x%lx (%ld pages).\n",
56914b8a76bSRobert Jennings 			start_page, marg->nr_pages);
57014b8a76bSRobert Jennings 	spin_lock(&cmm_lock);
57114b8a76bSRobert Jennings 
57214b8a76bSRobert Jennings 	/* Search the page list for pages in the range to be offlined */
57314b8a76bSRobert Jennings 	pa_last = pa_curr = cmm_page_list;
57414b8a76bSRobert Jennings 	while (pa_curr) {
57514b8a76bSRobert Jennings 		for (idx = (pa_curr->index - 1); (idx + 1) > 0; idx--) {
57614b8a76bSRobert Jennings 			if ((pa_curr->page[idx] < start_page) ||
57714b8a76bSRobert Jennings 			    (pa_curr->page[idx] >= end_page))
57814b8a76bSRobert Jennings 				continue;
57914b8a76bSRobert Jennings 
58014b8a76bSRobert Jennings 			plpar_page_set_active(__pa(pa_curr->page[idx]));
58114b8a76bSRobert Jennings 			free_page(pa_curr->page[idx]);
58214b8a76bSRobert Jennings 			freed++;
58314b8a76bSRobert Jennings 			loaned_pages--;
58414b8a76bSRobert Jennings 			totalram_pages++;
58514b8a76bSRobert Jennings 			pa_curr->page[idx] = pa_last->page[--pa_last->index];
58614b8a76bSRobert Jennings 			if (pa_last->index == 0) {
58714b8a76bSRobert Jennings 				if (pa_curr == pa_last)
58814b8a76bSRobert Jennings 					pa_curr = pa_last->next;
58914b8a76bSRobert Jennings 				pa_last = pa_last->next;
59014b8a76bSRobert Jennings 				free_page((unsigned long)cmm_page_list);
59114b8a76bSRobert Jennings 				cmm_page_list = pa_last;
59214b8a76bSRobert Jennings 			}
59314b8a76bSRobert Jennings 		}
59414b8a76bSRobert Jennings 		pa_curr = pa_curr->next;
59514b8a76bSRobert Jennings 	}
59614b8a76bSRobert Jennings 
59714b8a76bSRobert Jennings 	/* Search for page list structures in the range to be offlined */
59814b8a76bSRobert Jennings 	pa_last = NULL;
59914b8a76bSRobert Jennings 	pa_curr = cmm_page_list;
60014b8a76bSRobert Jennings 	while (pa_curr) {
60114b8a76bSRobert Jennings 		if (((unsigned long)pa_curr >= start_page) &&
60214b8a76bSRobert Jennings 				((unsigned long)pa_curr < end_page)) {
60314b8a76bSRobert Jennings 			npa = (struct cmm_page_array *)__get_free_page(
60414b8a76bSRobert Jennings 					GFP_NOIO | __GFP_NOWARN |
60514b8a76bSRobert Jennings 					__GFP_NORETRY | __GFP_NOMEMALLOC);
60614b8a76bSRobert Jennings 			if (!npa) {
60714b8a76bSRobert Jennings 				spin_unlock(&cmm_lock);
60814b8a76bSRobert Jennings 				cmm_dbg("Failed to allocate memory for list "
60914b8a76bSRobert Jennings 						"management. Memory hotplug "
61014b8a76bSRobert Jennings 						"failed.\n");
611e2be2371SRasmus Villemoes 				return -ENOMEM;
61214b8a76bSRobert Jennings 			}
61314b8a76bSRobert Jennings 			memcpy(npa, pa_curr, PAGE_SIZE);
61414b8a76bSRobert Jennings 			if (pa_curr == cmm_page_list)
61514b8a76bSRobert Jennings 				cmm_page_list = npa;
61614b8a76bSRobert Jennings 			if (pa_last)
61714b8a76bSRobert Jennings 				pa_last->next = npa;
61814b8a76bSRobert Jennings 			free_page((unsigned long) pa_curr);
61914b8a76bSRobert Jennings 			freed++;
62014b8a76bSRobert Jennings 			pa_curr = npa;
62114b8a76bSRobert Jennings 		}
62214b8a76bSRobert Jennings 
62314b8a76bSRobert Jennings 		pa_last = pa_curr;
62414b8a76bSRobert Jennings 		pa_curr = pa_curr->next;
62514b8a76bSRobert Jennings 	}
62614b8a76bSRobert Jennings 
62714b8a76bSRobert Jennings 	spin_unlock(&cmm_lock);
62814b8a76bSRobert Jennings 	cmm_dbg("Released %ld pages in the search range.\n", freed);
62914b8a76bSRobert Jennings 
63014b8a76bSRobert Jennings 	return 0;
63114b8a76bSRobert Jennings }
63214b8a76bSRobert Jennings 
63314b8a76bSRobert Jennings /**
63414b8a76bSRobert Jennings  * cmm_memory_cb - Handle memory hotplug notifier calls
63514b8a76bSRobert Jennings  * @self:	notifier block struct
63614b8a76bSRobert Jennings  * @action:	action to take
63714b8a76bSRobert Jennings  * @arg:	struct memory_notify data for handler
63814b8a76bSRobert Jennings  *
63914b8a76bSRobert Jennings  * Return value:
64014b8a76bSRobert Jennings  *	NOTIFY_OK or notifier error based on subfunction return value
64114b8a76bSRobert Jennings  *
64214b8a76bSRobert Jennings  **/
64314b8a76bSRobert Jennings static int cmm_memory_cb(struct notifier_block *self,
64414b8a76bSRobert Jennings 			unsigned long action, void *arg)
64514b8a76bSRobert Jennings {
64614b8a76bSRobert Jennings 	int ret = 0;
64714b8a76bSRobert Jennings 
64814b8a76bSRobert Jennings 	switch (action) {
64914b8a76bSRobert Jennings 	case MEM_GOING_OFFLINE:
65014b8a76bSRobert Jennings 		mutex_lock(&hotplug_mutex);
65114b8a76bSRobert Jennings 		hotplug_occurred = 1;
65214b8a76bSRobert Jennings 		ret = cmm_mem_going_offline(arg);
65314b8a76bSRobert Jennings 		break;
65414b8a76bSRobert Jennings 	case MEM_OFFLINE:
65514b8a76bSRobert Jennings 	case MEM_CANCEL_OFFLINE:
65614b8a76bSRobert Jennings 		mutex_unlock(&hotplug_mutex);
65714b8a76bSRobert Jennings 		cmm_dbg("Memory offline operation complete.\n");
65814b8a76bSRobert Jennings 		break;
65914b8a76bSRobert Jennings 	case MEM_GOING_ONLINE:
66014b8a76bSRobert Jennings 	case MEM_ONLINE:
66114b8a76bSRobert Jennings 	case MEM_CANCEL_ONLINE:
66214b8a76bSRobert Jennings 		break;
66314b8a76bSRobert Jennings 	}
66414b8a76bSRobert Jennings 
6657e26065dSPrarit Bhargava 	return notifier_from_errno(ret);
66614b8a76bSRobert Jennings }
66714b8a76bSRobert Jennings 
66814b8a76bSRobert Jennings static struct notifier_block cmm_mem_nb = {
66914b8a76bSRobert Jennings 	.notifier_call = cmm_memory_cb,
67014b8a76bSRobert Jennings 	.priority = CMM_MEM_HOTPLUG_PRI
67114b8a76bSRobert Jennings };
67214b8a76bSRobert Jennings 
67314b8a76bSRobert Jennings /**
67484af458bSBrian King  * cmm_init - Module initialization
67584af458bSBrian King  *
67684af458bSBrian King  * Return value:
67784af458bSBrian King  * 	0 on success / other on failure
67884af458bSBrian King  **/
67984af458bSBrian King static int cmm_init(void)
68084af458bSBrian King {
68184af458bSBrian King 	int rc = -ENOMEM;
68284af458bSBrian King 
68384af458bSBrian King 	if (!firmware_has_feature(FW_FEATURE_CMO))
68484af458bSBrian King 		return -EOPNOTSUPP;
68584af458bSBrian King 
68684af458bSBrian King 	if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0)
68784af458bSBrian King 		return rc;
68884af458bSBrian King 
689fecba962SBrian King 	if ((rc = register_reboot_notifier(&cmm_reboot_nb)))
69084af458bSBrian King 		goto out_oom_notifier;
69184af458bSBrian King 
6926c9d2909SKay Sievers 	if ((rc = cmm_sysfs_register(&cmm_dev)))
693fecba962SBrian King 		goto out_reboot_notifier;
694fecba962SBrian King 
69514b8a76bSRobert Jennings 	if (register_memory_notifier(&cmm_mem_nb) ||
69614b8a76bSRobert Jennings 	    register_memory_isolate_notifier(&cmm_mem_isolate_nb))
69714b8a76bSRobert Jennings 		goto out_unregister_notifier;
69814b8a76bSRobert Jennings 
69984af458bSBrian King 	if (cmm_disabled)
70084af458bSBrian King 		return rc;
70184af458bSBrian King 
70284af458bSBrian King 	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
70384af458bSBrian King 	if (IS_ERR(cmm_thread_ptr)) {
70484af458bSBrian King 		rc = PTR_ERR(cmm_thread_ptr);
70514b8a76bSRobert Jennings 		goto out_unregister_notifier;
70684af458bSBrian King 	}
70784af458bSBrian King 
70884af458bSBrian King 	return rc;
70984af458bSBrian King 
71014b8a76bSRobert Jennings out_unregister_notifier:
71114b8a76bSRobert Jennings 	unregister_memory_notifier(&cmm_mem_nb);
71214b8a76bSRobert Jennings 	unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
7136c9d2909SKay Sievers 	cmm_unregister_sysfs(&cmm_dev);
714fecba962SBrian King out_reboot_notifier:
715fecba962SBrian King 	unregister_reboot_notifier(&cmm_reboot_nb);
71684af458bSBrian King out_oom_notifier:
71784af458bSBrian King 	unregister_oom_notifier(&cmm_oom_nb);
71884af458bSBrian King 	return rc;
71984af458bSBrian King }
72084af458bSBrian King 
72184af458bSBrian King /**
72284af458bSBrian King  * cmm_exit - Module exit
72384af458bSBrian King  *
72484af458bSBrian King  * Return value:
72584af458bSBrian King  * 	nothing
72684af458bSBrian King  **/
72784af458bSBrian King static void cmm_exit(void)
72884af458bSBrian King {
72984af458bSBrian King 	if (cmm_thread_ptr)
73084af458bSBrian King 		kthread_stop(cmm_thread_ptr);
73184af458bSBrian King 	unregister_oom_notifier(&cmm_oom_nb);
732fecba962SBrian King 	unregister_reboot_notifier(&cmm_reboot_nb);
73314b8a76bSRobert Jennings 	unregister_memory_notifier(&cmm_mem_nb);
73414b8a76bSRobert Jennings 	unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
73584af458bSBrian King 	cmm_free_pages(loaned_pages);
7366c9d2909SKay Sievers 	cmm_unregister_sysfs(&cmm_dev);
73784af458bSBrian King }
73884af458bSBrian King 
73984af458bSBrian King /**
74084af458bSBrian King  * cmm_set_disable - Disable/Enable CMM
74184af458bSBrian King  *
74284af458bSBrian King  * Return value:
74384af458bSBrian King  * 	0 on success / other on failure
74484af458bSBrian King  **/
745*e4dca7b7SKees Cook static int cmm_set_disable(const char *val, const struct kernel_param *kp)
74684af458bSBrian King {
74784af458bSBrian King 	int disable = simple_strtoul(val, NULL, 10);
74884af458bSBrian King 
74984af458bSBrian King 	if (disable != 0 && disable != 1)
75084af458bSBrian King 		return -EINVAL;
75184af458bSBrian King 
75284af458bSBrian King 	if (disable && !cmm_disabled) {
75384af458bSBrian King 		if (cmm_thread_ptr)
75484af458bSBrian King 			kthread_stop(cmm_thread_ptr);
75584af458bSBrian King 		cmm_thread_ptr = NULL;
75684af458bSBrian King 		cmm_free_pages(loaned_pages);
75784af458bSBrian King 	} else if (!disable && cmm_disabled) {
75884af458bSBrian King 		cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
75984af458bSBrian King 		if (IS_ERR(cmm_thread_ptr))
76084af458bSBrian King 			return PTR_ERR(cmm_thread_ptr);
76184af458bSBrian King 	}
76284af458bSBrian King 
76384af458bSBrian King 	cmm_disabled = disable;
76484af458bSBrian King 	return 0;
76584af458bSBrian King }
76684af458bSBrian King 
76784af458bSBrian King module_param_call(disable, cmm_set_disable, param_get_uint,
76884af458bSBrian King 		  &cmm_disabled, S_IRUGO | S_IWUSR);
76984af458bSBrian King MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
77084af458bSBrian King 		 "[Default=" __stringify(CMM_DISABLE) "]");
77184af458bSBrian King 
77284af458bSBrian King module_init(cmm_init);
77384af458bSBrian King module_exit(cmm_exit);
774