xref: /openbmc/linux/arch/powerpc/platforms/pseries/cmm.c (revision 7d8212747435c534c8d564fbef4541a463c976ff)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Collaborative memory management interface.
 *
 * Copyright (C) 2008 IBM Corporation
 * Author(s): Brian King (brking@linux.vnet.ibm.com)
 */
884af458bSBrian King 
#include <linux/ctype.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/memory.h>
#include <linux/module.h>
#include <linux/oom.h>
#include <linux/reboot.h>
#include <linux/sched.h>
#include <linux/stringify.h>
#include <linux/swap.h>
#include <linux/uaccess.h>

#include <asm/firmware.h>
#include <asm/hvcall.h>
#include <asm/mmu.h>
#include <asm/pgalloc.h>
#include <asm/plpar_wrappers.h>

#include "pseries.h"
318f272a5dSMichael Ellerman 
/* Driver version string published through MODULE_VERSION(). */
#define CMM_DRIVER_VERSION	"1.0.0"

/* Built-in defaults for the tunables declared further down in this file. */
#define CMM_DEFAULT_DELAY	1	/* seconds between hypervisor polls */
#define CMM_HOTPLUG_DELAY	5	/* seconds loaning pauses after hotplug remove */
#define CMM_DEBUG		0	/* debug logging off */
#define CMM_DISABLE		0
#define CMM_OOM_KB		1024	/* kB to free on an OOM notification */
#define CMM_MIN_MEM_MB		256	/* MB that are never ballooned */

/* Convert between kilobytes and pages (a page holds 2^(PAGE_SHIFT - 10) kB). */
#define KB2PAGES(_p)		((_p) >> (PAGE_SHIFT - 10))
#define PAGES2KB(_p)		((_p) << (PAGE_SHIFT - 10))

/*
 * The priority level tries to ensure that this notifier is called as
 * late as possible to reduce thrashing in the shared memory pool.
 */
#define CMM_MEM_HOTPLUG_PRI	1
#define CMM_MEM_ISOLATE_PRI	15
4784af458bSBrian King 
4884af458bSBrian King static unsigned int delay = CMM_DEFAULT_DELAY;
4914b8a76bSRobert Jennings static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
5084af458bSBrian King static unsigned int oom_kb = CMM_OOM_KB;
5184af458bSBrian King static unsigned int cmm_debug = CMM_DEBUG;
5284af458bSBrian King static unsigned int cmm_disabled = CMM_DISABLE;
5384af458bSBrian King static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
546c9d2909SKay Sievers static struct device cmm_dev;
5584af458bSBrian King 
5684af458bSBrian King MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
5784af458bSBrian King MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
5884af458bSBrian King MODULE_LICENSE("GPL");
5984af458bSBrian King MODULE_VERSION(CMM_DRIVER_VERSION);
6084af458bSBrian King 
6157ad583fSRussell Currey module_param_named(delay, delay, uint, 0644);
6284af458bSBrian King MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
6384af458bSBrian King 		 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
6457ad583fSRussell Currey module_param_named(hotplug_delay, hotplug_delay, uint, 0644);
65b0b5a765SWei Yongjun MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove "
6614b8a76bSRobert Jennings 		 "before loaning resumes. "
6714b8a76bSRobert Jennings 		 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
6857ad583fSRussell Currey module_param_named(oom_kb, oom_kb, uint, 0644);
6984af458bSBrian King MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
7084af458bSBrian King 		 "[Default=" __stringify(CMM_OOM_KB) "]");
7157ad583fSRussell Currey module_param_named(min_mem_mb, min_mem_mb, ulong, 0644);
7284af458bSBrian King MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
7384af458bSBrian King 		 "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
7457ad583fSRussell Currey module_param_named(debug, cmm_debug, uint, 0644);
7584af458bSBrian King MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
7684af458bSBrian King 		 "[Default=" __stringify(CMM_DEBUG) "]");
7784af458bSBrian King 
/*
 * Number of page addresses that fit into one page-sized cmm_page_array
 * once its two header words (next pointer and index) are accounted for.
 */
#define CMM_NR_PAGES							\
	((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) /	\
	 sizeof(unsigned long))
7984af458bSBrian King 
/*
 * Emit a KERN_INFO message prefixed with "cmm: ", but only while the
 * cmm_debug tunable is non-zero.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement: a bare "if (...) { ... }" followed by the caller's
 * semicolon breaks (or silently re-binds) an enclosing if/else when the
 * macro is used without braces.
 */
#define cmm_dbg(...)							\
	do {								\
		if (cmm_debug)						\
			printk(KERN_INFO "cmm: " __VA_ARGS__);		\
	} while (0)
8184af458bSBrian King 
8284af458bSBrian King struct cmm_page_array {
8384af458bSBrian King 	struct cmm_page_array *next;
8484af458bSBrian King 	unsigned long index;
8584af458bSBrian King 	unsigned long page[CMM_NR_PAGES];
8684af458bSBrian King };
8784af458bSBrian King 
/* Loan accounting, all in units of pages. */
static unsigned long loaned_pages;		/* pages currently loaned out */
static unsigned long loaned_pages_target;	/* loan level the worker thread aims for */
static unsigned long oom_freed_pages;		/* pages given back by the OOM notifier */

/* Head of the list of loaned pages; the list is walked under cmm_lock. */
static struct cmm_page_array *cmm_page_list;
static DEFINE_SPINLOCK(cmm_lock);

/* Held from MEM_GOING_OFFLINE until MEM_OFFLINE / MEM_CANCEL_OFFLINE. */
static DEFINE_MUTEX(hotplug_mutex);
static int hotplug_occurred; /* protected by the hotplug mutex */

/* Worker thread; the reboot notifier stops it and resets this to NULL. */
static struct task_struct *cmm_thread_ptr;
9984af458bSBrian King 
1008f272a5dSMichael Ellerman static long plpar_page_set_loaned(unsigned long vpa)
1018f272a5dSMichael Ellerman {
1028f272a5dSMichael Ellerman 	unsigned long cmo_page_sz = cmo_get_page_size();
1038f272a5dSMichael Ellerman 	long rc = 0;
1048f272a5dSMichael Ellerman 	int i;
1058f272a5dSMichael Ellerman 
1068f272a5dSMichael Ellerman 	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
1078f272a5dSMichael Ellerman 		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0);
1088f272a5dSMichael Ellerman 
1098f272a5dSMichael Ellerman 	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
1108f272a5dSMichael Ellerman 		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE,
1118f272a5dSMichael Ellerman 				   vpa + i - cmo_page_sz, 0);
1128f272a5dSMichael Ellerman 
1138f272a5dSMichael Ellerman 	return rc;
1148f272a5dSMichael Ellerman }
1158f272a5dSMichael Ellerman 
1168f272a5dSMichael Ellerman static long plpar_page_set_active(unsigned long vpa)
1178f272a5dSMichael Ellerman {
1188f272a5dSMichael Ellerman 	unsigned long cmo_page_sz = cmo_get_page_size();
1198f272a5dSMichael Ellerman 	long rc = 0;
1208f272a5dSMichael Ellerman 	int i;
1218f272a5dSMichael Ellerman 
1228f272a5dSMichael Ellerman 	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
1238f272a5dSMichael Ellerman 		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0);
1248f272a5dSMichael Ellerman 
1258f272a5dSMichael Ellerman 	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
1268f272a5dSMichael Ellerman 		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED,
1278f272a5dSMichael Ellerman 				   vpa + i - cmo_page_sz, 0);
1288f272a5dSMichael Ellerman 
1298f272a5dSMichael Ellerman 	return rc;
1308f272a5dSMichael Ellerman }
1318f272a5dSMichael Ellerman 
13284af458bSBrian King /**
13384af458bSBrian King  * cmm_alloc_pages - Allocate pages and mark them as loaned
13484af458bSBrian King  * @nr:	number of pages to allocate
13584af458bSBrian King  *
13684af458bSBrian King  * Return value:
13784af458bSBrian King  * 	number of pages requested to be allocated which were not
13884af458bSBrian King  **/
13984af458bSBrian King static long cmm_alloc_pages(long nr)
14084af458bSBrian King {
14184af458bSBrian King 	struct cmm_page_array *pa, *npa;
14284af458bSBrian King 	unsigned long addr;
14384af458bSBrian King 	long rc;
14484af458bSBrian King 
14584af458bSBrian King 	cmm_dbg("Begin request for %ld pages\n", nr);
14684af458bSBrian King 
14784af458bSBrian King 	while (nr) {
14814b8a76bSRobert Jennings 		/* Exit if a hotplug operation is in progress or occurred */
14914b8a76bSRobert Jennings 		if (mutex_trylock(&hotplug_mutex)) {
15014b8a76bSRobert Jennings 			if (hotplug_occurred) {
15114b8a76bSRobert Jennings 				mutex_unlock(&hotplug_mutex);
15214b8a76bSRobert Jennings 				break;
15314b8a76bSRobert Jennings 			}
15414b8a76bSRobert Jennings 			mutex_unlock(&hotplug_mutex);
15514b8a76bSRobert Jennings 		} else {
15614b8a76bSRobert Jennings 			break;
15714b8a76bSRobert Jennings 		}
15814b8a76bSRobert Jennings 
15984af458bSBrian King 		addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
16084af458bSBrian King 				       __GFP_NORETRY | __GFP_NOMEMALLOC);
16184af458bSBrian King 		if (!addr)
16284af458bSBrian King 			break;
16384af458bSBrian King 		spin_lock(&cmm_lock);
16484af458bSBrian King 		pa = cmm_page_list;
16584af458bSBrian King 		if (!pa || pa->index >= CMM_NR_PAGES) {
16684af458bSBrian King 			/* Need a new page for the page list. */
16784af458bSBrian King 			spin_unlock(&cmm_lock);
16814b8a76bSRobert Jennings 			npa = (struct cmm_page_array *)__get_free_page(
16914b8a76bSRobert Jennings 					GFP_NOIO | __GFP_NOWARN |
17084af458bSBrian King 					__GFP_NORETRY | __GFP_NOMEMALLOC);
17184af458bSBrian King 			if (!npa) {
1725df72bf3SHarvey Harrison 				pr_info("%s: Can not allocate new page list\n", __func__);
17384af458bSBrian King 				free_page(addr);
17484af458bSBrian King 				break;
17584af458bSBrian King 			}
17684af458bSBrian King 			spin_lock(&cmm_lock);
17784af458bSBrian King 			pa = cmm_page_list;
17884af458bSBrian King 
17984af458bSBrian King 			if (!pa || pa->index >= CMM_NR_PAGES) {
18084af458bSBrian King 				npa->next = pa;
18184af458bSBrian King 				npa->index = 0;
18284af458bSBrian King 				pa = npa;
18384af458bSBrian King 				cmm_page_list = pa;
18484af458bSBrian King 			} else
18584af458bSBrian King 				free_page((unsigned long) npa);
18684af458bSBrian King 		}
18784af458bSBrian King 
18884af458bSBrian King 		if ((rc = plpar_page_set_loaned(__pa(addr)))) {
1895df72bf3SHarvey Harrison 			pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc);
19084af458bSBrian King 			spin_unlock(&cmm_lock);
19184af458bSBrian King 			free_page(addr);
19284af458bSBrian King 			break;
19384af458bSBrian King 		}
19484af458bSBrian King 
19584af458bSBrian King 		pa->page[pa->index++] = addr;
19684af458bSBrian King 		loaned_pages++;
197ca79b0c2SArun KS 		totalram_pages_dec();
19884af458bSBrian King 		spin_unlock(&cmm_lock);
19984af458bSBrian King 		nr--;
20084af458bSBrian King 	}
20184af458bSBrian King 
20284af458bSBrian King 	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
20384af458bSBrian King 	return nr;
20484af458bSBrian King }
20584af458bSBrian King 
20684af458bSBrian King /**
20784af458bSBrian King  * cmm_free_pages - Free pages and mark them as active
20884af458bSBrian King  * @nr:	number of pages to free
20984af458bSBrian King  *
21084af458bSBrian King  * Return value:
21184af458bSBrian King  * 	number of pages requested to be freed which were not
21284af458bSBrian King  **/
21384af458bSBrian King static long cmm_free_pages(long nr)
21484af458bSBrian King {
21584af458bSBrian King 	struct cmm_page_array *pa;
21684af458bSBrian King 	unsigned long addr;
21784af458bSBrian King 
21884af458bSBrian King 	cmm_dbg("Begin free of %ld pages.\n", nr);
21984af458bSBrian King 	spin_lock(&cmm_lock);
22084af458bSBrian King 	pa = cmm_page_list;
22184af458bSBrian King 	while (nr) {
22284af458bSBrian King 		if (!pa || pa->index <= 0)
22384af458bSBrian King 			break;
22484af458bSBrian King 		addr = pa->page[--pa->index];
22584af458bSBrian King 
22684af458bSBrian King 		if (pa->index == 0) {
22784af458bSBrian King 			pa = pa->next;
22884af458bSBrian King 			free_page((unsigned long) cmm_page_list);
22984af458bSBrian King 			cmm_page_list = pa;
23084af458bSBrian King 		}
23184af458bSBrian King 
23284af458bSBrian King 		plpar_page_set_active(__pa(addr));
23384af458bSBrian King 		free_page(addr);
23484af458bSBrian King 		loaned_pages--;
23584af458bSBrian King 		nr--;
236ca79b0c2SArun KS 		totalram_pages_inc();
23784af458bSBrian King 	}
23884af458bSBrian King 	spin_unlock(&cmm_lock);
23984af458bSBrian King 	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
24084af458bSBrian King 	return nr;
24184af458bSBrian King }
24284af458bSBrian King 
24384af458bSBrian King /**
24484af458bSBrian King  * cmm_oom_notify - OOM notifier
24584af458bSBrian King  * @self:	notifier block struct
24684af458bSBrian King  * @dummy:	not used
24784af458bSBrian King  * @parm:	returned - number of pages freed
24884af458bSBrian King  *
24984af458bSBrian King  * Return value:
25084af458bSBrian King  * 	NOTIFY_OK
25184af458bSBrian King  **/
25284af458bSBrian King static int cmm_oom_notify(struct notifier_block *self,
25384af458bSBrian King 			  unsigned long dummy, void *parm)
25484af458bSBrian King {
25584af458bSBrian King 	unsigned long *freed = parm;
25684af458bSBrian King 	long nr = KB2PAGES(oom_kb);
25784af458bSBrian King 
25884af458bSBrian King 	cmm_dbg("OOM processing started\n");
25984af458bSBrian King 	nr = cmm_free_pages(nr);
26084af458bSBrian King 	loaned_pages_target = loaned_pages;
26184af458bSBrian King 	*freed += KB2PAGES(oom_kb) - nr;
26284af458bSBrian King 	oom_freed_pages += KB2PAGES(oom_kb) - nr;
26384af458bSBrian King 	cmm_dbg("OOM processing complete\n");
26484af458bSBrian King 	return NOTIFY_OK;
26584af458bSBrian King }
26684af458bSBrian King 
26784af458bSBrian King /**
26884af458bSBrian King  * cmm_get_mpp - Read memory performance parameters
26984af458bSBrian King  *
27084af458bSBrian King  * Makes hcall to query the current page loan request from the hypervisor.
27184af458bSBrian King  *
27284af458bSBrian King  * Return value:
27384af458bSBrian King  * 	nothing
27484af458bSBrian King  **/
27584af458bSBrian King static void cmm_get_mpp(void)
27684af458bSBrian King {
27784af458bSBrian King 	int rc;
27884af458bSBrian King 	struct hvcall_mpp_data mpp_data;
2798be8cf5bSBrian King 	signed long active_pages_target, page_loan_request, target;
280ca79b0c2SArun KS 	signed long total_pages = totalram_pages() + loaned_pages;
2818be8cf5bSBrian King 	signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;
28284af458bSBrian King 
28384af458bSBrian King 	rc = h_get_mpp(&mpp_data);
28484af458bSBrian King 
28584af458bSBrian King 	if (rc != H_SUCCESS)
28684af458bSBrian King 		return;
28784af458bSBrian King 
28884af458bSBrian King 	page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
2898be8cf5bSBrian King 	target = page_loan_request + (signed long)loaned_pages;
2908be8cf5bSBrian King 
2918be8cf5bSBrian King 	if (target < 0 || total_pages < min_mem_pages)
2928be8cf5bSBrian King 		target = 0;
2938be8cf5bSBrian King 
2948be8cf5bSBrian King 	if (target > oom_freed_pages)
2958be8cf5bSBrian King 		target -= oom_freed_pages;
29684af458bSBrian King 	else
2978be8cf5bSBrian King 		target = 0;
29884af458bSBrian King 
2998be8cf5bSBrian King 	active_pages_target = total_pages - target;
30084af458bSBrian King 
3018be8cf5bSBrian King 	if (min_mem_pages > active_pages_target)
3028be8cf5bSBrian King 		target = total_pages - min_mem_pages;
3038be8cf5bSBrian King 
3048be8cf5bSBrian King 	if (target < 0)
3058be8cf5bSBrian King 		target = 0;
3068be8cf5bSBrian King 
3078be8cf5bSBrian King 	loaned_pages_target = target;
30884af458bSBrian King 
30984af458bSBrian King 	cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
31084af458bSBrian King 		page_loan_request, loaned_pages, loaned_pages_target,
311ca79b0c2SArun KS 		oom_freed_pages, totalram_pages());
31284af458bSBrian King }
31384af458bSBrian King 
31484af458bSBrian King static struct notifier_block cmm_oom_nb = {
31584af458bSBrian King 	.notifier_call = cmm_oom_notify
31684af458bSBrian King };
31784af458bSBrian King 
31884af458bSBrian King /**
31984af458bSBrian King  * cmm_thread - CMM task thread
32084af458bSBrian King  * @dummy:	not used
32184af458bSBrian King  *
32284af458bSBrian King  * Return value:
32384af458bSBrian King  * 	0
32484af458bSBrian King  **/
32584af458bSBrian King static int cmm_thread(void *dummy)
32684af458bSBrian King {
32784af458bSBrian King 	unsigned long timeleft;
32884af458bSBrian King 
32984af458bSBrian King 	while (1) {
33084af458bSBrian King 		timeleft = msleep_interruptible(delay * 1000);
33184af458bSBrian King 
33214b8a76bSRobert Jennings 		if (kthread_should_stop() || timeleft)
33384af458bSBrian King 			break;
33414b8a76bSRobert Jennings 
33514b8a76bSRobert Jennings 		if (mutex_trylock(&hotplug_mutex)) {
33614b8a76bSRobert Jennings 			if (hotplug_occurred) {
33714b8a76bSRobert Jennings 				hotplug_occurred = 0;
33814b8a76bSRobert Jennings 				mutex_unlock(&hotplug_mutex);
33914b8a76bSRobert Jennings 				cmm_dbg("Hotplug operation has occurred, "
34014b8a76bSRobert Jennings 						"loaning activity suspended "
34114b8a76bSRobert Jennings 						"for %d seconds.\n",
34214b8a76bSRobert Jennings 						hotplug_delay);
34314b8a76bSRobert Jennings 				timeleft = msleep_interruptible(hotplug_delay *
34414b8a76bSRobert Jennings 						1000);
34514b8a76bSRobert Jennings 				if (kthread_should_stop() || timeleft)
34614b8a76bSRobert Jennings 					break;
34714b8a76bSRobert Jennings 				continue;
34814b8a76bSRobert Jennings 			}
34914b8a76bSRobert Jennings 			mutex_unlock(&hotplug_mutex);
35014b8a76bSRobert Jennings 		} else {
35114b8a76bSRobert Jennings 			cmm_dbg("Hotplug operation in progress, activity "
35214b8a76bSRobert Jennings 					"suspended\n");
35314b8a76bSRobert Jennings 			continue;
35484af458bSBrian King 		}
35584af458bSBrian King 
35684af458bSBrian King 		cmm_get_mpp();
35784af458bSBrian King 
35884af458bSBrian King 		if (loaned_pages_target > loaned_pages) {
35984af458bSBrian King 			if (cmm_alloc_pages(loaned_pages_target - loaned_pages))
36084af458bSBrian King 				loaned_pages_target = loaned_pages;
36184af458bSBrian King 		} else if (loaned_pages_target < loaned_pages)
36284af458bSBrian King 			cmm_free_pages(loaned_pages - loaned_pages_target);
36384af458bSBrian King 	}
36484af458bSBrian King 	return 0;
36584af458bSBrian King }
36684af458bSBrian King 
36784af458bSBrian King #define CMM_SHOW(name, format, args...)			\
3686c9d2909SKay Sievers 	static ssize_t show_##name(struct device *dev,	\
3696c9d2909SKay Sievers 				   struct device_attribute *attr,	\
3703cee67f7SStephen Rothwell 				   char *buf)			\
37184af458bSBrian King 	{							\
37284af458bSBrian King 		return sprintf(buf, format, ##args);		\
37384af458bSBrian King 	}							\
37457ad583fSRussell Currey 	static DEVICE_ATTR(name, 0444, show_##name, NULL)
37584af458bSBrian King 
37684af458bSBrian King CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
37784af458bSBrian King CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
37884af458bSBrian King 
3796c9d2909SKay Sievers static ssize_t show_oom_pages(struct device *dev,
3806c9d2909SKay Sievers 			      struct device_attribute *attr, char *buf)
38184af458bSBrian King {
38284af458bSBrian King 	return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages));
38384af458bSBrian King }
38484af458bSBrian King 
3856c9d2909SKay Sievers static ssize_t store_oom_pages(struct device *dev,
3866c9d2909SKay Sievers 			       struct device_attribute *attr,
38784af458bSBrian King 			       const char *buf, size_t count)
38884af458bSBrian King {
38984af458bSBrian King 	unsigned long val = simple_strtoul (buf, NULL, 10);
39084af458bSBrian King 
39184af458bSBrian King 	if (!capable(CAP_SYS_ADMIN))
39284af458bSBrian King 		return -EPERM;
39384af458bSBrian King 	if (val != 0)
39484af458bSBrian King 		return -EBADMSG;
39584af458bSBrian King 
39684af458bSBrian King 	oom_freed_pages = 0;
39784af458bSBrian King 	return count;
39884af458bSBrian King }
39984af458bSBrian King 
40057ad583fSRussell Currey static DEVICE_ATTR(oom_freed_kb, 0644,
40184af458bSBrian King 		   show_oom_pages, store_oom_pages);
40284af458bSBrian King 
4036c9d2909SKay Sievers static struct device_attribute *cmm_attrs[] = {
4046c9d2909SKay Sievers 	&dev_attr_loaned_kb,
4056c9d2909SKay Sievers 	&dev_attr_loaned_target_kb,
4066c9d2909SKay Sievers 	&dev_attr_oom_freed_kb,
40784af458bSBrian King };
40884af458bSBrian King 
4096c9d2909SKay Sievers static struct bus_type cmm_subsys = {
41084af458bSBrian King 	.name = "cmm",
4116c9d2909SKay Sievers 	.dev_name = "cmm",
41284af458bSBrian King };
41384af458bSBrian King 
/*
 * Release callback for the cmm device.  Intentionally empty: this function
 * frees nothing, and cmm_dev is declared as a static object in this file.
 */
static void cmm_release_device(struct device *dev)
{
}
417*7d821274SDavid Hildenbrand 
41884af458bSBrian King /**
41984af458bSBrian King  * cmm_sysfs_register - Register with sysfs
42084af458bSBrian King  *
42184af458bSBrian King  * Return value:
42284af458bSBrian King  * 	0 on success / other on failure
42384af458bSBrian King  **/
4246c9d2909SKay Sievers static int cmm_sysfs_register(struct device *dev)
42584af458bSBrian King {
42684af458bSBrian King 	int i, rc;
42784af458bSBrian King 
4286c9d2909SKay Sievers 	if ((rc = subsys_system_register(&cmm_subsys, NULL)))
42984af458bSBrian King 		return rc;
43084af458bSBrian King 
4316c9d2909SKay Sievers 	dev->id = 0;
4326c9d2909SKay Sievers 	dev->bus = &cmm_subsys;
433*7d821274SDavid Hildenbrand 	dev->release = cmm_release_device;
43484af458bSBrian King 
4356c9d2909SKay Sievers 	if ((rc = device_register(dev)))
4366c9d2909SKay Sievers 		goto subsys_unregister;
43784af458bSBrian King 
43884af458bSBrian King 	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
4396c9d2909SKay Sievers 		if ((rc = device_create_file(dev, cmm_attrs[i])))
44084af458bSBrian King 			goto fail;
44184af458bSBrian King 	}
44284af458bSBrian King 
44384af458bSBrian King 	return 0;
44484af458bSBrian King 
44584af458bSBrian King fail:
44684af458bSBrian King 	while (--i >= 0)
4476c9d2909SKay Sievers 		device_remove_file(dev, cmm_attrs[i]);
4486c9d2909SKay Sievers 	device_unregister(dev);
4496c9d2909SKay Sievers subsys_unregister:
4506c9d2909SKay Sievers 	bus_unregister(&cmm_subsys);
45184af458bSBrian King 	return rc;
45284af458bSBrian King }
45384af458bSBrian King 
45484af458bSBrian King /**
45584af458bSBrian King  * cmm_unregister_sysfs - Unregister from sysfs
45684af458bSBrian King  *
45784af458bSBrian King  **/
4586c9d2909SKay Sievers static void cmm_unregister_sysfs(struct device *dev)
45984af458bSBrian King {
46084af458bSBrian King 	int i;
46184af458bSBrian King 
46284af458bSBrian King 	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++)
4636c9d2909SKay Sievers 		device_remove_file(dev, cmm_attrs[i]);
4646c9d2909SKay Sievers 	device_unregister(dev);
4656c9d2909SKay Sievers 	bus_unregister(&cmm_subsys);
46684af458bSBrian King }
46784af458bSBrian King 
46884af458bSBrian King /**
469fecba962SBrian King  * cmm_reboot_notifier - Make sure pages are not still marked as "loaned"
470fecba962SBrian King  *
471fecba962SBrian King  **/
472fecba962SBrian King static int cmm_reboot_notifier(struct notifier_block *nb,
473fecba962SBrian King 			       unsigned long action, void *unused)
474fecba962SBrian King {
475fecba962SBrian King 	if (action == SYS_RESTART) {
476fecba962SBrian King 		if (cmm_thread_ptr)
477fecba962SBrian King 			kthread_stop(cmm_thread_ptr);
478fecba962SBrian King 		cmm_thread_ptr = NULL;
479fecba962SBrian King 		cmm_free_pages(loaned_pages);
480fecba962SBrian King 	}
481fecba962SBrian King 	return NOTIFY_DONE;
482fecba962SBrian King }
483fecba962SBrian King 
484fecba962SBrian King static struct notifier_block cmm_reboot_nb = {
485fecba962SBrian King 	.notifier_call = cmm_reboot_notifier,
486fecba962SBrian King };
487fecba962SBrian King 
488fecba962SBrian King /**
48914b8a76bSRobert Jennings  * cmm_count_pages - Count the number of pages loaned in a particular range.
49014b8a76bSRobert Jennings  *
49114b8a76bSRobert Jennings  * @arg: memory_isolate_notify structure with address range and count
49214b8a76bSRobert Jennings  *
49314b8a76bSRobert Jennings  * Return value:
49414b8a76bSRobert Jennings  *      0 on success
49514b8a76bSRobert Jennings  **/
49614b8a76bSRobert Jennings static unsigned long cmm_count_pages(void *arg)
49714b8a76bSRobert Jennings {
49814b8a76bSRobert Jennings 	struct memory_isolate_notify *marg = arg;
49914b8a76bSRobert Jennings 	struct cmm_page_array *pa;
50014b8a76bSRobert Jennings 	unsigned long start = (unsigned long)pfn_to_kaddr(marg->start_pfn);
50114b8a76bSRobert Jennings 	unsigned long end = start + (marg->nr_pages << PAGE_SHIFT);
50214b8a76bSRobert Jennings 	unsigned long idx;
50314b8a76bSRobert Jennings 
50414b8a76bSRobert Jennings 	spin_lock(&cmm_lock);
50514b8a76bSRobert Jennings 	pa = cmm_page_list;
50614b8a76bSRobert Jennings 	while (pa) {
50714b8a76bSRobert Jennings 		if ((unsigned long)pa >= start && (unsigned long)pa < end)
50814b8a76bSRobert Jennings 			marg->pages_found++;
50914b8a76bSRobert Jennings 		for (idx = 0; idx < pa->index; idx++)
51014b8a76bSRobert Jennings 			if (pa->page[idx] >= start && pa->page[idx] < end)
51114b8a76bSRobert Jennings 				marg->pages_found++;
51214b8a76bSRobert Jennings 		pa = pa->next;
51314b8a76bSRobert Jennings 	}
51414b8a76bSRobert Jennings 	spin_unlock(&cmm_lock);
51514b8a76bSRobert Jennings 	return 0;
51614b8a76bSRobert Jennings }
51714b8a76bSRobert Jennings 
51814b8a76bSRobert Jennings /**
51914b8a76bSRobert Jennings  * cmm_memory_isolate_cb - Handle memory isolation notifier calls
52014b8a76bSRobert Jennings  * @self:	notifier block struct
52114b8a76bSRobert Jennings  * @action:	action to take
52214b8a76bSRobert Jennings  * @arg:	struct memory_isolate_notify data for handler
52314b8a76bSRobert Jennings  *
52414b8a76bSRobert Jennings  * Return value:
52514b8a76bSRobert Jennings  *	NOTIFY_OK or notifier error based on subfunction return value
52614b8a76bSRobert Jennings  **/
52714b8a76bSRobert Jennings static int cmm_memory_isolate_cb(struct notifier_block *self,
52814b8a76bSRobert Jennings 				 unsigned long action, void *arg)
52914b8a76bSRobert Jennings {
53014b8a76bSRobert Jennings 	int ret = 0;
53114b8a76bSRobert Jennings 
53214b8a76bSRobert Jennings 	if (action == MEM_ISOLATE_COUNT)
53314b8a76bSRobert Jennings 		ret = cmm_count_pages(arg);
53414b8a76bSRobert Jennings 
5357e26065dSPrarit Bhargava 	return notifier_from_errno(ret);
53614b8a76bSRobert Jennings }
53714b8a76bSRobert Jennings 
53814b8a76bSRobert Jennings static struct notifier_block cmm_mem_isolate_nb = {
53914b8a76bSRobert Jennings 	.notifier_call = cmm_memory_isolate_cb,
54014b8a76bSRobert Jennings 	.priority = CMM_MEM_ISOLATE_PRI
54114b8a76bSRobert Jennings };
54214b8a76bSRobert Jennings 
54314b8a76bSRobert Jennings /**
54414b8a76bSRobert Jennings  * cmm_mem_going_offline - Unloan pages where memory is to be removed
54514b8a76bSRobert Jennings  * @arg: memory_notify structure with page range to be offlined
54614b8a76bSRobert Jennings  *
54714b8a76bSRobert Jennings  * Return value:
54814b8a76bSRobert Jennings  *	0 on success
54914b8a76bSRobert Jennings  **/
55014b8a76bSRobert Jennings static int cmm_mem_going_offline(void *arg)
55114b8a76bSRobert Jennings {
55214b8a76bSRobert Jennings 	struct memory_notify *marg = arg;
55314b8a76bSRobert Jennings 	unsigned long start_page = (unsigned long)pfn_to_kaddr(marg->start_pfn);
55414b8a76bSRobert Jennings 	unsigned long end_page = start_page + (marg->nr_pages << PAGE_SHIFT);
55514b8a76bSRobert Jennings 	struct cmm_page_array *pa_curr, *pa_last, *npa;
55614b8a76bSRobert Jennings 	unsigned long idx;
55714b8a76bSRobert Jennings 	unsigned long freed = 0;
55814b8a76bSRobert Jennings 
55914b8a76bSRobert Jennings 	cmm_dbg("Memory going offline, searching 0x%lx (%ld pages).\n",
56014b8a76bSRobert Jennings 			start_page, marg->nr_pages);
56114b8a76bSRobert Jennings 	spin_lock(&cmm_lock);
56214b8a76bSRobert Jennings 
56314b8a76bSRobert Jennings 	/* Search the page list for pages in the range to be offlined */
56414b8a76bSRobert Jennings 	pa_last = pa_curr = cmm_page_list;
56514b8a76bSRobert Jennings 	while (pa_curr) {
56614b8a76bSRobert Jennings 		for (idx = (pa_curr->index - 1); (idx + 1) > 0; idx--) {
56714b8a76bSRobert Jennings 			if ((pa_curr->page[idx] < start_page) ||
56814b8a76bSRobert Jennings 			    (pa_curr->page[idx] >= end_page))
56914b8a76bSRobert Jennings 				continue;
57014b8a76bSRobert Jennings 
57114b8a76bSRobert Jennings 			plpar_page_set_active(__pa(pa_curr->page[idx]));
57214b8a76bSRobert Jennings 			free_page(pa_curr->page[idx]);
57314b8a76bSRobert Jennings 			freed++;
57414b8a76bSRobert Jennings 			loaned_pages--;
575ca79b0c2SArun KS 			totalram_pages_inc();
57614b8a76bSRobert Jennings 			pa_curr->page[idx] = pa_last->page[--pa_last->index];
57714b8a76bSRobert Jennings 			if (pa_last->index == 0) {
57814b8a76bSRobert Jennings 				if (pa_curr == pa_last)
57914b8a76bSRobert Jennings 					pa_curr = pa_last->next;
58014b8a76bSRobert Jennings 				pa_last = pa_last->next;
58114b8a76bSRobert Jennings 				free_page((unsigned long)cmm_page_list);
58214b8a76bSRobert Jennings 				cmm_page_list = pa_last;
58314b8a76bSRobert Jennings 			}
58414b8a76bSRobert Jennings 		}
58514b8a76bSRobert Jennings 		pa_curr = pa_curr->next;
58614b8a76bSRobert Jennings 	}
58714b8a76bSRobert Jennings 
58814b8a76bSRobert Jennings 	/* Search for page list structures in the range to be offlined */
58914b8a76bSRobert Jennings 	pa_last = NULL;
59014b8a76bSRobert Jennings 	pa_curr = cmm_page_list;
59114b8a76bSRobert Jennings 	while (pa_curr) {
59214b8a76bSRobert Jennings 		if (((unsigned long)pa_curr >= start_page) &&
59314b8a76bSRobert Jennings 				((unsigned long)pa_curr < end_page)) {
59414b8a76bSRobert Jennings 			npa = (struct cmm_page_array *)__get_free_page(
59514b8a76bSRobert Jennings 					GFP_NOIO | __GFP_NOWARN |
59614b8a76bSRobert Jennings 					__GFP_NORETRY | __GFP_NOMEMALLOC);
59714b8a76bSRobert Jennings 			if (!npa) {
59814b8a76bSRobert Jennings 				spin_unlock(&cmm_lock);
59914b8a76bSRobert Jennings 				cmm_dbg("Failed to allocate memory for list "
60014b8a76bSRobert Jennings 						"management. Memory hotplug "
60114b8a76bSRobert Jennings 						"failed.\n");
602e2be2371SRasmus Villemoes 				return -ENOMEM;
60314b8a76bSRobert Jennings 			}
60414b8a76bSRobert Jennings 			memcpy(npa, pa_curr, PAGE_SIZE);
60514b8a76bSRobert Jennings 			if (pa_curr == cmm_page_list)
60614b8a76bSRobert Jennings 				cmm_page_list = npa;
60714b8a76bSRobert Jennings 			if (pa_last)
60814b8a76bSRobert Jennings 				pa_last->next = npa;
60914b8a76bSRobert Jennings 			free_page((unsigned long) pa_curr);
61014b8a76bSRobert Jennings 			freed++;
61114b8a76bSRobert Jennings 			pa_curr = npa;
61214b8a76bSRobert Jennings 		}
61314b8a76bSRobert Jennings 
61414b8a76bSRobert Jennings 		pa_last = pa_curr;
61514b8a76bSRobert Jennings 		pa_curr = pa_curr->next;
61614b8a76bSRobert Jennings 	}
61714b8a76bSRobert Jennings 
61814b8a76bSRobert Jennings 	spin_unlock(&cmm_lock);
61914b8a76bSRobert Jennings 	cmm_dbg("Released %ld pages in the search range.\n", freed);
62014b8a76bSRobert Jennings 
62114b8a76bSRobert Jennings 	return 0;
62214b8a76bSRobert Jennings }
62314b8a76bSRobert Jennings 
62414b8a76bSRobert Jennings /**
62514b8a76bSRobert Jennings  * cmm_memory_cb - Handle memory hotplug notifier calls
62614b8a76bSRobert Jennings  * @self:	notifier block struct
62714b8a76bSRobert Jennings  * @action:	action to take
62814b8a76bSRobert Jennings  * @arg:	struct memory_notify data for handler
62914b8a76bSRobert Jennings  *
63014b8a76bSRobert Jennings  * Return value:
63114b8a76bSRobert Jennings  *	NOTIFY_OK or notifier error based on subfunction return value
63214b8a76bSRobert Jennings  *
63314b8a76bSRobert Jennings  **/
63414b8a76bSRobert Jennings static int cmm_memory_cb(struct notifier_block *self,
63514b8a76bSRobert Jennings 			unsigned long action, void *arg)
63614b8a76bSRobert Jennings {
63714b8a76bSRobert Jennings 	int ret = 0;
63814b8a76bSRobert Jennings 
63914b8a76bSRobert Jennings 	switch (action) {
64014b8a76bSRobert Jennings 	case MEM_GOING_OFFLINE:
64114b8a76bSRobert Jennings 		mutex_lock(&hotplug_mutex);
64214b8a76bSRobert Jennings 		hotplug_occurred = 1;
64314b8a76bSRobert Jennings 		ret = cmm_mem_going_offline(arg);
64414b8a76bSRobert Jennings 		break;
64514b8a76bSRobert Jennings 	case MEM_OFFLINE:
64614b8a76bSRobert Jennings 	case MEM_CANCEL_OFFLINE:
64714b8a76bSRobert Jennings 		mutex_unlock(&hotplug_mutex);
64814b8a76bSRobert Jennings 		cmm_dbg("Memory offline operation complete.\n");
64914b8a76bSRobert Jennings 		break;
65014b8a76bSRobert Jennings 	case MEM_GOING_ONLINE:
65114b8a76bSRobert Jennings 	case MEM_ONLINE:
65214b8a76bSRobert Jennings 	case MEM_CANCEL_ONLINE:
65314b8a76bSRobert Jennings 		break;
65414b8a76bSRobert Jennings 	}
65514b8a76bSRobert Jennings 
6567e26065dSPrarit Bhargava 	return notifier_from_errno(ret);
65714b8a76bSRobert Jennings }
65814b8a76bSRobert Jennings 
65914b8a76bSRobert Jennings static struct notifier_block cmm_mem_nb = {
66014b8a76bSRobert Jennings 	.notifier_call = cmm_memory_cb,
66114b8a76bSRobert Jennings 	.priority = CMM_MEM_HOTPLUG_PRI
66214b8a76bSRobert Jennings };
66314b8a76bSRobert Jennings 
66414b8a76bSRobert Jennings /**
66584af458bSBrian King  * cmm_init - Module initialization
66684af458bSBrian King  *
66784af458bSBrian King  * Return value:
66884af458bSBrian King  * 	0 on success / other on failure
66984af458bSBrian King  **/
67084af458bSBrian King static int cmm_init(void)
67184af458bSBrian King {
67284af458bSBrian King 	int rc = -ENOMEM;
67384af458bSBrian King 
67484af458bSBrian King 	if (!firmware_has_feature(FW_FEATURE_CMO))
67584af458bSBrian King 		return -EOPNOTSUPP;
67684af458bSBrian King 
67784af458bSBrian King 	if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0)
67884af458bSBrian King 		return rc;
67984af458bSBrian King 
680fecba962SBrian King 	if ((rc = register_reboot_notifier(&cmm_reboot_nb)))
68184af458bSBrian King 		goto out_oom_notifier;
68284af458bSBrian King 
6836c9d2909SKay Sievers 	if ((rc = cmm_sysfs_register(&cmm_dev)))
684fecba962SBrian King 		goto out_reboot_notifier;
685fecba962SBrian King 
68614b8a76bSRobert Jennings 	if (register_memory_notifier(&cmm_mem_nb) ||
68714b8a76bSRobert Jennings 	    register_memory_isolate_notifier(&cmm_mem_isolate_nb))
68814b8a76bSRobert Jennings 		goto out_unregister_notifier;
68914b8a76bSRobert Jennings 
69084af458bSBrian King 	if (cmm_disabled)
69184af458bSBrian King 		return rc;
69284af458bSBrian King 
69384af458bSBrian King 	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
69484af458bSBrian King 	if (IS_ERR(cmm_thread_ptr)) {
69584af458bSBrian King 		rc = PTR_ERR(cmm_thread_ptr);
69614b8a76bSRobert Jennings 		goto out_unregister_notifier;
69784af458bSBrian King 	}
69884af458bSBrian King 
69984af458bSBrian King 	return rc;
70084af458bSBrian King 
70114b8a76bSRobert Jennings out_unregister_notifier:
70214b8a76bSRobert Jennings 	unregister_memory_notifier(&cmm_mem_nb);
70314b8a76bSRobert Jennings 	unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
7046c9d2909SKay Sievers 	cmm_unregister_sysfs(&cmm_dev);
705fecba962SBrian King out_reboot_notifier:
706fecba962SBrian King 	unregister_reboot_notifier(&cmm_reboot_nb);
70784af458bSBrian King out_oom_notifier:
70884af458bSBrian King 	unregister_oom_notifier(&cmm_oom_nb);
70984af458bSBrian King 	return rc;
71084af458bSBrian King }
71184af458bSBrian King 
71284af458bSBrian King /**
71384af458bSBrian King  * cmm_exit - Module exit
71484af458bSBrian King  *
71584af458bSBrian King  * Return value:
71684af458bSBrian King  * 	nothing
71784af458bSBrian King  **/
71884af458bSBrian King static void cmm_exit(void)
71984af458bSBrian King {
72084af458bSBrian King 	if (cmm_thread_ptr)
72184af458bSBrian King 		kthread_stop(cmm_thread_ptr);
72284af458bSBrian King 	unregister_oom_notifier(&cmm_oom_nb);
723fecba962SBrian King 	unregister_reboot_notifier(&cmm_reboot_nb);
72414b8a76bSRobert Jennings 	unregister_memory_notifier(&cmm_mem_nb);
72514b8a76bSRobert Jennings 	unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
72684af458bSBrian King 	cmm_free_pages(loaned_pages);
7276c9d2909SKay Sievers 	cmm_unregister_sysfs(&cmm_dev);
72884af458bSBrian King }
72984af458bSBrian King 
73084af458bSBrian King /**
73184af458bSBrian King  * cmm_set_disable - Disable/Enable CMM
73284af458bSBrian King  *
73384af458bSBrian King  * Return value:
73484af458bSBrian King  * 	0 on success / other on failure
73584af458bSBrian King  **/
736e4dca7b7SKees Cook static int cmm_set_disable(const char *val, const struct kernel_param *kp)
73784af458bSBrian King {
73884af458bSBrian King 	int disable = simple_strtoul(val, NULL, 10);
73984af458bSBrian King 
74084af458bSBrian King 	if (disable != 0 && disable != 1)
74184af458bSBrian King 		return -EINVAL;
74284af458bSBrian King 
74384af458bSBrian King 	if (disable && !cmm_disabled) {
74484af458bSBrian King 		if (cmm_thread_ptr)
74584af458bSBrian King 			kthread_stop(cmm_thread_ptr);
74684af458bSBrian King 		cmm_thread_ptr = NULL;
74784af458bSBrian King 		cmm_free_pages(loaned_pages);
74884af458bSBrian King 	} else if (!disable && cmm_disabled) {
74984af458bSBrian King 		cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
75084af458bSBrian King 		if (IS_ERR(cmm_thread_ptr))
75184af458bSBrian King 			return PTR_ERR(cmm_thread_ptr);
75284af458bSBrian King 	}
75384af458bSBrian King 
75484af458bSBrian King 	cmm_disabled = disable;
75584af458bSBrian King 	return 0;
75684af458bSBrian King }
75784af458bSBrian King 
75884af458bSBrian King module_param_call(disable, cmm_set_disable, param_get_uint,
75957ad583fSRussell Currey 		  &cmm_disabled, 0644);
76084af458bSBrian King MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
76184af458bSBrian King 		 "[Default=" __stringify(CMM_DISABLE) "]");
76284af458bSBrian King 
76384af458bSBrian King module_init(cmm_init);
76484af458bSBrian King module_exit(cmm_exit);
765