xref: /openbmc/linux/arch/powerpc/platforms/pseries/cmm.c (revision 14b8a76b9d53346f2871bf419da2aaf219940c50)
184af458bSBrian King /*
284af458bSBrian King  * Collaborative memory management interface.
384af458bSBrian King  *
484af458bSBrian King  * Copyright (C) 2008 IBM Corporation
584af458bSBrian King  * Author(s): Brian King (brking@linux.vnet.ibm.com),
684af458bSBrian King  *
784af458bSBrian King  * This program is free software; you can redistribute it and/or modify
884af458bSBrian King  * it under the terms of the GNU General Public License as published by
984af458bSBrian King  * the Free Software Foundation; either version 2 of the License, or
1084af458bSBrian King  * (at your option) any later version.
1184af458bSBrian King  *
1284af458bSBrian King  * This program is distributed in the hope that it will be useful,
1384af458bSBrian King  * but WITHOUT ANY WARRANTY; without even the implied warranty of
1484af458bSBrian King  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1584af458bSBrian King  * GNU General Public License for more details.
1684af458bSBrian King  *
1784af458bSBrian King  * You should have received a copy of the GNU General Public License
1884af458bSBrian King  * along with this program; if not, write to the Free Software
1984af458bSBrian King  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
2084af458bSBrian King  *
2184af458bSBrian King  */
2284af458bSBrian King 
2384af458bSBrian King #include <linux/ctype.h>
2484af458bSBrian King #include <linux/delay.h>
2584af458bSBrian King #include <linux/errno.h>
2684af458bSBrian King #include <linux/fs.h>
2784af458bSBrian King #include <linux/init.h>
2884af458bSBrian King #include <linux/kthread.h>
2984af458bSBrian King #include <linux/module.h>
3084af458bSBrian King #include <linux/oom.h>
31fecba962SBrian King #include <linux/reboot.h>
3284af458bSBrian King #include <linux/sched.h>
3384af458bSBrian King #include <linux/stringify.h>
3484af458bSBrian King #include <linux/swap.h>
3584af458bSBrian King #include <linux/sysdev.h>
3684af458bSBrian King #include <asm/firmware.h>
3784af458bSBrian King #include <asm/hvcall.h>
3884af458bSBrian King #include <asm/mmu.h>
3984af458bSBrian King #include <asm/pgalloc.h>
4084af458bSBrian King #include <asm/uaccess.h>
41*14b8a76bSRobert Jennings #include <linux/memory.h>
4284af458bSBrian King 
4384af458bSBrian King #include "plpar_wrappers.h"
4484af458bSBrian King 
/* Driver version string reported through MODULE_VERSION. */
#define CMM_DRIVER_VERSION	"1.0.0"

/* Compile-time defaults for the tunables declared below. */
#define CMM_DEFAULT_DELAY	1	/* seconds between hypervisor polls */
#define CMM_HOTPLUG_DELAY	5	/* seconds to pause after a hotplug */
#define CMM_DEBUG		0	/* debug logging off */
#define CMM_DISABLE		0
#define CMM_OOM_KB		1024	/* kb released per OOM notification */
#define CMM_MIN_MEM_MB		256	/* floor below which we never loan */

/* Convert between a kilobyte count and a page count. */
#define KB2PAGES(_kb)		((_kb)>>(PAGE_SHIFT-10))
#define PAGES2KB(_pages)	((_pages)<<(PAGE_SHIFT-10))

/*
 * Notifier priorities.  The hotplug priority is kept low so that this
 * notifier is called as late as possible, which reduces thrashing in
 * the shared memory pool.
 */
#define CMM_MEM_HOTPLUG_PRI	1
#define CMM_MEM_ISOLATE_PRI	15
6084af458bSBrian King 
6184af458bSBrian King static unsigned int delay = CMM_DEFAULT_DELAY;
62*14b8a76bSRobert Jennings static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
6384af458bSBrian King static unsigned int oom_kb = CMM_OOM_KB;
6484af458bSBrian King static unsigned int cmm_debug = CMM_DEBUG;
6584af458bSBrian King static unsigned int cmm_disabled = CMM_DISABLE;
6684af458bSBrian King static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
6784af458bSBrian King static struct sys_device cmm_sysdev;
6884af458bSBrian King 
6984af458bSBrian King MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
7084af458bSBrian King MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
7184af458bSBrian King MODULE_LICENSE("GPL");
7284af458bSBrian King MODULE_VERSION(CMM_DRIVER_VERSION);
7384af458bSBrian King 
7484af458bSBrian King module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
7584af458bSBrian King MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
7684af458bSBrian King 		 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
77*14b8a76bSRobert Jennings module_param_named(hotplug_delay, hotplug_delay, uint, S_IRUGO | S_IWUSR);
78*14b8a76bSRobert Jennings MODULE_PARM_DESC(delay, "Delay (in seconds) after memory hotplug remove "
79*14b8a76bSRobert Jennings 		 "before loaning resumes. "
80*14b8a76bSRobert Jennings 		 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
8184af458bSBrian King module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
8284af458bSBrian King MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
8384af458bSBrian King 		 "[Default=" __stringify(CMM_OOM_KB) "]");
8484af458bSBrian King module_param_named(min_mem_mb, min_mem_mb, ulong, S_IRUGO | S_IWUSR);
8584af458bSBrian King MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
8684af458bSBrian King 		 "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
8784af458bSBrian King module_param_named(debug, cmm_debug, uint, S_IRUGO | S_IWUSR);
8884af458bSBrian King MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
8984af458bSBrian King 		 "[Default=" __stringify(CMM_DEBUG) "]");
9084af458bSBrian King 
/*
 * Number of page addresses that fit in one struct cmm_page_array so that
 * the whole structure (next pointer + index + entries) occupies one page.
 */
#define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long))

/*
 * Debug logging, emitted only when the cmm_debug tunable is non-zero.
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * and can be used safely in an unbraced if/else.
 */
#define cmm_dbg(...)						\
	do {							\
		if (cmm_debug)					\
			printk(KERN_INFO "cmm: " __VA_ARGS__);	\
	} while (0)
9484af458bSBrian King 
9584af458bSBrian King struct cmm_page_array {
9684af458bSBrian King 	struct cmm_page_array *next;
9784af458bSBrian King 	unsigned long index;
9884af458bSBrian King 	unsigned long page[CMM_NR_PAGES];
9984af458bSBrian King };
10084af458bSBrian King 
/* Page accounting shared by the worker thread and the notifiers. */
static unsigned long loaned_pages;		/* pages currently loaned */
static unsigned long loaned_pages_target;	/* pages we aim to have loaned */
static unsigned long oom_freed_pages;		/* pages handed back on OOM */

/* Head of the loaned-page list; list updates are done under cmm_lock. */
static struct cmm_page_array *cmm_page_list;
static DEFINE_SPINLOCK(cmm_lock);

/* Held by the memory notifier while a memory offline is in progress. */
static DEFINE_MUTEX(hotplug_mutex);
static int hotplug_occurred; /* protected by the hotplug mutex */

/* The polling kthread, or NULL once it has been stopped. */
static struct task_struct *cmm_thread_ptr;
11284af458bSBrian King 
11384af458bSBrian King /**
11484af458bSBrian King  * cmm_alloc_pages - Allocate pages and mark them as loaned
11584af458bSBrian King  * @nr:	number of pages to allocate
11684af458bSBrian King  *
11784af458bSBrian King  * Return value:
11884af458bSBrian King  * 	number of pages requested to be allocated which were not
11984af458bSBrian King  **/
12084af458bSBrian King static long cmm_alloc_pages(long nr)
12184af458bSBrian King {
12284af458bSBrian King 	struct cmm_page_array *pa, *npa;
12384af458bSBrian King 	unsigned long addr;
12484af458bSBrian King 	long rc;
12584af458bSBrian King 
12684af458bSBrian King 	cmm_dbg("Begin request for %ld pages\n", nr);
12784af458bSBrian King 
12884af458bSBrian King 	while (nr) {
129*14b8a76bSRobert Jennings 		/* Exit if a hotplug operation is in progress or occurred */
130*14b8a76bSRobert Jennings 		if (mutex_trylock(&hotplug_mutex)) {
131*14b8a76bSRobert Jennings 			if (hotplug_occurred) {
132*14b8a76bSRobert Jennings 				mutex_unlock(&hotplug_mutex);
133*14b8a76bSRobert Jennings 				break;
134*14b8a76bSRobert Jennings 			}
135*14b8a76bSRobert Jennings 			mutex_unlock(&hotplug_mutex);
136*14b8a76bSRobert Jennings 		} else {
137*14b8a76bSRobert Jennings 			break;
138*14b8a76bSRobert Jennings 		}
139*14b8a76bSRobert Jennings 
14084af458bSBrian King 		addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
14184af458bSBrian King 				       __GFP_NORETRY | __GFP_NOMEMALLOC);
14284af458bSBrian King 		if (!addr)
14384af458bSBrian King 			break;
14484af458bSBrian King 		spin_lock(&cmm_lock);
14584af458bSBrian King 		pa = cmm_page_list;
14684af458bSBrian King 		if (!pa || pa->index >= CMM_NR_PAGES) {
14784af458bSBrian King 			/* Need a new page for the page list. */
14884af458bSBrian King 			spin_unlock(&cmm_lock);
149*14b8a76bSRobert Jennings 			npa = (struct cmm_page_array *)__get_free_page(
150*14b8a76bSRobert Jennings 					GFP_NOIO | __GFP_NOWARN |
15184af458bSBrian King 					__GFP_NORETRY | __GFP_NOMEMALLOC);
15284af458bSBrian King 			if (!npa) {
1535df72bf3SHarvey Harrison 				pr_info("%s: Can not allocate new page list\n", __func__);
15484af458bSBrian King 				free_page(addr);
15584af458bSBrian King 				break;
15684af458bSBrian King 			}
15784af458bSBrian King 			spin_lock(&cmm_lock);
15884af458bSBrian King 			pa = cmm_page_list;
15984af458bSBrian King 
16084af458bSBrian King 			if (!pa || pa->index >= CMM_NR_PAGES) {
16184af458bSBrian King 				npa->next = pa;
16284af458bSBrian King 				npa->index = 0;
16384af458bSBrian King 				pa = npa;
16484af458bSBrian King 				cmm_page_list = pa;
16584af458bSBrian King 			} else
16684af458bSBrian King 				free_page((unsigned long) npa);
16784af458bSBrian King 		}
16884af458bSBrian King 
16984af458bSBrian King 		if ((rc = plpar_page_set_loaned(__pa(addr)))) {
1705df72bf3SHarvey Harrison 			pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc);
17184af458bSBrian King 			spin_unlock(&cmm_lock);
17284af458bSBrian King 			free_page(addr);
17384af458bSBrian King 			break;
17484af458bSBrian King 		}
17584af458bSBrian King 
17684af458bSBrian King 		pa->page[pa->index++] = addr;
17784af458bSBrian King 		loaned_pages++;
17884af458bSBrian King 		totalram_pages--;
17984af458bSBrian King 		spin_unlock(&cmm_lock);
18084af458bSBrian King 		nr--;
18184af458bSBrian King 	}
18284af458bSBrian King 
18384af458bSBrian King 	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
18484af458bSBrian King 	return nr;
18584af458bSBrian King }
18684af458bSBrian King 
18784af458bSBrian King /**
18884af458bSBrian King  * cmm_free_pages - Free pages and mark them as active
18984af458bSBrian King  * @nr:	number of pages to free
19084af458bSBrian King  *
19184af458bSBrian King  * Return value:
19284af458bSBrian King  * 	number of pages requested to be freed which were not
19384af458bSBrian King  **/
19484af458bSBrian King static long cmm_free_pages(long nr)
19584af458bSBrian King {
19684af458bSBrian King 	struct cmm_page_array *pa;
19784af458bSBrian King 	unsigned long addr;
19884af458bSBrian King 
19984af458bSBrian King 	cmm_dbg("Begin free of %ld pages.\n", nr);
20084af458bSBrian King 	spin_lock(&cmm_lock);
20184af458bSBrian King 	pa = cmm_page_list;
20284af458bSBrian King 	while (nr) {
20384af458bSBrian King 		if (!pa || pa->index <= 0)
20484af458bSBrian King 			break;
20584af458bSBrian King 		addr = pa->page[--pa->index];
20684af458bSBrian King 
20784af458bSBrian King 		if (pa->index == 0) {
20884af458bSBrian King 			pa = pa->next;
20984af458bSBrian King 			free_page((unsigned long) cmm_page_list);
21084af458bSBrian King 			cmm_page_list = pa;
21184af458bSBrian King 		}
21284af458bSBrian King 
21384af458bSBrian King 		plpar_page_set_active(__pa(addr));
21484af458bSBrian King 		free_page(addr);
21584af458bSBrian King 		loaned_pages--;
21684af458bSBrian King 		nr--;
21784af458bSBrian King 		totalram_pages++;
21884af458bSBrian King 	}
21984af458bSBrian King 	spin_unlock(&cmm_lock);
22084af458bSBrian King 	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
22184af458bSBrian King 	return nr;
22284af458bSBrian King }
22384af458bSBrian King 
22484af458bSBrian King /**
22584af458bSBrian King  * cmm_oom_notify - OOM notifier
22684af458bSBrian King  * @self:	notifier block struct
22784af458bSBrian King  * @dummy:	not used
22884af458bSBrian King  * @parm:	returned - number of pages freed
22984af458bSBrian King  *
23084af458bSBrian King  * Return value:
23184af458bSBrian King  * 	NOTIFY_OK
23284af458bSBrian King  **/
23384af458bSBrian King static int cmm_oom_notify(struct notifier_block *self,
23484af458bSBrian King 			  unsigned long dummy, void *parm)
23584af458bSBrian King {
23684af458bSBrian King 	unsigned long *freed = parm;
23784af458bSBrian King 	long nr = KB2PAGES(oom_kb);
23884af458bSBrian King 
23984af458bSBrian King 	cmm_dbg("OOM processing started\n");
24084af458bSBrian King 	nr = cmm_free_pages(nr);
24184af458bSBrian King 	loaned_pages_target = loaned_pages;
24284af458bSBrian King 	*freed += KB2PAGES(oom_kb) - nr;
24384af458bSBrian King 	oom_freed_pages += KB2PAGES(oom_kb) - nr;
24484af458bSBrian King 	cmm_dbg("OOM processing complete\n");
24584af458bSBrian King 	return NOTIFY_OK;
24684af458bSBrian King }
24784af458bSBrian King 
24884af458bSBrian King /**
24984af458bSBrian King  * cmm_get_mpp - Read memory performance parameters
25084af458bSBrian King  *
25184af458bSBrian King  * Makes hcall to query the current page loan request from the hypervisor.
25284af458bSBrian King  *
25384af458bSBrian King  * Return value:
25484af458bSBrian King  * 	nothing
25584af458bSBrian King  **/
25684af458bSBrian King static void cmm_get_mpp(void)
25784af458bSBrian King {
25884af458bSBrian King 	int rc;
25984af458bSBrian King 	struct hvcall_mpp_data mpp_data;
2608be8cf5bSBrian King 	signed long active_pages_target, page_loan_request, target;
2618be8cf5bSBrian King 	signed long total_pages = totalram_pages + loaned_pages;
2628be8cf5bSBrian King 	signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;
26384af458bSBrian King 
26484af458bSBrian King 	rc = h_get_mpp(&mpp_data);
26584af458bSBrian King 
26684af458bSBrian King 	if (rc != H_SUCCESS)
26784af458bSBrian King 		return;
26884af458bSBrian King 
26984af458bSBrian King 	page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
2708be8cf5bSBrian King 	target = page_loan_request + (signed long)loaned_pages;
2718be8cf5bSBrian King 
2728be8cf5bSBrian King 	if (target < 0 || total_pages < min_mem_pages)
2738be8cf5bSBrian King 		target = 0;
2748be8cf5bSBrian King 
2758be8cf5bSBrian King 	if (target > oom_freed_pages)
2768be8cf5bSBrian King 		target -= oom_freed_pages;
27784af458bSBrian King 	else
2788be8cf5bSBrian King 		target = 0;
27984af458bSBrian King 
2808be8cf5bSBrian King 	active_pages_target = total_pages - target;
28184af458bSBrian King 
2828be8cf5bSBrian King 	if (min_mem_pages > active_pages_target)
2838be8cf5bSBrian King 		target = total_pages - min_mem_pages;
2848be8cf5bSBrian King 
2858be8cf5bSBrian King 	if (target < 0)
2868be8cf5bSBrian King 		target = 0;
2878be8cf5bSBrian King 
2888be8cf5bSBrian King 	loaned_pages_target = target;
28984af458bSBrian King 
29084af458bSBrian King 	cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
29184af458bSBrian King 		page_loan_request, loaned_pages, loaned_pages_target,
29284af458bSBrian King 		oom_freed_pages, totalram_pages);
29384af458bSBrian King }
29484af458bSBrian King 
29584af458bSBrian King static struct notifier_block cmm_oom_nb = {
29684af458bSBrian King 	.notifier_call = cmm_oom_notify
29784af458bSBrian King };
29884af458bSBrian King 
29984af458bSBrian King /**
30084af458bSBrian King  * cmm_thread - CMM task thread
30184af458bSBrian King  * @dummy:	not used
30284af458bSBrian King  *
30384af458bSBrian King  * Return value:
30484af458bSBrian King  * 	0
30584af458bSBrian King  **/
30684af458bSBrian King static int cmm_thread(void *dummy)
30784af458bSBrian King {
30884af458bSBrian King 	unsigned long timeleft;
30984af458bSBrian King 
31084af458bSBrian King 	while (1) {
31184af458bSBrian King 		timeleft = msleep_interruptible(delay * 1000);
31284af458bSBrian King 
313*14b8a76bSRobert Jennings 		if (kthread_should_stop() || timeleft)
31484af458bSBrian King 			break;
315*14b8a76bSRobert Jennings 
316*14b8a76bSRobert Jennings 		if (mutex_trylock(&hotplug_mutex)) {
317*14b8a76bSRobert Jennings 			if (hotplug_occurred) {
318*14b8a76bSRobert Jennings 				hotplug_occurred = 0;
319*14b8a76bSRobert Jennings 				mutex_unlock(&hotplug_mutex);
320*14b8a76bSRobert Jennings 				cmm_dbg("Hotplug operation has occurred, "
321*14b8a76bSRobert Jennings 						"loaning activity suspended "
322*14b8a76bSRobert Jennings 						"for %d seconds.\n",
323*14b8a76bSRobert Jennings 						hotplug_delay);
324*14b8a76bSRobert Jennings 				timeleft = msleep_interruptible(hotplug_delay *
325*14b8a76bSRobert Jennings 						1000);
326*14b8a76bSRobert Jennings 				if (kthread_should_stop() || timeleft)
327*14b8a76bSRobert Jennings 					break;
328*14b8a76bSRobert Jennings 				continue;
329*14b8a76bSRobert Jennings 			}
330*14b8a76bSRobert Jennings 			mutex_unlock(&hotplug_mutex);
331*14b8a76bSRobert Jennings 		} else {
332*14b8a76bSRobert Jennings 			cmm_dbg("Hotplug operation in progress, activity "
333*14b8a76bSRobert Jennings 					"suspended\n");
334*14b8a76bSRobert Jennings 			continue;
33584af458bSBrian King 		}
33684af458bSBrian King 
33784af458bSBrian King 		cmm_get_mpp();
33884af458bSBrian King 
33984af458bSBrian King 		if (loaned_pages_target > loaned_pages) {
34084af458bSBrian King 			if (cmm_alloc_pages(loaned_pages_target - loaned_pages))
34184af458bSBrian King 				loaned_pages_target = loaned_pages;
34284af458bSBrian King 		} else if (loaned_pages_target < loaned_pages)
34384af458bSBrian King 			cmm_free_pages(loaned_pages - loaned_pages_target);
34484af458bSBrian King 	}
34584af458bSBrian King 	return 0;
34684af458bSBrian King }
34784af458bSBrian King 
34884af458bSBrian King #define CMM_SHOW(name, format, args...)			\
3493cee67f7SStephen Rothwell 	static ssize_t show_##name(struct sys_device *dev,	\
3503cee67f7SStephen Rothwell 				   struct sysdev_attribute *attr,	\
3513cee67f7SStephen Rothwell 				   char *buf)			\
35284af458bSBrian King 	{							\
35384af458bSBrian King 		return sprintf(buf, format, ##args);		\
35484af458bSBrian King 	}							\
35584af458bSBrian King 	static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
35684af458bSBrian King 
35784af458bSBrian King CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
35884af458bSBrian King CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
35984af458bSBrian King 
3603cee67f7SStephen Rothwell static ssize_t show_oom_pages(struct sys_device *dev,
3613cee67f7SStephen Rothwell 			      struct sysdev_attribute *attr, char *buf)
36284af458bSBrian King {
36384af458bSBrian King 	return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages));
36484af458bSBrian King }
36584af458bSBrian King 
36684af458bSBrian King static ssize_t store_oom_pages(struct sys_device *dev,
3673cee67f7SStephen Rothwell 			       struct sysdev_attribute *attr,
36884af458bSBrian King 			       const char *buf, size_t count)
36984af458bSBrian King {
37084af458bSBrian King 	unsigned long val = simple_strtoul (buf, NULL, 10);
37184af458bSBrian King 
37284af458bSBrian King 	if (!capable(CAP_SYS_ADMIN))
37384af458bSBrian King 		return -EPERM;
37484af458bSBrian King 	if (val != 0)
37584af458bSBrian King 		return -EBADMSG;
37684af458bSBrian King 
37784af458bSBrian King 	oom_freed_pages = 0;
37884af458bSBrian King 	return count;
37984af458bSBrian King }
38084af458bSBrian King 
38184af458bSBrian King static SYSDEV_ATTR(oom_freed_kb, S_IWUSR| S_IRUGO,
38284af458bSBrian King 		   show_oom_pages, store_oom_pages);
38384af458bSBrian King 
38484af458bSBrian King static struct sysdev_attribute *cmm_attrs[] = {
38584af458bSBrian King 	&attr_loaned_kb,
38684af458bSBrian King 	&attr_loaned_target_kb,
38784af458bSBrian King 	&attr_oom_freed_kb,
38884af458bSBrian King };
38984af458bSBrian King 
39084af458bSBrian King static struct sysdev_class cmm_sysdev_class = {
39184af458bSBrian King 	.name = "cmm",
39284af458bSBrian King };
39384af458bSBrian King 
39484af458bSBrian King /**
39584af458bSBrian King  * cmm_sysfs_register - Register with sysfs
39684af458bSBrian King  *
39784af458bSBrian King  * Return value:
39884af458bSBrian King  * 	0 on success / other on failure
39984af458bSBrian King  **/
40084af458bSBrian King static int cmm_sysfs_register(struct sys_device *sysdev)
40184af458bSBrian King {
40284af458bSBrian King 	int i, rc;
40384af458bSBrian King 
40484af458bSBrian King 	if ((rc = sysdev_class_register(&cmm_sysdev_class)))
40584af458bSBrian King 		return rc;
40684af458bSBrian King 
40784af458bSBrian King 	sysdev->id = 0;
40884af458bSBrian King 	sysdev->cls = &cmm_sysdev_class;
40984af458bSBrian King 
41084af458bSBrian King 	if ((rc = sysdev_register(sysdev)))
41184af458bSBrian King 		goto class_unregister;
41284af458bSBrian King 
41384af458bSBrian King 	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
41484af458bSBrian King 		if ((rc = sysdev_create_file(sysdev, cmm_attrs[i])))
41584af458bSBrian King 			goto fail;
41684af458bSBrian King 	}
41784af458bSBrian King 
41884af458bSBrian King 	return 0;
41984af458bSBrian King 
42084af458bSBrian King fail:
42184af458bSBrian King 	while (--i >= 0)
42284af458bSBrian King 		sysdev_remove_file(sysdev, cmm_attrs[i]);
42384af458bSBrian King 	sysdev_unregister(sysdev);
42484af458bSBrian King class_unregister:
42584af458bSBrian King 	sysdev_class_unregister(&cmm_sysdev_class);
42684af458bSBrian King 	return rc;
42784af458bSBrian King }
42884af458bSBrian King 
42984af458bSBrian King /**
43084af458bSBrian King  * cmm_unregister_sysfs - Unregister from sysfs
43184af458bSBrian King  *
43284af458bSBrian King  **/
43384af458bSBrian King static void cmm_unregister_sysfs(struct sys_device *sysdev)
43484af458bSBrian King {
43584af458bSBrian King 	int i;
43684af458bSBrian King 
43784af458bSBrian King 	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++)
43884af458bSBrian King 		sysdev_remove_file(sysdev, cmm_attrs[i]);
43984af458bSBrian King 	sysdev_unregister(sysdev);
44084af458bSBrian King 	sysdev_class_unregister(&cmm_sysdev_class);
44184af458bSBrian King }
44284af458bSBrian King 
44384af458bSBrian King /**
444fecba962SBrian King  * cmm_reboot_notifier - Make sure pages are not still marked as "loaned"
445fecba962SBrian King  *
446fecba962SBrian King  **/
447fecba962SBrian King static int cmm_reboot_notifier(struct notifier_block *nb,
448fecba962SBrian King 			       unsigned long action, void *unused)
449fecba962SBrian King {
450fecba962SBrian King 	if (action == SYS_RESTART) {
451fecba962SBrian King 		if (cmm_thread_ptr)
452fecba962SBrian King 			kthread_stop(cmm_thread_ptr);
453fecba962SBrian King 		cmm_thread_ptr = NULL;
454fecba962SBrian King 		cmm_free_pages(loaned_pages);
455fecba962SBrian King 	}
456fecba962SBrian King 	return NOTIFY_DONE;
457fecba962SBrian King }
458fecba962SBrian King 
459fecba962SBrian King static struct notifier_block cmm_reboot_nb = {
460fecba962SBrian King 	.notifier_call = cmm_reboot_notifier,
461fecba962SBrian King };
462fecba962SBrian King 
463fecba962SBrian King /**
464*14b8a76bSRobert Jennings  * cmm_count_pages - Count the number of pages loaned in a particular range.
465*14b8a76bSRobert Jennings  *
466*14b8a76bSRobert Jennings  * @arg: memory_isolate_notify structure with address range and count
467*14b8a76bSRobert Jennings  *
468*14b8a76bSRobert Jennings  * Return value:
469*14b8a76bSRobert Jennings  *      0 on success
470*14b8a76bSRobert Jennings  **/
471*14b8a76bSRobert Jennings static unsigned long cmm_count_pages(void *arg)
472*14b8a76bSRobert Jennings {
473*14b8a76bSRobert Jennings 	struct memory_isolate_notify *marg = arg;
474*14b8a76bSRobert Jennings 	struct cmm_page_array *pa;
475*14b8a76bSRobert Jennings 	unsigned long start = (unsigned long)pfn_to_kaddr(marg->start_pfn);
476*14b8a76bSRobert Jennings 	unsigned long end = start + (marg->nr_pages << PAGE_SHIFT);
477*14b8a76bSRobert Jennings 	unsigned long idx;
478*14b8a76bSRobert Jennings 
479*14b8a76bSRobert Jennings 	spin_lock(&cmm_lock);
480*14b8a76bSRobert Jennings 	pa = cmm_page_list;
481*14b8a76bSRobert Jennings 	while (pa) {
482*14b8a76bSRobert Jennings 		if ((unsigned long)pa >= start && (unsigned long)pa < end)
483*14b8a76bSRobert Jennings 			marg->pages_found++;
484*14b8a76bSRobert Jennings 		for (idx = 0; idx < pa->index; idx++)
485*14b8a76bSRobert Jennings 			if (pa->page[idx] >= start && pa->page[idx] < end)
486*14b8a76bSRobert Jennings 				marg->pages_found++;
487*14b8a76bSRobert Jennings 		pa = pa->next;
488*14b8a76bSRobert Jennings 	}
489*14b8a76bSRobert Jennings 	spin_unlock(&cmm_lock);
490*14b8a76bSRobert Jennings 	return 0;
491*14b8a76bSRobert Jennings }
492*14b8a76bSRobert Jennings 
493*14b8a76bSRobert Jennings /**
494*14b8a76bSRobert Jennings  * cmm_memory_isolate_cb - Handle memory isolation notifier calls
495*14b8a76bSRobert Jennings  * @self:	notifier block struct
496*14b8a76bSRobert Jennings  * @action:	action to take
497*14b8a76bSRobert Jennings  * @arg:	struct memory_isolate_notify data for handler
498*14b8a76bSRobert Jennings  *
499*14b8a76bSRobert Jennings  * Return value:
500*14b8a76bSRobert Jennings  *	NOTIFY_OK or notifier error based on subfunction return value
501*14b8a76bSRobert Jennings  **/
502*14b8a76bSRobert Jennings static int cmm_memory_isolate_cb(struct notifier_block *self,
503*14b8a76bSRobert Jennings 				 unsigned long action, void *arg)
504*14b8a76bSRobert Jennings {
505*14b8a76bSRobert Jennings 	int ret = 0;
506*14b8a76bSRobert Jennings 
507*14b8a76bSRobert Jennings 	if (action == MEM_ISOLATE_COUNT)
508*14b8a76bSRobert Jennings 		ret = cmm_count_pages(arg);
509*14b8a76bSRobert Jennings 
510*14b8a76bSRobert Jennings 	if (ret)
511*14b8a76bSRobert Jennings 		ret = notifier_from_errno(ret);
512*14b8a76bSRobert Jennings 	else
513*14b8a76bSRobert Jennings 		ret = NOTIFY_OK;
514*14b8a76bSRobert Jennings 
515*14b8a76bSRobert Jennings 	return ret;
516*14b8a76bSRobert Jennings }
517*14b8a76bSRobert Jennings 
518*14b8a76bSRobert Jennings static struct notifier_block cmm_mem_isolate_nb = {
519*14b8a76bSRobert Jennings 	.notifier_call = cmm_memory_isolate_cb,
520*14b8a76bSRobert Jennings 	.priority = CMM_MEM_ISOLATE_PRI
521*14b8a76bSRobert Jennings };
522*14b8a76bSRobert Jennings 
523*14b8a76bSRobert Jennings /**
524*14b8a76bSRobert Jennings  * cmm_mem_going_offline - Unloan pages where memory is to be removed
525*14b8a76bSRobert Jennings  * @arg: memory_notify structure with page range to be offlined
526*14b8a76bSRobert Jennings  *
527*14b8a76bSRobert Jennings  * Return value:
528*14b8a76bSRobert Jennings  *	0 on success
529*14b8a76bSRobert Jennings  **/
530*14b8a76bSRobert Jennings static int cmm_mem_going_offline(void *arg)
531*14b8a76bSRobert Jennings {
532*14b8a76bSRobert Jennings 	struct memory_notify *marg = arg;
533*14b8a76bSRobert Jennings 	unsigned long start_page = (unsigned long)pfn_to_kaddr(marg->start_pfn);
534*14b8a76bSRobert Jennings 	unsigned long end_page = start_page + (marg->nr_pages << PAGE_SHIFT);
535*14b8a76bSRobert Jennings 	struct cmm_page_array *pa_curr, *pa_last, *npa;
536*14b8a76bSRobert Jennings 	unsigned long idx;
537*14b8a76bSRobert Jennings 	unsigned long freed = 0;
538*14b8a76bSRobert Jennings 
539*14b8a76bSRobert Jennings 	cmm_dbg("Memory going offline, searching 0x%lx (%ld pages).\n",
540*14b8a76bSRobert Jennings 			start_page, marg->nr_pages);
541*14b8a76bSRobert Jennings 	spin_lock(&cmm_lock);
542*14b8a76bSRobert Jennings 
543*14b8a76bSRobert Jennings 	/* Search the page list for pages in the range to be offlined */
544*14b8a76bSRobert Jennings 	pa_last = pa_curr = cmm_page_list;
545*14b8a76bSRobert Jennings 	while (pa_curr) {
546*14b8a76bSRobert Jennings 		for (idx = (pa_curr->index - 1); (idx + 1) > 0; idx--) {
547*14b8a76bSRobert Jennings 			if ((pa_curr->page[idx] < start_page) ||
548*14b8a76bSRobert Jennings 			    (pa_curr->page[idx] >= end_page))
549*14b8a76bSRobert Jennings 				continue;
550*14b8a76bSRobert Jennings 
551*14b8a76bSRobert Jennings 			plpar_page_set_active(__pa(pa_curr->page[idx]));
552*14b8a76bSRobert Jennings 			free_page(pa_curr->page[idx]);
553*14b8a76bSRobert Jennings 			freed++;
554*14b8a76bSRobert Jennings 			loaned_pages--;
555*14b8a76bSRobert Jennings 			totalram_pages++;
556*14b8a76bSRobert Jennings 			pa_curr->page[idx] = pa_last->page[--pa_last->index];
557*14b8a76bSRobert Jennings 			if (pa_last->index == 0) {
558*14b8a76bSRobert Jennings 				if (pa_curr == pa_last)
559*14b8a76bSRobert Jennings 					pa_curr = pa_last->next;
560*14b8a76bSRobert Jennings 				pa_last = pa_last->next;
561*14b8a76bSRobert Jennings 				free_page((unsigned long)cmm_page_list);
562*14b8a76bSRobert Jennings 				cmm_page_list = pa_last;
563*14b8a76bSRobert Jennings 				continue;
564*14b8a76bSRobert Jennings 			}
565*14b8a76bSRobert Jennings 		}
566*14b8a76bSRobert Jennings 		pa_curr = pa_curr->next;
567*14b8a76bSRobert Jennings 	}
568*14b8a76bSRobert Jennings 
569*14b8a76bSRobert Jennings 	/* Search for page list structures in the range to be offlined */
570*14b8a76bSRobert Jennings 	pa_last = NULL;
571*14b8a76bSRobert Jennings 	pa_curr = cmm_page_list;
572*14b8a76bSRobert Jennings 	while (pa_curr) {
573*14b8a76bSRobert Jennings 		if (((unsigned long)pa_curr >= start_page) &&
574*14b8a76bSRobert Jennings 				((unsigned long)pa_curr < end_page)) {
575*14b8a76bSRobert Jennings 			npa = (struct cmm_page_array *)__get_free_page(
576*14b8a76bSRobert Jennings 					GFP_NOIO | __GFP_NOWARN |
577*14b8a76bSRobert Jennings 					__GFP_NORETRY | __GFP_NOMEMALLOC);
578*14b8a76bSRobert Jennings 			if (!npa) {
579*14b8a76bSRobert Jennings 				spin_unlock(&cmm_lock);
580*14b8a76bSRobert Jennings 				cmm_dbg("Failed to allocate memory for list "
581*14b8a76bSRobert Jennings 						"management. Memory hotplug "
582*14b8a76bSRobert Jennings 						"failed.\n");
583*14b8a76bSRobert Jennings 				return ENOMEM;
584*14b8a76bSRobert Jennings 			}
585*14b8a76bSRobert Jennings 			memcpy(npa, pa_curr, PAGE_SIZE);
586*14b8a76bSRobert Jennings 			if (pa_curr == cmm_page_list)
587*14b8a76bSRobert Jennings 				cmm_page_list = npa;
588*14b8a76bSRobert Jennings 			if (pa_last)
589*14b8a76bSRobert Jennings 				pa_last->next = npa;
590*14b8a76bSRobert Jennings 			free_page((unsigned long) pa_curr);
591*14b8a76bSRobert Jennings 			freed++;
592*14b8a76bSRobert Jennings 			pa_curr = npa;
593*14b8a76bSRobert Jennings 		}
594*14b8a76bSRobert Jennings 
595*14b8a76bSRobert Jennings 		pa_last = pa_curr;
596*14b8a76bSRobert Jennings 		pa_curr = pa_curr->next;
597*14b8a76bSRobert Jennings 	}
598*14b8a76bSRobert Jennings 
599*14b8a76bSRobert Jennings 	spin_unlock(&cmm_lock);
600*14b8a76bSRobert Jennings 	cmm_dbg("Released %ld pages in the search range.\n", freed);
601*14b8a76bSRobert Jennings 
602*14b8a76bSRobert Jennings 	return 0;
603*14b8a76bSRobert Jennings }
604*14b8a76bSRobert Jennings 
605*14b8a76bSRobert Jennings /**
606*14b8a76bSRobert Jennings  * cmm_memory_cb - Handle memory hotplug notifier calls
607*14b8a76bSRobert Jennings  * @self:	notifier block struct
608*14b8a76bSRobert Jennings  * @action:	action to take
609*14b8a76bSRobert Jennings  * @arg:	struct memory_notify data for handler
610*14b8a76bSRobert Jennings  *
611*14b8a76bSRobert Jennings  * Return value:
612*14b8a76bSRobert Jennings  *	NOTIFY_OK or notifier error based on subfunction return value
613*14b8a76bSRobert Jennings  *
614*14b8a76bSRobert Jennings  **/
615*14b8a76bSRobert Jennings static int cmm_memory_cb(struct notifier_block *self,
616*14b8a76bSRobert Jennings 			unsigned long action, void *arg)
617*14b8a76bSRobert Jennings {
618*14b8a76bSRobert Jennings 	int ret = 0;
619*14b8a76bSRobert Jennings 
620*14b8a76bSRobert Jennings 	switch (action) {
621*14b8a76bSRobert Jennings 	case MEM_GOING_OFFLINE:
622*14b8a76bSRobert Jennings 		mutex_lock(&hotplug_mutex);
623*14b8a76bSRobert Jennings 		hotplug_occurred = 1;
624*14b8a76bSRobert Jennings 		ret = cmm_mem_going_offline(arg);
625*14b8a76bSRobert Jennings 		break;
626*14b8a76bSRobert Jennings 	case MEM_OFFLINE:
627*14b8a76bSRobert Jennings 	case MEM_CANCEL_OFFLINE:
628*14b8a76bSRobert Jennings 		mutex_unlock(&hotplug_mutex);
629*14b8a76bSRobert Jennings 		cmm_dbg("Memory offline operation complete.\n");
630*14b8a76bSRobert Jennings 		break;
631*14b8a76bSRobert Jennings 	case MEM_GOING_ONLINE:
632*14b8a76bSRobert Jennings 	case MEM_ONLINE:
633*14b8a76bSRobert Jennings 	case MEM_CANCEL_ONLINE:
634*14b8a76bSRobert Jennings 		break;
635*14b8a76bSRobert Jennings 	}
636*14b8a76bSRobert Jennings 
637*14b8a76bSRobert Jennings 	if (ret)
638*14b8a76bSRobert Jennings 		ret = notifier_from_errno(ret);
639*14b8a76bSRobert Jennings 	else
640*14b8a76bSRobert Jennings 		ret = NOTIFY_OK;
641*14b8a76bSRobert Jennings 
642*14b8a76bSRobert Jennings 	return ret;
643*14b8a76bSRobert Jennings }
644*14b8a76bSRobert Jennings 
645*14b8a76bSRobert Jennings static struct notifier_block cmm_mem_nb = {
646*14b8a76bSRobert Jennings 	.notifier_call = cmm_memory_cb,
647*14b8a76bSRobert Jennings 	.priority = CMM_MEM_HOTPLUG_PRI
648*14b8a76bSRobert Jennings };
649*14b8a76bSRobert Jennings 
650*14b8a76bSRobert Jennings /**
65184af458bSBrian King  * cmm_init - Module initialization
65284af458bSBrian King  *
65384af458bSBrian King  * Return value:
65484af458bSBrian King  * 	0 on success / other on failure
65584af458bSBrian King  **/
65684af458bSBrian King static int cmm_init(void)
65784af458bSBrian King {
65884af458bSBrian King 	int rc = -ENOMEM;
65984af458bSBrian King 
66084af458bSBrian King 	if (!firmware_has_feature(FW_FEATURE_CMO))
66184af458bSBrian King 		return -EOPNOTSUPP;
66284af458bSBrian King 
66384af458bSBrian King 	if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0)
66484af458bSBrian King 		return rc;
66584af458bSBrian King 
666fecba962SBrian King 	if ((rc = register_reboot_notifier(&cmm_reboot_nb)))
66784af458bSBrian King 		goto out_oom_notifier;
66884af458bSBrian King 
669fecba962SBrian King 	if ((rc = cmm_sysfs_register(&cmm_sysdev)))
670fecba962SBrian King 		goto out_reboot_notifier;
671fecba962SBrian King 
672*14b8a76bSRobert Jennings 	if (register_memory_notifier(&cmm_mem_nb) ||
673*14b8a76bSRobert Jennings 	    register_memory_isolate_notifier(&cmm_mem_isolate_nb))
674*14b8a76bSRobert Jennings 		goto out_unregister_notifier;
675*14b8a76bSRobert Jennings 
67684af458bSBrian King 	if (cmm_disabled)
67784af458bSBrian King 		return rc;
67884af458bSBrian King 
67984af458bSBrian King 	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
68084af458bSBrian King 	if (IS_ERR(cmm_thread_ptr)) {
68184af458bSBrian King 		rc = PTR_ERR(cmm_thread_ptr);
682*14b8a76bSRobert Jennings 		goto out_unregister_notifier;
68384af458bSBrian King 	}
68484af458bSBrian King 
68584af458bSBrian King 	return rc;
68684af458bSBrian King 
687*14b8a76bSRobert Jennings out_unregister_notifier:
688*14b8a76bSRobert Jennings 	unregister_memory_notifier(&cmm_mem_nb);
689*14b8a76bSRobert Jennings 	unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
69084af458bSBrian King 	cmm_unregister_sysfs(&cmm_sysdev);
691fecba962SBrian King out_reboot_notifier:
692fecba962SBrian King 	unregister_reboot_notifier(&cmm_reboot_nb);
69384af458bSBrian King out_oom_notifier:
69484af458bSBrian King 	unregister_oom_notifier(&cmm_oom_nb);
69584af458bSBrian King 	return rc;
69684af458bSBrian King }
69784af458bSBrian King 
69884af458bSBrian King /**
69984af458bSBrian King  * cmm_exit - Module exit
70084af458bSBrian King  *
70184af458bSBrian King  * Return value:
70284af458bSBrian King  * 	nothing
70384af458bSBrian King  **/
70484af458bSBrian King static void cmm_exit(void)
70584af458bSBrian King {
70684af458bSBrian King 	if (cmm_thread_ptr)
70784af458bSBrian King 		kthread_stop(cmm_thread_ptr);
70884af458bSBrian King 	unregister_oom_notifier(&cmm_oom_nb);
709fecba962SBrian King 	unregister_reboot_notifier(&cmm_reboot_nb);
710*14b8a76bSRobert Jennings 	unregister_memory_notifier(&cmm_mem_nb);
711*14b8a76bSRobert Jennings 	unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
71284af458bSBrian King 	cmm_free_pages(loaned_pages);
71384af458bSBrian King 	cmm_unregister_sysfs(&cmm_sysdev);
71484af458bSBrian King }
71584af458bSBrian King 
71684af458bSBrian King /**
71784af458bSBrian King  * cmm_set_disable - Disable/Enable CMM
71884af458bSBrian King  *
71984af458bSBrian King  * Return value:
72084af458bSBrian King  * 	0 on success / other on failure
72184af458bSBrian King  **/
72284af458bSBrian King static int cmm_set_disable(const char *val, struct kernel_param *kp)
72384af458bSBrian King {
72484af458bSBrian King 	int disable = simple_strtoul(val, NULL, 10);
72584af458bSBrian King 
72684af458bSBrian King 	if (disable != 0 && disable != 1)
72784af458bSBrian King 		return -EINVAL;
72884af458bSBrian King 
72984af458bSBrian King 	if (disable && !cmm_disabled) {
73084af458bSBrian King 		if (cmm_thread_ptr)
73184af458bSBrian King 			kthread_stop(cmm_thread_ptr);
73284af458bSBrian King 		cmm_thread_ptr = NULL;
73384af458bSBrian King 		cmm_free_pages(loaned_pages);
73484af458bSBrian King 	} else if (!disable && cmm_disabled) {
73584af458bSBrian King 		cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
73684af458bSBrian King 		if (IS_ERR(cmm_thread_ptr))
73784af458bSBrian King 			return PTR_ERR(cmm_thread_ptr);
73884af458bSBrian King 	}
73984af458bSBrian King 
74084af458bSBrian King 	cmm_disabled = disable;
74184af458bSBrian King 	return 0;
74284af458bSBrian King }
74384af458bSBrian King 
74484af458bSBrian King module_param_call(disable, cmm_set_disable, param_get_uint,
74584af458bSBrian King 		  &cmm_disabled, S_IRUGO | S_IWUSR);
74684af458bSBrian King MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
74784af458bSBrian King 		 "[Default=" __stringify(CMM_DISABLE) "]");
74884af458bSBrian King 
74984af458bSBrian King module_init(cmm_init);
75084af458bSBrian King module_exit(cmm_exit);
751