xref: /openbmc/linux/arch/powerpc/platforms/pseries/cmm.c (revision 5a0e3ad6af8660be21ca98a971cd00f331318c05)
184af458bSBrian King /*
284af458bSBrian King  * Collaborative memory management interface.
384af458bSBrian King  *
484af458bSBrian King  * Copyright (C) 2008 IBM Corporation
584af458bSBrian King  * Author(s): Brian King (brking@linux.vnet.ibm.com),
684af458bSBrian King  *
784af458bSBrian King  * This program is free software; you can redistribute it and/or modify
884af458bSBrian King  * it under the terms of the GNU General Public License as published by
984af458bSBrian King  * the Free Software Foundation; either version 2 of the License, or
1084af458bSBrian King  * (at your option) any later version.
1184af458bSBrian King  *
1284af458bSBrian King  * This program is distributed in the hope that it will be useful,
1384af458bSBrian King  * but WITHOUT ANY WARRANTY; without even the implied warranty of
1484af458bSBrian King  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1584af458bSBrian King  * GNU General Public License for more details.
1684af458bSBrian King  *
1784af458bSBrian King  * You should have received a copy of the GNU General Public License
1884af458bSBrian King  * along with this program; if not, write to the Free Software
1984af458bSBrian King  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
2084af458bSBrian King  *
2184af458bSBrian King  */
2284af458bSBrian King 
2384af458bSBrian King #include <linux/ctype.h>
2484af458bSBrian King #include <linux/delay.h>
2584af458bSBrian King #include <linux/errno.h>
2684af458bSBrian King #include <linux/fs.h>
27*5a0e3ad6STejun Heo #include <linux/gfp.h>
2884af458bSBrian King #include <linux/init.h>
2984af458bSBrian King #include <linux/kthread.h>
3084af458bSBrian King #include <linux/module.h>
3184af458bSBrian King #include <linux/oom.h>
32fecba962SBrian King #include <linux/reboot.h>
3384af458bSBrian King #include <linux/sched.h>
3484af458bSBrian King #include <linux/stringify.h>
3584af458bSBrian King #include <linux/swap.h>
3684af458bSBrian King #include <linux/sysdev.h>
3784af458bSBrian King #include <asm/firmware.h>
3884af458bSBrian King #include <asm/hvcall.h>
3984af458bSBrian King #include <asm/mmu.h>
4084af458bSBrian King #include <asm/pgalloc.h>
4184af458bSBrian King #include <asm/uaccess.h>
4214b8a76bSRobert Jennings #include <linux/memory.h>
4384af458bSBrian King 
4484af458bSBrian King #include "plpar_wrappers.h"
4584af458bSBrian King 
/* Driver version string and the built-in defaults for the tunables below. */
#define CMM_DRIVER_VERSION	"1.0.0"
#define CMM_DEFAULT_DELAY	1
#define CMM_HOTPLUG_DELAY	5
#define CMM_DEBUG		0
#define CMM_DISABLE		0
#define CMM_OOM_KB		1024
#define CMM_MIN_MEM_MB		256

/* Convert between kilobytes and pages by shifting with (PAGE_SHIFT - 10). */
#define KB2PAGES(_kb)		((_kb) >> (PAGE_SHIFT - 10))
#define PAGES2KB(_pages)	((_pages) << (PAGE_SHIFT - 10))

/*
 * Notifier priorities.  The priority level tries to ensure that this
 * notifier is called as late as possible to reduce thrashing in the
 * shared memory pool.
 */
#define CMM_MEM_HOTPLUG_PRI	1
#define CMM_MEM_ISOLATE_PRI	15
6184af458bSBrian King 
6284af458bSBrian King static unsigned int delay = CMM_DEFAULT_DELAY;
6314b8a76bSRobert Jennings static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
6484af458bSBrian King static unsigned int oom_kb = CMM_OOM_KB;
6584af458bSBrian King static unsigned int cmm_debug = CMM_DEBUG;
6684af458bSBrian King static unsigned int cmm_disabled = CMM_DISABLE;
6784af458bSBrian King static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
6884af458bSBrian King static struct sys_device cmm_sysdev;
6984af458bSBrian King 
7084af458bSBrian King MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
7184af458bSBrian King MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
7284af458bSBrian King MODULE_LICENSE("GPL");
7384af458bSBrian King MODULE_VERSION(CMM_DRIVER_VERSION);
7484af458bSBrian King 
7584af458bSBrian King module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
7684af458bSBrian King MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
7784af458bSBrian King 		 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
7814b8a76bSRobert Jennings module_param_named(hotplug_delay, hotplug_delay, uint, S_IRUGO | S_IWUSR);
7914b8a76bSRobert Jennings MODULE_PARM_DESC(delay, "Delay (in seconds) after memory hotplug remove "
8014b8a76bSRobert Jennings 		 "before loaning resumes. "
8114b8a76bSRobert Jennings 		 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
8284af458bSBrian King module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
8384af458bSBrian King MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
8484af458bSBrian King 		 "[Default=" __stringify(CMM_OOM_KB) "]");
8584af458bSBrian King module_param_named(min_mem_mb, min_mem_mb, ulong, S_IRUGO | S_IWUSR);
8684af458bSBrian King MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
8784af458bSBrian King 		 "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
8884af458bSBrian King module_param_named(debug, cmm_debug, uint, S_IRUGO | S_IWUSR);
8984af458bSBrian King MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
9084af458bSBrian King 		 "[Default=" __stringify(CMM_DEBUG) "]");
9184af458bSBrian King 
/*
 * Number of page addresses that fit in one struct cmm_page_array: a page
 * minus the space taken by the 'next' pointer and the 'index' counter.
 */
#define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long))

/*
 * Log a message prefixed with "cmm: " when the cmm_debug flag is set.
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * and cannot capture a following 'else'.
 */
#define cmm_dbg(...)						\
	do {							\
		if (cmm_debug)					\
			printk(KERN_INFO "cmm: " __VA_ARGS__);	\
	} while (0)
9584af458bSBrian King 
9684af458bSBrian King struct cmm_page_array {
9784af458bSBrian King 	struct cmm_page_array *next;
9884af458bSBrian King 	unsigned long index;
9984af458bSBrian King 	unsigned long page[CMM_NR_PAGES];
10084af458bSBrian King };
10184af458bSBrian King 
/* Page accounting: current loan count, desired loan count, pages freed on OOM. */
static unsigned long loaned_pages;
static unsigned long loaned_pages_target;
static unsigned long oom_freed_pages;

/* Head of the list of loaned pages and the spinlock taken around its use. */
static DEFINE_SPINLOCK(cmm_lock);
static struct cmm_page_array *cmm_page_list;

/* Memory hotplug coordination. */
static DEFINE_MUTEX(hotplug_mutex);
static int hotplug_occurred; /* protected by the hotplug mutex */

/* The task created to run cmm_thread(), or NULL. */
static struct task_struct *cmm_thread_ptr;
11384af458bSBrian King 
11484af458bSBrian King /**
11584af458bSBrian King  * cmm_alloc_pages - Allocate pages and mark them as loaned
11684af458bSBrian King  * @nr:	number of pages to allocate
11784af458bSBrian King  *
11884af458bSBrian King  * Return value:
11984af458bSBrian King  * 	number of pages requested to be allocated which were not
12084af458bSBrian King  **/
12184af458bSBrian King static long cmm_alloc_pages(long nr)
12284af458bSBrian King {
12384af458bSBrian King 	struct cmm_page_array *pa, *npa;
12484af458bSBrian King 	unsigned long addr;
12584af458bSBrian King 	long rc;
12684af458bSBrian King 
12784af458bSBrian King 	cmm_dbg("Begin request for %ld pages\n", nr);
12884af458bSBrian King 
12984af458bSBrian King 	while (nr) {
13014b8a76bSRobert Jennings 		/* Exit if a hotplug operation is in progress or occurred */
13114b8a76bSRobert Jennings 		if (mutex_trylock(&hotplug_mutex)) {
13214b8a76bSRobert Jennings 			if (hotplug_occurred) {
13314b8a76bSRobert Jennings 				mutex_unlock(&hotplug_mutex);
13414b8a76bSRobert Jennings 				break;
13514b8a76bSRobert Jennings 			}
13614b8a76bSRobert Jennings 			mutex_unlock(&hotplug_mutex);
13714b8a76bSRobert Jennings 		} else {
13814b8a76bSRobert Jennings 			break;
13914b8a76bSRobert Jennings 		}
14014b8a76bSRobert Jennings 
14184af458bSBrian King 		addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
14284af458bSBrian King 				       __GFP_NORETRY | __GFP_NOMEMALLOC);
14384af458bSBrian King 		if (!addr)
14484af458bSBrian King 			break;
14584af458bSBrian King 		spin_lock(&cmm_lock);
14684af458bSBrian King 		pa = cmm_page_list;
14784af458bSBrian King 		if (!pa || pa->index >= CMM_NR_PAGES) {
14884af458bSBrian King 			/* Need a new page for the page list. */
14984af458bSBrian King 			spin_unlock(&cmm_lock);
15014b8a76bSRobert Jennings 			npa = (struct cmm_page_array *)__get_free_page(
15114b8a76bSRobert Jennings 					GFP_NOIO | __GFP_NOWARN |
15284af458bSBrian King 					__GFP_NORETRY | __GFP_NOMEMALLOC);
15384af458bSBrian King 			if (!npa) {
1545df72bf3SHarvey Harrison 				pr_info("%s: Can not allocate new page list\n", __func__);
15584af458bSBrian King 				free_page(addr);
15684af458bSBrian King 				break;
15784af458bSBrian King 			}
15884af458bSBrian King 			spin_lock(&cmm_lock);
15984af458bSBrian King 			pa = cmm_page_list;
16084af458bSBrian King 
16184af458bSBrian King 			if (!pa || pa->index >= CMM_NR_PAGES) {
16284af458bSBrian King 				npa->next = pa;
16384af458bSBrian King 				npa->index = 0;
16484af458bSBrian King 				pa = npa;
16584af458bSBrian King 				cmm_page_list = pa;
16684af458bSBrian King 			} else
16784af458bSBrian King 				free_page((unsigned long) npa);
16884af458bSBrian King 		}
16984af458bSBrian King 
17084af458bSBrian King 		if ((rc = plpar_page_set_loaned(__pa(addr)))) {
1715df72bf3SHarvey Harrison 			pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc);
17284af458bSBrian King 			spin_unlock(&cmm_lock);
17384af458bSBrian King 			free_page(addr);
17484af458bSBrian King 			break;
17584af458bSBrian King 		}
17684af458bSBrian King 
17784af458bSBrian King 		pa->page[pa->index++] = addr;
17884af458bSBrian King 		loaned_pages++;
17984af458bSBrian King 		totalram_pages--;
18084af458bSBrian King 		spin_unlock(&cmm_lock);
18184af458bSBrian King 		nr--;
18284af458bSBrian King 	}
18384af458bSBrian King 
18484af458bSBrian King 	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
18584af458bSBrian King 	return nr;
18684af458bSBrian King }
18784af458bSBrian King 
18884af458bSBrian King /**
18984af458bSBrian King  * cmm_free_pages - Free pages and mark them as active
19084af458bSBrian King  * @nr:	number of pages to free
19184af458bSBrian King  *
19284af458bSBrian King  * Return value:
19384af458bSBrian King  * 	number of pages requested to be freed which were not
19484af458bSBrian King  **/
19584af458bSBrian King static long cmm_free_pages(long nr)
19684af458bSBrian King {
19784af458bSBrian King 	struct cmm_page_array *pa;
19884af458bSBrian King 	unsigned long addr;
19984af458bSBrian King 
20084af458bSBrian King 	cmm_dbg("Begin free of %ld pages.\n", nr);
20184af458bSBrian King 	spin_lock(&cmm_lock);
20284af458bSBrian King 	pa = cmm_page_list;
20384af458bSBrian King 	while (nr) {
20484af458bSBrian King 		if (!pa || pa->index <= 0)
20584af458bSBrian King 			break;
20684af458bSBrian King 		addr = pa->page[--pa->index];
20784af458bSBrian King 
20884af458bSBrian King 		if (pa->index == 0) {
20984af458bSBrian King 			pa = pa->next;
21084af458bSBrian King 			free_page((unsigned long) cmm_page_list);
21184af458bSBrian King 			cmm_page_list = pa;
21284af458bSBrian King 		}
21384af458bSBrian King 
21484af458bSBrian King 		plpar_page_set_active(__pa(addr));
21584af458bSBrian King 		free_page(addr);
21684af458bSBrian King 		loaned_pages--;
21784af458bSBrian King 		nr--;
21884af458bSBrian King 		totalram_pages++;
21984af458bSBrian King 	}
22084af458bSBrian King 	spin_unlock(&cmm_lock);
22184af458bSBrian King 	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
22284af458bSBrian King 	return nr;
22384af458bSBrian King }
22484af458bSBrian King 
22584af458bSBrian King /**
22684af458bSBrian King  * cmm_oom_notify - OOM notifier
22784af458bSBrian King  * @self:	notifier block struct
22884af458bSBrian King  * @dummy:	not used
22984af458bSBrian King  * @parm:	returned - number of pages freed
23084af458bSBrian King  *
23184af458bSBrian King  * Return value:
23284af458bSBrian King  * 	NOTIFY_OK
23384af458bSBrian King  **/
23484af458bSBrian King static int cmm_oom_notify(struct notifier_block *self,
23584af458bSBrian King 			  unsigned long dummy, void *parm)
23684af458bSBrian King {
23784af458bSBrian King 	unsigned long *freed = parm;
23884af458bSBrian King 	long nr = KB2PAGES(oom_kb);
23984af458bSBrian King 
24084af458bSBrian King 	cmm_dbg("OOM processing started\n");
24184af458bSBrian King 	nr = cmm_free_pages(nr);
24284af458bSBrian King 	loaned_pages_target = loaned_pages;
24384af458bSBrian King 	*freed += KB2PAGES(oom_kb) - nr;
24484af458bSBrian King 	oom_freed_pages += KB2PAGES(oom_kb) - nr;
24584af458bSBrian King 	cmm_dbg("OOM processing complete\n");
24684af458bSBrian King 	return NOTIFY_OK;
24784af458bSBrian King }
24884af458bSBrian King 
24984af458bSBrian King /**
25084af458bSBrian King  * cmm_get_mpp - Read memory performance parameters
25184af458bSBrian King  *
25284af458bSBrian King  * Makes hcall to query the current page loan request from the hypervisor.
25384af458bSBrian King  *
25484af458bSBrian King  * Return value:
25584af458bSBrian King  * 	nothing
25684af458bSBrian King  **/
25784af458bSBrian King static void cmm_get_mpp(void)
25884af458bSBrian King {
25984af458bSBrian King 	int rc;
26084af458bSBrian King 	struct hvcall_mpp_data mpp_data;
2618be8cf5bSBrian King 	signed long active_pages_target, page_loan_request, target;
2628be8cf5bSBrian King 	signed long total_pages = totalram_pages + loaned_pages;
2638be8cf5bSBrian King 	signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;
26484af458bSBrian King 
26584af458bSBrian King 	rc = h_get_mpp(&mpp_data);
26684af458bSBrian King 
26784af458bSBrian King 	if (rc != H_SUCCESS)
26884af458bSBrian King 		return;
26984af458bSBrian King 
27084af458bSBrian King 	page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
2718be8cf5bSBrian King 	target = page_loan_request + (signed long)loaned_pages;
2728be8cf5bSBrian King 
2738be8cf5bSBrian King 	if (target < 0 || total_pages < min_mem_pages)
2748be8cf5bSBrian King 		target = 0;
2758be8cf5bSBrian King 
2768be8cf5bSBrian King 	if (target > oom_freed_pages)
2778be8cf5bSBrian King 		target -= oom_freed_pages;
27884af458bSBrian King 	else
2798be8cf5bSBrian King 		target = 0;
28084af458bSBrian King 
2818be8cf5bSBrian King 	active_pages_target = total_pages - target;
28284af458bSBrian King 
2838be8cf5bSBrian King 	if (min_mem_pages > active_pages_target)
2848be8cf5bSBrian King 		target = total_pages - min_mem_pages;
2858be8cf5bSBrian King 
2868be8cf5bSBrian King 	if (target < 0)
2878be8cf5bSBrian King 		target = 0;
2888be8cf5bSBrian King 
2898be8cf5bSBrian King 	loaned_pages_target = target;
29084af458bSBrian King 
29184af458bSBrian King 	cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
29284af458bSBrian King 		page_loan_request, loaned_pages, loaned_pages_target,
29384af458bSBrian King 		oom_freed_pages, totalram_pages);
29484af458bSBrian King }
29584af458bSBrian King 
29684af458bSBrian King static struct notifier_block cmm_oom_nb = {
29784af458bSBrian King 	.notifier_call = cmm_oom_notify
29884af458bSBrian King };
29984af458bSBrian King 
30084af458bSBrian King /**
30184af458bSBrian King  * cmm_thread - CMM task thread
30284af458bSBrian King  * @dummy:	not used
30384af458bSBrian King  *
30484af458bSBrian King  * Return value:
30584af458bSBrian King  * 	0
30684af458bSBrian King  **/
30784af458bSBrian King static int cmm_thread(void *dummy)
30884af458bSBrian King {
30984af458bSBrian King 	unsigned long timeleft;
31084af458bSBrian King 
31184af458bSBrian King 	while (1) {
31284af458bSBrian King 		timeleft = msleep_interruptible(delay * 1000);
31384af458bSBrian King 
31414b8a76bSRobert Jennings 		if (kthread_should_stop() || timeleft)
31584af458bSBrian King 			break;
31614b8a76bSRobert Jennings 
31714b8a76bSRobert Jennings 		if (mutex_trylock(&hotplug_mutex)) {
31814b8a76bSRobert Jennings 			if (hotplug_occurred) {
31914b8a76bSRobert Jennings 				hotplug_occurred = 0;
32014b8a76bSRobert Jennings 				mutex_unlock(&hotplug_mutex);
32114b8a76bSRobert Jennings 				cmm_dbg("Hotplug operation has occurred, "
32214b8a76bSRobert Jennings 						"loaning activity suspended "
32314b8a76bSRobert Jennings 						"for %d seconds.\n",
32414b8a76bSRobert Jennings 						hotplug_delay);
32514b8a76bSRobert Jennings 				timeleft = msleep_interruptible(hotplug_delay *
32614b8a76bSRobert Jennings 						1000);
32714b8a76bSRobert Jennings 				if (kthread_should_stop() || timeleft)
32814b8a76bSRobert Jennings 					break;
32914b8a76bSRobert Jennings 				continue;
33014b8a76bSRobert Jennings 			}
33114b8a76bSRobert Jennings 			mutex_unlock(&hotplug_mutex);
33214b8a76bSRobert Jennings 		} else {
33314b8a76bSRobert Jennings 			cmm_dbg("Hotplug operation in progress, activity "
33414b8a76bSRobert Jennings 					"suspended\n");
33514b8a76bSRobert Jennings 			continue;
33684af458bSBrian King 		}
33784af458bSBrian King 
33884af458bSBrian King 		cmm_get_mpp();
33984af458bSBrian King 
34084af458bSBrian King 		if (loaned_pages_target > loaned_pages) {
34184af458bSBrian King 			if (cmm_alloc_pages(loaned_pages_target - loaned_pages))
34284af458bSBrian King 				loaned_pages_target = loaned_pages;
34384af458bSBrian King 		} else if (loaned_pages_target < loaned_pages)
34484af458bSBrian King 			cmm_free_pages(loaned_pages - loaned_pages_target);
34584af458bSBrian King 	}
34684af458bSBrian King 	return 0;
34784af458bSBrian King }
34884af458bSBrian King 
34984af458bSBrian King #define CMM_SHOW(name, format, args...)			\
3503cee67f7SStephen Rothwell 	static ssize_t show_##name(struct sys_device *dev,	\
3513cee67f7SStephen Rothwell 				   struct sysdev_attribute *attr,	\
3523cee67f7SStephen Rothwell 				   char *buf)			\
35384af458bSBrian King 	{							\
35484af458bSBrian King 		return sprintf(buf, format, ##args);		\
35584af458bSBrian King 	}							\
35684af458bSBrian King 	static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
35784af458bSBrian King 
35884af458bSBrian King CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
35984af458bSBrian King CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
36084af458bSBrian King 
3613cee67f7SStephen Rothwell static ssize_t show_oom_pages(struct sys_device *dev,
3623cee67f7SStephen Rothwell 			      struct sysdev_attribute *attr, char *buf)
36384af458bSBrian King {
36484af458bSBrian King 	return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages));
36584af458bSBrian King }
36684af458bSBrian King 
36784af458bSBrian King static ssize_t store_oom_pages(struct sys_device *dev,
3683cee67f7SStephen Rothwell 			       struct sysdev_attribute *attr,
36984af458bSBrian King 			       const char *buf, size_t count)
37084af458bSBrian King {
37184af458bSBrian King 	unsigned long val = simple_strtoul (buf, NULL, 10);
37284af458bSBrian King 
37384af458bSBrian King 	if (!capable(CAP_SYS_ADMIN))
37484af458bSBrian King 		return -EPERM;
37584af458bSBrian King 	if (val != 0)
37684af458bSBrian King 		return -EBADMSG;
37784af458bSBrian King 
37884af458bSBrian King 	oom_freed_pages = 0;
37984af458bSBrian King 	return count;
38084af458bSBrian King }
38184af458bSBrian King 
38284af458bSBrian King static SYSDEV_ATTR(oom_freed_kb, S_IWUSR| S_IRUGO,
38384af458bSBrian King 		   show_oom_pages, store_oom_pages);
38484af458bSBrian King 
38584af458bSBrian King static struct sysdev_attribute *cmm_attrs[] = {
38684af458bSBrian King 	&attr_loaned_kb,
38784af458bSBrian King 	&attr_loaned_target_kb,
38884af458bSBrian King 	&attr_oom_freed_kb,
38984af458bSBrian King };
39084af458bSBrian King 
39184af458bSBrian King static struct sysdev_class cmm_sysdev_class = {
39284af458bSBrian King 	.name = "cmm",
39384af458bSBrian King };
39484af458bSBrian King 
39584af458bSBrian King /**
39684af458bSBrian King  * cmm_sysfs_register - Register with sysfs
39784af458bSBrian King  *
39884af458bSBrian King  * Return value:
39984af458bSBrian King  * 	0 on success / other on failure
40084af458bSBrian King  **/
40184af458bSBrian King static int cmm_sysfs_register(struct sys_device *sysdev)
40284af458bSBrian King {
40384af458bSBrian King 	int i, rc;
40484af458bSBrian King 
40584af458bSBrian King 	if ((rc = sysdev_class_register(&cmm_sysdev_class)))
40684af458bSBrian King 		return rc;
40784af458bSBrian King 
40884af458bSBrian King 	sysdev->id = 0;
40984af458bSBrian King 	sysdev->cls = &cmm_sysdev_class;
41084af458bSBrian King 
41184af458bSBrian King 	if ((rc = sysdev_register(sysdev)))
41284af458bSBrian King 		goto class_unregister;
41384af458bSBrian King 
41484af458bSBrian King 	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
41584af458bSBrian King 		if ((rc = sysdev_create_file(sysdev, cmm_attrs[i])))
41684af458bSBrian King 			goto fail;
41784af458bSBrian King 	}
41884af458bSBrian King 
41984af458bSBrian King 	return 0;
42084af458bSBrian King 
42184af458bSBrian King fail:
42284af458bSBrian King 	while (--i >= 0)
42384af458bSBrian King 		sysdev_remove_file(sysdev, cmm_attrs[i]);
42484af458bSBrian King 	sysdev_unregister(sysdev);
42584af458bSBrian King class_unregister:
42684af458bSBrian King 	sysdev_class_unregister(&cmm_sysdev_class);
42784af458bSBrian King 	return rc;
42884af458bSBrian King }
42984af458bSBrian King 
43084af458bSBrian King /**
43184af458bSBrian King  * cmm_unregister_sysfs - Unregister from sysfs
43284af458bSBrian King  *
43384af458bSBrian King  **/
43484af458bSBrian King static void cmm_unregister_sysfs(struct sys_device *sysdev)
43584af458bSBrian King {
43684af458bSBrian King 	int i;
43784af458bSBrian King 
43884af458bSBrian King 	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++)
43984af458bSBrian King 		sysdev_remove_file(sysdev, cmm_attrs[i]);
44084af458bSBrian King 	sysdev_unregister(sysdev);
44184af458bSBrian King 	sysdev_class_unregister(&cmm_sysdev_class);
44284af458bSBrian King }
44384af458bSBrian King 
44484af458bSBrian King /**
445fecba962SBrian King  * cmm_reboot_notifier - Make sure pages are not still marked as "loaned"
446fecba962SBrian King  *
447fecba962SBrian King  **/
448fecba962SBrian King static int cmm_reboot_notifier(struct notifier_block *nb,
449fecba962SBrian King 			       unsigned long action, void *unused)
450fecba962SBrian King {
451fecba962SBrian King 	if (action == SYS_RESTART) {
452fecba962SBrian King 		if (cmm_thread_ptr)
453fecba962SBrian King 			kthread_stop(cmm_thread_ptr);
454fecba962SBrian King 		cmm_thread_ptr = NULL;
455fecba962SBrian King 		cmm_free_pages(loaned_pages);
456fecba962SBrian King 	}
457fecba962SBrian King 	return NOTIFY_DONE;
458fecba962SBrian King }
459fecba962SBrian King 
460fecba962SBrian King static struct notifier_block cmm_reboot_nb = {
461fecba962SBrian King 	.notifier_call = cmm_reboot_notifier,
462fecba962SBrian King };
463fecba962SBrian King 
464fecba962SBrian King /**
46514b8a76bSRobert Jennings  * cmm_count_pages - Count the number of pages loaned in a particular range.
46614b8a76bSRobert Jennings  *
46714b8a76bSRobert Jennings  * @arg: memory_isolate_notify structure with address range and count
46814b8a76bSRobert Jennings  *
46914b8a76bSRobert Jennings  * Return value:
47014b8a76bSRobert Jennings  *      0 on success
47114b8a76bSRobert Jennings  **/
47214b8a76bSRobert Jennings static unsigned long cmm_count_pages(void *arg)
47314b8a76bSRobert Jennings {
47414b8a76bSRobert Jennings 	struct memory_isolate_notify *marg = arg;
47514b8a76bSRobert Jennings 	struct cmm_page_array *pa;
47614b8a76bSRobert Jennings 	unsigned long start = (unsigned long)pfn_to_kaddr(marg->start_pfn);
47714b8a76bSRobert Jennings 	unsigned long end = start + (marg->nr_pages << PAGE_SHIFT);
47814b8a76bSRobert Jennings 	unsigned long idx;
47914b8a76bSRobert Jennings 
48014b8a76bSRobert Jennings 	spin_lock(&cmm_lock);
48114b8a76bSRobert Jennings 	pa = cmm_page_list;
48214b8a76bSRobert Jennings 	while (pa) {
48314b8a76bSRobert Jennings 		if ((unsigned long)pa >= start && (unsigned long)pa < end)
48414b8a76bSRobert Jennings 			marg->pages_found++;
48514b8a76bSRobert Jennings 		for (idx = 0; idx < pa->index; idx++)
48614b8a76bSRobert Jennings 			if (pa->page[idx] >= start && pa->page[idx] < end)
48714b8a76bSRobert Jennings 				marg->pages_found++;
48814b8a76bSRobert Jennings 		pa = pa->next;
48914b8a76bSRobert Jennings 	}
49014b8a76bSRobert Jennings 	spin_unlock(&cmm_lock);
49114b8a76bSRobert Jennings 	return 0;
49214b8a76bSRobert Jennings }
49314b8a76bSRobert Jennings 
49414b8a76bSRobert Jennings /**
49514b8a76bSRobert Jennings  * cmm_memory_isolate_cb - Handle memory isolation notifier calls
49614b8a76bSRobert Jennings  * @self:	notifier block struct
49714b8a76bSRobert Jennings  * @action:	action to take
49814b8a76bSRobert Jennings  * @arg:	struct memory_isolate_notify data for handler
49914b8a76bSRobert Jennings  *
50014b8a76bSRobert Jennings  * Return value:
50114b8a76bSRobert Jennings  *	NOTIFY_OK or notifier error based on subfunction return value
50214b8a76bSRobert Jennings  **/
50314b8a76bSRobert Jennings static int cmm_memory_isolate_cb(struct notifier_block *self,
50414b8a76bSRobert Jennings 				 unsigned long action, void *arg)
50514b8a76bSRobert Jennings {
50614b8a76bSRobert Jennings 	int ret = 0;
50714b8a76bSRobert Jennings 
50814b8a76bSRobert Jennings 	if (action == MEM_ISOLATE_COUNT)
50914b8a76bSRobert Jennings 		ret = cmm_count_pages(arg);
51014b8a76bSRobert Jennings 
51114b8a76bSRobert Jennings 	if (ret)
51214b8a76bSRobert Jennings 		ret = notifier_from_errno(ret);
51314b8a76bSRobert Jennings 	else
51414b8a76bSRobert Jennings 		ret = NOTIFY_OK;
51514b8a76bSRobert Jennings 
51614b8a76bSRobert Jennings 	return ret;
51714b8a76bSRobert Jennings }
51814b8a76bSRobert Jennings 
51914b8a76bSRobert Jennings static struct notifier_block cmm_mem_isolate_nb = {
52014b8a76bSRobert Jennings 	.notifier_call = cmm_memory_isolate_cb,
52114b8a76bSRobert Jennings 	.priority = CMM_MEM_ISOLATE_PRI
52214b8a76bSRobert Jennings };
52314b8a76bSRobert Jennings 
52414b8a76bSRobert Jennings /**
52514b8a76bSRobert Jennings  * cmm_mem_going_offline - Unloan pages where memory is to be removed
52614b8a76bSRobert Jennings  * @arg: memory_notify structure with page range to be offlined
52714b8a76bSRobert Jennings  *
52814b8a76bSRobert Jennings  * Return value:
52914b8a76bSRobert Jennings  *	0 on success
53014b8a76bSRobert Jennings  **/
53114b8a76bSRobert Jennings static int cmm_mem_going_offline(void *arg)
53214b8a76bSRobert Jennings {
53314b8a76bSRobert Jennings 	struct memory_notify *marg = arg;
53414b8a76bSRobert Jennings 	unsigned long start_page = (unsigned long)pfn_to_kaddr(marg->start_pfn);
53514b8a76bSRobert Jennings 	unsigned long end_page = start_page + (marg->nr_pages << PAGE_SHIFT);
53614b8a76bSRobert Jennings 	struct cmm_page_array *pa_curr, *pa_last, *npa;
53714b8a76bSRobert Jennings 	unsigned long idx;
53814b8a76bSRobert Jennings 	unsigned long freed = 0;
53914b8a76bSRobert Jennings 
54014b8a76bSRobert Jennings 	cmm_dbg("Memory going offline, searching 0x%lx (%ld pages).\n",
54114b8a76bSRobert Jennings 			start_page, marg->nr_pages);
54214b8a76bSRobert Jennings 	spin_lock(&cmm_lock);
54314b8a76bSRobert Jennings 
54414b8a76bSRobert Jennings 	/* Search the page list for pages in the range to be offlined */
54514b8a76bSRobert Jennings 	pa_last = pa_curr = cmm_page_list;
54614b8a76bSRobert Jennings 	while (pa_curr) {
54714b8a76bSRobert Jennings 		for (idx = (pa_curr->index - 1); (idx + 1) > 0; idx--) {
54814b8a76bSRobert Jennings 			if ((pa_curr->page[idx] < start_page) ||
54914b8a76bSRobert Jennings 			    (pa_curr->page[idx] >= end_page))
55014b8a76bSRobert Jennings 				continue;
55114b8a76bSRobert Jennings 
55214b8a76bSRobert Jennings 			plpar_page_set_active(__pa(pa_curr->page[idx]));
55314b8a76bSRobert Jennings 			free_page(pa_curr->page[idx]);
55414b8a76bSRobert Jennings 			freed++;
55514b8a76bSRobert Jennings 			loaned_pages--;
55614b8a76bSRobert Jennings 			totalram_pages++;
55714b8a76bSRobert Jennings 			pa_curr->page[idx] = pa_last->page[--pa_last->index];
55814b8a76bSRobert Jennings 			if (pa_last->index == 0) {
55914b8a76bSRobert Jennings 				if (pa_curr == pa_last)
56014b8a76bSRobert Jennings 					pa_curr = pa_last->next;
56114b8a76bSRobert Jennings 				pa_last = pa_last->next;
56214b8a76bSRobert Jennings 				free_page((unsigned long)cmm_page_list);
56314b8a76bSRobert Jennings 				cmm_page_list = pa_last;
56414b8a76bSRobert Jennings 				continue;
56514b8a76bSRobert Jennings 			}
56614b8a76bSRobert Jennings 		}
56714b8a76bSRobert Jennings 		pa_curr = pa_curr->next;
56814b8a76bSRobert Jennings 	}
56914b8a76bSRobert Jennings 
57014b8a76bSRobert Jennings 	/* Search for page list structures in the range to be offlined */
57114b8a76bSRobert Jennings 	pa_last = NULL;
57214b8a76bSRobert Jennings 	pa_curr = cmm_page_list;
57314b8a76bSRobert Jennings 	while (pa_curr) {
57414b8a76bSRobert Jennings 		if (((unsigned long)pa_curr >= start_page) &&
57514b8a76bSRobert Jennings 				((unsigned long)pa_curr < end_page)) {
57614b8a76bSRobert Jennings 			npa = (struct cmm_page_array *)__get_free_page(
57714b8a76bSRobert Jennings 					GFP_NOIO | __GFP_NOWARN |
57814b8a76bSRobert Jennings 					__GFP_NORETRY | __GFP_NOMEMALLOC);
57914b8a76bSRobert Jennings 			if (!npa) {
58014b8a76bSRobert Jennings 				spin_unlock(&cmm_lock);
58114b8a76bSRobert Jennings 				cmm_dbg("Failed to allocate memory for list "
58214b8a76bSRobert Jennings 						"management. Memory hotplug "
58314b8a76bSRobert Jennings 						"failed.\n");
58414b8a76bSRobert Jennings 				return ENOMEM;
58514b8a76bSRobert Jennings 			}
58614b8a76bSRobert Jennings 			memcpy(npa, pa_curr, PAGE_SIZE);
58714b8a76bSRobert Jennings 			if (pa_curr == cmm_page_list)
58814b8a76bSRobert Jennings 				cmm_page_list = npa;
58914b8a76bSRobert Jennings 			if (pa_last)
59014b8a76bSRobert Jennings 				pa_last->next = npa;
59114b8a76bSRobert Jennings 			free_page((unsigned long) pa_curr);
59214b8a76bSRobert Jennings 			freed++;
59314b8a76bSRobert Jennings 			pa_curr = npa;
59414b8a76bSRobert Jennings 		}
59514b8a76bSRobert Jennings 
59614b8a76bSRobert Jennings 		pa_last = pa_curr;
59714b8a76bSRobert Jennings 		pa_curr = pa_curr->next;
59814b8a76bSRobert Jennings 	}
59914b8a76bSRobert Jennings 
60014b8a76bSRobert Jennings 	spin_unlock(&cmm_lock);
60114b8a76bSRobert Jennings 	cmm_dbg("Released %ld pages in the search range.\n", freed);
60214b8a76bSRobert Jennings 
60314b8a76bSRobert Jennings 	return 0;
60414b8a76bSRobert Jennings }
60514b8a76bSRobert Jennings 
60614b8a76bSRobert Jennings /**
60714b8a76bSRobert Jennings  * cmm_memory_cb - Handle memory hotplug notifier calls
60814b8a76bSRobert Jennings  * @self:	notifier block struct
60914b8a76bSRobert Jennings  * @action:	action to take
61014b8a76bSRobert Jennings  * @arg:	struct memory_notify data for handler
61114b8a76bSRobert Jennings  *
61214b8a76bSRobert Jennings  * Return value:
61314b8a76bSRobert Jennings  *	NOTIFY_OK or notifier error based on subfunction return value
61414b8a76bSRobert Jennings  *
61514b8a76bSRobert Jennings  **/
61614b8a76bSRobert Jennings static int cmm_memory_cb(struct notifier_block *self,
61714b8a76bSRobert Jennings 			unsigned long action, void *arg)
61814b8a76bSRobert Jennings {
61914b8a76bSRobert Jennings 	int ret = 0;
62014b8a76bSRobert Jennings 
62114b8a76bSRobert Jennings 	switch (action) {
62214b8a76bSRobert Jennings 	case MEM_GOING_OFFLINE:
62314b8a76bSRobert Jennings 		mutex_lock(&hotplug_mutex);
62414b8a76bSRobert Jennings 		hotplug_occurred = 1;
62514b8a76bSRobert Jennings 		ret = cmm_mem_going_offline(arg);
62614b8a76bSRobert Jennings 		break;
62714b8a76bSRobert Jennings 	case MEM_OFFLINE:
62814b8a76bSRobert Jennings 	case MEM_CANCEL_OFFLINE:
62914b8a76bSRobert Jennings 		mutex_unlock(&hotplug_mutex);
63014b8a76bSRobert Jennings 		cmm_dbg("Memory offline operation complete.\n");
63114b8a76bSRobert Jennings 		break;
63214b8a76bSRobert Jennings 	case MEM_GOING_ONLINE:
63314b8a76bSRobert Jennings 	case MEM_ONLINE:
63414b8a76bSRobert Jennings 	case MEM_CANCEL_ONLINE:
63514b8a76bSRobert Jennings 		break;
63614b8a76bSRobert Jennings 	}
63714b8a76bSRobert Jennings 
63814b8a76bSRobert Jennings 	if (ret)
63914b8a76bSRobert Jennings 		ret = notifier_from_errno(ret);
64014b8a76bSRobert Jennings 	else
64114b8a76bSRobert Jennings 		ret = NOTIFY_OK;
64214b8a76bSRobert Jennings 
64314b8a76bSRobert Jennings 	return ret;
64414b8a76bSRobert Jennings }
64514b8a76bSRobert Jennings 
64614b8a76bSRobert Jennings static struct notifier_block cmm_mem_nb = {
64714b8a76bSRobert Jennings 	.notifier_call = cmm_memory_cb,
64814b8a76bSRobert Jennings 	.priority = CMM_MEM_HOTPLUG_PRI
64914b8a76bSRobert Jennings };
65014b8a76bSRobert Jennings 
65114b8a76bSRobert Jennings /**
65284af458bSBrian King  * cmm_init - Module initialization
65384af458bSBrian King  *
65484af458bSBrian King  * Return value:
65584af458bSBrian King  * 	0 on success / other on failure
65684af458bSBrian King  **/
65784af458bSBrian King static int cmm_init(void)
65884af458bSBrian King {
65984af458bSBrian King 	int rc = -ENOMEM;
66084af458bSBrian King 
66184af458bSBrian King 	if (!firmware_has_feature(FW_FEATURE_CMO))
66284af458bSBrian King 		return -EOPNOTSUPP;
66384af458bSBrian King 
66484af458bSBrian King 	if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0)
66584af458bSBrian King 		return rc;
66684af458bSBrian King 
667fecba962SBrian King 	if ((rc = register_reboot_notifier(&cmm_reboot_nb)))
66884af458bSBrian King 		goto out_oom_notifier;
66984af458bSBrian King 
670fecba962SBrian King 	if ((rc = cmm_sysfs_register(&cmm_sysdev)))
671fecba962SBrian King 		goto out_reboot_notifier;
672fecba962SBrian King 
67314b8a76bSRobert Jennings 	if (register_memory_notifier(&cmm_mem_nb) ||
67414b8a76bSRobert Jennings 	    register_memory_isolate_notifier(&cmm_mem_isolate_nb))
67514b8a76bSRobert Jennings 		goto out_unregister_notifier;
67614b8a76bSRobert Jennings 
67784af458bSBrian King 	if (cmm_disabled)
67884af458bSBrian King 		return rc;
67984af458bSBrian King 
68084af458bSBrian King 	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
68184af458bSBrian King 	if (IS_ERR(cmm_thread_ptr)) {
68284af458bSBrian King 		rc = PTR_ERR(cmm_thread_ptr);
68314b8a76bSRobert Jennings 		goto out_unregister_notifier;
68484af458bSBrian King 	}
68584af458bSBrian King 
68684af458bSBrian King 	return rc;
68784af458bSBrian King 
68814b8a76bSRobert Jennings out_unregister_notifier:
68914b8a76bSRobert Jennings 	unregister_memory_notifier(&cmm_mem_nb);
69014b8a76bSRobert Jennings 	unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
69184af458bSBrian King 	cmm_unregister_sysfs(&cmm_sysdev);
692fecba962SBrian King out_reboot_notifier:
693fecba962SBrian King 	unregister_reboot_notifier(&cmm_reboot_nb);
69484af458bSBrian King out_oom_notifier:
69584af458bSBrian King 	unregister_oom_notifier(&cmm_oom_nb);
69684af458bSBrian King 	return rc;
69784af458bSBrian King }
69884af458bSBrian King 
69984af458bSBrian King /**
70084af458bSBrian King  * cmm_exit - Module exit
70184af458bSBrian King  *
70284af458bSBrian King  * Return value:
70384af458bSBrian King  * 	nothing
70484af458bSBrian King  **/
70584af458bSBrian King static void cmm_exit(void)
70684af458bSBrian King {
70784af458bSBrian King 	if (cmm_thread_ptr)
70884af458bSBrian King 		kthread_stop(cmm_thread_ptr);
70984af458bSBrian King 	unregister_oom_notifier(&cmm_oom_nb);
710fecba962SBrian King 	unregister_reboot_notifier(&cmm_reboot_nb);
71114b8a76bSRobert Jennings 	unregister_memory_notifier(&cmm_mem_nb);
71214b8a76bSRobert Jennings 	unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
71384af458bSBrian King 	cmm_free_pages(loaned_pages);
71484af458bSBrian King 	cmm_unregister_sysfs(&cmm_sysdev);
71584af458bSBrian King }
71684af458bSBrian King 
71784af458bSBrian King /**
71884af458bSBrian King  * cmm_set_disable - Disable/Enable CMM
71984af458bSBrian King  *
72084af458bSBrian King  * Return value:
72184af458bSBrian King  * 	0 on success / other on failure
72284af458bSBrian King  **/
72384af458bSBrian King static int cmm_set_disable(const char *val, struct kernel_param *kp)
72484af458bSBrian King {
72584af458bSBrian King 	int disable = simple_strtoul(val, NULL, 10);
72684af458bSBrian King 
72784af458bSBrian King 	if (disable != 0 && disable != 1)
72884af458bSBrian King 		return -EINVAL;
72984af458bSBrian King 
73084af458bSBrian King 	if (disable && !cmm_disabled) {
73184af458bSBrian King 		if (cmm_thread_ptr)
73284af458bSBrian King 			kthread_stop(cmm_thread_ptr);
73384af458bSBrian King 		cmm_thread_ptr = NULL;
73484af458bSBrian King 		cmm_free_pages(loaned_pages);
73584af458bSBrian King 	} else if (!disable && cmm_disabled) {
73684af458bSBrian King 		cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
73784af458bSBrian King 		if (IS_ERR(cmm_thread_ptr))
73884af458bSBrian King 			return PTR_ERR(cmm_thread_ptr);
73984af458bSBrian King 	}
74084af458bSBrian King 
74184af458bSBrian King 	cmm_disabled = disable;
74284af458bSBrian King 	return 0;
74384af458bSBrian King }
74484af458bSBrian King 
74584af458bSBrian King module_param_call(disable, cmm_set_disable, param_get_uint,
74684af458bSBrian King 		  &cmm_disabled, S_IRUGO | S_IWUSR);
74784af458bSBrian King MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
74884af458bSBrian King 		 "[Default=" __stringify(CMM_DISABLE) "]");
74984af458bSBrian King 
75084af458bSBrian King module_init(cmm_init);
75184af458bSBrian King module_exit(cmm_exit);
752