xref: /openbmc/linux/arch/powerpc/platforms/pseries/cmm.c (revision 212bebb4097837ec0b601c42be839c1314994dc2)
/*
 * Collaborative memory management interface.
 *
 * Copyright (C) 2008 IBM Corporation
 * Author(s): Brian King (brking@linux.vnet.ibm.com),
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */
2284af458bSBrian King 
2384af458bSBrian King #include <linux/ctype.h>
2484af458bSBrian King #include <linux/delay.h>
2584af458bSBrian King #include <linux/errno.h>
2684af458bSBrian King #include <linux/fs.h>
275a0e3ad6STejun Heo #include <linux/gfp.h>
2884af458bSBrian King #include <linux/init.h>
2984af458bSBrian King #include <linux/kthread.h>
3084af458bSBrian King #include <linux/module.h>
3184af458bSBrian King #include <linux/oom.h>
32fecba962SBrian King #include <linux/reboot.h>
3384af458bSBrian King #include <linux/sched.h>
3484af458bSBrian King #include <linux/stringify.h>
3584af458bSBrian King #include <linux/swap.h>
366c9d2909SKay Sievers #include <linux/device.h>
3784af458bSBrian King #include <asm/firmware.h>
3884af458bSBrian King #include <asm/hvcall.h>
3984af458bSBrian King #include <asm/mmu.h>
4084af458bSBrian King #include <asm/pgalloc.h>
4184af458bSBrian King #include <asm/uaccess.h>
4214b8a76bSRobert Jennings #include <linux/memory.h>
43*212bebb4SDeepthi Dharwar #include <asm/plpar_wrappers.h>
4484af458bSBrian King 
/* Version string handed to MODULE_VERSION(). */
#define CMM_DRIVER_VERSION	"1.0.0"

/* Initial values of the driver tunables. */
#define CMM_DEFAULT_DELAY	1	/* seconds between hypervisor polls */
#define CMM_HOTPLUG_DELAY	5	/* seconds to pause after a hotplug remove */
#define CMM_DEBUG		0	/* debug logging off */
#define CMM_DISABLE		0	/* loaning thread enabled */
#define CMM_OOM_KB		1024	/* kb released per OOM notification */
#define CMM_MIN_MEM_MB		256	/* MB that are never ballooned */

/* Convert between kilobytes and pages. */
#define KB2PAGES(_p)		((_p) >> (PAGE_SHIFT - 10))
#define PAGES2KB(_p)		((_p) << (PAGE_SHIFT - 10))

/*
 * The priority level tries to ensure that this notifier is called as
 * late as possible to reduce thrashing in the shared memory pool.
 */
#define CMM_MEM_HOTPLUG_PRI	1
#define CMM_MEM_ISOLATE_PRI	15
6084af458bSBrian King 
6184af458bSBrian King static unsigned int delay = CMM_DEFAULT_DELAY;
6214b8a76bSRobert Jennings static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
6384af458bSBrian King static unsigned int oom_kb = CMM_OOM_KB;
6484af458bSBrian King static unsigned int cmm_debug = CMM_DEBUG;
6584af458bSBrian King static unsigned int cmm_disabled = CMM_DISABLE;
6684af458bSBrian King static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
676c9d2909SKay Sievers static struct device cmm_dev;
6884af458bSBrian King 
6984af458bSBrian King MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
7084af458bSBrian King MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
7184af458bSBrian King MODULE_LICENSE("GPL");
7284af458bSBrian King MODULE_VERSION(CMM_DRIVER_VERSION);
7384af458bSBrian King 
7484af458bSBrian King module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
7584af458bSBrian King MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
7684af458bSBrian King 		 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
7714b8a76bSRobert Jennings module_param_named(hotplug_delay, hotplug_delay, uint, S_IRUGO | S_IWUSR);
7814b8a76bSRobert Jennings MODULE_PARM_DESC(delay, "Delay (in seconds) after memory hotplug remove "
7914b8a76bSRobert Jennings 		 "before loaning resumes. "
8014b8a76bSRobert Jennings 		 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
8184af458bSBrian King module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
8284af458bSBrian King MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
8384af458bSBrian King 		 "[Default=" __stringify(CMM_OOM_KB) "]");
8484af458bSBrian King module_param_named(min_mem_mb, min_mem_mb, ulong, S_IRUGO | S_IWUSR);
8584af458bSBrian King MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
8684af458bSBrian King 		 "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
8784af458bSBrian King module_param_named(debug, cmm_debug, uint, S_IRUGO | S_IWUSR);
8884af458bSBrian King MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
8984af458bSBrian King 		 "[Default=" __stringify(CMM_DEBUG) "]");
9084af458bSBrian King 
/*
 * Number of page addresses that fit in one struct cmm_page_array once the
 * link pointer and the index field have been accounted for.
 */
#define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long))

/*
 * Log a "cmm: "-prefixed message at KERN_INFO when cmm_debug is non-zero.
 * Wrapped in do { } while (0) so the macro acts as a single statement and
 * can be used as the unbraced body of an if/else.
 */
#define cmm_dbg(...)						\
	do {							\
		if (cmm_debug)					\
			printk(KERN_INFO "cmm: " __VA_ARGS__);	\
	} while (0)
9484af458bSBrian King 
9584af458bSBrian King struct cmm_page_array {
9684af458bSBrian King 	struct cmm_page_array *next;
9784af458bSBrian King 	unsigned long index;
9884af458bSBrian King 	unsigned long page[CMM_NR_PAGES];
9984af458bSBrian King };
10084af458bSBrian King 
/* Loan accounting, all in pages. */
static unsigned long loaned_pages;
static unsigned long loaned_pages_target;
static unsigned long oom_freed_pages;

/* Head of the loaned page list; cmm_lock is held while it is walked. */
static struct cmm_page_array *cmm_page_list;
static DEFINE_SPINLOCK(cmm_lock);

static DEFINE_MUTEX(hotplug_mutex);
static int hotplug_occurred; /* protected by the hotplug mutex */

/* Task running cmm_thread(). */
static struct task_struct *cmm_thread_ptr;
11284af458bSBrian King 
11384af458bSBrian King /**
11484af458bSBrian King  * cmm_alloc_pages - Allocate pages and mark them as loaned
11584af458bSBrian King  * @nr:	number of pages to allocate
11684af458bSBrian King  *
11784af458bSBrian King  * Return value:
11884af458bSBrian King  * 	number of pages requested to be allocated which were not
11984af458bSBrian King  **/
12084af458bSBrian King static long cmm_alloc_pages(long nr)
12184af458bSBrian King {
12284af458bSBrian King 	struct cmm_page_array *pa, *npa;
12384af458bSBrian King 	unsigned long addr;
12484af458bSBrian King 	long rc;
12584af458bSBrian King 
12684af458bSBrian King 	cmm_dbg("Begin request for %ld pages\n", nr);
12784af458bSBrian King 
12884af458bSBrian King 	while (nr) {
12914b8a76bSRobert Jennings 		/* Exit if a hotplug operation is in progress or occurred */
13014b8a76bSRobert Jennings 		if (mutex_trylock(&hotplug_mutex)) {
13114b8a76bSRobert Jennings 			if (hotplug_occurred) {
13214b8a76bSRobert Jennings 				mutex_unlock(&hotplug_mutex);
13314b8a76bSRobert Jennings 				break;
13414b8a76bSRobert Jennings 			}
13514b8a76bSRobert Jennings 			mutex_unlock(&hotplug_mutex);
13614b8a76bSRobert Jennings 		} else {
13714b8a76bSRobert Jennings 			break;
13814b8a76bSRobert Jennings 		}
13914b8a76bSRobert Jennings 
14084af458bSBrian King 		addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
14184af458bSBrian King 				       __GFP_NORETRY | __GFP_NOMEMALLOC);
14284af458bSBrian King 		if (!addr)
14384af458bSBrian King 			break;
14484af458bSBrian King 		spin_lock(&cmm_lock);
14584af458bSBrian King 		pa = cmm_page_list;
14684af458bSBrian King 		if (!pa || pa->index >= CMM_NR_PAGES) {
14784af458bSBrian King 			/* Need a new page for the page list. */
14884af458bSBrian King 			spin_unlock(&cmm_lock);
14914b8a76bSRobert Jennings 			npa = (struct cmm_page_array *)__get_free_page(
15014b8a76bSRobert Jennings 					GFP_NOIO | __GFP_NOWARN |
15184af458bSBrian King 					__GFP_NORETRY | __GFP_NOMEMALLOC);
15284af458bSBrian King 			if (!npa) {
1535df72bf3SHarvey Harrison 				pr_info("%s: Can not allocate new page list\n", __func__);
15484af458bSBrian King 				free_page(addr);
15584af458bSBrian King 				break;
15684af458bSBrian King 			}
15784af458bSBrian King 			spin_lock(&cmm_lock);
15884af458bSBrian King 			pa = cmm_page_list;
15984af458bSBrian King 
16084af458bSBrian King 			if (!pa || pa->index >= CMM_NR_PAGES) {
16184af458bSBrian King 				npa->next = pa;
16284af458bSBrian King 				npa->index = 0;
16384af458bSBrian King 				pa = npa;
16484af458bSBrian King 				cmm_page_list = pa;
16584af458bSBrian King 			} else
16684af458bSBrian King 				free_page((unsigned long) npa);
16784af458bSBrian King 		}
16884af458bSBrian King 
16984af458bSBrian King 		if ((rc = plpar_page_set_loaned(__pa(addr)))) {
1705df72bf3SHarvey Harrison 			pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc);
17184af458bSBrian King 			spin_unlock(&cmm_lock);
17284af458bSBrian King 			free_page(addr);
17384af458bSBrian King 			break;
17484af458bSBrian King 		}
17584af458bSBrian King 
17684af458bSBrian King 		pa->page[pa->index++] = addr;
17784af458bSBrian King 		loaned_pages++;
17884af458bSBrian King 		totalram_pages--;
17984af458bSBrian King 		spin_unlock(&cmm_lock);
18084af458bSBrian King 		nr--;
18184af458bSBrian King 	}
18284af458bSBrian King 
18384af458bSBrian King 	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
18484af458bSBrian King 	return nr;
18584af458bSBrian King }
18684af458bSBrian King 
18784af458bSBrian King /**
18884af458bSBrian King  * cmm_free_pages - Free pages and mark them as active
18984af458bSBrian King  * @nr:	number of pages to free
19084af458bSBrian King  *
19184af458bSBrian King  * Return value:
19284af458bSBrian King  * 	number of pages requested to be freed which were not
19384af458bSBrian King  **/
19484af458bSBrian King static long cmm_free_pages(long nr)
19584af458bSBrian King {
19684af458bSBrian King 	struct cmm_page_array *pa;
19784af458bSBrian King 	unsigned long addr;
19884af458bSBrian King 
19984af458bSBrian King 	cmm_dbg("Begin free of %ld pages.\n", nr);
20084af458bSBrian King 	spin_lock(&cmm_lock);
20184af458bSBrian King 	pa = cmm_page_list;
20284af458bSBrian King 	while (nr) {
20384af458bSBrian King 		if (!pa || pa->index <= 0)
20484af458bSBrian King 			break;
20584af458bSBrian King 		addr = pa->page[--pa->index];
20684af458bSBrian King 
20784af458bSBrian King 		if (pa->index == 0) {
20884af458bSBrian King 			pa = pa->next;
20984af458bSBrian King 			free_page((unsigned long) cmm_page_list);
21084af458bSBrian King 			cmm_page_list = pa;
21184af458bSBrian King 		}
21284af458bSBrian King 
21384af458bSBrian King 		plpar_page_set_active(__pa(addr));
21484af458bSBrian King 		free_page(addr);
21584af458bSBrian King 		loaned_pages--;
21684af458bSBrian King 		nr--;
21784af458bSBrian King 		totalram_pages++;
21884af458bSBrian King 	}
21984af458bSBrian King 	spin_unlock(&cmm_lock);
22084af458bSBrian King 	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
22184af458bSBrian King 	return nr;
22284af458bSBrian King }
22384af458bSBrian King 
22484af458bSBrian King /**
22584af458bSBrian King  * cmm_oom_notify - OOM notifier
22684af458bSBrian King  * @self:	notifier block struct
22784af458bSBrian King  * @dummy:	not used
22884af458bSBrian King  * @parm:	returned - number of pages freed
22984af458bSBrian King  *
23084af458bSBrian King  * Return value:
23184af458bSBrian King  * 	NOTIFY_OK
23284af458bSBrian King  **/
23384af458bSBrian King static int cmm_oom_notify(struct notifier_block *self,
23484af458bSBrian King 			  unsigned long dummy, void *parm)
23584af458bSBrian King {
23684af458bSBrian King 	unsigned long *freed = parm;
23784af458bSBrian King 	long nr = KB2PAGES(oom_kb);
23884af458bSBrian King 
23984af458bSBrian King 	cmm_dbg("OOM processing started\n");
24084af458bSBrian King 	nr = cmm_free_pages(nr);
24184af458bSBrian King 	loaned_pages_target = loaned_pages;
24284af458bSBrian King 	*freed += KB2PAGES(oom_kb) - nr;
24384af458bSBrian King 	oom_freed_pages += KB2PAGES(oom_kb) - nr;
24484af458bSBrian King 	cmm_dbg("OOM processing complete\n");
24584af458bSBrian King 	return NOTIFY_OK;
24684af458bSBrian King }
24784af458bSBrian King 
24884af458bSBrian King /**
24984af458bSBrian King  * cmm_get_mpp - Read memory performance parameters
25084af458bSBrian King  *
25184af458bSBrian King  * Makes hcall to query the current page loan request from the hypervisor.
25284af458bSBrian King  *
25384af458bSBrian King  * Return value:
25484af458bSBrian King  * 	nothing
25584af458bSBrian King  **/
25684af458bSBrian King static void cmm_get_mpp(void)
25784af458bSBrian King {
25884af458bSBrian King 	int rc;
25984af458bSBrian King 	struct hvcall_mpp_data mpp_data;
2608be8cf5bSBrian King 	signed long active_pages_target, page_loan_request, target;
2618be8cf5bSBrian King 	signed long total_pages = totalram_pages + loaned_pages;
2628be8cf5bSBrian King 	signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;
26384af458bSBrian King 
26484af458bSBrian King 	rc = h_get_mpp(&mpp_data);
26584af458bSBrian King 
26684af458bSBrian King 	if (rc != H_SUCCESS)
26784af458bSBrian King 		return;
26884af458bSBrian King 
26984af458bSBrian King 	page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
2708be8cf5bSBrian King 	target = page_loan_request + (signed long)loaned_pages;
2718be8cf5bSBrian King 
2728be8cf5bSBrian King 	if (target < 0 || total_pages < min_mem_pages)
2738be8cf5bSBrian King 		target = 0;
2748be8cf5bSBrian King 
2758be8cf5bSBrian King 	if (target > oom_freed_pages)
2768be8cf5bSBrian King 		target -= oom_freed_pages;
27784af458bSBrian King 	else
2788be8cf5bSBrian King 		target = 0;
27984af458bSBrian King 
2808be8cf5bSBrian King 	active_pages_target = total_pages - target;
28184af458bSBrian King 
2828be8cf5bSBrian King 	if (min_mem_pages > active_pages_target)
2838be8cf5bSBrian King 		target = total_pages - min_mem_pages;
2848be8cf5bSBrian King 
2858be8cf5bSBrian King 	if (target < 0)
2868be8cf5bSBrian King 		target = 0;
2878be8cf5bSBrian King 
2888be8cf5bSBrian King 	loaned_pages_target = target;
28984af458bSBrian King 
29084af458bSBrian King 	cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
29184af458bSBrian King 		page_loan_request, loaned_pages, loaned_pages_target,
29284af458bSBrian King 		oom_freed_pages, totalram_pages);
29384af458bSBrian King }
29484af458bSBrian King 
29584af458bSBrian King static struct notifier_block cmm_oom_nb = {
29684af458bSBrian King 	.notifier_call = cmm_oom_notify
29784af458bSBrian King };
29884af458bSBrian King 
29984af458bSBrian King /**
30084af458bSBrian King  * cmm_thread - CMM task thread
30184af458bSBrian King  * @dummy:	not used
30284af458bSBrian King  *
30384af458bSBrian King  * Return value:
30484af458bSBrian King  * 	0
30584af458bSBrian King  **/
30684af458bSBrian King static int cmm_thread(void *dummy)
30784af458bSBrian King {
30884af458bSBrian King 	unsigned long timeleft;
30984af458bSBrian King 
31084af458bSBrian King 	while (1) {
31184af458bSBrian King 		timeleft = msleep_interruptible(delay * 1000);
31284af458bSBrian King 
31314b8a76bSRobert Jennings 		if (kthread_should_stop() || timeleft)
31484af458bSBrian King 			break;
31514b8a76bSRobert Jennings 
31614b8a76bSRobert Jennings 		if (mutex_trylock(&hotplug_mutex)) {
31714b8a76bSRobert Jennings 			if (hotplug_occurred) {
31814b8a76bSRobert Jennings 				hotplug_occurred = 0;
31914b8a76bSRobert Jennings 				mutex_unlock(&hotplug_mutex);
32014b8a76bSRobert Jennings 				cmm_dbg("Hotplug operation has occurred, "
32114b8a76bSRobert Jennings 						"loaning activity suspended "
32214b8a76bSRobert Jennings 						"for %d seconds.\n",
32314b8a76bSRobert Jennings 						hotplug_delay);
32414b8a76bSRobert Jennings 				timeleft = msleep_interruptible(hotplug_delay *
32514b8a76bSRobert Jennings 						1000);
32614b8a76bSRobert Jennings 				if (kthread_should_stop() || timeleft)
32714b8a76bSRobert Jennings 					break;
32814b8a76bSRobert Jennings 				continue;
32914b8a76bSRobert Jennings 			}
33014b8a76bSRobert Jennings 			mutex_unlock(&hotplug_mutex);
33114b8a76bSRobert Jennings 		} else {
33214b8a76bSRobert Jennings 			cmm_dbg("Hotplug operation in progress, activity "
33314b8a76bSRobert Jennings 					"suspended\n");
33414b8a76bSRobert Jennings 			continue;
33584af458bSBrian King 		}
33684af458bSBrian King 
33784af458bSBrian King 		cmm_get_mpp();
33884af458bSBrian King 
33984af458bSBrian King 		if (loaned_pages_target > loaned_pages) {
34084af458bSBrian King 			if (cmm_alloc_pages(loaned_pages_target - loaned_pages))
34184af458bSBrian King 				loaned_pages_target = loaned_pages;
34284af458bSBrian King 		} else if (loaned_pages_target < loaned_pages)
34384af458bSBrian King 			cmm_free_pages(loaned_pages - loaned_pages_target);
34484af458bSBrian King 	}
34584af458bSBrian King 	return 0;
34684af458bSBrian King }
34784af458bSBrian King 
34884af458bSBrian King #define CMM_SHOW(name, format, args...)			\
3496c9d2909SKay Sievers 	static ssize_t show_##name(struct device *dev,	\
3506c9d2909SKay Sievers 				   struct device_attribute *attr,	\
3513cee67f7SStephen Rothwell 				   char *buf)			\
35284af458bSBrian King 	{							\
35384af458bSBrian King 		return sprintf(buf, format, ##args);		\
35484af458bSBrian King 	}							\
3556c9d2909SKay Sievers 	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
35684af458bSBrian King 
35784af458bSBrian King CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
35884af458bSBrian King CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
35984af458bSBrian King 
3606c9d2909SKay Sievers static ssize_t show_oom_pages(struct device *dev,
3616c9d2909SKay Sievers 			      struct device_attribute *attr, char *buf)
36284af458bSBrian King {
36384af458bSBrian King 	return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages));
36484af458bSBrian King }
36584af458bSBrian King 
3666c9d2909SKay Sievers static ssize_t store_oom_pages(struct device *dev,
3676c9d2909SKay Sievers 			       struct device_attribute *attr,
36884af458bSBrian King 			       const char *buf, size_t count)
36984af458bSBrian King {
37084af458bSBrian King 	unsigned long val = simple_strtoul (buf, NULL, 10);
37184af458bSBrian King 
37284af458bSBrian King 	if (!capable(CAP_SYS_ADMIN))
37384af458bSBrian King 		return -EPERM;
37484af458bSBrian King 	if (val != 0)
37584af458bSBrian King 		return -EBADMSG;
37684af458bSBrian King 
37784af458bSBrian King 	oom_freed_pages = 0;
37884af458bSBrian King 	return count;
37984af458bSBrian King }
38084af458bSBrian King 
3816c9d2909SKay Sievers static DEVICE_ATTR(oom_freed_kb, S_IWUSR | S_IRUGO,
38284af458bSBrian King 		   show_oom_pages, store_oom_pages);
38384af458bSBrian King 
3846c9d2909SKay Sievers static struct device_attribute *cmm_attrs[] = {
3856c9d2909SKay Sievers 	&dev_attr_loaned_kb,
3866c9d2909SKay Sievers 	&dev_attr_loaned_target_kb,
3876c9d2909SKay Sievers 	&dev_attr_oom_freed_kb,
38884af458bSBrian King };
38984af458bSBrian King 
3906c9d2909SKay Sievers static struct bus_type cmm_subsys = {
39184af458bSBrian King 	.name = "cmm",
3926c9d2909SKay Sievers 	.dev_name = "cmm",
39384af458bSBrian King };
39484af458bSBrian King 
39584af458bSBrian King /**
39684af458bSBrian King  * cmm_sysfs_register - Register with sysfs
39784af458bSBrian King  *
39884af458bSBrian King  * Return value:
39984af458bSBrian King  * 	0 on success / other on failure
40084af458bSBrian King  **/
4016c9d2909SKay Sievers static int cmm_sysfs_register(struct device *dev)
40284af458bSBrian King {
40384af458bSBrian King 	int i, rc;
40484af458bSBrian King 
4056c9d2909SKay Sievers 	if ((rc = subsys_system_register(&cmm_subsys, NULL)))
40684af458bSBrian King 		return rc;
40784af458bSBrian King 
4086c9d2909SKay Sievers 	dev->id = 0;
4096c9d2909SKay Sievers 	dev->bus = &cmm_subsys;
41084af458bSBrian King 
4116c9d2909SKay Sievers 	if ((rc = device_register(dev)))
4126c9d2909SKay Sievers 		goto subsys_unregister;
41384af458bSBrian King 
41484af458bSBrian King 	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
4156c9d2909SKay Sievers 		if ((rc = device_create_file(dev, cmm_attrs[i])))
41684af458bSBrian King 			goto fail;
41784af458bSBrian King 	}
41884af458bSBrian King 
41984af458bSBrian King 	return 0;
42084af458bSBrian King 
42184af458bSBrian King fail:
42284af458bSBrian King 	while (--i >= 0)
4236c9d2909SKay Sievers 		device_remove_file(dev, cmm_attrs[i]);
4246c9d2909SKay Sievers 	device_unregister(dev);
4256c9d2909SKay Sievers subsys_unregister:
4266c9d2909SKay Sievers 	bus_unregister(&cmm_subsys);
42784af458bSBrian King 	return rc;
42884af458bSBrian King }
42984af458bSBrian King 
43084af458bSBrian King /**
43184af458bSBrian King  * cmm_unregister_sysfs - Unregister from sysfs
43284af458bSBrian King  *
43384af458bSBrian King  **/
4346c9d2909SKay Sievers static void cmm_unregister_sysfs(struct device *dev)
43584af458bSBrian King {
43684af458bSBrian King 	int i;
43784af458bSBrian King 
43884af458bSBrian King 	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++)
4396c9d2909SKay Sievers 		device_remove_file(dev, cmm_attrs[i]);
4406c9d2909SKay Sievers 	device_unregister(dev);
4416c9d2909SKay Sievers 	bus_unregister(&cmm_subsys);
44284af458bSBrian King }
44384af458bSBrian King 
44484af458bSBrian King /**
445fecba962SBrian King  * cmm_reboot_notifier - Make sure pages are not still marked as "loaned"
446fecba962SBrian King  *
447fecba962SBrian King  **/
448fecba962SBrian King static int cmm_reboot_notifier(struct notifier_block *nb,
449fecba962SBrian King 			       unsigned long action, void *unused)
450fecba962SBrian King {
451fecba962SBrian King 	if (action == SYS_RESTART) {
452fecba962SBrian King 		if (cmm_thread_ptr)
453fecba962SBrian King 			kthread_stop(cmm_thread_ptr);
454fecba962SBrian King 		cmm_thread_ptr = NULL;
455fecba962SBrian King 		cmm_free_pages(loaned_pages);
456fecba962SBrian King 	}
457fecba962SBrian King 	return NOTIFY_DONE;
458fecba962SBrian King }
459fecba962SBrian King 
460fecba962SBrian King static struct notifier_block cmm_reboot_nb = {
461fecba962SBrian King 	.notifier_call = cmm_reboot_notifier,
462fecba962SBrian King };
463fecba962SBrian King 
464fecba962SBrian King /**
46514b8a76bSRobert Jennings  * cmm_count_pages - Count the number of pages loaned in a particular range.
46614b8a76bSRobert Jennings  *
46714b8a76bSRobert Jennings  * @arg: memory_isolate_notify structure with address range and count
46814b8a76bSRobert Jennings  *
46914b8a76bSRobert Jennings  * Return value:
47014b8a76bSRobert Jennings  *      0 on success
47114b8a76bSRobert Jennings  **/
47214b8a76bSRobert Jennings static unsigned long cmm_count_pages(void *arg)
47314b8a76bSRobert Jennings {
47414b8a76bSRobert Jennings 	struct memory_isolate_notify *marg = arg;
47514b8a76bSRobert Jennings 	struct cmm_page_array *pa;
47614b8a76bSRobert Jennings 	unsigned long start = (unsigned long)pfn_to_kaddr(marg->start_pfn);
47714b8a76bSRobert Jennings 	unsigned long end = start + (marg->nr_pages << PAGE_SHIFT);
47814b8a76bSRobert Jennings 	unsigned long idx;
47914b8a76bSRobert Jennings 
48014b8a76bSRobert Jennings 	spin_lock(&cmm_lock);
48114b8a76bSRobert Jennings 	pa = cmm_page_list;
48214b8a76bSRobert Jennings 	while (pa) {
48314b8a76bSRobert Jennings 		if ((unsigned long)pa >= start && (unsigned long)pa < end)
48414b8a76bSRobert Jennings 			marg->pages_found++;
48514b8a76bSRobert Jennings 		for (idx = 0; idx < pa->index; idx++)
48614b8a76bSRobert Jennings 			if (pa->page[idx] >= start && pa->page[idx] < end)
48714b8a76bSRobert Jennings 				marg->pages_found++;
48814b8a76bSRobert Jennings 		pa = pa->next;
48914b8a76bSRobert Jennings 	}
49014b8a76bSRobert Jennings 	spin_unlock(&cmm_lock);
49114b8a76bSRobert Jennings 	return 0;
49214b8a76bSRobert Jennings }
49314b8a76bSRobert Jennings 
49414b8a76bSRobert Jennings /**
49514b8a76bSRobert Jennings  * cmm_memory_isolate_cb - Handle memory isolation notifier calls
49614b8a76bSRobert Jennings  * @self:	notifier block struct
49714b8a76bSRobert Jennings  * @action:	action to take
49814b8a76bSRobert Jennings  * @arg:	struct memory_isolate_notify data for handler
49914b8a76bSRobert Jennings  *
50014b8a76bSRobert Jennings  * Return value:
50114b8a76bSRobert Jennings  *	NOTIFY_OK or notifier error based on subfunction return value
50214b8a76bSRobert Jennings  **/
50314b8a76bSRobert Jennings static int cmm_memory_isolate_cb(struct notifier_block *self,
50414b8a76bSRobert Jennings 				 unsigned long action, void *arg)
50514b8a76bSRobert Jennings {
50614b8a76bSRobert Jennings 	int ret = 0;
50714b8a76bSRobert Jennings 
50814b8a76bSRobert Jennings 	if (action == MEM_ISOLATE_COUNT)
50914b8a76bSRobert Jennings 		ret = cmm_count_pages(arg);
51014b8a76bSRobert Jennings 
5117e26065dSPrarit Bhargava 	return notifier_from_errno(ret);
51214b8a76bSRobert Jennings }
51314b8a76bSRobert Jennings 
51414b8a76bSRobert Jennings static struct notifier_block cmm_mem_isolate_nb = {
51514b8a76bSRobert Jennings 	.notifier_call = cmm_memory_isolate_cb,
51614b8a76bSRobert Jennings 	.priority = CMM_MEM_ISOLATE_PRI
51714b8a76bSRobert Jennings };
51814b8a76bSRobert Jennings 
51914b8a76bSRobert Jennings /**
52014b8a76bSRobert Jennings  * cmm_mem_going_offline - Unloan pages where memory is to be removed
52114b8a76bSRobert Jennings  * @arg: memory_notify structure with page range to be offlined
52214b8a76bSRobert Jennings  *
52314b8a76bSRobert Jennings  * Return value:
52414b8a76bSRobert Jennings  *	0 on success
52514b8a76bSRobert Jennings  **/
52614b8a76bSRobert Jennings static int cmm_mem_going_offline(void *arg)
52714b8a76bSRobert Jennings {
52814b8a76bSRobert Jennings 	struct memory_notify *marg = arg;
52914b8a76bSRobert Jennings 	unsigned long start_page = (unsigned long)pfn_to_kaddr(marg->start_pfn);
53014b8a76bSRobert Jennings 	unsigned long end_page = start_page + (marg->nr_pages << PAGE_SHIFT);
53114b8a76bSRobert Jennings 	struct cmm_page_array *pa_curr, *pa_last, *npa;
53214b8a76bSRobert Jennings 	unsigned long idx;
53314b8a76bSRobert Jennings 	unsigned long freed = 0;
53414b8a76bSRobert Jennings 
53514b8a76bSRobert Jennings 	cmm_dbg("Memory going offline, searching 0x%lx (%ld pages).\n",
53614b8a76bSRobert Jennings 			start_page, marg->nr_pages);
53714b8a76bSRobert Jennings 	spin_lock(&cmm_lock);
53814b8a76bSRobert Jennings 
53914b8a76bSRobert Jennings 	/* Search the page list for pages in the range to be offlined */
54014b8a76bSRobert Jennings 	pa_last = pa_curr = cmm_page_list;
54114b8a76bSRobert Jennings 	while (pa_curr) {
54214b8a76bSRobert Jennings 		for (idx = (pa_curr->index - 1); (idx + 1) > 0; idx--) {
54314b8a76bSRobert Jennings 			if ((pa_curr->page[idx] < start_page) ||
54414b8a76bSRobert Jennings 			    (pa_curr->page[idx] >= end_page))
54514b8a76bSRobert Jennings 				continue;
54614b8a76bSRobert Jennings 
54714b8a76bSRobert Jennings 			plpar_page_set_active(__pa(pa_curr->page[idx]));
54814b8a76bSRobert Jennings 			free_page(pa_curr->page[idx]);
54914b8a76bSRobert Jennings 			freed++;
55014b8a76bSRobert Jennings 			loaned_pages--;
55114b8a76bSRobert Jennings 			totalram_pages++;
55214b8a76bSRobert Jennings 			pa_curr->page[idx] = pa_last->page[--pa_last->index];
55314b8a76bSRobert Jennings 			if (pa_last->index == 0) {
55414b8a76bSRobert Jennings 				if (pa_curr == pa_last)
55514b8a76bSRobert Jennings 					pa_curr = pa_last->next;
55614b8a76bSRobert Jennings 				pa_last = pa_last->next;
55714b8a76bSRobert Jennings 				free_page((unsigned long)cmm_page_list);
55814b8a76bSRobert Jennings 				cmm_page_list = pa_last;
55914b8a76bSRobert Jennings 				continue;
56014b8a76bSRobert Jennings 			}
56114b8a76bSRobert Jennings 		}
56214b8a76bSRobert Jennings 		pa_curr = pa_curr->next;
56314b8a76bSRobert Jennings 	}
56414b8a76bSRobert Jennings 
56514b8a76bSRobert Jennings 	/* Search for page list structures in the range to be offlined */
56614b8a76bSRobert Jennings 	pa_last = NULL;
56714b8a76bSRobert Jennings 	pa_curr = cmm_page_list;
56814b8a76bSRobert Jennings 	while (pa_curr) {
56914b8a76bSRobert Jennings 		if (((unsigned long)pa_curr >= start_page) &&
57014b8a76bSRobert Jennings 				((unsigned long)pa_curr < end_page)) {
57114b8a76bSRobert Jennings 			npa = (struct cmm_page_array *)__get_free_page(
57214b8a76bSRobert Jennings 					GFP_NOIO | __GFP_NOWARN |
57314b8a76bSRobert Jennings 					__GFP_NORETRY | __GFP_NOMEMALLOC);
57414b8a76bSRobert Jennings 			if (!npa) {
57514b8a76bSRobert Jennings 				spin_unlock(&cmm_lock);
57614b8a76bSRobert Jennings 				cmm_dbg("Failed to allocate memory for list "
57714b8a76bSRobert Jennings 						"management. Memory hotplug "
57814b8a76bSRobert Jennings 						"failed.\n");
57914b8a76bSRobert Jennings 				return ENOMEM;
58014b8a76bSRobert Jennings 			}
58114b8a76bSRobert Jennings 			memcpy(npa, pa_curr, PAGE_SIZE);
58214b8a76bSRobert Jennings 			if (pa_curr == cmm_page_list)
58314b8a76bSRobert Jennings 				cmm_page_list = npa;
58414b8a76bSRobert Jennings 			if (pa_last)
58514b8a76bSRobert Jennings 				pa_last->next = npa;
58614b8a76bSRobert Jennings 			free_page((unsigned long) pa_curr);
58714b8a76bSRobert Jennings 			freed++;
58814b8a76bSRobert Jennings 			pa_curr = npa;
58914b8a76bSRobert Jennings 		}
59014b8a76bSRobert Jennings 
59114b8a76bSRobert Jennings 		pa_last = pa_curr;
59214b8a76bSRobert Jennings 		pa_curr = pa_curr->next;
59314b8a76bSRobert Jennings 	}
59414b8a76bSRobert Jennings 
59514b8a76bSRobert Jennings 	spin_unlock(&cmm_lock);
59614b8a76bSRobert Jennings 	cmm_dbg("Released %ld pages in the search range.\n", freed);
59714b8a76bSRobert Jennings 
59814b8a76bSRobert Jennings 	return 0;
59914b8a76bSRobert Jennings }
60014b8a76bSRobert Jennings 
60114b8a76bSRobert Jennings /**
60214b8a76bSRobert Jennings  * cmm_memory_cb - Handle memory hotplug notifier calls
60314b8a76bSRobert Jennings  * @self:	notifier block struct
60414b8a76bSRobert Jennings  * @action:	action to take
60514b8a76bSRobert Jennings  * @arg:	struct memory_notify data for handler
60614b8a76bSRobert Jennings  *
60714b8a76bSRobert Jennings  * Return value:
60814b8a76bSRobert Jennings  *	NOTIFY_OK or notifier error based on subfunction return value
60914b8a76bSRobert Jennings  *
61014b8a76bSRobert Jennings  **/
61114b8a76bSRobert Jennings static int cmm_memory_cb(struct notifier_block *self,
61214b8a76bSRobert Jennings 			unsigned long action, void *arg)
61314b8a76bSRobert Jennings {
61414b8a76bSRobert Jennings 	int ret = 0;
61514b8a76bSRobert Jennings 
61614b8a76bSRobert Jennings 	switch (action) {
61714b8a76bSRobert Jennings 	case MEM_GOING_OFFLINE:
61814b8a76bSRobert Jennings 		mutex_lock(&hotplug_mutex);
61914b8a76bSRobert Jennings 		hotplug_occurred = 1;
62014b8a76bSRobert Jennings 		ret = cmm_mem_going_offline(arg);
62114b8a76bSRobert Jennings 		break;
62214b8a76bSRobert Jennings 	case MEM_OFFLINE:
62314b8a76bSRobert Jennings 	case MEM_CANCEL_OFFLINE:
62414b8a76bSRobert Jennings 		mutex_unlock(&hotplug_mutex);
62514b8a76bSRobert Jennings 		cmm_dbg("Memory offline operation complete.\n");
62614b8a76bSRobert Jennings 		break;
62714b8a76bSRobert Jennings 	case MEM_GOING_ONLINE:
62814b8a76bSRobert Jennings 	case MEM_ONLINE:
62914b8a76bSRobert Jennings 	case MEM_CANCEL_ONLINE:
63014b8a76bSRobert Jennings 		break;
63114b8a76bSRobert Jennings 	}
63214b8a76bSRobert Jennings 
6337e26065dSPrarit Bhargava 	return notifier_from_errno(ret);
63414b8a76bSRobert Jennings }
63514b8a76bSRobert Jennings 
63614b8a76bSRobert Jennings static struct notifier_block cmm_mem_nb = {
63714b8a76bSRobert Jennings 	.notifier_call = cmm_memory_cb,
63814b8a76bSRobert Jennings 	.priority = CMM_MEM_HOTPLUG_PRI
63914b8a76bSRobert Jennings };
64014b8a76bSRobert Jennings 
64114b8a76bSRobert Jennings /**
64284af458bSBrian King  * cmm_init - Module initialization
64384af458bSBrian King  *
64484af458bSBrian King  * Return value:
64584af458bSBrian King  * 	0 on success / other on failure
64684af458bSBrian King  **/
64784af458bSBrian King static int cmm_init(void)
64884af458bSBrian King {
64984af458bSBrian King 	int rc = -ENOMEM;
65084af458bSBrian King 
65184af458bSBrian King 	if (!firmware_has_feature(FW_FEATURE_CMO))
65284af458bSBrian King 		return -EOPNOTSUPP;
65384af458bSBrian King 
65484af458bSBrian King 	if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0)
65584af458bSBrian King 		return rc;
65684af458bSBrian King 
657fecba962SBrian King 	if ((rc = register_reboot_notifier(&cmm_reboot_nb)))
65884af458bSBrian King 		goto out_oom_notifier;
65984af458bSBrian King 
6606c9d2909SKay Sievers 	if ((rc = cmm_sysfs_register(&cmm_dev)))
661fecba962SBrian King 		goto out_reboot_notifier;
662fecba962SBrian King 
66314b8a76bSRobert Jennings 	if (register_memory_notifier(&cmm_mem_nb) ||
66414b8a76bSRobert Jennings 	    register_memory_isolate_notifier(&cmm_mem_isolate_nb))
66514b8a76bSRobert Jennings 		goto out_unregister_notifier;
66614b8a76bSRobert Jennings 
66784af458bSBrian King 	if (cmm_disabled)
66884af458bSBrian King 		return rc;
66984af458bSBrian King 
67084af458bSBrian King 	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
67184af458bSBrian King 	if (IS_ERR(cmm_thread_ptr)) {
67284af458bSBrian King 		rc = PTR_ERR(cmm_thread_ptr);
67314b8a76bSRobert Jennings 		goto out_unregister_notifier;
67484af458bSBrian King 	}
67584af458bSBrian King 
67684af458bSBrian King 	return rc;
67784af458bSBrian King 
67814b8a76bSRobert Jennings out_unregister_notifier:
67914b8a76bSRobert Jennings 	unregister_memory_notifier(&cmm_mem_nb);
68014b8a76bSRobert Jennings 	unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
6816c9d2909SKay Sievers 	cmm_unregister_sysfs(&cmm_dev);
682fecba962SBrian King out_reboot_notifier:
683fecba962SBrian King 	unregister_reboot_notifier(&cmm_reboot_nb);
68484af458bSBrian King out_oom_notifier:
68584af458bSBrian King 	unregister_oom_notifier(&cmm_oom_nb);
68684af458bSBrian King 	return rc;
68784af458bSBrian King }
68884af458bSBrian King 
68984af458bSBrian King /**
69084af458bSBrian King  * cmm_exit - Module exit
69184af458bSBrian King  *
69284af458bSBrian King  * Return value:
69384af458bSBrian King  * 	nothing
69484af458bSBrian King  **/
69584af458bSBrian King static void cmm_exit(void)
69684af458bSBrian King {
69784af458bSBrian King 	if (cmm_thread_ptr)
69884af458bSBrian King 		kthread_stop(cmm_thread_ptr);
69984af458bSBrian King 	unregister_oom_notifier(&cmm_oom_nb);
700fecba962SBrian King 	unregister_reboot_notifier(&cmm_reboot_nb);
70114b8a76bSRobert Jennings 	unregister_memory_notifier(&cmm_mem_nb);
70214b8a76bSRobert Jennings 	unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
70384af458bSBrian King 	cmm_free_pages(loaned_pages);
7046c9d2909SKay Sievers 	cmm_unregister_sysfs(&cmm_dev);
70584af458bSBrian King }
70684af458bSBrian King 
70784af458bSBrian King /**
70884af458bSBrian King  * cmm_set_disable - Disable/Enable CMM
70984af458bSBrian King  *
71084af458bSBrian King  * Return value:
71184af458bSBrian King  * 	0 on success / other on failure
71284af458bSBrian King  **/
71384af458bSBrian King static int cmm_set_disable(const char *val, struct kernel_param *kp)
71484af458bSBrian King {
71584af458bSBrian King 	int disable = simple_strtoul(val, NULL, 10);
71684af458bSBrian King 
71784af458bSBrian King 	if (disable != 0 && disable != 1)
71884af458bSBrian King 		return -EINVAL;
71984af458bSBrian King 
72084af458bSBrian King 	if (disable && !cmm_disabled) {
72184af458bSBrian King 		if (cmm_thread_ptr)
72284af458bSBrian King 			kthread_stop(cmm_thread_ptr);
72384af458bSBrian King 		cmm_thread_ptr = NULL;
72484af458bSBrian King 		cmm_free_pages(loaned_pages);
72584af458bSBrian King 	} else if (!disable && cmm_disabled) {
72684af458bSBrian King 		cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
72784af458bSBrian King 		if (IS_ERR(cmm_thread_ptr))
72884af458bSBrian King 			return PTR_ERR(cmm_thread_ptr);
72984af458bSBrian King 	}
73084af458bSBrian King 
73184af458bSBrian King 	cmm_disabled = disable;
73284af458bSBrian King 	return 0;
73384af458bSBrian King }
73484af458bSBrian King 
73584af458bSBrian King module_param_call(disable, cmm_set_disable, param_get_uint,
73684af458bSBrian King 		  &cmm_disabled, S_IRUGO | S_IWUSR);
73784af458bSBrian King MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
73884af458bSBrian King 		 "[Default=" __stringify(CMM_DISABLE) "]");
73984af458bSBrian King 
74084af458bSBrian King module_init(cmm_init);
74184af458bSBrian King module_exit(cmm_exit);
742