xref: /openbmc/linux/drivers/xen/balloon.c (revision 40095de1)
11775826cSJeremy Fitzhardinge /******************************************************************************
21775826cSJeremy Fitzhardinge  * balloon.c
31775826cSJeremy Fitzhardinge  *
41775826cSJeremy Fitzhardinge  * Xen balloon driver - enables returning/claiming memory to/from Xen.
51775826cSJeremy Fitzhardinge  *
61775826cSJeremy Fitzhardinge  * Copyright (c) 2003, B Dragovic
71775826cSJeremy Fitzhardinge  * Copyright (c) 2003-2004, M Williamson, K Fraser
81775826cSJeremy Fitzhardinge  * Copyright (c) 2005 Dan M. Smith, IBM Corporation
91775826cSJeremy Fitzhardinge  *
101775826cSJeremy Fitzhardinge  * This program is free software; you can redistribute it and/or
111775826cSJeremy Fitzhardinge  * modify it under the terms of the GNU General Public License version 2
121775826cSJeremy Fitzhardinge  * as published by the Free Software Foundation; or, when distributed
131775826cSJeremy Fitzhardinge  * separately from the Linux kernel or incorporated into other
141775826cSJeremy Fitzhardinge  * software packages, subject to the following license:
151775826cSJeremy Fitzhardinge  *
161775826cSJeremy Fitzhardinge  * Permission is hereby granted, free of charge, to any person obtaining a copy
171775826cSJeremy Fitzhardinge  * of this source file (the "Software"), to deal in the Software without
181775826cSJeremy Fitzhardinge  * restriction, including without limitation the rights to use, copy, modify,
191775826cSJeremy Fitzhardinge  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
201775826cSJeremy Fitzhardinge  * and to permit persons to whom the Software is furnished to do so, subject to
211775826cSJeremy Fitzhardinge  * the following conditions:
221775826cSJeremy Fitzhardinge  *
231775826cSJeremy Fitzhardinge  * The above copyright notice and this permission notice shall be included in
241775826cSJeremy Fitzhardinge  * all copies or substantial portions of the Software.
251775826cSJeremy Fitzhardinge  *
261775826cSJeremy Fitzhardinge  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
271775826cSJeremy Fitzhardinge  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
281775826cSJeremy Fitzhardinge  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
291775826cSJeremy Fitzhardinge  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
301775826cSJeremy Fitzhardinge  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
311775826cSJeremy Fitzhardinge  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
321775826cSJeremy Fitzhardinge  * IN THE SOFTWARE.
331775826cSJeremy Fitzhardinge  */
341775826cSJeremy Fitzhardinge 
351775826cSJeremy Fitzhardinge #include <linux/kernel.h>
361775826cSJeremy Fitzhardinge #include <linux/module.h>
371775826cSJeremy Fitzhardinge #include <linux/sched.h>
381775826cSJeremy Fitzhardinge #include <linux/errno.h>
391775826cSJeremy Fitzhardinge #include <linux/mm.h>
401775826cSJeremy Fitzhardinge #include <linux/bootmem.h>
411775826cSJeremy Fitzhardinge #include <linux/pagemap.h>
421775826cSJeremy Fitzhardinge #include <linux/highmem.h>
431775826cSJeremy Fitzhardinge #include <linux/mutex.h>
441775826cSJeremy Fitzhardinge #include <linux/list.h>
451775826cSJeremy Fitzhardinge #include <linux/sysdev.h>
465a0e3ad6STejun Heo #include <linux/gfp.h>
471775826cSJeremy Fitzhardinge 
481775826cSJeremy Fitzhardinge #include <asm/page.h>
491775826cSJeremy Fitzhardinge #include <asm/pgalloc.h>
501775826cSJeremy Fitzhardinge #include <asm/pgtable.h>
511775826cSJeremy Fitzhardinge #include <asm/uaccess.h>
521775826cSJeremy Fitzhardinge #include <asm/tlb.h>
5366946f67SJeremy Fitzhardinge #include <asm/e820.h>
541775826cSJeremy Fitzhardinge 
55ecbf29cdSJeremy Fitzhardinge #include <asm/xen/hypervisor.h>
56ecbf29cdSJeremy Fitzhardinge #include <asm/xen/hypercall.h>
571ccbf534SJeremy Fitzhardinge 
581ccbf534SJeremy Fitzhardinge #include <xen/xen.h>
59ecbf29cdSJeremy Fitzhardinge #include <xen/interface/xen.h>
601775826cSJeremy Fitzhardinge #include <xen/interface/memory.h>
611775826cSJeremy Fitzhardinge #include <xen/xenbus.h>
621775826cSJeremy Fitzhardinge #include <xen/features.h>
631775826cSJeremy Fitzhardinge #include <xen/page.h>
641775826cSJeremy Fitzhardinge 
/* Convert a page count to KiB: one page is 2^(PAGE_SHIFT - 10) KiB. */
#define PAGES2KB(_p) ((_p) << (PAGE_SHIFT - 10))

/* Name of the sysdev class registered by register_balloon(). */
#define BALLOON_CLASS_NAME "xen_memory"
681775826cSJeremy Fitzhardinge 
/*
 * Result of one balloon_process() step:
 *
 * BP_DONE:      finished, or there was nothing to do,
 * BP_EAGAIN:    an error occurred; go to sleep and retry later,
 * BP_ECANCELED: an error occurred; the balloon operation is canceled.
 */
enum bp_state {
	BP_DONE,
	BP_EAGAIN,
	BP_ECANCELED,
};

/* Value of max_retry_count that disables the retry limit. */
#define RETRY_UNLIMITED	0
8495d2ac4aSDaniel Kiper 
/* Book-keeping shared by the balloon worker and the sysfs attributes. */
struct balloon_stats {
	/* We aim for 'current allocation' == 'target allocation'. */
	unsigned long current_pages;
	unsigned long target_pages;

	/* Number of pages in the low- and high-memory balloons. */
	unsigned long balloon_low;
	unsigned long balloon_high;

	/* Retry delay; multiplied by HZ when the worker is re-queued. */
	unsigned long schedule_delay;
	/* Ceiling that schedule_delay is clamped to while backing off. */
	unsigned long max_schedule_delay;

	/* Retry counter; restarts at 1 after success or cancellation. */
	unsigned long retry_count;
	/* Retry limit; RETRY_UNLIMITED means there is none. */
	unsigned long max_retry_count;
};
971775826cSJeremy Fitzhardinge 
981775826cSJeremy Fitzhardinge static DEFINE_MUTEX(balloon_mutex);
991775826cSJeremy Fitzhardinge 
1001775826cSJeremy Fitzhardinge static struct sys_device balloon_sysdev;
1011775826cSJeremy Fitzhardinge 
1021775826cSJeremy Fitzhardinge static int register_balloon(struct sys_device *sysdev);
1031775826cSJeremy Fitzhardinge 
1041775826cSJeremy Fitzhardinge static struct balloon_stats balloon_stats;
1051775826cSJeremy Fitzhardinge 
1061775826cSJeremy Fitzhardinge /* We increase/decrease in batches which fit in a page */
1071775826cSJeremy Fitzhardinge static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
1081775826cSJeremy Fitzhardinge 
/*
 * Adjust totalhigh_pages when a highmem page leaves or enters the
 * balloon; these expand to nothing on kernels without CONFIG_HIGHMEM.
 */
#ifdef CONFIG_HIGHMEM
#define inc_totalhigh_pages() (totalhigh_pages++)
#define dec_totalhigh_pages() (totalhigh_pages--)
#else
#define inc_totalhigh_pages() do {} while (0)
#define dec_totalhigh_pages() do {} while (0)
#endif
1161775826cSJeremy Fitzhardinge 
/*
 * Every page currently in the balloon, linked through page->lru.
 * Lowmem pages sit at the head, highmem pages at the tail.
 */
static LIST_HEAD(ballooned_pages);

/* Main work function, always executed in process context. */
static void balloon_process(struct work_struct *work);

/* Delayed work item that runs balloon_process(). */
static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
1231775826cSJeremy Fitzhardinge 
/*
 * Allocation flags used when ballooning out (allocating memory to return
 * to Xen).  We don't really want the kernel to try too hard here, since
 * that can trigger the oom killer.
 */
#define GFP_BALLOON \
	(GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
1281775826cSJeremy Fitzhardinge 
/*
 * scrub_page: zero the contents of @page.
 *
 * Only does anything when CONFIG_XEN_SCRUB_PAGES is set; otherwise the
 * function body is empty.
 */
static void scrub_page(struct page *page)
{
#if defined(CONFIG_XEN_SCRUB_PAGES)
	clear_highpage(page);
#endif
}
1351775826cSJeremy Fitzhardinge 
1361775826cSJeremy Fitzhardinge /* balloon_append: add the given page to the balloon. */
1379be4d457SJeremy Fitzhardinge static void __balloon_append(struct page *page)
1381775826cSJeremy Fitzhardinge {
1391775826cSJeremy Fitzhardinge 	/* Lowmem is re-populated first, so highmem pages go at list tail. */
1401775826cSJeremy Fitzhardinge 	if (PageHighMem(page)) {
1411775826cSJeremy Fitzhardinge 		list_add_tail(&page->lru, &ballooned_pages);
1421775826cSJeremy Fitzhardinge 		balloon_stats.balloon_high++;
1431775826cSJeremy Fitzhardinge 		dec_totalhigh_pages();
1441775826cSJeremy Fitzhardinge 	} else {
1451775826cSJeremy Fitzhardinge 		list_add(&page->lru, &ballooned_pages);
1461775826cSJeremy Fitzhardinge 		balloon_stats.balloon_low++;
1471775826cSJeremy Fitzhardinge 	}
1489be4d457SJeremy Fitzhardinge }
1493d65c948SGianluca Guida 
1509be4d457SJeremy Fitzhardinge static void balloon_append(struct page *page)
1519be4d457SJeremy Fitzhardinge {
1529be4d457SJeremy Fitzhardinge 	__balloon_append(page);
1533d65c948SGianluca Guida 	totalram_pages--;
1541775826cSJeremy Fitzhardinge }
1551775826cSJeremy Fitzhardinge 
1561775826cSJeremy Fitzhardinge /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
1571775826cSJeremy Fitzhardinge static struct page *balloon_retrieve(void)
1581775826cSJeremy Fitzhardinge {
1591775826cSJeremy Fitzhardinge 	struct page *page;
1601775826cSJeremy Fitzhardinge 
1611775826cSJeremy Fitzhardinge 	if (list_empty(&ballooned_pages))
1621775826cSJeremy Fitzhardinge 		return NULL;
1631775826cSJeremy Fitzhardinge 
1641775826cSJeremy Fitzhardinge 	page = list_entry(ballooned_pages.next, struct page, lru);
1651775826cSJeremy Fitzhardinge 	list_del(&page->lru);
1661775826cSJeremy Fitzhardinge 
1671775826cSJeremy Fitzhardinge 	if (PageHighMem(page)) {
1681775826cSJeremy Fitzhardinge 		balloon_stats.balloon_high--;
1691775826cSJeremy Fitzhardinge 		inc_totalhigh_pages();
1701775826cSJeremy Fitzhardinge 	}
1711775826cSJeremy Fitzhardinge 	else
1721775826cSJeremy Fitzhardinge 		balloon_stats.balloon_low--;
1731775826cSJeremy Fitzhardinge 
1743d65c948SGianluca Guida 	totalram_pages++;
1753d65c948SGianluca Guida 
1761775826cSJeremy Fitzhardinge 	return page;
1771775826cSJeremy Fitzhardinge }
1781775826cSJeremy Fitzhardinge 
1791775826cSJeremy Fitzhardinge static struct page *balloon_first_page(void)
1801775826cSJeremy Fitzhardinge {
1811775826cSJeremy Fitzhardinge 	if (list_empty(&ballooned_pages))
1821775826cSJeremy Fitzhardinge 		return NULL;
1831775826cSJeremy Fitzhardinge 	return list_entry(ballooned_pages.next, struct page, lru);
1841775826cSJeremy Fitzhardinge }
1851775826cSJeremy Fitzhardinge 
1861775826cSJeremy Fitzhardinge static struct page *balloon_next_page(struct page *page)
1871775826cSJeremy Fitzhardinge {
1881775826cSJeremy Fitzhardinge 	struct list_head *next = page->lru.next;
1891775826cSJeremy Fitzhardinge 	if (next == &ballooned_pages)
1901775826cSJeremy Fitzhardinge 		return NULL;
1911775826cSJeremy Fitzhardinge 	return list_entry(next, struct page, lru);
1921775826cSJeremy Fitzhardinge }
1931775826cSJeremy Fitzhardinge 
19495d2ac4aSDaniel Kiper static enum bp_state update_schedule(enum bp_state state)
19595d2ac4aSDaniel Kiper {
19695d2ac4aSDaniel Kiper 	if (state == BP_DONE) {
19795d2ac4aSDaniel Kiper 		balloon_stats.schedule_delay = 1;
19895d2ac4aSDaniel Kiper 		balloon_stats.retry_count = 1;
19995d2ac4aSDaniel Kiper 		return BP_DONE;
20095d2ac4aSDaniel Kiper 	}
20195d2ac4aSDaniel Kiper 
20295d2ac4aSDaniel Kiper 	++balloon_stats.retry_count;
20395d2ac4aSDaniel Kiper 
20495d2ac4aSDaniel Kiper 	if (balloon_stats.max_retry_count != RETRY_UNLIMITED &&
20595d2ac4aSDaniel Kiper 			balloon_stats.retry_count > balloon_stats.max_retry_count) {
20695d2ac4aSDaniel Kiper 		balloon_stats.schedule_delay = 1;
20795d2ac4aSDaniel Kiper 		balloon_stats.retry_count = 1;
20895d2ac4aSDaniel Kiper 		return BP_ECANCELED;
20995d2ac4aSDaniel Kiper 	}
21095d2ac4aSDaniel Kiper 
21195d2ac4aSDaniel Kiper 	balloon_stats.schedule_delay <<= 1;
21295d2ac4aSDaniel Kiper 
21395d2ac4aSDaniel Kiper 	if (balloon_stats.schedule_delay > balloon_stats.max_schedule_delay)
21495d2ac4aSDaniel Kiper 		balloon_stats.schedule_delay = balloon_stats.max_schedule_delay;
21595d2ac4aSDaniel Kiper 
21695d2ac4aSDaniel Kiper 	return BP_EAGAIN;
21795d2ac4aSDaniel Kiper }
21895d2ac4aSDaniel Kiper 
2191775826cSJeremy Fitzhardinge static unsigned long current_target(void)
2201775826cSJeremy Fitzhardinge {
221bc2c0303SIan Campbell 	unsigned long target = balloon_stats.target_pages;
2221775826cSJeremy Fitzhardinge 
2231775826cSJeremy Fitzhardinge 	target = min(target,
2241775826cSJeremy Fitzhardinge 		     balloon_stats.current_pages +
2251775826cSJeremy Fitzhardinge 		     balloon_stats.balloon_low +
2261775826cSJeremy Fitzhardinge 		     balloon_stats.balloon_high);
2271775826cSJeremy Fitzhardinge 
2281775826cSJeremy Fitzhardinge 	return target;
2291775826cSJeremy Fitzhardinge }
2301775826cSJeremy Fitzhardinge 
23195d2ac4aSDaniel Kiper static enum bp_state increase_reservation(unsigned long nr_pages)
2321775826cSJeremy Fitzhardinge {
23395d2ac4aSDaniel Kiper 	int rc;
2342f70e0acSJeremy Fitzhardinge 	unsigned long  pfn, i;
2351775826cSJeremy Fitzhardinge 	struct page   *page;
2361775826cSJeremy Fitzhardinge 	struct xen_memory_reservation reservation = {
2371775826cSJeremy Fitzhardinge 		.address_bits = 0,
2381775826cSJeremy Fitzhardinge 		.extent_order = 0,
2391775826cSJeremy Fitzhardinge 		.domid        = DOMID_SELF
2401775826cSJeremy Fitzhardinge 	};
2411775826cSJeremy Fitzhardinge 
2421775826cSJeremy Fitzhardinge 	if (nr_pages > ARRAY_SIZE(frame_list))
2431775826cSJeremy Fitzhardinge 		nr_pages = ARRAY_SIZE(frame_list);
2441775826cSJeremy Fitzhardinge 
2451775826cSJeremy Fitzhardinge 	page = balloon_first_page();
2461775826cSJeremy Fitzhardinge 	for (i = 0; i < nr_pages; i++) {
24795d2ac4aSDaniel Kiper 		if (!page) {
24895d2ac4aSDaniel Kiper 			nr_pages = i;
24995d2ac4aSDaniel Kiper 			break;
25095d2ac4aSDaniel Kiper 		}
251a419aef8SJoe Perches 		frame_list[i] = page_to_pfn(page);
2521775826cSJeremy Fitzhardinge 		page = balloon_next_page(page);
2531775826cSJeremy Fitzhardinge 	}
2541775826cSJeremy Fitzhardinge 
255a90971ebSIsaku Yamahata 	set_xen_guest_handle(reservation.extent_start, frame_list);
2561775826cSJeremy Fitzhardinge 	reservation.nr_extents = nr_pages;
257fde28e8fSJeremy Fitzhardinge 	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
25840095de1SKonrad Rzeszutek Wilk 	if (rc <= 0)
25995d2ac4aSDaniel Kiper 		return BP_EAGAIN;
2601775826cSJeremy Fitzhardinge 
261bc2c0303SIan Campbell 	for (i = 0; i < rc; i++) {
2621775826cSJeremy Fitzhardinge 		page = balloon_retrieve();
2631775826cSJeremy Fitzhardinge 		BUG_ON(page == NULL);
2641775826cSJeremy Fitzhardinge 
2651775826cSJeremy Fitzhardinge 		pfn = page_to_pfn(page);
2661775826cSJeremy Fitzhardinge 		BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
2671775826cSJeremy Fitzhardinge 		       phys_to_machine_mapping_valid(pfn));
2681775826cSJeremy Fitzhardinge 
2691775826cSJeremy Fitzhardinge 		set_phys_to_machine(pfn, frame_list[i]);
2701775826cSJeremy Fitzhardinge 
2711775826cSJeremy Fitzhardinge 		/* Link back into the page tables if not highmem. */
2721775826cSJeremy Fitzhardinge 		if (pfn < max_low_pfn) {
2731775826cSJeremy Fitzhardinge 			int ret;
2741775826cSJeremy Fitzhardinge 			ret = HYPERVISOR_update_va_mapping(
2751775826cSJeremy Fitzhardinge 				(unsigned long)__va(pfn << PAGE_SHIFT),
2761775826cSJeremy Fitzhardinge 				mfn_pte(frame_list[i], PAGE_KERNEL),
2771775826cSJeremy Fitzhardinge 				0);
2781775826cSJeremy Fitzhardinge 			BUG_ON(ret);
2791775826cSJeremy Fitzhardinge 		}
2801775826cSJeremy Fitzhardinge 
2811775826cSJeremy Fitzhardinge 		/* Relinquish the page back to the allocator. */
2821775826cSJeremy Fitzhardinge 		ClearPageReserved(page);
2831775826cSJeremy Fitzhardinge 		init_page_count(page);
2841775826cSJeremy Fitzhardinge 		__free_page(page);
2851775826cSJeremy Fitzhardinge 	}
2861775826cSJeremy Fitzhardinge 
287bc2c0303SIan Campbell 	balloon_stats.current_pages += rc;
2881775826cSJeremy Fitzhardinge 
28995d2ac4aSDaniel Kiper 	return BP_DONE;
2901775826cSJeremy Fitzhardinge }
2911775826cSJeremy Fitzhardinge 
29295d2ac4aSDaniel Kiper static enum bp_state decrease_reservation(unsigned long nr_pages)
2931775826cSJeremy Fitzhardinge {
29495d2ac4aSDaniel Kiper 	enum bp_state state = BP_DONE;
2952f70e0acSJeremy Fitzhardinge 	unsigned long  pfn, i;
2961775826cSJeremy Fitzhardinge 	struct page   *page;
2971775826cSJeremy Fitzhardinge 	int ret;
2981775826cSJeremy Fitzhardinge 	struct xen_memory_reservation reservation = {
2991775826cSJeremy Fitzhardinge 		.address_bits = 0,
3001775826cSJeremy Fitzhardinge 		.extent_order = 0,
3011775826cSJeremy Fitzhardinge 		.domid        = DOMID_SELF
3021775826cSJeremy Fitzhardinge 	};
3031775826cSJeremy Fitzhardinge 
3041775826cSJeremy Fitzhardinge 	if (nr_pages > ARRAY_SIZE(frame_list))
3051775826cSJeremy Fitzhardinge 		nr_pages = ARRAY_SIZE(frame_list);
3061775826cSJeremy Fitzhardinge 
3071775826cSJeremy Fitzhardinge 	for (i = 0; i < nr_pages; i++) {
3081775826cSJeremy Fitzhardinge 		if ((page = alloc_page(GFP_BALLOON)) == NULL) {
3091775826cSJeremy Fitzhardinge 			nr_pages = i;
31095d2ac4aSDaniel Kiper 			state = BP_EAGAIN;
3111775826cSJeremy Fitzhardinge 			break;
3121775826cSJeremy Fitzhardinge 		}
3131775826cSJeremy Fitzhardinge 
3141775826cSJeremy Fitzhardinge 		pfn = page_to_pfn(page);
3151775826cSJeremy Fitzhardinge 		frame_list[i] = pfn_to_mfn(pfn);
3161775826cSJeremy Fitzhardinge 
3171775826cSJeremy Fitzhardinge 		scrub_page(page);
3181058a75fSDan Magenheimer 
319ff4ce8c3SIan Campbell 		if (!PageHighMem(page)) {
3201058a75fSDan Magenheimer 			ret = HYPERVISOR_update_va_mapping(
3211058a75fSDan Magenheimer 				(unsigned long)__va(pfn << PAGE_SHIFT),
3221058a75fSDan Magenheimer 				__pte_ma(0), 0);
3231058a75fSDan Magenheimer 			BUG_ON(ret);
3241775826cSJeremy Fitzhardinge                 }
3251775826cSJeremy Fitzhardinge 
326ff4ce8c3SIan Campbell 	}
327ff4ce8c3SIan Campbell 
3281775826cSJeremy Fitzhardinge 	/* Ensure that ballooned highmem pages don't have kmaps. */
3291775826cSJeremy Fitzhardinge 	kmap_flush_unused();
3301775826cSJeremy Fitzhardinge 	flush_tlb_all();
3311775826cSJeremy Fitzhardinge 
3321775826cSJeremy Fitzhardinge 	/* No more mappings: invalidate P2M and add to balloon. */
3331775826cSJeremy Fitzhardinge 	for (i = 0; i < nr_pages; i++) {
3341775826cSJeremy Fitzhardinge 		pfn = mfn_to_pfn(frame_list[i]);
3351775826cSJeremy Fitzhardinge 		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
3361775826cSJeremy Fitzhardinge 		balloon_append(pfn_to_page(pfn));
3371775826cSJeremy Fitzhardinge 	}
3381775826cSJeremy Fitzhardinge 
339a90971ebSIsaku Yamahata 	set_xen_guest_handle(reservation.extent_start, frame_list);
3401775826cSJeremy Fitzhardinge 	reservation.nr_extents   = nr_pages;
3411775826cSJeremy Fitzhardinge 	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
3421775826cSJeremy Fitzhardinge 	BUG_ON(ret != nr_pages);
3431775826cSJeremy Fitzhardinge 
3441775826cSJeremy Fitzhardinge 	balloon_stats.current_pages -= nr_pages;
3451775826cSJeremy Fitzhardinge 
34695d2ac4aSDaniel Kiper 	return state;
3471775826cSJeremy Fitzhardinge }
3481775826cSJeremy Fitzhardinge 
3491775826cSJeremy Fitzhardinge /*
3501775826cSJeremy Fitzhardinge  * We avoid multiple worker processes conflicting via the balloon mutex.
3511775826cSJeremy Fitzhardinge  * We may of course race updates of the target counts (which are protected
3521775826cSJeremy Fitzhardinge  * by the balloon lock), or with changes to the Xen hard limit, but we will
3531775826cSJeremy Fitzhardinge  * recover from these in time.
3541775826cSJeremy Fitzhardinge  */
3551775826cSJeremy Fitzhardinge static void balloon_process(struct work_struct *work)
3561775826cSJeremy Fitzhardinge {
35795d2ac4aSDaniel Kiper 	enum bp_state state = BP_DONE;
3581775826cSJeremy Fitzhardinge 	long credit;
3591775826cSJeremy Fitzhardinge 
3601775826cSJeremy Fitzhardinge 	mutex_lock(&balloon_mutex);
3611775826cSJeremy Fitzhardinge 
3621775826cSJeremy Fitzhardinge 	do {
3631775826cSJeremy Fitzhardinge 		credit = current_target() - balloon_stats.current_pages;
36495d2ac4aSDaniel Kiper 
3651775826cSJeremy Fitzhardinge 		if (credit > 0)
36695d2ac4aSDaniel Kiper 			state = increase_reservation(credit);
36795d2ac4aSDaniel Kiper 
3681775826cSJeremy Fitzhardinge 		if (credit < 0)
36995d2ac4aSDaniel Kiper 			state = decrease_reservation(-credit);
37095d2ac4aSDaniel Kiper 
37195d2ac4aSDaniel Kiper 		state = update_schedule(state);
3721775826cSJeremy Fitzhardinge 
3731775826cSJeremy Fitzhardinge #ifndef CONFIG_PREEMPT
3741775826cSJeremy Fitzhardinge 		if (need_resched())
3751775826cSJeremy Fitzhardinge 			schedule();
3761775826cSJeremy Fitzhardinge #endif
37795d2ac4aSDaniel Kiper 	} while (credit && state == BP_DONE);
3781775826cSJeremy Fitzhardinge 
3791775826cSJeremy Fitzhardinge 	/* Schedule more work if there is some still to be done. */
38095d2ac4aSDaniel Kiper 	if (state == BP_EAGAIN)
38195d2ac4aSDaniel Kiper 		schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ);
3821775826cSJeremy Fitzhardinge 
3831775826cSJeremy Fitzhardinge 	mutex_unlock(&balloon_mutex);
3841775826cSJeremy Fitzhardinge }
3851775826cSJeremy Fitzhardinge 
3861775826cSJeremy Fitzhardinge /* Resets the Xen limit, sets new target, and kicks off processing. */
387955d6f17SAdrian Bunk static void balloon_set_new_target(unsigned long target)
3881775826cSJeremy Fitzhardinge {
3891775826cSJeremy Fitzhardinge 	/* No need for lock. Not read-modify-write updates. */
3901775826cSJeremy Fitzhardinge 	balloon_stats.target_pages = target;
39195170b2eSDaniel Kiper 	schedule_delayed_work(&balloon_worker, 0);
3921775826cSJeremy Fitzhardinge }
3931775826cSJeremy Fitzhardinge 
3941775826cSJeremy Fitzhardinge static struct xenbus_watch target_watch =
3951775826cSJeremy Fitzhardinge {
3961775826cSJeremy Fitzhardinge 	.node = "memory/target"
3971775826cSJeremy Fitzhardinge };
3981775826cSJeremy Fitzhardinge 
3991775826cSJeremy Fitzhardinge /* React to a change in the target key */
4001775826cSJeremy Fitzhardinge static void watch_target(struct xenbus_watch *watch,
4011775826cSJeremy Fitzhardinge 			 const char **vec, unsigned int len)
4021775826cSJeremy Fitzhardinge {
4031775826cSJeremy Fitzhardinge 	unsigned long long new_target;
4041775826cSJeremy Fitzhardinge 	int err;
4051775826cSJeremy Fitzhardinge 
4061775826cSJeremy Fitzhardinge 	err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
4071775826cSJeremy Fitzhardinge 	if (err != 1) {
4081775826cSJeremy Fitzhardinge 		/* This is ok (for domain0 at least) - so just return */
4091775826cSJeremy Fitzhardinge 		return;
4101775826cSJeremy Fitzhardinge 	}
4111775826cSJeremy Fitzhardinge 
4121775826cSJeremy Fitzhardinge 	/* The given memory/target value is in KiB, so it needs converting to
4131775826cSJeremy Fitzhardinge 	 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
4141775826cSJeremy Fitzhardinge 	 */
4151775826cSJeremy Fitzhardinge 	balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
4161775826cSJeremy Fitzhardinge }
4171775826cSJeremy Fitzhardinge 
4181775826cSJeremy Fitzhardinge static int balloon_init_watcher(struct notifier_block *notifier,
4191775826cSJeremy Fitzhardinge 				unsigned long event,
4201775826cSJeremy Fitzhardinge 				void *data)
4211775826cSJeremy Fitzhardinge {
4221775826cSJeremy Fitzhardinge 	int err;
4231775826cSJeremy Fitzhardinge 
4241775826cSJeremy Fitzhardinge 	err = register_xenbus_watch(&target_watch);
4251775826cSJeremy Fitzhardinge 	if (err)
4261775826cSJeremy Fitzhardinge 		printk(KERN_ERR "Failed to set balloon watcher\n");
4271775826cSJeremy Fitzhardinge 
4281775826cSJeremy Fitzhardinge 	return NOTIFY_DONE;
4291775826cSJeremy Fitzhardinge }
4301775826cSJeremy Fitzhardinge 
4311775826cSJeremy Fitzhardinge static struct notifier_block xenstore_notifier;
4321775826cSJeremy Fitzhardinge 
4331775826cSJeremy Fitzhardinge static int __init balloon_init(void)
4341775826cSJeremy Fitzhardinge {
43566946f67SJeremy Fitzhardinge 	unsigned long pfn, extra_pfn_end;
4361775826cSJeremy Fitzhardinge 	struct page *page;
4371775826cSJeremy Fitzhardinge 
4386e833587SJeremy Fitzhardinge 	if (!xen_pv_domain())
4391775826cSJeremy Fitzhardinge 		return -ENODEV;
4401775826cSJeremy Fitzhardinge 
4411775826cSJeremy Fitzhardinge 	pr_info("xen_balloon: Initialising balloon driver.\n");
4421775826cSJeremy Fitzhardinge 
4431775826cSJeremy Fitzhardinge 	balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
4441775826cSJeremy Fitzhardinge 	balloon_stats.target_pages  = balloon_stats.current_pages;
4451775826cSJeremy Fitzhardinge 	balloon_stats.balloon_low   = 0;
4461775826cSJeremy Fitzhardinge 	balloon_stats.balloon_high  = 0;
4471775826cSJeremy Fitzhardinge 
44895d2ac4aSDaniel Kiper 	balloon_stats.schedule_delay = 1;
44995d2ac4aSDaniel Kiper 	balloon_stats.max_schedule_delay = 32;
45095d2ac4aSDaniel Kiper 	balloon_stats.retry_count = 1;
45140095de1SKonrad Rzeszutek Wilk 	balloon_stats.max_retry_count = RETRY_UNLIMITED;
45295d2ac4aSDaniel Kiper 
4531775826cSJeremy Fitzhardinge 	register_balloon(&balloon_sysdev);
4541775826cSJeremy Fitzhardinge 
4552a4c92faSJeremy Fitzhardinge 	/*
4562a4c92faSJeremy Fitzhardinge 	 * Initialise the balloon with excess memory space.  We need
4572a4c92faSJeremy Fitzhardinge 	 * to make sure we don't add memory which doesn't exist or
4582a4c92faSJeremy Fitzhardinge 	 * logically exist.  The E820 map can be trimmed to be smaller
4592a4c92faSJeremy Fitzhardinge 	 * than the amount of physical memory due to the mem= command
4602a4c92faSJeremy Fitzhardinge 	 * line parameter.  And if this is a 32-bit non-HIGHMEM kernel
4612a4c92faSJeremy Fitzhardinge 	 * on a system with memory which requires highmem to access,
4622a4c92faSJeremy Fitzhardinge 	 * don't try to use it.
4632a4c92faSJeremy Fitzhardinge 	 */
4642a4c92faSJeremy Fitzhardinge 	extra_pfn_end = min(min(max_pfn, e820_end_of_ram_pfn()),
46566946f67SJeremy Fitzhardinge 			    (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size));
4669be4d457SJeremy Fitzhardinge 	for (pfn = PFN_UP(xen_extra_mem_start);
46766946f67SJeremy Fitzhardinge 	     pfn < extra_pfn_end;
4689be4d457SJeremy Fitzhardinge 	     pfn++) {
4691775826cSJeremy Fitzhardinge 		page = pfn_to_page(pfn);
4709be4d457SJeremy Fitzhardinge 		/* totalram_pages doesn't include the boot-time
4719be4d457SJeremy Fitzhardinge 		   balloon extension, so don't subtract from it. */
4729be4d457SJeremy Fitzhardinge 		__balloon_append(page);
4731775826cSJeremy Fitzhardinge 	}
4741775826cSJeremy Fitzhardinge 
4751775826cSJeremy Fitzhardinge 	target_watch.callback = watch_target;
4761775826cSJeremy Fitzhardinge 	xenstore_notifier.notifier_call = balloon_init_watcher;
4771775826cSJeremy Fitzhardinge 
4781775826cSJeremy Fitzhardinge 	register_xenstore_notifier(&xenstore_notifier);
4791775826cSJeremy Fitzhardinge 
4801775826cSJeremy Fitzhardinge 	return 0;
4811775826cSJeremy Fitzhardinge }
4821775826cSJeremy Fitzhardinge 
4831775826cSJeremy Fitzhardinge subsys_initcall(balloon_init);
4841775826cSJeremy Fitzhardinge 
/* balloon_exit: module exit hook; currently does nothing. */
static void balloon_exit(void)
{
	/* XXX - release balloon here */
}

module_exit(balloon_exit);
4921775826cSJeremy Fitzhardinge 
4931775826cSJeremy Fitzhardinge #define BALLOON_SHOW(name, format, args...)				\
4941775826cSJeremy Fitzhardinge 	static ssize_t show_##name(struct sys_device *dev,		\
495167e6cf6SJeremy Fitzhardinge 				   struct sysdev_attribute *attr,	\
4961775826cSJeremy Fitzhardinge 				   char *buf)				\
4971775826cSJeremy Fitzhardinge 	{								\
4981775826cSJeremy Fitzhardinge 		return sprintf(buf, format, ##args);			\
4991775826cSJeremy Fitzhardinge 	}								\
5001775826cSJeremy Fitzhardinge 	static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
5011775826cSJeremy Fitzhardinge 
5021775826cSJeremy Fitzhardinge BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
5031775826cSJeremy Fitzhardinge BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low));
5041775826cSJeremy Fitzhardinge BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high));
5051775826cSJeremy Fitzhardinge 
50695d2ac4aSDaniel Kiper static SYSDEV_ULONG_ATTR(schedule_delay, 0444, balloon_stats.schedule_delay);
50795d2ac4aSDaniel Kiper static SYSDEV_ULONG_ATTR(max_schedule_delay, 0644, balloon_stats.max_schedule_delay);
50895d2ac4aSDaniel Kiper static SYSDEV_ULONG_ATTR(retry_count, 0444, balloon_stats.retry_count);
50995d2ac4aSDaniel Kiper static SYSDEV_ULONG_ATTR(max_retry_count, 0644, balloon_stats.max_retry_count);
51095d2ac4aSDaniel Kiper 
511167e6cf6SJeremy Fitzhardinge static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr,
512167e6cf6SJeremy Fitzhardinge 			      char *buf)
5131775826cSJeremy Fitzhardinge {
5141775826cSJeremy Fitzhardinge 	return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages));
5151775826cSJeremy Fitzhardinge }
5161775826cSJeremy Fitzhardinge 
5171775826cSJeremy Fitzhardinge static ssize_t store_target_kb(struct sys_device *dev,
5184a0b2b4dSAndi Kleen 			       struct sysdev_attribute *attr,
5191775826cSJeremy Fitzhardinge 			       const char *buf,
5201775826cSJeremy Fitzhardinge 			       size_t count)
5211775826cSJeremy Fitzhardinge {
522167e6cf6SJeremy Fitzhardinge 	char *endchar;
5231775826cSJeremy Fitzhardinge 	unsigned long long target_bytes;
5241775826cSJeremy Fitzhardinge 
5251775826cSJeremy Fitzhardinge 	if (!capable(CAP_SYS_ADMIN))
5261775826cSJeremy Fitzhardinge 		return -EPERM;
5271775826cSJeremy Fitzhardinge 
528618b2c8dSJeremy Fitzhardinge 	target_bytes = simple_strtoull(buf, &endchar, 0) * 1024;
5291775826cSJeremy Fitzhardinge 
5301775826cSJeremy Fitzhardinge 	balloon_set_new_target(target_bytes >> PAGE_SHIFT);
5311775826cSJeremy Fitzhardinge 
5321775826cSJeremy Fitzhardinge 	return count;
5331775826cSJeremy Fitzhardinge }
5341775826cSJeremy Fitzhardinge 
5351775826cSJeremy Fitzhardinge static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
5361775826cSJeremy Fitzhardinge 		   show_target_kb, store_target_kb);
5371775826cSJeremy Fitzhardinge 
538618b2c8dSJeremy Fitzhardinge 
539618b2c8dSJeremy Fitzhardinge static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr,
540618b2c8dSJeremy Fitzhardinge 			      char *buf)
541618b2c8dSJeremy Fitzhardinge {
542618b2c8dSJeremy Fitzhardinge 	return sprintf(buf, "%llu\n",
5430692698cSJan Beulich 		       (unsigned long long)balloon_stats.target_pages
5440692698cSJan Beulich 		       << PAGE_SHIFT);
545618b2c8dSJeremy Fitzhardinge }
546618b2c8dSJeremy Fitzhardinge 
547618b2c8dSJeremy Fitzhardinge static ssize_t store_target(struct sys_device *dev,
548618b2c8dSJeremy Fitzhardinge 			    struct sysdev_attribute *attr,
549618b2c8dSJeremy Fitzhardinge 			    const char *buf,
550618b2c8dSJeremy Fitzhardinge 			    size_t count)
551618b2c8dSJeremy Fitzhardinge {
552618b2c8dSJeremy Fitzhardinge 	char *endchar;
553618b2c8dSJeremy Fitzhardinge 	unsigned long long target_bytes;
554618b2c8dSJeremy Fitzhardinge 
555618b2c8dSJeremy Fitzhardinge 	if (!capable(CAP_SYS_ADMIN))
556618b2c8dSJeremy Fitzhardinge 		return -EPERM;
557618b2c8dSJeremy Fitzhardinge 
558618b2c8dSJeremy Fitzhardinge 	target_bytes = memparse(buf, &endchar);
559618b2c8dSJeremy Fitzhardinge 
560618b2c8dSJeremy Fitzhardinge 	balloon_set_new_target(target_bytes >> PAGE_SHIFT);
561618b2c8dSJeremy Fitzhardinge 
562618b2c8dSJeremy Fitzhardinge 	return count;
563618b2c8dSJeremy Fitzhardinge }
564618b2c8dSJeremy Fitzhardinge 
565618b2c8dSJeremy Fitzhardinge static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR,
566618b2c8dSJeremy Fitzhardinge 		   show_target, store_target);
567618b2c8dSJeremy Fitzhardinge 
568618b2c8dSJeremy Fitzhardinge 
5691775826cSJeremy Fitzhardinge static struct sysdev_attribute *balloon_attrs[] = {
5701775826cSJeremy Fitzhardinge 	&attr_target_kb,
571618b2c8dSJeremy Fitzhardinge 	&attr_target,
57295d2ac4aSDaniel Kiper 	&attr_schedule_delay.attr,
57395d2ac4aSDaniel Kiper 	&attr_max_schedule_delay.attr,
57495d2ac4aSDaniel Kiper 	&attr_retry_count.attr,
57595d2ac4aSDaniel Kiper 	&attr_max_retry_count.attr
5761775826cSJeremy Fitzhardinge };
5771775826cSJeremy Fitzhardinge 
5781775826cSJeremy Fitzhardinge static struct attribute *balloon_info_attrs[] = {
5791775826cSJeremy Fitzhardinge 	&attr_current_kb.attr,
5801775826cSJeremy Fitzhardinge 	&attr_low_kb.attr,
5811775826cSJeremy Fitzhardinge 	&attr_high_kb.attr,
5821775826cSJeremy Fitzhardinge 	NULL
5831775826cSJeremy Fitzhardinge };
5841775826cSJeremy Fitzhardinge 
5851775826cSJeremy Fitzhardinge static struct attribute_group balloon_info_group = {
5861775826cSJeremy Fitzhardinge 	.name = "info",
5871775826cSJeremy Fitzhardinge 	.attrs = balloon_info_attrs,
5881775826cSJeremy Fitzhardinge };
5891775826cSJeremy Fitzhardinge 
5901775826cSJeremy Fitzhardinge static struct sysdev_class balloon_sysdev_class = {
5911775826cSJeremy Fitzhardinge 	.name = BALLOON_CLASS_NAME,
5921775826cSJeremy Fitzhardinge };
5931775826cSJeremy Fitzhardinge 
5941775826cSJeremy Fitzhardinge static int register_balloon(struct sys_device *sysdev)
5951775826cSJeremy Fitzhardinge {
5961775826cSJeremy Fitzhardinge 	int i, error;
5971775826cSJeremy Fitzhardinge 
5981775826cSJeremy Fitzhardinge 	error = sysdev_class_register(&balloon_sysdev_class);
5991775826cSJeremy Fitzhardinge 	if (error)
6001775826cSJeremy Fitzhardinge 		return error;
6011775826cSJeremy Fitzhardinge 
6021775826cSJeremy Fitzhardinge 	sysdev->id = 0;
6031775826cSJeremy Fitzhardinge 	sysdev->cls = &balloon_sysdev_class;
6041775826cSJeremy Fitzhardinge 
6051775826cSJeremy Fitzhardinge 	error = sysdev_register(sysdev);
6061775826cSJeremy Fitzhardinge 	if (error) {
6071775826cSJeremy Fitzhardinge 		sysdev_class_unregister(&balloon_sysdev_class);
6081775826cSJeremy Fitzhardinge 		return error;
6091775826cSJeremy Fitzhardinge 	}
6101775826cSJeremy Fitzhardinge 
6111775826cSJeremy Fitzhardinge 	for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
6121775826cSJeremy Fitzhardinge 		error = sysdev_create_file(sysdev, balloon_attrs[i]);
6131775826cSJeremy Fitzhardinge 		if (error)
6141775826cSJeremy Fitzhardinge 			goto fail;
6151775826cSJeremy Fitzhardinge 	}
6161775826cSJeremy Fitzhardinge 
6171775826cSJeremy Fitzhardinge 	error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
6181775826cSJeremy Fitzhardinge 	if (error)
6191775826cSJeremy Fitzhardinge 		goto fail;
6201775826cSJeremy Fitzhardinge 
6211775826cSJeremy Fitzhardinge 	return 0;
6221775826cSJeremy Fitzhardinge 
6231775826cSJeremy Fitzhardinge  fail:
6241775826cSJeremy Fitzhardinge 	while (--i >= 0)
6251775826cSJeremy Fitzhardinge 		sysdev_remove_file(sysdev, balloon_attrs[i]);
6261775826cSJeremy Fitzhardinge 	sysdev_unregister(sysdev);
6271775826cSJeremy Fitzhardinge 	sysdev_class_unregister(&balloon_sysdev_class);
6281775826cSJeremy Fitzhardinge 	return error;
6291775826cSJeremy Fitzhardinge }
6301775826cSJeremy Fitzhardinge 
6311775826cSJeremy Fitzhardinge MODULE_LICENSE("GPL");
632