// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Virtio-mem device driver.
 *
 * Copyright Red Hat, Inc. 2020
 *
 * Author(s): David Hildenbrand <david@redhat.com>
 */

#include <linux/virtio.h>
#include <linux/virtio_mem.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/memory_hotplug.h>
#include <linux/memory.h>
#include <linux/hrtimer.h>
#include <linux/crash_dump.h>
#include <linux/mutex.h>
#include <linux/bitmap.h>
#include <linux/lockdep.h>
#include <linux/log2.h>

#include <acpi/acpi_numa.h>

static bool unplug_online = true;
module_param(unplug_online, bool, 0644);
MODULE_PARM_DESC(unplug_online, "Try to unplug online memory");

static bool force_bbm;
module_param(force_bbm, bool, 0444);
MODULE_PARM_DESC(force_bbm,
		"Force Big Block Mode. Default is 0 (auto-selection)");

static unsigned long bbm_block_size;
module_param(bbm_block_size, ulong, 0444);
MODULE_PARM_DESC(bbm_block_size,
		 "Big Block size in bytes. Default is 0 (auto-detection).");

/*
 * virtio-mem currently supports the following modes of operation:
 *
 * * Sub Block Mode (SBM): A Linux memory block spans 2..X subblocks (SB). The
 *   size of a Sub Block (SB) is determined based on the device block size, the
 *   pageblock size, and the maximum allocation granularity of the buddy.
 *   Subblocks within a Linux memory block might either be plugged or unplugged.
 *   Memory is added to/removed from Linux MM in Linux memory block granularity.
 *
 * * Big Block Mode (BBM): A Big Block (BB) spans 1..X Linux memory blocks.
 *   Memory is added to/removed from Linux MM in Big Block granularity.
 *
 * The mode is determined automatically based on the Linux memory block size
 * and the device block size.
 *
 * User space / core MM (auto onlining) is responsible for onlining added
 * Linux memory blocks - and for selecting a zone. Linux Memory Blocks are
 * always onlined separately, and all memory within a Linux memory block is
 * onlined to the same zone - virtio-mem relies on this behavior.
 */

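/*
 * Illustrative sketch (not part of the driver logic): roughly how the
 * subblock size and the mode could follow from the rules described above.
 * The helper below is hypothetical and only makes the comment concrete;
 * the real decisions happen during device initialization and may differ
 * in detail.
 *
 *	static uint64_t example_pick_sb_size(uint64_t device_block_size)
 *	{
 *		// A subblock must cover at least one device block and at
 *		// least one pageblock, so the buddy can manage it.
 *		uint64_t sb_size = max_t(uint64_t, device_block_size,
 *					 pageblock_nr_pages * PAGE_SIZE);
 *
 *		// SBM only makes sense if a Linux memory block spans at
 *		// least two subblocks; otherwise BBM would be used.
 *		if (force_bbm || sb_size * 2 > memory_block_size_bytes())
 *			pr_info("example: would operate in BBM\n");
 *		else
 *			pr_info("example: would operate in SBM\n");
 *		return sb_size;
 *	}
 */
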
/*
 * State of a Linux memory block in SBM.
 */
enum virtio_mem_sbm_mb_state {
	/* Unplugged, not added to Linux. Can be reused later. */
	VIRTIO_MEM_SBM_MB_UNUSED = 0,
	/* (Partially) plugged, not added to Linux. Error on add_memory(). */
	VIRTIO_MEM_SBM_MB_PLUGGED,
	/* Fully plugged, fully added to Linux, offline. */
	VIRTIO_MEM_SBM_MB_OFFLINE,
	/* Partially plugged, fully added to Linux, offline. */
	VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL,
	/* Fully plugged, fully added to Linux, onlined to a kernel zone. */
	VIRTIO_MEM_SBM_MB_KERNEL,
	/* Partially plugged, fully added to Linux, onlined to a kernel zone. */
	VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL,
	/* Fully plugged, fully added to Linux, onlined to ZONE_MOVABLE. */
	VIRTIO_MEM_SBM_MB_MOVABLE,
	/* Partially plugged, fully added to Linux, onlined to ZONE_MOVABLE. */
	VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL,
	VIRTIO_MEM_SBM_MB_COUNT
};

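/*
 * Illustrative summary (derived from the state comments above, not an exact
 * transition diagram): a block that is fully unplugged and not added to
 * Linux is UNUSED; once (some of) its subblocks are plugged and the block
 * has been added to Linux, it is OFFLINE or OFFLINE_PARTIAL; onlining moves
 * it to the KERNEL or MOVABLE variants, depending on the zone it was onlined
 * to. PLUGGED covers the exceptional case where plugging succeeded but
 * add_memory() failed.
 */
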
/*
 * State of a Big Block (BB) in BBM, covering 1..X Linux memory blocks.
 */
enum virtio_mem_bbm_bb_state {
	/* Unplugged, not added to Linux. Can be reused later. */
	VIRTIO_MEM_BBM_BB_UNUSED = 0,
	/* Plugged, not added to Linux. Error on add_memory(). */
	VIRTIO_MEM_BBM_BB_PLUGGED,
	/* Plugged and added to Linux. */
	VIRTIO_MEM_BBM_BB_ADDED,
	/* All online parts are fake-offline, ready to remove. */
	VIRTIO_MEM_BBM_BB_FAKE_OFFLINE,
	VIRTIO_MEM_BBM_BB_COUNT
};

struct virtio_mem {
	struct virtio_device *vdev;

	/* We might first have to unplug all memory when starting up. */
	bool unplug_all_required;

	/* Workqueue that processes the plug/unplug requests. */
	struct work_struct wq;
	atomic_t wq_active;
	atomic_t config_changed;

	/* Virtqueue for guest->host requests. */
	struct virtqueue *vq;

	/* Wait for a host response to a guest request. */
	wait_queue_head_t host_resp;

	/* Space for one guest request and the host response. */
	struct virtio_mem_req req;
	struct virtio_mem_resp resp;

	/* The current size of the device. */
	uint64_t plugged_size;
	/* The requested size of the device. */
	uint64_t requested_size;

	/* The device block size (for communicating with the device). */
	uint64_t device_block_size;
	/* The determined node id for all memory of the device. */
	int nid;
	/* Physical start address of the memory region. */
	uint64_t addr;
	/* Maximum region size in bytes. */
	uint64_t region_size;

	/* The parent resource for all memory added via this device. */
	struct resource *parent_resource;
	/*
	 * Copy of "System RAM (virtio_mem)" to be used for
	 * add_memory_driver_managed().
	 */
	const char *resource_name;
	/* Memory group identification. */
	int mgid;

	/*
	 * We don't want to add too much memory if it's not getting onlined,
	 * to avoid running OOM. Besides this threshold, we allow at least
	 * two offline blocks at a time (whichever is bigger).
	 */
#define VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD		(1024 * 1024 * 1024)
	atomic64_t offline_size;
	uint64_t offline_threshold;

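	/*
	 * Illustrative example (assumed numbers): with the default 1 GiB
	 * threshold and 128 MiB Linux memory blocks, at most
	 * 1 GiB / 128 MiB = 8 such blocks may sit offline at the same time
	 * before virtio_mem_could_add_memory() (further below) starts
	 * rejecting further additions.
	 */
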
	/* If set, the driver is in SBM, otherwise in BBM. */
	bool in_sbm;

	union {
		struct {
			/* Id of the first memory block of this device. */
			unsigned long first_mb_id;
			/* Id of the last usable memory block of this device. */
			unsigned long last_usable_mb_id;
			/* Id of the next memory block to prepare when needed. */
			unsigned long next_mb_id;

			/* The subblock size. */
			uint64_t sb_size;
			/* The number of subblocks per Linux memory block. */
			uint32_t sbs_per_mb;

			/*
			 * Some of the Linux memory blocks tracked as "partially
			 * plugged" are completely unplugged and can be offlined
			 * and removed -- which previously failed.
			 */
			bool have_unplugged_mb;

			/* Summary of all memory block states. */
			unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT];

			/*
			 * One byte state per memory block. Allocated via
			 * vmalloc(). Resized (alloc+copy+free) on demand.
			 *
			 * With 128 MiB memory blocks, we have states for 512
			 * GiB of memory in one 4 KiB page.
			 */
			uint8_t *mb_states;

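			/*
			 * Worked example for the numbers above (illustrative
			 * only): one 4 KiB page holds 4096 one-byte states,
			 * and 4096 states * 128 MiB per memory block = 512 GiB
			 * of managed memory.
			 */
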
			/*
			 * Bitmap: one bit per subblock. Allocated similar to
			 * sbm.mb_states.
			 *
			 * A set bit means the corresponding subblock is
			 * plugged, otherwise it's unplugged.
			 *
			 * With 4 MiB subblocks, we manage 128 GiB of memory
			 * in one 4 KiB page.
			 */
			unsigned long *sb_states;
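
			/*
			 * Worked example for the numbers above (illustrative
			 * only): one 4 KiB page holds 4 KiB * 8 = 32768 bits,
			 * and 32768 subblocks * 4 MiB per subblock = 128 GiB
			 * of managed memory.
			 */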
		} sbm;

		struct {
			/* Id of the first big block of this device. */
			unsigned long first_bb_id;
			/* Id of the last usable big block of this device. */
			unsigned long last_usable_bb_id;
			/* Id of the next big block to prepare when needed. */
			unsigned long next_bb_id;

			/* Summary of all big block states. */
			unsigned long bb_count[VIRTIO_MEM_BBM_BB_COUNT];

			/* One byte state per big block. See sbm.mb_states. */
			uint8_t *bb_states;

			/* The block size used for plugging/adding/removing. */
			uint64_t bb_size;
		} bbm;
	};

	/*
	 * Mutex that protects the sbm.mb_count, sbm.mb_states,
	 * sbm.sb_states, bbm.bb_count, and bbm.bb_states.
	 *
	 * When this lock is held the pointers can't change, ONLINE and
	 * OFFLINE blocks can't change the state and no subblocks will get
	 * plugged/unplugged.
	 *
	 * In kdump mode, used to serialize requests, last_block_addr and
	 * last_block_plugged.
	 */
	struct mutex hotplug_mutex;
	bool hotplug_active;

	/* An error occurred we cannot handle - stop processing requests. */
	bool broken;

	/* Cached value of is_kdump_kernel() when the device was probed. */
	bool in_kdump;

	/* The driver is being removed. */
	spinlock_t removal_lock;
	bool removing;

	/* Timer for retrying to plug/unplug memory. */
	struct hrtimer retry_timer;
	unsigned int retry_timer_ms;
#define VIRTIO_MEM_RETRY_TIMER_MIN_MS		50000
#define VIRTIO_MEM_RETRY_TIMER_MAX_MS		300000

	/* Memory notifier (online/offline events). */
	struct notifier_block memory_notifier;

#ifdef CONFIG_PROC_VMCORE
	/* vmcore callback for /proc/vmcore handling in kdump mode */
	struct vmcore_cb vmcore_cb;
	uint64_t last_block_addr;
	bool last_block_plugged;
#endif /* CONFIG_PROC_VMCORE */

	/* Next device in the list of virtio-mem devices. */
	struct list_head next;
};

/*
 * We have to share a single online_page callback among all virtio-mem
 * devices. We use RCU to iterate the list in the callback.
 */
static DEFINE_MUTEX(virtio_mem_mutex);
static LIST_HEAD(virtio_mem_devices);

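/*
 * Illustrative sketch of the RCU pattern mentioned above (the actual
 * iteration happens in virtio_mem_online_page_cb() and may look different
 * in detail):
 *
 *	rcu_read_lock();
 *	list_for_each_entry_rcu(vm, &virtio_mem_devices, next) {
 *		// check whether the page belongs to this device ...
 *	}
 *	rcu_read_unlock();
 *
 * Writers (registration/unregistration) are serialized via virtio_mem_mutex.
 */
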
static void virtio_mem_online_page_cb(struct page *page, unsigned int order);
static void virtio_mem_fake_offline_going_offline(unsigned long pfn,
						  unsigned long nr_pages);
static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn,
						   unsigned long nr_pages);
static void virtio_mem_retry(struct virtio_mem *vm);
static int virtio_mem_create_resource(struct virtio_mem *vm);
static void virtio_mem_delete_resource(struct virtio_mem *vm);

/*
 * Register a virtio-mem device so it will be considered for the online_page
 * callback.
 */
static int register_virtio_mem_device(struct virtio_mem *vm)
{
	int rc = 0;

	/* First device registers the callback. */
	mutex_lock(&virtio_mem_mutex);
	if (list_empty(&virtio_mem_devices))
		rc = set_online_page_callback(&virtio_mem_online_page_cb);
	if (!rc)
		list_add_rcu(&vm->next, &virtio_mem_devices);
	mutex_unlock(&virtio_mem_mutex);

	return rc;
}

/*
 * Unregister a virtio-mem device so it will no longer be considered for the
 * online_page callback.
 */
static void unregister_virtio_mem_device(struct virtio_mem *vm)
{
	/* Last device unregisters the callback. */
	mutex_lock(&virtio_mem_mutex);
	list_del_rcu(&vm->next);
	if (list_empty(&virtio_mem_devices))
		restore_online_page_callback(&virtio_mem_online_page_cb);
	mutex_unlock(&virtio_mem_mutex);

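	/*
	 * Wait until concurrent online_page callbacks that might still be
	 * iterating the device list under RCU have finished.
	 */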
	synchronize_rcu();
}

/*
 * Calculate the memory block id of a given address.
 */
static unsigned long virtio_mem_phys_to_mb_id(unsigned long addr)
{
	return addr / memory_block_size_bytes();
}

/*
 * Calculate the physical start address of a given memory block id.
 */
static unsigned long virtio_mem_mb_id_to_phys(unsigned long mb_id)
{
	return mb_id * memory_block_size_bytes();
}

/*
 * Calculate the big block id of a given address.
 */
static unsigned long virtio_mem_phys_to_bb_id(struct virtio_mem *vm,
					      uint64_t addr)
{
	return addr / vm->bbm.bb_size;
}

/*
 * Calculate the physical start address of a given big block id.
 */
static uint64_t virtio_mem_bb_id_to_phys(struct virtio_mem *vm,
					 unsigned long bb_id)
{
	return bb_id * vm->bbm.bb_size;
}

/*
 * Calculate the subblock id of a given address.
 */
static unsigned long virtio_mem_phys_to_sb_id(struct virtio_mem *vm,
					      unsigned long addr)
{
	const unsigned long mb_id = virtio_mem_phys_to_mb_id(addr);
	const unsigned long mb_addr = virtio_mem_mb_id_to_phys(mb_id);

	return (addr - mb_addr) / vm->sbm.sb_size;
}

/*
 * Set the state of a big block, taking care of the state counter.
 */
static void virtio_mem_bbm_set_bb_state(struct virtio_mem *vm,
					unsigned long bb_id,
					enum virtio_mem_bbm_bb_state state)
{
	const unsigned long idx = bb_id - vm->bbm.first_bb_id;
	enum virtio_mem_bbm_bb_state old_state;

	old_state = vm->bbm.bb_states[idx];
	vm->bbm.bb_states[idx] = state;

	BUG_ON(vm->bbm.bb_count[old_state] == 0);
	vm->bbm.bb_count[old_state]--;
	vm->bbm.bb_count[state]++;
}

/*
 * Get the state of a big block.
 */
static enum virtio_mem_bbm_bb_state virtio_mem_bbm_get_bb_state(struct virtio_mem *vm,
								unsigned long bb_id)
{
	return vm->bbm.bb_states[bb_id - vm->bbm.first_bb_id];
}

/*
 * Prepare the big block state array for the next big block.
 */
static int virtio_mem_bbm_bb_states_prepare_next_bb(struct virtio_mem *vm)
{
	unsigned long old_bytes = vm->bbm.next_bb_id - vm->bbm.first_bb_id;
	unsigned long new_bytes = old_bytes + 1;
	int old_pages = PFN_UP(old_bytes);
	int new_pages = PFN_UP(new_bytes);
	uint8_t *new_array;

	if (vm->bbm.bb_states && old_pages == new_pages)
		return 0;

	new_array = vzalloc(new_pages * PAGE_SIZE);
	if (!new_array)
		return -ENOMEM;

	mutex_lock(&vm->hotplug_mutex);
	if (vm->bbm.bb_states)
		memcpy(new_array, vm->bbm.bb_states, old_pages * PAGE_SIZE);
	vfree(vm->bbm.bb_states);
	vm->bbm.bb_states = new_array;
	mutex_unlock(&vm->hotplug_mutex);

	return 0;
}

#define virtio_mem_bbm_for_each_bb(_vm, _bb_id, _state) \
	for (_bb_id = vm->bbm.first_bb_id; \
	     _bb_id < vm->bbm.next_bb_id && _vm->bbm.bb_count[_state]; \
	     _bb_id++) \
		if (virtio_mem_bbm_get_bb_state(_vm, _bb_id) == _state)

#define virtio_mem_bbm_for_each_bb_rev(_vm, _bb_id, _state) \
	for (_bb_id = vm->bbm.next_bb_id - 1; \
	     _bb_id >= vm->bbm.first_bb_id && _vm->bbm.bb_count[_state]; \
	     _bb_id--) \
		if (virtio_mem_bbm_get_bb_state(_vm, _bb_id) == _state)

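/*
 * Illustrative usage of the iteration helpers above (hypothetical snippet,
 * not taken from this file): walk all big blocks that were added to Linux
 * while holding vm->hotplug_mutex, which keeps the state array stable.
 *
 *	unsigned long bb_id;
 *
 *	mutex_lock(&vm->hotplug_mutex);
 *	virtio_mem_bbm_for_each_bb(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) {
 *		// operate on the big block starting at
 *		// virtio_mem_bb_id_to_phys(vm, bb_id) ...
 *	}
 *	mutex_unlock(&vm->hotplug_mutex);
 */
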
/*
 * Set the state of a memory block, taking care of the state counter.
 */
static void virtio_mem_sbm_set_mb_state(struct virtio_mem *vm,
					unsigned long mb_id, uint8_t state)
{
	const unsigned long idx = mb_id - vm->sbm.first_mb_id;
	uint8_t old_state;

	old_state = vm->sbm.mb_states[idx];
	vm->sbm.mb_states[idx] = state;

	BUG_ON(vm->sbm.mb_count[old_state] == 0);
	vm->sbm.mb_count[old_state]--;
	vm->sbm.mb_count[state]++;
}

/*
 * Get the state of a memory block.
 */
static uint8_t virtio_mem_sbm_get_mb_state(struct virtio_mem *vm,
					   unsigned long mb_id)
{
	const unsigned long idx = mb_id - vm->sbm.first_mb_id;

	return vm->sbm.mb_states[idx];
}

/*
 * Prepare the state array for the next memory block.
 */
static int virtio_mem_sbm_mb_states_prepare_next_mb(struct virtio_mem *vm)
{
	int old_pages = PFN_UP(vm->sbm.next_mb_id - vm->sbm.first_mb_id);
	int new_pages = PFN_UP(vm->sbm.next_mb_id - vm->sbm.first_mb_id + 1);
	uint8_t *new_array;

	if (vm->sbm.mb_states && old_pages == new_pages)
		return 0;

	new_array = vzalloc(new_pages * PAGE_SIZE);
	if (!new_array)
		return -ENOMEM;

	mutex_lock(&vm->hotplug_mutex);
	if (vm->sbm.mb_states)
		memcpy(new_array, vm->sbm.mb_states, old_pages * PAGE_SIZE);
	vfree(vm->sbm.mb_states);
	vm->sbm.mb_states = new_array;
	mutex_unlock(&vm->hotplug_mutex);

	return 0;
}

#define virtio_mem_sbm_for_each_mb(_vm, _mb_id, _state) \
	for (_mb_id = _vm->sbm.first_mb_id; \
	     _mb_id < _vm->sbm.next_mb_id && _vm->sbm.mb_count[_state]; \
	     _mb_id++) \
		if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state)

#define virtio_mem_sbm_for_each_mb_rev(_vm, _mb_id, _state) \
	for (_mb_id = _vm->sbm.next_mb_id - 1; \
	     _mb_id >= _vm->sbm.first_mb_id && _vm->sbm.mb_count[_state]; \
	     _mb_id--) \
		if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state)

/*
 * Calculate the bit number in the subblock bitmap for the given subblock
 * inside the given memory block.
 */
static int virtio_mem_sbm_sb_state_bit_nr(struct virtio_mem *vm,
					  unsigned long mb_id, int sb_id)
{
	return (mb_id - vm->sbm.first_mb_id) * vm->sbm.sbs_per_mb + sb_id;
}

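/*
 * Worked example (illustrative numbers only): with sbm.first_mb_id == 8,
 * sbm.sbs_per_mb == 32, mb_id == 10 and sb_id == 5, the subblock is tracked
 * at bit (10 - 8) * 32 + 5 == 69 of sbm.sb_states.
 */
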
/*
 * Mark all selected subblocks plugged.
 *
 * Will not modify the state of the memory block.
 */
static void virtio_mem_sbm_set_sb_plugged(struct virtio_mem *vm,
					  unsigned long mb_id, int sb_id,
					  int count)
{
	const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id);

	__bitmap_set(vm->sbm.sb_states, bit, count);
}

/*
 * Mark all selected subblocks unplugged.
 *
 * Will not modify the state of the memory block.
 */
static void virtio_mem_sbm_set_sb_unplugged(struct virtio_mem *vm,
					    unsigned long mb_id, int sb_id,
					    int count)
{
	const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id);

	__bitmap_clear(vm->sbm.sb_states, bit, count);
}

/*
 * Test if all selected subblocks are plugged.
 */
static bool virtio_mem_sbm_test_sb_plugged(struct virtio_mem *vm,
					   unsigned long mb_id, int sb_id,
					   int count)
{
	const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id);

	if (count == 1)
		return test_bit(bit, vm->sbm.sb_states);

	/* TODO: Helper similar to bitmap_set() */
	return find_next_zero_bit(vm->sbm.sb_states, bit + count, bit) >=
	       bit + count;
}

/*
 * Test if all selected subblocks are unplugged.
 */
static bool virtio_mem_sbm_test_sb_unplugged(struct virtio_mem *vm,
					     unsigned long mb_id, int sb_id,
					     int count)
{
	const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id);

	/* TODO: Helper similar to bitmap_set() */
	return find_next_bit(vm->sbm.sb_states, bit + count, bit) >=
	       bit + count;
}

/*
 * Find the first unplugged subblock. Returns vm->sbm.sbs_per_mb in case
 * there is none.
 */
static int virtio_mem_sbm_first_unplugged_sb(struct virtio_mem *vm,
					    unsigned long mb_id)
{
	const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, 0);

	return find_next_zero_bit(vm->sbm.sb_states,
				  bit + vm->sbm.sbs_per_mb, bit) - bit;
}

/*
 * Prepare the subblock bitmap for the next memory block.
 */
static int virtio_mem_sbm_sb_states_prepare_next_mb(struct virtio_mem *vm)
{
	const unsigned long old_nb_mb = vm->sbm.next_mb_id - vm->sbm.first_mb_id;
	const unsigned long old_nb_bits = old_nb_mb * vm->sbm.sbs_per_mb;
	const unsigned long new_nb_bits = (old_nb_mb + 1) * vm->sbm.sbs_per_mb;
	int old_pages = PFN_UP(BITS_TO_LONGS(old_nb_bits) * sizeof(long));
	int new_pages = PFN_UP(BITS_TO_LONGS(new_nb_bits) * sizeof(long));
	unsigned long *new_bitmap, *old_bitmap;

	if (vm->sbm.sb_states && old_pages == new_pages)
		return 0;

	new_bitmap = vzalloc(new_pages * PAGE_SIZE);
	if (!new_bitmap)
		return -ENOMEM;

	mutex_lock(&vm->hotplug_mutex);
	if (vm->sbm.sb_states)
		memcpy(new_bitmap, vm->sbm.sb_states, old_pages * PAGE_SIZE);

	old_bitmap = vm->sbm.sb_states;
	vm->sbm.sb_states = new_bitmap;
	mutex_unlock(&vm->hotplug_mutex);

	vfree(old_bitmap);
	return 0;
}

/*
 * Test if we could add memory without creating too much offline memory -
 * to avoid running OOM if memory is getting onlined deferred.
 */
static bool virtio_mem_could_add_memory(struct virtio_mem *vm, uint64_t size)
{
	if (WARN_ON_ONCE(size > vm->offline_threshold))
		return false;

	return atomic64_read(&vm->offline_size) + size <= vm->offline_threshold;
}

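/*
 * Illustrative example (assumed numbers, not a guarantee): with
 * offline_threshold == 1 GiB and 768 MiB currently offline, adding a
 * 128 MiB memory block is still allowed (768 MiB + 128 MiB <= 1 GiB),
 * while adding a 512 MiB big block would not be, until more memory gets
 * onlined.
 */
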
/*
 * Try adding memory to Linux. Will usually only fail if out of memory.
 *
 * Must not be called with the vm->hotplug_mutex held (possible deadlock with
 * onlining code).
 *
 * Will not modify the state of memory blocks in virtio-mem.
 */
static int virtio_mem_add_memory(struct virtio_mem *vm, uint64_t addr,
				 uint64_t size)
{
	int rc;

	/*
	 * When force-unloading the driver and we still have memory added to
	 * Linux, the resource name has to stay.
	 */
	if (!vm->resource_name) {
		vm->resource_name = kstrdup_const("System RAM (virtio_mem)",
						  GFP_KERNEL);
		if (!vm->resource_name)
			return -ENOMEM;
	}

	dev_dbg(&vm->vdev->dev, "adding memory: 0x%llx - 0x%llx\n", addr,
		addr + size - 1);
	/* Memory might get onlined immediately. */
	atomic64_add(size, &vm->offline_size);
	rc = add_memory_driver_managed(vm->mgid, addr, size, vm->resource_name,
				       MHP_MERGE_RESOURCE | MHP_NID_IS_MGID);
	if (rc) {
		atomic64_sub(size, &vm->offline_size);
		dev_warn(&vm->vdev->dev, "adding memory failed: %d\n", rc);
		/*
		 * TODO: Linux MM does not properly clean up yet in all cases
		 * where adding of memory failed - especially on -ENOMEM.
		 */
	}
	return rc;
}

/*
 * See virtio_mem_add_memory(): Try adding a single Linux memory block.
 */
static int virtio_mem_sbm_add_mb(struct virtio_mem *vm, unsigned long mb_id)
{
	const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id);
	const uint64_t size = memory_block_size_bytes();

	return virtio_mem_add_memory(vm, addr, size);
}

/*
 * See virtio_mem_add_memory(): Try adding a big block.
 */
static int virtio_mem_bbm_add_bb(struct virtio_mem *vm, unsigned long bb_id)
{
	const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
	const uint64_t size = vm->bbm.bb_size;

	return virtio_mem_add_memory(vm, addr, size);
}

/*
 * Try removing memory from Linux. Will only fail if memory blocks aren't
 * offline.
 *
 * Must not be called with the vm->hotplug_mutex held (possible deadlock with
 * onlining code).
 *
 * Will not modify the state of memory blocks in virtio-mem.
 */
static int virtio_mem_remove_memory(struct virtio_mem *vm, uint64_t addr,
				    uint64_t size)
{
	int rc;

	dev_dbg(&vm->vdev->dev, "removing memory: 0x%llx - 0x%llx\n", addr,
		addr + size - 1);
	rc = remove_memory(addr, size);
	if (!rc) {
		atomic64_sub(size, &vm->offline_size);
		/*
		 * We might have freed up memory we can now unplug, retry
		 * immediately instead of waiting.
		 */
		virtio_mem_retry(vm);
	} else {
		dev_dbg(&vm->vdev->dev, "removing memory failed: %d\n", rc);
	}
	return rc;
}

/*
 * See virtio_mem_remove_memory(): Try removing a single Linux memory block.
 */
static int virtio_mem_sbm_remove_mb(struct virtio_mem *vm, unsigned long mb_id)
{
	const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id);
	const uint64_t size = memory_block_size_bytes();

	return virtio_mem_remove_memory(vm, addr, size);
}

/*
 * Try offlining and removing memory from Linux.
 *
 * Must not be called with the vm->hotplug_mutex held (possible deadlock with
 * onlining code).
 *
 * Will not modify the state of memory blocks in virtio-mem.
 */
static int virtio_mem_offline_and_remove_memory(struct virtio_mem *vm,
						uint64_t addr,
						uint64_t size)
{
	int rc;

	dev_dbg(&vm->vdev->dev,
		"offlining and removing memory: 0x%llx - 0x%llx\n", addr,
		addr + size - 1);

	rc = offline_and_remove_memory(addr, size);
	if (!rc) {
		atomic64_sub(size, &vm->offline_size);
		/*
		 * We might have freed up memory we can now unplug, retry
		 * immediately instead of waiting.
		 */
		virtio_mem_retry(vm);
		return 0;
	}
	dev_dbg(&vm->vdev->dev, "offlining and removing memory failed: %d\n", rc);
	/*
	 * We don't really expect this to fail, because we fake-offlined all
	 * memory already. But it could fail in corner cases.
	 */
	WARN_ON_ONCE(rc != -ENOMEM && rc != -EBUSY);
	return rc == -ENOMEM ? -ENOMEM : -EBUSY;
}

/*
 * See virtio_mem_offline_and_remove_memory(): Try offlining and removing
 * a single Linux memory block.
 */
static int virtio_mem_sbm_offline_and_remove_mb(struct virtio_mem *vm,
						unsigned long mb_id)
{
	const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id);
	const uint64_t size = memory_block_size_bytes();

	return virtio_mem_offline_and_remove_memory(vm, addr, size);
}

/*
 * Try (offlining and) removing memory from Linux in case all subblocks are
 * unplugged. Can be called on online and offline memory blocks.
 *
 * May modify the state of memory blocks in virtio-mem.
 */
static int virtio_mem_sbm_try_remove_unplugged_mb(struct virtio_mem *vm,
						  unsigned long mb_id)
{
	int rc;

	/*
	 * Once all subblocks of a memory block were unplugged, offline and
	 * remove it.
	 */
	if (!virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb))
		return 0;

	/* offline_and_remove_memory() works for online and offline memory. */
	mutex_unlock(&vm->hotplug_mutex);
	rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id);
	mutex_lock(&vm->hotplug_mutex);
	if (!rc)
		virtio_mem_sbm_set_mb_state(vm, mb_id,
					    VIRTIO_MEM_SBM_MB_UNUSED);
	return rc;
}

/*
 * See virtio_mem_offline_and_remove_memory(): Try offlining and removing
 * all Linux memory blocks covered by the big block.
 */
static int virtio_mem_bbm_offline_and_remove_bb(struct virtio_mem *vm,
						unsigned long bb_id)
{
	const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
	const uint64_t size = vm->bbm.bb_size;

	return virtio_mem_offline_and_remove_memory(vm, addr, size);
}

/*
 * Trigger the workqueue so the device can perform its magic.
 */
static void virtio_mem_retry(struct virtio_mem *vm)
{
	unsigned long flags;

	spin_lock_irqsave(&vm->removal_lock, flags);
	if (!vm->removing)
		queue_work(system_freezable_wq, &vm->wq);
	spin_unlock_irqrestore(&vm->removal_lock, flags);
}

static int virtio_mem_translate_node_id(struct virtio_mem *vm, uint16_t node_id)
{
	int node = NUMA_NO_NODE;

#if defined(CONFIG_ACPI_NUMA)
	if (virtio_has_feature(vm->vdev, VIRTIO_MEM_F_ACPI_PXM))
		node = pxm_to_node(node_id);
#endif
	return node;
}

/*
 * Test if a virtio-mem device overlaps with the given range. Can be called
 * from (notifier) callbacks lockless.
 */
static bool virtio_mem_overlaps_range(struct virtio_mem *vm, uint64_t start,
				      uint64_t size)
{
	return start < vm->addr + vm->region_size && vm->addr < start + size;
}

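/*
 * Worked example for the overlap check above (illustrative numbers only):
 * a device covering [0x100000000, 0x140000000) and a range starting at
 * 0x13f000000 with size 0x2000000 overlap, because
 * 0x13f000000 < 0x140000000 and 0x100000000 < 0x141000000.
 */
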
8505f1f79bbSDavid Hildenbrand /*
8518464e3bdSDavid Hildenbrand  * Test if a virtio-mem device contains a given range. Can be called from
8525f1f79bbSDavid Hildenbrand  * (notifier) callbacks lockless.
8535f1f79bbSDavid Hildenbrand  */
virtio_mem_contains_range(struct virtio_mem * vm,uint64_t start,uint64_t size)8548464e3bdSDavid Hildenbrand static bool virtio_mem_contains_range(struct virtio_mem *vm, uint64_t start,
8558464e3bdSDavid Hildenbrand 				      uint64_t size)
8565f1f79bbSDavid Hildenbrand {
8578464e3bdSDavid Hildenbrand 	return start >= vm->addr && start + size <= vm->addr + vm->region_size;
8585f1f79bbSDavid Hildenbrand }
8595f1f79bbSDavid Hildenbrand 
virtio_mem_sbm_notify_going_online(struct virtio_mem * vm,unsigned long mb_id)860d46dfb62SDavid Hildenbrand static int virtio_mem_sbm_notify_going_online(struct virtio_mem *vm,
86127f85279SDavid Hildenbrand 					      unsigned long mb_id)
8625f1f79bbSDavid Hildenbrand {
86399f0b55eSDavid Hildenbrand 	switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) {
86499f0b55eSDavid Hildenbrand 	case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL:
86599f0b55eSDavid Hildenbrand 	case VIRTIO_MEM_SBM_MB_OFFLINE:
8665f1f79bbSDavid Hildenbrand 		return NOTIFY_OK;
8675f1f79bbSDavid Hildenbrand 	default:
8685f1f79bbSDavid Hildenbrand 		break;
8695f1f79bbSDavid Hildenbrand 	}
8705f1f79bbSDavid Hildenbrand 	dev_warn_ratelimited(&vm->vdev->dev,
8715f1f79bbSDavid Hildenbrand 			     "memory block onlining denied\n");
8725f1f79bbSDavid Hildenbrand 	return NOTIFY_BAD;
8735f1f79bbSDavid Hildenbrand }
8745f1f79bbSDavid Hildenbrand 
virtio_mem_sbm_notify_offline(struct virtio_mem * vm,unsigned long mb_id)875d46dfb62SDavid Hildenbrand static void virtio_mem_sbm_notify_offline(struct virtio_mem *vm,
8765f1f79bbSDavid Hildenbrand 					  unsigned long mb_id)
8775f1f79bbSDavid Hildenbrand {
87899f0b55eSDavid Hildenbrand 	switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) {
879c740bb97SDavid Hildenbrand 	case VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL:
880c740bb97SDavid Hildenbrand 	case VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL:
88199f0b55eSDavid Hildenbrand 		virtio_mem_sbm_set_mb_state(vm, mb_id,
88299f0b55eSDavid Hildenbrand 					    VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL);
8835f1f79bbSDavid Hildenbrand 		break;
884c740bb97SDavid Hildenbrand 	case VIRTIO_MEM_SBM_MB_KERNEL:
885c740bb97SDavid Hildenbrand 	case VIRTIO_MEM_SBM_MB_MOVABLE:
88699f0b55eSDavid Hildenbrand 		virtio_mem_sbm_set_mb_state(vm, mb_id,
88799f0b55eSDavid Hildenbrand 					    VIRTIO_MEM_SBM_MB_OFFLINE);
8885f1f79bbSDavid Hildenbrand 		break;
8895f1f79bbSDavid Hildenbrand 	default:
8905f1f79bbSDavid Hildenbrand 		BUG();
8915f1f79bbSDavid Hildenbrand 		break;
8925f1f79bbSDavid Hildenbrand 	}
8935f1f79bbSDavid Hildenbrand }
8945f1f79bbSDavid Hildenbrand 
virtio_mem_sbm_notify_online(struct virtio_mem * vm,unsigned long mb_id,unsigned long start_pfn)895d46dfb62SDavid Hildenbrand static void virtio_mem_sbm_notify_online(struct virtio_mem *vm,
896c740bb97SDavid Hildenbrand 					 unsigned long mb_id,
897c740bb97SDavid Hildenbrand 					 unsigned long start_pfn)
8985f1f79bbSDavid Hildenbrand {
89907252dfeSKefeng Wang 	const bool is_movable = is_zone_movable_page(pfn_to_page(start_pfn));
900c740bb97SDavid Hildenbrand 	int new_state;
901c740bb97SDavid Hildenbrand 
90299f0b55eSDavid Hildenbrand 	switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) {
90399f0b55eSDavid Hildenbrand 	case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL:
904c740bb97SDavid Hildenbrand 		new_state = VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL;
905c740bb97SDavid Hildenbrand 		if (is_movable)
906c740bb97SDavid Hildenbrand 			new_state = VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL;
9075f1f79bbSDavid Hildenbrand 		break;
90899f0b55eSDavid Hildenbrand 	case VIRTIO_MEM_SBM_MB_OFFLINE:
909c740bb97SDavid Hildenbrand 		new_state = VIRTIO_MEM_SBM_MB_KERNEL;
910c740bb97SDavid Hildenbrand 		if (is_movable)
911c740bb97SDavid Hildenbrand 			new_state = VIRTIO_MEM_SBM_MB_MOVABLE;
9125f1f79bbSDavid Hildenbrand 		break;
9135f1f79bbSDavid Hildenbrand 	default:
9145f1f79bbSDavid Hildenbrand 		BUG();
9155f1f79bbSDavid Hildenbrand 		break;
9165f1f79bbSDavid Hildenbrand 	}
917c740bb97SDavid Hildenbrand 	virtio_mem_sbm_set_mb_state(vm, mb_id, new_state);
9185f1f79bbSDavid Hildenbrand }
9195f1f79bbSDavid Hildenbrand 
virtio_mem_sbm_notify_going_offline(struct virtio_mem * vm,unsigned long mb_id)920d46dfb62SDavid Hildenbrand static void virtio_mem_sbm_notify_going_offline(struct virtio_mem *vm,
9218e5c921cSDavid Hildenbrand 						unsigned long mb_id)
9228e5c921cSDavid Hildenbrand {
923905c4c51SDavid Hildenbrand 	const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size);
9248e5c921cSDavid Hildenbrand 	unsigned long pfn;
9257a34c77dSDavid Hildenbrand 	int sb_id;
9268e5c921cSDavid Hildenbrand 
927905c4c51SDavid Hildenbrand 	for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) {
92854c6a6baSDavid Hildenbrand 		if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1))
9298e5c921cSDavid Hildenbrand 			continue;
9308e5c921cSDavid Hildenbrand 		pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
931905c4c51SDavid Hildenbrand 			       sb_id * vm->sbm.sb_size);
9327a34c77dSDavid Hildenbrand 		virtio_mem_fake_offline_going_offline(pfn, nr_pages);
9338e5c921cSDavid Hildenbrand 	}
9348e5c921cSDavid Hildenbrand }
9358e5c921cSDavid Hildenbrand 
936d46dfb62SDavid Hildenbrand static void virtio_mem_sbm_notify_cancel_offline(struct virtio_mem *vm,
9378e5c921cSDavid Hildenbrand 						 unsigned long mb_id)
9388e5c921cSDavid Hildenbrand {
939905c4c51SDavid Hildenbrand 	const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size);
9408e5c921cSDavid Hildenbrand 	unsigned long pfn;
9417a34c77dSDavid Hildenbrand 	int sb_id;
9428e5c921cSDavid Hildenbrand 
943905c4c51SDavid Hildenbrand 	for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) {
94454c6a6baSDavid Hildenbrand 		if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1))
9458e5c921cSDavid Hildenbrand 			continue;
9468e5c921cSDavid Hildenbrand 		pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
947905c4c51SDavid Hildenbrand 			       sb_id * vm->sbm.sb_size);
9487a34c77dSDavid Hildenbrand 		virtio_mem_fake_offline_cancel_offline(pfn, nr_pages);
9498e5c921cSDavid Hildenbrand 	}
9508e5c921cSDavid Hildenbrand }
9518e5c921cSDavid Hildenbrand 
9523711387aSDavid Hildenbrand static void virtio_mem_bbm_notify_going_offline(struct virtio_mem *vm,
9533711387aSDavid Hildenbrand 						unsigned long bb_id,
9543711387aSDavid Hildenbrand 						unsigned long pfn,
9553711387aSDavid Hildenbrand 						unsigned long nr_pages)
9563711387aSDavid Hildenbrand {
9573711387aSDavid Hildenbrand 	/*
9583711387aSDavid Hildenbrand 	 * When marked as "fake-offline", all online memory of this device block
9593711387aSDavid Hildenbrand 	 * is allocated by us. Otherwise, we don't have any memory allocated.
9603711387aSDavid Hildenbrand 	 */
9613711387aSDavid Hildenbrand 	if (virtio_mem_bbm_get_bb_state(vm, bb_id) !=
9623711387aSDavid Hildenbrand 	    VIRTIO_MEM_BBM_BB_FAKE_OFFLINE)
9633711387aSDavid Hildenbrand 		return;
9643711387aSDavid Hildenbrand 	virtio_mem_fake_offline_going_offline(pfn, nr_pages);
9653711387aSDavid Hildenbrand }
9663711387aSDavid Hildenbrand 
9673711387aSDavid Hildenbrand static void virtio_mem_bbm_notify_cancel_offline(struct virtio_mem *vm,
9683711387aSDavid Hildenbrand 						 unsigned long bb_id,
9693711387aSDavid Hildenbrand 						 unsigned long pfn,
9703711387aSDavid Hildenbrand 						 unsigned long nr_pages)
9713711387aSDavid Hildenbrand {
9723711387aSDavid Hildenbrand 	if (virtio_mem_bbm_get_bb_state(vm, bb_id) !=
9733711387aSDavid Hildenbrand 	    VIRTIO_MEM_BBM_BB_FAKE_OFFLINE)
9743711387aSDavid Hildenbrand 		return;
9753711387aSDavid Hildenbrand 	virtio_mem_fake_offline_cancel_offline(pfn, nr_pages);
9763711387aSDavid Hildenbrand }
9773711387aSDavid Hildenbrand 
9785f1f79bbSDavid Hildenbrand /*
9795f1f79bbSDavid Hildenbrand  * This callback will either be called synchronously from add_memory() or
9805f1f79bbSDavid Hildenbrand  * asynchronously (e.g., triggered via user space). We have to be careful
9815f1f79bbSDavid Hildenbrand  * with locking when calling add_memory().
9825f1f79bbSDavid Hildenbrand  */
9835f1f79bbSDavid Hildenbrand static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
9845f1f79bbSDavid Hildenbrand 					 unsigned long action, void *arg)
9855f1f79bbSDavid Hildenbrand {
9865f1f79bbSDavid Hildenbrand 	struct virtio_mem *vm = container_of(nb, struct virtio_mem,
9875f1f79bbSDavid Hildenbrand 					     memory_notifier);
9885f1f79bbSDavid Hildenbrand 	struct memory_notify *mhp = arg;
9895f1f79bbSDavid Hildenbrand 	const unsigned long start = PFN_PHYS(mhp->start_pfn);
9905f1f79bbSDavid Hildenbrand 	const unsigned long size = PFN_PHYS(mhp->nr_pages);
9915f1f79bbSDavid Hildenbrand 	int rc = NOTIFY_OK;
9924ba50cd3SDavid Hildenbrand 	unsigned long id;
9935f1f79bbSDavid Hildenbrand 
9945f1f79bbSDavid Hildenbrand 	if (!virtio_mem_overlaps_range(vm, start, size))
9955f1f79bbSDavid Hildenbrand 		return NOTIFY_DONE;
9965f1f79bbSDavid Hildenbrand 
9974ba50cd3SDavid Hildenbrand 	if (vm->in_sbm) {
9984ba50cd3SDavid Hildenbrand 		id = virtio_mem_phys_to_mb_id(start);
9995f1f79bbSDavid Hildenbrand 		/*
10004ba50cd3SDavid Hildenbrand 		 * In SBM, we add memory in separate memory blocks - we expect
10014ba50cd3SDavid Hildenbrand 		 * it to be onlined/offlined in the same granularity. Bail out
10024ba50cd3SDavid Hildenbrand 		 * if this ever changes.
10035f1f79bbSDavid Hildenbrand 		 */
10045f1f79bbSDavid Hildenbrand 		if (WARN_ON_ONCE(size != memory_block_size_bytes() ||
10055f1f79bbSDavid Hildenbrand 				 !IS_ALIGNED(start, memory_block_size_bytes())))
10065f1f79bbSDavid Hildenbrand 			return NOTIFY_BAD;
10074ba50cd3SDavid Hildenbrand 	} else {
10084ba50cd3SDavid Hildenbrand 		id = virtio_mem_phys_to_bb_id(vm, start);
10094ba50cd3SDavid Hildenbrand 		/*
10104ba50cd3SDavid Hildenbrand 		 * In BBM, we only care about onlining/offlining happening
10114ba50cd3SDavid Hildenbrand 		 * within a single big block, we don't care about the
10124ba50cd3SDavid Hildenbrand 		 * actual granularity as we don't track individual Linux
10134ba50cd3SDavid Hildenbrand 		 * memory blocks.
10144ba50cd3SDavid Hildenbrand 		 */
10154ba50cd3SDavid Hildenbrand 		if (WARN_ON_ONCE(id != virtio_mem_phys_to_bb_id(vm, start + size - 1)))
10164ba50cd3SDavid Hildenbrand 			return NOTIFY_BAD;
10174ba50cd3SDavid Hildenbrand 	}
10185f1f79bbSDavid Hildenbrand 
10195f1f79bbSDavid Hildenbrand 	/*
10205f1f79bbSDavid Hildenbrand 	 * Avoid circular locking lockdep warnings. We lock the mutex
10215f1f79bbSDavid Hildenbrand 	 * e.g., in MEM_GOING_ONLINE and unlock it in MEM_ONLINE. The
10225f1f79bbSDavid Hildenbrand 	 * blocking_notifier_call_chain() has its own lock, which gets unlocked
10235f1f79bbSDavid Hildenbrand 	 * between both notifier calls and will bail out. False positive.
10245f1f79bbSDavid Hildenbrand 	 */
10255f1f79bbSDavid Hildenbrand 	lockdep_off();
10265f1f79bbSDavid Hildenbrand 
10275f1f79bbSDavid Hildenbrand 	switch (action) {
10285f1f79bbSDavid Hildenbrand 	case MEM_GOING_OFFLINE:
10295f1f79bbSDavid Hildenbrand 		mutex_lock(&vm->hotplug_mutex);
10305f1f79bbSDavid Hildenbrand 		if (vm->removing) {
10315f1f79bbSDavid Hildenbrand 			rc = notifier_from_errno(-EBUSY);
10325f1f79bbSDavid Hildenbrand 			mutex_unlock(&vm->hotplug_mutex);
10335f1f79bbSDavid Hildenbrand 			break;
10345f1f79bbSDavid Hildenbrand 		}
10355f1f79bbSDavid Hildenbrand 		vm->hotplug_active = true;
10364ba50cd3SDavid Hildenbrand 		if (vm->in_sbm)
10374ba50cd3SDavid Hildenbrand 			virtio_mem_sbm_notify_going_offline(vm, id);
10383711387aSDavid Hildenbrand 		else
10393711387aSDavid Hildenbrand 			virtio_mem_bbm_notify_going_offline(vm, id,
10403711387aSDavid Hildenbrand 							    mhp->start_pfn,
10413711387aSDavid Hildenbrand 							    mhp->nr_pages);
10425f1f79bbSDavid Hildenbrand 		break;
10435f1f79bbSDavid Hildenbrand 	case MEM_GOING_ONLINE:
10445f1f79bbSDavid Hildenbrand 		mutex_lock(&vm->hotplug_mutex);
10455f1f79bbSDavid Hildenbrand 		if (vm->removing) {
10465f1f79bbSDavid Hildenbrand 			rc = notifier_from_errno(-EBUSY);
10475f1f79bbSDavid Hildenbrand 			mutex_unlock(&vm->hotplug_mutex);
10485f1f79bbSDavid Hildenbrand 			break;
10495f1f79bbSDavid Hildenbrand 		}
10505f1f79bbSDavid Hildenbrand 		vm->hotplug_active = true;
10514ba50cd3SDavid Hildenbrand 		if (vm->in_sbm)
10524ba50cd3SDavid Hildenbrand 			rc = virtio_mem_sbm_notify_going_online(vm, id);
10535f1f79bbSDavid Hildenbrand 		break;
10545f1f79bbSDavid Hildenbrand 	case MEM_OFFLINE:
10554ba50cd3SDavid Hildenbrand 		if (vm->in_sbm)
10564ba50cd3SDavid Hildenbrand 			virtio_mem_sbm_notify_offline(vm, id);
10571d33c2caSDavid Hildenbrand 
105898ff9f94SDavid Hildenbrand 		atomic64_add(size, &vm->offline_size);
10591d33c2caSDavid Hildenbrand 		/*
10601d33c2caSDavid Hildenbrand 		 * Trigger the workqueue. Now that we have some offline memory,
10611d33c2caSDavid Hildenbrand 		 * maybe we can handle pending unplug requests.
10621d33c2caSDavid Hildenbrand 		 */
10631d33c2caSDavid Hildenbrand 		if (!unplug_online)
10641d33c2caSDavid Hildenbrand 			virtio_mem_retry(vm);
10651d33c2caSDavid Hildenbrand 
10665f1f79bbSDavid Hildenbrand 		vm->hotplug_active = false;
10675f1f79bbSDavid Hildenbrand 		mutex_unlock(&vm->hotplug_mutex);
10685f1f79bbSDavid Hildenbrand 		break;
10695f1f79bbSDavid Hildenbrand 	case MEM_ONLINE:
10704ba50cd3SDavid Hildenbrand 		if (vm->in_sbm)
1071c740bb97SDavid Hildenbrand 			virtio_mem_sbm_notify_online(vm, id, mhp->start_pfn);
107298ff9f94SDavid Hildenbrand 
107398ff9f94SDavid Hildenbrand 		atomic64_sub(size, &vm->offline_size);
107498ff9f94SDavid Hildenbrand 		/*
107598ff9f94SDavid Hildenbrand 		 * Start adding more memory once we onlined half of our
107698ff9f94SDavid Hildenbrand 		 * threshold. Don't trigger if it's possibly due to our action
107798ff9f94SDavid Hildenbrand 		 * (e.g., us adding memory which gets onlined immediately from
107898ff9f94SDavid Hildenbrand 		 * the core).
107998ff9f94SDavid Hildenbrand 		 */
108098ff9f94SDavid Hildenbrand 		if (!atomic_read(&vm->wq_active) &&
108198ff9f94SDavid Hildenbrand 		    virtio_mem_could_add_memory(vm, vm->offline_threshold / 2))
108298ff9f94SDavid Hildenbrand 			virtio_mem_retry(vm);
108398ff9f94SDavid Hildenbrand 
10845f1f79bbSDavid Hildenbrand 		vm->hotplug_active = false;
10855f1f79bbSDavid Hildenbrand 		mutex_unlock(&vm->hotplug_mutex);
10865f1f79bbSDavid Hildenbrand 		break;
10875f1f79bbSDavid Hildenbrand 	case MEM_CANCEL_OFFLINE:
10888e5c921cSDavid Hildenbrand 		if (!vm->hotplug_active)
10898e5c921cSDavid Hildenbrand 			break;
10904ba50cd3SDavid Hildenbrand 		if (vm->in_sbm)
10914ba50cd3SDavid Hildenbrand 			virtio_mem_sbm_notify_cancel_offline(vm, id);
10923711387aSDavid Hildenbrand 		else
10933711387aSDavid Hildenbrand 			virtio_mem_bbm_notify_cancel_offline(vm, id,
10943711387aSDavid Hildenbrand 							     mhp->start_pfn,
10953711387aSDavid Hildenbrand 							     mhp->nr_pages);
10968e5c921cSDavid Hildenbrand 		vm->hotplug_active = false;
10978e5c921cSDavid Hildenbrand 		mutex_unlock(&vm->hotplug_mutex);
10988e5c921cSDavid Hildenbrand 		break;
10995f1f79bbSDavid Hildenbrand 	case MEM_CANCEL_ONLINE:
11005f1f79bbSDavid Hildenbrand 		if (!vm->hotplug_active)
11015f1f79bbSDavid Hildenbrand 			break;
11025f1f79bbSDavid Hildenbrand 		vm->hotplug_active = false;
11035f1f79bbSDavid Hildenbrand 		mutex_unlock(&vm->hotplug_mutex);
11045f1f79bbSDavid Hildenbrand 		break;
11055f1f79bbSDavid Hildenbrand 	default:
11065f1f79bbSDavid Hildenbrand 		break;
11075f1f79bbSDavid Hildenbrand 	}
11085f1f79bbSDavid Hildenbrand 
11095f1f79bbSDavid Hildenbrand 	lockdep_on();
11105f1f79bbSDavid Hildenbrand 
11115f1f79bbSDavid Hildenbrand 	return rc;
11125f1f79bbSDavid Hildenbrand }
11135f1f79bbSDavid Hildenbrand 
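/*
 * Minimal sketch of how a notifier like the one above is typically wired up
 * (the actual registration happens elsewhere in this driver; the helper names
 * below are made up and the block is not compiled):
 */
#if 0	/* illustrative only */
static int virtio_mem_example_register_notifier(struct virtio_mem *vm)
{
	vm->memory_notifier.notifier_call = virtio_mem_memory_notifier_cb;
	return register_memory_notifier(&vm->memory_notifier);
}

static void virtio_mem_example_unregister_notifier(struct virtio_mem *vm)
{
	unregister_memory_notifier(&vm->memory_notifier);
}
#endif
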
11145f1f79bbSDavid Hildenbrand /*
1115255f5985SDavid Hildenbrand  * Set a range of pages PG_offline. Remember pages that were never onlined
1116255f5985SDavid Hildenbrand  * (via generic_online_page()) using PageDirty().
11175f1f79bbSDavid Hildenbrand  */
11185f1f79bbSDavid Hildenbrand static void virtio_mem_set_fake_offline(unsigned long pfn,
11192a628511SDavid Hildenbrand 					unsigned long nr_pages, bool onlined)
11205f1f79bbSDavid Hildenbrand {
11216cc26d77SDavid Hildenbrand 	page_offline_begin();
1122255f5985SDavid Hildenbrand 	for (; nr_pages--; pfn++) {
1123255f5985SDavid Hildenbrand 		struct page *page = pfn_to_page(pfn);
1124255f5985SDavid Hildenbrand 
1125255f5985SDavid Hildenbrand 		__SetPageOffline(page);
11268e5c921cSDavid Hildenbrand 		if (!onlined) {
1127255f5985SDavid Hildenbrand 			SetPageDirty(page);
11288e5c921cSDavid Hildenbrand 			/* FIXME: remove after cleanups */
11298e5c921cSDavid Hildenbrand 			ClearPageReserved(page);
11308e5c921cSDavid Hildenbrand 		}
1131255f5985SDavid Hildenbrand 	}
11326cc26d77SDavid Hildenbrand 	page_offline_end();
11335f1f79bbSDavid Hildenbrand }
11345f1f79bbSDavid Hildenbrand 
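/*
 * Minimal sketch (not compiled) of how the convention above can be queried:
 * PG_offline identifies a fake-offline page, and PG_dirty marks the ones
 * that were never onlined. The helper name is made up for illustration.
 */
#if 0	/* illustrative only */
static bool virtio_mem_example_page_was_onlined(struct page *page)
{
	if (WARN_ON_ONCE(!PageOffline(page)))
		return false;
	/* Never-onlined fake-offline pages carry PG_dirty. */
	return !PageDirty(page);
}
#endif
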
11355f1f79bbSDavid Hildenbrand /*
1136255f5985SDavid Hildenbrand  * Clear PG_offline from a range of pages. If the pages were never onlined
1137255f5985SDavid Hildenbrand  * (via generic_online_page()), clear PageDirty().
11385f1f79bbSDavid Hildenbrand  */
11395f1f79bbSDavid Hildenbrand static void virtio_mem_clear_fake_offline(unsigned long pfn,
11402a628511SDavid Hildenbrand 					  unsigned long nr_pages, bool onlined)
11415f1f79bbSDavid Hildenbrand {
1142255f5985SDavid Hildenbrand 	for (; nr_pages--; pfn++) {
1143255f5985SDavid Hildenbrand 		struct page *page = pfn_to_page(pfn);
1144255f5985SDavid Hildenbrand 
1145255f5985SDavid Hildenbrand 		__ClearPageOffline(page);
1146255f5985SDavid Hildenbrand 		if (!onlined)
1147255f5985SDavid Hildenbrand 			ClearPageDirty(page);
1148255f5985SDavid Hildenbrand 	}
11495f1f79bbSDavid Hildenbrand }
11505f1f79bbSDavid Hildenbrand 
11515f1f79bbSDavid Hildenbrand /*
11525f1f79bbSDavid Hildenbrand  * Release a range of fake-offline pages to the buddy, effectively
11535f1f79bbSDavid Hildenbrand  * fake-onlining them.
11545f1f79bbSDavid Hildenbrand  */
11552a628511SDavid Hildenbrand static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages)
11565f1f79bbSDavid Hildenbrand {
115723baf831SKirill A. Shutemov 	unsigned long order = MAX_ORDER;
11582a628511SDavid Hildenbrand 	unsigned long i;
11595f1f79bbSDavid Hildenbrand 
11605f1f79bbSDavid Hildenbrand 	/*
116157c5a5b3SDavid Hildenbrand 	 * We might get called for ranges that don't cover properly aligned
116223baf831SKirill A. Shutemov 	 * MAX_ORDER pages; however, we can only online properly aligned
116323baf831SKirill A. Shutemov 	 * pages with an order of MAX_ORDER at maximum.
11645f1f79bbSDavid Hildenbrand 	 */
116557c5a5b3SDavid Hildenbrand 	while (!IS_ALIGNED(pfn | nr_pages, 1 << order))
116657c5a5b3SDavid Hildenbrand 		order--;
116757c5a5b3SDavid Hildenbrand 
116857c5a5b3SDavid Hildenbrand 	for (i = 0; i < nr_pages; i += 1 << order) {
1169255f5985SDavid Hildenbrand 		struct page *page = pfn_to_page(pfn + i);
11705f1f79bbSDavid Hildenbrand 
1171255f5985SDavid Hildenbrand 		/*
1172255f5985SDavid Hildenbrand 		 * If the page is PageDirty(), it was kept fake-offline when
1173255f5985SDavid Hildenbrand 		 * onlining the memory block. Otherwise, it was allocated
1174255f5985SDavid Hildenbrand 		 * using alloc_contig_range(). All pages in a subblock are
1175255f5985SDavid Hildenbrand 		 * alike.
1176255f5985SDavid Hildenbrand 		 */
1177255f5985SDavid Hildenbrand 		if (PageDirty(page)) {
117857c5a5b3SDavid Hildenbrand 			virtio_mem_clear_fake_offline(pfn + i, 1 << order, false);
117957c5a5b3SDavid Hildenbrand 			generic_online_page(page, order);
1180255f5985SDavid Hildenbrand 		} else {
118157c5a5b3SDavid Hildenbrand 			virtio_mem_clear_fake_offline(pfn + i, 1 << order, true);
118257c5a5b3SDavid Hildenbrand 			free_contig_range(pfn + i, 1 << order);
118357c5a5b3SDavid Hildenbrand 			adjust_managed_page_count(page, 1 << order);
1184255f5985SDavid Hildenbrand 		}
1185255f5985SDavid Hildenbrand 	}
11865f1f79bbSDavid Hildenbrand }
11875f1f79bbSDavid Hildenbrand 
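/*
 * Worked example for the alignment logic above (not compiled, values are
 * made up): with pfn == 0x1200 and nr_pages == 0x600, the largest order for
 * which both the start and the length stay aligned is 9, i.e. 2 MiB chunks
 * with 4 KiB pages.
 */
#if 0	/* illustrative only */
static unsigned long virtio_mem_example_max_online_order(unsigned long pfn,
							 unsigned long nr_pages)
{
	unsigned long order = MAX_ORDER;

	while (!IS_ALIGNED(pfn | nr_pages, 1 << order))
		order--;
	return order;	/* 0x1200 | 0x600 == 0x1600 -> aligned to 1 << 9 */
}
#endif
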
118889c486c4SDavid Hildenbrand /*
118989c486c4SDavid Hildenbrand  * Try to allocate a range, marking pages fake-offline, effectively
119089c486c4SDavid Hildenbrand  * fake-offlining them.
119189c486c4SDavid Hildenbrand  */
1192*f55484fdSDavid Hildenbrand static int virtio_mem_fake_offline(struct virtio_mem *vm, unsigned long pfn,
1193*f55484fdSDavid Hildenbrand 				   unsigned long nr_pages)
119489c486c4SDavid Hildenbrand {
119507252dfeSKefeng Wang 	const bool is_movable = is_zone_movable_page(pfn_to_page(pfn));
1196f2d799d5SDavid Hildenbrand 	int rc, retry_count;
119789c486c4SDavid Hildenbrand 
1198f2d799d5SDavid Hildenbrand 	/*
1199f2d799d5SDavid Hildenbrand 	 * TODO: We want an alloc_contig_range() mode that tries to allocate
1200f2d799d5SDavid Hildenbrand 	 * harder (e.g., dealing with temporarily pinned pages, PCP), especially
1201f2d799d5SDavid Hildenbrand 	 * with ZONE_MOVABLE. So for now, retry a couple of times with
1202f2d799d5SDavid Hildenbrand 	 * ZONE_MOVABLE before giving up - because that zone is supposed to give
1203f2d799d5SDavid Hildenbrand 	 * some guarantees.
1204f2d799d5SDavid Hildenbrand 	 */
1205f2d799d5SDavid Hildenbrand 	for (retry_count = 0; retry_count < 5; retry_count++) {
1206*f55484fdSDavid Hildenbrand 		/*
1207*f55484fdSDavid Hildenbrand 		 * If the config changed, stop immediately and go back to the
1208*f55484fdSDavid Hildenbrand 		 * main loop: avoid trying to keep unplugging if the device
1209*f55484fdSDavid Hildenbrand 		 * might have decided to not remove any more memory.
1210*f55484fdSDavid Hildenbrand 		 */
1211*f55484fdSDavid Hildenbrand 		if (atomic_read(&vm->config_changed))
1212*f55484fdSDavid Hildenbrand 			return -EAGAIN;
1213*f55484fdSDavid Hildenbrand 
121489c486c4SDavid Hildenbrand 		rc = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE,
121589c486c4SDavid Hildenbrand 					GFP_KERNEL);
121689c486c4SDavid Hildenbrand 		if (rc == -ENOMEM)
121789c486c4SDavid Hildenbrand 			/* whoops, out of memory */
121889c486c4SDavid Hildenbrand 			return rc;
1219f2d799d5SDavid Hildenbrand 		else if (rc && !is_movable)
1220f2d799d5SDavid Hildenbrand 			break;
1221f2d799d5SDavid Hildenbrand 		else if (rc)
1222f2d799d5SDavid Hildenbrand 			continue;
122389c486c4SDavid Hildenbrand 
122489c486c4SDavid Hildenbrand 		virtio_mem_set_fake_offline(pfn, nr_pages, true);
122589c486c4SDavid Hildenbrand 		adjust_managed_page_count(pfn_to_page(pfn), -nr_pages);
122689c486c4SDavid Hildenbrand 		return 0;
122789c486c4SDavid Hildenbrand 	}
122889c486c4SDavid Hildenbrand 
1229f2d799d5SDavid Hildenbrand 	return -EBUSY;
1230f2d799d5SDavid Hildenbrand }
1231f2d799d5SDavid Hildenbrand 
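/*
 * The core pattern above, stripped of the retry policy (not compiled; the
 * helper name is made up and error handling is intentionally minimal):
 */
#if 0	/* illustrative only */
static int virtio_mem_example_grab_range(unsigned long pfn,
					 unsigned long nr_pages)
{
	int rc;

	rc = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE,
				GFP_KERNEL);
	if (rc)
		return rc;	/* range busy or out of memory */
	/* The range now belongs to us; hand it back when done. */
	free_contig_range(pfn, nr_pages);
	return 0;
}
#endif
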
12327a34c77dSDavid Hildenbrand /*
12337a34c77dSDavid Hildenbrand  * Handle fake-offline pages when memory is going offline - such that the
12347a34c77dSDavid Hildenbrand  * pages can be skipped by mm-core when offlining.
12357a34c77dSDavid Hildenbrand  */
12367a34c77dSDavid Hildenbrand static void virtio_mem_fake_offline_going_offline(unsigned long pfn,
12377a34c77dSDavid Hildenbrand 						  unsigned long nr_pages)
12387a34c77dSDavid Hildenbrand {
12397a34c77dSDavid Hildenbrand 	struct page *page;
12407a34c77dSDavid Hildenbrand 	unsigned long i;
12417a34c77dSDavid Hildenbrand 
12427a34c77dSDavid Hildenbrand 	/*
12437a34c77dSDavid Hildenbrand 	 * Drop our reference to the pages so the memory can get offlined
12447a34c77dSDavid Hildenbrand 	 * and add the unplugged pages to the managed page counters (so
12457a34c77dSDavid Hildenbrand 	 * offlining code can correctly subtract them again).
12467a34c77dSDavid Hildenbrand 	 */
12477a34c77dSDavid Hildenbrand 	adjust_managed_page_count(pfn_to_page(pfn), nr_pages);
12487a34c77dSDavid Hildenbrand 	/* Drop our reference to the pages so the memory can get offlined. */
12497a34c77dSDavid Hildenbrand 	for (i = 0; i < nr_pages; i++) {
12507a34c77dSDavid Hildenbrand 		page = pfn_to_page(pfn + i);
12517a34c77dSDavid Hildenbrand 		if (WARN_ON(!page_ref_dec_and_test(page)))
12527a34c77dSDavid Hildenbrand 			dump_page(page, "fake-offline page referenced");
12537a34c77dSDavid Hildenbrand 	}
12547a34c77dSDavid Hildenbrand }
12557a34c77dSDavid Hildenbrand 
12567a34c77dSDavid Hildenbrand /*
12577a34c77dSDavid Hildenbrand  * Handle fake-offline pages when memory offlining is canceled - to undo
12587a34c77dSDavid Hildenbrand  * what we did in virtio_mem_fake_offline_going_offline().
12597a34c77dSDavid Hildenbrand  */
12607a34c77dSDavid Hildenbrand static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn,
12617a34c77dSDavid Hildenbrand 						   unsigned long nr_pages)
12627a34c77dSDavid Hildenbrand {
12637a34c77dSDavid Hildenbrand 	unsigned long i;
12647a34c77dSDavid Hildenbrand 
12657a34c77dSDavid Hildenbrand 	/*
12667a34c77dSDavid Hildenbrand 	 * Get the reference we dropped when going offline and subtract the
12677a34c77dSDavid Hildenbrand 	 * unplugged pages from the managed page counters.
12687a34c77dSDavid Hildenbrand 	 */
12697a34c77dSDavid Hildenbrand 	adjust_managed_page_count(pfn_to_page(pfn), -nr_pages);
12707a34c77dSDavid Hildenbrand 	for (i = 0; i < nr_pages; i++)
12717a34c77dSDavid Hildenbrand 		page_ref_inc(pfn_to_page(pfn + i));
12727a34c77dSDavid Hildenbrand }
12737a34c77dSDavid Hildenbrand 
12746639032aSDavid Hildenbrand static void virtio_mem_online_page(struct virtio_mem *vm,
12756639032aSDavid Hildenbrand 				   struct page *page, unsigned int order)
12765f1f79bbSDavid Hildenbrand {
12776639032aSDavid Hildenbrand 	const unsigned long start = page_to_phys(page);
12786639032aSDavid Hildenbrand 	const unsigned long end = start + PFN_PHYS(1 << order);
12796639032aSDavid Hildenbrand 	unsigned long addr, next, id, sb_id, count;
12804ba50cd3SDavid Hildenbrand 	bool do_online;
12815f1f79bbSDavid Hildenbrand 
12826639032aSDavid Hildenbrand 	/*
128323baf831SKirill A. Shutemov 	 * We can get called with any order up to MAX_ORDER. If our subblock
128423baf831SKirill A. Shutemov 	 * size is smaller than that and we have a mixture of plugged and
128523baf831SKirill A. Shutemov 	 * unplugged subblocks within such a page, we have to process in
12866639032aSDavid Hildenbrand 	 * smaller granularity. In that case we'll adjust the order exactly once
12876639032aSDavid Hildenbrand 	 * within the loop.
12886639032aSDavid Hildenbrand 	 */
12896639032aSDavid Hildenbrand 	for (addr = start; addr < end; ) {
12906639032aSDavid Hildenbrand 		next = addr + PFN_PHYS(1 << order);
12915f1f79bbSDavid Hildenbrand 
12924ba50cd3SDavid Hildenbrand 		if (vm->in_sbm) {
12934ba50cd3SDavid Hildenbrand 			id = virtio_mem_phys_to_mb_id(addr);
12944ba50cd3SDavid Hildenbrand 			sb_id = virtio_mem_phys_to_sb_id(vm, addr);
12956639032aSDavid Hildenbrand 			count = virtio_mem_phys_to_sb_id(vm, next - 1) - sb_id + 1;
12966639032aSDavid Hildenbrand 
12976639032aSDavid Hildenbrand 			if (virtio_mem_sbm_test_sb_plugged(vm, id, sb_id, count)) {
12986639032aSDavid Hildenbrand 				/* Fully plugged. */
12996639032aSDavid Hildenbrand 				do_online = true;
13006639032aSDavid Hildenbrand 			} else if (count == 1 ||
13016639032aSDavid Hildenbrand 				   virtio_mem_sbm_test_sb_unplugged(vm, id, sb_id, count)) {
13026639032aSDavid Hildenbrand 				/* Fully unplugged. */
13036639032aSDavid Hildenbrand 				do_online = false;
13046639032aSDavid Hildenbrand 			} else {
13056639032aSDavid Hildenbrand 				/*
13066639032aSDavid Hildenbrand 				 * Mixture, process sub-blocks instead. This
13076639032aSDavid Hildenbrand 				 * will be at least the size of a pageblock.
13086639032aSDavid Hildenbrand 				 * We'll run into this case exactly once.
13096639032aSDavid Hildenbrand 				 */
13106639032aSDavid Hildenbrand 				order = ilog2(vm->sbm.sb_size) - PAGE_SHIFT;
13116639032aSDavid Hildenbrand 				do_online = virtio_mem_sbm_test_sb_plugged(vm, id, sb_id, 1);
13126639032aSDavid Hildenbrand 				continue;
13136639032aSDavid Hildenbrand 			}
13144ba50cd3SDavid Hildenbrand 		} else {
13153711387aSDavid Hildenbrand 			/*
13163711387aSDavid Hildenbrand 			 * If the whole block is marked fake offline, keep
13173711387aSDavid Hildenbrand 			 * everything that way.
13183711387aSDavid Hildenbrand 			 */
13193711387aSDavid Hildenbrand 			id = virtio_mem_phys_to_bb_id(vm, addr);
13203711387aSDavid Hildenbrand 			do_online = virtio_mem_bbm_get_bb_state(vm, id) !=
13213711387aSDavid Hildenbrand 				    VIRTIO_MEM_BBM_BB_FAKE_OFFLINE;
13224ba50cd3SDavid Hildenbrand 		}
1323425bec00SDavid Hildenbrand 
13246639032aSDavid Hildenbrand 		if (do_online)
13256639032aSDavid Hildenbrand 			generic_online_page(pfn_to_page(PFN_DOWN(addr)), order);
13266639032aSDavid Hildenbrand 		else
13276639032aSDavid Hildenbrand 			virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order,
13286639032aSDavid Hildenbrand 						    false);
13296639032aSDavid Hildenbrand 		addr = next;
13306639032aSDavid Hildenbrand 	}
13316639032aSDavid Hildenbrand }
13326639032aSDavid Hildenbrand 
13336639032aSDavid Hildenbrand static void virtio_mem_online_page_cb(struct page *page, unsigned int order)
13346639032aSDavid Hildenbrand {
13356639032aSDavid Hildenbrand 	const unsigned long addr = page_to_phys(page);
13366639032aSDavid Hildenbrand 	struct virtio_mem *vm;
13376639032aSDavid Hildenbrand 
13386639032aSDavid Hildenbrand 	rcu_read_lock();
13396639032aSDavid Hildenbrand 	list_for_each_entry_rcu(vm, &virtio_mem_devices, next) {
1340425bec00SDavid Hildenbrand 		/*
13416639032aSDavid Hildenbrand 		 * Pages we're onlining will never cross memory blocks and,
13426639032aSDavid Hildenbrand 		 * therefore, not virtio-mem devices.
13436639032aSDavid Hildenbrand 		 */
13446639032aSDavid Hildenbrand 		if (!virtio_mem_contains_range(vm, addr, PFN_PHYS(1 << order)))
13456639032aSDavid Hildenbrand 			continue;
13466639032aSDavid Hildenbrand 
13476639032aSDavid Hildenbrand 		/*
13486639032aSDavid Hildenbrand 		 * virtio_mem_set_fake_offline() might sleep. We can safely
13496639032aSDavid Hildenbrand 		 * drop the RCU lock at this point because the device
13506639032aSDavid Hildenbrand 		 * cannot go away. See virtio_mem_remove() for how races
1351425bec00SDavid Hildenbrand 		 * between memory onlining and device removal are handled.
1352425bec00SDavid Hildenbrand 		 */
1353425bec00SDavid Hildenbrand 		rcu_read_unlock();
1354425bec00SDavid Hildenbrand 
13556639032aSDavid Hildenbrand 		virtio_mem_online_page(vm, page, order);
13565f1f79bbSDavid Hildenbrand 		return;
13575f1f79bbSDavid Hildenbrand 	}
13585f1f79bbSDavid Hildenbrand 	rcu_read_unlock();
13595f1f79bbSDavid Hildenbrand 
13605f1f79bbSDavid Hildenbrand 	/* not virtio-mem memory, but e.g., a DIMM. online it */
13615f1f79bbSDavid Hildenbrand 	generic_online_page(page, order);
13625f1f79bbSDavid Hildenbrand }
13635f1f79bbSDavid Hildenbrand 
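/*
 * Minimal sketch (not compiled): virtio_mem_online_page_cb() only takes
 * effect once it is installed as the global online-page callback, which this
 * driver does elsewhere, roughly as follows. The helper names are made up.
 */
#if 0	/* illustrative only */
static int virtio_mem_example_install_online_cb(void)
{
	return set_online_page_callback(&virtio_mem_online_page_cb);
}

static void virtio_mem_example_remove_online_cb(void)
{
	restore_online_page_callback(&virtio_mem_online_page_cb);
}
#endif
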
13645f1f79bbSDavid Hildenbrand static uint64_t virtio_mem_send_request(struct virtio_mem *vm,
13655f1f79bbSDavid Hildenbrand 					const struct virtio_mem_req *req)
13665f1f79bbSDavid Hildenbrand {
13675f1f79bbSDavid Hildenbrand 	struct scatterlist *sgs[2], sg_req, sg_resp;
13685f1f79bbSDavid Hildenbrand 	unsigned int len;
13695f1f79bbSDavid Hildenbrand 	int rc;
13705f1f79bbSDavid Hildenbrand 
13715f1f79bbSDavid Hildenbrand 	/* don't use the request residing on the stack (vaddr) */
13725f1f79bbSDavid Hildenbrand 	vm->req = *req;
13735f1f79bbSDavid Hildenbrand 
13745f1f79bbSDavid Hildenbrand 	/* out: buffer for request */
13755f1f79bbSDavid Hildenbrand 	sg_init_one(&sg_req, &vm->req, sizeof(vm->req));
13765f1f79bbSDavid Hildenbrand 	sgs[0] = &sg_req;
13775f1f79bbSDavid Hildenbrand 
13785f1f79bbSDavid Hildenbrand 	/* in: buffer for response */
13795f1f79bbSDavid Hildenbrand 	sg_init_one(&sg_resp, &vm->resp, sizeof(vm->resp));
13805f1f79bbSDavid Hildenbrand 	sgs[1] = &sg_resp;
13815f1f79bbSDavid Hildenbrand 
13825f1f79bbSDavid Hildenbrand 	rc = virtqueue_add_sgs(vm->vq, sgs, 1, 1, vm, GFP_KERNEL);
13835f1f79bbSDavid Hildenbrand 	if (rc < 0)
13845f1f79bbSDavid Hildenbrand 		return rc;
13855f1f79bbSDavid Hildenbrand 
13865f1f79bbSDavid Hildenbrand 	virtqueue_kick(vm->vq);
13875f1f79bbSDavid Hildenbrand 
13885f1f79bbSDavid Hildenbrand 	/* wait for a response */
13895f1f79bbSDavid Hildenbrand 	wait_event(vm->host_resp, virtqueue_get_buf(vm->vq, &len));
13905f1f79bbSDavid Hildenbrand 
13915f1f79bbSDavid Hildenbrand 	return virtio16_to_cpu(vm->vdev, vm->resp.type);
13925f1f79bbSDavid Hildenbrand }
13935f1f79bbSDavid Hildenbrand 
13945f1f79bbSDavid Hildenbrand static int virtio_mem_send_plug_request(struct virtio_mem *vm, uint64_t addr,
13955f1f79bbSDavid Hildenbrand 					uint64_t size)
13965f1f79bbSDavid Hildenbrand {
13975f1f79bbSDavid Hildenbrand 	const uint64_t nb_vm_blocks = size / vm->device_block_size;
13985f1f79bbSDavid Hildenbrand 	const struct virtio_mem_req req = {
13995f1f79bbSDavid Hildenbrand 		.type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_PLUG),
14005f1f79bbSDavid Hildenbrand 		.u.plug.addr = cpu_to_virtio64(vm->vdev, addr),
14015f1f79bbSDavid Hildenbrand 		.u.plug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks),
14025f1f79bbSDavid Hildenbrand 	};
14036beb3a94SDavid Hildenbrand 	int rc = -ENOMEM;
14045f1f79bbSDavid Hildenbrand 
14055f1f79bbSDavid Hildenbrand 	if (atomic_read(&vm->config_changed))
14065f1f79bbSDavid Hildenbrand 		return -EAGAIN;
14075f1f79bbSDavid Hildenbrand 
14086beb3a94SDavid Hildenbrand 	dev_dbg(&vm->vdev->dev, "plugging memory: 0x%llx - 0x%llx\n", addr,
14096beb3a94SDavid Hildenbrand 		addr + size - 1);
14106beb3a94SDavid Hildenbrand 
14115f1f79bbSDavid Hildenbrand 	switch (virtio_mem_send_request(vm, &req)) {
14125f1f79bbSDavid Hildenbrand 	case VIRTIO_MEM_RESP_ACK:
14135f1f79bbSDavid Hildenbrand 		vm->plugged_size += size;
14145f1f79bbSDavid Hildenbrand 		return 0;
14155f1f79bbSDavid Hildenbrand 	case VIRTIO_MEM_RESP_NACK:
14166beb3a94SDavid Hildenbrand 		rc = -EAGAIN;
14176beb3a94SDavid Hildenbrand 		break;
14185f1f79bbSDavid Hildenbrand 	case VIRTIO_MEM_RESP_BUSY:
14196beb3a94SDavid Hildenbrand 		rc = -ETXTBSY;
14206beb3a94SDavid Hildenbrand 		break;
14215f1f79bbSDavid Hildenbrand 	case VIRTIO_MEM_RESP_ERROR:
14226beb3a94SDavid Hildenbrand 		rc = -EINVAL;
14236beb3a94SDavid Hildenbrand 		break;
14245f1f79bbSDavid Hildenbrand 	default:
14256beb3a94SDavid Hildenbrand 		break;
14265f1f79bbSDavid Hildenbrand 	}
14276beb3a94SDavid Hildenbrand 
14286beb3a94SDavid Hildenbrand 	dev_dbg(&vm->vdev->dev, "plugging memory failed: %d\n", rc);
14296beb3a94SDavid Hildenbrand 	return rc;
14305f1f79bbSDavid Hildenbrand }
14315f1f79bbSDavid Hildenbrand 
14325f1f79bbSDavid Hildenbrand static int virtio_mem_send_unplug_request(struct virtio_mem *vm, uint64_t addr,
14335f1f79bbSDavid Hildenbrand 					  uint64_t size)
14345f1f79bbSDavid Hildenbrand {
14355f1f79bbSDavid Hildenbrand 	const uint64_t nb_vm_blocks = size / vm->device_block_size;
14365f1f79bbSDavid Hildenbrand 	const struct virtio_mem_req req = {
14375f1f79bbSDavid Hildenbrand 		.type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_UNPLUG),
14385f1f79bbSDavid Hildenbrand 		.u.unplug.addr = cpu_to_virtio64(vm->vdev, addr),
14395f1f79bbSDavid Hildenbrand 		.u.unplug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks),
14405f1f79bbSDavid Hildenbrand 	};
14416beb3a94SDavid Hildenbrand 	int rc = -ENOMEM;
14425f1f79bbSDavid Hildenbrand 
14435f1f79bbSDavid Hildenbrand 	if (atomic_read(&vm->config_changed))
14445f1f79bbSDavid Hildenbrand 		return -EAGAIN;
14455f1f79bbSDavid Hildenbrand 
14466beb3a94SDavid Hildenbrand 	dev_dbg(&vm->vdev->dev, "unplugging memory: 0x%llx - 0x%llx\n", addr,
14476beb3a94SDavid Hildenbrand 		addr + size - 1);
14486beb3a94SDavid Hildenbrand 
14495f1f79bbSDavid Hildenbrand 	switch (virtio_mem_send_request(vm, &req)) {
14505f1f79bbSDavid Hildenbrand 	case VIRTIO_MEM_RESP_ACK:
14515f1f79bbSDavid Hildenbrand 		vm->plugged_size -= size;
14525f1f79bbSDavid Hildenbrand 		return 0;
14535f1f79bbSDavid Hildenbrand 	case VIRTIO_MEM_RESP_BUSY:
14546beb3a94SDavid Hildenbrand 		rc = -ETXTBSY;
14556beb3a94SDavid Hildenbrand 		break;
14565f1f79bbSDavid Hildenbrand 	case VIRTIO_MEM_RESP_ERROR:
14576beb3a94SDavid Hildenbrand 		rc = -EINVAL;
14586beb3a94SDavid Hildenbrand 		break;
14595f1f79bbSDavid Hildenbrand 	default:
14606beb3a94SDavid Hildenbrand 		break;
14615f1f79bbSDavid Hildenbrand 	}
14626beb3a94SDavid Hildenbrand 
14636beb3a94SDavid Hildenbrand 	dev_dbg(&vm->vdev->dev, "unplugging memory failed: %d\n", rc);
14646beb3a94SDavid Hildenbrand 	return rc;
14655f1f79bbSDavid Hildenbrand }
14665f1f79bbSDavid Hildenbrand 
14675f1f79bbSDavid Hildenbrand static int virtio_mem_send_unplug_all_request(struct virtio_mem *vm)
14685f1f79bbSDavid Hildenbrand {
14695f1f79bbSDavid Hildenbrand 	const struct virtio_mem_req req = {
14705f1f79bbSDavid Hildenbrand 		.type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_UNPLUG_ALL),
14715f1f79bbSDavid Hildenbrand 	};
14726beb3a94SDavid Hildenbrand 	int rc = -ENOMEM;
14736beb3a94SDavid Hildenbrand 
14746beb3a94SDavid Hildenbrand 	dev_dbg(&vm->vdev->dev, "unplugging all memory");
14755f1f79bbSDavid Hildenbrand 
14765f1f79bbSDavid Hildenbrand 	switch (virtio_mem_send_request(vm, &req)) {
14775f1f79bbSDavid Hildenbrand 	case VIRTIO_MEM_RESP_ACK:
14785f1f79bbSDavid Hildenbrand 		vm->unplug_all_required = false;
14795f1f79bbSDavid Hildenbrand 		vm->plugged_size = 0;
14805f1f79bbSDavid Hildenbrand 		/* usable region might have shrunk */
14815f1f79bbSDavid Hildenbrand 		atomic_set(&vm->config_changed, 1);
14825f1f79bbSDavid Hildenbrand 		return 0;
14835f1f79bbSDavid Hildenbrand 	case VIRTIO_MEM_RESP_BUSY:
14846beb3a94SDavid Hildenbrand 		rc = -ETXTBSY;
14856beb3a94SDavid Hildenbrand 		break;
14865f1f79bbSDavid Hildenbrand 	default:
14876beb3a94SDavid Hildenbrand 		break;
14885f1f79bbSDavid Hildenbrand 	}
14896beb3a94SDavid Hildenbrand 
14906beb3a94SDavid Hildenbrand 	dev_dbg(&vm->vdev->dev, "unplugging all memory failed: %d\n", rc);
14916beb3a94SDavid Hildenbrand 	return rc;
14925f1f79bbSDavid Hildenbrand }
14935f1f79bbSDavid Hildenbrand 
14945f1f79bbSDavid Hildenbrand /*
14955f1f79bbSDavid Hildenbrand  * Plug selected subblocks. Updates the plugged state, but not the state
14965f1f79bbSDavid Hildenbrand  * of the memory block.
14975f1f79bbSDavid Hildenbrand  */
1498602ef894SDavid Hildenbrand static int virtio_mem_sbm_plug_sb(struct virtio_mem *vm, unsigned long mb_id,
14995f1f79bbSDavid Hildenbrand 				  int sb_id, int count)
15005f1f79bbSDavid Hildenbrand {
15015f1f79bbSDavid Hildenbrand 	const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) +
1502905c4c51SDavid Hildenbrand 			      sb_id * vm->sbm.sb_size;
1503905c4c51SDavid Hildenbrand 	const uint64_t size = count * vm->sbm.sb_size;
15045f1f79bbSDavid Hildenbrand 	int rc;
15055f1f79bbSDavid Hildenbrand 
15065f1f79bbSDavid Hildenbrand 	rc = virtio_mem_send_plug_request(vm, addr, size);
15075f1f79bbSDavid Hildenbrand 	if (!rc)
150854c6a6baSDavid Hildenbrand 		virtio_mem_sbm_set_sb_plugged(vm, mb_id, sb_id, count);
15095f1f79bbSDavid Hildenbrand 	return rc;
15105f1f79bbSDavid Hildenbrand }
15115f1f79bbSDavid Hildenbrand 
15125f1f79bbSDavid Hildenbrand /*
15135f1f79bbSDavid Hildenbrand  * Unplug selected subblocks. Updates the plugged state, but not the state
15145f1f79bbSDavid Hildenbrand  * of the memory block.
15155f1f79bbSDavid Hildenbrand  */
1516602ef894SDavid Hildenbrand static int virtio_mem_sbm_unplug_sb(struct virtio_mem *vm, unsigned long mb_id,
15175f1f79bbSDavid Hildenbrand 				    int sb_id, int count)
15185f1f79bbSDavid Hildenbrand {
15195f1f79bbSDavid Hildenbrand 	const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) +
1520905c4c51SDavid Hildenbrand 			      sb_id * vm->sbm.sb_size;
1521905c4c51SDavid Hildenbrand 	const uint64_t size = count * vm->sbm.sb_size;
15225f1f79bbSDavid Hildenbrand 	int rc;
15235f1f79bbSDavid Hildenbrand 
15245f1f79bbSDavid Hildenbrand 	rc = virtio_mem_send_unplug_request(vm, addr, size);
15255f1f79bbSDavid Hildenbrand 	if (!rc)
152654c6a6baSDavid Hildenbrand 		virtio_mem_sbm_set_sb_unplugged(vm, mb_id, sb_id, count);
15275f1f79bbSDavid Hildenbrand 	return rc;
15285f1f79bbSDavid Hildenbrand }
15295f1f79bbSDavid Hildenbrand 
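/*
 * Worked example for the address math in the two helpers above (example
 * values only): with a 2 MiB subblock size, a memory block starting at
 * physical address 1 GiB, sb_id == 3 and count == 4 translate into a plug or
 * unplug request covering [1 GiB + 6 MiB, 1 GiB + 14 MiB).
 */
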
15305f1f79bbSDavid Hildenbrand /*
15314ba50cd3SDavid Hildenbrand  * Request to unplug a big block.
15324ba50cd3SDavid Hildenbrand  *
15334ba50cd3SDavid Hildenbrand  * Will not modify the state of the big block.
15344ba50cd3SDavid Hildenbrand  */
15354ba50cd3SDavid Hildenbrand static int virtio_mem_bbm_unplug_bb(struct virtio_mem *vm, unsigned long bb_id)
15364ba50cd3SDavid Hildenbrand {
15374ba50cd3SDavid Hildenbrand 	const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
15384ba50cd3SDavid Hildenbrand 	const uint64_t size = vm->bbm.bb_size;
15394ba50cd3SDavid Hildenbrand 
15404ba50cd3SDavid Hildenbrand 	return virtio_mem_send_unplug_request(vm, addr, size);
15414ba50cd3SDavid Hildenbrand }
15424ba50cd3SDavid Hildenbrand 
15434ba50cd3SDavid Hildenbrand /*
15444ba50cd3SDavid Hildenbrand  * Request to plug a big block.
15454ba50cd3SDavid Hildenbrand  *
15464ba50cd3SDavid Hildenbrand  * Will not modify the state of the big block.
15474ba50cd3SDavid Hildenbrand  */
15484ba50cd3SDavid Hildenbrand static int virtio_mem_bbm_plug_bb(struct virtio_mem *vm, unsigned long bb_id)
15494ba50cd3SDavid Hildenbrand {
15504ba50cd3SDavid Hildenbrand 	const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
15514ba50cd3SDavid Hildenbrand 	const uint64_t size = vm->bbm.bb_size;
15524ba50cd3SDavid Hildenbrand 
15534ba50cd3SDavid Hildenbrand 	return virtio_mem_send_plug_request(vm, addr, size);
15544ba50cd3SDavid Hildenbrand }
15554ba50cd3SDavid Hildenbrand 
15564ba50cd3SDavid Hildenbrand /*
15575f1f79bbSDavid Hildenbrand  * Unplug the desired number of plugged subblocks of an offline or not-added
15585f1f79bbSDavid Hildenbrand  * memory block. Will fail if any subblock cannot get unplugged (instead of
15595f1f79bbSDavid Hildenbrand  * skipping it).
15605f1f79bbSDavid Hildenbrand  *
15615f1f79bbSDavid Hildenbrand  * Will not modify the state of the memory block.
15625f1f79bbSDavid Hildenbrand  *
15635f1f79bbSDavid Hildenbrand  * Note: can fail after some subblocks were unplugged.
15645f1f79bbSDavid Hildenbrand  */
15655304ca3dSDavid Hildenbrand static int virtio_mem_sbm_unplug_any_sb_raw(struct virtio_mem *vm,
15665f1f79bbSDavid Hildenbrand 					    unsigned long mb_id, uint64_t *nb_sb)
15675f1f79bbSDavid Hildenbrand {
15685f1f79bbSDavid Hildenbrand 	int sb_id, count;
15695f1f79bbSDavid Hildenbrand 	int rc;
15705f1f79bbSDavid Hildenbrand 
1571905c4c51SDavid Hildenbrand 	sb_id = vm->sbm.sbs_per_mb - 1;
15725f1f79bbSDavid Hildenbrand 	while (*nb_sb) {
1573562e08cdSDavid Hildenbrand 		/* Find the next candidate subblock */
1574562e08cdSDavid Hildenbrand 		while (sb_id >= 0 &&
157554c6a6baSDavid Hildenbrand 		       virtio_mem_sbm_test_sb_unplugged(vm, mb_id, sb_id, 1))
1576562e08cdSDavid Hildenbrand 			sb_id--;
1577562e08cdSDavid Hildenbrand 		if (sb_id < 0)
15785f1f79bbSDavid Hildenbrand 			break;
1579562e08cdSDavid Hildenbrand 		/* Try to unplug multiple subblocks at a time */
15805f1f79bbSDavid Hildenbrand 		count = 1;
1581562e08cdSDavid Hildenbrand 		while (count < *nb_sb && sb_id > 0 &&
158254c6a6baSDavid Hildenbrand 		       virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id - 1, 1)) {
15835f1f79bbSDavid Hildenbrand 			count++;
1584562e08cdSDavid Hildenbrand 			sb_id--;
1585562e08cdSDavid Hildenbrand 		}
15865f1f79bbSDavid Hildenbrand 
1587602ef894SDavid Hildenbrand 		rc = virtio_mem_sbm_unplug_sb(vm, mb_id, sb_id, count);
15885f1f79bbSDavid Hildenbrand 		if (rc)
15895f1f79bbSDavid Hildenbrand 			return rc;
15905f1f79bbSDavid Hildenbrand 		*nb_sb -= count;
1591562e08cdSDavid Hildenbrand 		sb_id--;
15925f1f79bbSDavid Hildenbrand 	}
15935f1f79bbSDavid Hildenbrand 
15945f1f79bbSDavid Hildenbrand 	return 0;
15955f1f79bbSDavid Hildenbrand }
15965f1f79bbSDavid Hildenbrand 
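/*
 * Worked example for the scan above (example values only): with 8 subblocks
 * per memory block and plugged state P P U P P P U U for sb 0..7, the first
 * pass skips sb 7 and 6, batches sb 5..3 into a single unplug request, and a
 * second pass batches sb 1..0 - two device requests instead of five.
 */
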
15975f1f79bbSDavid Hildenbrand /*
15985f1f79bbSDavid Hildenbrand  * Unplug all plugged subblocks of an offline or not-added memory block.
15995f1f79bbSDavid Hildenbrand  *
16005f1f79bbSDavid Hildenbrand  * Will not modify the state of the memory block.
16015f1f79bbSDavid Hildenbrand  *
16025f1f79bbSDavid Hildenbrand  * Note: can fail after some subblocks were unplugged.
16035f1f79bbSDavid Hildenbrand  */
1604602ef894SDavid Hildenbrand static int virtio_mem_sbm_unplug_mb(struct virtio_mem *vm, unsigned long mb_id)
16055f1f79bbSDavid Hildenbrand {
1606905c4c51SDavid Hildenbrand 	uint64_t nb_sb = vm->sbm.sbs_per_mb;
16075f1f79bbSDavid Hildenbrand 
16085304ca3dSDavid Hildenbrand 	return virtio_mem_sbm_unplug_any_sb_raw(vm, mb_id, &nb_sb);
16095f1f79bbSDavid Hildenbrand }
16105f1f79bbSDavid Hildenbrand 
16115f1f79bbSDavid Hildenbrand /*
16125f1f79bbSDavid Hildenbrand  * Prepare tracking data for the next memory block.
16135f1f79bbSDavid Hildenbrand  */
1614602ef894SDavid Hildenbrand static int virtio_mem_sbm_prepare_next_mb(struct virtio_mem *vm,
16155f1f79bbSDavid Hildenbrand 					  unsigned long *mb_id)
16165f1f79bbSDavid Hildenbrand {
16175f1f79bbSDavid Hildenbrand 	int rc;
16185f1f79bbSDavid Hildenbrand 
16198a6f082bSDavid Hildenbrand 	if (vm->sbm.next_mb_id > vm->sbm.last_usable_mb_id)
16205f1f79bbSDavid Hildenbrand 		return -ENOSPC;
16215f1f79bbSDavid Hildenbrand 
16225f1f79bbSDavid Hildenbrand 	/* Resize the state array if required. */
162399f0b55eSDavid Hildenbrand 	rc = virtio_mem_sbm_mb_states_prepare_next_mb(vm);
16245f1f79bbSDavid Hildenbrand 	if (rc)
16255f1f79bbSDavid Hildenbrand 		return rc;
16265f1f79bbSDavid Hildenbrand 
16275f1f79bbSDavid Hildenbrand 	/* Resize the subblock bitmap if required. */
162854c6a6baSDavid Hildenbrand 	rc = virtio_mem_sbm_sb_states_prepare_next_mb(vm);
16295f1f79bbSDavid Hildenbrand 	if (rc)
16305f1f79bbSDavid Hildenbrand 		return rc;
16315f1f79bbSDavid Hildenbrand 
163299f0b55eSDavid Hildenbrand 	vm->sbm.mb_count[VIRTIO_MEM_SBM_MB_UNUSED]++;
16338a6f082bSDavid Hildenbrand 	*mb_id = vm->sbm.next_mb_id++;
16345f1f79bbSDavid Hildenbrand 	return 0;
16355f1f79bbSDavid Hildenbrand }
16365f1f79bbSDavid Hildenbrand 
16375f1f79bbSDavid Hildenbrand /*
16385f1f79bbSDavid Hildenbrand  * Try to plug the desired number of subblocks and add the memory block
16395f1f79bbSDavid Hildenbrand  * to Linux.
16405f1f79bbSDavid Hildenbrand  *
16415f1f79bbSDavid Hildenbrand  * Will modify the state of the memory block.
16425f1f79bbSDavid Hildenbrand  */
1643602ef894SDavid Hildenbrand static int virtio_mem_sbm_plug_and_add_mb(struct virtio_mem *vm,
1644602ef894SDavid Hildenbrand 					  unsigned long mb_id, uint64_t *nb_sb)
16455f1f79bbSDavid Hildenbrand {
1646905c4c51SDavid Hildenbrand 	const int count = min_t(int, *nb_sb, vm->sbm.sbs_per_mb);
1647d76944f8SDavid Hildenbrand 	int rc;
16485f1f79bbSDavid Hildenbrand 
16495f1f79bbSDavid Hildenbrand 	if (WARN_ON_ONCE(!count))
16505f1f79bbSDavid Hildenbrand 		return -EINVAL;
16515f1f79bbSDavid Hildenbrand 
16525f1f79bbSDavid Hildenbrand 	/*
16535f1f79bbSDavid Hildenbrand 	 * Plug the requested number of subblocks before adding it to Linux,
16545f1f79bbSDavid Hildenbrand 	 * so that onlining will directly online all plugged subblocks.
16555f1f79bbSDavid Hildenbrand 	 */
1656602ef894SDavid Hildenbrand 	rc = virtio_mem_sbm_plug_sb(vm, mb_id, 0, count);
16575f1f79bbSDavid Hildenbrand 	if (rc)
16585f1f79bbSDavid Hildenbrand 		return rc;
16595f1f79bbSDavid Hildenbrand 
16605f1f79bbSDavid Hildenbrand 	/*
16615f1f79bbSDavid Hildenbrand 	 * Mark the block properly offline before adding it to Linux,
16625f1f79bbSDavid Hildenbrand 	 * so the memory notifiers will find the block in the right state.
16635f1f79bbSDavid Hildenbrand 	 */
1664905c4c51SDavid Hildenbrand 	if (count == vm->sbm.sbs_per_mb)
166599f0b55eSDavid Hildenbrand 		virtio_mem_sbm_set_mb_state(vm, mb_id,
166699f0b55eSDavid Hildenbrand 					    VIRTIO_MEM_SBM_MB_OFFLINE);
16675f1f79bbSDavid Hildenbrand 	else
166899f0b55eSDavid Hildenbrand 		virtio_mem_sbm_set_mb_state(vm, mb_id,
166999f0b55eSDavid Hildenbrand 					    VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL);
16705f1f79bbSDavid Hildenbrand 
16715f1f79bbSDavid Hildenbrand 	/* Add the memory block to Linux - if that fails, try to unplug. */
167201afdee2SDavid Hildenbrand 	rc = virtio_mem_sbm_add_mb(vm, mb_id);
16735f1f79bbSDavid Hildenbrand 	if (rc) {
167499f0b55eSDavid Hildenbrand 		int new_state = VIRTIO_MEM_SBM_MB_UNUSED;
16755f1f79bbSDavid Hildenbrand 
1676602ef894SDavid Hildenbrand 		if (virtio_mem_sbm_unplug_sb(vm, mb_id, 0, count))
167799f0b55eSDavid Hildenbrand 			new_state = VIRTIO_MEM_SBM_MB_PLUGGED;
167899f0b55eSDavid Hildenbrand 		virtio_mem_sbm_set_mb_state(vm, mb_id, new_state);
16795f1f79bbSDavid Hildenbrand 		return rc;
16805f1f79bbSDavid Hildenbrand 	}
16815f1f79bbSDavid Hildenbrand 
16825f1f79bbSDavid Hildenbrand 	*nb_sb -= count;
16835f1f79bbSDavid Hildenbrand 	return 0;
16845f1f79bbSDavid Hildenbrand }
16855f1f79bbSDavid Hildenbrand 
16865f1f79bbSDavid Hildenbrand /*
16875f1f79bbSDavid Hildenbrand  * Try to plug the desired number of subblocks of a memory block that
16885f1f79bbSDavid Hildenbrand  * is already added to Linux.
16895f1f79bbSDavid Hildenbrand  *
16905f1f79bbSDavid Hildenbrand  * Will modify the state of the memory block.
16915f1f79bbSDavid Hildenbrand  *
16925f1f79bbSDavid Hildenbrand  * Note: Can fail after some subblocks were successfully plugged.
16935f1f79bbSDavid Hildenbrand  */
1694602ef894SDavid Hildenbrand static int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm,
1695f4cf803dSDavid Hildenbrand 				      unsigned long mb_id, uint64_t *nb_sb)
16965f1f79bbSDavid Hildenbrand {
1697f4cf803dSDavid Hildenbrand 	const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id);
16985f1f79bbSDavid Hildenbrand 	unsigned long pfn, nr_pages;
16995f1f79bbSDavid Hildenbrand 	int sb_id, count;
17005f1f79bbSDavid Hildenbrand 	int rc;
17015f1f79bbSDavid Hildenbrand 
17025f1f79bbSDavid Hildenbrand 	if (WARN_ON_ONCE(!*nb_sb))
17035f1f79bbSDavid Hildenbrand 		return -EINVAL;
17045f1f79bbSDavid Hildenbrand 
17055f1f79bbSDavid Hildenbrand 	while (*nb_sb) {
170654c6a6baSDavid Hildenbrand 		sb_id = virtio_mem_sbm_first_unplugged_sb(vm, mb_id);
1707905c4c51SDavid Hildenbrand 		if (sb_id >= vm->sbm.sbs_per_mb)
17085f1f79bbSDavid Hildenbrand 			break;
17095f1f79bbSDavid Hildenbrand 		count = 1;
17105f1f79bbSDavid Hildenbrand 		while (count < *nb_sb &&
1711905c4c51SDavid Hildenbrand 		       sb_id + count < vm->sbm.sbs_per_mb &&
171254c6a6baSDavid Hildenbrand 		       !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id + count, 1))
17135f1f79bbSDavid Hildenbrand 			count++;
17145f1f79bbSDavid Hildenbrand 
1715602ef894SDavid Hildenbrand 		rc = virtio_mem_sbm_plug_sb(vm, mb_id, sb_id, count);
17165f1f79bbSDavid Hildenbrand 		if (rc)
17175f1f79bbSDavid Hildenbrand 			return rc;
17185f1f79bbSDavid Hildenbrand 		*nb_sb -= count;
1719f4cf803dSDavid Hildenbrand 		if (old_state == VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL)
17205f1f79bbSDavid Hildenbrand 			continue;
17215f1f79bbSDavid Hildenbrand 
17225f1f79bbSDavid Hildenbrand 		/* fake-online the pages if the memory block is online */
17235f1f79bbSDavid Hildenbrand 		pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
1724905c4c51SDavid Hildenbrand 			       sb_id * vm->sbm.sb_size);
1725905c4c51SDavid Hildenbrand 		nr_pages = PFN_DOWN(count * vm->sbm.sb_size);
17265f1f79bbSDavid Hildenbrand 		virtio_mem_fake_online(pfn, nr_pages);
17275f1f79bbSDavid Hildenbrand 	}
17285f1f79bbSDavid Hildenbrand 
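	/*
	 * "old_state - 1" relies on the SBM memory block states defining each
	 * *_PARTIAL state directly after its fully-plugged counterpart, so a
	 * now fully-plugged block moves to the matching non-partial state.
	 */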
1729f4cf803dSDavid Hildenbrand 	if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb))
1730f4cf803dSDavid Hildenbrand 		virtio_mem_sbm_set_mb_state(vm, mb_id, old_state - 1);
17315f1f79bbSDavid Hildenbrand 
17321c3d69abSDan Carpenter 	return 0;
17335f1f79bbSDavid Hildenbrand }
17345f1f79bbSDavid Hildenbrand 
17354ba50cd3SDavid Hildenbrand static int virtio_mem_sbm_plug_request(struct virtio_mem *vm, uint64_t diff)
17365f1f79bbSDavid Hildenbrand {
1737f4cf803dSDavid Hildenbrand 	const int mb_states[] = {
1738c740bb97SDavid Hildenbrand 		VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL,
1739c740bb97SDavid Hildenbrand 		VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL,
1740f4cf803dSDavid Hildenbrand 		VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL,
1741f4cf803dSDavid Hildenbrand 	};
1742905c4c51SDavid Hildenbrand 	uint64_t nb_sb = diff / vm->sbm.sb_size;
17435f1f79bbSDavid Hildenbrand 	unsigned long mb_id;
1744f4cf803dSDavid Hildenbrand 	int rc, i;
17455f1f79bbSDavid Hildenbrand 
17465f1f79bbSDavid Hildenbrand 	if (!nb_sb)
17475f1f79bbSDavid Hildenbrand 		return 0;
17485f1f79bbSDavid Hildenbrand 
17495f1f79bbSDavid Hildenbrand 	/* Don't race with onlining/offlining */
17505f1f79bbSDavid Hildenbrand 	mutex_lock(&vm->hotplug_mutex);
17515f1f79bbSDavid Hildenbrand 
1752f4cf803dSDavid Hildenbrand 	for (i = 0; i < ARRAY_SIZE(mb_states); i++) {
1753f4cf803dSDavid Hildenbrand 		virtio_mem_sbm_for_each_mb(vm, mb_id, mb_states[i]) {
1754f4cf803dSDavid Hildenbrand 			rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb);
17555f1f79bbSDavid Hildenbrand 			if (rc || !nb_sb)
17565f1f79bbSDavid Hildenbrand 				goto out_unlock;
17575f1f79bbSDavid Hildenbrand 			cond_resched();
17585f1f79bbSDavid Hildenbrand 		}
17595f1f79bbSDavid Hildenbrand 	}
17605f1f79bbSDavid Hildenbrand 
17615f1f79bbSDavid Hildenbrand 	/*
17625f1f79bbSDavid Hildenbrand 	 * We won't be working on online/offline memory blocks from this point,
17635f1f79bbSDavid Hildenbrand 	 * so we can't race with memory onlining/offlining. Drop the mutex.
17645f1f79bbSDavid Hildenbrand 	 */
17655f1f79bbSDavid Hildenbrand 	mutex_unlock(&vm->hotplug_mutex);
17665f1f79bbSDavid Hildenbrand 
17675f1f79bbSDavid Hildenbrand 	/* Try to plug and add unused blocks */
176899f0b55eSDavid Hildenbrand 	virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_UNUSED) {
176998ff9f94SDavid Hildenbrand 		if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes()))
17705f1f79bbSDavid Hildenbrand 			return -ENOSPC;
17715f1f79bbSDavid Hildenbrand 
1772602ef894SDavid Hildenbrand 		rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb);
17735f1f79bbSDavid Hildenbrand 		if (rc || !nb_sb)
17745f1f79bbSDavid Hildenbrand 			return rc;
17755f1f79bbSDavid Hildenbrand 		cond_resched();
17765f1f79bbSDavid Hildenbrand 	}
17775f1f79bbSDavid Hildenbrand 
17785f1f79bbSDavid Hildenbrand 	/* Try to prepare, plug and add new blocks */
17795f1f79bbSDavid Hildenbrand 	while (nb_sb) {
178098ff9f94SDavid Hildenbrand 		if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes()))
17815f1f79bbSDavid Hildenbrand 			return -ENOSPC;
17825f1f79bbSDavid Hildenbrand 
1783602ef894SDavid Hildenbrand 		rc = virtio_mem_sbm_prepare_next_mb(vm, &mb_id);
17845f1f79bbSDavid Hildenbrand 		if (rc)
17855f1f79bbSDavid Hildenbrand 			return rc;
1786602ef894SDavid Hildenbrand 		rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb);
17875f1f79bbSDavid Hildenbrand 		if (rc)
17885f1f79bbSDavid Hildenbrand 			return rc;
17895f1f79bbSDavid Hildenbrand 		cond_resched();
17905f1f79bbSDavid Hildenbrand 	}
17915f1f79bbSDavid Hildenbrand 
17925f1f79bbSDavid Hildenbrand 	return 0;
17935f1f79bbSDavid Hildenbrand out_unlock:
17945f1f79bbSDavid Hildenbrand 	mutex_unlock(&vm->hotplug_mutex);
17955f1f79bbSDavid Hildenbrand 	return rc;
17965f1f79bbSDavid Hildenbrand }
17975f1f79bbSDavid Hildenbrand 
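/*
 * Worked example for the request above (example values only): with a 2 MiB
 * subblock size, plugging another 100 MiB means nb_sb == 50. Partially
 * plugged online and offline memory blocks are filled first, then unused
 * blocks are plugged and added, and finally new memory blocks are prepared.
 */
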
17985f1f79bbSDavid Hildenbrand /*
17994ba50cd3SDavid Hildenbrand  * Plug a big block and add it to Linux.
18004ba50cd3SDavid Hildenbrand  *
18014ba50cd3SDavid Hildenbrand  * Will modify the state of the big block.
18024ba50cd3SDavid Hildenbrand  */
18034ba50cd3SDavid Hildenbrand static int virtio_mem_bbm_plug_and_add_bb(struct virtio_mem *vm,
18044ba50cd3SDavid Hildenbrand 					  unsigned long bb_id)
18054ba50cd3SDavid Hildenbrand {
18064ba50cd3SDavid Hildenbrand 	int rc;
18074ba50cd3SDavid Hildenbrand 
18084ba50cd3SDavid Hildenbrand 	if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) !=
18094ba50cd3SDavid Hildenbrand 			 VIRTIO_MEM_BBM_BB_UNUSED))
18104ba50cd3SDavid Hildenbrand 		return -EINVAL;
18114ba50cd3SDavid Hildenbrand 
18124ba50cd3SDavid Hildenbrand 	rc = virtio_mem_bbm_plug_bb(vm, bb_id);
18134ba50cd3SDavid Hildenbrand 	if (rc)
18144ba50cd3SDavid Hildenbrand 		return rc;
18154ba50cd3SDavid Hildenbrand 	virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED);
18164ba50cd3SDavid Hildenbrand 
18174ba50cd3SDavid Hildenbrand 	rc = virtio_mem_bbm_add_bb(vm, bb_id);
18184ba50cd3SDavid Hildenbrand 	if (rc) {
18194ba50cd3SDavid Hildenbrand 		if (!virtio_mem_bbm_unplug_bb(vm, bb_id))
18204ba50cd3SDavid Hildenbrand 			virtio_mem_bbm_set_bb_state(vm, bb_id,
18214ba50cd3SDavid Hildenbrand 						    VIRTIO_MEM_BBM_BB_UNUSED);
18224ba50cd3SDavid Hildenbrand 		else
18234ba50cd3SDavid Hildenbrand 			/* Retry from the main loop. */
18244ba50cd3SDavid Hildenbrand 			virtio_mem_bbm_set_bb_state(vm, bb_id,
18254ba50cd3SDavid Hildenbrand 						    VIRTIO_MEM_BBM_BB_PLUGGED);
18264ba50cd3SDavid Hildenbrand 		return rc;
18274ba50cd3SDavid Hildenbrand 	}
18284ba50cd3SDavid Hildenbrand 	return 0;
18294ba50cd3SDavid Hildenbrand }
18304ba50cd3SDavid Hildenbrand 
18314ba50cd3SDavid Hildenbrand /*
18324ba50cd3SDavid Hildenbrand  * Prepare tracking data for the next big block.
18334ba50cd3SDavid Hildenbrand  */
18344ba50cd3SDavid Hildenbrand static int virtio_mem_bbm_prepare_next_bb(struct virtio_mem *vm,
18354ba50cd3SDavid Hildenbrand 					  unsigned long *bb_id)
18364ba50cd3SDavid Hildenbrand {
18374ba50cd3SDavid Hildenbrand 	int rc;
18384ba50cd3SDavid Hildenbrand 
18394ba50cd3SDavid Hildenbrand 	if (vm->bbm.next_bb_id > vm->bbm.last_usable_bb_id)
18404ba50cd3SDavid Hildenbrand 		return -ENOSPC;
18414ba50cd3SDavid Hildenbrand 
18424ba50cd3SDavid Hildenbrand 	/* Resize the big block state array if required. */
18434ba50cd3SDavid Hildenbrand 	rc = virtio_mem_bbm_bb_states_prepare_next_bb(vm);
18444ba50cd3SDavid Hildenbrand 	if (rc)
18454ba50cd3SDavid Hildenbrand 		return rc;
18464ba50cd3SDavid Hildenbrand 
18474ba50cd3SDavid Hildenbrand 	vm->bbm.bb_count[VIRTIO_MEM_BBM_BB_UNUSED]++;
18484ba50cd3SDavid Hildenbrand 	*bb_id = vm->bbm.next_bb_id;
18494ba50cd3SDavid Hildenbrand 	vm->bbm.next_bb_id++;
18504ba50cd3SDavid Hildenbrand 	return 0;
18514ba50cd3SDavid Hildenbrand }
18524ba50cd3SDavid Hildenbrand 
18534ba50cd3SDavid Hildenbrand static int virtio_mem_bbm_plug_request(struct virtio_mem *vm, uint64_t diff)
18544ba50cd3SDavid Hildenbrand {
18554ba50cd3SDavid Hildenbrand 	uint64_t nb_bb = diff / vm->bbm.bb_size;
18564ba50cd3SDavid Hildenbrand 	unsigned long bb_id;
18574ba50cd3SDavid Hildenbrand 	int rc;
18584ba50cd3SDavid Hildenbrand 
18594ba50cd3SDavid Hildenbrand 	if (!nb_bb)
18604ba50cd3SDavid Hildenbrand 		return 0;
18614ba50cd3SDavid Hildenbrand 
18624ba50cd3SDavid Hildenbrand 	/* Try to plug and add unused big blocks */
18634ba50cd3SDavid Hildenbrand 	virtio_mem_bbm_for_each_bb(vm, bb_id, VIRTIO_MEM_BBM_BB_UNUSED) {
18644ba50cd3SDavid Hildenbrand 		if (!virtio_mem_could_add_memory(vm, vm->bbm.bb_size))
18654ba50cd3SDavid Hildenbrand 			return -ENOSPC;
18664ba50cd3SDavid Hildenbrand 
18674ba50cd3SDavid Hildenbrand 		rc = virtio_mem_bbm_plug_and_add_bb(vm, bb_id);
18684ba50cd3SDavid Hildenbrand 		if (!rc)
18694ba50cd3SDavid Hildenbrand 			nb_bb--;
18704ba50cd3SDavid Hildenbrand 		if (rc || !nb_bb)
18714ba50cd3SDavid Hildenbrand 			return rc;
18724ba50cd3SDavid Hildenbrand 		cond_resched();
18734ba50cd3SDavid Hildenbrand 	}
18744ba50cd3SDavid Hildenbrand 
18754ba50cd3SDavid Hildenbrand 	/* Try to prepare, plug and add new big blocks */
18764ba50cd3SDavid Hildenbrand 	while (nb_bb) {
18774ba50cd3SDavid Hildenbrand 		if (!virtio_mem_could_add_memory(vm, vm->bbm.bb_size))
18784ba50cd3SDavid Hildenbrand 			return -ENOSPC;
18794ba50cd3SDavid Hildenbrand 
18804ba50cd3SDavid Hildenbrand 		rc = virtio_mem_bbm_prepare_next_bb(vm, &bb_id);
18814ba50cd3SDavid Hildenbrand 		if (rc)
18824ba50cd3SDavid Hildenbrand 			return rc;
18834ba50cd3SDavid Hildenbrand 		rc = virtio_mem_bbm_plug_and_add_bb(vm, bb_id);
18844ba50cd3SDavid Hildenbrand 		if (!rc)
18854ba50cd3SDavid Hildenbrand 			nb_bb--;
18864ba50cd3SDavid Hildenbrand 		if (rc)
18874ba50cd3SDavid Hildenbrand 			return rc;
18884ba50cd3SDavid Hildenbrand 		cond_resched();
18894ba50cd3SDavid Hildenbrand 	}
18904ba50cd3SDavid Hildenbrand 
18914ba50cd3SDavid Hildenbrand 	return 0;
18924ba50cd3SDavid Hildenbrand }
18934ba50cd3SDavid Hildenbrand 
18944ba50cd3SDavid Hildenbrand /*
18954ba50cd3SDavid Hildenbrand  * Try to plug the requested amount of memory.
18964ba50cd3SDavid Hildenbrand  */
18974ba50cd3SDavid Hildenbrand static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff)
18984ba50cd3SDavid Hildenbrand {
18994ba50cd3SDavid Hildenbrand 	if (vm->in_sbm)
19004ba50cd3SDavid Hildenbrand 		return virtio_mem_sbm_plug_request(vm, diff);
19014ba50cd3SDavid Hildenbrand 	return virtio_mem_bbm_plug_request(vm, diff);
19024ba50cd3SDavid Hildenbrand }
19034ba50cd3SDavid Hildenbrand 
19044ba50cd3SDavid Hildenbrand /*
1905c627ff5dSDavid Hildenbrand  * Unplug the desired number of plugged subblocks of an offline memory block.
1906c627ff5dSDavid Hildenbrand  * Will fail if any subblock cannot get unplugged (instead of skipping it).
1907c627ff5dSDavid Hildenbrand  *
1908c627ff5dSDavid Hildenbrand  * Will modify the state of the memory block. Might temporarily drop the
1909c627ff5dSDavid Hildenbrand  * hotplug_mutex.
1910c627ff5dSDavid Hildenbrand  *
1911c627ff5dSDavid Hildenbrand  * Note: Can fail after some subblocks were successfully unplugged.
1912c627ff5dSDavid Hildenbrand  */
1913602ef894SDavid Hildenbrand static int virtio_mem_sbm_unplug_any_sb_offline(struct virtio_mem *vm,
1914c627ff5dSDavid Hildenbrand 						unsigned long mb_id,
1915c627ff5dSDavid Hildenbrand 						uint64_t *nb_sb)
1916c627ff5dSDavid Hildenbrand {
1917c627ff5dSDavid Hildenbrand 	int rc;
1918c627ff5dSDavid Hildenbrand 
19195304ca3dSDavid Hildenbrand 	rc = virtio_mem_sbm_unplug_any_sb_raw(vm, mb_id, nb_sb);
1920c627ff5dSDavid Hildenbrand 
1921c627ff5dSDavid Hildenbrand 	/* some subblocks might have been unplugged even on failure */
1922905c4c51SDavid Hildenbrand 	if (!virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb))
192399f0b55eSDavid Hildenbrand 		virtio_mem_sbm_set_mb_state(vm, mb_id,
192499f0b55eSDavid Hildenbrand 					    VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL);
1925c627ff5dSDavid Hildenbrand 	if (rc)
1926c627ff5dSDavid Hildenbrand 		return rc;
1927c627ff5dSDavid Hildenbrand 
1928905c4c51SDavid Hildenbrand 	if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) {
1929c627ff5dSDavid Hildenbrand 		/*
1930c627ff5dSDavid Hildenbrand 		 * Remove the block from Linux - this should never fail.
1931c627ff5dSDavid Hildenbrand 		 * Prevent the block from getting onlined by marking it
1932c627ff5dSDavid Hildenbrand 		 * unplugged. Temporarily drop the mutex, so
1933c627ff5dSDavid Hildenbrand 		 * any pending GOING_ONLINE requests can be serviced/rejected.
1934c627ff5dSDavid Hildenbrand 		 */
193599f0b55eSDavid Hildenbrand 		virtio_mem_sbm_set_mb_state(vm, mb_id,
193699f0b55eSDavid Hildenbrand 					    VIRTIO_MEM_SBM_MB_UNUSED);
1937c627ff5dSDavid Hildenbrand 
1938c627ff5dSDavid Hildenbrand 		mutex_unlock(&vm->hotplug_mutex);
193901afdee2SDavid Hildenbrand 		rc = virtio_mem_sbm_remove_mb(vm, mb_id);
1940c627ff5dSDavid Hildenbrand 		BUG_ON(rc);
1941c627ff5dSDavid Hildenbrand 		mutex_lock(&vm->hotplug_mutex);
1942c627ff5dSDavid Hildenbrand 	}
1943c627ff5dSDavid Hildenbrand 	return 0;
1944c627ff5dSDavid Hildenbrand }
1945c627ff5dSDavid Hildenbrand 
1946c627ff5dSDavid Hildenbrand /*
194772f9525aSDavid Hildenbrand  * Unplug the given plugged subblocks of an online memory block.
194872f9525aSDavid Hildenbrand  *
194972f9525aSDavid Hildenbrand  * Will modify the state of the memory block.
195072f9525aSDavid Hildenbrand  */
1951602ef894SDavid Hildenbrand static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm,
195272f9525aSDavid Hildenbrand 					   unsigned long mb_id, int sb_id,
195372f9525aSDavid Hildenbrand 					   int count)
195472f9525aSDavid Hildenbrand {
1955905c4c51SDavid Hildenbrand 	const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size) * count;
1956c740bb97SDavid Hildenbrand 	const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id);
195772f9525aSDavid Hildenbrand 	unsigned long start_pfn;
195872f9525aSDavid Hildenbrand 	int rc;
195972f9525aSDavid Hildenbrand 
196072f9525aSDavid Hildenbrand 	start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
1961905c4c51SDavid Hildenbrand 			     sb_id * vm->sbm.sb_size);
196272f9525aSDavid Hildenbrand 
1963*f55484fdSDavid Hildenbrand 	rc = virtio_mem_fake_offline(vm, start_pfn, nr_pages);
196489c486c4SDavid Hildenbrand 	if (rc)
196589c486c4SDavid Hildenbrand 		return rc;
196672f9525aSDavid Hildenbrand 
196772f9525aSDavid Hildenbrand 	/* Try to unplug the allocated memory */
1968602ef894SDavid Hildenbrand 	rc = virtio_mem_sbm_unplug_sb(vm, mb_id, sb_id, count);
196972f9525aSDavid Hildenbrand 	if (rc) {
197072f9525aSDavid Hildenbrand 		/* Return the memory to the buddy. */
197172f9525aSDavid Hildenbrand 		virtio_mem_fake_online(start_pfn, nr_pages);
197272f9525aSDavid Hildenbrand 		return rc;
197372f9525aSDavid Hildenbrand 	}
197472f9525aSDavid Hildenbrand 
1975c740bb97SDavid Hildenbrand 	switch (old_state) {
1976c740bb97SDavid Hildenbrand 	case VIRTIO_MEM_SBM_MB_KERNEL:
197799f0b55eSDavid Hildenbrand 		virtio_mem_sbm_set_mb_state(vm, mb_id,
1978c740bb97SDavid Hildenbrand 					    VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL);
1979c740bb97SDavid Hildenbrand 		break;
1980c740bb97SDavid Hildenbrand 	case VIRTIO_MEM_SBM_MB_MOVABLE:
1981c740bb97SDavid Hildenbrand 		virtio_mem_sbm_set_mb_state(vm, mb_id,
1982c740bb97SDavid Hildenbrand 					    VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL);
1983c740bb97SDavid Hildenbrand 		break;
1984c740bb97SDavid Hildenbrand 	}
1985c740bb97SDavid Hildenbrand 
198672f9525aSDavid Hildenbrand 	return 0;
198772f9525aSDavid Hildenbrand }
198872f9525aSDavid Hildenbrand 
198972f9525aSDavid Hildenbrand /*
1990255f5985SDavid Hildenbrand  * Unplug the desired number of plugged subblocks of an online memory block.
1991255f5985SDavid Hildenbrand  * Will skip subblocks that are busy.
1992255f5985SDavid Hildenbrand  *
1993a5732387SDavid Hildenbrand  * Will modify the state of the memory block. Might temporarily drop the
1994a5732387SDavid Hildenbrand  * hotplug_mutex.
1995255f5985SDavid Hildenbrand  *
1996255f5985SDavid Hildenbrand  * Note: Can fail after some subblocks were successfully unplugged. Can
1997255f5985SDavid Hildenbrand  *       return 0 even if subblocks were busy and could not get unplugged.
1998255f5985SDavid Hildenbrand  */
1999602ef894SDavid Hildenbrand static int virtio_mem_sbm_unplug_any_sb_online(struct virtio_mem *vm,
2000255f5985SDavid Hildenbrand 					       unsigned long mb_id,
2001255f5985SDavid Hildenbrand 					       uint64_t *nb_sb)
2002255f5985SDavid Hildenbrand {
2003255f5985SDavid Hildenbrand 	int rc, sb_id;
2004255f5985SDavid Hildenbrand 
200572f9525aSDavid Hildenbrand 	/* If possible, try to unplug the complete block in one shot. */
2006905c4c51SDavid Hildenbrand 	if (*nb_sb >= vm->sbm.sbs_per_mb &&
2007905c4c51SDavid Hildenbrand 	    virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) {
2008602ef894SDavid Hildenbrand 		rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, 0,
2009905c4c51SDavid Hildenbrand 						     vm->sbm.sbs_per_mb);
201072f9525aSDavid Hildenbrand 		if (!rc) {
2011905c4c51SDavid Hildenbrand 			*nb_sb -= vm->sbm.sbs_per_mb;
201272f9525aSDavid Hildenbrand 			goto unplugged;
201372f9525aSDavid Hildenbrand 		} else if (rc != -EBUSY)
201472f9525aSDavid Hildenbrand 			return rc;
201572f9525aSDavid Hildenbrand 	}
201672f9525aSDavid Hildenbrand 
201772f9525aSDavid Hildenbrand 	/* Fallback to single subblocks. */
2018905c4c51SDavid Hildenbrand 	for (sb_id = vm->sbm.sbs_per_mb - 1; sb_id >= 0 && *nb_sb; sb_id--) {
2019255f5985SDavid Hildenbrand 		/* Find the next candidate subblock */
2020562e08cdSDavid Hildenbrand 		while (sb_id >= 0 &&
202154c6a6baSDavid Hildenbrand 		       !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1))
2022562e08cdSDavid Hildenbrand 			sb_id--;
2023562e08cdSDavid Hildenbrand 		if (sb_id < 0)
2024255f5985SDavid Hildenbrand 			break;
2025255f5985SDavid Hildenbrand 
2026602ef894SDavid Hildenbrand 		rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, sb_id, 1);
202772f9525aSDavid Hildenbrand 		if (rc == -EBUSY)
2028255f5985SDavid Hildenbrand 			continue;
202972f9525aSDavid Hildenbrand 		else if (rc)
2030255f5985SDavid Hildenbrand 			return rc;
2031255f5985SDavid Hildenbrand 		*nb_sb -= 1;
2032255f5985SDavid Hildenbrand 	}
2033255f5985SDavid Hildenbrand 
203472f9525aSDavid Hildenbrand unplugged:
2035a31648fdSDavid Hildenbrand 	rc = virtio_mem_sbm_try_remove_unplugged_mb(vm, mb_id);
2036a31648fdSDavid Hildenbrand 	if (rc)
2037a31648fdSDavid Hildenbrand 		vm->sbm.have_unplugged_mb = 1;
2038a31648fdSDavid Hildenbrand 	/* Ignore errors, this is not critical. We'll retry later. */
2039255f5985SDavid Hildenbrand 	return 0;
2040255f5985SDavid Hildenbrand }
2041255f5985SDavid Hildenbrand 
20425304ca3dSDavid Hildenbrand /*
20435304ca3dSDavid Hildenbrand  * Unplug the desired number of plugged subblocks of a memory block that is
20445304ca3dSDavid Hildenbrand  * already added to Linux. Will skip subblocks of online memory blocks that are
20455304ca3dSDavid Hildenbrand  * busy (by the OS). Will fail if any subblock that's not busy cannot get
20465304ca3dSDavid Hildenbrand  * unplugged.
20475304ca3dSDavid Hildenbrand  *
20485304ca3dSDavid Hildenbrand  * Will modify the state of the memory block. Might temporarily drop the
20495304ca3dSDavid Hildenbrand  * hotplug_mutex.
20505304ca3dSDavid Hildenbrand  *
20515304ca3dSDavid Hildenbrand  * Note: Can fail after some subblocks were successfully unplugged. Can
20525304ca3dSDavid Hildenbrand  *       return 0 even if subblocks were busy and could not get unplugged.
20535304ca3dSDavid Hildenbrand  */
20545304ca3dSDavid Hildenbrand static int virtio_mem_sbm_unplug_any_sb(struct virtio_mem *vm,
20555304ca3dSDavid Hildenbrand 					unsigned long mb_id,
20565304ca3dSDavid Hildenbrand 					uint64_t *nb_sb)
20575304ca3dSDavid Hildenbrand {
20585304ca3dSDavid Hildenbrand 	const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id);
20595304ca3dSDavid Hildenbrand 
20605304ca3dSDavid Hildenbrand 	switch (old_state) {
2061c740bb97SDavid Hildenbrand 	case VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL:
2062c740bb97SDavid Hildenbrand 	case VIRTIO_MEM_SBM_MB_KERNEL:
2063c740bb97SDavid Hildenbrand 	case VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL:
2064c740bb97SDavid Hildenbrand 	case VIRTIO_MEM_SBM_MB_MOVABLE:
20655304ca3dSDavid Hildenbrand 		return virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, nb_sb);
20665304ca3dSDavid Hildenbrand 	case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL:
20675304ca3dSDavid Hildenbrand 	case VIRTIO_MEM_SBM_MB_OFFLINE:
20685304ca3dSDavid Hildenbrand 		return virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, nb_sb);
20695304ca3dSDavid Hildenbrand 	}
20705304ca3dSDavid Hildenbrand 	return -EINVAL;
20715304ca3dSDavid Hildenbrand }
20725304ca3dSDavid Hildenbrand 
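/*
 * Try to unplug the requested amount of memory in Sub Block Mode, processing
 * memory blocks in the order given by the state list below (offline and
 * partially plugged blocks first).
 */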
20734ba50cd3SDavid Hildenbrand static int virtio_mem_sbm_unplug_request(struct virtio_mem *vm, uint64_t diff)
2074c627ff5dSDavid Hildenbrand {
20755304ca3dSDavid Hildenbrand 	const int mb_states[] = {
20765304ca3dSDavid Hildenbrand 		VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL,
20775304ca3dSDavid Hildenbrand 		VIRTIO_MEM_SBM_MB_OFFLINE,
2078c740bb97SDavid Hildenbrand 		VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL,
2079c740bb97SDavid Hildenbrand 		VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL,
2080c740bb97SDavid Hildenbrand 		VIRTIO_MEM_SBM_MB_MOVABLE,
2081c740bb97SDavid Hildenbrand 		VIRTIO_MEM_SBM_MB_KERNEL,
20825304ca3dSDavid Hildenbrand 	};
2083905c4c51SDavid Hildenbrand 	uint64_t nb_sb = diff / vm->sbm.sb_size;
2084c627ff5dSDavid Hildenbrand 	unsigned long mb_id;
20855304ca3dSDavid Hildenbrand 	int rc, i;
2086c627ff5dSDavid Hildenbrand 
2087c627ff5dSDavid Hildenbrand 	if (!nb_sb)
2088c627ff5dSDavid Hildenbrand 		return 0;
2089c627ff5dSDavid Hildenbrand 
2090c627ff5dSDavid Hildenbrand 	/*
2091c627ff5dSDavid Hildenbrand 	 * We'll drop the mutex a couple of times when it is safe to do so.
2092c627ff5dSDavid Hildenbrand 	 * This might result in some blocks switching the state (online/offline)
2093c627ff5dSDavid Hildenbrand 	 * and we could miss them in this run - we will retry again later.
2094c627ff5dSDavid Hildenbrand 	 */
2095c627ff5dSDavid Hildenbrand 	mutex_lock(&vm->hotplug_mutex);
2096c627ff5dSDavid Hildenbrand 
20975304ca3dSDavid Hildenbrand 	/*
20985304ca3dSDavid Hildenbrand 	 * We try to unplug from partially plugged blocks first, to try removing
2099c740bb97SDavid Hildenbrand 	 * whole memory blocks along with metadata. We prioritize ZONE_MOVABLE
2100c740bb97SDavid Hildenbrand 	 * as it's more reliable to unplug memory and remove whole memory
2101c740bb97SDavid Hildenbrand 	 * blocks, and we don't want to trigger a zone imbalance by
2102c740bb97SDavid Hildenbrand 	 * accidentally removing too much kernel memory.
21035304ca3dSDavid Hildenbrand 	 */
21045304ca3dSDavid Hildenbrand 	for (i = 0; i < ARRAY_SIZE(mb_states); i++) {
21055304ca3dSDavid Hildenbrand 		virtio_mem_sbm_for_each_mb_rev(vm, mb_id, mb_states[i]) {
21065304ca3dSDavid Hildenbrand 			rc = virtio_mem_sbm_unplug_any_sb(vm, mb_id, &nb_sb);
2107c627ff5dSDavid Hildenbrand 			if (rc || !nb_sb)
2108c627ff5dSDavid Hildenbrand 				goto out_unlock;
21095304ca3dSDavid Hildenbrand 			mutex_unlock(&vm->hotplug_mutex);
2110c627ff5dSDavid Hildenbrand 			cond_resched();
21115304ca3dSDavid Hildenbrand 			mutex_lock(&vm->hotplug_mutex);
2112c627ff5dSDavid Hildenbrand 		}
21135304ca3dSDavid Hildenbrand 		if (!unplug_online && i == 1) {
2114c627ff5dSDavid Hildenbrand 			mutex_unlock(&vm->hotplug_mutex);
2115c627ff5dSDavid Hildenbrand 			return 0;
2116255f5985SDavid Hildenbrand 		}
2117255f5985SDavid Hildenbrand 	}
2118255f5985SDavid Hildenbrand 
2119255f5985SDavid Hildenbrand 	mutex_unlock(&vm->hotplug_mutex);
2120255f5985SDavid Hildenbrand 	return nb_sb ? -EBUSY : 0;
2121c627ff5dSDavid Hildenbrand out_unlock:
2122c627ff5dSDavid Hildenbrand 	mutex_unlock(&vm->hotplug_mutex);
2123c627ff5dSDavid Hildenbrand 	return rc;
2124c627ff5dSDavid Hildenbrand }
2125c627ff5dSDavid Hildenbrand 
2126c627ff5dSDavid Hildenbrand /*
2127269ac938SDavid Hildenbrand  * Try to offline and remove a big block from Linux and unplug it. Will fail
2128269ac938SDavid Hildenbrand  * with -EBUSY if some memory is busy and cannot get unplugged.
2129269ac938SDavid Hildenbrand  *
2130269ac938SDavid Hildenbrand  * Will modify the state of the memory block. Might temporarily drop the
2131269ac938SDavid Hildenbrand  * hotplug_mutex.
2132269ac938SDavid Hildenbrand  */
2133269ac938SDavid Hildenbrand static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm,
2134269ac938SDavid Hildenbrand 						       unsigned long bb_id)
2135269ac938SDavid Hildenbrand {
21363711387aSDavid Hildenbrand 	const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id));
21373711387aSDavid Hildenbrand 	const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size);
21383711387aSDavid Hildenbrand 	unsigned long end_pfn = start_pfn + nr_pages;
21393711387aSDavid Hildenbrand 	unsigned long pfn;
21403711387aSDavid Hildenbrand 	struct page *page;
2141269ac938SDavid Hildenbrand 	int rc;
2142269ac938SDavid Hildenbrand 
2143269ac938SDavid Hildenbrand 	if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) !=
2144269ac938SDavid Hildenbrand 			 VIRTIO_MEM_BBM_BB_ADDED))
2145269ac938SDavid Hildenbrand 		return -EINVAL;
2146269ac938SDavid Hildenbrand 
21473711387aSDavid Hildenbrand 	/*
21483711387aSDavid Hildenbrand 	 * Start by fake-offlining all memory. Once we've marked the device
21493711387aSDavid Hildenbrand 	 * block as fake-offline, all newly onlined memory will
21503711387aSDavid Hildenbrand 	 * automatically be kept fake-offline. Protect from concurrent
21513711387aSDavid Hildenbrand 	 * onlining/offlining until we have a consistent state.
21523711387aSDavid Hildenbrand 	 */
21533711387aSDavid Hildenbrand 	mutex_lock(&vm->hotplug_mutex);
2154f504e15bSDavid Hildenbrand 	virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_FAKE_OFFLINE);
21553711387aSDavid Hildenbrand 
21563711387aSDavid Hildenbrand 	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
21573711387aSDavid Hildenbrand 		page = pfn_to_online_page(pfn);
21583711387aSDavid Hildenbrand 		if (!page)
21593711387aSDavid Hildenbrand 			continue;
21603711387aSDavid Hildenbrand 
2161*f55484fdSDavid Hildenbrand 		rc = virtio_mem_fake_offline(vm, pfn, PAGES_PER_SECTION);
21623711387aSDavid Hildenbrand 		if (rc) {
21633711387aSDavid Hildenbrand 			end_pfn = pfn;
2164f504e15bSDavid Hildenbrand 			goto rollback;
21653711387aSDavid Hildenbrand 		}
21663711387aSDavid Hildenbrand 	}
21673711387aSDavid Hildenbrand 	mutex_unlock(&vm->hotplug_mutex);
21683711387aSDavid Hildenbrand 
2169269ac938SDavid Hildenbrand 	rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id);
21703711387aSDavid Hildenbrand 	if (rc) {
21713711387aSDavid Hildenbrand 		mutex_lock(&vm->hotplug_mutex);
2172f504e15bSDavid Hildenbrand 		goto rollback;
21733711387aSDavid Hildenbrand 	}
2174269ac938SDavid Hildenbrand 
2175269ac938SDavid Hildenbrand 	rc = virtio_mem_bbm_unplug_bb(vm, bb_id);
2176269ac938SDavid Hildenbrand 	if (rc)
2177269ac938SDavid Hildenbrand 		virtio_mem_bbm_set_bb_state(vm, bb_id,
2178269ac938SDavid Hildenbrand 					    VIRTIO_MEM_BBM_BB_PLUGGED);
2179269ac938SDavid Hildenbrand 	else
2180269ac938SDavid Hildenbrand 		virtio_mem_bbm_set_bb_state(vm, bb_id,
2181269ac938SDavid Hildenbrand 					    VIRTIO_MEM_BBM_BB_UNUSED);
2182269ac938SDavid Hildenbrand 	return rc;
21833711387aSDavid Hildenbrand 
2184f504e15bSDavid Hildenbrand rollback:
21853711387aSDavid Hildenbrand 	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
21863711387aSDavid Hildenbrand 		page = pfn_to_online_page(pfn);
21873711387aSDavid Hildenbrand 		if (!page)
21883711387aSDavid Hildenbrand 			continue;
21893711387aSDavid Hildenbrand 		virtio_mem_fake_online(pfn, PAGES_PER_SECTION);
21903711387aSDavid Hildenbrand 	}
21913711387aSDavid Hildenbrand 	virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED);
21923711387aSDavid Hildenbrand 	mutex_unlock(&vm->hotplug_mutex);
21933711387aSDavid Hildenbrand 	return rc;
2194269ac938SDavid Hildenbrand }
2195269ac938SDavid Hildenbrand 
2196269ac938SDavid Hildenbrand /*
2197269ac938SDavid Hildenbrand  * Test if a big block is completely offline.
2198269ac938SDavid Hildenbrand  */
2199269ac938SDavid Hildenbrand static bool virtio_mem_bbm_bb_is_offline(struct virtio_mem *vm,
2200269ac938SDavid Hildenbrand 					 unsigned long bb_id)
2201269ac938SDavid Hildenbrand {
2202269ac938SDavid Hildenbrand 	const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id));
2203269ac938SDavid Hildenbrand 	const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size);
2204269ac938SDavid Hildenbrand 	unsigned long pfn;
2205269ac938SDavid Hildenbrand 
2206269ac938SDavid Hildenbrand 	for (pfn = start_pfn; pfn < start_pfn + nr_pages;
2207269ac938SDavid Hildenbrand 	     pfn += PAGES_PER_SECTION) {
2208269ac938SDavid Hildenbrand 		if (pfn_to_online_page(pfn))
2209269ac938SDavid Hildenbrand 			return false;
2210269ac938SDavid Hildenbrand 	}
2211269ac938SDavid Hildenbrand 
2212269ac938SDavid Hildenbrand 	return true;
2213269ac938SDavid Hildenbrand }
2214269ac938SDavid Hildenbrand 
2215db7b3377SDavid Hildenbrand /*
2216db7b3377SDavid Hildenbrand  * Test if a big block is completely onlined to ZONE_MOVABLE (or offline).
2217db7b3377SDavid Hildenbrand  */
2218db7b3377SDavid Hildenbrand static bool virtio_mem_bbm_bb_is_movable(struct virtio_mem *vm,
2219db7b3377SDavid Hildenbrand 					 unsigned long bb_id)
2220db7b3377SDavid Hildenbrand {
2221db7b3377SDavid Hildenbrand 	const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id));
2222db7b3377SDavid Hildenbrand 	const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size);
2223db7b3377SDavid Hildenbrand 	struct page *page;
2224db7b3377SDavid Hildenbrand 	unsigned long pfn;
2225db7b3377SDavid Hildenbrand 
2226db7b3377SDavid Hildenbrand 	for (pfn = start_pfn; pfn < start_pfn + nr_pages;
2227db7b3377SDavid Hildenbrand 	     pfn += PAGES_PER_SECTION) {
2228db7b3377SDavid Hildenbrand 		page = pfn_to_online_page(pfn);
2229db7b3377SDavid Hildenbrand 		if (!page)
2230db7b3377SDavid Hildenbrand 			continue;
2231db7b3377SDavid Hildenbrand 		if (page_zonenum(page) != ZONE_MOVABLE)
2232db7b3377SDavid Hildenbrand 			return false;
2233db7b3377SDavid Hildenbrand 	}
2234db7b3377SDavid Hildenbrand 
2235db7b3377SDavid Hildenbrand 	return true;
2236db7b3377SDavid Hildenbrand }
2237db7b3377SDavid Hildenbrand 
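/*
 * Try to unplug the requested amount of memory in Big Block Mode. Pass 0 only
 * considers completely offline big blocks, pass 1 only big blocks onlined to
 * ZONE_MOVABLE, and pass 2 all remaining added big blocks.
 */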
2238269ac938SDavid Hildenbrand static int virtio_mem_bbm_unplug_request(struct virtio_mem *vm, uint64_t diff)
2239269ac938SDavid Hildenbrand {
2240269ac938SDavid Hildenbrand 	uint64_t nb_bb = diff / vm->bbm.bb_size;
2241269ac938SDavid Hildenbrand 	uint64_t bb_id;
2242c6bc1422SDavid Hildenbrand 	int rc, i;
2243269ac938SDavid Hildenbrand 
2244269ac938SDavid Hildenbrand 	if (!nb_bb)
2245269ac938SDavid Hildenbrand 		return 0;
2246269ac938SDavid Hildenbrand 
2247269ac938SDavid Hildenbrand 	/*
2248c6bc1422SDavid Hildenbrand 	 * Try to unplug big blocks. Similar to SBM, start with offline
2249c6bc1422SDavid Hildenbrand 	 * big blocks.
2250269ac938SDavid Hildenbrand 	 */
2251db7b3377SDavid Hildenbrand 	for (i = 0; i < 3; i++) {
2252269ac938SDavid Hildenbrand 		virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) {
2253269ac938SDavid Hildenbrand 			cond_resched();
2254c6bc1422SDavid Hildenbrand 
2255c6bc1422SDavid Hildenbrand 			/*
2256c6bc1422SDavid Hildenbrand 			 * As we're holding no locks, these checks are racy,
2257c6bc1422SDavid Hildenbrand 			 * but we don't care.
2258c6bc1422SDavid Hildenbrand 			 */
2259c6bc1422SDavid Hildenbrand 			if (i == 0 && !virtio_mem_bbm_bb_is_offline(vm, bb_id))
2260c6bc1422SDavid Hildenbrand 				continue;
2261db7b3377SDavid Hildenbrand 			if (i == 1 && !virtio_mem_bbm_bb_is_movable(vm, bb_id))
2262db7b3377SDavid Hildenbrand 				continue;
2263269ac938SDavid Hildenbrand 			rc = virtio_mem_bbm_offline_remove_and_unplug_bb(vm, bb_id);
2264269ac938SDavid Hildenbrand 			if (rc == -EBUSY)
2265269ac938SDavid Hildenbrand 				continue;
2266269ac938SDavid Hildenbrand 			if (!rc)
2267269ac938SDavid Hildenbrand 				nb_bb--;
2268269ac938SDavid Hildenbrand 			if (rc || !nb_bb)
2269269ac938SDavid Hildenbrand 				return rc;
2270269ac938SDavid Hildenbrand 		}
2271c6bc1422SDavid Hildenbrand 		if (i == 0 && !unplug_online)
2272c6bc1422SDavid Hildenbrand 			return 0;
2273c6bc1422SDavid Hildenbrand 	}
2274269ac938SDavid Hildenbrand 
2275269ac938SDavid Hildenbrand 	return nb_bb ? -EBUSY : 0;
2276269ac938SDavid Hildenbrand }
2277269ac938SDavid Hildenbrand 
2278269ac938SDavid Hildenbrand /*
22794ba50cd3SDavid Hildenbrand  * Try to unplug the requested amount of memory.
22804ba50cd3SDavid Hildenbrand  */
22814ba50cd3SDavid Hildenbrand static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff)
22824ba50cd3SDavid Hildenbrand {
22834ba50cd3SDavid Hildenbrand 	if (vm->in_sbm)
22844ba50cd3SDavid Hildenbrand 		return virtio_mem_sbm_unplug_request(vm, diff);
2285269ac938SDavid Hildenbrand 	return virtio_mem_bbm_unplug_request(vm, diff);
22864ba50cd3SDavid Hildenbrand }
22874ba50cd3SDavid Hildenbrand 
22884ba50cd3SDavid Hildenbrand /*
22895f1f79bbSDavid Hildenbrand  * Try to unplug all blocks that couldn't be unplugged before, for example,
2290a31648fdSDavid Hildenbrand  * because the hypervisor was busy. Further, offline and remove any memory
2291a31648fdSDavid Hildenbrand  * blocks where we previously failed.
22925f1f79bbSDavid Hildenbrand  */
2293a31648fdSDavid Hildenbrand static int virtio_mem_cleanup_pending_mb(struct virtio_mem *vm)
22945f1f79bbSDavid Hildenbrand {
22954ba50cd3SDavid Hildenbrand 	unsigned long id;
2296a31648fdSDavid Hildenbrand 	int rc = 0;
22975f1f79bbSDavid Hildenbrand 
22984ba50cd3SDavid Hildenbrand 	if (!vm->in_sbm) {
22994ba50cd3SDavid Hildenbrand 		virtio_mem_bbm_for_each_bb(vm, id,
23004ba50cd3SDavid Hildenbrand 					   VIRTIO_MEM_BBM_BB_PLUGGED) {
23014ba50cd3SDavid Hildenbrand 			rc = virtio_mem_bbm_unplug_bb(vm, id);
23025f1f79bbSDavid Hildenbrand 			if (rc)
23035f1f79bbSDavid Hildenbrand 				return rc;
23044ba50cd3SDavid Hildenbrand 			virtio_mem_bbm_set_bb_state(vm, id,
23054ba50cd3SDavid Hildenbrand 						    VIRTIO_MEM_BBM_BB_UNUSED);
23064ba50cd3SDavid Hildenbrand 		}
23074ba50cd3SDavid Hildenbrand 		return 0;
23084ba50cd3SDavid Hildenbrand 	}
23094ba50cd3SDavid Hildenbrand 
23104ba50cd3SDavid Hildenbrand 	virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_PLUGGED) {
23114ba50cd3SDavid Hildenbrand 		rc = virtio_mem_sbm_unplug_mb(vm, id);
23124ba50cd3SDavid Hildenbrand 		if (rc)
23134ba50cd3SDavid Hildenbrand 			return rc;
23144ba50cd3SDavid Hildenbrand 		virtio_mem_sbm_set_mb_state(vm, id,
231599f0b55eSDavid Hildenbrand 					    VIRTIO_MEM_SBM_MB_UNUSED);
23165f1f79bbSDavid Hildenbrand 	}
23175f1f79bbSDavid Hildenbrand 
2318a31648fdSDavid Hildenbrand 	if (!vm->sbm.have_unplugged_mb)
2319a31648fdSDavid Hildenbrand 		return 0;
2320a31648fdSDavid Hildenbrand 
2321a31648fdSDavid Hildenbrand 	/*
2322a31648fdSDavid Hildenbrand 	 * Let's retry (offlining and) removing completely unplugged Linux
2323a31648fdSDavid Hildenbrand 	 * memory blocks.
2324a31648fdSDavid Hildenbrand 	 */
2325a31648fdSDavid Hildenbrand 	vm->sbm.have_unplugged_mb = false;
2326a31648fdSDavid Hildenbrand 
2327a31648fdSDavid Hildenbrand 	mutex_lock(&vm->hotplug_mutex);
2328a31648fdSDavid Hildenbrand 	virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL)
2329a31648fdSDavid Hildenbrand 		rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id);
2330a31648fdSDavid Hildenbrand 	virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL)
2331a31648fdSDavid Hildenbrand 		rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id);
2332a31648fdSDavid Hildenbrand 	virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL)
2333a31648fdSDavid Hildenbrand 		rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id);
2334a31648fdSDavid Hildenbrand 	mutex_unlock(&vm->hotplug_mutex);
2335a31648fdSDavid Hildenbrand 
2336a31648fdSDavid Hildenbrand 	if (rc)
2337a31648fdSDavid Hildenbrand 		vm->sbm.have_unplugged_mb = true;
2338a31648fdSDavid Hildenbrand 	/* Ignore errors, this is not critical. We'll retry later. */
23395f1f79bbSDavid Hildenbrand 	return 0;
23405f1f79bbSDavid Hildenbrand }
23415f1f79bbSDavid Hildenbrand 
23425f1f79bbSDavid Hildenbrand /*
23435f1f79bbSDavid Hildenbrand  * Update all parts of the config that could have changed.
23445f1f79bbSDavid Hildenbrand  */
23455f1f79bbSDavid Hildenbrand static void virtio_mem_refresh_config(struct virtio_mem *vm)
23465f1f79bbSDavid Hildenbrand {
234794c89453SDavid Hildenbrand 	const struct range pluggable_range = mhp_get_pluggable_range(true);
23485f1f79bbSDavid Hildenbrand 	uint64_t new_plugged_size, usable_region_size, end_addr;
23495f1f79bbSDavid Hildenbrand 
23505f1f79bbSDavid Hildenbrand 	/* the plugged_size is just a reflection of what _we_ did previously */
235199e0d048SMichael S. Tsirkin 	virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size,
23525f1f79bbSDavid Hildenbrand 			&new_plugged_size);
23535f1f79bbSDavid Hildenbrand 	if (WARN_ON_ONCE(new_plugged_size != vm->plugged_size))
23545f1f79bbSDavid Hildenbrand 		vm->plugged_size = new_plugged_size;
23555f1f79bbSDavid Hildenbrand 
23565f1f79bbSDavid Hildenbrand 	/* calculate the last usable memory block id */
235799e0d048SMichael S. Tsirkin 	virtio_cread_le(vm->vdev, struct virtio_mem_config,
23585f1f79bbSDavid Hildenbrand 			usable_region_size, &usable_region_size);
235994c89453SDavid Hildenbrand 	end_addr = min(vm->addr + usable_region_size - 1,
236094c89453SDavid Hildenbrand 		       pluggable_range.end);
23614ba50cd3SDavid Hildenbrand 
236294c89453SDavid Hildenbrand 	if (vm->in_sbm) {
236394c89453SDavid Hildenbrand 		vm->sbm.last_usable_mb_id = virtio_mem_phys_to_mb_id(end_addr);
236494c89453SDavid Hildenbrand 		if (!IS_ALIGNED(end_addr + 1, memory_block_size_bytes()))
236594c89453SDavid Hildenbrand 			vm->sbm.last_usable_mb_id--;
236694c89453SDavid Hildenbrand 	} else {
236794c89453SDavid Hildenbrand 		vm->bbm.last_usable_bb_id = virtio_mem_phys_to_bb_id(vm,
236894c89453SDavid Hildenbrand 								     end_addr);
236994c89453SDavid Hildenbrand 		if (!IS_ALIGNED(end_addr + 1, vm->bbm.bb_size))
237094c89453SDavid Hildenbrand 			vm->bbm.last_usable_bb_id--;
237194c89453SDavid Hildenbrand 	}
237294c89453SDavid Hildenbrand 	/*
237394c89453SDavid Hildenbrand 	 * If we cannot plug any of our device memory (e.g., nothing in the
237494c89453SDavid Hildenbrand 	 * usable region is addressable), the last usable memory block id will
237594c89453SDavid Hildenbrand 	 * be smaller than the first usable memory block id. We'll stop
237694c89453SDavid Hildenbrand 	 * attempting to add memory with -ENOSPC from our main loop.
237794c89453SDavid Hildenbrand 	 */
23785f1f79bbSDavid Hildenbrand 
23795f1f79bbSDavid Hildenbrand 	/* see if there is a request to change the size */
238099e0d048SMichael S. Tsirkin 	virtio_cread_le(vm->vdev, struct virtio_mem_config, requested_size,
23815f1f79bbSDavid Hildenbrand 			&vm->requested_size);
23825f1f79bbSDavid Hildenbrand 
23835f1f79bbSDavid Hildenbrand 	dev_info(&vm->vdev->dev, "plugged size: 0x%llx", vm->plugged_size);
23845f1f79bbSDavid Hildenbrand 	dev_info(&vm->vdev->dev, "requested size: 0x%llx", vm->requested_size);
23855f1f79bbSDavid Hildenbrand }
23865f1f79bbSDavid Hildenbrand 
23875f1f79bbSDavid Hildenbrand /*
23885f1f79bbSDavid Hildenbrand  * Workqueue function for handling plug/unplug requests and config updates.
23895f1f79bbSDavid Hildenbrand  */
23905f1f79bbSDavid Hildenbrand static void virtio_mem_run_wq(struct work_struct *work)
23915f1f79bbSDavid Hildenbrand {
23925f1f79bbSDavid Hildenbrand 	struct virtio_mem *vm = container_of(work, struct virtio_mem, wq);
23935f1f79bbSDavid Hildenbrand 	uint64_t diff;
23945f1f79bbSDavid Hildenbrand 	int rc;
23955f1f79bbSDavid Hildenbrand 
2396ce281462SDavid Hildenbrand 	if (unlikely(vm->in_kdump)) {
2397ce281462SDavid Hildenbrand 		dev_warn_once(&vm->vdev->dev,
2398ce281462SDavid Hildenbrand 			     "unexpected workqueue run in kdump kernel\n");
2399ce281462SDavid Hildenbrand 		return;
2400ce281462SDavid Hildenbrand 	}
2401ce281462SDavid Hildenbrand 
24025f1f79bbSDavid Hildenbrand 	hrtimer_cancel(&vm->retry_timer);
24035f1f79bbSDavid Hildenbrand 
24045f1f79bbSDavid Hildenbrand 	if (vm->broken)
24055f1f79bbSDavid Hildenbrand 		return;
24065f1f79bbSDavid Hildenbrand 
240798ff9f94SDavid Hildenbrand 	atomic_set(&vm->wq_active, 1);
24085f1f79bbSDavid Hildenbrand retry:
24095f1f79bbSDavid Hildenbrand 	rc = 0;
24105f1f79bbSDavid Hildenbrand 
24115f1f79bbSDavid Hildenbrand 	/* Make sure we start with a clean state if there are leftovers. */
24125f1f79bbSDavid Hildenbrand 	if (unlikely(vm->unplug_all_required))
24135f1f79bbSDavid Hildenbrand 		rc = virtio_mem_send_unplug_all_request(vm);
24145f1f79bbSDavid Hildenbrand 
24155f1f79bbSDavid Hildenbrand 	if (atomic_read(&vm->config_changed)) {
24165f1f79bbSDavid Hildenbrand 		atomic_set(&vm->config_changed, 0);
24175f1f79bbSDavid Hildenbrand 		virtio_mem_refresh_config(vm);
24185f1f79bbSDavid Hildenbrand 	}
24195f1f79bbSDavid Hildenbrand 
2420a31648fdSDavid Hildenbrand 	/* Cleanup any leftovers from previous runs */
24215f1f79bbSDavid Hildenbrand 	if (!rc)
2422a31648fdSDavid Hildenbrand 		rc = virtio_mem_cleanup_pending_mb(vm);
24235f1f79bbSDavid Hildenbrand 
24245f1f79bbSDavid Hildenbrand 	if (!rc && vm->requested_size != vm->plugged_size) {
24255f1f79bbSDavid Hildenbrand 		if (vm->requested_size > vm->plugged_size) {
24265f1f79bbSDavid Hildenbrand 			diff = vm->requested_size - vm->plugged_size;
24275f1f79bbSDavid Hildenbrand 			rc = virtio_mem_plug_request(vm, diff);
2428c627ff5dSDavid Hildenbrand 		} else {
2429c627ff5dSDavid Hildenbrand 			diff = vm->plugged_size - vm->requested_size;
2430c627ff5dSDavid Hildenbrand 			rc = virtio_mem_unplug_request(vm, diff);
24315f1f79bbSDavid Hildenbrand 		}
24325f1f79bbSDavid Hildenbrand 	}
24335f1f79bbSDavid Hildenbrand 
2434a31648fdSDavid Hildenbrand 	/*
2435a31648fdSDavid Hildenbrand 	 * Keep retrying to offline and remove completely unplugged Linux
2436a31648fdSDavid Hildenbrand 	 * memory blocks.
2437a31648fdSDavid Hildenbrand 	 */
2438a31648fdSDavid Hildenbrand 	if (!rc && vm->in_sbm && vm->sbm.have_unplugged_mb)
2439a31648fdSDavid Hildenbrand 		rc = -EBUSY;
2440a31648fdSDavid Hildenbrand 
24415f1f79bbSDavid Hildenbrand 	switch (rc) {
24425f1f79bbSDavid Hildenbrand 	case 0:
244323e77b5dSDavid Hildenbrand 		vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS;
24445f1f79bbSDavid Hildenbrand 		break;
24455f1f79bbSDavid Hildenbrand 	case -ENOSPC:
24465f1f79bbSDavid Hildenbrand 		/*
24475f1f79bbSDavid Hildenbrand 		 * We cannot add any more memory (alignment, physical limit)
24485f1f79bbSDavid Hildenbrand 		 * or we have too many offline memory blocks.
24495f1f79bbSDavid Hildenbrand 		 */
24505f1f79bbSDavid Hildenbrand 		break;
24518d4edcfeSDavid Hildenbrand 	case -ETXTBSY:
24525f1f79bbSDavid Hildenbrand 		/*
24535f1f79bbSDavid Hildenbrand 		 * The hypervisor cannot process our request right now
24548d4edcfeSDavid Hildenbrand 		 * (e.g., out of memory, migrating).
24558d4edcfeSDavid Hildenbrand 		 */
24568d4edcfeSDavid Hildenbrand 	case -EBUSY:
24578d4edcfeSDavid Hildenbrand 		/*
24588d4edcfeSDavid Hildenbrand 		 * We cannot free up any memory to unplug it (all plugged memory
24598d4edcfeSDavid Hildenbrand 		 * is busy).
24605f1f79bbSDavid Hildenbrand 		 */
24615f1f79bbSDavid Hildenbrand 	case -ENOMEM:
24625f1f79bbSDavid Hildenbrand 		/* Out of memory, try again later. */
246323e77b5dSDavid Hildenbrand 		hrtimer_start(&vm->retry_timer, ms_to_ktime(vm->retry_timer_ms),
24645f1f79bbSDavid Hildenbrand 			      HRTIMER_MODE_REL);
24655f1f79bbSDavid Hildenbrand 		break;
24665f1f79bbSDavid Hildenbrand 	case -EAGAIN:
24675f1f79bbSDavid Hildenbrand 		/* Retry immediately (e.g., the config changed). */
24685f1f79bbSDavid Hildenbrand 		goto retry;
24695f1f79bbSDavid Hildenbrand 	default:
24705f1f79bbSDavid Hildenbrand 		/* Unknown error, mark as broken */
24715f1f79bbSDavid Hildenbrand 		dev_err(&vm->vdev->dev,
24725f1f79bbSDavid Hildenbrand 			"unknown error, marking device broken: %d\n", rc);
24735f1f79bbSDavid Hildenbrand 		vm->broken = true;
24745f1f79bbSDavid Hildenbrand 	}
247598ff9f94SDavid Hildenbrand 
247698ff9f94SDavid Hildenbrand 	atomic_set(&vm->wq_active, 0);
24775f1f79bbSDavid Hildenbrand }
24785f1f79bbSDavid Hildenbrand 
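/*
 * The retry timer expired: trigger another workqueue run and double the retry
 * interval, capped at the configured maximum.
 */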
24795f1f79bbSDavid Hildenbrand static enum hrtimer_restart virtio_mem_timer_expired(struct hrtimer *timer)
24805f1f79bbSDavid Hildenbrand {
24815f1f79bbSDavid Hildenbrand 	struct virtio_mem *vm = container_of(timer, struct virtio_mem,
24825f1f79bbSDavid Hildenbrand 					     retry_timer);
24835f1f79bbSDavid Hildenbrand 
24845f1f79bbSDavid Hildenbrand 	virtio_mem_retry(vm);
248523e77b5dSDavid Hildenbrand 	vm->retry_timer_ms = min_t(unsigned int, vm->retry_timer_ms * 2,
248623e77b5dSDavid Hildenbrand 				   VIRTIO_MEM_RETRY_TIMER_MAX_MS);
24875f1f79bbSDavid Hildenbrand 	return HRTIMER_NORESTART;
24885f1f79bbSDavid Hildenbrand }
24895f1f79bbSDavid Hildenbrand 
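/*
 * Virtqueue callback: the device processed a request, wake up the waiter.
 */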
24905f1f79bbSDavid Hildenbrand static void virtio_mem_handle_response(struct virtqueue *vq)
24915f1f79bbSDavid Hildenbrand {
24925f1f79bbSDavid Hildenbrand 	struct virtio_mem *vm = vq->vdev->priv;
24935f1f79bbSDavid Hildenbrand 
24945f1f79bbSDavid Hildenbrand 	wake_up(&vm->host_resp);
24955f1f79bbSDavid Hildenbrand }
24965f1f79bbSDavid Hildenbrand 
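/*
 * Set up the single "guest-request" virtqueue used for device requests.
 */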
24975f1f79bbSDavid Hildenbrand static int virtio_mem_init_vq(struct virtio_mem *vm)
24985f1f79bbSDavid Hildenbrand {
24995f1f79bbSDavid Hildenbrand 	struct virtqueue *vq;
25005f1f79bbSDavid Hildenbrand 
25015f1f79bbSDavid Hildenbrand 	vq = virtio_find_single_vq(vm->vdev, virtio_mem_handle_response,
25025f1f79bbSDavid Hildenbrand 				   "guest-request");
25035f1f79bbSDavid Hildenbrand 	if (IS_ERR(vq))
25045f1f79bbSDavid Hildenbrand 		return PTR_ERR(vq);
25055f1f79bbSDavid Hildenbrand 	vm->vq = vq;
25065f1f79bbSDavid Hildenbrand 
25075f1f79bbSDavid Hildenbrand 	return 0;
25085f1f79bbSDavid Hildenbrand }
25095f1f79bbSDavid Hildenbrand 
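/*
 * Initialize ordinary (hotplug) operation: select SBM vs. BBM, compute the
 * (sub)block sizes and the offline threshold, create the parent resource and
 * dynamic memory group, and register the memory notifier.
 */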
251094300fcfSDavid Hildenbrand static int virtio_mem_init_hotplug(struct virtio_mem *vm)
25115f1f79bbSDavid Hildenbrand {
251294c89453SDavid Hildenbrand 	const struct range pluggable_range = mhp_get_pluggable_range(true);
251384e17e68SDavid Hildenbrand 	uint64_t unit_pages, sb_size, addr;
251484e17e68SDavid Hildenbrand 	int rc;
25156725f211SDavid Hildenbrand 
25165f1f79bbSDavid Hildenbrand 	/* bad device setup - warn only */
25175f1f79bbSDavid Hildenbrand 	if (!IS_ALIGNED(vm->addr, memory_block_size_bytes()))
25185f1f79bbSDavid Hildenbrand 		dev_warn(&vm->vdev->dev,
25195f1f79bbSDavid Hildenbrand 			 "The alignment of the physical start address can make some memory unusable.\n");
25205f1f79bbSDavid Hildenbrand 	if (!IS_ALIGNED(vm->addr + vm->region_size, memory_block_size_bytes()))
25215f1f79bbSDavid Hildenbrand 		dev_warn(&vm->vdev->dev,
25225f1f79bbSDavid Hildenbrand 			 "The alignment of the physical end address can make some memory unusable.\n");
252394c89453SDavid Hildenbrand 	if (vm->addr < pluggable_range.start ||
252494c89453SDavid Hildenbrand 	    vm->addr + vm->region_size - 1 > pluggable_range.end)
25255f1f79bbSDavid Hildenbrand 		dev_warn(&vm->vdev->dev,
252694c89453SDavid Hildenbrand 			 "Some device memory is not addressable/pluggable. This can make some memory unusable.\n");
25275f1f79bbSDavid Hildenbrand 
2528500817bfSDavid Hildenbrand 	/* Prepare the offline threshold - make sure we can add two blocks. */
2529500817bfSDavid Hildenbrand 	vm->offline_threshold = max_t(uint64_t, 2 * memory_block_size_bytes(),
2530500817bfSDavid Hildenbrand 				      VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD);
2531500817bfSDavid Hildenbrand 
25325f1f79bbSDavid Hildenbrand 	/*
2533448b8ec3SZi Yan 	 * alloc_contig_range() works reliably with pageblock
2534448b8ec3SZi Yan 	 * granularity on ZONE_NORMAL, so use pageblock_nr_pages.
25355f1f79bbSDavid Hildenbrand 	 */
2536448b8ec3SZi Yan 	sb_size = PAGE_SIZE * pageblock_nr_pages;
25374ba50cd3SDavid Hildenbrand 	sb_size = max_t(uint64_t, vm->device_block_size, sb_size);
25384ba50cd3SDavid Hildenbrand 
2539faa45ff4SDavid Hildenbrand 	if (sb_size < memory_block_size_bytes() && !force_bbm) {
25404ba50cd3SDavid Hildenbrand 		/* SBM: At least two subblocks per Linux memory block. */
25414ba50cd3SDavid Hildenbrand 		vm->in_sbm = true;
25424ba50cd3SDavid Hildenbrand 		vm->sbm.sb_size = sb_size;
25434ba50cd3SDavid Hildenbrand 		vm->sbm.sbs_per_mb = memory_block_size_bytes() /
25444ba50cd3SDavid Hildenbrand 				     vm->sbm.sb_size;
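		/*
		 * Example (typical x86-64 values, not guaranteed): with 4 KiB
		 * pages, 2 MiB pageblocks, 128 MiB Linux memory blocks and a
		 * device block size of at most 2 MiB, this yields
		 * sb_size = 2 MiB and sbs_per_mb = 64.
		 */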
25455f1f79bbSDavid Hildenbrand 
25465f1f79bbSDavid Hildenbrand 		/* Round up to the next full memory block */
254794c89453SDavid Hildenbrand 		addr = max_t(uint64_t, vm->addr, pluggable_range.start) +
254894c89453SDavid Hildenbrand 		       memory_block_size_bytes() - 1;
25494ba50cd3SDavid Hildenbrand 		vm->sbm.first_mb_id = virtio_mem_phys_to_mb_id(addr);
25508a6f082bSDavid Hildenbrand 		vm->sbm.next_mb_id = vm->sbm.first_mb_id;
25514ba50cd3SDavid Hildenbrand 	} else {
25524ba50cd3SDavid Hildenbrand 		/* BBM: At least one Linux memory block. */
2553faa45ff4SDavid Hildenbrand 		vm->bbm.bb_size = max_t(uint64_t, vm->device_block_size,
2554faa45ff4SDavid Hildenbrand 					memory_block_size_bytes());
25554ba50cd3SDavid Hildenbrand 
2556faa45ff4SDavid Hildenbrand 		if (bbm_block_size) {
2557faa45ff4SDavid Hildenbrand 			if (!is_power_of_2(bbm_block_size)) {
2558faa45ff4SDavid Hildenbrand 				dev_warn(&vm->vdev->dev,
2559faa45ff4SDavid Hildenbrand 					 "bbm_block_size is not a power of 2");
2560faa45ff4SDavid Hildenbrand 			} else if (bbm_block_size < vm->bbm.bb_size) {
2561faa45ff4SDavid Hildenbrand 				dev_warn(&vm->vdev->dev,
2562faa45ff4SDavid Hildenbrand 					 "bbm_block_size is too small");
2563faa45ff4SDavid Hildenbrand 			} else {
2564faa45ff4SDavid Hildenbrand 				vm->bbm.bb_size = bbm_block_size;
2565faa45ff4SDavid Hildenbrand 			}
2566faa45ff4SDavid Hildenbrand 		}
2567faa45ff4SDavid Hildenbrand 
2568faa45ff4SDavid Hildenbrand 		/* Round up to the next aligned big block */
256994c89453SDavid Hildenbrand 		addr = max_t(uint64_t, vm->addr, pluggable_range.start) +
257094c89453SDavid Hildenbrand 		       vm->bbm.bb_size - 1;
2571faa45ff4SDavid Hildenbrand 		vm->bbm.first_bb_id = virtio_mem_phys_to_bb_id(vm, addr);
25724ba50cd3SDavid Hildenbrand 		vm->bbm.next_bb_id = vm->bbm.first_bb_id;
25735f1f79bbSDavid Hildenbrand 
2574500817bfSDavid Hildenbrand 		/* Make sure we can add two big blocks. */
25754ba50cd3SDavid Hildenbrand 		vm->offline_threshold = max_t(uint64_t, 2 * vm->bbm.bb_size,
25764ba50cd3SDavid Hildenbrand 					      vm->offline_threshold);
2577500817bfSDavid Hildenbrand 	}
257898ff9f94SDavid Hildenbrand 
25795f1f79bbSDavid Hildenbrand 	dev_info(&vm->vdev->dev, "memory block size: 0x%lx",
25805f1f79bbSDavid Hildenbrand 		 memory_block_size_bytes());
25814ba50cd3SDavid Hildenbrand 	if (vm->in_sbm)
2582544fc7dbSMichael S. Tsirkin 		dev_info(&vm->vdev->dev, "subblock size: 0x%llx",
2583905c4c51SDavid Hildenbrand 			 (unsigned long long)vm->sbm.sb_size);
25844ba50cd3SDavid Hildenbrand 	else
25854ba50cd3SDavid Hildenbrand 		dev_info(&vm->vdev->dev, "big block size: 0x%llx",
25864ba50cd3SDavid Hildenbrand 			 (unsigned long long)vm->bbm.bb_size);
258794300fcfSDavid Hildenbrand 
258884e17e68SDavid Hildenbrand 	/* create the parent resource for all memory */
258984e17e68SDavid Hildenbrand 	rc = virtio_mem_create_resource(vm);
259084e17e68SDavid Hildenbrand 	if (rc)
259184e17e68SDavid Hildenbrand 		return rc;
259284e17e68SDavid Hildenbrand 
259384e17e68SDavid Hildenbrand 	/* use a single dynamic memory group to cover the whole memory device */
259484e17e68SDavid Hildenbrand 	if (vm->in_sbm)
259584e17e68SDavid Hildenbrand 		unit_pages = PHYS_PFN(memory_block_size_bytes());
259684e17e68SDavid Hildenbrand 	else
259784e17e68SDavid Hildenbrand 		unit_pages = PHYS_PFN(vm->bbm.bb_size);
259884e17e68SDavid Hildenbrand 	rc = memory_group_register_dynamic(vm->nid, unit_pages);
259984e17e68SDavid Hildenbrand 	if (rc < 0)
260084e17e68SDavid Hildenbrand 		goto out_del_resource;
260184e17e68SDavid Hildenbrand 	vm->mgid = rc;
260284e17e68SDavid Hildenbrand 
260384e17e68SDavid Hildenbrand 	/*
260484e17e68SDavid Hildenbrand 	 * If we still have memory plugged, we have to unplug all memory first.
260584e17e68SDavid Hildenbrand 	 * Registering our parent resource makes sure that this memory isn't
260684e17e68SDavid Hildenbrand 	 * actually in use (e.g., trying to reload the driver).
260784e17e68SDavid Hildenbrand 	 */
260884e17e68SDavid Hildenbrand 	if (vm->plugged_size) {
260984e17e68SDavid Hildenbrand 		vm->unplug_all_required = true;
261084e17e68SDavid Hildenbrand 		dev_info(&vm->vdev->dev, "unplugging all memory is required\n");
261184e17e68SDavid Hildenbrand 	}
261284e17e68SDavid Hildenbrand 
261384e17e68SDavid Hildenbrand 	/* register callbacks */
261484e17e68SDavid Hildenbrand 	vm->memory_notifier.notifier_call = virtio_mem_memory_notifier_cb;
261584e17e68SDavid Hildenbrand 	rc = register_memory_notifier(&vm->memory_notifier);
261684e17e68SDavid Hildenbrand 	if (rc)
261784e17e68SDavid Hildenbrand 		goto out_unreg_group;
261884e17e68SDavid Hildenbrand 	rc = register_virtio_mem_device(vm);
261984e17e68SDavid Hildenbrand 	if (rc)
262084e17e68SDavid Hildenbrand 		goto out_unreg_mem;
262184e17e68SDavid Hildenbrand 
262294300fcfSDavid Hildenbrand 	return 0;
262384e17e68SDavid Hildenbrand out_unreg_mem:
262484e17e68SDavid Hildenbrand 	unregister_memory_notifier(&vm->memory_notifier);
262584e17e68SDavid Hildenbrand out_unreg_group:
262684e17e68SDavid Hildenbrand 	memory_group_unregister(vm->mgid);
262784e17e68SDavid Hildenbrand out_del_resource:
262884e17e68SDavid Hildenbrand 	virtio_mem_delete_resource(vm);
262984e17e68SDavid Hildenbrand 	return rc;
263094300fcfSDavid Hildenbrand }
263194300fcfSDavid Hildenbrand 
2632ce281462SDavid Hildenbrand #ifdef CONFIG_PROC_VMCORE
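/*
 * Query the device for the plug state of the device block(s) covering
 * [addr, addr + size). Returns the reported state on success or a negative
 * errno on failure.
 */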
2633ce281462SDavid Hildenbrand static int virtio_mem_send_state_request(struct virtio_mem *vm, uint64_t addr,
2634ce281462SDavid Hildenbrand 					 uint64_t size)
2635ce281462SDavid Hildenbrand {
2636ce281462SDavid Hildenbrand 	const uint64_t nb_vm_blocks = size / vm->device_block_size;
2637ce281462SDavid Hildenbrand 	const struct virtio_mem_req req = {
2638ce281462SDavid Hildenbrand 		.type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_STATE),
2639ce281462SDavid Hildenbrand 		.u.state.addr = cpu_to_virtio64(vm->vdev, addr),
2640ce281462SDavid Hildenbrand 		.u.state.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks),
2641ce281462SDavid Hildenbrand 	};
2642ce281462SDavid Hildenbrand 	int rc = -ENOMEM;
2643ce281462SDavid Hildenbrand 
2644ce281462SDavid Hildenbrand 	dev_dbg(&vm->vdev->dev, "requesting state: 0x%llx - 0x%llx\n", addr,
2645ce281462SDavid Hildenbrand 		addr + size - 1);
2646ce281462SDavid Hildenbrand 
2647ce281462SDavid Hildenbrand 	switch (virtio_mem_send_request(vm, &req)) {
2648ce281462SDavid Hildenbrand 	case VIRTIO_MEM_RESP_ACK:
2649ce281462SDavid Hildenbrand 		return virtio16_to_cpu(vm->vdev, vm->resp.u.state.state);
2650ce281462SDavid Hildenbrand 	case VIRTIO_MEM_RESP_ERROR:
2651ce281462SDavid Hildenbrand 		rc = -EINVAL;
2652ce281462SDavid Hildenbrand 		break;
2653ce281462SDavid Hildenbrand 	default:
2654ce281462SDavid Hildenbrand 		break;
2655ce281462SDavid Hildenbrand 	}
2656ce281462SDavid Hildenbrand 
2657ce281462SDavid Hildenbrand 	dev_dbg(&vm->vdev->dev, "requesting state failed: %d\n", rc);
2658ce281462SDavid Hildenbrand 	return rc;
2659ce281462SDavid Hildenbrand }
2660ce281462SDavid Hildenbrand 
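/*
 * vmcore callback in the kdump kernel: report whether the given PFN is backed
 * by plugged device memory. The result for the last queried device block is
 * cached to avoid repeated device requests.
 */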
2661ce281462SDavid Hildenbrand static bool virtio_mem_vmcore_pfn_is_ram(struct vmcore_cb *cb,
2662ce281462SDavid Hildenbrand 					 unsigned long pfn)
2663ce281462SDavid Hildenbrand {
2664ce281462SDavid Hildenbrand 	struct virtio_mem *vm = container_of(cb, struct virtio_mem,
2665ce281462SDavid Hildenbrand 					     vmcore_cb);
2666ce281462SDavid Hildenbrand 	uint64_t addr = PFN_PHYS(pfn);
2667ce281462SDavid Hildenbrand 	bool is_ram;
2668ce281462SDavid Hildenbrand 	int rc;
2669ce281462SDavid Hildenbrand 
2670ce281462SDavid Hildenbrand 	if (!virtio_mem_contains_range(vm, addr, PAGE_SIZE))
2671ce281462SDavid Hildenbrand 		return true;
2672ce281462SDavid Hildenbrand 	if (!vm->plugged_size)
2673ce281462SDavid Hildenbrand 		return false;
2674ce281462SDavid Hildenbrand 
2675ce281462SDavid Hildenbrand 	/*
2676ce281462SDavid Hildenbrand 	 * We have to serialize device requests and access to the information
2677ce281462SDavid Hildenbrand 	 * about the block queried last.
2678ce281462SDavid Hildenbrand 	 */
2679ce281462SDavid Hildenbrand 	mutex_lock(&vm->hotplug_mutex);
2680ce281462SDavid Hildenbrand 
2681ce281462SDavid Hildenbrand 	addr = ALIGN_DOWN(addr, vm->device_block_size);
2682ce281462SDavid Hildenbrand 	if (addr != vm->last_block_addr) {
2683ce281462SDavid Hildenbrand 		rc = virtio_mem_send_state_request(vm, addr,
2684ce281462SDavid Hildenbrand 						   vm->device_block_size);
2685ce281462SDavid Hildenbrand 		/* On any kind of error, we're going to signal !ram. */
2686ce281462SDavid Hildenbrand 		if (rc == VIRTIO_MEM_STATE_PLUGGED)
2687ce281462SDavid Hildenbrand 			vm->last_block_plugged = true;
2688ce281462SDavid Hildenbrand 		else
2689ce281462SDavid Hildenbrand 			vm->last_block_plugged = false;
2690ce281462SDavid Hildenbrand 		vm->last_block_addr = addr;
2691ce281462SDavid Hildenbrand 	}
2692ce281462SDavid Hildenbrand 
2693ce281462SDavid Hildenbrand 	is_ram = vm->last_block_plugged;
2694ce281462SDavid Hildenbrand 	mutex_unlock(&vm->hotplug_mutex);
2695ce281462SDavid Hildenbrand 	return is_ram;
2696ce281462SDavid Hildenbrand }
2697ce281462SDavid Hildenbrand #endif /* CONFIG_PROC_VMCORE */
2698ce281462SDavid Hildenbrand 
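/*
 * In the kdump kernel, don't hot(un)plug anything - only register a vmcore
 * callback so that dumping tools can query which device blocks are plugged
 * and readable.
 */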
2699ce281462SDavid Hildenbrand static int virtio_mem_init_kdump(struct virtio_mem *vm)
2700ce281462SDavid Hildenbrand {
2701ce281462SDavid Hildenbrand #ifdef CONFIG_PROC_VMCORE
2702ce281462SDavid Hildenbrand 	dev_info(&vm->vdev->dev, "memory hot(un)plug disabled in kdump kernel\n");
2703ce281462SDavid Hildenbrand 	vm->vmcore_cb.pfn_is_ram = virtio_mem_vmcore_pfn_is_ram;
2704ce281462SDavid Hildenbrand 	register_vmcore_cb(&vm->vmcore_cb);
2705ce281462SDavid Hildenbrand 	return 0;
2706ce281462SDavid Hildenbrand #else /* CONFIG_PROC_VMCORE */
2707ce281462SDavid Hildenbrand 	dev_warn(&vm->vdev->dev, "disabled in kdump kernel without vmcore\n");
2708ce281462SDavid Hildenbrand 	return -EBUSY;
2709ce281462SDavid Hildenbrand #endif /* CONFIG_PROC_VMCORE */
2710ce281462SDavid Hildenbrand }
2711ce281462SDavid Hildenbrand 
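/*
 * Read the device configuration that cannot change at runtime and continue
 * with kdump or hotplug initialization, depending on the kernel we're
 * running in.
 */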
271294300fcfSDavid Hildenbrand static int virtio_mem_init(struct virtio_mem *vm)
271394300fcfSDavid Hildenbrand {
271494300fcfSDavid Hildenbrand 	uint16_t node_id;
271594300fcfSDavid Hildenbrand 
271694300fcfSDavid Hildenbrand 	if (!vm->vdev->config->get) {
271794300fcfSDavid Hildenbrand 		dev_err(&vm->vdev->dev, "config access disabled\n");
271894300fcfSDavid Hildenbrand 		return -EINVAL;
271994300fcfSDavid Hildenbrand 	}
272094300fcfSDavid Hildenbrand 
272194300fcfSDavid Hildenbrand 	/* Fetch all properties that can't change. */
272294300fcfSDavid Hildenbrand 	virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size,
272394300fcfSDavid Hildenbrand 			&vm->plugged_size);
272494300fcfSDavid Hildenbrand 	virtio_cread_le(vm->vdev, struct virtio_mem_config, block_size,
272594300fcfSDavid Hildenbrand 			&vm->device_block_size);
272694300fcfSDavid Hildenbrand 	virtio_cread_le(vm->vdev, struct virtio_mem_config, node_id,
272794300fcfSDavid Hildenbrand 			&node_id);
272894300fcfSDavid Hildenbrand 	vm->nid = virtio_mem_translate_node_id(vm, node_id);
272994300fcfSDavid Hildenbrand 	virtio_cread_le(vm->vdev, struct virtio_mem_config, addr, &vm->addr);
273094300fcfSDavid Hildenbrand 	virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size,
273194300fcfSDavid Hildenbrand 			&vm->region_size);
273294300fcfSDavid Hildenbrand 
273394300fcfSDavid Hildenbrand 	/* Determine the nid for the device based on the lowest address. */
273494300fcfSDavid Hildenbrand 	if (vm->nid == NUMA_NO_NODE)
273594300fcfSDavid Hildenbrand 		vm->nid = memory_add_physaddr_to_nid(vm->addr);
273694300fcfSDavid Hildenbrand 
273794300fcfSDavid Hildenbrand 	dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr);
273894300fcfSDavid Hildenbrand 	dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size);
273994300fcfSDavid Hildenbrand 	dev_info(&vm->vdev->dev, "device block size: 0x%llx",
274094300fcfSDavid Hildenbrand 		 (unsigned long long)vm->device_block_size);
27416725f211SDavid Hildenbrand 	if (vm->nid != NUMA_NO_NODE && IS_ENABLED(CONFIG_NUMA))
2742f2af6d39SDavid Hildenbrand 		dev_info(&vm->vdev->dev, "nid: %d", vm->nid);
27435f1f79bbSDavid Hildenbrand 
2744ce281462SDavid Hildenbrand 	/*
2745ce281462SDavid Hildenbrand 	 * We don't want to (un)plug or reuse any memory when in kdump. The
2746ce281462SDavid Hildenbrand 	 * memory is still accessible (but not exposed to Linux).
2747ce281462SDavid Hildenbrand 	 */
2748ce281462SDavid Hildenbrand 	if (vm->in_kdump)
2749ce281462SDavid Hildenbrand 		return virtio_mem_init_kdump(vm);
275094300fcfSDavid Hildenbrand 	return virtio_mem_init_hotplug(vm);
27515f1f79bbSDavid Hildenbrand }
27525f1f79bbSDavid Hildenbrand 
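/*
 * Reserve the whole device-managed region as an exclusive, non-busy parent
 * resource, so it cannot be mapped via /dev/mem while still allowing
 * add_memory() for child ranges.
 */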
2753ebf71552SDavid Hildenbrand static int virtio_mem_create_resource(struct virtio_mem *vm)
2754ebf71552SDavid Hildenbrand {
2755ebf71552SDavid Hildenbrand 	/*
2756ebf71552SDavid Hildenbrand 	 * When force-unloading the driver and removing the device, we
2757ebf71552SDavid Hildenbrand 	 * could have a garbage pointer. Duplicate the string.
2758ebf71552SDavid Hildenbrand 	 */
2759ebf71552SDavid Hildenbrand 	const char *name = kstrdup(dev_name(&vm->vdev->dev), GFP_KERNEL);
2760ebf71552SDavid Hildenbrand 
2761ebf71552SDavid Hildenbrand 	if (!name)
2762ebf71552SDavid Hildenbrand 		return -ENOMEM;
2763ebf71552SDavid Hildenbrand 
27642128f4e2SDavid Hildenbrand 	/* Disallow mapping device memory via /dev/mem completely. */
2765ebf71552SDavid Hildenbrand 	vm->parent_resource = __request_mem_region(vm->addr, vm->region_size,
27662128f4e2SDavid Hildenbrand 						   name, IORESOURCE_SYSTEM_RAM |
27672128f4e2SDavid Hildenbrand 						   IORESOURCE_EXCLUSIVE);
2768ebf71552SDavid Hildenbrand 	if (!vm->parent_resource) {
2769ebf71552SDavid Hildenbrand 		kfree(name);
2770ebf71552SDavid Hildenbrand 		dev_warn(&vm->vdev->dev, "could not reserve device region\n");
27713c42e198SDavid Hildenbrand 		dev_info(&vm->vdev->dev,
27723c42e198SDavid Hildenbrand 			 "reloading the driver is not supported\n");
2773ebf71552SDavid Hildenbrand 		return -EBUSY;
2774ebf71552SDavid Hildenbrand 	}
2775ebf71552SDavid Hildenbrand 
2776ebf71552SDavid Hildenbrand 	/* The memory is not actually busy - make add_memory() work. */
2777ebf71552SDavid Hildenbrand 	vm->parent_resource->flags &= ~IORESOURCE_BUSY;
2778ebf71552SDavid Hildenbrand 	return 0;
2779ebf71552SDavid Hildenbrand }
2780ebf71552SDavid Hildenbrand 
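/*
 * Release the parent resource, if any, and free it along with its duplicated
 * name.
 */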
2781ebf71552SDavid Hildenbrand static void virtio_mem_delete_resource(struct virtio_mem *vm)
2782ebf71552SDavid Hildenbrand {
2783ebf71552SDavid Hildenbrand 	const char *name;
2784ebf71552SDavid Hildenbrand 
2785ebf71552SDavid Hildenbrand 	if (!vm->parent_resource)
2786ebf71552SDavid Hildenbrand 		return;
2787ebf71552SDavid Hildenbrand 
2788ebf71552SDavid Hildenbrand 	name = vm->parent_resource->name;
2789ebf71552SDavid Hildenbrand 	release_resource(vm->parent_resource);
2790ebf71552SDavid Hildenbrand 	kfree(vm->parent_resource);
2791ebf71552SDavid Hildenbrand 	kfree(name);
2792ebf71552SDavid Hildenbrand 	vm->parent_resource = NULL;
2793ebf71552SDavid Hildenbrand }
2794ebf71552SDavid Hildenbrand 
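/* walk_iomem_res_desc() callback: any match means System RAM was found. */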
2795989ff825SDavid Hildenbrand static int virtio_mem_range_has_system_ram(struct resource *res, void *arg)
2796989ff825SDavid Hildenbrand {
2797989ff825SDavid Hildenbrand 	return 1;
2798989ff825SDavid Hildenbrand }
2799989ff825SDavid Hildenbrand 
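/*
 * Test if any busy System RAM (i.e., memory we added) still intersects the
 * device-managed region.
 */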
2800989ff825SDavid Hildenbrand static bool virtio_mem_has_memory_added(struct virtio_mem *vm)
2801989ff825SDavid Hildenbrand {
2802989ff825SDavid Hildenbrand 	const unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
2803989ff825SDavid Hildenbrand 
2804989ff825SDavid Hildenbrand 	return walk_iomem_res_desc(IORES_DESC_NONE, flags, vm->addr,
2805989ff825SDavid Hildenbrand 				   vm->addr + vm->region_size, NULL,
2806989ff825SDavid Hildenbrand 				   virtio_mem_range_has_system_ram) == 1;
2807989ff825SDavid Hildenbrand }
2808989ff825SDavid Hildenbrand 
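/*
 * Allocate and initialize the per-device state, set up the virtqueue,
 * initialize from the device config and - unless we're running in a kdump
 * kernel - kick off the first workqueue run.
 */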
28095f1f79bbSDavid Hildenbrand static int virtio_mem_probe(struct virtio_device *vdev)
28105f1f79bbSDavid Hildenbrand {
28115f1f79bbSDavid Hildenbrand 	struct virtio_mem *vm;
2812b3fb6de7SMichael S. Tsirkin 	int rc;
28135f1f79bbSDavid Hildenbrand 
2814fce8afd7SDavid Hildenbrand 	BUILD_BUG_ON(sizeof(struct virtio_mem_req) != 24);
2815fce8afd7SDavid Hildenbrand 	BUILD_BUG_ON(sizeof(struct virtio_mem_resp) != 10);
2816fce8afd7SDavid Hildenbrand 
28175f1f79bbSDavid Hildenbrand 	vdev->priv = vm = kzalloc(sizeof(*vm), GFP_KERNEL);
28185f1f79bbSDavid Hildenbrand 	if (!vm)
28195f1f79bbSDavid Hildenbrand 		return -ENOMEM;
28205f1f79bbSDavid Hildenbrand 
28215f1f79bbSDavid Hildenbrand 	init_waitqueue_head(&vm->host_resp);
28225f1f79bbSDavid Hildenbrand 	vm->vdev = vdev;
28235f1f79bbSDavid Hildenbrand 	INIT_WORK(&vm->wq, virtio_mem_run_wq);
28245f1f79bbSDavid Hildenbrand 	mutex_init(&vm->hotplug_mutex);
28255f1f79bbSDavid Hildenbrand 	INIT_LIST_HEAD(&vm->next);
28265f1f79bbSDavid Hildenbrand 	spin_lock_init(&vm->removal_lock);
28275f1f79bbSDavid Hildenbrand 	hrtimer_init(&vm->retry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
28285f1f79bbSDavid Hildenbrand 	vm->retry_timer.function = virtio_mem_timer_expired;
282923e77b5dSDavid Hildenbrand 	vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS;
2830ce281462SDavid Hildenbrand 	vm->in_kdump = is_kdump_kernel();
28315f1f79bbSDavid Hildenbrand 
28325f1f79bbSDavid Hildenbrand 	/* register the virtqueue */
28335f1f79bbSDavid Hildenbrand 	rc = virtio_mem_init_vq(vm);
28345f1f79bbSDavid Hildenbrand 	if (rc)
28355f1f79bbSDavid Hildenbrand 		goto out_free_vm;
28365f1f79bbSDavid Hildenbrand 
28375f1f79bbSDavid Hildenbrand 	/* initialize the device by querying the config */
28385f1f79bbSDavid Hildenbrand 	rc = virtio_mem_init(vm);
28395f1f79bbSDavid Hildenbrand 	if (rc)
28405f1f79bbSDavid Hildenbrand 		goto out_del_vq;
28415f1f79bbSDavid Hildenbrand 
28425f1f79bbSDavid Hildenbrand 	virtio_device_ready(vdev);
28435f1f79bbSDavid Hildenbrand 
28445f1f79bbSDavid Hildenbrand 	/* trigger a config update to start processing the requested_size */
2845ce281462SDavid Hildenbrand 	if (!vm->in_kdump) {
28465f1f79bbSDavid Hildenbrand 		atomic_set(&vm->config_changed, 1);
28475f1f79bbSDavid Hildenbrand 		queue_work(system_freezable_wq, &vm->wq);
2848ce281462SDavid Hildenbrand 	}
28495f1f79bbSDavid Hildenbrand 
28505f1f79bbSDavid Hildenbrand 	return 0;
28515f1f79bbSDavid Hildenbrand out_del_vq:
28525f1f79bbSDavid Hildenbrand 	vdev->config->del_vqs(vdev);
28535f1f79bbSDavid Hildenbrand out_free_vm:
28545f1f79bbSDavid Hildenbrand 	kfree(vm);
28555f1f79bbSDavid Hildenbrand 	vdev->priv = NULL;
28565f1f79bbSDavid Hildenbrand 
28575f1f79bbSDavid Hildenbrand 	return rc;
28585f1f79bbSDavid Hildenbrand }
28595f1f79bbSDavid Hildenbrand 
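/*
 * Hotplug teardown: mark the device as removing so the workqueue and memory
 * notifier back off, drain the workqueue and retry timer, remove any
 * partially plugged offline memory blocks (SBM only), unregister the
 * callbacks, and release the parent resource and memory group only if no
 * memory added by this device remains in the system.
 */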
2860ffc763d0SDavid Hildenbrand static void virtio_mem_deinit_hotplug(struct virtio_mem *vm)
28615f1f79bbSDavid Hildenbrand {
28625f1f79bbSDavid Hildenbrand 	unsigned long mb_id;
28635f1f79bbSDavid Hildenbrand 	int rc;
28645f1f79bbSDavid Hildenbrand 
28655f1f79bbSDavid Hildenbrand 	/*
28665f1f79bbSDavid Hildenbrand 	 * Make sure the workqueue won't be triggered anymore and no memory
28675f1f79bbSDavid Hildenbrand 	 * blocks can be onlined/offlined until we're finished here.
28685f1f79bbSDavid Hildenbrand 	 */
28695f1f79bbSDavid Hildenbrand 	mutex_lock(&vm->hotplug_mutex);
28705f1f79bbSDavid Hildenbrand 	spin_lock_irq(&vm->removal_lock);
28715f1f79bbSDavid Hildenbrand 	vm->removing = true;
28725f1f79bbSDavid Hildenbrand 	spin_unlock_irq(&vm->removal_lock);
28735f1f79bbSDavid Hildenbrand 	mutex_unlock(&vm->hotplug_mutex);
28745f1f79bbSDavid Hildenbrand 
28755f1f79bbSDavid Hildenbrand 	/* wait until the workqueue stopped */
28765f1f79bbSDavid Hildenbrand 	cancel_work_sync(&vm->wq);
28775f1f79bbSDavid Hildenbrand 	hrtimer_cancel(&vm->retry_timer);
28785f1f79bbSDavid Hildenbrand 
28794ba50cd3SDavid Hildenbrand 	if (vm->in_sbm) {
28805f1f79bbSDavid Hildenbrand 		/*
28814ba50cd3SDavid Hildenbrand 		 * After we unregistered our callbacks, user space can online
28824ba50cd3SDavid Hildenbrand 		 * partially plugged offline blocks. Make sure to remove them.
28835f1f79bbSDavid Hildenbrand 		 */
288499f0b55eSDavid Hildenbrand 		virtio_mem_sbm_for_each_mb(vm, mb_id,
288599f0b55eSDavid Hildenbrand 					   VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) {
288601afdee2SDavid Hildenbrand 			rc = virtio_mem_sbm_remove_mb(vm, mb_id);
28875f1f79bbSDavid Hildenbrand 			BUG_ON(rc);
288899f0b55eSDavid Hildenbrand 			virtio_mem_sbm_set_mb_state(vm, mb_id,
288999f0b55eSDavid Hildenbrand 						    VIRTIO_MEM_SBM_MB_UNUSED);
28905f1f79bbSDavid Hildenbrand 		}
28918e5c921cSDavid Hildenbrand 		/*
28928e5c921cSDavid Hildenbrand 		 * After we unregistered our callbacks, user space can no longer
28934ba50cd3SDavid Hildenbrand 		 * offline partially plugged online memory blocks. No need to
28944ba50cd3SDavid Hildenbrand 		 * worry about them.
28958e5c921cSDavid Hildenbrand 		 */
28964ba50cd3SDavid Hildenbrand 	}
28975f1f79bbSDavid Hildenbrand 
28985f1f79bbSDavid Hildenbrand 	/* unregister callbacks */
28995f1f79bbSDavid Hildenbrand 	unregister_virtio_mem_device(vm);
29005f1f79bbSDavid Hildenbrand 	unregister_memory_notifier(&vm->memory_notifier);
29015f1f79bbSDavid Hildenbrand 
29025f1f79bbSDavid Hildenbrand 	/*
29035f1f79bbSDavid Hildenbrand 	 * There is no way we could reliably remove all memory we have added to
29045f1f79bbSDavid Hildenbrand 	 * the system. And there is no way to stop the driver/device from going
29055f1f79bbSDavid Hildenbrand 	 * away. Warn at least.
29065f1f79bbSDavid Hildenbrand 	 */
2907989ff825SDavid Hildenbrand 	if (virtio_mem_has_memory_added(vm)) {
2908ffc763d0SDavid Hildenbrand 		dev_warn(&vm->vdev->dev,
2909ffc763d0SDavid Hildenbrand 			 "device still has system memory added\n");
2910b3562c60SDavid Hildenbrand 	} else {
2911ebf71552SDavid Hildenbrand 		virtio_mem_delete_resource(vm);
2912b3562c60SDavid Hildenbrand 		kfree_const(vm->resource_name);
2913ffaa6ce8SDavid Hildenbrand 		memory_group_unregister(vm->mgid);
2914b3562c60SDavid Hildenbrand 	}
29155f1f79bbSDavid Hildenbrand 
29165f1f79bbSDavid Hildenbrand 	/* remove all tracking data - no locking needed */
29174ba50cd3SDavid Hildenbrand 	if (vm->in_sbm) {
291899f0b55eSDavid Hildenbrand 		vfree(vm->sbm.mb_states);
291954c6a6baSDavid Hildenbrand 		vfree(vm->sbm.sb_states);
29204ba50cd3SDavid Hildenbrand 	} else {
29214ba50cd3SDavid Hildenbrand 		vfree(vm->bbm.bb_states);
29224ba50cd3SDavid Hildenbrand 	}
2923ffc763d0SDavid Hildenbrand }
2924ffc763d0SDavid Hildenbrand 
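/*
 * In a kdump kernel the driver only registered a vmcore callback (elsewhere
 * in this file) to report which device-managed pages are safe to read for
 * /proc/vmcore, so teardown is limited to unregistering that callback.
 */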
2925ce281462SDavid Hildenbrand static void virtio_mem_deinit_kdump(struct virtio_mem *vm)
2926ce281462SDavid Hildenbrand {
2927ce281462SDavid Hildenbrand #ifdef CONFIG_PROC_VMCORE
2928ce281462SDavid Hildenbrand 	unregister_vmcore_cb(&vm->vmcore_cb);
2929ce281462SDavid Hildenbrand #endif /* CONFIG_PROC_VMCORE */
2930ce281462SDavid Hildenbrand }
2931ce281462SDavid Hildenbrand 
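/*
 * Device removal: tear down either the kdump or the hotplug state, then
 * reset the device, delete the virtqueues and free the per-device state.
 */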
2932ffc763d0SDavid Hildenbrand static void virtio_mem_remove(struct virtio_device *vdev)
2933ffc763d0SDavid Hildenbrand {
2934ffc763d0SDavid Hildenbrand 	struct virtio_mem *vm = vdev->priv;
2935ffc763d0SDavid Hildenbrand 
2936ce281462SDavid Hildenbrand 	if (vm->in_kdump)
2937ce281462SDavid Hildenbrand 		virtio_mem_deinit_kdump(vm);
2938ce281462SDavid Hildenbrand 	else
2939ffc763d0SDavid Hildenbrand 		virtio_mem_deinit_hotplug(vm);
29405f1f79bbSDavid Hildenbrand 
29415f1f79bbSDavid Hildenbrand 	/* reset the device and cleanup the queues */
2942d9679d00SMichael S. Tsirkin 	virtio_reset_device(vdev);
29435f1f79bbSDavid Hildenbrand 	vdev->config->del_vqs(vdev);
29445f1f79bbSDavid Hildenbrand 
29455f1f79bbSDavid Hildenbrand 	kfree(vm);
29465f1f79bbSDavid Hildenbrand 	vdev->priv = NULL;
29475f1f79bbSDavid Hildenbrand }
29485f1f79bbSDavid Hildenbrand 
29495f1f79bbSDavid Hildenbrand static void virtio_mem_config_changed(struct virtio_device *vdev)
29505f1f79bbSDavid Hildenbrand {
29515f1f79bbSDavid Hildenbrand 	struct virtio_mem *vm = vdev->priv;
29525f1f79bbSDavid Hildenbrand 
2953ce281462SDavid Hildenbrand 	if (unlikely(vm->in_kdump))
2954ce281462SDavid Hildenbrand 		return;
2955ce281462SDavid Hildenbrand 
29565f1f79bbSDavid Hildenbrand 	atomic_set(&vm->config_changed, 1);
29575f1f79bbSDavid Hildenbrand 	virtio_mem_retry(vm);
29585f1f79bbSDavid Hildenbrand }
29595f1f79bbSDavid Hildenbrand 
29605f1f79bbSDavid Hildenbrand #ifdef CONFIG_PM_SLEEP
29615f1f79bbSDavid Hildenbrand static int virtio_mem_freeze(struct virtio_device *vdev)
29625f1f79bbSDavid Hildenbrand {
29635f1f79bbSDavid Hildenbrand 	/*
29645f1f79bbSDavid Hildenbrand 	 * When restarting the VM, all memory is usually unplugged. Don't
29655f1f79bbSDavid Hildenbrand 	 * allow suspending/hibernating.
29665f1f79bbSDavid Hildenbrand 	 */
29675f1f79bbSDavid Hildenbrand 	dev_err(&vdev->dev, "save/restore not supported.\n");
29685f1f79bbSDavid Hildenbrand 	return -EPERM;
29695f1f79bbSDavid Hildenbrand }
29705f1f79bbSDavid Hildenbrand 
29715f1f79bbSDavid Hildenbrand static int virtio_mem_restore(struct virtio_device *vdev)
29725f1f79bbSDavid Hildenbrand {
29735f1f79bbSDavid Hildenbrand 	return -EPERM;
29745f1f79bbSDavid Hildenbrand }
29755f1f79bbSDavid Hildenbrand #endif
29765f1f79bbSDavid Hildenbrand 
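/*
 * Features supported by this driver: VIRTIO_MEM_F_ACPI_PXM (only with ACPI
 * NUMA support) lets the device specify its NUMA node via an ACPI proximity
 * domain; VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE indicates that unplugged
 * memory must not be accessed by the guest.
 */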
2977f2af6d39SDavid Hildenbrand static unsigned int virtio_mem_features[] = {
2978f2af6d39SDavid Hildenbrand #if defined(CONFIG_NUMA) && defined(CONFIG_ACPI_NUMA)
2979f2af6d39SDavid Hildenbrand 	VIRTIO_MEM_F_ACPI_PXM,
2980f2af6d39SDavid Hildenbrand #endif
298161082ad6SDavid Hildenbrand 	VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE,
2982f2af6d39SDavid Hildenbrand };
2983f2af6d39SDavid Hildenbrand 
29847ab4de60SRikard Falkeborn static const struct virtio_device_id virtio_mem_id_table[] = {
29855f1f79bbSDavid Hildenbrand 	{ VIRTIO_ID_MEM, VIRTIO_DEV_ANY_ID },
29865f1f79bbSDavid Hildenbrand 	{ 0 },
29875f1f79bbSDavid Hildenbrand };
29885f1f79bbSDavid Hildenbrand 
29895f1f79bbSDavid Hildenbrand static struct virtio_driver virtio_mem_driver = {
2990f2af6d39SDavid Hildenbrand 	.feature_table = virtio_mem_features,
2991f2af6d39SDavid Hildenbrand 	.feature_table_size = ARRAY_SIZE(virtio_mem_features),
29925f1f79bbSDavid Hildenbrand 	.driver.name = KBUILD_MODNAME,
29935f1f79bbSDavid Hildenbrand 	.driver.owner = THIS_MODULE,
29945f1f79bbSDavid Hildenbrand 	.id_table = virtio_mem_id_table,
29955f1f79bbSDavid Hildenbrand 	.probe = virtio_mem_probe,
29965f1f79bbSDavid Hildenbrand 	.remove = virtio_mem_remove,
29975f1f79bbSDavid Hildenbrand 	.config_changed = virtio_mem_config_changed,
29985f1f79bbSDavid Hildenbrand #ifdef CONFIG_PM_SLEEP
29995f1f79bbSDavid Hildenbrand 	.freeze	=	virtio_mem_freeze,
30005f1f79bbSDavid Hildenbrand 	.restore =	virtio_mem_restore,
30015f1f79bbSDavid Hildenbrand #endif
30025f1f79bbSDavid Hildenbrand };
30035f1f79bbSDavid Hildenbrand 
30045f1f79bbSDavid Hildenbrand module_virtio_driver(virtio_mem_driver);
30055f1f79bbSDavid Hildenbrand MODULE_DEVICE_TABLE(virtio, virtio_mem_id_table);
30065f1f79bbSDavid Hildenbrand MODULE_AUTHOR("David Hildenbrand <david@redhat.com>");
30075f1f79bbSDavid Hildenbrand MODULE_DESCRIPTION("Virtio-mem driver");
30085f1f79bbSDavid Hildenbrand MODULE_LICENSE("GPL");
3009