xref: /openbmc/linux/drivers/hv/hv_balloon.c (revision ca79522c)
1 /*
2  * Copyright (c) 2012, Microsoft Corporation.
3  *
4  * Author:
5  *   K. Y. Srinivasan <kys@microsoft.com>
6  *
7  * This program is free software; you can redistribute it and/or modify it
8  * under the terms of the GNU General Public License version 2 as published
9  * by the Free Software Foundation.
10  *
11  * This program is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14  * NON INFRINGEMENT.  See the GNU General Public License for more
15  * details.
16  *
17  */
18 
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20 
21 #include <linux/kernel.h>
22 #include <linux/mman.h>
23 #include <linux/delay.h>
24 #include <linux/init.h>
25 #include <linux/module.h>
26 #include <linux/slab.h>
27 #include <linux/kthread.h>
28 #include <linux/completion.h>
29 #include <linux/memory_hotplug.h>
30 #include <linux/memory.h>
31 #include <linux/notifier.h>
32 #include <linux/percpu_counter.h>
33 
34 #include <linux/hyperv.h>
35 
36 /*
37  * We begin with definitions supporting the Dynamic Memory protocol
38  * with the host.
39  *
40  * Begin protocol definitions.
41  */
42 
43 
44 
45 /*
46  * Protocol versions. The low word is the minor version, the high word the major
47  * version.
48  *
49  * History:
50  * Initial version 1.0
51  * Changed to 0.1 on 2009/03/25
52  * Changed to 0.2 on 2009/05/14
53  * Changed to 0.3 on 2009/12/03
54  * Changed to 1.0 on 2011/04/05
55  */
56 
57 #define DYNMEM_MAKE_VERSION(Major, Minor) ((__u32)(((Major) << 16) | (Minor)))
58 #define DYNMEM_MAJOR_VERSION(Version) ((__u32)(Version) >> 16)
59 #define DYNMEM_MINOR_VERSION(Version) ((__u32)(Version) & 0xff)
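/*
 * For example, DYNMEM_MAKE_VERSION(1, 0) encodes protocol version 1.0 as
 * 0x00010000: major version 1 in the high word, minor version 0 in the
 * low word.
 */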
60 
61 enum {
62 	DYNMEM_PROTOCOL_VERSION_1 = DYNMEM_MAKE_VERSION(0, 3),
63 	DYNMEM_PROTOCOL_VERSION_2 = DYNMEM_MAKE_VERSION(1, 0),
64 
65 	DYNMEM_PROTOCOL_VERSION_WIN7 = DYNMEM_PROTOCOL_VERSION_1,
66 	DYNMEM_PROTOCOL_VERSION_WIN8 = DYNMEM_PROTOCOL_VERSION_2,
67 
68 	DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN8
69 };
70 
71 
72 
73 /*
74  * Message Types
75  */
76 
77 enum dm_message_type {
78 	/*
79 	 * Version 0.3
80 	 */
81 	DM_ERROR			= 0,
82 	DM_VERSION_REQUEST		= 1,
83 	DM_VERSION_RESPONSE		= 2,
84 	DM_CAPABILITIES_REPORT		= 3,
85 	DM_CAPABILITIES_RESPONSE	= 4,
86 	DM_STATUS_REPORT		= 5,
87 	DM_BALLOON_REQUEST		= 6,
88 	DM_BALLOON_RESPONSE		= 7,
89 	DM_UNBALLOON_REQUEST		= 8,
90 	DM_UNBALLOON_RESPONSE		= 9,
91 	DM_MEM_HOT_ADD_REQUEST		= 10,
92 	DM_MEM_HOT_ADD_RESPONSE		= 11,
93 	DM_VERSION_03_MAX		= 11,
94 	/*
95 	 * Version 1.0.
96 	 */
97 	DM_INFO_MESSAGE			= 12,
98 	DM_VERSION_1_MAX		= 12
99 };
100 
101 
102 /*
103  * Structures defining the dynamic memory management
104  * protocol.
105  */
106 
107 union dm_version {
108 	struct {
109 		__u16 minor_version;
110 		__u16 major_version;
111 	};
112 	__u32 version;
113 } __packed;
114 
115 
116 union dm_caps {
117 	struct {
118 		__u64 balloon:1;
119 		__u64 hot_add:1;
120 		/*
121 		 * To support guests that may have alignment
122 		 * limitations on hot-add, the guest can specify
123 		 * its alignment requirements; a value of n
124 		 * represents an alignment of 2^n in megabytes.
125 		 */
126 		__u64 hot_add_alignment:4;
127 		__u64 reservedz:58;
128 	} cap_bits;
129 	__u64 caps;
130 } __packed;
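/*
 * For example, a hot_add_alignment value of 7 advertises an alignment
 * requirement of 2^7 = 128MB; that is the value this driver reports in
 * balloon_probe() below.
 */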
131 
132 union dm_mem_page_range {
133 	struct  {
134 		/*
135 		 * The PFN of the first page in the range.
136 		 * 40 bits is the architectural limit of a PFN
137 		 * on AMD64.
138 		 */
139 		__u64 start_page:40;
140 		/*
141 		 * The number of pages in the range.
142 		 */
143 		__u64 page_cnt:24;
144 	} finfo;
145 	__u64  page_range;
146 } __packed;
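/*
 * For example, a range starting at PFN 0x100000 (the 4GB boundary with
 * 4KB pages) and spanning 512 pages is encoded in a single 64-bit value
 * with start_page = 0x100000 and page_cnt = 512.
 */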
147 
148 
149 
150 /*
151  * The header for all dynamic memory messages:
152  *
153  * type: Type of the message.
154  * size: Size of the message in bytes, including the header.
155  * trans_id: The guest is responsible for manufacturing this ID.
156  */
157 
158 struct dm_header {
159 	__u16 type;
160 	__u16 size;
161 	__u32 trans_id;
162 } __packed;
163 
164 /*
165  * A generic message format for dynamic memory.
166  * Specific message formats are defined later in the file.
167  */
168 
169 struct dm_message {
170 	struct dm_header hdr;
171 	__u8 data[]; /* enclosed message */
172 } __packed;
173 
174 
175 /*
176  * Specific message types supporting the dynamic memory protocol.
177  */
178 
179 /*
180  * Version negotiation message. Sent from the guest to the host.
181  * The guest is free to try different versions until the host
182  * accepts the version.
183  *
184  * dm_version: The protocol version requested.
185  * is_last_attempt: If TRUE, this is the last version guest will request.
186  * reservedz: Reserved field, set to zero.
187  */
188 
189 struct dm_version_request {
190 	struct dm_header hdr;
191 	union dm_version version;
192 	__u32 is_last_attempt:1;
193 	__u32 reservedz:31;
194 } __packed;
195 
196 /*
197  * Version response message; sent from the host to the guest and
198  * indicates whether the host has accepted the version sent by the guest.
199  *
200  * is_accepted: If TRUE, the host has accepted the version and the guest
201  * should proceed to the next stage of the protocol. FALSE indicates that
202  * the guest should retry with a different version.
203  *
204  * reservedz: Reserved field, set to zero.
205  */
206 
207 struct dm_version_response {
208 	struct dm_header hdr;
209 	__u64 is_accepted:1;
210 	__u64 reservedz:63;
211 } __packed;
212 
213 /*
214  * Message reporting capabilities. This is sent from the guest to the
215  * host.
216  */
217 
218 struct dm_capabilities {
219 	struct dm_header hdr;
220 	union dm_caps caps;
221 	__u64 min_page_cnt;
222 	__u64 max_page_number;
223 } __packed;
224 
225 /*
226  * Response to the capabilities message. This is sent from the host to the
227  * guest. This message notifies the guest whether the host has accepted its
228  * capabilities. If the host has not accepted them, the guest must shut down
229  * the service.
230  *
231  * is_accepted: Indicates if the host has accepted guest's capabilities.
232  * reservedz: Must be 0.
233  */
234 
235 struct dm_capabilities_resp_msg {
236 	struct dm_header hdr;
237 	__u64 is_accepted:1;
238 	__u64 reservedz:63;
239 } __packed;
240 
241 /*
242  * This message is used to report memory pressure from the guest.
243  * This message is not part of any transaction and there is no
244  * response to this message.
245  *
246  * num_avail: Available memory in pages.
247  * num_committed: Committed memory in pages.
248  * page_file_size: The accumulated size of all page files
249  *		   in the system in pages.
250  * zero_free: The number of zero and free pages.
251  * page_file_writes: The writes to the page file in pages.
252  * io_diff: An indicator of file cache efficiency or page file activity,
253  *	    calculated as File Cache Page Fault Count - Page Read Count.
254  *	    This value is in pages.
255  *
256  * Some of these metrics are Windows specific and fortunately
257  * the algorithm on the host side that computes the guest memory
258  * pressure only uses num_committed value.
259  */
260 
261 struct dm_status {
262 	struct dm_header hdr;
263 	__u64 num_avail;
264 	__u64 num_committed;
265 	__u64 page_file_size;
266 	__u64 zero_free;
267 	__u32 page_file_writes;
268 	__u32 io_diff;
269 } __packed;
270 
271 
272 /*
273  * Message to ask the guest to allocate memory - balloon up message.
274  * This message is sent from the host to the guest. The guest may not be
275  * able to allocate as much memory as requested.
276  *
277  * num_pages: number of pages to allocate.
278  */
279 
280 struct dm_balloon {
281 	struct dm_header hdr;
282 	__u32 num_pages;
283 	__u32 reservedz;
284 } __packed;
285 
286 
287 /*
288  * Balloon response message; this message is sent from the guest
289  * to the host in response to the balloon message.
290  *
291  * reservedz: Reserved; must be set to zero.
292  * more_pages: If FALSE, this is the last message of the transaction.
293  * If TRUE, there will be at least one more message from the guest.
294  *
295  * range_count: The number of ranges in the range array.
296  *
297  * range_array: An array of page ranges returned to the host.
298  *
299  */
300 
301 struct dm_balloon_response {
302 	struct dm_header hdr;
303 	__u32 reservedz;
304 	__u32 more_pages:1;
305 	__u32 range_count:31;
306 	union dm_mem_page_range range_array[];
307 } __packed;
308 
309 /*
310  * Un-balloon message; this message is sent from the host
311  * to the guest to give guest more memory.
312  *
313  * more_pages: If FALSE, this is the last message of the transaction.
314  * If TRUE, there will be at least one more message from the host.
315  *
316  * reservedz: Reserved; must be set to zero.
317  *
318  * range_count: The number of ranges in the range array.
319  *
320  * range_array: An array of page ranges returned to the host.
321  *
322  */
323 
324 struct dm_unballoon_request {
325 	struct dm_header hdr;
326 	__u32 more_pages:1;
327 	__u32 reservedz:31;
328 	__u32 range_count;
329 	union dm_mem_page_range range_array[];
330 } __packed;
331 
332 /*
333  * Un-balloon response message; this message is sent from the guest
334  * to the host in response to an unballoon request.
335  *
336  */
337 
338 struct dm_unballoon_response {
339 	struct dm_header hdr;
340 } __packed;
341 
342 
343 /*
344  * Hot add request message. Message sent from the host to the guest.
345  *
346  * mem_range: Memory range to hot add.
347  *
348  * On Linux we currently don't support this since we cannot hot add
349  * arbitrary granularity of memory.
350  */
351 
352 struct dm_hot_add {
353 	struct dm_header hdr;
354 	union dm_mem_page_range range;
355 } __packed;
356 
357 /*
358  * Hot add response message.
359  * This message is sent by the guest to report the status of a hot add request.
360  * If page_count is less than the requested page count, then the host should
361  * assume all further hot add requests will fail, since this indicates that
362  * the guest has hit an upper physical memory barrier.
363  *
364  * Hot adds may also fail due to low resources; in this case, the guest must
365  * not complete this message until the hot add can succeed, and the host must
366  * not send a new hot add request until the response is sent.
367  * If the VSC fails to hot add memory DYNMEM_NUMBER_OF_UNSUCCESSFUL_HOTADD_ATTEMPTS
368  * times, it fails the request.
369  *
370  *
371  * page_count: number of pages that were successfully hot added.
372  *
373  * result: result of the operation 1: success, 0: failure.
374  *
375  */
376 
377 struct dm_hot_add_response {
378 	struct dm_header hdr;
379 	__u32 page_count;
380 	__u32 result;
381 } __packed;
382 
383 /*
384  * Types of information sent from host to the guest.
385  */
386 
387 enum dm_info_type {
388 	INFO_TYPE_MAX_PAGE_CNT = 0,
389 	MAX_INFO_TYPE
390 };
391 
392 
393 /*
394  * Header for the information message.
395  */
396 
397 struct dm_info_header {
398 	enum dm_info_type type;
399 	__u32 data_size;
400 } __packed;
401 
402 /*
403  * This message is sent from the host to the guest to pass
404  * some relevant information (win8 addition).
405  *
406  * reserved: not used.
407  * info_size: size of the information blob.
408  * info: information blob.
409  */
410 
411 struct dm_info_msg {
412 	struct dm_header hdr;
413 	__u32 reserved;
414 	__u32 info_size;
415 	__u8  info[];
416 };
417 
418 /*
419  * End protocol definitions.
420  */
421 
422 /*
423  * State to manage hot adding memory into the guest.
424  * The range start_pfn : end_pfn specifies the range
425  * that the host has asked us to hot add. The range
426  * start_pfn : ha_end_pfn specifies the range that we have
427  * currently hot added. We hot add in multiples of 128M
428  * chunks; it is possible that we may not be able to bring
429  * online all the pages in the region. The range
430  * covered_start_pfn : covered_end_pfn defines the pages that can
431  * be brought online.
432  */
433 
434 struct hv_hotadd_state {
435 	struct list_head list;
436 	unsigned long start_pfn;
437 	unsigned long covered_start_pfn;
438 	unsigned long covered_end_pfn;
439 	unsigned long ha_end_pfn;
440 	unsigned long end_pfn;
441 };
442 
443 struct balloon_state {
444 	__u32 num_pages;
445 	struct work_struct wrk;
446 };
447 
448 struct hot_add_wrk {
449 	union dm_mem_page_range ha_page_range;
450 	union dm_mem_page_range ha_region_range;
451 	struct work_struct wrk;
452 };
453 
454 static bool hot_add = true;
455 static bool do_hot_add;
456 /*
457  * Delay reporting memory pressure by
458  * the specified number of seconds.
459  */
460 static uint pressure_report_delay = 45;
461 
462 module_param(hot_add, bool, (S_IRUGO | S_IWUSR));
463 MODULE_PARM_DESC(hot_add, "If set, attempt memory hot_add");
464 
465 module_param(pressure_report_delay, uint, (S_IRUGO | S_IWUSR));
466 MODULE_PARM_DESC(pressure_report_delay, "Delay in secs in reporting pressure");
467 static atomic_t trans_id = ATOMIC_INIT(0);
468 
469 static int dm_ring_size = (5 * PAGE_SIZE);
470 
471 /*
472  * Driver specific state.
473  */
474 
475 enum hv_dm_state {
476 	DM_INITIALIZING = 0,
477 	DM_INITIALIZED,
478 	DM_BALLOON_UP,
479 	DM_BALLOON_DOWN,
480 	DM_HOT_ADD,
481 	DM_INIT_ERROR
482 };
483 
484 
485 static __u8 recv_buffer[PAGE_SIZE];
486 static __u8 *send_buffer;
487 #define PAGES_IN_2M	512
488 #define HA_CHUNK (32 * 1024)
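/*
 * Both constants are in units of pages: with 4KB pages, PAGES_IN_2M
 * (512 pages) corresponds to the 2MB balloon allocation unit used in
 * balloon_up(), and HA_CHUNK (32K pages) is the 128MB hot-add
 * granularity advertised to the host.
 */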
489 
490 struct hv_dynmem_device {
491 	struct hv_device *dev;
492 	enum hv_dm_state state;
493 	struct completion host_event;
494 	struct completion config_event;
495 
496 	/*
497 	 * Number of pages we have currently ballooned out.
498 	 */
499 	unsigned int num_pages_ballooned;
500 
501 	/*
502 	 * State to manage the ballooning (up) operation.
503 	 */
504 	struct balloon_state balloon_wrk;
505 
506 	/*
507 	 * State to execute the "hot-add" operation.
508 	 */
509 	struct hot_add_wrk ha_wrk;
510 
511 	/*
512 	 * This state tracks if the host has specified a hot-add
513 	 * region.
514 	 */
515 	bool host_specified_ha_region;
516 
517 	/*
518 	 * State to synchronize hot-add.
519 	 */
520 	struct completion  ol_waitevent;
521 	bool ha_waiting;
522 	/*
523 	 * This thread handles hot-add
524 	 * requests from the host as well as notifying
525 	 * the host with regards to memory pressure in
526 	 * the guest.
527 	 */
528 	struct task_struct *thread;
529 
530 	/*
531 	 * A list of hot-add regions.
532 	 */
533 	struct list_head ha_region_list;
534 
535 	/*
536 	 * We start with the highest version we can support
537 	 * and downgrade based on the host; we save here the
538 	 * next version to try.
539 	 */
540 	__u32 next_version;
541 };
542 
543 static struct hv_dynmem_device dm_device;
544 
545 #ifdef CONFIG_MEMORY_HOTPLUG
546 
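/*
 * Bring a run of already hot-added PFNs online by handing each page to
 * the page allocator, mirroring what the generic memory online path does.
 */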
547 static void hv_bring_pgs_online(unsigned long start_pfn, unsigned long size)
548 {
549 	int i;
550 
551 	for (i = 0; i < size; i++) {
552 		struct page *pg;
553 		pg = pfn_to_page(start_pfn + i);
554 		__online_page_set_limits(pg);
555 		__online_page_increment_counters(pg);
556 		__online_page_free(pg);
557 	}
558 }
559 
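/*
 * Hot add "size" pages starting at "start" in HA_CHUNK sized pieces,
 * waiting up to 5 seconds for each block to come online; a timeout or
 * an add_memory() failure aborts the remainder of the request.
 */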
560 static void hv_mem_hot_add(unsigned long start, unsigned long size,
561 				unsigned long pfn_count,
562 				struct hv_hotadd_state *has)
563 {
564 	int ret = 0;
565 	int i, nid, t;
566 	unsigned long start_pfn;
567 	unsigned long processed_pfn;
568 	unsigned long total_pfn = pfn_count;
569 
570 	for (i = 0; i < (size/HA_CHUNK); i++) {
571 		start_pfn = start + (i * HA_CHUNK);
572 		has->ha_end_pfn +=  HA_CHUNK;
573 
574 		if (total_pfn > HA_CHUNK) {
575 			processed_pfn = HA_CHUNK;
576 			total_pfn -= HA_CHUNK;
577 		} else {
578 			processed_pfn = total_pfn;
579 			total_pfn = 0;
580 		}
581 
582 		has->covered_end_pfn +=  processed_pfn;
583 
584 		init_completion(&dm_device.ol_waitevent);
585 		dm_device.ha_waiting = true;
586 
587 		nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
588 		ret = add_memory(nid, PFN_PHYS((start_pfn)),
589 				(HA_CHUNK << PAGE_SHIFT));
590 
591 		if (ret) {
592 			pr_info("hot_add memory failed, error is %d\n", ret);
593 			if (ret == -EEXIST) {
594 				/*
595 				 * This error indicates that the failure
596 				 * is not transient. This is the
597 				 * case where the guest's physical address map
598 				 * precludes hot adding memory. Stop all further
599 				 * memory hot-add.
600 				 */
601 				do_hot_add = false;
602 			}
603 			has->ha_end_pfn -= HA_CHUNK;
604 			has->covered_end_pfn -=  processed_pfn;
605 			break;
606 		}
607 
608 		/*
609 		 * Wait for the memory block to be onlined.
610 		 */
611 		t = wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
612 		if (t == 0) {
613 			pr_info("hot_add memory timed out\n");
614 			has->ha_end_pfn -= HA_CHUNK;
615 			has->covered_end_pfn -=  processed_pfn;
616 			break;
617 		}
618 
619 	}
620 
621 	return;
622 }
623 
624 static void hv_online_page(struct page *pg)
625 {
626 	struct list_head *cur;
627 	struct hv_hotadd_state *has;
628 	unsigned long cur_start_pgp;
629 	unsigned long cur_end_pgp;
630 
631 	if (dm_device.ha_waiting) {
632 		dm_device.ha_waiting = false;
633 		complete(&dm_device.ol_waitevent);
634 	}
635 
636 	list_for_each(cur, &dm_device.ha_region_list) {
637 		has = list_entry(cur, struct hv_hotadd_state, list);
638 		cur_start_pgp = (unsigned long)
639 				pfn_to_page(has->covered_start_pfn);
640 		cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
641 
642 		if (((unsigned long)pg >= cur_start_pgp) &&
643 			((unsigned long)pg < cur_end_pgp)) {
644 			/*
645 			 * This frame is currently backed; online the
646 			 * page.
647 			 */
648 			__online_page_set_limits(pg);
649 			__online_page_increment_counters(pg);
650 			__online_page_free(pg);
651 			has->covered_start_pfn++;
652 		}
653 	}
654 }
655 
656 static bool pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
657 {
658 	struct list_head *cur;
659 	struct hv_hotadd_state *has;
660 	unsigned long residual, new_inc;
661 
662 	if (list_empty(&dm_device.ha_region_list))
663 		return false;
664 
665 	list_for_each(cur, &dm_device.ha_region_list) {
666 		has = list_entry(cur, struct hv_hotadd_state, list);
667 
668 		/*
669 		 * If the pfn range we are dealing with is not in the current
670 		 * "hot add block", move on.
671 		 */
672 		if ((start_pfn >= has->end_pfn))
673 			continue;
674 		/*
675 		 * If the current hot-add request extends beyond
676 		 * our current limit, extend it.
677 		 */
678 		if ((start_pfn + pfn_cnt) > has->end_pfn) {
679 			residual = (start_pfn + pfn_cnt - has->end_pfn);
680 			/*
681 			 * Extend the region by multiples of HA_CHUNK.
682 			 */
683 			new_inc = (residual / HA_CHUNK) * HA_CHUNK;
684 			if (residual % HA_CHUNK)
685 				new_inc += HA_CHUNK;
686 
687 			has->end_pfn += new_inc;
688 		}
689 
690 		/*
691 		 * If the current start pfn is not where the covered_end
692 		 * is, update it.
693 		 */
694 
695 		if (has->covered_end_pfn != start_pfn) {
696 			has->covered_end_pfn = start_pfn;
697 			has->covered_start_pfn = start_pfn;
698 		}
699 		return true;
700 
701 	}
702 
703 	return false;
704 }
705 
706 static unsigned long handle_pg_range(unsigned long pg_start,
707 					unsigned long pg_count)
708 {
709 	unsigned long start_pfn = pg_start;
710 	unsigned long pfn_cnt = pg_count;
711 	unsigned long size;
712 	struct list_head *cur;
713 	struct hv_hotadd_state *has;
714 	unsigned long pgs_ol = 0;
715 	unsigned long old_covered_state;
716 
717 	if (list_empty(&dm_device.ha_region_list))
718 		return 0;
719 
720 	list_for_each(cur, &dm_device.ha_region_list) {
721 		has = list_entry(cur, struct hv_hotadd_state, list);
722 
723 		/*
724 		 * If the pfn range we are dealing with is not in the current
725 		 * "hot add block", move on.
726 		 */
727 		if ((start_pfn >= has->end_pfn))
728 			continue;
729 
730 		old_covered_state = has->covered_end_pfn;
731 
732 		if (start_pfn < has->ha_end_pfn) {
733 			/*
734 			 * This is the case where we are backing pages
735 			 * in an already hot added region. Bring
736 			 * these pages online first.
737 			 */
738 			pgs_ol = has->ha_end_pfn - start_pfn;
739 			if (pgs_ol > pfn_cnt)
740 				pgs_ol = pfn_cnt;
741 			hv_bring_pgs_online(start_pfn, pgs_ol);
742 			has->covered_end_pfn +=  pgs_ol;
743 			has->covered_start_pfn +=  pgs_ol;
744 			pfn_cnt -= pgs_ol;
745 		}
746 
747 		if ((has->ha_end_pfn < has->end_pfn) && (pfn_cnt > 0)) {
748 			/*
749 			 * We have some residual hot add range
750 			 * that needs to be hot added; hot add
751 			 * it now. Hot add a multiple of
752 			 * HA_CHUNK that fully covers the pages
753 			 * we have.
754 			 */
755 			size = (has->end_pfn - has->ha_end_pfn);
756 			if (pfn_cnt <= size) {
757 				size = ((pfn_cnt / HA_CHUNK) * HA_CHUNK);
758 				if (pfn_cnt % HA_CHUNK)
759 					size += HA_CHUNK;
760 			} else {
761 				pfn_cnt = size;
762 			}
763 			hv_mem_hot_add(has->ha_end_pfn, size, pfn_cnt, has);
764 		}
765 		/*
766 		 * If we managed to online any pages that were given to us,
767 		 * we declare success.
768 		 */
769 		return has->covered_end_pfn - old_covered_state;
770 
771 	}
772 
773 	return 0;
774 }
775 
776 static unsigned long process_hot_add(unsigned long pg_start,
777 					unsigned long pfn_cnt,
778 					unsigned long rg_start,
779 					unsigned long rg_size)
780 {
781 	struct hv_hotadd_state *ha_region = NULL;
782 
783 	if (pfn_cnt == 0)
784 		return 0;
785 
786 	if (!dm_device.host_specified_ha_region)
787 		if (pfn_covered(pg_start, pfn_cnt))
788 			goto do_pg_range;
789 
790 	/*
791 	 * If the host has specified a hot-add range; deal with it first.
792 	 */
793 
794 	if (rg_size != 0) {
795 		ha_region = kzalloc(sizeof(struct hv_hotadd_state), GFP_KERNEL);
796 		if (!ha_region)
797 			return 0;
798 
799 		INIT_LIST_HEAD(&ha_region->list);
800 
801 		list_add_tail(&ha_region->list, &dm_device.ha_region_list);
802 		ha_region->start_pfn = rg_start;
803 		ha_region->ha_end_pfn = rg_start;
804 		ha_region->covered_start_pfn = pg_start;
805 		ha_region->covered_end_pfn = pg_start;
806 		ha_region->end_pfn = rg_start + rg_size;
807 	}
808 
809 do_pg_range:
810 	/*
811 	 * Process the specified page range, bringing the pages
812 	 * online if possible.
813 	 */
814 	return handle_pg_range(pg_start, pfn_cnt);
815 }
816 
817 #endif
818 
819 static void hot_add_req(struct work_struct *dummy)
820 {
821 	struct dm_hot_add_response resp;
822 #ifdef CONFIG_MEMORY_HOTPLUG
823 	unsigned long pg_start, pfn_cnt;
824 	unsigned long rg_start, rg_sz;
825 #endif
826 	struct hv_dynmem_device *dm = &dm_device;
827 
828 	memset(&resp, 0, sizeof(struct dm_hot_add_response));
829 	resp.hdr.type = DM_MEM_HOT_ADD_RESPONSE;
830 	resp.hdr.size = sizeof(struct dm_hot_add_response);
831 	resp.hdr.trans_id = atomic_inc_return(&trans_id);
832 
833 #ifdef CONFIG_MEMORY_HOTPLUG
834 	pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
835 	pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;
836 
837 	rg_start = dm->ha_wrk.ha_region_range.finfo.start_page;
838 	rg_sz = dm->ha_wrk.ha_region_range.finfo.page_cnt;
839 
840 	if ((rg_start == 0) && (!dm->host_specified_ha_region)) {
841 		unsigned long region_size;
842 		unsigned long region_start;
843 
844 		/*
845 		 * The host has not specified the hot-add region.
846 		 * Based on the hot-add page range being specified,
847 		 * compute a hot-add region that can cover the pages
848 		 * that need to be hot-added while ensuring the alignment
849 		 * and size requirements of Linux as it relates to hot-add.
850 		 */
851 		region_start = pg_start;
852 		region_size = (pfn_cnt / HA_CHUNK) * HA_CHUNK;
853 		if (pfn_cnt % HA_CHUNK)
854 			region_size += HA_CHUNK;
855 
856 		region_start = (pg_start / HA_CHUNK) * HA_CHUNK;
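		/*
		 * For example, with HA_CHUNK = 0x8000, pg_start = 0x12345 and
		 * pfn_cnt = 100 yield region_start = 0x10000 and region_size =
		 * 0x8000: a single chunk that covers the requested pages.
		 */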
857 
858 		rg_start = region_start;
859 		rg_sz = region_size;
860 	}
861 
862 	if (do_hot_add)
863 		resp.page_count = process_hot_add(pg_start, pfn_cnt,
864 						rg_start, rg_sz);
865 #endif
866 	/*
867 	 * The result field of the response structure has the
868 	 * following semantics:
869 	 *
870 	 * 1. If all or some pages hot-added: Guest should return success.
871 	 *
872 	 * 2. If no pages could be hot-added:
873 	 *
874 	 * If the guest returns success, then the host
875 	 * will not attempt any further hot-add operations. This
876 	 * signifies a permanent failure.
877 	 *
878 	 * If the guest returns failure, then this failure will be
879 	 * treated as a transient failure and the host may retry the
880 	 * hot-add operation after some delay.
881 	 */
882 	if (resp.page_count > 0)
883 		resp.result = 1;
884 	else if (!do_hot_add)
885 		resp.result = 1;
886 	else
887 		resp.result = 0;
888 
889 	if (!do_hot_add || (resp.page_count == 0))
890 		pr_info("Memory hot add failed\n");
891 
892 	dm->state = DM_INITIALIZED;
893 	vmbus_sendpacket(dm->dev->channel, &resp,
894 			sizeof(struct dm_hot_add_response),
895 			(unsigned long)NULL,
896 			VM_PKT_DATA_INBAND, 0);
897 }
898 
899 static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg)
900 {
901 	struct dm_info_header *info_hdr;
902 
903 	info_hdr = (struct dm_info_header *)msg->info;
904 
905 	switch (info_hdr->type) {
906 	case INFO_TYPE_MAX_PAGE_CNT:
907 		pr_info("Received INFO_TYPE_MAX_PAGE_CNT\n");
908 		pr_info("Data Size is %d\n", info_hdr->data_size);
909 		break;
910 	default:
911 		pr_info("Received Unknown type: %d\n", info_hdr->type);
912 	}
913 }
914 
915 static unsigned long compute_balloon_floor(void)
916 {
917 	unsigned long min_pages;
918 #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
919 	/* Simple continuous piecewise linear function:
920 	 *  max MiB -> min MiB  gradient
921 	 *       0         0
922 	 *      16        16
923 	 *      32        24
924 	 *     128        72    (1/2)
925 	 *     512       168    (1/4)
926 	 *    2048       360    (1/8)
927 	 *    8192       552    (1/32)
928 	 *   32768      1320
929 	 *  131072      4392
930 	 */
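	/*
	 * For example, a guest with 2048 MiB of RAM gets a floor of
	 * 296 MiB + 2048/32 MiB = 360 MiB, matching the table above.
	 */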
931 	if (totalram_pages < MB2PAGES(128))
932 		min_pages = MB2PAGES(8) + (totalram_pages >> 1);
933 	else if (totalram_pages < MB2PAGES(512))
934 		min_pages = MB2PAGES(40) + (totalram_pages >> 2);
935 	else if (totalram_pages < MB2PAGES(2048))
936 		min_pages = MB2PAGES(104) + (totalram_pages >> 3);
937 	else
938 		min_pages = MB2PAGES(296) + (totalram_pages >> 5);
939 #undef MB2PAGES
940 	return min_pages;
941 }
942 
943 /*
944  * Post our status as it relates to memory pressure to the
945  * host. The host expects the guest to post this status
946  * periodically at 1 second intervals.
947  *
948  * The metrics specified in this protocol are very Windows
949  * specific and so we cook up numbers here to convey our memory
950  * pressure.
951  */
952 
953 static void post_status(struct hv_dynmem_device *dm)
954 {
955 	struct dm_status status;
956 	struct sysinfo val;
957 
958 	if (pressure_report_delay > 0) {
959 		--pressure_report_delay;
960 		return;
961 	}
962 	si_meminfo(&val);
963 	memset(&status, 0, sizeof(struct dm_status));
964 	status.hdr.type = DM_STATUS_REPORT;
965 	status.hdr.size = sizeof(struct dm_status);
966 	status.hdr.trans_id = atomic_inc_return(&trans_id);
967 
968 	/*
969 	 * The host expects the guest to report free memory.
970 	 * Further, the host expects the pressure information to
971 	 * include the ballooned out pages.
972 	 * For a given amount of memory that we are managing, we
973 	 * need to compute a floor below which we should not balloon.
974 	 * Compute this and add it to the pressure report.
975 	 */
976 	status.num_avail = val.freeram;
977 	status.num_committed = vm_memory_committed() +
978 				dm->num_pages_ballooned +
979 				compute_balloon_floor();
980 
981 	vmbus_sendpacket(dm->dev->channel, &status,
982 				sizeof(struct dm_status),
983 				(unsigned long)NULL,
984 				VM_PKT_DATA_INBAND, 0);
985 
986 }
987 
988 static void free_balloon_pages(struct hv_dynmem_device *dm,
989 			 union dm_mem_page_range *range_array)
990 {
991 	int num_pages = range_array->finfo.page_cnt;
992 	__u64 start_frame = range_array->finfo.start_page;
993 	struct page *pg;
994 	int i;
995 
996 	for (i = 0; i < num_pages; i++) {
997 		pg = pfn_to_page(i + start_frame);
998 		__free_page(pg);
999 		dm->num_pages_ballooned--;
1000 	}
1001 }
1002 
1003 
1004 
1005 static int  alloc_balloon_pages(struct hv_dynmem_device *dm, int num_pages,
1006 			 struct dm_balloon_response *bl_resp, int alloc_unit,
1007 			 bool *alloc_error)
1008 {
1009 	int i = 0;
1010 	struct page *pg;
1011 
1012 	if (num_pages < alloc_unit)
1013 		return 0;
1014 
1015 	for (i = 0; (i * alloc_unit) < num_pages; i++) {
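		/*
		 * Stop once the next range entry would overflow the one-page
		 * response buffer; the caller sends what fits and loops again.
		 */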
1016 		if (bl_resp->hdr.size + sizeof(union dm_mem_page_range) >
1017 			PAGE_SIZE)
1018 			return i * alloc_unit;
1019 
1020 		/*
1021 		 * We execute this code in a thread context. Furthermore,
1022 		 * we don't want the kernel to try too hard.
1023 		 */
1024 		pg = alloc_pages(GFP_HIGHUSER | __GFP_NORETRY |
1025 				__GFP_NOMEMALLOC | __GFP_NOWARN,
1026 				get_order(alloc_unit << PAGE_SHIFT));
1027 
1028 		if (!pg) {
1029 			*alloc_error = true;
1030 			return i * alloc_unit;
1031 		}
1032 
1033 
1034 		dm->num_pages_ballooned += alloc_unit;
1035 
1036 		/*
1037 		 * If we allocated 2M pages, split them so we
1038 		 * can free them in any order we get.
1039 		 */
1040 
1041 		if (alloc_unit != 1)
1042 			split_page(pg, get_order(alloc_unit << PAGE_SHIFT));
1043 
1044 		bl_resp->range_count++;
1045 		bl_resp->range_array[i].finfo.start_page =
1046 			page_to_pfn(pg);
1047 		bl_resp->range_array[i].finfo.page_cnt = alloc_unit;
1048 		bl_resp->hdr.size += sizeof(union dm_mem_page_range);
1049 
1050 	}
1051 
1052 	return num_pages;
1053 }
1054 
1055 
1056 
1057 static void balloon_up(struct work_struct *dummy)
1058 {
1059 	int num_pages = dm_device.balloon_wrk.num_pages;
1060 	int num_ballooned = 0;
1061 	struct dm_balloon_response *bl_resp;
1062 	int alloc_unit;
1063 	int ret;
1064 	bool alloc_error = false;
1065 	bool done = false;
1066 	int i;
1067 
1068 
1069 	/*
1070 	 * We will attempt 2M allocations. However, if we fail to
1071 	 * allocate 2M chunks, we will fall back to 4k allocations.
1072 	 */
1073 	alloc_unit = 512;
1074 
1075 	while (!done) {
1076 		bl_resp = (struct dm_balloon_response *)send_buffer;
1077 		memset(send_buffer, 0, PAGE_SIZE);
1078 		bl_resp->hdr.type = DM_BALLOON_RESPONSE;
1079 		bl_resp->hdr.trans_id = atomic_inc_return(&trans_id);
1080 		bl_resp->hdr.size = sizeof(struct dm_balloon_response);
1081 		bl_resp->more_pages = 1;
1082 
1083 
1084 		num_pages -= num_ballooned;
1085 		num_ballooned = alloc_balloon_pages(&dm_device, num_pages,
1086 						bl_resp, alloc_unit,
1087 						 &alloc_error);
1088 
1089 		if ((alloc_error) && (alloc_unit != 1)) {
1090 			alloc_unit = 1;
1091 			continue;
1092 		}
1093 
1094 		if ((alloc_error) || (num_ballooned == num_pages)) {
1095 			bl_resp->more_pages = 0;
1096 			done = true;
1097 			dm_device.state = DM_INITIALIZED;
1098 		}
1099 
1100 		/*
1101 		 * We are pushing a lot of data through the channel;
1102 		 * deal with transient failures caused by the
1103 		 * lack of space in the ring buffer.
1104 		 */
1105 
1106 		do {
1107 			ret = vmbus_sendpacket(dm_device.dev->channel,
1108 						bl_resp,
1109 						bl_resp->hdr.size,
1110 						(unsigned long)NULL,
1111 						VM_PKT_DATA_INBAND, 0);
1112 
1113 			if (ret == -EAGAIN)
1114 				msleep(20);
1115 
1116 		} while (ret == -EAGAIN);
1117 
1118 		if (ret) {
1119 			/*
1120 			 * Free up the memory we allocated.
1121 			 */
1122 			pr_info("Balloon response failed\n");
1123 
1124 			for (i = 0; i < bl_resp->range_count; i++)
1125 				free_balloon_pages(&dm_device,
1126 						 &bl_resp->range_array[i]);
1127 
1128 			done = true;
1129 		}
1130 	}
1131 
1132 }
1133 
1134 static void balloon_down(struct hv_dynmem_device *dm,
1135 			struct dm_unballoon_request *req)
1136 {
1137 	union dm_mem_page_range *range_array = req->range_array;
1138 	int range_count = req->range_count;
1139 	struct dm_unballoon_response resp;
1140 	int i;
1141 
1142 	for (i = 0; i < range_count; i++)
1143 		free_balloon_pages(dm, &range_array[i]);
1144 
1145 	if (req->more_pages == 1)
1146 		return;
1147 
1148 	memset(&resp, 0, sizeof(struct dm_unballoon_response));
1149 	resp.hdr.type = DM_UNBALLOON_RESPONSE;
1150 	resp.hdr.trans_id = atomic_inc_return(&trans_id);
1151 	resp.hdr.size = sizeof(struct dm_unballoon_response);
1152 
1153 	vmbus_sendpacket(dm_device.dev->channel, &resp,
1154 				sizeof(struct dm_unballoon_response),
1155 				(unsigned long)NULL,
1156 				VM_PKT_DATA_INBAND, 0);
1157 
1158 	dm->state = DM_INITIALIZED;
1159 }
1160 
1161 static void balloon_onchannelcallback(void *context);
1162 
1163 static int dm_thread_func(void *dm_dev)
1164 {
1165 	struct hv_dynmem_device *dm = dm_dev;
1166 	int t;
1167 
1168 	while (!kthread_should_stop()) {
1169 		t = wait_for_completion_timeout(&dm_device.config_event, 1*HZ);
1170 		/*
1171 		 * The host expects us to post information on the memory
1172 		 * pressure every second.
1173 		 */
1174 
1175 		if (t == 0)
1176 			post_status(dm);
1177 
1178 	}
1179 
1180 	return 0;
1181 }
1182 
1183 
1184 static void version_resp(struct hv_dynmem_device *dm,
1185 			struct dm_version_response *vresp)
1186 {
1187 	struct dm_version_request version_req;
1188 	int ret;
1189 
1190 	if (vresp->is_accepted) {
1191 		/*
1192 		 * We are done; wakeup the
1193 		 * context waiting for version
1194 		 * negotiation.
1195 		 */
1196 		complete(&dm->host_event);
1197 		return;
1198 	}
1199 	/*
1200 	 * If there are more versions to try, continue
1201 	 * with negotiations; if not,
1202 	 * shut down the service since we are not able
1203 	 * to negotiate a suitable version number
1204 	 * with the host.
1205 	 */
1206 	if (dm->next_version == 0)
1207 		goto version_error;
1208 
1209 	dm->next_version = 0;
1210 	memset(&version_req, 0, sizeof(struct dm_version_request));
1211 	version_req.hdr.type = DM_VERSION_REQUEST;
1212 	version_req.hdr.size = sizeof(struct dm_version_request);
1213 	version_req.hdr.trans_id = atomic_inc_return(&trans_id);
1214 	version_req.version.version = DYNMEM_PROTOCOL_VERSION_WIN7;
1215 	version_req.is_last_attempt = 1;
1216 
1217 	ret = vmbus_sendpacket(dm->dev->channel, &version_req,
1218 				sizeof(struct dm_version_request),
1219 				(unsigned long)NULL,
1220 				VM_PKT_DATA_INBAND, 0);
1221 
1222 	if (ret)
1223 		goto version_error;
1224 
1225 	return;
1226 
1227 version_error:
1228 	dm->state = DM_INIT_ERROR;
1229 	complete(&dm->host_event);
1230 }
1231 
1232 static void cap_resp(struct hv_dynmem_device *dm,
1233 			struct dm_capabilities_resp_msg *cap_resp)
1234 {
1235 	if (!cap_resp->is_accepted) {
1236 		pr_info("Capabilities not accepted by host\n");
1237 		dm->state = DM_INIT_ERROR;
1238 	}
1239 	complete(&dm->host_event);
1240 }
1241 
1242 static void balloon_onchannelcallback(void *context)
1243 {
1244 	struct hv_device *dev = context;
1245 	u32 recvlen;
1246 	u64 requestid;
1247 	struct dm_message *dm_msg;
1248 	struct dm_header *dm_hdr;
1249 	struct hv_dynmem_device *dm = hv_get_drvdata(dev);
1250 	struct dm_balloon *bal_msg;
1251 	struct dm_hot_add *ha_msg;
1252 	union dm_mem_page_range *ha_pg_range;
1253 	union dm_mem_page_range *ha_region;
1254 
1255 	memset(recv_buffer, 0, sizeof(recv_buffer));
1256 	vmbus_recvpacket(dev->channel, recv_buffer,
1257 			 PAGE_SIZE, &recvlen, &requestid);
1258 
1259 	if (recvlen > 0) {
1260 		dm_msg = (struct dm_message *)recv_buffer;
1261 		dm_hdr = &dm_msg->hdr;
1262 
1263 		switch (dm_hdr->type) {
1264 		case DM_VERSION_RESPONSE:
1265 			version_resp(dm,
1266 				 (struct dm_version_response *)dm_msg);
1267 			break;
1268 
1269 		case DM_CAPABILITIES_RESPONSE:
1270 			cap_resp(dm,
1271 				 (struct dm_capabilities_resp_msg *)dm_msg);
1272 			break;
1273 
1274 		case DM_BALLOON_REQUEST:
1275 			if (dm->state == DM_BALLOON_UP)
1276 				pr_warn("Currently ballooning\n");
1277 			bal_msg = (struct dm_balloon *)recv_buffer;
1278 			dm->state = DM_BALLOON_UP;
1279 			dm_device.balloon_wrk.num_pages = bal_msg->num_pages;
1280 			schedule_work(&dm_device.balloon_wrk.wrk);
1281 			break;
1282 
1283 		case DM_UNBALLOON_REQUEST:
1284 			dm->state = DM_BALLOON_DOWN;
1285 			balloon_down(dm,
1286 				 (struct dm_unballoon_request *)recv_buffer);
1287 			break;
1288 
1289 		case DM_MEM_HOT_ADD_REQUEST:
1290 			if (dm->state == DM_HOT_ADD)
1291 				pr_warn("Currently hot-adding\n");
1292 			dm->state = DM_HOT_ADD;
1293 			ha_msg = (struct dm_hot_add *)recv_buffer;
1294 			if (ha_msg->hdr.size == sizeof(struct dm_hot_add)) {
1295 				/*
1296 				 * This is a normal hot-add request specifying
1297 				 * hot-add memory.
1298 				 */
1299 				ha_pg_range = &ha_msg->range;
1300 				dm->ha_wrk.ha_page_range = *ha_pg_range;
1301 				dm->ha_wrk.ha_region_range.page_range = 0;
1302 			} else {
1303 				/*
1304 				 * Host is specifying that we first hot-add
1305 				 * a region and then partially populate this
1306 				 * region.
1307 				 */
1308 				dm->host_specified_ha_region = true;
1309 				ha_pg_range = &ha_msg->range;
1310 				ha_region = &ha_pg_range[1];
1311 				dm->ha_wrk.ha_page_range = *ha_pg_range;
1312 				dm->ha_wrk.ha_region_range = *ha_region;
1313 			}
1314 			schedule_work(&dm_device.ha_wrk.wrk);
1315 			break;
1316 
1317 		case DM_INFO_MESSAGE:
1318 			process_info(dm, (struct dm_info_msg *)dm_msg);
1319 			break;
1320 
1321 		default:
1322 			pr_err("Unhandled message: type: %d\n", dm_hdr->type);
1323 
1324 		}
1325 	}
1326 
1327 }
1328 
1329 static int balloon_probe(struct hv_device *dev,
1330 			const struct hv_vmbus_device_id *dev_id)
1331 {
1332 	int ret, t;
1333 	struct dm_version_request version_req;
1334 	struct dm_capabilities cap_msg;
1335 
1336 	do_hot_add = hot_add;
1337 
1338 	/*
1339 	 * First allocate a send buffer.
1340 	 */
1341 
1342 	send_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
1343 	if (!send_buffer)
1344 		return -ENOMEM;
1345 
1346 	ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0,
1347 			balloon_onchannelcallback, dev);
1348 
1349 	if (ret)
1350 		goto probe_error0;
1351 
1352 	dm_device.dev = dev;
1353 	dm_device.state = DM_INITIALIZING;
1354 	dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7;
1355 	init_completion(&dm_device.host_event);
1356 	init_completion(&dm_device.config_event);
1357 	INIT_LIST_HEAD(&dm_device.ha_region_list);
1358 	INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
1359 	INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
1360 	dm_device.host_specified_ha_region = false;
1361 
1362 	dm_device.thread =
1363 		 kthread_run(dm_thread_func, &dm_device, "hv_balloon");
1364 	if (IS_ERR(dm_device.thread)) {
1365 		ret = PTR_ERR(dm_device.thread);
1366 		goto probe_error1;
1367 	}
1368 
1369 #ifdef CONFIG_MEMORY_HOTPLUG
1370 	set_online_page_callback(&hv_online_page);
1371 #endif
1372 
1373 	hv_set_drvdata(dev, &dm_device);
1374 	/*
1375 	 * Initiate the handshake with the host and negotiate
1376 	 * a version that the host can support. We start with the
1377 	 * highest version number and go down if the host cannot
1378 	 * support it.
1379 	 */
1380 	memset(&version_req, 0, sizeof(struct dm_version_request));
1381 	version_req.hdr.type = DM_VERSION_REQUEST;
1382 	version_req.hdr.size = sizeof(struct dm_version_request);
1383 	version_req.hdr.trans_id = atomic_inc_return(&trans_id);
1384 	version_req.version.version = DYNMEM_PROTOCOL_VERSION_WIN8;
1385 	version_req.is_last_attempt = 0;
1386 
1387 	ret = vmbus_sendpacket(dev->channel, &version_req,
1388 				sizeof(struct dm_version_request),
1389 				(unsigned long)NULL,
1390 				VM_PKT_DATA_INBAND, 0);
1391 	if (ret)
1392 		goto probe_error2;
1393 
1394 	t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
1395 	if (t == 0) {
1396 		ret = -ETIMEDOUT;
1397 		goto probe_error2;
1398 	}
1399 
1400 	/*
1401 	 * If we could not negotiate a compatible version with the host,
1402 	 * fail the probe function.
1403 	 */
1404 	if (dm_device.state == DM_INIT_ERROR) {
1405 		ret = -ETIMEDOUT;
1406 		goto probe_error2;
1407 	}
1408 	/*
1409 	 * Now submit our capabilities to the host.
1410 	 */
1411 	memset(&cap_msg, 0, sizeof(struct dm_capabilities));
1412 	cap_msg.hdr.type = DM_CAPABILITIES_REPORT;
1413 	cap_msg.hdr.size = sizeof(struct dm_capabilities);
1414 	cap_msg.hdr.trans_id = atomic_inc_return(&trans_id);
1415 
1416 	cap_msg.caps.cap_bits.balloon = 1;
1417 	cap_msg.caps.cap_bits.hot_add = 1;
1418 
1419 	/*
1420 	 * Specify our alignment requirements as they relate to
1421 	 * memory hot-add. Specify 128MB alignment.
1422 	 */
1423 	cap_msg.caps.cap_bits.hot_add_alignment = 7;
1424 
1425 	/*
1426 	 * Currently the host does not use these
1427 	 * values and we set them to what is done in the
1428 	 * Windows driver.
1429 	 */
1430 	cap_msg.min_page_cnt = 0;
1431 	cap_msg.max_page_number = -1;
1432 
1433 	ret = vmbus_sendpacket(dev->channel, &cap_msg,
1434 				sizeof(struct dm_capabilities),
1435 				(unsigned long)NULL,
1436 				VM_PKT_DATA_INBAND, 0);
1437 	if (ret)
1438 		goto probe_error2;
1439 
1440 	t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
1441 	if (t == 0) {
1442 		ret = -ETIMEDOUT;
1443 		goto probe_error2;
1444 	}
1445 
1446 	/*
1447 	 * If the host does not like our capabilities,
1448 	 * fail the probe function.
1449 	 */
1450 	if (dm_device.state == DM_INIT_ERROR) {
1451 		ret = -ETIMEDOUT;
1452 		goto probe_error2;
1453 	}
1454 
1455 	dm_device.state = DM_INITIALIZED;
1456 
1457 	return 0;
1458 
1459 probe_error2:
1460 #ifdef CONFIG_MEMORY_HOTPLUG
1461 	restore_online_page_callback(&hv_online_page);
1462 #endif
1463 	kthread_stop(dm_device.thread);
1464 
1465 probe_error1:
1466 	vmbus_close(dev->channel);
1467 probe_error0:
1468 	kfree(send_buffer);
1469 	return ret;
1470 }
1471 
1472 static int balloon_remove(struct hv_device *dev)
1473 {
1474 	struct hv_dynmem_device *dm = hv_get_drvdata(dev);
1475 	struct list_head *cur, *tmp;
1476 	struct hv_hotadd_state *has;
1477 
1478 	if (dm->num_pages_ballooned != 0)
1479 		pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned);
1480 
1481 	cancel_work_sync(&dm->balloon_wrk.wrk);
1482 	cancel_work_sync(&dm->ha_wrk.wrk);
1483 
1484 	vmbus_close(dev->channel);
1485 	kthread_stop(dm->thread);
1486 	kfree(send_buffer);
1487 #ifdef CONFIG_MEMORY_HOTPLUG
1488 	restore_online_page_callback(&hv_online_page);
1489 #endif
1490 	list_for_each_safe(cur, tmp, &dm->ha_region_list) {
1491 		has = list_entry(cur, struct hv_hotadd_state, list);
1492 		list_del(&has->list);
1493 		kfree(has);
1494 	}
1495 
1496 	return 0;
1497 }
1498 
1499 static const struct hv_vmbus_device_id id_table[] = {
1500 	/* Dynamic Memory Class ID */
1501 	/* 525074DC-8985-46e2-8057-A307DC18A502 */
1502 	{ HV_DM_GUID, },
1503 	{ },
1504 };
1505 
1506 MODULE_DEVICE_TABLE(vmbus, id_table);
1507 
1508 static  struct hv_driver balloon_drv = {
1509 	.name = "hv_balloon",
1510 	.id_table = id_table,
1511 	.probe =  balloon_probe,
1512 	.remove =  balloon_remove,
1513 };
1514 
1515 static int __init init_balloon_drv(void)
1516 {
1517 
1518 	return vmbus_driver_register(&balloon_drv);
1519 }
1520 
1521 module_init(init_balloon_drv);
1522 
1523 MODULE_DESCRIPTION("Hyper-V Balloon");
1524 MODULE_VERSION(HV_DRV_VERSION);
1525 MODULE_LICENSE("GPL");
1526