// SPDX-License-Identifier: GPL-2.0
/*
 * VMware Balloon driver.
 *
 * Copyright (C) 2000-2018, VMware, Inc. All Rights Reserved.
 *
 * This is the VMware physical memory management driver for Linux. The driver
 * acts like a "balloon" that can be inflated to reclaim physical pages by
 * reserving them in the guest and invalidating them in the monitor,
 * freeing up the underlying machine pages so they can be allocated to
 * other guests. The balloon can also be deflated to allow the guest to
 * use more physical memory. Higher level policies can control the sizes
 * of balloons in VMs in order to manage physical memory resources.
 */

//#define DEBUG
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <asm/hypervisor.h>

MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
MODULE_ALIAS("dmi:*:svnVMware*:*");
MODULE_ALIAS("vmware_vmmemctl");
MODULE_LICENSE("GPL");

/*
 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We do not allow waiting
 * (__GFP_RECLAIM) for huge page allocations. Use __GFP_NOWARN to suppress page
 * allocation failure warnings. Disallow access to emergency low-memory pools.
 */
#define VMW_HUGE_PAGE_ALLOC_FLAGS	(__GFP_HIGHMEM|__GFP_NOWARN|	\
					 __GFP_NOMEMALLOC)

/*
 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We allow lightweight
 * reclamation (__GFP_NORETRY). Use __GFP_NOWARN to suppress page allocation
 * failure warnings. Disallow access to emergency low-memory pools.
 */
#define VMW_PAGE_ALLOC_FLAGS		(__GFP_HIGHMEM|__GFP_NOWARN|	\
					 __GFP_NOMEMALLOC|__GFP_NORETRY)

/* Maximum number of refused pages we accumulate during inflation cycle */
#define VMW_BALLOON_MAX_REFUSED		16

/*
 * Hypervisor communication port definitions.
 */
#define VMW_BALLOON_HV_PORT		0x5670
#define VMW_BALLOON_HV_MAGIC		0x456c6d6f
#define VMW_BALLOON_GUEST_ID		1	/* Linux */
enum vmwballoon_capabilities {
	/*
	 * Bit 0 is reserved and not associated with any capability.
	 */
	VMW_BALLOON_BASIC_CMDS			= (1 << 1),
	VMW_BALLOON_BATCHED_CMDS		= (1 << 2),
	VMW_BALLOON_BATCHED_2M_CMDS		= (1 << 3),
	VMW_BALLOON_SIGNALLED_WAKEUP_CMD	= (1 << 4),
	VMW_BALLOON_64_BIT_TARGET		= (1 << 5)
};

#define VMW_BALLOON_CAPABILITIES_COMMON	(VMW_BALLOON_BASIC_CMDS \
					| VMW_BALLOON_BATCHED_CMDS \
					| VMW_BALLOON_BATCHED_2M_CMDS \
					| VMW_BALLOON_SIGNALLED_WAKEUP_CMD)

#define VMW_BALLOON_2M_ORDER		(PMD_SHIFT - PAGE_SHIFT)

/*
 * 64-bit targets are only supported in 64-bit builds.
 */
#ifdef CONFIG_64BIT
#define VMW_BALLOON_CAPABILITIES	(VMW_BALLOON_CAPABILITIES_COMMON \
					| VMW_BALLOON_64_BIT_TARGET)
#else
#define VMW_BALLOON_CAPABILITIES	VMW_BALLOON_CAPABILITIES_COMMON
#endif

enum vmballoon_page_size_type {
	VMW_BALLOON_4K_PAGE,
	VMW_BALLOON_2M_PAGE,
	VMW_BALLOON_LAST_SIZE = VMW_BALLOON_2M_PAGE
};

#define VMW_BALLOON_NUM_PAGE_SIZES	(VMW_BALLOON_LAST_SIZE + 1)

static const char * const vmballoon_page_size_names[] = {
	[VMW_BALLOON_4K_PAGE]			= "4k",
	[VMW_BALLOON_2M_PAGE]			= "2M"
};

enum vmballoon_op {
	VMW_BALLOON_INFLATE,
	VMW_BALLOON_DEFLATE
};

enum vmballoon_op_stat_type {
	VMW_BALLOON_OP_STAT,
	VMW_BALLOON_OP_FAIL_STAT
};

#define VMW_BALLOON_OP_STAT_TYPES	(VMW_BALLOON_OP_FAIL_STAT + 1)
/**
 * enum vmballoon_cmd_type - backdoor commands.
 *
 * Availability of the commands is as follows:
 *
 * %VMW_BALLOON_CMD_START, %VMW_BALLOON_CMD_GET_TARGET and
 * %VMW_BALLOON_CMD_GUEST_ID are always available.
 *
 * If the host reports that %VMW_BALLOON_BASIC_CMDS are supported, then the
 * %VMW_BALLOON_CMD_LOCK and %VMW_BALLOON_CMD_UNLOCK commands are available.
 *
 * If the host reports that %VMW_BALLOON_BATCHED_CMDS are supported, then the
 * %VMW_BALLOON_CMD_BATCHED_LOCK and %VMW_BALLOON_CMD_BATCHED_UNLOCK commands
 * are available.
 *
 * If the host reports that %VMW_BALLOON_BATCHED_2M_CMDS are supported, then
 * %VMW_BALLOON_CMD_BATCHED_2M_LOCK and %VMW_BALLOON_CMD_BATCHED_2M_UNLOCK
 * are available.
 *
 * If the host reports that %VMW_BALLOON_SIGNALLED_WAKEUP_CMD is supported,
 * then the %VMW_BALLOON_CMD_VMCI_DOORBELL_SET command is available.
 *
 * @VMW_BALLOON_CMD_START: Communicating supported version with the hypervisor.
 * @VMW_BALLOON_CMD_GET_TARGET: Gets the balloon target size.
 * @VMW_BALLOON_CMD_LOCK: Informs the hypervisor about a ballooned page.
 * @VMW_BALLOON_CMD_UNLOCK: Informs the hypervisor about a page that is about
 *			    to be deflated from the balloon.
 * @VMW_BALLOON_CMD_GUEST_ID: Informs the hypervisor about the type of OS that
 *			      runs in the VM.
 * @VMW_BALLOON_CMD_BATCHED_LOCK: Informs the hypervisor about a batch of
 *				  ballooned pages (up to 512).
 * @VMW_BALLOON_CMD_BATCHED_UNLOCK: Informs the hypervisor about a batch of
 *				    pages that are about to be deflated from
 *				    the balloon (up to 512).
 * @VMW_BALLOON_CMD_BATCHED_2M_LOCK: Similar to @VMW_BALLOON_CMD_BATCHED_LOCK
 *				     for 2MB pages.
 * @VMW_BALLOON_CMD_BATCHED_2M_UNLOCK: Similar to
 *				       @VMW_BALLOON_CMD_BATCHED_UNLOCK for 2MB
 *				       pages.
 * @VMW_BALLOON_CMD_VMCI_DOORBELL_SET: A command to set up the doorbell
 *				       notification that will be invoked when
 *				       the balloon size changes.
 * @VMW_BALLOON_CMD_LAST: Value of the last command.
 */
enum vmballoon_cmd_type {
	VMW_BALLOON_CMD_START,
	VMW_BALLOON_CMD_GET_TARGET,
	VMW_BALLOON_CMD_LOCK,
	VMW_BALLOON_CMD_UNLOCK,
	VMW_BALLOON_CMD_GUEST_ID,
	/* No command 5 */
	VMW_BALLOON_CMD_BATCHED_LOCK = 6,
	VMW_BALLOON_CMD_BATCHED_UNLOCK,
	VMW_BALLOON_CMD_BATCHED_2M_LOCK,
	VMW_BALLOON_CMD_BATCHED_2M_UNLOCK,
	VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
	VMW_BALLOON_CMD_LAST = VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
};

#define VMW_BALLOON_CMD_NUM	(VMW_BALLOON_CMD_LAST + 1)

enum vmballoon_error_codes {
	VMW_BALLOON_SUCCESS,
	VMW_BALLOON_ERROR_CMD_INVALID,
	VMW_BALLOON_ERROR_PPN_INVALID,
	VMW_BALLOON_ERROR_PPN_LOCKED,
	VMW_BALLOON_ERROR_PPN_UNLOCKED,
	VMW_BALLOON_ERROR_PPN_PINNED,
	VMW_BALLOON_ERROR_PPN_NOTNEEDED,
	VMW_BALLOON_ERROR_RESET,
	VMW_BALLOON_ERROR_BUSY
};

#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES	(0x03000000)

#define VMW_BALLOON_CMD_WITH_TARGET_MASK			\
	((1UL << VMW_BALLOON_CMD_GET_TARGET)		|	\
	 (1UL << VMW_BALLOON_CMD_LOCK)			|	\
	 (1UL << VMW_BALLOON_CMD_UNLOCK)		|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_LOCK)		|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_UNLOCK)	|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_2M_LOCK)	|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_2M_UNLOCK))

static const char * const vmballoon_cmd_names[] = {
	[VMW_BALLOON_CMD_START]			= "start",
	[VMW_BALLOON_CMD_GET_TARGET]		= "target",
	[VMW_BALLOON_CMD_LOCK]			= "lock",
	[VMW_BALLOON_CMD_UNLOCK]		= "unlock",
	[VMW_BALLOON_CMD_GUEST_ID]		= "guestType",
	[VMW_BALLOON_CMD_BATCHED_LOCK]		= "batchLock",
	[VMW_BALLOON_CMD_BATCHED_UNLOCK]	= "batchUnlock",
	[VMW_BALLOON_CMD_BATCHED_2M_LOCK]	= "2m-lock",
	[VMW_BALLOON_CMD_BATCHED_2M_UNLOCK]	= "2m-unlock",
	[VMW_BALLOON_CMD_VMCI_DOORBELL_SET]	= "doorbellSet"
};

enum vmballoon_stat_page {
	VMW_BALLOON_PAGE_STAT_ALLOC,
	VMW_BALLOON_PAGE_STAT_ALLOC_FAIL,
	VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC,
	VMW_BALLOON_PAGE_STAT_REFUSED_FREE,
	VMW_BALLOON_PAGE_STAT_FREE,
	VMW_BALLOON_PAGE_STAT_LAST = VMW_BALLOON_PAGE_STAT_FREE
};

#define VMW_BALLOON_PAGE_STAT_NUM	(VMW_BALLOON_PAGE_STAT_LAST + 1)

enum vmballoon_stat_general {
	VMW_BALLOON_STAT_TIMER,
	VMW_BALLOON_STAT_DOORBELL,
	VMW_BALLOON_STAT_RESET,
	VMW_BALLOON_STAT_LAST = VMW_BALLOON_STAT_RESET
};

#define VMW_BALLOON_STAT_NUM		(VMW_BALLOON_STAT_LAST + 1)

static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching);
static DEFINE_STATIC_KEY_FALSE(balloon_stat_enabled);

struct vmballoon_ctl {
	struct list_head pages;		/* pages being inflated/deflated */
	struct list_head refused_pages;	/* pages the host refused */
	unsigned int n_refused_pages;
	unsigned int n_pages;
	enum vmballoon_page_size_type page_size;
	enum vmballoon_op op;
};

struct vmballoon_page_size {
	/* list of reserved physical pages */
	struct list_head pages;
};

/**
 * struct vmballoon_batch_entry - a batch entry for lock or unlock.
 *
 * @status: the status of the operation, which is written by the hypervisor.
 * @reserved: reserved for future use. Must be set to zero.
 * @pfn: the physical frame number of the page to be locked or unlocked.
 */
struct vmballoon_batch_entry {
	u64 status : 5;
	u64 reserved : PAGE_SHIFT - 5;
	u64 pfn : 52;
} __packed;
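/*
 * Layout note (editor's illustration): each batch entry packs a 5-bit status,
 * PAGE_SHIFT - 5 (i.e., 7 with 4KB pages) reserved bits and a 52-bit PFN into
 * a single 64-bit word. A 4KB communication page therefore holds
 * PAGE_SIZE / sizeof(struct vmballoon_batch_entry) = 4096 / 8 = 512 entries,
 * which is where the "up to 512 pages" limit of the batched commands above
 * comes from.
 */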
struct vmballoon {
	struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];

	/**
	 * @max_page_size: maximum supported page size for ballooning.
	 *
	 * Protected by @conf_sem.
	 */
	enum vmballoon_page_size_type max_page_size;

	/**
	 * @size: balloon actual size in basic page size (frames).
	 *
	 * While we currently do not support sizes that exceed 32 bits, use
	 * 64 bits in preparation for future support.
	 */
	atomic64_t size;

	/**
	 * @target: balloon target size in basic page size (frames).
	 *
	 * We do not protect the target under the assumption that setting the
	 * value is always done through a single write. If this assumption ever
	 * breaks, we would have to use READ_ONCE/WRITE_ONCE for accesses, and
	 * suffer the less optimized code. Although we may read a stale target
	 * value if multiple accesses happen at once, the performance impact
	 * should be minor.
	 */
	unsigned long target;

	/**
	 * @reset_required: reset flag
	 *
	 * Setting this flag may introduce races, but the code is expected to
	 * handle them gracefully. In the worst case, another operation will
	 * fail as reset did not take place. Clearing the flag is done while
	 * holding @conf_sem for write.
	 */
	bool reset_required;

	/**
	 * @capabilities: hypervisor balloon capabilities.
	 *
	 * Protected by @conf_sem.
	 */
	unsigned long capabilities;

	/**
	 * @batch_page: pointer to communication batch page.
	 *
	 * When batching is used, batch_page points to a page, which holds up
	 * to %VMW_BALLOON_BATCH_MAX_PAGES entries for locking or unlocking.
	 */
	struct vmballoon_batch_entry *batch_page;

	/**
	 * @batch_max_pages: maximum pages that can be locked/unlocked.
	 *
	 * Indicates the number of pages that the hypervisor can lock or unlock
	 * at once, according to whether batching is enabled. If batching is
	 * disabled, only a single page can be locked/unlocked on each
	 * operation.
	 *
	 * Protected by @conf_sem.
	 */
	unsigned int batch_max_pages;

	/**
	 * @page: page to be locked/unlocked by the hypervisor
	 *
	 * @page is only used when batching is disabled and a single page is
	 * reclaimed on each iteration.
	 *
	 * Protected by @comm_lock.
	 */
	struct page *page;

	/* statistics */
	struct vmballoon_stats *stats;

#ifdef CONFIG_DEBUG_FS
	/* debugfs file exporting statistics */
	struct dentry *dbg_entry;
#endif

	struct delayed_work dwork;

	/**
	 * @vmci_doorbell: handle to the VMCI doorbell.
	 *
	 * Protected by @conf_sem.
	 */
	struct vmci_handle vmci_doorbell;

	/**
	 * @conf_sem: semaphore to protect the configuration and the statistics.
	 */
	struct rw_semaphore conf_sem;

	/**
	 * @comm_lock: lock to protect the communication with the host.
	 *
	 * Lock ordering: @conf_sem -> @comm_lock.
	 */
	spinlock_t comm_lock;
};
static struct vmballoon balloon;

struct vmballoon_stats {
	/* timer / doorbell operations */
	atomic64_t general_stat[VMW_BALLOON_STAT_NUM];

	/* allocation statistics for huge and small pages */
	atomic64_t
	       page_stat[VMW_BALLOON_PAGE_STAT_NUM][VMW_BALLOON_NUM_PAGE_SIZES];

	/* Monitor operations: total operations, and failures */
	atomic64_t ops[VMW_BALLOON_CMD_NUM][VMW_BALLOON_OP_STAT_TYPES];
};

static inline bool is_vmballoon_stats_on(void)
{
	return IS_ENABLED(CONFIG_DEBUG_FS) &&
		static_branch_unlikely(&balloon_stat_enabled);
}

static inline void vmballoon_stats_op_inc(struct vmballoon *b, unsigned int op,
					  enum vmballoon_op_stat_type type)
{
	if (is_vmballoon_stats_on())
		atomic64_inc(&b->stats->ops[op][type]);
}

static inline void vmballoon_stats_gen_inc(struct vmballoon *b,
					   enum vmballoon_stat_general stat)
{
	if (is_vmballoon_stats_on())
		atomic64_inc(&b->stats->general_stat[stat]);
}

static inline void vmballoon_stats_gen_add(struct vmballoon *b,
					   enum vmballoon_stat_general stat,
					   unsigned int val)
{
	if (is_vmballoon_stats_on())
		atomic64_add(val, &b->stats->general_stat[stat]);
}

static inline void vmballoon_stats_page_inc(struct vmballoon *b,
					    enum vmballoon_stat_page stat,
					    enum vmballoon_page_size_type size)
{
	if (is_vmballoon_stats_on())
		atomic64_inc(&b->stats->page_stat[stat][size]);
}

static inline void vmballoon_stats_page_add(struct vmballoon *b,
					    enum vmballoon_stat_page stat,
					    enum vmballoon_page_size_type size,
					    unsigned int val)
{
	if (is_vmballoon_stats_on())
		atomic64_add(val, &b->stats->page_stat[stat][size]);
}

static inline unsigned long
__vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
		unsigned long arg2, unsigned long *result)
{
	unsigned long status, dummy1, dummy2, dummy3, local_result;

	vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_STAT);

	asm volatile ("inl %%dx" :
		"=a"(status),
		"=c"(dummy1),
		"=d"(dummy2),
		"=b"(local_result),
		"=S"(dummy3) :
		"0"(VMW_BALLOON_HV_MAGIC),
		"1"(cmd),
		"2"(VMW_BALLOON_HV_PORT),
		"3"(arg1),
		"4"(arg2) :
		"memory");

	/* update the result if needed */
	if (result)
		*result = (cmd == VMW_BALLOON_CMD_START) ? dummy1 :
							   local_result;

	/* update target when applicable */
	if (status == VMW_BALLOON_SUCCESS &&
	    ((1ul << cmd) & VMW_BALLOON_CMD_WITH_TARGET_MASK))
		WRITE_ONCE(b->target, local_result);

	if (status != VMW_BALLOON_SUCCESS &&
	    status != VMW_BALLOON_SUCCESS_WITH_CAPABILITIES) {
		vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_FAIL_STAT);
		pr_debug("%s: %s [0x%lx,0x%lx) failed, returned %ld\n",
			 __func__, vmballoon_cmd_names[cmd], arg1, arg2,
			 status);
	}

	/* mark reset required accordingly */
	if (status == VMW_BALLOON_ERROR_RESET)
		b->reset_required = true;

	return status;
}

static __always_inline unsigned long
vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
	      unsigned long arg2)
{
	unsigned long dummy;

	return __vmballoon_cmd(b, cmd, arg1, arg2, &dummy);
}
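/*
 * Backdoor call convention (editor's illustration, derived from the inline
 * assembly above): the hypervisor receives the magic in EAX, the command in
 * ECX, the port number in DX and the arguments in EBX/ESI, then returns the
 * status in EAX and the result in EBX (ECX for the START command). A
 * hypothetical caller fetching the balloon target would look like:
 *
 *	unsigned long target;
 *	unsigned long status;
 *
 *	status = __vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET,
 *				 totalram_pages(), 0, &target);
 *	if (status == VMW_BALLOON_SUCCESS)
 *		pr_debug("target: %lu frames\n", target);
 */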
/*
 * Send the "start" command to the host, communicating the supported version
 * of the protocol.
 */
static int vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
{
	unsigned long status, capabilities;

	status = __vmballoon_cmd(b, VMW_BALLOON_CMD_START, req_caps, 0,
				 &capabilities);

	switch (status) {
	case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
		b->capabilities = capabilities;
		break;
	case VMW_BALLOON_SUCCESS:
		b->capabilities = VMW_BALLOON_BASIC_CMDS;
		break;
	default:
		return -EIO;
	}

	/*
	 * 2MB pages are only supported with batching. If batching is for some
	 * reason disabled, do not use 2MB pages, since otherwise the legacy
	 * mechanism is used with 2MB pages, causing a failure.
	 */
	b->max_page_size = VMW_BALLOON_4K_PAGE;
	if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) &&
	    (b->capabilities & VMW_BALLOON_BATCHED_CMDS))
		b->max_page_size = VMW_BALLOON_2M_PAGE;

	return 0;
}

/**
 * vmballoon_send_guest_id - communicate guest type to the host.
 *
 * @b: pointer to the balloon.
 *
 * Communicate guest type to the host so that it can adjust the ballooning
 * algorithm to the one most appropriate for the guest. This command is
 * normally issued after sending the "start" command and is part of the
 * standard reset sequence.
 *
 * Return: zero on success or an appropriate error code.
 */
static int vmballoon_send_guest_id(struct vmballoon *b)
{
	unsigned long status;

	status = vmballoon_cmd(b, VMW_BALLOON_CMD_GUEST_ID,
			       VMW_BALLOON_GUEST_ID, 0);

	return status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
}

/**
 * vmballoon_page_order() - return the order of the page
 * @page_size: the size of the page.
 *
 * Return: the allocation order.
 */
static inline
unsigned int vmballoon_page_order(enum vmballoon_page_size_type page_size)
{
	return page_size == VMW_BALLOON_2M_PAGE ? VMW_BALLOON_2M_ORDER : 0;
}

/**
 * vmballoon_page_in_frames() - returns the number of frames in a page.
 * @page_size: the size of the page.
 *
 * Return: the number of 4k frames.
 */
static inline unsigned int
vmballoon_page_in_frames(enum vmballoon_page_size_type page_size)
{
	return 1 << vmballoon_page_order(page_size);
}
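/*
 * Worked example (editor's note): on x86-64 with 4KB base pages, PMD_SHIFT is
 * 21 and PAGE_SHIFT is 12, so VMW_BALLOON_2M_ORDER is 9 and
 * vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE) returns 1 << 9 = 512, i.e., a
 * single 2MB page accounts for 512 basic-size frames in the balloon size and
 * target calculations.
 */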
/**
 * vmballoon_mark_page_offline() - mark a page as offline
 * @page: pointer for the page.
 * @page_size: the size of the page.
 */
static void
vmballoon_mark_page_offline(struct page *page,
			    enum vmballoon_page_size_type page_size)
{
	int i;

	for (i = 0; i < vmballoon_page_in_frames(page_size); i++)
		__SetPageOffline(page + i);
}

/**
 * vmballoon_mark_page_online() - mark a page as online
 * @page: pointer for the page.
 * @page_size: the size of the page.
 */
static void
vmballoon_mark_page_online(struct page *page,
			   enum vmballoon_page_size_type page_size)
{
	int i;

	for (i = 0; i < vmballoon_page_in_frames(page_size); i++)
		__ClearPageOffline(page + i);
}

/**
 * vmballoon_send_get_target() - Retrieve desired balloon size from the host.
 *
 * @b: pointer to the balloon.
 *
 * Return: zero on success, -EINVAL if the limit does not fit in 32-bit, as
 * required by the host-guest protocol, and -EIO if an error occurred in
 * communicating with the host.
 */
static int vmballoon_send_get_target(struct vmballoon *b)
{
	unsigned long status;
	unsigned long limit;

	limit = totalram_pages();

	/* Ensure limit fits in 32-bits if 64-bit targets are not supported */
	if (!(b->capabilities & VMW_BALLOON_64_BIT_TARGET) &&
	    limit != (u32)limit)
		return -EINVAL;

	status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0);

	return status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
}

/**
 * vmballoon_alloc_page_list - allocates a list of pages.
 *
 * @b: pointer to the balloon.
 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
 * @req_n_pages: the number of requested pages.
 *
 * Tries to allocate @req_n_pages. Adds them to the list of balloon pages in
 * @ctl.pages and updates @ctl.n_pages to reflect the number of pages.
 *
 * Return: zero on success or an error code otherwise.
 */
static int vmballoon_alloc_page_list(struct vmballoon *b,
				     struct vmballoon_ctl *ctl,
				     unsigned int req_n_pages)
{
	struct page *page;
	unsigned int i;

	for (i = 0; i < req_n_pages; i++) {
		if (ctl->page_size == VMW_BALLOON_2M_PAGE)
			page = alloc_pages(VMW_HUGE_PAGE_ALLOC_FLAGS,
					   VMW_BALLOON_2M_ORDER);
		else
			page = alloc_page(VMW_PAGE_ALLOC_FLAGS);

		/* Update statistics */
		vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC,
					 ctl->page_size);

		if (page) {
			vmballoon_mark_page_offline(page, ctl->page_size);
			/* Success. Add the page to the list and continue. */
			list_add(&page->lru, &ctl->pages);
			continue;
		}

		/* Allocation failed. Update statistics and stop. */
		vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC_FAIL,
					 ctl->page_size);
		break;
	}

	ctl->n_pages = i;

	return req_n_pages == ctl->n_pages ? 0 : -ENOMEM;
}
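/*
 * Usage sketch (editor's illustration, mirroring vmballoon_inflate() below):
 * allocating a batch of sixteen 4KB pages. A -ENOMEM return only signals a
 * partial allocation; @ctl.pages still holds the pages that were obtained and
 * @ctl.n_pages their count, so the caller can proceed with a smaller batch:
 *
 *	struct vmballoon_ctl ctl = {
 *		.pages = LIST_HEAD_INIT(ctl.pages),
 *		.refused_pages = LIST_HEAD_INIT(ctl.refused_pages),
 *		.page_size = VMW_BALLOON_4K_PAGE,
 *		.op = VMW_BALLOON_INFLATE
 *	};
 *
 *	if (vmballoon_alloc_page_list(b, &ctl, 16))
 *		pr_debug("only %u pages allocated\n", ctl.n_pages);
 */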
/**
 * vmballoon_handle_one_result - Handle lock/unlock result for a single page.
 *
 * @b: pointer for %struct vmballoon.
 * @page: pointer for the page whose result should be handled.
 * @page_size: size of the page.
 * @status: status of the operation as provided by the hypervisor.
 *
 * Return: zero on success, -EIO if the hypervisor refused the page.
 */
static int vmballoon_handle_one_result(struct vmballoon *b, struct page *page,
				       enum vmballoon_page_size_type page_size,
				       unsigned long status)
{
	/* On success do nothing. The page is already on the balloon list. */
	if (likely(status == VMW_BALLOON_SUCCESS))
		return 0;

	pr_debug("%s: failed comm pfn %lx status %lu page_size %s\n", __func__,
		 page_to_pfn(page), status,
		 vmballoon_page_size_names[page_size]);

	/* Error occurred */
	vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC,
				 page_size);

	return -EIO;
}

/**
 * vmballoon_status_page - returns the status of (un)lock operation
 *
 * @b: pointer to the balloon.
 * @idx: index for the page for which the operation is performed.
 * @p: pointer to where the page struct is returned.
 *
 * Following a lock or unlock operation, returns the status of the operation
 * for an individual page. Provides the page that the operation was performed
 * on in the @p argument.
 *
 * Returns: The status of a lock or unlock operation for an individual page.
 */
static unsigned long vmballoon_status_page(struct vmballoon *b, int idx,
					   struct page **p)
{
	if (static_branch_likely(&vmw_balloon_batching)) {
		/* batching mode */
		*p = pfn_to_page(b->batch_page[idx].pfn);
		return b->batch_page[idx].status;
	}

	/* non-batching mode */
	*p = b->page;

	/*
	 * If a failure occurs, the indication will be provided in the status
	 * of the entire operation, which is considered before the individual
	 * page status. So for non-batching mode, the indication is always of
	 * success.
	 */
	return VMW_BALLOON_SUCCESS;
}

/**
 * vmballoon_lock_op - notifies the host about inflated/deflated pages.
 * @b: pointer to the balloon.
 * @num_pages: number of inflated/deflated pages.
 * @page_size: size of the page.
 * @op: the type of operation (lock or unlock).
 *
 * Notify the host about page(s) that were ballooned (or removed from the
 * balloon) so that the host can use them without fear that the guest will
 * need them (or, on unlock, can stop using them, since the VM needs them
 * again). The host may reject some pages; we need to check the return value
 * and maybe submit a different page. The pages that are inflated/deflated
 * are pointed to by @b->page.
 *
 * Return: result as provided by the hypervisor.
 */
static unsigned long vmballoon_lock_op(struct vmballoon *b,
				       unsigned int num_pages,
				       enum vmballoon_page_size_type page_size,
				       enum vmballoon_op op)
{
	unsigned long cmd, pfn;

	lockdep_assert_held(&b->comm_lock);

	if (static_branch_likely(&vmw_balloon_batching)) {
		if (op == VMW_BALLOON_INFLATE)
			cmd = page_size == VMW_BALLOON_2M_PAGE ?
				VMW_BALLOON_CMD_BATCHED_2M_LOCK :
				VMW_BALLOON_CMD_BATCHED_LOCK;
		else
			cmd = page_size == VMW_BALLOON_2M_PAGE ?
				VMW_BALLOON_CMD_BATCHED_2M_UNLOCK :
				VMW_BALLOON_CMD_BATCHED_UNLOCK;

		pfn = PHYS_PFN(virt_to_phys(b->batch_page));
	} else {
		cmd = op == VMW_BALLOON_INFLATE ? VMW_BALLOON_CMD_LOCK :
						  VMW_BALLOON_CMD_UNLOCK;
		pfn = page_to_pfn(b->page);

		/* In non-batching mode, PFNs must fit in 32-bit */
		if (unlikely(pfn != (u32)pfn))
			return VMW_BALLOON_ERROR_PPN_INVALID;
	}

	return vmballoon_cmd(b, cmd, pfn, num_pages);
}

/**
 * vmballoon_add_page - adds a page towards lock/unlock operation.
 *
 * @b: pointer to the balloon.
 * @idx: index of the page to be ballooned in this batch.
 * @p: pointer to the page that is about to be ballooned.
 *
 * Adds the page to be ballooned. Must be called while holding @comm_lock.
 */
static void vmballoon_add_page(struct vmballoon *b, unsigned int idx,
			       struct page *p)
{
	lockdep_assert_held(&b->comm_lock);

	if (static_branch_likely(&vmw_balloon_batching))
		b->batch_page[idx] = (struct vmballoon_batch_entry)
					{ .pfn = page_to_pfn(p) };
	else
		b->page = p;
}
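/*
 * Editor's note: the 32-bit PFN check above means that in non-batching mode
 * only pages below 2^32 * 4KB = 16TB of guest physical address space can be
 * ballooned; batched entries carry a 52-bit PFN and do not have this limit.
 */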
/**
 * vmballoon_lock - lock or unlock a batch of pages.
 *
 * @b: pointer to the balloon.
 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
 *
 * Notifies the host about ballooned pages (after inflation or deflation,
 * according to @ctl). If the host rejects a page, it is put on the @ctl
 * refused-pages list. These refused pages are then released when moving to
 * the next size of pages.
 *
 * Note that we neither free the pages here nor put them back on the balloon
 * list. Instead, we queue them for later processing. We do that for several
 * reasons. First, we do not want to free a page under the lock. Second, it
 * allows us to unify the handling of lock and unlock. In the inflate case,
 * the caller will check if there are too many refused pages and release them.
 * Although it is not identical to the past behavior, it should not affect
 * performance.
 *
 * Return: zero on success, -EIO if the batch operation failed.
 */
static int vmballoon_lock(struct vmballoon *b, struct vmballoon_ctl *ctl)
{
	unsigned long batch_status;
	struct page *page;
	unsigned int i, num_pages;

	num_pages = ctl->n_pages;
	if (num_pages == 0)
		return 0;

	/* communication with the host is done under the communication lock */
	spin_lock(&b->comm_lock);

	i = 0;
	list_for_each_entry(page, &ctl->pages, lru)
		vmballoon_add_page(b, i++, page);

	batch_status = vmballoon_lock_op(b, ctl->n_pages, ctl->page_size,
					 ctl->op);

	/*
	 * Iterate over the pages in the provided list. Since we are changing
	 * @ctl->n_pages we are saving the original value in @num_pages and
	 * use this value to bound the loop.
	 */
	for (i = 0; i < num_pages; i++) {
		unsigned long status;

		status = vmballoon_status_page(b, i, &page);

		/*
		 * Failure of the whole batch overrides the individual page
		 * results.
		 */
		if (batch_status != VMW_BALLOON_SUCCESS)
			status = batch_status;

		/* Continue if no error happened */
		if (!vmballoon_handle_one_result(b, page, ctl->page_size,
						 status))
			continue;

		/*
		 * An error happened. Move the page to the refused list and
		 * update the page counters.
		 */
		list_move(&page->lru, &ctl->refused_pages);
		ctl->n_pages--;
		ctl->n_refused_pages++;
	}

	spin_unlock(&b->comm_lock);

	return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
}

/**
 * vmballoon_release_page_list() - Releases a page list
 *
 * @page_list: list of pages to release.
 * @n_pages: pointer to the number of pages.
 * @page_size: whether the pages in the list are 2MB (or else 4KB).
 *
 * Releases the list of pages and zeros the number of pages.
 */
static void vmballoon_release_page_list(struct list_head *page_list,
					int *n_pages,
					enum vmballoon_page_size_type page_size)
{
	struct page *page, *tmp;

	list_for_each_entry_safe(page, tmp, page_list, lru) {
		list_del(&page->lru);
		vmballoon_mark_page_online(page, page_size);
		__free_pages(page, vmballoon_page_order(page_size));
	}

	*n_pages = 0;
}

/*
 * Release pages that were allocated while attempting to inflate the
 * balloon but were refused by the host for one reason or another.
 */
static void vmballoon_release_refused_pages(struct vmballoon *b,
					    struct vmballoon_ctl *ctl)
{
	vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_FREE,
				 ctl->page_size);

	vmballoon_release_page_list(&ctl->refused_pages, &ctl->n_refused_pages,
				    ctl->page_size);
}
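/*
 * Example (editor's illustration): if the host rejects 3 pages of a 512-page
 * inflate batch, vmballoon_lock() moves the 3 pages to @ctl->refused_pages,
 * leaving @ctl->n_pages at 509 and @ctl->n_refused_pages at 3. Once
 * n_refused_pages reaches VMW_BALLOON_MAX_REFUSED (16), the inflate path
 * below releases the refused pages and falls back to a smaller page size.
 */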
/**
 * vmballoon_change - retrieve the required balloon change
 *
 * @b: pointer to the balloon.
 *
 * Return: the required change for the balloon size. A positive number
 * indicates inflation, a negative number indicates deflation.
 */
static int64_t vmballoon_change(struct vmballoon *b)
{
	int64_t size, target;

	size = atomic64_read(&b->size);
	target = READ_ONCE(b->target);

	/*
	 * We must cast first because of int sizes; otherwise we might get
	 * huge positives instead of negatives.
	 */

	if (b->reset_required)
		return 0;

	/* consider a 2MB slack on deflate, unless the balloon is emptied */
	if (target < size && target != 0 &&
	    size - target < vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE))
		return 0;

	return target - size;
}

/**
 * vmballoon_enqueue_page_list() - Enqueues list of pages after inflation.
 *
 * @b: pointer to balloon.
 * @pages: list of pages to enqueue.
 * @n_pages: pointer to number of pages in list. The value is zeroed.
 * @page_size: whether the pages are 2MB or 4KB pages.
 *
 * Enqueues the provided list of pages in the ballooned page list, clears the
 * list and zeroes the number of pages that was provided.
 */
static void vmballoon_enqueue_page_list(struct vmballoon *b,
					struct list_head *pages,
					unsigned int *n_pages,
					enum vmballoon_page_size_type page_size)
{
	struct vmballoon_page_size *page_size_info = &b->page_sizes[page_size];

	list_splice_init(pages, &page_size_info->pages);
	*n_pages = 0;
}

/**
 * vmballoon_dequeue_page_list() - Dequeues a page list for deflation.
 *
 * @b: pointer to balloon.
 * @pages: list onto which the dequeued pages are added.
 * @n_pages: pointer to the number of pages. Set to the number of dequeued
 *           pages.
 * @page_size: whether the pages are 2MB or 4KB pages.
 * @n_req_pages: the number of requested pages.
 *
 * Dequeues the number of requested pages from the balloon for deflation. The
 * number of dequeued pages may be lower, if not enough pages in the requested
 * size are available.
 */
static void vmballoon_dequeue_page_list(struct vmballoon *b,
					struct list_head *pages,
					unsigned int *n_pages,
					enum vmballoon_page_size_type page_size,
					unsigned int n_req_pages)
{
	struct vmballoon_page_size *page_size_info = &b->page_sizes[page_size];
	struct page *page, *tmp;
	unsigned int i = 0;

	list_for_each_entry_safe(page, tmp, &page_size_info->pages, lru) {
		list_move(&page->lru, pages);
		if (++i == n_req_pages)
			break;
	}
	*n_pages = i;
}
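/*
 * Worked example (editor's note): with a balloon size of 10240 frames and a
 * target of 10000, the 240-frame difference is below the 512-frame (2MB)
 * slack, so vmballoon_change() returns 0 and no deflation happens. A target
 * of 0 always empties the balloon, and a target of 11000 yields +760, i.e.,
 * an inflation request of 760 frames.
 */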
/**
 * vmballoon_inflate() - Inflate the balloon towards its target size.
 *
 * @b: pointer to the balloon.
 */
static void vmballoon_inflate(struct vmballoon *b)
{
	int64_t to_inflate_frames;
	struct vmballoon_ctl ctl = {
		.pages = LIST_HEAD_INIT(ctl.pages),
		.refused_pages = LIST_HEAD_INIT(ctl.refused_pages),
		.page_size = b->max_page_size,
		.op = VMW_BALLOON_INFLATE
	};

	while ((to_inflate_frames = vmballoon_change(b)) > 0) {
		unsigned int to_inflate_pages, page_in_frames;
		int alloc_error, lock_error = 0;

		VM_BUG_ON(!list_empty(&ctl.pages));
		VM_BUG_ON(ctl.n_pages != 0);

		page_in_frames = vmballoon_page_in_frames(ctl.page_size);

		to_inflate_pages = min_t(unsigned long, b->batch_max_pages,
					 DIV_ROUND_UP_ULL(to_inflate_frames,
							  page_in_frames));

		/* Start by allocating */
		alloc_error = vmballoon_alloc_page_list(b, &ctl,
							to_inflate_pages);

		/* Actually lock the pages by telling the hypervisor */
		lock_error = vmballoon_lock(b, &ctl);

		/*
		 * If an error indicates that something serious went wrong,
		 * stop the inflation.
		 */
		if (lock_error)
			break;

		/* Update the balloon size */
		atomic64_add(ctl.n_pages * page_in_frames, &b->size);

		vmballoon_enqueue_page_list(b, &ctl.pages, &ctl.n_pages,
					    ctl.page_size);

		/*
		 * If allocation failed or the number of refused pages exceeds
		 * the maximum allowed, move to the next page size.
		 */
		if (alloc_error ||
		    ctl.n_refused_pages >= VMW_BALLOON_MAX_REFUSED) {
			if (ctl.page_size == VMW_BALLOON_4K_PAGE)
				break;

			/*
			 * Ignore errors from locking, as we now switch to 4k
			 * pages and we might get different errors.
			 */
			vmballoon_release_refused_pages(b, &ctl);
			ctl.page_size--;
		}

		cond_resched();
	}

	/*
	 * Release pages that were allocated while attempting to inflate the
	 * balloon but were refused by the host for one reason or another,
	 * and update the statistics.
	 */
	if (ctl.n_refused_pages != 0)
		vmballoon_release_refused_pages(b, &ctl);
}
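/*
 * Sizing example (editor's note): to inflate by 1GB (262144 frames) with
 * batching enabled (batch_max_pages == 512) and 2MB pages (512 frames each),
 * the first iteration requests min(512, DIV_ROUND_UP(262144, 512)) = 512
 * pages, i.e., the whole 1GB in a single batched hypercall. Should 2MB
 * allocations fail, ctl.page_size drops to 4KB and the same amount takes
 * 512 iterations of 512 pages each.
 */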
/**
 * vmballoon_deflate() - Decrease the size of the balloon.
 *
 * @b: pointer to the balloon.
 * @n_frames: the number of frames to deflate. If zero, automatically
 *            calculated according to the target size.
 * @coordinated: whether to coordinate with the host.
 *
 * Decrease the size of the balloon allowing guest to use more memory.
 *
 * Return: The number of deflated frames (i.e., basic page size units).
 */
static unsigned long vmballoon_deflate(struct vmballoon *b, uint64_t n_frames,
				       bool coordinated)
{
	unsigned long deflated_frames = 0;
	unsigned long tried_frames = 0;
	struct vmballoon_ctl ctl = {
		.pages = LIST_HEAD_INIT(ctl.pages),
		.refused_pages = LIST_HEAD_INIT(ctl.refused_pages),
		.page_size = VMW_BALLOON_4K_PAGE,
		.op = VMW_BALLOON_DEFLATE
	};

	/* free pages to reach target */
	while (true) {
		unsigned int to_deflate_pages, n_unlocked_frames;
		unsigned int page_in_frames;
		int64_t to_deflate_frames;
		bool deflated_all;

		page_in_frames = vmballoon_page_in_frames(ctl.page_size);

		VM_BUG_ON(!list_empty(&ctl.pages));
		VM_BUG_ON(ctl.n_pages);
		VM_BUG_ON(!list_empty(&ctl.refused_pages));
		VM_BUG_ON(ctl.n_refused_pages);

		/*
		 * If a specific number of frames was requested, we try to
		 * deflate that number of frames. Otherwise, deflation is
		 * performed according to the target and balloon size.
		 */
		to_deflate_frames = n_frames ? n_frames - tried_frames :
					       -vmballoon_change(b);

		/* break if no work to do */
		if (to_deflate_frames <= 0)
			break;

		/*
		 * Calculate the number of frames based on current page size,
		 * but limit the deflated frames to a single chunk
		 */
		to_deflate_pages = min_t(unsigned long, b->batch_max_pages,
					 DIV_ROUND_UP_ULL(to_deflate_frames,
							  page_in_frames));

		/* First take the pages from the balloon pages. */
		vmballoon_dequeue_page_list(b, &ctl.pages, &ctl.n_pages,
					    ctl.page_size, to_deflate_pages);

		/*
		 * Before pages are moved to the refused list, count their
		 * frames as frames that we tried to deflate.
		 */
		tried_frames += ctl.n_pages * page_in_frames;

		/*
		 * Unlock the pages by communicating with the hypervisor if the
		 * communication is coordinated (i.e., not pop). We ignore the
		 * return code. Instead, we check whether we managed to unlock
		 * all the pages. If we failed, we will move to the next page
		 * size and would eventually try again later.
		 */
		if (coordinated)
			vmballoon_lock(b, &ctl);

		/*
		 * Check if we deflated enough. We will move to the next page
		 * size if we did not manage to do so. This calculation takes
		 * place now, as once the pages are released, the number of
		 * pages is zeroed.
		 */
		deflated_all = (ctl.n_pages == to_deflate_pages);

		/* Update local and global counters */
		n_unlocked_frames = ctl.n_pages * page_in_frames;
		atomic64_sub(n_unlocked_frames, &b->size);
		deflated_frames += n_unlocked_frames;

		vmballoon_stats_page_add(b, VMW_BALLOON_PAGE_STAT_FREE,
					 ctl.page_size, ctl.n_pages);

		/* free the ballooned pages */
		vmballoon_release_page_list(&ctl.pages, &ctl.n_pages,
					    ctl.page_size);

		/* Return the refused pages to the ballooned list. */
		vmballoon_enqueue_page_list(b, &ctl.refused_pages,
					    &ctl.n_refused_pages,
					    ctl.page_size);

		/* If we failed to unlock all the pages, move to next size. */
		if (!deflated_all) {
			if (ctl.page_size == b->max_page_size)
				break;
			ctl.page_size++;
		}

		cond_resched();
	}

	return deflated_frames;
}

/**
 * vmballoon_deinit_batching - disables batching mode.
 *
 * @b: pointer to &struct vmballoon.
 *
 * Disables batching by deallocating the page used for communication with the
 * hypervisor and disabling the static key to indicate that batching is off.
 */
static void vmballoon_deinit_batching(struct vmballoon *b)
{
	free_page((unsigned long)b->batch_page);
	b->batch_page = NULL;
	static_branch_disable(&vmw_balloon_batching);
	b->batch_max_pages = 1;
}
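/*
 * Editor's note: after vmballoon_deinit_batching() the driver falls back to
 * the legacy one-page-per-hypercall protocol, so batch_max_pages drops from
 * PAGE_SIZE / sizeof(struct vmballoon_batch_entry) = 512 to 1. Deflating 1GB
 * of 4KB pages then takes 262144 UNLOCK hypercalls instead of 512 batched
 * ones.
 */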
/**
 * vmballoon_init_batching - enable batching mode.
 *
 * @b: pointer to &struct vmballoon.
 *
 * Enables batching by allocating a page for communication with the hypervisor
 * and enabling the static_key to use batching.
 *
 * Return: zero on success or an appropriate error code.
 */
static int vmballoon_init_batching(struct vmballoon *b)
{
	struct page *page;

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page)
		return -ENOMEM;

	b->batch_page = page_address(page);
	b->batch_max_pages = PAGE_SIZE / sizeof(struct vmballoon_batch_entry);

	static_branch_enable(&vmw_balloon_batching);

	return 0;
}

/*
 * Receive notification and resize balloon
 */
static void vmballoon_doorbell(void *client_data)
{
	struct vmballoon *b = client_data;

	vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_DOORBELL);

	mod_delayed_work(system_freezable_wq, &b->dwork, 0);
}

/*
 * Clean up vmci doorbell
 */
static void vmballoon_vmci_cleanup(struct vmballoon *b)
{
	vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
		      VMCI_INVALID_ID, VMCI_INVALID_ID);

	if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
		vmci_doorbell_destroy(b->vmci_doorbell);
		b->vmci_doorbell = VMCI_INVALID_HANDLE;
	}
}

/**
 * vmballoon_vmci_init - Initialize vmci doorbell.
 *
 * @b: pointer to the balloon.
 *
 * Return: zero on success or when the wakeup command is not supported; an
 * error code otherwise.
 *
 * Initialize vmci doorbell, to get notified as soon as balloon changes.
 */
static int vmballoon_vmci_init(struct vmballoon *b)
{
	unsigned long error;

	if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0)
		return 0;

	error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB,
				     VMCI_PRIVILEGE_FLAG_RESTRICTED,
				     vmballoon_doorbell, b);

	if (error != VMCI_SUCCESS)
		goto fail;

	error =	__vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
				b->vmci_doorbell.context,
				b->vmci_doorbell.resource, NULL);

	if (error != VMW_BALLOON_SUCCESS)
		goto fail;

	return 0;
fail:
	vmballoon_vmci_cleanup(b);
	return -EIO;
}
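/*
 * Notification flow (editor's note): when the host rings the VMCI doorbell,
 * vmballoon_doorbell() calls mod_delayed_work(..., 0), so vmballoon_work()
 * runs immediately and picks up the new target instead of waiting for the
 * next periodic 1-second tick.
 */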
/**
 * vmballoon_pop - Quickly release all pages allocated for the balloon.
 *
 * @b: pointer to the balloon.
 *
 * This function is called when the host decides to "reset" the balloon for
 * one reason or another. Unlike a normal "deflate" we do not (shall not)
 * notify the host of the pages being released.
 */
static void vmballoon_pop(struct vmballoon *b)
{
	unsigned long size;

	while ((size = atomic64_read(&b->size)))
		vmballoon_deflate(b, size, false);
}

/*
 * Perform standard reset sequence by popping the balloon (in case it
 * is not empty) and then restarting the protocol. This operation normally
 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
 */
static void vmballoon_reset(struct vmballoon *b)
{
	int error;

	down_write(&b->conf_sem);

	vmballoon_vmci_cleanup(b);

	/* free all pages, skipping monitor unlock */
	vmballoon_pop(b);

	if (vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
		goto unlock;

	if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
		if (vmballoon_init_batching(b)) {
			/*
			 * We failed to initialize batching, inform the monitor
			 * about it by sending a null capability.
			 *
			 * The guest will retry in one second.
			 */
			vmballoon_send_start(b, 0);
			goto unlock;
		}
	} else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
		vmballoon_deinit_batching(b);
	}

	vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_RESET);
	b->reset_required = false;

	error = vmballoon_vmci_init(b);
	if (error)
		pr_err("failed to initialize vmci doorbell\n");

	if (vmballoon_send_guest_id(b))
		pr_err("failed to send guest ID to the host\n");

unlock:
	up_write(&b->conf_sem);
}

/**
 * vmballoon_work - periodic balloon worker for reset, inflation and deflation.
 *
 * @work: pointer to the &work_struct which is provided by the workqueue.
 *
 * Resets the protocol if needed, gets the new size and adjusts balloon as
 * needed. Repeats in 1 sec.
 */
static void vmballoon_work(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
	int64_t change = 0;

	if (b->reset_required)
		vmballoon_reset(b);

	down_read(&b->conf_sem);

	/*
	 * Update the stats while holding the semaphore to ensure that
	 * @stats_enabled is consistent with whether the stats are actually
	 * enabled.
	 */
	vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_TIMER);

	if (!vmballoon_send_get_target(b))
		change = vmballoon_change(b);

	if (change != 0) {
		pr_debug("%s - size: %llu, target %lu\n", __func__,
			 atomic64_read(&b->size), READ_ONCE(b->target));

		if (change > 0)
			vmballoon_inflate(b);
		else  /* (change < 0) */
			vmballoon_deflate(b, 0, true);
	}

	up_read(&b->conf_sem);

	/*
	 * We are using a freezable workqueue so that balloon operations are
	 * stopped while the system transitions to/from sleep/hibernation.
	 */
	queue_delayed_work(system_freezable_wq,
			   dwork, round_jiffies_relative(HZ));
}

/*
 * DEBUGFS Interface
 */
#ifdef CONFIG_DEBUG_FS

static const char * const vmballoon_stat_page_names[] = {
	[VMW_BALLOON_PAGE_STAT_ALLOC]		= "alloc",
	[VMW_BALLOON_PAGE_STAT_ALLOC_FAIL]	= "allocFail",
	[VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC]	= "errAlloc",
	[VMW_BALLOON_PAGE_STAT_REFUSED_FREE]	= "errFree",
	[VMW_BALLOON_PAGE_STAT_FREE]		= "free"
};

static const char * const vmballoon_stat_names[] = {
	[VMW_BALLOON_STAT_TIMER]		= "timer",
	[VMW_BALLOON_STAT_DOORBELL]		= "doorbell",
	[VMW_BALLOON_STAT_RESET]		= "reset",
};

static int vmballoon_enable_stats(struct vmballoon *b)
{
	int r = 0;

	down_write(&b->conf_sem);

	/* did we somehow race with another reader which enabled stats? */
	if (b->stats)
		goto out;

	b->stats = kzalloc(sizeof(*b->stats), GFP_KERNEL);

	if (!b->stats) {
		/* allocation failed */
		r = -ENOMEM;
		goto out;
	}
	static_key_enable(&balloon_stat_enabled.key);
out:
	up_write(&b->conf_sem);
	return r;
}

/**
 * vmballoon_debug_show - shows statistics of balloon operations.
 * @f: pointer to the &struct seq_file.
 * @offset: ignored.
 *
 * Provides the statistics that can be accessed in vmmemctl in the debugfs.
 * To avoid the overhead - mainly that of memory - of collecting the
 * statistics, we only collect statistics after the first time the counters
 * are read.
1464 * 1465 * Return: zero on success or an error code. 1466 */ 1467 static int vmballoon_debug_show(struct seq_file *f, void *offset) 1468 { 1469 struct vmballoon *b = f->private; 1470 int i, j; 1471 1472 /* enables stats if they are disabled */ 1473 if (!b->stats) { 1474 int r = vmballoon_enable_stats(b); 1475 1476 if (r) 1477 return r; 1478 } 1479 1480 /* format capabilities info */ 1481 seq_printf(f, "%-22s: %#16x\n", "balloon capabilities", 1482 VMW_BALLOON_CAPABILITIES); 1483 seq_printf(f, "%-22s: %#16lx\n", "used capabilities", b->capabilities); 1484 seq_printf(f, "%-22s: %16s\n", "is resetting", 1485 b->reset_required ? "y" : "n"); 1486 1487 /* format size info */ 1488 seq_printf(f, "%-22s: %16lu\n", "target", READ_ONCE(b->target)); 1489 seq_printf(f, "%-22s: %16llu\n", "current", atomic64_read(&b->size)); 1490 1491 for (i = 0; i < VMW_BALLOON_CMD_NUM; i++) { 1492 if (vmballoon_cmd_names[i] == NULL) 1493 continue; 1494 1495 seq_printf(f, "%-22s: %16llu (%llu failed)\n", 1496 vmballoon_cmd_names[i], 1497 atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_STAT]), 1498 atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_FAIL_STAT])); 1499 } 1500 1501 for (i = 0; i < VMW_BALLOON_STAT_NUM; i++) 1502 seq_printf(f, "%-22s: %16llu\n", 1503 vmballoon_stat_names[i], 1504 atomic64_read(&b->stats->general_stat[i])); 1505 1506 for (i = 0; i < VMW_BALLOON_PAGE_STAT_NUM; i++) { 1507 for (j = 0; j < VMW_BALLOON_NUM_PAGE_SIZES; j++) 1508 seq_printf(f, "%-18s(%s): %16llu\n", 1509 vmballoon_stat_page_names[i], 1510 vmballoon_page_size_names[j], 1511 atomic64_read(&b->stats->page_stat[i][j])); 1512 } 1513 1514 return 0; 1515 } 1516 1517 DEFINE_SHOW_ATTRIBUTE(vmballoon_debug); 1518 1519 static int __init vmballoon_debugfs_init(struct vmballoon *b) 1520 { 1521 int error; 1522 1523 b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b, 1524 &vmballoon_debug_fops); 1525 if (IS_ERR(b->dbg_entry)) { 1526 error = PTR_ERR(b->dbg_entry); 1527 pr_err("failed to create debugfs entry, error: %d\n", error); 1528 return error; 1529 } 1530 1531 return 0; 1532 } 1533 1534 static void __exit vmballoon_debugfs_exit(struct vmballoon *b) 1535 { 1536 static_key_disable(&balloon_stat_enabled.key); 1537 debugfs_remove(b->dbg_entry); 1538 kfree(b->stats); 1539 b->stats = NULL; 1540 } 1541 1542 #else 1543 1544 static inline int vmballoon_debugfs_init(struct vmballoon *b) 1545 { 1546 return 0; 1547 } 1548 1549 static inline void vmballoon_debugfs_exit(struct vmballoon *b) 1550 { 1551 } 1552 1553 #endif /* CONFIG_DEBUG_FS */ 1554 1555 static int __init vmballoon_init(void) 1556 { 1557 enum vmballoon_page_size_type page_size; 1558 int error; 1559 1560 /* 1561 * Check if we are running on VMware's hypervisor and bail out 1562 * if we are not. 
	 */
	if (x86_hyper_type != X86_HYPER_VMWARE)
		return -ENODEV;

	for (page_size = VMW_BALLOON_4K_PAGE;
	     page_size <= VMW_BALLOON_LAST_SIZE; page_size++)
		INIT_LIST_HEAD(&balloon.page_sizes[page_size].pages);

	INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);

	error = vmballoon_debugfs_init(&balloon);
	if (error)
		return error;

	spin_lock_init(&balloon.comm_lock);
	init_rwsem(&balloon.conf_sem);
	balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
	balloon.batch_page = NULL;
	balloon.page = NULL;
	balloon.reset_required = true;

	queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);

	return 0;
}

/*
 * Using late_initcall() instead of module_init() allows the balloon to use the
 * VMCI doorbell even when the balloon is built into the kernel. Otherwise the
 * VMCI is probed only after the balloon is initialized. If the balloon is used
 * as a module, late_initcall() is equivalent to module_init().
 */
late_initcall(vmballoon_init);

static void __exit vmballoon_exit(void)
{
	vmballoon_vmci_cleanup(&balloon);
	cancel_delayed_work_sync(&balloon.dwork);

	vmballoon_debugfs_exit(&balloon);

	/*
	 * Deallocate all reserved memory, and reset connection with monitor.
	 * Reset connection before deallocating memory to avoid potential for
	 * additional spurious resets from guest touching deallocated pages.
	 */
	vmballoon_send_start(&balloon, 0);
	vmballoon_pop(&balloon);
}
module_exit(vmballoon_exit);
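/*
 * Usage note (editor's illustration): with CONFIG_DEBUG_FS enabled, the
 * balloon state and statistics can be inspected from the guest with:
 *
 *	# cat /sys/kernel/debug/vmmemctl
 *
 * The first read allocates the statistics structure and enables the
 * balloon_stat_enabled static key, so the per-command and per-page-size
 * counters printed by vmballoon_debug_show() only accumulate from that
 * point on.
 */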