1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * VMware Balloon driver. 4 * 5 * Copyright (C) 2000-2018, VMware, Inc. All Rights Reserved. 6 * 7 * This is VMware physical memory management driver for Linux. The driver 8 * acts like a "balloon" that can be inflated to reclaim physical pages by 9 * reserving them in the guest and invalidating them in the monitor, 10 * freeing up the underlying machine pages so they can be allocated to 11 * other guests. The balloon can also be deflated to allow the guest to 12 * use more physical memory. Higher level policies can control the sizes 13 * of balloons in VMs in order to manage physical memory resources. 14 */ 15 16 //#define DEBUG 17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 18 19 #include <linux/types.h> 20 #include <linux/kernel.h> 21 #include <linux/mm.h> 22 #include <linux/vmalloc.h> 23 #include <linux/sched.h> 24 #include <linux/module.h> 25 #include <linux/workqueue.h> 26 #include <linux/debugfs.h> 27 #include <linux/seq_file.h> 28 #include <linux/rwsem.h> 29 #include <linux/slab.h> 30 #include <linux/spinlock.h> 31 #include <linux/vmw_vmci_defs.h> 32 #include <linux/vmw_vmci_api.h> 33 #include <asm/hypervisor.h> 34 35 MODULE_AUTHOR("VMware, Inc."); 36 MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver"); 37 MODULE_VERSION("1.5.0.0-k"); 38 MODULE_ALIAS("dmi:*:svnVMware*:*"); 39 MODULE_ALIAS("vmware_vmmemctl"); 40 MODULE_LICENSE("GPL"); 41 42 /* 43 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't allow wait 44 * (__GFP_RECLAIM) for huge page allocations. Use __GFP_NOWARN, to suppress page 45 * allocation failure warnings. Disallow access to emergency low-memory pools. 46 */ 47 #define VMW_HUGE_PAGE_ALLOC_FLAGS (__GFP_HIGHMEM|__GFP_NOWARN| \ 48 __GFP_NOMEMALLOC) 49 50 /* 51 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We allow lightweight 52 * reclamation (__GFP_NORETRY). Use __GFP_NOWARN, to suppress page allocation 53 * failure warnings. Disallow access to emergency low-memory pools. 54 */ 55 #define VMW_PAGE_ALLOC_FLAGS (__GFP_HIGHMEM|__GFP_NOWARN| \ 56 __GFP_NOMEMALLOC|__GFP_NORETRY) 57 58 /* Maximum number of refused pages we accumulate during inflation cycle */ 59 #define VMW_BALLOON_MAX_REFUSED 16 60 61 /* 62 * Hypervisor communication port definitions. 63 */ 64 #define VMW_BALLOON_HV_PORT 0x5670 65 #define VMW_BALLOON_HV_MAGIC 0x456c6d6f 66 #define VMW_BALLOON_GUEST_ID 1 /* Linux */ 67 68 enum vmwballoon_capabilities { 69 /* 70 * Bit 0 is reserved and not associated to any capability. 71 */ 72 VMW_BALLOON_BASIC_CMDS = (1 << 1), 73 VMW_BALLOON_BATCHED_CMDS = (1 << 2), 74 VMW_BALLOON_BATCHED_2M_CMDS = (1 << 3), 75 VMW_BALLOON_SIGNALLED_WAKEUP_CMD = (1 << 4), 76 }; 77 78 #define VMW_BALLOON_CAPABILITIES (VMW_BALLOON_BASIC_CMDS \ 79 | VMW_BALLOON_BATCHED_CMDS \ 80 | VMW_BALLOON_BATCHED_2M_CMDS \ 81 | VMW_BALLOON_SIGNALLED_WAKEUP_CMD) 82 83 #define VMW_BALLOON_2M_ORDER (PMD_SHIFT - PAGE_SHIFT) 84 85 enum vmballoon_page_size_type { 86 VMW_BALLOON_4K_PAGE, 87 VMW_BALLOON_2M_PAGE, 88 VMW_BALLOON_LAST_SIZE = VMW_BALLOON_2M_PAGE 89 }; 90 91 #define VMW_BALLOON_NUM_PAGE_SIZES (VMW_BALLOON_LAST_SIZE + 1) 92 93 static const char * const vmballoon_page_size_names[] = { 94 [VMW_BALLOON_4K_PAGE] = "4k", 95 [VMW_BALLOON_2M_PAGE] = "2M" 96 }; 97 98 enum vmballoon_op { 99 VMW_BALLOON_INFLATE, 100 VMW_BALLOON_DEFLATE 101 }; 102 103 enum vmballoon_op_stat_type { 104 VMW_BALLOON_OP_STAT, 105 VMW_BALLOON_OP_FAIL_STAT 106 }; 107 108 #define VMW_BALLOON_OP_STAT_TYPES (VMW_BALLOON_OP_FAIL_STAT + 1) 109 110 /** 111 * enum vmballoon_cmd_type - backdoor commands. 112 * 113 * Availability of the commands is as followed: 114 * 115 * %VMW_BALLOON_CMD_START, %VMW_BALLOON_CMD_GET_TARGET and 116 * %VMW_BALLOON_CMD_GUEST_ID are always available. 117 * 118 * If the host reports %VMW_BALLOON_BASIC_CMDS are supported then 119 * %VMW_BALLOON_CMD_LOCK and %VMW_BALLOON_CMD_UNLOCK commands are available. 120 * 121 * If the host reports %VMW_BALLOON_BATCHED_CMDS are supported then 122 * %VMW_BALLOON_CMD_BATCHED_LOCK and VMW_BALLOON_CMD_BATCHED_UNLOCK commands 123 * are available. 124 * 125 * If the host reports %VMW_BALLOON_BATCHED_2M_CMDS are supported then 126 * %VMW_BALLOON_CMD_BATCHED_2M_LOCK and %VMW_BALLOON_CMD_BATCHED_2M_UNLOCK 127 * are supported. 128 * 129 * If the host reports VMW_BALLOON_SIGNALLED_WAKEUP_CMD is supported then 130 * VMW_BALLOON_CMD_VMCI_DOORBELL_SET command is supported. 131 * 132 * @VMW_BALLOON_CMD_START: Communicating supported version with the hypervisor. 133 * @VMW_BALLOON_CMD_GET_TARGET: Gets the balloon target size. 134 * @VMW_BALLOON_CMD_LOCK: Informs the hypervisor about a ballooned page. 135 * @VMW_BALLOON_CMD_UNLOCK: Informs the hypervisor about a page that is about 136 * to be deflated from the balloon. 137 * @VMW_BALLOON_CMD_GUEST_ID: Informs the hypervisor about the type of OS that 138 * runs in the VM. 139 * @VMW_BALLOON_CMD_BATCHED_LOCK: Inform the hypervisor about a batch of 140 * ballooned pages (up to 512). 141 * @VMW_BALLOON_CMD_BATCHED_UNLOCK: Inform the hypervisor about a batch of 142 * pages that are about to be deflated from the 143 * balloon (up to 512). 144 * @VMW_BALLOON_CMD_BATCHED_2M_LOCK: Similar to @VMW_BALLOON_CMD_BATCHED_LOCK 145 * for 2MB pages. 146 * @VMW_BALLOON_CMD_BATCHED_2M_UNLOCK: Similar to 147 * @VMW_BALLOON_CMD_BATCHED_UNLOCK for 2MB 148 * pages. 149 * @VMW_BALLOON_CMD_VMCI_DOORBELL_SET: A command to set doorbell notification 150 * that would be invoked when the balloon 151 * size changes. 152 * @VMW_BALLOON_CMD_LAST: Value of the last command. 153 */ 154 enum vmballoon_cmd_type { 155 VMW_BALLOON_CMD_START, 156 VMW_BALLOON_CMD_GET_TARGET, 157 VMW_BALLOON_CMD_LOCK, 158 VMW_BALLOON_CMD_UNLOCK, 159 VMW_BALLOON_CMD_GUEST_ID, 160 /* No command 5 */ 161 VMW_BALLOON_CMD_BATCHED_LOCK = 6, 162 VMW_BALLOON_CMD_BATCHED_UNLOCK, 163 VMW_BALLOON_CMD_BATCHED_2M_LOCK, 164 VMW_BALLOON_CMD_BATCHED_2M_UNLOCK, 165 VMW_BALLOON_CMD_VMCI_DOORBELL_SET, 166 VMW_BALLOON_CMD_LAST = VMW_BALLOON_CMD_VMCI_DOORBELL_SET, 167 }; 168 169 #define VMW_BALLOON_CMD_NUM (VMW_BALLOON_CMD_LAST + 1) 170 171 enum vmballoon_error_codes { 172 VMW_BALLOON_SUCCESS, 173 VMW_BALLOON_ERROR_CMD_INVALID, 174 VMW_BALLOON_ERROR_PPN_INVALID, 175 VMW_BALLOON_ERROR_PPN_LOCKED, 176 VMW_BALLOON_ERROR_PPN_UNLOCKED, 177 VMW_BALLOON_ERROR_PPN_PINNED, 178 VMW_BALLOON_ERROR_PPN_NOTNEEDED, 179 VMW_BALLOON_ERROR_RESET, 180 VMW_BALLOON_ERROR_BUSY 181 }; 182 183 #define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES (0x03000000) 184 185 #define VMW_BALLOON_CMD_WITH_TARGET_MASK \ 186 ((1UL << VMW_BALLOON_CMD_GET_TARGET) | \ 187 (1UL << VMW_BALLOON_CMD_LOCK) | \ 188 (1UL << VMW_BALLOON_CMD_UNLOCK) | \ 189 (1UL << VMW_BALLOON_CMD_BATCHED_LOCK) | \ 190 (1UL << VMW_BALLOON_CMD_BATCHED_UNLOCK) | \ 191 (1UL << VMW_BALLOON_CMD_BATCHED_2M_LOCK) | \ 192 (1UL << VMW_BALLOON_CMD_BATCHED_2M_UNLOCK)) 193 194 static const char * const vmballoon_cmd_names[] = { 195 [VMW_BALLOON_CMD_START] = "start", 196 [VMW_BALLOON_CMD_GET_TARGET] = "target", 197 [VMW_BALLOON_CMD_LOCK] = "lock", 198 [VMW_BALLOON_CMD_UNLOCK] = "unlock", 199 [VMW_BALLOON_CMD_GUEST_ID] = "guestType", 200 [VMW_BALLOON_CMD_BATCHED_LOCK] = "batchLock", 201 [VMW_BALLOON_CMD_BATCHED_UNLOCK] = "batchUnlock", 202 [VMW_BALLOON_CMD_BATCHED_2M_LOCK] = "2m-lock", 203 [VMW_BALLOON_CMD_BATCHED_2M_UNLOCK] = "2m-unlock", 204 [VMW_BALLOON_CMD_VMCI_DOORBELL_SET] = "doorbellSet" 205 }; 206 207 enum vmballoon_stat_page { 208 VMW_BALLOON_PAGE_STAT_ALLOC, 209 VMW_BALLOON_PAGE_STAT_ALLOC_FAIL, 210 VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC, 211 VMW_BALLOON_PAGE_STAT_REFUSED_FREE, 212 VMW_BALLOON_PAGE_STAT_FREE, 213 VMW_BALLOON_PAGE_STAT_LAST = VMW_BALLOON_PAGE_STAT_FREE 214 }; 215 216 #define VMW_BALLOON_PAGE_STAT_NUM (VMW_BALLOON_PAGE_STAT_LAST + 1) 217 218 enum vmballoon_stat_general { 219 VMW_BALLOON_STAT_TIMER, 220 VMW_BALLOON_STAT_DOORBELL, 221 VMW_BALLOON_STAT_RESET, 222 VMW_BALLOON_STAT_LAST = VMW_BALLOON_STAT_RESET 223 }; 224 225 #define VMW_BALLOON_STAT_NUM (VMW_BALLOON_STAT_LAST + 1) 226 227 228 static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching); 229 static DEFINE_STATIC_KEY_FALSE(balloon_stat_enabled); 230 231 struct vmballoon_ctl { 232 struct list_head pages; 233 struct list_head refused_pages; 234 unsigned int n_refused_pages; 235 unsigned int n_pages; 236 enum vmballoon_page_size_type page_size; 237 enum vmballoon_op op; 238 }; 239 240 struct vmballoon_page_size { 241 /* list of reserved physical pages */ 242 struct list_head pages; 243 }; 244 245 /** 246 * struct vmballoon_batch_entry - a batch entry for lock or unlock. 247 * 248 * @status: the status of the operation, which is written by the hypervisor. 249 * @reserved: reserved for future use. Must be set to zero. 250 * @pfn: the physical frame number of the page to be locked or unlocked. 251 */ 252 struct vmballoon_batch_entry { 253 u64 status : 5; 254 u64 reserved : PAGE_SHIFT - 5; 255 u64 pfn : 52; 256 } __packed; 257 258 struct vmballoon { 259 struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES]; 260 261 /** 262 * @max_page_size: maximum supported page size for ballooning. 263 * 264 * Protected by @conf_sem 265 */ 266 enum vmballoon_page_size_type max_page_size; 267 268 /** 269 * @size: balloon actual size in basic page size (frames). 270 * 271 * While we currently do not support size which is bigger than 32-bit, 272 * in preparation for future support, use 64-bits. 273 */ 274 atomic64_t size; 275 276 /** 277 * @target: balloon target size in basic page size (frames). 278 * 279 * We do not protect the target under the assumption that setting the 280 * value is always done through a single write. If this assumption ever 281 * breaks, we would have to use X_ONCE for accesses, and suffer the less 282 * optimized code. Although we may read stale target value if multiple 283 * accesses happen at once, the performance impact should be minor. 284 */ 285 unsigned long target; 286 287 /** 288 * @reset_required: reset flag 289 * 290 * Setting this flag may introduce races, but the code is expected to 291 * handle them gracefully. In the worst case, another operation will 292 * fail as reset did not take place. Clearing the flag is done while 293 * holding @conf_sem for write. 294 */ 295 bool reset_required; 296 297 /** 298 * @capabilities: hypervisor balloon capabilities. 299 * 300 * Protected by @conf_sem. 301 */ 302 unsigned long capabilities; 303 304 /** 305 * @batch_page: pointer to communication batch page. 306 * 307 * When batching is used, batch_page points to a page, which holds up to 308 * %VMW_BALLOON_BATCH_MAX_PAGES entries for locking or unlocking. 309 */ 310 struct vmballoon_batch_entry *batch_page; 311 312 /** 313 * @batch_max_pages: maximum pages that can be locked/unlocked. 314 * 315 * Indicates the number of pages that the hypervisor can lock or unlock 316 * at once, according to whether batching is enabled. If batching is 317 * disabled, only a single page can be locked/unlock on each operation. 318 * 319 * Protected by @conf_sem. 320 */ 321 unsigned int batch_max_pages; 322 323 /** 324 * @page: page to be locked/unlocked by the hypervisor 325 * 326 * @page is only used when batching is disabled and a single page is 327 * reclaimed on each iteration. 328 * 329 * Protected by @comm_lock. 330 */ 331 struct page *page; 332 333 /* statistics */ 334 struct vmballoon_stats *stats; 335 336 #ifdef CONFIG_DEBUG_FS 337 /* debugfs file exporting statistics */ 338 struct dentry *dbg_entry; 339 #endif 340 341 struct delayed_work dwork; 342 343 /** 344 * @vmci_doorbell. 345 * 346 * Protected by @conf_sem. 347 */ 348 struct vmci_handle vmci_doorbell; 349 350 /** 351 * @conf_sem: semaphore to protect the configuration and the statistics. 352 */ 353 struct rw_semaphore conf_sem; 354 355 /** 356 * @comm_lock: lock to protect the communication with the host. 357 * 358 * Lock ordering: @conf_sem -> @comm_lock . 359 */ 360 spinlock_t comm_lock; 361 }; 362 363 static struct vmballoon balloon; 364 365 struct vmballoon_stats { 366 /* timer / doorbell operations */ 367 atomic64_t general_stat[VMW_BALLOON_STAT_NUM]; 368 369 /* allocation statistics for huge and small pages */ 370 atomic64_t 371 page_stat[VMW_BALLOON_PAGE_STAT_NUM][VMW_BALLOON_NUM_PAGE_SIZES]; 372 373 /* Monitor operations: total operations, and failures */ 374 atomic64_t ops[VMW_BALLOON_CMD_NUM][VMW_BALLOON_OP_STAT_TYPES]; 375 }; 376 377 static inline bool is_vmballoon_stats_on(void) 378 { 379 return IS_ENABLED(CONFIG_DEBUG_FS) && 380 static_branch_unlikely(&balloon_stat_enabled); 381 } 382 383 static inline void vmballoon_stats_op_inc(struct vmballoon *b, unsigned int op, 384 enum vmballoon_op_stat_type type) 385 { 386 if (is_vmballoon_stats_on()) 387 atomic64_inc(&b->stats->ops[op][type]); 388 } 389 390 static inline void vmballoon_stats_gen_inc(struct vmballoon *b, 391 enum vmballoon_stat_general stat) 392 { 393 if (is_vmballoon_stats_on()) 394 atomic64_inc(&b->stats->general_stat[stat]); 395 } 396 397 static inline void vmballoon_stats_gen_add(struct vmballoon *b, 398 enum vmballoon_stat_general stat, 399 unsigned int val) 400 { 401 if (is_vmballoon_stats_on()) 402 atomic64_add(val, &b->stats->general_stat[stat]); 403 } 404 405 static inline void vmballoon_stats_page_inc(struct vmballoon *b, 406 enum vmballoon_stat_page stat, 407 enum vmballoon_page_size_type size) 408 { 409 if (is_vmballoon_stats_on()) 410 atomic64_inc(&b->stats->page_stat[stat][size]); 411 } 412 413 static inline void vmballoon_stats_page_add(struct vmballoon *b, 414 enum vmballoon_stat_page stat, 415 enum vmballoon_page_size_type size, 416 unsigned int val) 417 { 418 if (is_vmballoon_stats_on()) 419 atomic64_add(val, &b->stats->page_stat[stat][size]); 420 } 421 422 static inline unsigned long 423 __vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1, 424 unsigned long arg2, unsigned long *result) 425 { 426 unsigned long status, dummy1, dummy2, dummy3, local_result; 427 428 vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_STAT); 429 430 asm volatile ("inl %%dx" : 431 "=a"(status), 432 "=c"(dummy1), 433 "=d"(dummy2), 434 "=b"(local_result), 435 "=S"(dummy3) : 436 "0"(VMW_BALLOON_HV_MAGIC), 437 "1"(cmd), 438 "2"(VMW_BALLOON_HV_PORT), 439 "3"(arg1), 440 "4"(arg2) : 441 "memory"); 442 443 /* update the result if needed */ 444 if (result) 445 *result = (cmd == VMW_BALLOON_CMD_START) ? dummy1 : 446 local_result; 447 448 /* update target when applicable */ 449 if (status == VMW_BALLOON_SUCCESS && 450 ((1ul << cmd) & VMW_BALLOON_CMD_WITH_TARGET_MASK)) 451 WRITE_ONCE(b->target, local_result); 452 453 if (status != VMW_BALLOON_SUCCESS && 454 status != VMW_BALLOON_SUCCESS_WITH_CAPABILITIES) { 455 vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_FAIL_STAT); 456 pr_debug("%s: %s [0x%lx,0x%lx) failed, returned %ld\n", 457 __func__, vmballoon_cmd_names[cmd], arg1, arg2, 458 status); 459 } 460 461 /* mark reset required accordingly */ 462 if (status == VMW_BALLOON_ERROR_RESET) 463 b->reset_required = true; 464 465 return status; 466 } 467 468 static __always_inline unsigned long 469 vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1, 470 unsigned long arg2) 471 { 472 unsigned long dummy; 473 474 return __vmballoon_cmd(b, cmd, arg1, arg2, &dummy); 475 } 476 477 /* 478 * Send "start" command to the host, communicating supported version 479 * of the protocol. 480 */ 481 static int vmballoon_send_start(struct vmballoon *b, unsigned long req_caps) 482 { 483 unsigned long status, capabilities; 484 485 status = __vmballoon_cmd(b, VMW_BALLOON_CMD_START, req_caps, 0, 486 &capabilities); 487 488 switch (status) { 489 case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES: 490 b->capabilities = capabilities; 491 break; 492 case VMW_BALLOON_SUCCESS: 493 b->capabilities = VMW_BALLOON_BASIC_CMDS; 494 break; 495 default: 496 return -EIO; 497 } 498 499 /* 500 * 2MB pages are only supported with batching. If batching is for some 501 * reason disabled, do not use 2MB pages, since otherwise the legacy 502 * mechanism is used with 2MB pages, causing a failure. 503 */ 504 b->max_page_size = VMW_BALLOON_4K_PAGE; 505 if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) && 506 (b->capabilities & VMW_BALLOON_BATCHED_CMDS)) 507 b->max_page_size = VMW_BALLOON_2M_PAGE; 508 509 510 return 0; 511 } 512 513 /** 514 * vmballoon_send_guest_id - communicate guest type to the host. 515 * 516 * @b: pointer to the balloon. 517 * 518 * Communicate guest type to the host so that it can adjust ballooning 519 * algorithm to the one most appropriate for the guest. This command 520 * is normally issued after sending "start" command and is part of 521 * standard reset sequence. 522 * 523 * Return: zero on success or appropriate error code. 524 */ 525 static int vmballoon_send_guest_id(struct vmballoon *b) 526 { 527 unsigned long status; 528 529 status = vmballoon_cmd(b, VMW_BALLOON_CMD_GUEST_ID, 530 VMW_BALLOON_GUEST_ID, 0); 531 532 return status == VMW_BALLOON_SUCCESS ? 0 : -EIO; 533 } 534 535 /** 536 * vmballoon_page_order() - return the order of the page 537 * @page_size: the size of the page. 538 * 539 * Return: the allocation order. 540 */ 541 static inline 542 unsigned int vmballoon_page_order(enum vmballoon_page_size_type page_size) 543 { 544 return page_size == VMW_BALLOON_2M_PAGE ? VMW_BALLOON_2M_ORDER : 0; 545 } 546 547 /** 548 * vmballoon_page_in_frames() - returns the number of frames in a page. 549 * @page_size: the size of the page. 550 * 551 * Return: the number of 4k frames. 552 */ 553 static inline unsigned int 554 vmballoon_page_in_frames(enum vmballoon_page_size_type page_size) 555 { 556 return 1 << vmballoon_page_order(page_size); 557 } 558 559 /** 560 * vmballoon_send_get_target() - Retrieve desired balloon size from the host. 561 * 562 * @b: pointer to the balloon. 563 * 564 * Return: zero on success, EINVAL if limit does not fit in 32-bit, as required 565 * by the host-guest protocol and EIO if an error occurred in communicating with 566 * the host. 567 */ 568 static int vmballoon_send_get_target(struct vmballoon *b) 569 { 570 unsigned long status; 571 unsigned long limit; 572 573 limit = totalram_pages(); 574 575 /* Ensure limit fits in 32-bits */ 576 if (limit != (u32)limit) 577 return -EINVAL; 578 579 status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0); 580 581 return status == VMW_BALLOON_SUCCESS ? 0 : -EIO; 582 } 583 584 /** 585 * vmballoon_alloc_page_list - allocates a list of pages. 586 * 587 * @b: pointer to the balloon. 588 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation. 589 * @req_n_pages: the number of requested pages. 590 * 591 * Tries to allocate @req_n_pages. Add them to the list of balloon pages in 592 * @ctl.pages and updates @ctl.n_pages to reflect the number of pages. 593 * 594 * Return: zero on success or error code otherwise. 595 */ 596 static int vmballoon_alloc_page_list(struct vmballoon *b, 597 struct vmballoon_ctl *ctl, 598 unsigned int req_n_pages) 599 { 600 struct page *page; 601 unsigned int i; 602 603 for (i = 0; i < req_n_pages; i++) { 604 if (ctl->page_size == VMW_BALLOON_2M_PAGE) 605 page = alloc_pages(VMW_HUGE_PAGE_ALLOC_FLAGS, 606 VMW_BALLOON_2M_ORDER); 607 else 608 page = alloc_page(VMW_PAGE_ALLOC_FLAGS); 609 610 /* Update statistics */ 611 vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC, 612 ctl->page_size); 613 614 if (page) { 615 /* Success. Add the page to the list and continue. */ 616 list_add(&page->lru, &ctl->pages); 617 continue; 618 } 619 620 /* Allocation failed. Update statistics and stop. */ 621 vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC_FAIL, 622 ctl->page_size); 623 break; 624 } 625 626 ctl->n_pages = i; 627 628 return req_n_pages == ctl->n_pages ? 0 : -ENOMEM; 629 } 630 631 /** 632 * vmballoon_handle_one_result - Handle lock/unlock result for a single page. 633 * 634 * @b: pointer for %struct vmballoon. 635 * @page: pointer for the page whose result should be handled. 636 * @page_size: size of the page. 637 * @status: status of the operation as provided by the hypervisor. 638 */ 639 static int vmballoon_handle_one_result(struct vmballoon *b, struct page *page, 640 enum vmballoon_page_size_type page_size, 641 unsigned long status) 642 { 643 /* On success do nothing. The page is already on the balloon list. */ 644 if (likely(status == VMW_BALLOON_SUCCESS)) 645 return 0; 646 647 pr_debug("%s: failed comm pfn %lx status %lu page_size %s\n", __func__, 648 page_to_pfn(page), status, 649 vmballoon_page_size_names[page_size]); 650 651 /* Error occurred */ 652 vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC, 653 page_size); 654 655 return -EIO; 656 } 657 658 /** 659 * vmballoon_status_page - returns the status of (un)lock operation 660 * 661 * @b: pointer to the balloon. 662 * @idx: index for the page for which the operation is performed. 663 * @p: pointer to where the page struct is returned. 664 * 665 * Following a lock or unlock operation, returns the status of the operation for 666 * an individual page. Provides the page that the operation was performed on on 667 * the @page argument. 668 * 669 * Returns: The status of a lock or unlock operation for an individual page. 670 */ 671 static unsigned long vmballoon_status_page(struct vmballoon *b, int idx, 672 struct page **p) 673 { 674 if (static_branch_likely(&vmw_balloon_batching)) { 675 /* batching mode */ 676 *p = pfn_to_page(b->batch_page[idx].pfn); 677 return b->batch_page[idx].status; 678 } 679 680 /* non-batching mode */ 681 *p = b->page; 682 683 /* 684 * If a failure occurs, the indication will be provided in the status 685 * of the entire operation, which is considered before the individual 686 * page status. So for non-batching mode, the indication is always of 687 * success. 688 */ 689 return VMW_BALLOON_SUCCESS; 690 } 691 692 /** 693 * vmballoon_lock_op - notifies the host about inflated/deflated pages. 694 * @b: pointer to the balloon. 695 * @num_pages: number of inflated/deflated pages. 696 * @page_size: size of the page. 697 * @op: the type of operation (lock or unlock). 698 * 699 * Notify the host about page(s) that were ballooned (or removed from the 700 * balloon) so that host can use it without fear that guest will need it (or 701 * stop using them since the VM does). Host may reject some pages, we need to 702 * check the return value and maybe submit a different page. The pages that are 703 * inflated/deflated are pointed by @b->page. 704 * 705 * Return: result as provided by the hypervisor. 706 */ 707 static unsigned long vmballoon_lock_op(struct vmballoon *b, 708 unsigned int num_pages, 709 enum vmballoon_page_size_type page_size, 710 enum vmballoon_op op) 711 { 712 unsigned long cmd, pfn; 713 714 lockdep_assert_held(&b->comm_lock); 715 716 if (static_branch_likely(&vmw_balloon_batching)) { 717 if (op == VMW_BALLOON_INFLATE) 718 cmd = page_size == VMW_BALLOON_2M_PAGE ? 719 VMW_BALLOON_CMD_BATCHED_2M_LOCK : 720 VMW_BALLOON_CMD_BATCHED_LOCK; 721 else 722 cmd = page_size == VMW_BALLOON_2M_PAGE ? 723 VMW_BALLOON_CMD_BATCHED_2M_UNLOCK : 724 VMW_BALLOON_CMD_BATCHED_UNLOCK; 725 726 pfn = PHYS_PFN(virt_to_phys(b->batch_page)); 727 } else { 728 cmd = op == VMW_BALLOON_INFLATE ? VMW_BALLOON_CMD_LOCK : 729 VMW_BALLOON_CMD_UNLOCK; 730 pfn = page_to_pfn(b->page); 731 732 /* In non-batching mode, PFNs must fit in 32-bit */ 733 if (unlikely(pfn != (u32)pfn)) 734 return VMW_BALLOON_ERROR_PPN_INVALID; 735 } 736 737 return vmballoon_cmd(b, cmd, pfn, num_pages); 738 } 739 740 /** 741 * vmballoon_add_page - adds a page towards lock/unlock operation. 742 * 743 * @b: pointer to the balloon. 744 * @idx: index of the page to be ballooned in this batch. 745 * @p: pointer to the page that is about to be ballooned. 746 * 747 * Adds the page to be ballooned. Must be called while holding @comm_lock. 748 */ 749 static void vmballoon_add_page(struct vmballoon *b, unsigned int idx, 750 struct page *p) 751 { 752 lockdep_assert_held(&b->comm_lock); 753 754 if (static_branch_likely(&vmw_balloon_batching)) 755 b->batch_page[idx] = (struct vmballoon_batch_entry) 756 { .pfn = page_to_pfn(p) }; 757 else 758 b->page = p; 759 } 760 761 /** 762 * vmballoon_lock - lock or unlock a batch of pages. 763 * 764 * @b: pointer to the balloon. 765 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation. 766 * 767 * Notifies the host of about ballooned pages (after inflation or deflation, 768 * according to @ctl). If the host rejects the page put it on the 769 * @ctl refuse list. These refused page are then released when moving to the 770 * next size of pages. 771 * 772 * Note that we neither free any @page here nor put them back on the ballooned 773 * pages list. Instead we queue it for later processing. We do that for several 774 * reasons. First, we do not want to free the page under the lock. Second, it 775 * allows us to unify the handling of lock and unlock. In the inflate case, the 776 * caller will check if there are too many refused pages and release them. 777 * Although it is not identical to the past behavior, it should not affect 778 * performance. 779 */ 780 static int vmballoon_lock(struct vmballoon *b, struct vmballoon_ctl *ctl) 781 { 782 unsigned long batch_status; 783 struct page *page; 784 unsigned int i, num_pages; 785 786 num_pages = ctl->n_pages; 787 if (num_pages == 0) 788 return 0; 789 790 /* communication with the host is done under the communication lock */ 791 spin_lock(&b->comm_lock); 792 793 i = 0; 794 list_for_each_entry(page, &ctl->pages, lru) 795 vmballoon_add_page(b, i++, page); 796 797 batch_status = vmballoon_lock_op(b, ctl->n_pages, ctl->page_size, 798 ctl->op); 799 800 /* 801 * Iterate over the pages in the provided list. Since we are changing 802 * @ctl->n_pages we are saving the original value in @num_pages and 803 * use this value to bound the loop. 804 */ 805 for (i = 0; i < num_pages; i++) { 806 unsigned long status; 807 808 status = vmballoon_status_page(b, i, &page); 809 810 /* 811 * Failure of the whole batch overrides a single operation 812 * results. 813 */ 814 if (batch_status != VMW_BALLOON_SUCCESS) 815 status = batch_status; 816 817 /* Continue if no error happened */ 818 if (!vmballoon_handle_one_result(b, page, ctl->page_size, 819 status)) 820 continue; 821 822 /* 823 * Error happened. Move the pages to the refused list and update 824 * the pages number. 825 */ 826 list_move(&page->lru, &ctl->refused_pages); 827 ctl->n_pages--; 828 ctl->n_refused_pages++; 829 } 830 831 spin_unlock(&b->comm_lock); 832 833 return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO; 834 } 835 836 /** 837 * vmballoon_release_page_list() - Releases a page list 838 * 839 * @page_list: list of pages to release. 840 * @n_pages: pointer to the number of pages. 841 * @page_size: whether the pages in the list are 2MB (or else 4KB). 842 * 843 * Releases the list of pages and zeros the number of pages. 844 */ 845 static void vmballoon_release_page_list(struct list_head *page_list, 846 int *n_pages, 847 enum vmballoon_page_size_type page_size) 848 { 849 struct page *page, *tmp; 850 851 list_for_each_entry_safe(page, tmp, page_list, lru) { 852 list_del(&page->lru); 853 __free_pages(page, vmballoon_page_order(page_size)); 854 } 855 856 *n_pages = 0; 857 } 858 859 860 /* 861 * Release pages that were allocated while attempting to inflate the 862 * balloon but were refused by the host for one reason or another. 863 */ 864 static void vmballoon_release_refused_pages(struct vmballoon *b, 865 struct vmballoon_ctl *ctl) 866 { 867 vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_FREE, 868 ctl->page_size); 869 870 vmballoon_release_page_list(&ctl->refused_pages, &ctl->n_refused_pages, 871 ctl->page_size); 872 } 873 874 /** 875 * vmballoon_change - retrieve the required balloon change 876 * 877 * @b: pointer for the balloon. 878 * 879 * Return: the required change for the balloon size. A positive number 880 * indicates inflation, a negative number indicates a deflation. 881 */ 882 static int64_t vmballoon_change(struct vmballoon *b) 883 { 884 int64_t size, target; 885 886 size = atomic64_read(&b->size); 887 target = READ_ONCE(b->target); 888 889 /* 890 * We must cast first because of int sizes 891 * Otherwise we might get huge positives instead of negatives 892 */ 893 894 if (b->reset_required) 895 return 0; 896 897 /* consider a 2MB slack on deflate, unless the balloon is emptied */ 898 if (target < size && target != 0 && 899 size - target < vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE)) 900 return 0; 901 902 return target - size; 903 } 904 905 /** 906 * vmballoon_enqueue_page_list() - Enqueues list of pages after inflation. 907 * 908 * @b: pointer to balloon. 909 * @pages: list of pages to enqueue. 910 * @n_pages: pointer to number of pages in list. The value is zeroed. 911 * @page_size: whether the pages are 2MB or 4KB pages. 912 * 913 * Enqueues the provides list of pages in the ballooned page list, clears the 914 * list and zeroes the number of pages that was provided. 915 */ 916 static void vmballoon_enqueue_page_list(struct vmballoon *b, 917 struct list_head *pages, 918 unsigned int *n_pages, 919 enum vmballoon_page_size_type page_size) 920 { 921 struct vmballoon_page_size *page_size_info = &b->page_sizes[page_size]; 922 923 list_splice_init(pages, &page_size_info->pages); 924 *n_pages = 0; 925 } 926 927 /** 928 * vmballoon_dequeue_page_list() - Dequeues page lists for deflation. 929 * 930 * @b: pointer to balloon. 931 * @pages: list of pages to enqueue. 932 * @n_pages: pointer to number of pages in list. The value is zeroed. 933 * @page_size: whether the pages are 2MB or 4KB pages. 934 * @n_req_pages: the number of requested pages. 935 * 936 * Dequeues the number of requested pages from the balloon for deflation. The 937 * number of dequeued pages may be lower, if not enough pages in the requested 938 * size are available. 939 */ 940 static void vmballoon_dequeue_page_list(struct vmballoon *b, 941 struct list_head *pages, 942 unsigned int *n_pages, 943 enum vmballoon_page_size_type page_size, 944 unsigned int n_req_pages) 945 { 946 struct vmballoon_page_size *page_size_info = &b->page_sizes[page_size]; 947 struct page *page, *tmp; 948 unsigned int i = 0; 949 950 list_for_each_entry_safe(page, tmp, &page_size_info->pages, lru) { 951 list_move(&page->lru, pages); 952 if (++i == n_req_pages) 953 break; 954 } 955 *n_pages = i; 956 } 957 958 /** 959 * vmballoon_inflate() - Inflate the balloon towards its target size. 960 * 961 * @b: pointer to the balloon. 962 */ 963 static void vmballoon_inflate(struct vmballoon *b) 964 { 965 int64_t to_inflate_frames; 966 struct vmballoon_ctl ctl = { 967 .pages = LIST_HEAD_INIT(ctl.pages), 968 .refused_pages = LIST_HEAD_INIT(ctl.refused_pages), 969 .page_size = b->max_page_size, 970 .op = VMW_BALLOON_INFLATE 971 }; 972 973 while ((to_inflate_frames = vmballoon_change(b)) > 0) { 974 unsigned int to_inflate_pages, page_in_frames; 975 int alloc_error, lock_error = 0; 976 977 VM_BUG_ON(!list_empty(&ctl.pages)); 978 VM_BUG_ON(ctl.n_pages != 0); 979 980 page_in_frames = vmballoon_page_in_frames(ctl.page_size); 981 982 to_inflate_pages = min_t(unsigned long, b->batch_max_pages, 983 DIV_ROUND_UP_ULL(to_inflate_frames, 984 page_in_frames)); 985 986 /* Start by allocating */ 987 alloc_error = vmballoon_alloc_page_list(b, &ctl, 988 to_inflate_pages); 989 990 /* Actually lock the pages by telling the hypervisor */ 991 lock_error = vmballoon_lock(b, &ctl); 992 993 /* 994 * If an error indicates that something serious went wrong, 995 * stop the inflation. 996 */ 997 if (lock_error) 998 break; 999 1000 /* Update the balloon size */ 1001 atomic64_add(ctl.n_pages * page_in_frames, &b->size); 1002 1003 vmballoon_enqueue_page_list(b, &ctl.pages, &ctl.n_pages, 1004 ctl.page_size); 1005 1006 /* 1007 * If allocation failed or the number of refused pages exceeds 1008 * the maximum allowed, move to the next page size. 1009 */ 1010 if (alloc_error || 1011 ctl.n_refused_pages >= VMW_BALLOON_MAX_REFUSED) { 1012 if (ctl.page_size == VMW_BALLOON_4K_PAGE) 1013 break; 1014 1015 /* 1016 * Ignore errors from locking as we now switch to 4k 1017 * pages and we might get different errors. 1018 */ 1019 vmballoon_release_refused_pages(b, &ctl); 1020 ctl.page_size--; 1021 } 1022 1023 cond_resched(); 1024 } 1025 1026 /* 1027 * Release pages that were allocated while attempting to inflate the 1028 * balloon but were refused by the host for one reason or another, 1029 * and update the statistics. 1030 */ 1031 if (ctl.n_refused_pages != 0) 1032 vmballoon_release_refused_pages(b, &ctl); 1033 } 1034 1035 /** 1036 * vmballoon_deflate() - Decrease the size of the balloon. 1037 * 1038 * @b: pointer to the balloon 1039 * @n_frames: the number of frames to deflate. If zero, automatically 1040 * calculated according to the target size. 1041 * @coordinated: whether to coordinate with the host 1042 * 1043 * Decrease the size of the balloon allowing guest to use more memory. 1044 * 1045 * Return: The number of deflated frames (i.e., basic page size units) 1046 */ 1047 static unsigned long vmballoon_deflate(struct vmballoon *b, uint64_t n_frames, 1048 bool coordinated) 1049 { 1050 unsigned long deflated_frames = 0; 1051 unsigned long tried_frames = 0; 1052 struct vmballoon_ctl ctl = { 1053 .pages = LIST_HEAD_INIT(ctl.pages), 1054 .refused_pages = LIST_HEAD_INIT(ctl.refused_pages), 1055 .page_size = VMW_BALLOON_4K_PAGE, 1056 .op = VMW_BALLOON_DEFLATE 1057 }; 1058 1059 /* free pages to reach target */ 1060 while (true) { 1061 unsigned int to_deflate_pages, n_unlocked_frames; 1062 unsigned int page_in_frames; 1063 int64_t to_deflate_frames; 1064 bool deflated_all; 1065 1066 page_in_frames = vmballoon_page_in_frames(ctl.page_size); 1067 1068 VM_BUG_ON(!list_empty(&ctl.pages)); 1069 VM_BUG_ON(ctl.n_pages); 1070 VM_BUG_ON(!list_empty(&ctl.refused_pages)); 1071 VM_BUG_ON(ctl.n_refused_pages); 1072 1073 /* 1074 * If we were requested a specific number of frames, we try to 1075 * deflate this number of frames. Otherwise, deflation is 1076 * performed according to the target and balloon size. 1077 */ 1078 to_deflate_frames = n_frames ? n_frames - tried_frames : 1079 -vmballoon_change(b); 1080 1081 /* break if no work to do */ 1082 if (to_deflate_frames <= 0) 1083 break; 1084 1085 /* 1086 * Calculate the number of frames based on current page size, 1087 * but limit the deflated frames to a single chunk 1088 */ 1089 to_deflate_pages = min_t(unsigned long, b->batch_max_pages, 1090 DIV_ROUND_UP_ULL(to_deflate_frames, 1091 page_in_frames)); 1092 1093 /* First take the pages from the balloon pages. */ 1094 vmballoon_dequeue_page_list(b, &ctl.pages, &ctl.n_pages, 1095 ctl.page_size, to_deflate_pages); 1096 1097 /* 1098 * Before pages are moving to the refused list, count their 1099 * frames as frames that we tried to deflate. 1100 */ 1101 tried_frames += ctl.n_pages * page_in_frames; 1102 1103 /* 1104 * Unlock the pages by communicating with the hypervisor if the 1105 * communication is coordinated (i.e., not pop). We ignore the 1106 * return code. Instead we check if all the pages we manage to 1107 * unlock all the pages. If we failed, we will move to the next 1108 * page size, and would eventually try again later. 1109 */ 1110 if (coordinated) 1111 vmballoon_lock(b, &ctl); 1112 1113 /* 1114 * Check if we deflated enough. We will move to the next page 1115 * size if we did not manage to do so. This calculation takes 1116 * place now, as once the pages are released, the number of 1117 * pages is zeroed. 1118 */ 1119 deflated_all = (ctl.n_pages == to_deflate_pages); 1120 1121 /* Update local and global counters */ 1122 n_unlocked_frames = ctl.n_pages * page_in_frames; 1123 atomic64_sub(n_unlocked_frames, &b->size); 1124 deflated_frames += n_unlocked_frames; 1125 1126 vmballoon_stats_page_add(b, VMW_BALLOON_PAGE_STAT_FREE, 1127 ctl.page_size, ctl.n_pages); 1128 1129 /* free the ballooned pages */ 1130 vmballoon_release_page_list(&ctl.pages, &ctl.n_pages, 1131 ctl.page_size); 1132 1133 /* Return the refused pages to the ballooned list. */ 1134 vmballoon_enqueue_page_list(b, &ctl.refused_pages, 1135 &ctl.n_refused_pages, 1136 ctl.page_size); 1137 1138 /* If we failed to unlock all the pages, move to next size. */ 1139 if (!deflated_all) { 1140 if (ctl.page_size == b->max_page_size) 1141 break; 1142 ctl.page_size++; 1143 } 1144 1145 cond_resched(); 1146 } 1147 1148 return deflated_frames; 1149 } 1150 1151 /** 1152 * vmballoon_deinit_batching - disables batching mode. 1153 * 1154 * @b: pointer to &struct vmballoon. 1155 * 1156 * Disables batching, by deallocating the page for communication with the 1157 * hypervisor and disabling the static key to indicate that batching is off. 1158 */ 1159 static void vmballoon_deinit_batching(struct vmballoon *b) 1160 { 1161 free_page((unsigned long)b->batch_page); 1162 b->batch_page = NULL; 1163 static_branch_disable(&vmw_balloon_batching); 1164 b->batch_max_pages = 1; 1165 } 1166 1167 /** 1168 * vmballoon_init_batching - enable batching mode. 1169 * 1170 * @b: pointer to &struct vmballoon. 1171 * 1172 * Enables batching, by allocating a page for communication with the hypervisor 1173 * and enabling the static_key to use batching. 1174 * 1175 * Return: zero on success or an appropriate error-code. 1176 */ 1177 static int vmballoon_init_batching(struct vmballoon *b) 1178 { 1179 struct page *page; 1180 1181 page = alloc_page(GFP_KERNEL | __GFP_ZERO); 1182 if (!page) 1183 return -ENOMEM; 1184 1185 b->batch_page = page_address(page); 1186 b->batch_max_pages = PAGE_SIZE / sizeof(struct vmballoon_batch_entry); 1187 1188 static_branch_enable(&vmw_balloon_batching); 1189 1190 return 0; 1191 } 1192 1193 /* 1194 * Receive notification and resize balloon 1195 */ 1196 static void vmballoon_doorbell(void *client_data) 1197 { 1198 struct vmballoon *b = client_data; 1199 1200 vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_DOORBELL); 1201 1202 mod_delayed_work(system_freezable_wq, &b->dwork, 0); 1203 } 1204 1205 /* 1206 * Clean up vmci doorbell 1207 */ 1208 static void vmballoon_vmci_cleanup(struct vmballoon *b) 1209 { 1210 vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET, 1211 VMCI_INVALID_ID, VMCI_INVALID_ID); 1212 1213 if (!vmci_handle_is_invalid(b->vmci_doorbell)) { 1214 vmci_doorbell_destroy(b->vmci_doorbell); 1215 b->vmci_doorbell = VMCI_INVALID_HANDLE; 1216 } 1217 } 1218 1219 /** 1220 * vmballoon_vmci_init - Initialize vmci doorbell. 1221 * 1222 * @b: pointer to the balloon. 1223 * 1224 * Return: zero on success or when wakeup command not supported. Error-code 1225 * otherwise. 1226 * 1227 * Initialize vmci doorbell, to get notified as soon as balloon changes. 1228 */ 1229 static int vmballoon_vmci_init(struct vmballoon *b) 1230 { 1231 unsigned long error; 1232 1233 if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0) 1234 return 0; 1235 1236 error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB, 1237 VMCI_PRIVILEGE_FLAG_RESTRICTED, 1238 vmballoon_doorbell, b); 1239 1240 if (error != VMCI_SUCCESS) 1241 goto fail; 1242 1243 error = __vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET, 1244 b->vmci_doorbell.context, 1245 b->vmci_doorbell.resource, NULL); 1246 1247 if (error != VMW_BALLOON_SUCCESS) 1248 goto fail; 1249 1250 return 0; 1251 fail: 1252 vmballoon_vmci_cleanup(b); 1253 return -EIO; 1254 } 1255 1256 /** 1257 * vmballoon_pop - Quickly release all pages allocate for the balloon. 1258 * 1259 * @b: pointer to the balloon. 1260 * 1261 * This function is called when host decides to "reset" balloon for one reason 1262 * or another. Unlike normal "deflate" we do not (shall not) notify host of the 1263 * pages being released. 1264 */ 1265 static void vmballoon_pop(struct vmballoon *b) 1266 { 1267 unsigned long size; 1268 1269 while ((size = atomic64_read(&b->size))) 1270 vmballoon_deflate(b, size, false); 1271 } 1272 1273 /* 1274 * Perform standard reset sequence by popping the balloon (in case it 1275 * is not empty) and then restarting protocol. This operation normally 1276 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command. 1277 */ 1278 static void vmballoon_reset(struct vmballoon *b) 1279 { 1280 int error; 1281 1282 down_write(&b->conf_sem); 1283 1284 vmballoon_vmci_cleanup(b); 1285 1286 /* free all pages, skipping monitor unlock */ 1287 vmballoon_pop(b); 1288 1289 if (vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES)) 1290 return; 1291 1292 if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) { 1293 if (vmballoon_init_batching(b)) { 1294 /* 1295 * We failed to initialize batching, inform the monitor 1296 * about it by sending a null capability. 1297 * 1298 * The guest will retry in one second. 1299 */ 1300 vmballoon_send_start(b, 0); 1301 return; 1302 } 1303 } else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) { 1304 vmballoon_deinit_batching(b); 1305 } 1306 1307 vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_RESET); 1308 b->reset_required = false; 1309 1310 error = vmballoon_vmci_init(b); 1311 if (error) 1312 pr_err("failed to initialize vmci doorbell\n"); 1313 1314 if (vmballoon_send_guest_id(b)) 1315 pr_err("failed to send guest ID to the host\n"); 1316 1317 up_write(&b->conf_sem); 1318 } 1319 1320 /** 1321 * vmballoon_work - periodic balloon worker for reset, inflation and deflation. 1322 * 1323 * @work: pointer to the &work_struct which is provided by the workqueue. 1324 * 1325 * Resets the protocol if needed, gets the new size and adjusts balloon as 1326 * needed. Repeat in 1 sec. 1327 */ 1328 static void vmballoon_work(struct work_struct *work) 1329 { 1330 struct delayed_work *dwork = to_delayed_work(work); 1331 struct vmballoon *b = container_of(dwork, struct vmballoon, dwork); 1332 int64_t change = 0; 1333 1334 if (b->reset_required) 1335 vmballoon_reset(b); 1336 1337 down_read(&b->conf_sem); 1338 1339 /* 1340 * Update the stats while holding the semaphore to ensure that 1341 * @stats_enabled is consistent with whether the stats are actually 1342 * enabled 1343 */ 1344 vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_TIMER); 1345 1346 if (!vmballoon_send_get_target(b)) 1347 change = vmballoon_change(b); 1348 1349 if (change != 0) { 1350 pr_debug("%s - size: %llu, target %lu\n", __func__, 1351 atomic64_read(&b->size), READ_ONCE(b->target)); 1352 1353 if (change > 0) 1354 vmballoon_inflate(b); 1355 else /* (change < 0) */ 1356 vmballoon_deflate(b, 0, true); 1357 } 1358 1359 up_read(&b->conf_sem); 1360 1361 /* 1362 * We are using a freezable workqueue so that balloon operations are 1363 * stopped while the system transitions to/from sleep/hibernation. 1364 */ 1365 queue_delayed_work(system_freezable_wq, 1366 dwork, round_jiffies_relative(HZ)); 1367 1368 } 1369 1370 /* 1371 * DEBUGFS Interface 1372 */ 1373 #ifdef CONFIG_DEBUG_FS 1374 1375 static const char * const vmballoon_stat_page_names[] = { 1376 [VMW_BALLOON_PAGE_STAT_ALLOC] = "alloc", 1377 [VMW_BALLOON_PAGE_STAT_ALLOC_FAIL] = "allocFail", 1378 [VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC] = "errAlloc", 1379 [VMW_BALLOON_PAGE_STAT_REFUSED_FREE] = "errFree", 1380 [VMW_BALLOON_PAGE_STAT_FREE] = "free" 1381 }; 1382 1383 static const char * const vmballoon_stat_names[] = { 1384 [VMW_BALLOON_STAT_TIMER] = "timer", 1385 [VMW_BALLOON_STAT_DOORBELL] = "doorbell", 1386 [VMW_BALLOON_STAT_RESET] = "reset", 1387 }; 1388 1389 static int vmballoon_enable_stats(struct vmballoon *b) 1390 { 1391 int r = 0; 1392 1393 down_write(&b->conf_sem); 1394 1395 /* did we somehow race with another reader which enabled stats? */ 1396 if (b->stats) 1397 goto out; 1398 1399 b->stats = kzalloc(sizeof(*b->stats), GFP_KERNEL); 1400 1401 if (!b->stats) { 1402 /* allocation failed */ 1403 r = -ENOMEM; 1404 goto out; 1405 } 1406 static_key_enable(&balloon_stat_enabled.key); 1407 out: 1408 up_write(&b->conf_sem); 1409 return r; 1410 } 1411 1412 /** 1413 * vmballoon_debug_show - shows statistics of balloon operations. 1414 * @f: pointer to the &struct seq_file. 1415 * @offset: ignored. 1416 * 1417 * Provides the statistics that can be accessed in vmmemctl in the debugfs. 1418 * To avoid the overhead - mainly that of memory - of collecting the statistics, 1419 * we only collect statistics after the first time the counters are read. 1420 * 1421 * Return: zero on success or an error code. 1422 */ 1423 static int vmballoon_debug_show(struct seq_file *f, void *offset) 1424 { 1425 struct vmballoon *b = f->private; 1426 int i, j; 1427 1428 /* enables stats if they are disabled */ 1429 if (!b->stats) { 1430 int r = vmballoon_enable_stats(b); 1431 1432 if (r) 1433 return r; 1434 } 1435 1436 /* format capabilities info */ 1437 seq_printf(f, "%-22s: %#16x\n", "balloon capabilities", 1438 VMW_BALLOON_CAPABILITIES); 1439 seq_printf(f, "%-22s: %#16lx\n", "used capabilities", b->capabilities); 1440 seq_printf(f, "%-22s: %16s\n", "is resetting", 1441 b->reset_required ? "y" : "n"); 1442 1443 /* format size info */ 1444 seq_printf(f, "%-22s: %16lu\n", "target", READ_ONCE(b->target)); 1445 seq_printf(f, "%-22s: %16llu\n", "current", atomic64_read(&b->size)); 1446 1447 for (i = 0; i < VMW_BALLOON_CMD_NUM; i++) { 1448 if (vmballoon_cmd_names[i] == NULL) 1449 continue; 1450 1451 seq_printf(f, "%-22s: %16llu (%llu failed)\n", 1452 vmballoon_cmd_names[i], 1453 atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_STAT]), 1454 atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_FAIL_STAT])); 1455 } 1456 1457 for (i = 0; i < VMW_BALLOON_STAT_NUM; i++) 1458 seq_printf(f, "%-22s: %16llu\n", 1459 vmballoon_stat_names[i], 1460 atomic64_read(&b->stats->general_stat[i])); 1461 1462 for (i = 0; i < VMW_BALLOON_PAGE_STAT_NUM; i++) { 1463 for (j = 0; j < VMW_BALLOON_NUM_PAGE_SIZES; j++) 1464 seq_printf(f, "%-18s(%s): %16llu\n", 1465 vmballoon_stat_page_names[i], 1466 vmballoon_page_size_names[j], 1467 atomic64_read(&b->stats->page_stat[i][j])); 1468 } 1469 1470 return 0; 1471 } 1472 1473 DEFINE_SHOW_ATTRIBUTE(vmballoon_debug); 1474 1475 static int __init vmballoon_debugfs_init(struct vmballoon *b) 1476 { 1477 int error; 1478 1479 b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b, 1480 &vmballoon_debug_fops); 1481 if (IS_ERR(b->dbg_entry)) { 1482 error = PTR_ERR(b->dbg_entry); 1483 pr_err("failed to create debugfs entry, error: %d\n", error); 1484 return error; 1485 } 1486 1487 return 0; 1488 } 1489 1490 static void __exit vmballoon_debugfs_exit(struct vmballoon *b) 1491 { 1492 static_key_disable(&balloon_stat_enabled.key); 1493 debugfs_remove(b->dbg_entry); 1494 kfree(b->stats); 1495 b->stats = NULL; 1496 } 1497 1498 #else 1499 1500 static inline int vmballoon_debugfs_init(struct vmballoon *b) 1501 { 1502 return 0; 1503 } 1504 1505 static inline void vmballoon_debugfs_exit(struct vmballoon *b) 1506 { 1507 } 1508 1509 #endif /* CONFIG_DEBUG_FS */ 1510 1511 static int __init vmballoon_init(void) 1512 { 1513 enum vmballoon_page_size_type page_size; 1514 int error; 1515 1516 /* 1517 * Check if we are running on VMware's hypervisor and bail out 1518 * if we are not. 1519 */ 1520 if (x86_hyper_type != X86_HYPER_VMWARE) 1521 return -ENODEV; 1522 1523 for (page_size = VMW_BALLOON_4K_PAGE; 1524 page_size <= VMW_BALLOON_LAST_SIZE; page_size++) 1525 INIT_LIST_HEAD(&balloon.page_sizes[page_size].pages); 1526 1527 1528 INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work); 1529 1530 error = vmballoon_debugfs_init(&balloon); 1531 if (error) 1532 return error; 1533 1534 spin_lock_init(&balloon.comm_lock); 1535 init_rwsem(&balloon.conf_sem); 1536 balloon.vmci_doorbell = VMCI_INVALID_HANDLE; 1537 balloon.batch_page = NULL; 1538 balloon.page = NULL; 1539 balloon.reset_required = true; 1540 1541 queue_delayed_work(system_freezable_wq, &balloon.dwork, 0); 1542 1543 return 0; 1544 } 1545 1546 /* 1547 * Using late_initcall() instead of module_init() allows the balloon to use the 1548 * VMCI doorbell even when the balloon is built into the kernel. Otherwise the 1549 * VMCI is probed only after the balloon is initialized. If the balloon is used 1550 * as a module, late_initcall() is equivalent to module_init(). 1551 */ 1552 late_initcall(vmballoon_init); 1553 1554 static void __exit vmballoon_exit(void) 1555 { 1556 vmballoon_vmci_cleanup(&balloon); 1557 cancel_delayed_work_sync(&balloon.dwork); 1558 1559 vmballoon_debugfs_exit(&balloon); 1560 1561 /* 1562 * Deallocate all reserved memory, and reset connection with monitor. 1563 * Reset connection before deallocating memory to avoid potential for 1564 * additional spurious resets from guest touching deallocated pages. 1565 */ 1566 vmballoon_send_start(&balloon, 0); 1567 vmballoon_pop(&balloon); 1568 } 1569 module_exit(vmballoon_exit); 1570