// SPDX-License-Identifier: GPL-2.0
/*
 * VMware Balloon driver.
 *
 * Copyright (C) 2000-2018, VMware, Inc. All Rights Reserved.
 *
 * This is VMware physical memory management driver for Linux. The driver
 * acts like a "balloon" that can be inflated to reclaim physical pages by
 * reserving them in the guest and invalidating them in the monitor,
 * freeing up the underlying machine pages so they can be allocated to
 * other guests. The balloon can also be deflated to allow the guest to
 * use more physical memory. Higher level policies can control the sizes
 * of balloons in VMs in order to manage physical memory resources.
 */

//#define DEBUG
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <asm/hypervisor.h>

MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
MODULE_VERSION("1.5.0.0-k");
MODULE_ALIAS("dmi:*:svnVMware*:*");
MODULE_ALIAS("vmware_vmmemctl");
MODULE_LICENSE("GPL");

/*
 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't
 * allow wait (__GFP_RECLAIM) for NOSLEEP page allocations. Use
 * __GFP_NOWARN, to suppress page allocation failure warnings.
 */
#define VMW_PAGE_ALLOC_NOSLEEP		(__GFP_HIGHMEM|__GFP_NOWARN)

/*
 * Use GFP_HIGHUSER when executing in a separate kernel thread
 * context and allocation can sleep. This is less stressful to
 * the guest memory system, since it allows the thread to block
 * while memory is reclaimed, and won't take pages from emergency
 * low-memory pools.
 */
#define VMW_PAGE_ALLOC_CANSLEEP		(GFP_HIGHUSER)

/* Maximum number of refused pages we accumulate during inflation cycle */
#define VMW_BALLOON_MAX_REFUSED		16

/*
 * Hypervisor communication port definitions.
 */
#define VMW_BALLOON_HV_PORT		0x5670
#define VMW_BALLOON_HV_MAGIC		0x456c6d6f
#define VMW_BALLOON_GUEST_ID		1	/* Linux */

enum vmwballoon_capabilities {
	/*
	 * Bit 0 is reserved and not associated to any capability.
	 */
	VMW_BALLOON_BASIC_CMDS			= (1 << 1),
	VMW_BALLOON_BATCHED_CMDS		= (1 << 2),
	VMW_BALLOON_BATCHED_2M_CMDS		= (1 << 3),
	VMW_BALLOON_SIGNALLED_WAKEUP_CMD	= (1 << 4),
};

#define VMW_BALLOON_CAPABILITIES	(VMW_BALLOON_BASIC_CMDS \
					| VMW_BALLOON_BATCHED_CMDS \
					| VMW_BALLOON_BATCHED_2M_CMDS \
					| VMW_BALLOON_SIGNALLED_WAKEUP_CMD)

#define VMW_BALLOON_2M_SHIFT		(9)
#define VMW_BALLOON_NUM_PAGE_SIZES	(2)
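/*
 * With the usual 4 KiB base page, VMW_BALLOON_2M_SHIFT of 9 means a "2M"
 * balloon page spans 1 << 9 = 512 small pages (512 * 4 KiB = 2 MiB).
 * Arrays sized VMW_BALLOON_NUM_PAGE_SIZES are indexed by an is_2m_pages
 * boolean throughout this file: [false] for 4 KiB, [true] for 2 MiB.
 */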
/*
 * Backdoor commands availability:
 *
 * START, GET_TARGET and GUEST_ID are always available.
 *
 * VMW_BALLOON_BASIC_CMDS:
 *	LOCK and UNLOCK commands,
 * VMW_BALLOON_BATCHED_CMDS:
 *	BATCHED_LOCK and BATCHED_UNLOCK commands,
 * VMW_BALLOON_BATCHED_2M_CMDS:
 *	BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands,
 * VMW_BALLOON_SIGNALLED_WAKEUP_CMD:
 *	VMW_BALLOON_CMD_VMCI_DOORBELL_SET command.
 */
#define VMW_BALLOON_CMD_START			0
#define VMW_BALLOON_CMD_GET_TARGET		1
#define VMW_BALLOON_CMD_LOCK			2
#define VMW_BALLOON_CMD_UNLOCK			3
#define VMW_BALLOON_CMD_GUEST_ID		4
#define VMW_BALLOON_CMD_BATCHED_LOCK		6
#define VMW_BALLOON_CMD_BATCHED_UNLOCK		7
#define VMW_BALLOON_CMD_BATCHED_2M_LOCK		8
#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK	9
#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET	10


/* error codes */
#define VMW_BALLOON_SUCCESS			0
#define VMW_BALLOON_FAILURE			-1
#define VMW_BALLOON_ERROR_CMD_INVALID		1
#define VMW_BALLOON_ERROR_PPN_INVALID		2
#define VMW_BALLOON_ERROR_PPN_LOCKED		3
#define VMW_BALLOON_ERROR_PPN_UNLOCKED		4
#define VMW_BALLOON_ERROR_PPN_PINNED		5
#define VMW_BALLOON_ERROR_PPN_NOTNEEDED		6
#define VMW_BALLOON_ERROR_RESET			7
#define VMW_BALLOON_ERROR_BUSY			8

#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES	(0x03000000)

/* Batch page description */

/*
 * Layout of a page in the batch page:
 *
 *	+-------------+----------+--------+
 *	|             |          |        |
 *	| Page number | Reserved | Status |
 *	|             |          |        |
 *	+-------------+----------+--------+
 *	64  PAGE_SHIFT            6       0
 *
 * The reserved field should be set to 0.
 */
#define VMW_BALLOON_BATCH_MAX_PAGES	(PAGE_SIZE / sizeof(u64))
#define VMW_BALLOON_BATCH_STATUS_MASK	((1UL << 5) - 1)
#define VMW_BALLOON_BATCH_PAGE_MASK	(~((1UL << PAGE_SHIFT) - 1))

struct vmballoon_batch_page {
	u64 pages[VMW_BALLOON_BATCH_MAX_PAGES];
};

static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx)
{
	return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK;
}

static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch,
				int idx)
{
	return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK);
}

static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx,
				u64 pa)
{
	batch->pages[idx] = pa;
}


#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result)		\
({								\
	unsigned long __status, __dummy1, __dummy2, __dummy3;	\
	__asm__ __volatile__ ("inl %%dx" :			\
		"=a"(__status),					\
		"=c"(__dummy1),					\
		"=d"(__dummy2),					\
		"=b"(result),					\
		"=S" (__dummy3) :				\
		"0"(VMW_BALLOON_HV_MAGIC),			\
		"1"(VMW_BALLOON_CMD_##cmd),			\
		"2"(VMW_BALLOON_HV_PORT),			\
		"3"(arg1),					\
		"4" (arg2) :					\
		"memory");					\
	if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START)	\
		result = __dummy1;				\
	result &= -1UL;						\
	__status & -1UL;					\
})
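/*
 * As can be read off the asm constraints above: the magic goes in %eax,
 * the command in %ecx, the port in %edx, arg1 in %ebx and arg2 in %esi;
 * the hypervisor returns the status in %eax and a secondary result in
 * %ebx (in %ecx for START, which reports the capabilities). A typical
 * invocation therefore looks like:
 *
 *	unsigned long target, dummy = 0;
 *	unsigned long status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy,
 *						  target);
 *	if (status == VMW_BALLOON_SUCCESS)
 *		// target now holds the host-requested balloon size
 *
 * (Illustrative sketch only; the real call sites below also route the
 * status through vmballoon_check_status().)
 */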
#ifdef CONFIG_DEBUG_FS
struct vmballoon_stats {
	unsigned int timer;
	unsigned int doorbell;

	/* allocation statistics */
	unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int sleep_alloc;
	unsigned int sleep_alloc_fail;
	unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];

	/* monitor operations */
	unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int target;
	unsigned int target_fail;
	unsigned int start;
	unsigned int start_fail;
	unsigned int guest_type;
	unsigned int guest_type_fail;
	unsigned int doorbell_set;
	unsigned int doorbell_unset;
};

#define STATS_INC(stat) (stat)++
#else
#define STATS_INC(stat)
#endif

struct vmballoon;

struct vmballoon_ops {
	void (*add_page)(struct vmballoon *b, int idx, struct page *p);
	int (*lock)(struct vmballoon *b, unsigned int num_pages,
			bool is_2m_pages, unsigned int *target);
	int (*unlock)(struct vmballoon *b, unsigned int num_pages,
			bool is_2m_pages, unsigned int *target);
};

struct vmballoon_page_size {
	/* list of reserved physical pages */
	struct list_head pages;

	/* transient list of non-balloonable pages */
	struct list_head refused_pages;
	unsigned int n_refused_pages;
};

struct vmballoon {
	struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];

	/* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */
	unsigned supported_page_sizes;

	/* balloon size in pages */
	unsigned int size;
	unsigned int target;

	/* reset flag */
	bool reset_required;

	unsigned long capabilities;

	struct vmballoon_batch_page *batch_page;
	unsigned int batch_max_pages;
	struct page *page;

	const struct vmballoon_ops *ops;

#ifdef CONFIG_DEBUG_FS
	/* statistics */
	struct vmballoon_stats stats;

	/* debugfs file exporting statistics */
	struct dentry *dbg_entry;
#endif

	struct sysinfo sysinfo;

	struct delayed_work dwork;

	struct vmci_handle vmci_doorbell;
};

static struct vmballoon balloon;

/*
 * Send "start" command to the host, communicating supported version
 * of the protocol.
 */
static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
{
	unsigned long status, capabilities, dummy = 0;
	bool success;

	STATS_INC(b->stats.start);

	status = VMWARE_BALLOON_CMD(START, req_caps, dummy, capabilities);

	switch (status) {
	case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
		b->capabilities = capabilities;
		success = true;
		break;
	case VMW_BALLOON_SUCCESS:
		b->capabilities = VMW_BALLOON_BASIC_CMDS;
		success = true;
		break;
	default:
		success = false;
	}

	/*
	 * 2MB pages are only supported with batching. If batching is for some
	 * reason disabled, do not use 2MB pages, since otherwise the legacy
	 * mechanism is used with 2MB pages, causing a failure.
	 */
	if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) &&
	    (b->capabilities & VMW_BALLOON_BATCHED_CMDS))
		b->supported_page_sizes = 2;
	else
		b->supported_page_sizes = 1;

	if (!success) {
		pr_debug("%s - failed, hv returns %ld\n", __func__, status);
		STATS_INC(b->stats.start_fail);
	}
	return success;
}

static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
{
	switch (status) {
	case VMW_BALLOON_SUCCESS:
		return true;

	case VMW_BALLOON_ERROR_RESET:
		b->reset_required = true;
		/* fall through */

	default:
		return false;
	}
}

/*
 * Communicate guest type to the host so that it can adjust ballooning
 * algorithm to the one most appropriate for the guest. This command
 * is normally issued after sending "start" command and is part of
 * standard reset sequence.
 */
static bool vmballoon_send_guest_id(struct vmballoon *b)
{
	unsigned long status, dummy = 0;

	status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy,
				dummy);

	STATS_INC(b->stats.guest_type);

	if (vmballoon_check_status(b, status))
		return true;

	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
	STATS_INC(b->stats.guest_type_fail);
	return false;
}

static u16 vmballoon_page_size(bool is_2m_page)
{
	if (is_2m_page)
		return 1 << VMW_BALLOON_2M_SHIFT;

	return 1;
}
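/*
 * For example, vmballoon_page_size(true) == 1 << 9 == 512: a single 2 MB
 * page contributes 512 units to b->size and b->target, so both counters
 * are kept in 4 KB page units regardless of the backing page size.
 */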
/*
 * Retrieve desired balloon size from the host.
 */
static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
{
	unsigned long status;
	unsigned long target;
	unsigned long limit;
	unsigned long dummy = 0;
	u32 limit32;

	/*
	 * si_meminfo() is cheap. Moreover, we want to provide dynamic
	 * max balloon size later. So let us call si_meminfo() every
	 * iteration.
	 */
	si_meminfo(&b->sysinfo);
	limit = b->sysinfo.totalram;

	/* Ensure limit fits in 32-bits */
	limit32 = (u32)limit;
	if (limit != limit32)
		return false;

	/* update stats */
	STATS_INC(b->stats.target);

	status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target);
	if (vmballoon_check_status(b, status)) {
		*new_target = target;
		return true;
	}

	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
	STATS_INC(b->stats.target_fail);
	return false;
}

/*
 * Notify the host about an allocated page so that the host can use it
 * without fear that the guest will need it. The host may reject some
 * pages; we need to check the return value and maybe submit a different
 * page.
 */
static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
				unsigned int *hv_status, unsigned int *target)
{
	unsigned long status, dummy = 0;
	u32 pfn32;

	pfn32 = (u32)pfn;
	if (pfn32 != pfn)
		return -EINVAL;

	STATS_INC(b->stats.lock[false]);

	*hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy, *target);
	if (vmballoon_check_status(b, status))
		return 0;

	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.lock_fail[false]);
	return -EIO;
}

static int vmballoon_send_batched_lock(struct vmballoon *b,
		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
{
	unsigned long status;
	unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));

	STATS_INC(b->stats.lock[is_2m_pages]);

	if (is_2m_pages)
		status = VMWARE_BALLOON_CMD(BATCHED_2M_LOCK, pfn, num_pages,
				*target);
	else
		status = VMWARE_BALLOON_CMD(BATCHED_LOCK, pfn, num_pages,
				*target);

	if (vmballoon_check_status(b, status))
		return 0;

	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.lock_fail[is_2m_pages]);
	return 1;
}
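/*
 * Note on the batched variants: only the PFN of the shared batch page is
 * passed to the hypervisor; the per-page physical addresses were written
 * into its slots beforehand (vmballoon_batch_set_pa()), and the hypervisor
 * reports a per-slot status in the entries' low bits, read back later via
 * vmballoon_batch_get_status().
 */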
/*
 * Notify the host that the guest intends to release the given page back
 * into the pool of available (to the guest) pages.
 */
static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn,
						unsigned int *target)
{
	unsigned long status, dummy = 0;
	u32 pfn32;

	pfn32 = (u32)pfn;
	if (pfn32 != pfn)
		return false;

	STATS_INC(b->stats.unlock[false]);

	status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy, *target);
	if (vmballoon_check_status(b, status))
		return true;

	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.unlock_fail[false]);
	return false;
}

static bool vmballoon_send_batched_unlock(struct vmballoon *b,
		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
{
	unsigned long status;
	unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));

	STATS_INC(b->stats.unlock[is_2m_pages]);

	if (is_2m_pages)
		status = VMWARE_BALLOON_CMD(BATCHED_2M_UNLOCK, pfn, num_pages,
				*target);
	else
		status = VMWARE_BALLOON_CMD(BATCHED_UNLOCK, pfn, num_pages,
				*target);

	if (vmballoon_check_status(b, status))
		return true;

	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.unlock_fail[is_2m_pages]);
	return false;
}

static struct page *vmballoon_alloc_page(gfp_t flags, bool is_2m_page)
{
	if (is_2m_page)
		return alloc_pages(flags, VMW_BALLOON_2M_SHIFT);

	return alloc_page(flags);
}

static void vmballoon_free_page(struct page *page, bool is_2m_page)
{
	if (is_2m_page)
		__free_pages(page, VMW_BALLOON_2M_SHIFT);
	else
		__free_page(page);
}
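/*
 * A 2 MB balloon page is an order-VMW_BALLOON_2M_SHIFT (order-9)
 * allocation, i.e. 512 physically contiguous 4 KB pages, so allocation
 * and freeing must both use the same order, as the two helpers above do.
 */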
/*
 * Quickly release all pages allocated for the balloon. This function is
 * called when host decides to "reset" balloon for one reason or another.
 * Unlike normal "deflate" we do not (shall not) notify host of the pages
 * being released.
 */
static void vmballoon_pop(struct vmballoon *b)
{
	struct page *page, *next;
	unsigned is_2m_pages;

	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
			is_2m_pages++) {
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];
		u16 size_per_page = vmballoon_page_size(is_2m_pages);

		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
			list_del(&page->lru);
			vmballoon_free_page(page, is_2m_pages);
			STATS_INC(b->stats.free[is_2m_pages]);
			b->size -= size_per_page;
			cond_resched();
		}
	}

	/* Clearing the batch_page unconditionally has no adverse effect */
	free_page((unsigned long)b->batch_page);
	b->batch_page = NULL;
}

/*
 * Notify the host of a ballooned page. If the host rejects the page, put
 * it on the refused list; those refused pages are then released at the
 * end of the inflation cycle.
 */
static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages,
				bool is_2m_pages, unsigned int *target)
{
	int locked, hv_status;
	struct page *page = b->page;
	struct vmballoon_page_size *page_size = &b->page_sizes[false];

	/* is_2m_pages can never happen as 2m pages support implies batching */

	locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status,
								target);
	if (locked) {
		STATS_INC(b->stats.refused_alloc[false]);

		if (locked == -EIO &&
		    (hv_status == VMW_BALLOON_ERROR_RESET ||
		     hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED)) {
			vmballoon_free_page(page, false);
			return -EIO;
		}

		/*
		 * Place page on the list of non-balloonable pages
		 * and retry allocation, unless we already accumulated
		 * too many of them, in which case take a breather.
		 */
		if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
			page_size->n_refused_pages++;
			list_add(&page->lru, &page_size->refused_pages);
		} else {
			vmballoon_free_page(page, false);
		}
		return locked;
	}

	/* track allocated page */
	list_add(&page->lru, &page_size->pages);

	/* update balloon size */
	b->size++;

	return 0;
}
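/*
 * The VMW_BALLOON_MAX_REFUSED cap (16) bounds the refused_pages list per
 * inflation cycle: up to 16 host-refused pages are parked there (and
 * returned to the system at the end of the cycle by
 * vmballoon_release_refused_pages()); any further refusals are freed on
 * the spot.
 */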
static int vmballoon_lock_batched_page(struct vmballoon *b,
		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
{
	int locked, i;
	u16 size_per_page = vmballoon_page_size(is_2m_pages);

	locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages,
			target);
	if (locked > 0) {
		for (i = 0; i < num_pages; i++) {
			u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
			struct page *p = pfn_to_page(pa >> PAGE_SHIFT);

			vmballoon_free_page(p, is_2m_pages);
		}

		return -EIO;
	}

	for (i = 0; i < num_pages; i++) {
		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		locked = vmballoon_batch_get_status(b->batch_page, i);

		switch (locked) {
		case VMW_BALLOON_SUCCESS:
			list_add(&p->lru, &page_size->pages);
			b->size += size_per_page;
			break;
		case VMW_BALLOON_ERROR_PPN_PINNED:
		case VMW_BALLOON_ERROR_PPN_INVALID:
			if (page_size->n_refused_pages
					< VMW_BALLOON_MAX_REFUSED) {
				list_add(&p->lru, &page_size->refused_pages);
				page_size->n_refused_pages++;
				break;
			}
			/* Fallthrough */
		case VMW_BALLOON_ERROR_RESET:
		case VMW_BALLOON_ERROR_PPN_NOTNEEDED:
			vmballoon_free_page(p, is_2m_pages);
			break;
		default:
			/* This should never happen */
			WARN_ON_ONCE(true);
		}
	}

	return 0;
}

/*
 * Release the page allocated for the balloon. Note that we first notify
 * the host so it can make sure the page will be available for the guest
 * to use, if needed.
 */
static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages,
		bool is_2m_pages, unsigned int *target)
{
	struct page *page = b->page;
	struct vmballoon_page_size *page_size = &b->page_sizes[false];

	/* is_2m_pages can never happen as 2m pages support implies batching */

	if (!vmballoon_send_unlock_page(b, page_to_pfn(page), target)) {
		list_add(&page->lru, &page_size->pages);
		return -EIO;
	}

	/* deallocate page */
	vmballoon_free_page(page, false);
	STATS_INC(b->stats.free[false]);

	/* update balloon size */
	b->size--;

	return 0;
}
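/*
 * In both unlock paths a page the hypervisor failed to unlock is put back
 * on the balloon's page list rather than freed: from the host's point of
 * view it is presumably still ballooned, so the guest must not reuse it.
 */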
static int vmballoon_unlock_batched_page(struct vmballoon *b,
				unsigned int num_pages, bool is_2m_pages,
				unsigned int *target)
{
	int locked, i, ret = 0;
	bool hv_success;
	u16 size_per_page = vmballoon_page_size(is_2m_pages);

	hv_success = vmballoon_send_batched_unlock(b, num_pages, is_2m_pages,
			target);
	if (!hv_success)
		ret = -EIO;

	for (i = 0; i < num_pages; i++) {
		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		locked = vmballoon_batch_get_status(b->batch_page, i);
		if (!hv_success || locked != VMW_BALLOON_SUCCESS) {
			/*
			 * That page wasn't successfully unlocked by the
			 * hypervisor, re-add it to the list of pages owned by
			 * the balloon driver.
			 */
			list_add(&p->lru, &page_size->pages);
		} else {
			/* deallocate page */
			vmballoon_free_page(p, is_2m_pages);
			STATS_INC(b->stats.free[is_2m_pages]);

			/* update balloon size */
			b->size -= size_per_page;
		}
	}

	return ret;
}

/*
 * Release pages that were allocated while attempting to inflate the
 * balloon but were refused by the host for one reason or another.
 */
static void vmballoon_release_refused_pages(struct vmballoon *b,
		bool is_2m_pages)
{
	struct page *page, *next;
	struct vmballoon_page_size *page_size =
			&b->page_sizes[is_2m_pages];

	list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) {
		list_del(&page->lru);
		vmballoon_free_page(page, is_2m_pages);
		STATS_INC(b->stats.refused_free[is_2m_pages]);
	}

	page_size->n_refused_pages = 0;
}

static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
{
	b->page = p;
}

static void vmballoon_add_batched_page(struct vmballoon *b, int idx,
				struct page *p)
{
	vmballoon_batch_set_pa(b->batch_page, idx,
			(u64)page_to_pfn(p) << PAGE_SHIFT);
}
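/*
 * These two implementations mirror the two protocols: the basic one just
 * remembers a single page in b->page, while the batched one stores the
 * page's physical address in slot idx of the batch page, to be consumed
 * by the next (BATCHED_)LOCK/UNLOCK hypercall.
 */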
/*
 * Inflate the balloon towards its target size. Note that we try to limit
 * the rate of allocation to make sure we are not choking the rest of the
 * system.
 */
static void vmballoon_inflate(struct vmballoon *b)
{
	unsigned int num_pages = 0;
	int error = 0;
	gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP;
	bool is_2m_pages;

	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);

	/*
	 * First try NOSLEEP page allocations to inflate balloon.
	 *
	 * If we do not throttle nosleep allocations, we can drain all
	 * free pages in the guest quickly (if the balloon target is high).
	 * As a side-effect, draining free pages helps to inform (force)
	 * the guest to start swapping if balloon target is not met yet,
	 * which is a desired behavior. However, balloon driver can consume
	 * all available CPU cycles if too many pages are allocated in a
	 * second. Therefore, we throttle nosleep allocations even when
	 * the guest is not under memory pressure. OTOH, if we have already
	 * predicted that the guest is under memory pressure, then we
	 * slowdown page allocations considerably.
	 */

	/*
	 * Start with no sleep allocation rate which may be higher
	 * than sleeping allocation rate.
	 */
	is_2m_pages = b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES;

	pr_debug("%s - goal: %d", __func__, b->target - b->size);

	while (!b->reset_required &&
		b->size + num_pages * vmballoon_page_size(is_2m_pages)
		< b->target) {
		struct page *page;

		if (flags == VMW_PAGE_ALLOC_NOSLEEP)
			STATS_INC(b->stats.alloc[is_2m_pages]);
		else
			STATS_INC(b->stats.sleep_alloc);

		page = vmballoon_alloc_page(flags, is_2m_pages);
		if (!page) {
			STATS_INC(b->stats.alloc_fail[is_2m_pages]);

			if (is_2m_pages) {
				b->ops->lock(b, num_pages, true, &b->target);

				/*
				 * ignore errors from locking as we now switch
				 * to 4k pages and we might get different
				 * errors.
				 */

				num_pages = 0;
				is_2m_pages = false;
				continue;
			}

			if (flags == VMW_PAGE_ALLOC_CANSLEEP) {
				/*
				 * CANSLEEP page allocation failed, so guest
				 * is under severe memory pressure. We just log
				 * the event, but do not stop the inflation
				 * due to its negative impact on performance.
				 */
				STATS_INC(b->stats.sleep_alloc_fail);
				break;
			}

			/*
			 * NOSLEEP page allocation failed, so the guest is
			 * under memory pressure. Slowing down page allocations
			 * seems to be reasonable, but doing so might actually
			 * cause the hypervisor to throttle us down, resulting
			 * in degraded performance. We will count on the
			 * scheduler and standard memory management mechanisms
			 * for now.
			 */
			flags = VMW_PAGE_ALLOC_CANSLEEP;
			continue;
		}

		b->ops->add_page(b, num_pages++, page);
		if (num_pages == b->batch_max_pages) {
			error = b->ops->lock(b, num_pages, is_2m_pages,
					&b->target);
			num_pages = 0;
			if (error)
				break;
		}

		cond_resched();
	}

	if (num_pages > 0)
		b->ops->lock(b, num_pages, is_2m_pages, &b->target);

	vmballoon_release_refused_pages(b, true);
	vmballoon_release_refused_pages(b, false);
}

/*
 * Decrease the size of the balloon allowing guest to use more memory.
 */
static void vmballoon_deflate(struct vmballoon *b)
{
	unsigned is_2m_pages;

	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);

	/* free pages to reach target */
	for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes;
			is_2m_pages++) {
		struct page *page, *next;
		unsigned int num_pages = 0;
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
			if (b->reset_required ||
				(b->target > 0 &&
					b->size - num_pages
					* vmballoon_page_size(is_2m_pages)
				< b->target + vmballoon_page_size(true)))
				break;

			list_del(&page->lru);
			b->ops->add_page(b, num_pages++, page);

			if (num_pages == b->batch_max_pages) {
				int error;

				error = b->ops->unlock(b, num_pages,
						is_2m_pages, &b->target);
				num_pages = 0;
				if (error)
					return;
			}

			cond_resched();
		}

		if (num_pages > 0)
			b->ops->unlock(b, num_pages, is_2m_pages, &b->target);
	}
}
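/*
 * Note the slack of vmballoon_page_size(true) (one 2 MB page, i.e. 512
 * 4 KB pages) in the deflate condition above and in vmballoon_work():
 * the balloon is only shrunk once it exceeds the target by more than one
 * 2 MB page, presumably to avoid thrashing around the target.
 */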
static const struct vmballoon_ops vmballoon_basic_ops = {
	.add_page = vmballoon_add_page,
	.lock = vmballoon_lock_page,
	.unlock = vmballoon_unlock_page
};

static const struct vmballoon_ops vmballoon_batched_ops = {
	.add_page = vmballoon_add_batched_page,
	.lock = vmballoon_lock_batched_page,
	.unlock = vmballoon_unlock_batched_page
};

static bool vmballoon_init_batching(struct vmballoon *b)
{
	struct page *page;

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page)
		return false;

	b->batch_page = page_address(page);
	return true;
}

/*
 * Receive notification and resize balloon
 */
static void vmballoon_doorbell(void *client_data)
{
	struct vmballoon *b = client_data;

	STATS_INC(b->stats.doorbell);

	mod_delayed_work(system_freezable_wq, &b->dwork, 0);
}

/*
 * Clean up vmci doorbell
 */
static void vmballoon_vmci_cleanup(struct vmballoon *b)
{
	int error;

	VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, VMCI_INVALID_ID,
			VMCI_INVALID_ID, error);
	STATS_INC(b->stats.doorbell_unset);

	if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
		vmci_doorbell_destroy(b->vmci_doorbell);
		b->vmci_doorbell = VMCI_INVALID_HANDLE;
	}
}

/*
 * Initialize vmci doorbell, to get notified as soon as balloon changes
 */
static int vmballoon_vmci_init(struct vmballoon *b)
{
	unsigned long error, dummy;

	if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0)
		return 0;

	error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB,
				VMCI_PRIVILEGE_FLAG_RESTRICTED,
				vmballoon_doorbell, b);

	if (error != VMCI_SUCCESS)
		goto fail;

	error = VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, b->vmci_doorbell.context,
				b->vmci_doorbell.resource, dummy);

	STATS_INC(b->stats.doorbell_set);

	if (error != VMW_BALLOON_SUCCESS)
		goto fail;

	return 0;
fail:
	vmballoon_vmci_cleanup(b);
	return -EIO;
}
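/*
 * Once the doorbell is registered, the host can invoke
 * vmballoon_doorbell() to reschedule the worker with zero delay, so
 * target changes are acted upon immediately instead of waiting for the
 * next tick of the 1-second timer in vmballoon_work().
 */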
/*
 * Perform standard reset sequence by popping the balloon (in case it
 * is not empty) and then restarting protocol. This operation normally
 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
 */
static void vmballoon_reset(struct vmballoon *b)
{
	int error;

	vmballoon_vmci_cleanup(b);

	/* free all pages, skipping monitor unlock */
	vmballoon_pop(b);

	if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
		return;

	if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
		b->ops = &vmballoon_batched_ops;
		b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
		if (!vmballoon_init_batching(b)) {
			/*
			 * We failed to initialize batching, inform the monitor
			 * about it by sending a null capability.
			 *
			 * The guest will retry in one second.
			 */
			vmballoon_send_start(b, 0);
			return;
		}
	} else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
		b->ops = &vmballoon_basic_ops;
		b->batch_max_pages = 1;
	}

	b->reset_required = false;

	error = vmballoon_vmci_init(b);
	if (error)
		pr_err("failed to initialize vmci doorbell\n");

	if (!vmballoon_send_guest_id(b))
		pr_err("failed to send guest ID to the host\n");
}

/*
 * Balloon work function: reset protocol, if needed, get the new size and
 * adjust balloon as needed. Repeat in 1 sec.
 */
static void vmballoon_work(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
	unsigned int target;

	STATS_INC(b->stats.timer);

	if (b->reset_required)
		vmballoon_reset(b);

	if (!b->reset_required && vmballoon_send_get_target(b, &target)) {
		/* update target, adjust size */
		b->target = target;

		if (b->size < target)
			vmballoon_inflate(b);
		else if (target == 0 ||
				b->size > target + vmballoon_page_size(true))
			vmballoon_deflate(b);
	}

	/*
	 * We are using a freezable workqueue so that balloon operations are
	 * stopped while the system transitions to/from sleep/hibernation.
	 */
	queue_delayed_work(system_freezable_wq,
			dwork, round_jiffies_relative(HZ));
}
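/*
 * The statistics collected above are exported through a debugfs file
 * named "vmmemctl" in the debugfs root, i.e. typically
 * /sys/kernel/debug/vmmemctl on systems with debugfs mounted in the
 * standard location.
 */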
/*
 * DEBUGFS Interface
 */
#ifdef CONFIG_DEBUG_FS

static int vmballoon_debug_show(struct seq_file *f, void *offset)
{
	struct vmballoon *b = f->private;
	struct vmballoon_stats *stats = &b->stats;

	/* format capabilities info */
	seq_printf(f,
		"balloon capabilities:   %#4x\n"
		"used capabilities:      %#4lx\n"
		"is resetting:           %c\n",
		VMW_BALLOON_CAPABILITIES, b->capabilities,
		b->reset_required ? 'y' : 'n');

	/* format size info */
	seq_printf(f,
		"target:             %8d pages\n"
		"current:            %8d pages\n",
		b->target, b->size);

	seq_printf(f,
		"\n"
		"timer:              %8u\n"
		"doorbell:           %8u\n"
		"start:              %8u (%4u failed)\n"
		"guestType:          %8u (%4u failed)\n"
		"2m-lock:            %8u (%4u failed)\n"
		"lock:               %8u (%4u failed)\n"
		"2m-unlock:          %8u (%4u failed)\n"
		"unlock:             %8u (%4u failed)\n"
		"target:             %8u (%4u failed)\n"
		"prim2mAlloc:        %8u (%4u failed)\n"
		"primNoSleepAlloc:   %8u (%4u failed)\n"
		"primCanSleepAlloc:  %8u (%4u failed)\n"
		"prim2mFree:         %8u\n"
		"primFree:           %8u\n"
		"err2mAlloc:         %8u\n"
		"errAlloc:           %8u\n"
		"err2mFree:          %8u\n"
		"errFree:            %8u\n"
		"doorbellSet:        %8u\n"
		"doorbellUnset:      %8u\n",
		stats->timer,
		stats->doorbell,
		stats->start, stats->start_fail,
		stats->guest_type, stats->guest_type_fail,
		stats->lock[true], stats->lock_fail[true],
		stats->lock[false], stats->lock_fail[false],
		stats->unlock[true], stats->unlock_fail[true],
		stats->unlock[false], stats->unlock_fail[false],
		stats->target, stats->target_fail,
		stats->alloc[true], stats->alloc_fail[true],
		stats->alloc[false], stats->alloc_fail[false],
		stats->sleep_alloc, stats->sleep_alloc_fail,
		stats->free[true],
		stats->free[false],
		stats->refused_alloc[true], stats->refused_alloc[false],
		stats->refused_free[true], stats->refused_free[false],
		stats->doorbell_set, stats->doorbell_unset);

	return 0;
}

static int vmballoon_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, vmballoon_debug_show, inode->i_private);
}

static const struct file_operations vmballoon_debug_fops = {
	.owner		= THIS_MODULE,
	.open		= vmballoon_debug_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init vmballoon_debugfs_init(struct vmballoon *b)
{
	int error;

	b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
					&vmballoon_debug_fops);
	if (IS_ERR(b->dbg_entry)) {
		error = PTR_ERR(b->dbg_entry);
		pr_err("failed to create debugfs entry, error: %d\n", error);
		return error;
	}

	return 0;
}

static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
{
	debugfs_remove(b->dbg_entry);
}

#else

static inline int vmballoon_debugfs_init(struct vmballoon *b)
{
	return 0;
}

static inline void vmballoon_debugfs_exit(struct vmballoon *b)
{
}

#endif	/* CONFIG_DEBUG_FS */

static int __init vmballoon_init(void)
{
	int error;
	unsigned is_2m_pages;

	/*
	 * Check if we are running on VMware's hypervisor and bail out
	 * if we are not.
	 */
	if (x86_hyper_type != X86_HYPER_VMWARE)
		return -ENODEV;

	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
			is_2m_pages++) {
		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages);
		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages);
	}

	INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);

	error = vmballoon_debugfs_init(&balloon);
	if (error)
		return error;

	balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
	balloon.batch_page = NULL;
	balloon.page = NULL;
	balloon.reset_required = true;

	queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);

	return 0;
}

/*
 * Using late_initcall() instead of module_init() allows the balloon to use
 * the VMCI doorbell even when the balloon is built into the kernel. Otherwise
 * VMCI is probed only after the balloon is initialized. If the balloon is
 * used as a module, late_initcall() is equivalent to module_init().
 */
late_initcall(vmballoon_init);

static void __exit vmballoon_exit(void)
{
	vmballoon_vmci_cleanup(&balloon);
	cancel_delayed_work_sync(&balloon.dwork);

	vmballoon_debugfs_exit(&balloon);

	/*
	 * Deallocate all reserved memory, and reset connection with monitor.
	 * Reset connection before deallocating memory to avoid potential for
	 * additional spurious resets from guest touching deallocated pages.
	 */
	vmballoon_send_start(&balloon, 0);
	vmballoon_pop(&balloon);
}
module_exit(vmballoon_exit);