/*
 * VMware Balloon driver.
 *
 * Copyright (C) 2000-2014, VMware, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Maintained by:	Xavier Deguillard <xdeguillard@vmware.com>
 *			Philip Moltmann <moltmann@vmware.com>
 */

/*
 * This is the VMware physical memory management driver for Linux. The
 * driver acts like a "balloon" that can be inflated to reclaim physical
 * pages by reserving them in the guest and invalidating them in the
 * monitor, freeing up the underlying machine pages so they can be
 * allocated to other guests. The balloon can also be deflated to allow
 * the guest to use more physical memory. Higher level policies can
 * control the sizes of balloons in VMs in order to manage physical
 * memory resources.
 */

//#define DEBUG
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <asm/hypervisor.h>

MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
MODULE_VERSION("1.5.0.0-k");
MODULE_ALIAS("dmi:*:svnVMware*:*");
MODULE_ALIAS("vmware_vmmemctl");
MODULE_LICENSE("GPL");

/*
 * Various constants controlling the rate of inflating/deflating the
 * balloon, measured in pages.
 */

/*
 * Rates of memory allocation when the guest experiences memory pressure
 * (driver performs sleeping allocations).
 */
#define VMW_BALLOON_RATE_ALLOC_MIN	512U
#define VMW_BALLOON_RATE_ALLOC_MAX	2048U
#define VMW_BALLOON_RATE_ALLOC_INC	16U

/*
 * When the guest is under memory pressure, use a reduced page allocation
 * rate for the next several cycles.
 */
#define VMW_BALLOON_SLOW_CYCLES		4

/*
 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't
 * allow wait (__GFP_RECLAIM) for NOSLEEP page allocations. Use
 * __GFP_NOWARN, to suppress page allocation failure warnings.
 */
#define VMW_PAGE_ALLOC_NOSLEEP		(__GFP_HIGHMEM|__GFP_NOWARN)

/*
 * Use GFP_HIGHUSER when executing in a separate kernel thread
 * context and allocation can sleep. This is less stressful to
 * the guest memory system, since it allows the thread to block
 * while memory is reclaimed, and won't take pages from emergency
 * low-memory pools.
 */
#define VMW_PAGE_ALLOC_CANSLEEP		(GFP_HIGHUSER)

/* Maximum number of refused pages we accumulate during inflation cycle */
#define VMW_BALLOON_MAX_REFUSED		16
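
/*
 * Illustrative sketch only; vmballoon_try_alloc() is a hypothetical
 * helper, not used by this driver. It shows how the two allocation
 * flavors above are meant to combine: a fast NOSLEEP attempt first,
 * and only on failure a CANSLEEP allocation that may block while the
 * guest reclaims memory. vmballoon_inflate() below applies the same
 * policy, with rate limiting and a 2m-to-4k fallback on top.
 */
static inline struct page *vmballoon_try_alloc(void)
{
	/* Fast path: never sleeps, never warns on failure. */
	struct page *page = alloc_page(VMW_PAGE_ALLOC_NOSLEEP);

	/* Slow path: may block while the guest frees up memory. */
	if (!page)
		page = alloc_page(VMW_PAGE_ALLOC_CANSLEEP);

	return page;
}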
/*
 * Hypervisor communication port definitions.
 */
#define VMW_BALLOON_HV_PORT		0x5670
#define VMW_BALLOON_HV_MAGIC		0x456c6d6f
#define VMW_BALLOON_GUEST_ID		1	/* Linux */

enum vmwballoon_capabilities {
	/*
	 * Bit 0 is reserved and not associated to any capability.
	 */
	VMW_BALLOON_BASIC_CMDS		= (1 << 1),
	VMW_BALLOON_BATCHED_CMDS	= (1 << 2),
	VMW_BALLOON_BATCHED_2M_CMDS	= (1 << 3),
	VMW_BALLOON_SIGNALLED_WAKEUP_CMD = (1 << 4),
};

#define VMW_BALLOON_CAPABILITIES	(VMW_BALLOON_BASIC_CMDS \
					| VMW_BALLOON_BATCHED_CMDS \
					| VMW_BALLOON_BATCHED_2M_CMDS \
					| VMW_BALLOON_SIGNALLED_WAKEUP_CMD)

#define VMW_BALLOON_2M_SHIFT		(9)
#define VMW_BALLOON_NUM_PAGE_SIZES	(2)

/*
 * Backdoor commands availability:
 *
 * START, GET_TARGET and GUEST_ID are always available.
 *
 * VMW_BALLOON_BASIC_CMDS:
 *	LOCK and UNLOCK commands,
 * VMW_BALLOON_BATCHED_CMDS:
 *	BATCHED_LOCK and BATCHED_UNLOCK commands,
 * VMW_BALLOON_BATCHED_2M_CMDS:
 *	BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands,
 * VMW_BALLOON_SIGNALLED_WAKEUP_CMD:
 *	VMW_BALLOON_CMD_VMCI_DOORBELL_SET command.
 */
#define VMW_BALLOON_CMD_START			0
#define VMW_BALLOON_CMD_GET_TARGET		1
#define VMW_BALLOON_CMD_LOCK			2
#define VMW_BALLOON_CMD_UNLOCK			3
#define VMW_BALLOON_CMD_GUEST_ID		4
#define VMW_BALLOON_CMD_BATCHED_LOCK		6
#define VMW_BALLOON_CMD_BATCHED_UNLOCK		7
#define VMW_BALLOON_CMD_BATCHED_2M_LOCK		8
#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK	9
#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET	10


/* error codes */
#define VMW_BALLOON_SUCCESS			0
#define VMW_BALLOON_FAILURE			-1
#define VMW_BALLOON_ERROR_CMD_INVALID		1
#define VMW_BALLOON_ERROR_PPN_INVALID		2
#define VMW_BALLOON_ERROR_PPN_LOCKED		3
#define VMW_BALLOON_ERROR_PPN_UNLOCKED		4
#define VMW_BALLOON_ERROR_PPN_PINNED		5
#define VMW_BALLOON_ERROR_PPN_NOTNEEDED		6
#define VMW_BALLOON_ERROR_RESET			7
#define VMW_BALLOON_ERROR_BUSY			8

#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES	(0x03000000)

/* Batch page description */

/*
 * Layout of a page in the batch page:
 *
 * +-------------+----------+--------+
 * |             |          |        |
 * | Page number | Reserved | Status |
 * |             |          |        |
 * +-------------+----------+--------+
 * 64  PAGE_SHIFT            6       0
 *
 * The reserved field should be set to 0.
 */
#define VMW_BALLOON_BATCH_MAX_PAGES	(PAGE_SIZE / sizeof(u64))
#define VMW_BALLOON_BATCH_STATUS_MASK	((1UL << 5) - 1)
#define VMW_BALLOON_BATCH_PAGE_MASK	(~((1UL << PAGE_SHIFT) - 1))

struct vmballoon_batch_page {
	u64 pages[VMW_BALLOON_BATCH_MAX_PAGES];
};

static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx)
{
	return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK;
}

static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch,
				int idx)
{
	return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK);
}

static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx,
				u64 pa)
{
	batch->pages[idx] = pa;
}
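
/*
 * Illustrative sketch only; vmballoon_batch_entry_example() is a
 * hypothetical helper, not used by this driver. It shows a round trip
 * through the accessors above: the guest stores a page's physical
 * address in a slot, and after the hypercall the hypervisor has written
 * a status code into the low-order bits of the same slot, which is why
 * vmballoon_batch_get_pa() masks off everything below PAGE_SHIFT.
 */
static inline void vmballoon_batch_entry_example(
				struct vmballoon_batch_page *batch,
				struct page *p)
{
	u64 pa = (u64)page_to_pfn(p) << PAGE_SHIFT;

	/* Guest side: stage the page's physical address in slot 0. */
	vmballoon_batch_set_pa(batch, 0, pa);

	/* Host side: both fields now decode from the one slot. */
	if (vmballoon_batch_get_status(batch, 0) != VMW_BALLOON_SUCCESS)
		pr_debug("entry 0 (pa %#llx) was refused\n",
			 vmballoon_batch_get_pa(batch, 0));
}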
#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result)		\
({								\
	unsigned long __status, __dummy1, __dummy2, __dummy3;	\
	__asm__ __volatile__ ("inl %%dx" :			\
		"=a"(__status),					\
		"=c"(__dummy1),					\
		"=d"(__dummy2),					\
		"=b"(result),					\
		"=S" (__dummy3) :				\
		"0"(VMW_BALLOON_HV_MAGIC),			\
		"1"(VMW_BALLOON_CMD_##cmd),			\
		"2"(VMW_BALLOON_HV_PORT),			\
		"3"(arg1),					\
		"4" (arg2) :					\
		"memory");					\
	if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START)	\
		result = __dummy1;				\
	result &= -1UL;						\
	__status & -1UL;					\
})

#ifdef CONFIG_DEBUG_FS
struct vmballoon_stats {
	unsigned int timer;
	unsigned int doorbell;

	/* allocation statistics */
	unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int sleep_alloc;
	unsigned int sleep_alloc_fail;
	unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];

	/* monitor operations */
	unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int target;
	unsigned int target_fail;
	unsigned int start;
	unsigned int start_fail;
	unsigned int guest_type;
	unsigned int guest_type_fail;
	unsigned int doorbell_set;
	unsigned int doorbell_unset;
};

#define STATS_INC(stat) (stat)++
#else
#define STATS_INC(stat)
#endif

struct vmballoon;

struct vmballoon_ops {
	void (*add_page)(struct vmballoon *b, int idx, struct page *p);
	int (*lock)(struct vmballoon *b, unsigned int num_pages,
			bool is_2m_pages, unsigned int *target);
	int (*unlock)(struct vmballoon *b, unsigned int num_pages,
			bool is_2m_pages, unsigned int *target);
};

struct vmballoon_page_size {
	/* list of reserved physical pages */
	struct list_head pages;

	/* transient list of non-balloonable pages */
	struct list_head refused_pages;
	unsigned int n_refused_pages;
};

struct vmballoon {
	struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];

	/* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */
	unsigned supported_page_sizes;

	/* balloon size in pages */
	unsigned int size;
	unsigned int target;

	/* reset flag */
	bool reset_required;

	/* adjustment rates (pages per second) */
	unsigned int rate_alloc;

	/* slowdown page allocations for next few cycles */
	unsigned int slow_allocation_cycles;

	unsigned long capabilities;

	struct vmballoon_batch_page *batch_page;
	unsigned int batch_max_pages;
	struct page *page;

	const struct vmballoon_ops *ops;

#ifdef CONFIG_DEBUG_FS
	/* statistics */
	struct vmballoon_stats stats;

	/* debugfs file exporting statistics */
	struct dentry *dbg_entry;
#endif

	struct sysinfo sysinfo;

	struct delayed_work dwork;

	struct vmci_handle vmci_doorbell;
};

static struct vmballoon balloon;

/*
 * Send the "start" command to the host, communicating the supported
 * version of the protocol.
 */
static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
{
	unsigned long status, capabilities, dummy = 0;
	bool success;

	STATS_INC(b->stats.start);

	status = VMWARE_BALLOON_CMD(START, req_caps, dummy, capabilities);

	switch (status) {
	case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
		b->capabilities = capabilities;
		success = true;
		break;
	case VMW_BALLOON_SUCCESS:
		b->capabilities = VMW_BALLOON_BASIC_CMDS;
		success = true;
		break;
	default:
		success = false;
	}

	if (b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS)
		b->supported_page_sizes = 2;
	else
		b->supported_page_sizes = 1;

	if (!success) {
		pr_debug("%s - failed, hv returns %ld\n", __func__, status);
		STATS_INC(b->stats.start_fail);
	}
	return success;
}

static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
{
	switch (status) {
	case VMW_BALLOON_SUCCESS:
		return true;

	case VMW_BALLOON_ERROR_RESET:
		b->reset_required = true;
		/* fall through */

	default:
		return false;
	}
}

/*
 * Communicate the guest type to the host so that it can adjust the
 * ballooning algorithm to the one most appropriate for the guest. This
 * command is normally issued after sending the "start" command and is
 * part of the standard reset sequence.
 */
static bool vmballoon_send_guest_id(struct vmballoon *b)
{
	unsigned long status, dummy = 0;

	status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy,
				dummy);

	STATS_INC(b->stats.guest_type);

	if (vmballoon_check_status(b, status))
		return true;

	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
	STATS_INC(b->stats.guest_type_fail);
	return false;
}

static u16 vmballoon_page_size(bool is_2m_page)
{
	if (is_2m_page)
		return 1 << VMW_BALLOON_2M_SHIFT;

	return 1;
}
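
/*
 * Worked example of the accounting helper above: with 4 KiB base pages,
 * a 2m page spans 1 << VMW_BALLOON_2M_SHIFT == 512 base pages, so
 * ballooning one 2m page moves b->size by 512 while a 4k page moves it
 * by 1.
 */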
/*
 * Retrieve the desired balloon size from the host.
 */
static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
{
	unsigned long status;
	unsigned long target;
	unsigned long limit;
	unsigned long dummy = 0;
	u32 limit32;

	/*
	 * si_meminfo() is cheap. Moreover, we want to provide dynamic
	 * max balloon size later. So let us call si_meminfo() every
	 * iteration.
	 */
	si_meminfo(&b->sysinfo);
	limit = b->sysinfo.totalram;

	/* Ensure limit fits in 32-bits */
	limit32 = (u32)limit;
	if (limit != limit32)
		return false;

	/* update stats */
	STATS_INC(b->stats.target);

	status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target);
	if (vmballoon_check_status(b, status)) {
		*new_target = target;
		return true;
	}

	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
	STATS_INC(b->stats.target_fail);
	return false;
}

/*
 * Notify the host about an allocated page so that the host can use it
 * without fear that the guest will need it. The host may reject some
 * pages; we need to check the return value and maybe submit a different
 * page.
 */
static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
				unsigned int *hv_status, unsigned int *target)
{
	unsigned long status, dummy = 0;
	u32 pfn32;

	pfn32 = (u32)pfn;
	if (pfn32 != pfn)
		return -1;

	STATS_INC(b->stats.lock[false]);

	*hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy, *target);
	if (vmballoon_check_status(b, status))
		return 0;

	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.lock_fail[false]);
	return 1;
}

static int vmballoon_send_batched_lock(struct vmballoon *b,
		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
{
	unsigned long status;
	unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));

	STATS_INC(b->stats.lock[is_2m_pages]);

	if (is_2m_pages)
		status = VMWARE_BALLOON_CMD(BATCHED_2M_LOCK, pfn, num_pages,
				*target);
	else
		status = VMWARE_BALLOON_CMD(BATCHED_LOCK, pfn, num_pages,
				*target);

	if (vmballoon_check_status(b, status))
		return 0;

	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.lock_fail[is_2m_pages]);
	return 1;
}
/*
 * Notify the host that the guest intends to release the given page back
 * into the pool of available (to the guest) pages.
 */
static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn,
						unsigned int *target)
{
	unsigned long status, dummy = 0;
	u32 pfn32;

	pfn32 = (u32)pfn;
	if (pfn32 != pfn)
		return false;

	STATS_INC(b->stats.unlock[false]);

	status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy, *target);
	if (vmballoon_check_status(b, status))
		return true;

	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.unlock_fail[false]);
	return false;
}

static bool vmballoon_send_batched_unlock(struct vmballoon *b,
		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
{
	unsigned long status;
	unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));

	STATS_INC(b->stats.unlock[is_2m_pages]);

	if (is_2m_pages)
		status = VMWARE_BALLOON_CMD(BATCHED_2M_UNLOCK, pfn, num_pages,
				*target);
	else
		status = VMWARE_BALLOON_CMD(BATCHED_UNLOCK, pfn, num_pages,
				*target);

	if (vmballoon_check_status(b, status))
		return true;

	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.unlock_fail[is_2m_pages]);
	return false;
}

static struct page *vmballoon_alloc_page(gfp_t flags, bool is_2m_page)
{
	if (is_2m_page)
		return alloc_pages(flags, VMW_BALLOON_2M_SHIFT);

	return alloc_page(flags);
}

static void vmballoon_free_page(struct page *page, bool is_2m_page)
{
	if (is_2m_page)
		__free_pages(page, VMW_BALLOON_2M_SHIFT);
	else
		__free_page(page);
}

/*
 * Quickly release all pages allocated for the balloon. This function is
 * called when the host decides to "reset" the balloon for one reason or
 * another. Unlike a normal "deflate" we do not (shall not) notify the
 * host of the pages being released.
 */
static void vmballoon_pop(struct vmballoon *b)
{
	struct page *page, *next;
	unsigned is_2m_pages;

	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
			is_2m_pages++) {
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];
		u16 size_per_page = vmballoon_page_size(is_2m_pages);

		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
			list_del(&page->lru);
			vmballoon_free_page(page, is_2m_pages);
			STATS_INC(b->stats.free[is_2m_pages]);
			b->size -= size_per_page;
			cond_resched();
		}
	}

	/* Clearing the batch_page unconditionally has no adverse effect */
	free_page((unsigned long)b->batch_page);
	b->batch_page = NULL;
}
/*
 * Notify the host of a ballooned page. If the host rejects the page,
 * put it on the list of refused pages; those pages are then released
 * at the end of the inflation cycle.
 */
static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages,
				bool is_2m_pages, unsigned int *target)
{
	int locked, hv_status;
	struct page *page = b->page;
	struct vmballoon_page_size *page_size = &b->page_sizes[false];

	/* is_2m_pages can never be true as 2m page support implies batching */

	locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status,
								target);
	if (locked > 0) {
		STATS_INC(b->stats.refused_alloc[false]);

		if (hv_status == VMW_BALLOON_ERROR_RESET ||
				hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED) {
			vmballoon_free_page(page, false);
			return -EIO;
		}

		/*
		 * Place page on the list of non-balloonable pages
		 * and retry allocation, unless we already accumulated
		 * too many of them, in which case take a breather.
		 */
		if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
			page_size->n_refused_pages++;
			list_add(&page->lru, &page_size->refused_pages);
		} else {
			vmballoon_free_page(page, false);
		}
		return -EIO;
	}

	/* track allocated page */
	list_add(&page->lru, &page_size->pages);

	/* update balloon size */
	b->size++;

	return 0;
}

static int vmballoon_lock_batched_page(struct vmballoon *b,
		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
{
	int locked, i;
	u16 size_per_page = vmballoon_page_size(is_2m_pages);

	locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages,
			target);
	if (locked > 0) {
		for (i = 0; i < num_pages; i++) {
			u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
			struct page *p = pfn_to_page(pa >> PAGE_SHIFT);

			vmballoon_free_page(p, is_2m_pages);
		}

		return -EIO;
	}

	for (i = 0; i < num_pages; i++) {
		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		locked = vmballoon_batch_get_status(b->batch_page, i);

		switch (locked) {
		case VMW_BALLOON_SUCCESS:
			list_add(&p->lru, &page_size->pages);
			b->size += size_per_page;
			break;
		case VMW_BALLOON_ERROR_PPN_PINNED:
		case VMW_BALLOON_ERROR_PPN_INVALID:
			if (page_size->n_refused_pages
					< VMW_BALLOON_MAX_REFUSED) {
				list_add(&p->lru, &page_size->refused_pages);
				page_size->n_refused_pages++;
				break;
			}
			/* Fallthrough */
		case VMW_BALLOON_ERROR_RESET:
		case VMW_BALLOON_ERROR_PPN_NOTNEEDED:
			vmballoon_free_page(p, is_2m_pages);
			break;
		default:
			/* This should never happen */
			WARN_ON_ONCE(true);
		}
	}

	return 0;
}
/*
 * Release the page allocated for the balloon. Note that we first notify
 * the host so it can make sure the page will be available for the guest
 * to use, if needed.
 */
static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages,
		bool is_2m_pages, unsigned int *target)
{
	struct page *page = b->page;
	struct vmballoon_page_size *page_size = &b->page_sizes[false];

	/* is_2m_pages can never be true as 2m page support implies batching */

	if (!vmballoon_send_unlock_page(b, page_to_pfn(page), target)) {
		list_add(&page->lru, &page_size->pages);
		return -EIO;
	}

	/* deallocate page */
	vmballoon_free_page(page, false);
	STATS_INC(b->stats.free[false]);

	/* update balloon size */
	b->size--;

	return 0;
}

static int vmballoon_unlock_batched_page(struct vmballoon *b,
				unsigned int num_pages, bool is_2m_pages,
				unsigned int *target)
{
	int locked, i, ret = 0;
	bool hv_success;
	u16 size_per_page = vmballoon_page_size(is_2m_pages);

	hv_success = vmballoon_send_batched_unlock(b, num_pages, is_2m_pages,
			target);
	if (!hv_success)
		ret = -EIO;

	for (i = 0; i < num_pages; i++) {
		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		locked = vmballoon_batch_get_status(b->batch_page, i);
		if (!hv_success || locked != VMW_BALLOON_SUCCESS) {
			/*
			 * That page wasn't successfully unlocked by the
			 * hypervisor, re-add it to the list of pages owned by
			 * the balloon driver.
			 */
			list_add(&p->lru, &page_size->pages);
		} else {
			/* deallocate page */
			vmballoon_free_page(p, is_2m_pages);
			STATS_INC(b->stats.free[is_2m_pages]);

			/* update balloon size */
			b->size -= size_per_page;
		}
	}

	return ret;
}

/*
 * Release pages that were allocated while attempting to inflate the
 * balloon but were refused by the host for one reason or another.
 */
static void vmballoon_release_refused_pages(struct vmballoon *b,
		bool is_2m_pages)
{
	struct page *page, *next;
	struct vmballoon_page_size *page_size =
			&b->page_sizes[is_2m_pages];

	list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) {
		list_del(&page->lru);
		vmballoon_free_page(page, is_2m_pages);
		STATS_INC(b->stats.refused_free[is_2m_pages]);
	}

	page_size->n_refused_pages = 0;
}

static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
{
	b->page = p;
}

static void vmballoon_add_batched_page(struct vmballoon *b, int idx,
				struct page *p)
{
	vmballoon_batch_set_pa(b->batch_page, idx,
			(u64)page_to_pfn(p) << PAGE_SHIFT);
}
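
/*
 * Illustrative sketch only; vmballoon_stage_and_lock() is a hypothetical
 * helper, not used by this driver. It shows how the ops above are meant
 * to be driven: pages are staged with ->add_page() until batch_max_pages
 * entries are queued, then handed to the hypervisor in one ->lock()
 * call, which is exactly what vmballoon_inflate() below does inline.
 */
static inline int vmballoon_stage_and_lock(struct vmballoon *b,
				struct page *p, unsigned int *num_pages,
				bool is_2m_pages)
{
	/* Stage the page (b->page for basic ops, a batch slot otherwise). */
	b->ops->add_page(b, (*num_pages)++, p);

	/* Flush the whole batch to the hypervisor once it is full. */
	if (*num_pages == b->batch_max_pages) {
		int error = b->ops->lock(b, *num_pages, is_2m_pages,
					 &b->target);

		*num_pages = 0;
		return error;
	}

	return 0;
}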
/*
 * Inflate the balloon towards its target size. Note that we try to limit
 * the rate of allocation to make sure we are not choking the rest of the
 * system.
 */
static void vmballoon_inflate(struct vmballoon *b)
{
	unsigned rate;
	unsigned int allocations = 0;
	unsigned int num_pages = 0;
	int error = 0;
	gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP;
	bool is_2m_pages;

	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);

	/*
	 * First try NOSLEEP page allocations to inflate balloon.
	 *
	 * If we do not throttle nosleep allocations, we can drain all
	 * free pages in the guest quickly (if the balloon target is high).
	 * As a side-effect, draining free pages helps to inform (force)
	 * the guest to start swapping if balloon target is not met yet,
	 * which is a desired behavior. However, the balloon driver can
	 * consume all available CPU cycles if too many pages are allocated
	 * in a second. Therefore, we throttle nosleep allocations even when
	 * the guest is not under memory pressure. OTOH, if we have already
	 * predicted that the guest is under memory pressure, then we
	 * slow down page allocations considerably.
	 */

	/*
	 * Start with the no-sleep allocation rate, which may be higher
	 * than the sleeping allocation rate.
	 */
	if (b->slow_allocation_cycles) {
		rate = b->rate_alloc;
		is_2m_pages = false;
	} else {
		rate = UINT_MAX;
		is_2m_pages =
			b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES;
	}

	pr_debug("%s - goal: %d, no-sleep rate: %u, sleep rate: %d\n",
		 __func__, b->target - b->size, rate, b->rate_alloc);

	while (!b->reset_required &&
		b->size + num_pages * vmballoon_page_size(is_2m_pages)
		< b->target) {
		struct page *page;

		if (flags == VMW_PAGE_ALLOC_NOSLEEP)
			STATS_INC(b->stats.alloc[is_2m_pages]);
		else
			STATS_INC(b->stats.sleep_alloc);

		page = vmballoon_alloc_page(flags, is_2m_pages);
		if (!page) {
			STATS_INC(b->stats.alloc_fail[is_2m_pages]);

			if (is_2m_pages) {
				b->ops->lock(b, num_pages, true, &b->target);

				/*
				 * ignore errors from locking as we now switch
				 * to 4k pages and we might get different
				 * errors.
				 */

				num_pages = 0;
				is_2m_pages = false;
				continue;
			}

			if (flags == VMW_PAGE_ALLOC_CANSLEEP) {
				/*
				 * CANSLEEP page allocation failed, so guest
				 * is under severe memory pressure. Quickly
				 * decrease allocation rate.
				 */
				b->rate_alloc = max(b->rate_alloc / 2,
						    VMW_BALLOON_RATE_ALLOC_MIN);
				STATS_INC(b->stats.sleep_alloc_fail);
				break;
			}

			/*
			 * NOSLEEP page allocation failed, so the guest is
			 * under memory pressure. Let us slow down page
			 * allocations for the next few cycles so that the
			 * guest gets out of memory pressure. Also, if we
			 * already allocated b->rate_alloc pages, let's pause,
			 * otherwise switch to sleeping allocations.
			 */
			b->slow_allocation_cycles = VMW_BALLOON_SLOW_CYCLES;

			if (allocations >= b->rate_alloc)
				break;

			flags = VMW_PAGE_ALLOC_CANSLEEP;
			/* Lower rate for sleeping allocations. */
			rate = b->rate_alloc;
			continue;
		}

		allocations++;
		b->ops->add_page(b, num_pages++, page);
		if (num_pages == b->batch_max_pages) {
			error = b->ops->lock(b, num_pages, is_2m_pages,
					&b->target);
			num_pages = 0;
			if (error)
				break;
		}

		cond_resched();

		if (allocations >= rate) {
			/* We allocated enough pages, let's take a break. */
			break;
		}
	}

	if (num_pages > 0)
		b->ops->lock(b, num_pages, is_2m_pages, &b->target);

	/*
	 * We reached our goal without failures so try increasing
	 * allocation rate.
	 */
	if (error == 0 && allocations >= b->rate_alloc) {
		unsigned int mult = allocations / b->rate_alloc;

		b->rate_alloc =
			min(b->rate_alloc + mult * VMW_BALLOON_RATE_ALLOC_INC,
			    VMW_BALLOON_RATE_ALLOC_MAX);
	}

	vmballoon_release_refused_pages(b, true);
	vmballoon_release_refused_pages(b, false);
}
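
/*
 * Worked example of the adaptive rate above: if rate_alloc is 512 and a
 * cycle finishes 1040 allocations without error, mult == 1040 / 512 == 2
 * and the new rate is min(512 + 2 * VMW_BALLOON_RATE_ALLOC_INC, 2048) ==
 * 544 pages/sec. A failed sleeping allocation instead halves the rate,
 * clamped to VMW_BALLOON_RATE_ALLOC_MIN.
 */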
/*
 * Decrease the size of the balloon allowing guest to use more memory.
 */
static void vmballoon_deflate(struct vmballoon *b)
{
	unsigned is_2m_pages;

	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);

	/* free pages to reach target */
	for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes;
			is_2m_pages++) {
		struct page *page, *next;
		unsigned int num_pages = 0;
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
			if (b->reset_required ||
				(b->target > 0 &&
					b->size - num_pages
					* vmballoon_page_size(is_2m_pages)
					< b->target +
					  vmballoon_page_size(true)))
				break;

			list_del(&page->lru);
			b->ops->add_page(b, num_pages++, page);

			if (num_pages == b->batch_max_pages) {
				int error;

				error = b->ops->unlock(b, num_pages,
						is_2m_pages, &b->target);
				num_pages = 0;
				if (error)
					return;
			}

			cond_resched();
		}

		if (num_pages > 0)
			b->ops->unlock(b, num_pages, is_2m_pages, &b->target);
	}
}

static const struct vmballoon_ops vmballoon_basic_ops = {
	.add_page = vmballoon_add_page,
	.lock = vmballoon_lock_page,
	.unlock = vmballoon_unlock_page
};

static const struct vmballoon_ops vmballoon_batched_ops = {
	.add_page = vmballoon_add_batched_page,
	.lock = vmballoon_lock_batched_page,
	.unlock = vmballoon_unlock_batched_page
};

static bool vmballoon_init_batching(struct vmballoon *b)
{
	struct page *page;

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page)
		return false;

	b->batch_page = page_address(page);
	return true;
}

/*
 * Receive notification and resize balloon
 */
static void vmballoon_doorbell(void *client_data)
{
	struct vmballoon *b = client_data;

	STATS_INC(b->stats.doorbell);

	mod_delayed_work(system_freezable_wq, &b->dwork, 0);
}

/*
 * Clean up vmci doorbell
 */
static void vmballoon_vmci_cleanup(struct vmballoon *b)
{
	int error;

	VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, VMCI_INVALID_ID,
			VMCI_INVALID_ID, error);
	STATS_INC(b->stats.doorbell_unset);

	if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
		vmci_doorbell_destroy(b->vmci_doorbell);
		b->vmci_doorbell = VMCI_INVALID_HANDLE;
	}
}

/*
 * Initialize vmci doorbell, to get notified as soon as balloon changes
 */
static int vmballoon_vmci_init(struct vmballoon *b)
{
	int error = 0;

	if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) != 0) {
		error = vmci_doorbell_create(&b->vmci_doorbell,
				VMCI_FLAG_DELAYED_CB,
				VMCI_PRIVILEGE_FLAG_RESTRICTED,
				vmballoon_doorbell, b);

		if (error == VMCI_SUCCESS) {
			VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET,
					b->vmci_doorbell.context,
					b->vmci_doorbell.resource, error);
			STATS_INC(b->stats.doorbell_set);
		}
	}

	if (error != 0) {
		vmballoon_vmci_cleanup(b);

		return -EIO;
	}

	return 0;
}
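
/*
 * For reference, the notification path wired up above: the host signals
 * the doorbell, VMCI dispatches vmballoon_doorbell(), and
 * mod_delayed_work() pulls the next vmballoon_work() iteration forward
 * instead of waiting for the roughly one-second periodic requeue.
 */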
/*
 * Perform standard reset sequence by popping the balloon (in case it
 * is not empty) and then restarting protocol. This operation normally
 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
 */
static void vmballoon_reset(struct vmballoon *b)
{
	int error;

	vmballoon_vmci_cleanup(b);

	/* free all pages, skipping monitor unlock */
	vmballoon_pop(b);

	if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
		return;

	if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
		b->ops = &vmballoon_batched_ops;
		b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
		if (!vmballoon_init_batching(b)) {
			/*
			 * We failed to initialize batching, inform the monitor
			 * about it by sending a null capability.
			 *
			 * The guest will retry in one second.
			 */
			vmballoon_send_start(b, 0);
			return;
		}
	} else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
		b->ops = &vmballoon_basic_ops;
		b->batch_max_pages = 1;
	}

	b->reset_required = false;

	error = vmballoon_vmci_init(b);
	if (error)
		pr_err("failed to initialize vmci doorbell\n");

	if (!vmballoon_send_guest_id(b))
		pr_err("failed to send guest ID to the host\n");
}

/*
 * Balloon work function: reset protocol, if needed, get the new size and
 * adjust balloon as needed. Repeat in 1 sec.
 */
static void vmballoon_work(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
	unsigned int target;

	STATS_INC(b->stats.timer);

	if (b->reset_required)
		vmballoon_reset(b);

	if (b->slow_allocation_cycles > 0)
		b->slow_allocation_cycles--;

	if (!b->reset_required && vmballoon_send_get_target(b, &target)) {
		/* update target, adjust size */
		b->target = target;

		if (b->size < target)
			vmballoon_inflate(b);
		else if (target == 0 ||
				b->size > target + vmballoon_page_size(true))
			vmballoon_deflate(b);
	}

	/*
	 * We are using a freezable workqueue so that balloon operations are
	 * stopped while the system transitions to/from sleep/hibernation.
	 */
	queue_delayed_work(system_freezable_wq,
			   dwork, round_jiffies_relative(HZ));
}

/*
 * DEBUGFS Interface
 */
#ifdef CONFIG_DEBUG_FS
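
/*
 * Usage note: vmballoon_debugfs_init() below creates the "vmmemctl"
 * file at the debugfs root (NULL parent), so with debugfs mounted at
 * its usual location the statistics formatted here can be read from
 * /sys/kernel/debug/vmmemctl.
 */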
static int vmballoon_debug_show(struct seq_file *f, void *offset)
{
	struct vmballoon *b = f->private;
	struct vmballoon_stats *stats = &b->stats;

	/* format capabilities info */
	seq_printf(f,
		"balloon capabilities:   %#4x\n"
		"used capabilities:      %#4lx\n"
		"is resetting:           %c\n",
		VMW_BALLOON_CAPABILITIES, b->capabilities,
		b->reset_required ? 'y' : 'n');

	/* format size info */
	seq_printf(f,
		"target:             %8d pages\n"
		"current:            %8d pages\n",
		b->target, b->size);

	/* format rate info */
	seq_printf(f,
		"rateSleepAlloc:     %8d pages/sec\n",
		b->rate_alloc);

	seq_printf(f,
		"\n"
		"timer:              %8u\n"
		"doorbell:           %8u\n"
		"start:              %8u (%4u failed)\n"
		"guestType:          %8u (%4u failed)\n"
		"2m-lock:            %8u (%4u failed)\n"
		"lock:               %8u (%4u failed)\n"
		"2m-unlock:          %8u (%4u failed)\n"
		"unlock:             %8u (%4u failed)\n"
		"target:             %8u (%4u failed)\n"
		"prim2mAlloc:        %8u (%4u failed)\n"
		"primNoSleepAlloc:   %8u (%4u failed)\n"
		"primCanSleepAlloc:  %8u (%4u failed)\n"
		"prim2mFree:         %8u\n"
		"primFree:           %8u\n"
		"err2mAlloc:         %8u\n"
		"errAlloc:           %8u\n"
		"err2mFree:          %8u\n"
		"errFree:            %8u\n"
		"doorbellSet:        %8u\n"
		"doorbellUnset:      %8u\n",
		stats->timer,
		stats->doorbell,
		stats->start, stats->start_fail,
		stats->guest_type, stats->guest_type_fail,
		stats->lock[true], stats->lock_fail[true],
		stats->lock[false], stats->lock_fail[false],
		stats->unlock[true], stats->unlock_fail[true],
		stats->unlock[false], stats->unlock_fail[false],
		stats->target, stats->target_fail,
		stats->alloc[true], stats->alloc_fail[true],
		stats->alloc[false], stats->alloc_fail[false],
		stats->sleep_alloc, stats->sleep_alloc_fail,
		stats->free[true],
		stats->free[false],
		stats->refused_alloc[true], stats->refused_alloc[false],
		stats->refused_free[true], stats->refused_free[false],
		stats->doorbell_set, stats->doorbell_unset);

	return 0;
}

static int vmballoon_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, vmballoon_debug_show, inode->i_private);
}

static const struct file_operations vmballoon_debug_fops = {
	.owner		= THIS_MODULE,
	.open		= vmballoon_debug_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init vmballoon_debugfs_init(struct vmballoon *b)
{
	int error;

	b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
					   &vmballoon_debug_fops);
	if (IS_ERR(b->dbg_entry)) {
		error = PTR_ERR(b->dbg_entry);
		pr_err("failed to create debugfs entry, error: %d\n", error);
		return error;
	}

	return 0;
}

static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
{
	debugfs_remove(b->dbg_entry);
}

#else

static inline int vmballoon_debugfs_init(struct vmballoon *b)
{
	return 0;
}

static inline void vmballoon_debugfs_exit(struct vmballoon *b)
{
}

#endif	/* CONFIG_DEBUG_FS */
static int __init vmballoon_init(void)
{
	int error;
	unsigned is_2m_pages;

	/*
	 * Check if we are running on VMware's hypervisor and bail out
	 * if we are not.
	 */
	if (x86_hyper_type != X86_HYPER_VMWARE)
		return -ENODEV;

	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
			is_2m_pages++) {
		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages);
		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages);
	}

	/* initialize rates */
	balloon.rate_alloc = VMW_BALLOON_RATE_ALLOC_MAX;

	INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);

	error = vmballoon_debugfs_init(&balloon);
	if (error)
		return error;

	balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
	balloon.batch_page = NULL;
	balloon.page = NULL;
	balloon.reset_required = true;

	queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);

	return 0;
}
module_init(vmballoon_init);

static void __exit vmballoon_exit(void)
{
	vmballoon_vmci_cleanup(&balloon);
	cancel_delayed_work_sync(&balloon.dwork);

	vmballoon_debugfs_exit(&balloon);

	/*
	 * Deallocate all reserved memory, and reset connection with monitor.
	 * Reset connection before deallocating memory to avoid potential for
	 * additional spurious resets from guest touching deallocated pages.
	 */
	vmballoon_send_start(&balloon, 0);
	vmballoon_pop(&balloon);
}
module_exit(vmballoon_exit);