/*
 * VMware Balloon driver.
 *
 * Copyright (C) 2000-2014, VMware, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Maintained by:	Xavier Deguillard <xdeguillard@vmware.com>
 *			Philip Moltmann <moltmann@vmware.com>
 */

/*
 * This is the VMware physical memory management driver for Linux. The
 * driver acts like a "balloon" that can be inflated to reclaim physical
 * pages by reserving them in the guest and invalidating them in the
 * monitor, freeing up the underlying machine pages so they can be
 * allocated to other guests. The balloon can also be deflated to allow
 * the guest to use more physical memory. Higher level policies can
 * control the sizes of balloons in VMs in order to manage physical
 * memory resources.
 */

//#define DEBUG
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <asm/hypervisor.h>

MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
MODULE_VERSION("1.5.0.0-k");
MODULE_ALIAS("dmi:*:svnVMware*:*");
MODULE_ALIAS("vmware_vmmemctl");
MODULE_LICENSE("GPL");

/*
 * Various constants controlling rate of inflating/deflating balloon,
 * measured in pages.
 */

/*
 * Rates of memory allocation when guest experiences memory pressure
 * (driver performs sleeping allocations).
 */
#define VMW_BALLOON_RATE_ALLOC_MIN	512U
#define VMW_BALLOON_RATE_ALLOC_MAX	2048U
#define VMW_BALLOON_RATE_ALLOC_INC	16U

/*
 * When guest is under memory pressure, use a reduced page allocation
 * rate for next several cycles.
 */
#define VMW_BALLOON_SLOW_CYCLES		4

/*
 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't
 * allow wait (__GFP_RECLAIM) for NOSLEEP page allocations. Use
 * __GFP_NOWARN, to suppress page allocation failure warnings.
 */
#define VMW_PAGE_ALLOC_NOSLEEP		(__GFP_HIGHMEM|__GFP_NOWARN)

/*
 * Use GFP_HIGHUSER when executing in a separate kernel thread
 * context and allocation can sleep. This is less stressful to
 * the guest memory system, since it allows the thread to block
 * while memory is reclaimed, and won't take pages from emergency
 * low-memory pools.
 */
#define VMW_PAGE_ALLOC_CANSLEEP		(GFP_HIGHUSER)

/* Maximum number of refused pages we accumulate during inflation cycle */
#define VMW_BALLOON_MAX_REFUSED		16
/*
 * Hypervisor communication port definitions.
 */
#define VMW_BALLOON_HV_PORT		0x5670
#define VMW_BALLOON_HV_MAGIC		0x456c6d6f
#define VMW_BALLOON_GUEST_ID		1	/* Linux */

enum vmwballoon_capabilities {
	/*
	 * Bit 0 is reserved and not associated with any capability.
	 */
	VMW_BALLOON_BASIC_CMDS			= (1 << 1),
	VMW_BALLOON_BATCHED_CMDS		= (1 << 2),
	VMW_BALLOON_BATCHED_2M_CMDS		= (1 << 3),
	VMW_BALLOON_SIGNALLED_WAKEUP_CMD	= (1 << 4),
};

#define VMW_BALLOON_CAPABILITIES	(VMW_BALLOON_BASIC_CMDS \
					| VMW_BALLOON_BATCHED_CMDS \
					| VMW_BALLOON_BATCHED_2M_CMDS \
					| VMW_BALLOON_SIGNALLED_WAKEUP_CMD)

#define VMW_BALLOON_2M_SHIFT		(9)
#define VMW_BALLOON_NUM_PAGE_SIZES	(2)

/*
 * Backdoor commands availability:
 *
 * START, GET_TARGET and GUEST_ID are always available.
 *
 * VMW_BALLOON_BASIC_CMDS:
 *	LOCK and UNLOCK commands,
 * VMW_BALLOON_BATCHED_CMDS:
 *	BATCHED_LOCK and BATCHED_UNLOCK commands,
 * VMW_BALLOON_BATCHED_2M_CMDS:
 *	BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands,
 * VMW_BALLOON_SIGNALLED_WAKEUP_CMD:
 *	VMW_BALLOON_CMD_VMCI_DOORBELL_SET command.
 */
#define VMW_BALLOON_CMD_START			0
#define VMW_BALLOON_CMD_GET_TARGET		1
#define VMW_BALLOON_CMD_LOCK			2
#define VMW_BALLOON_CMD_UNLOCK			3
#define VMW_BALLOON_CMD_GUEST_ID		4
#define VMW_BALLOON_CMD_BATCHED_LOCK		6
#define VMW_BALLOON_CMD_BATCHED_UNLOCK		7
#define VMW_BALLOON_CMD_BATCHED_2M_LOCK		8
#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK	9
#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET	10


/* error codes */
#define VMW_BALLOON_SUCCESS			0
#define VMW_BALLOON_FAILURE			-1
#define VMW_BALLOON_ERROR_CMD_INVALID		1
#define VMW_BALLOON_ERROR_PPN_INVALID		2
#define VMW_BALLOON_ERROR_PPN_LOCKED		3
#define VMW_BALLOON_ERROR_PPN_UNLOCKED		4
#define VMW_BALLOON_ERROR_PPN_PINNED		5
#define VMW_BALLOON_ERROR_PPN_NOTNEEDED		6
#define VMW_BALLOON_ERROR_RESET			7
#define VMW_BALLOON_ERROR_BUSY			8

#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES	(0x03000000)

/* Batch page description */

/*
 * Layout of a page in the batch page:
 *
 * +-------------+----------+--------+
 * |             |          |        |
 * | Page number | Reserved | Status |
 * |             |          |        |
 * +-------------+----------+--------+
 * 64  PAGE_SHIFT          6         0
 *
 * The reserved field should be set to 0.
 */
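/*
 * Worked example (illustrative only, assuming 4k guest pages so
 * PAGE_SHIFT == 12): to describe PFN 0x1234 the driver stores
 * 0x1234 << 12 == 0x1234000 in a batch entry. The per-page status is
 * read back from the low five bits, so an entry reading 0x1234003
 * decodes to PFN 0x1234 with status 3 (VMW_BALLOON_ERROR_PPN_LOCKED).
 */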
#define VMW_BALLOON_BATCH_MAX_PAGES	(PAGE_SIZE / sizeof(u64))
#define VMW_BALLOON_BATCH_STATUS_MASK	((1UL << 5) - 1)
#define VMW_BALLOON_BATCH_PAGE_MASK	(~((1UL << PAGE_SHIFT) - 1))

struct vmballoon_batch_page {
	u64 pages[VMW_BALLOON_BATCH_MAX_PAGES];
};

static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx)
{
	return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK;
}

static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch,
				int idx)
{
	return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK);
}

static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx,
				u64 pa)
{
	batch->pages[idx] = pa;
}


#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result)		\
({								\
	unsigned long __status, __dummy1, __dummy2, __dummy3;	\
	__asm__ __volatile__ ("inl %%dx" :			\
		"=a"(__status),					\
		"=c"(__dummy1),					\
		"=d"(__dummy2),					\
		"=b"(result),					\
		"=S" (__dummy3) :				\
		"0"(VMW_BALLOON_HV_MAGIC),			\
		"1"(VMW_BALLOON_CMD_##cmd),			\
		"2"(VMW_BALLOON_HV_PORT),			\
		"3"(arg1),					\
		"4" (arg2) :					\
		"memory");					\
	if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START)	\
		result = __dummy1;				\
	result &= -1UL;						\
	__status & -1UL;					\
})
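/*
 * Calling convention sketch (derived from the constraints above): the
 * magic number goes in %eax, the command in %ecx, the port in %edx,
 * arg1 in %ebx and arg2 in %esi; "inl" then traps to the hypervisor.
 * The status comes back in %eax and the result value in %ebx (in %ecx
 * for START). A typical invocation looks like:
 *
 *	unsigned long status, target, dummy = 0;
 *
 *	status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target);
 */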
#ifdef CONFIG_DEBUG_FS
struct vmballoon_stats {
	unsigned int timer;
	unsigned int doorbell;

	/* allocation statistics */
	unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int sleep_alloc;
	unsigned int sleep_alloc_fail;
	unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];

	/* monitor operations */
	unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int target;
	unsigned int target_fail;
	unsigned int start;
	unsigned int start_fail;
	unsigned int guest_type;
	unsigned int guest_type_fail;
	unsigned int doorbell_set;
	unsigned int doorbell_unset;
};

#define STATS_INC(stat) (stat)++
#else
#define STATS_INC(stat)
#endif

struct vmballoon;

struct vmballoon_ops {
	void (*add_page)(struct vmballoon *b, int idx, struct page *p);
	int (*lock)(struct vmballoon *b, unsigned int num_pages,
			bool is_2m_pages, unsigned int *target);
	int (*unlock)(struct vmballoon *b, unsigned int num_pages,
			bool is_2m_pages, unsigned int *target);
};

struct vmballoon_page_size {
	/* list of reserved physical pages */
	struct list_head pages;

	/* transient list of non-balloonable pages */
	struct list_head refused_pages;
	unsigned int n_refused_pages;
};

struct vmballoon {
	struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];

	/* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */
	unsigned supported_page_sizes;

	/* balloon size in pages */
	unsigned int size;
	unsigned int target;

	/* reset flag */
	bool reset_required;

	/* adjustment rates (pages per second) */
	unsigned int rate_alloc;

	/* slowdown page allocations for next few cycles */
	unsigned int slow_allocation_cycles;

	unsigned long capabilities;

	struct vmballoon_batch_page *batch_page;
	unsigned int batch_max_pages;
	struct page *page;

	const struct vmballoon_ops *ops;

#ifdef CONFIG_DEBUG_FS
	/* statistics */
	struct vmballoon_stats stats;

	/* debugfs file exporting statistics */
	struct dentry *dbg_entry;
#endif

	struct sysinfo sysinfo;

	struct delayed_work dwork;

	struct vmci_handle vmci_doorbell;
};

static struct vmballoon balloon;

/*
 * Send "start" command to the host, communicating supported version
 * of the protocol.
 */
static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
{
	unsigned long status, capabilities, dummy = 0;
	bool success;

	STATS_INC(b->stats.start);

	status = VMWARE_BALLOON_CMD(START, req_caps, dummy, capabilities);

	switch (status) {
	case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
		b->capabilities = capabilities;
		success = true;
		break;
	case VMW_BALLOON_SUCCESS:
		b->capabilities = VMW_BALLOON_BASIC_CMDS;
		success = true;
		break;
	default:
		success = false;
	}

	if (b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS)
		b->supported_page_sizes = 2;
	else
		b->supported_page_sizes = 1;

	if (!success) {
		pr_debug("%s - failed, hv returns %ld\n", __func__, status);
		STATS_INC(b->stats.start_fail);
	}
	return success;
}

static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
{
	switch (status) {
	case VMW_BALLOON_SUCCESS:
		return true;

	case VMW_BALLOON_ERROR_RESET:
		b->reset_required = true;
		/* fall through */

	default:
		return false;
	}
}

/*
 * Communicate guest type to the host so that it can adjust ballooning
 * algorithm to the one most appropriate for the guest. This command
 * is normally issued after sending "start" command and is part of
 * standard reset sequence.
 */
static bool vmballoon_send_guest_id(struct vmballoon *b)
{
	unsigned long status, dummy = 0;

	status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy,
				dummy);

	STATS_INC(b->stats.guest_type);

	if (vmballoon_check_status(b, status))
		return true;

	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
	STATS_INC(b->stats.guest_type_fail);
	return false;
}

static u16 vmballoon_page_size(bool is_2m_page)
{
	if (is_2m_page)
		return 1 << VMW_BALLOON_2M_SHIFT;

	return 1;
}
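/*
 * Note: with VMW_BALLOON_2M_SHIFT == 9 this returns 512 for 2m pages,
 * i.e. the number of 4k pages backing one 2m page (2M / 4K = 512), so
 * the balloon size can be accounted uniformly in 4k pages regardless
 * of which page size was ballooned.
 */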
/*
 * Retrieve desired balloon size from the host.
 */
static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
{
	unsigned long status;
	unsigned long target;
	unsigned long limit;
	unsigned long dummy = 0;
	u32 limit32;

	/*
	 * si_meminfo() is cheap. Moreover, we want to provide dynamic
	 * max balloon size later. So let us call si_meminfo() every
	 * iteration.
	 */
	si_meminfo(&b->sysinfo);
	limit = b->sysinfo.totalram;

	/* Ensure limit fits in 32-bits */
	limit32 = (u32)limit;
	if (limit != limit32)
		return false;

	/* update stats */
	STATS_INC(b->stats.target);

	status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target);
	if (vmballoon_check_status(b, status)) {
		*new_target = target;
		return true;
	}

	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
	STATS_INC(b->stats.target_fail);
	return false;
}

/*
 * Notify the host about allocated page so that host can use it without
 * fear that guest will need it. Host may reject some pages, we need to
 * check the return value and maybe submit a different page.
 */
static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
				unsigned int *hv_status, unsigned int *target)
{
	unsigned long status, dummy = 0;
	u32 pfn32;

	pfn32 = (u32)pfn;
	if (pfn32 != pfn)
		return -1;

	STATS_INC(b->stats.lock[false]);

	*hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy, *target);
	if (vmballoon_check_status(b, status))
		return 0;

	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.lock_fail[false]);
	return 1;
}

static int vmballoon_send_batched_lock(struct vmballoon *b,
		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
{
	unsigned long status;
	unsigned long pfn = page_to_pfn(b->page);

	STATS_INC(b->stats.lock[is_2m_pages]);

	if (is_2m_pages)
		status = VMWARE_BALLOON_CMD(BATCHED_2M_LOCK, pfn, num_pages,
				*target);
	else
		status = VMWARE_BALLOON_CMD(BATCHED_LOCK, pfn, num_pages,
				*target);

	if (vmballoon_check_status(b, status))
		return 0;

	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.lock_fail[is_2m_pages]);
	return 1;
}
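/*
 * Aside (an inference from the pfn32 checks in the unbatched send
 * functions, not a documented protocol limit): the legacy LOCK/UNLOCK
 * commands carry the PFN in a 32-bit quantity, so with 4k pages they
 * can only describe guest physical memory below 2^32 * 4K = 16TB.
 * Pages beyond that are rejected locally, before any hypervisor call.
 */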
/*
 * Notify the host that guest intends to release given page back into
 * the pool of available (to the guest) pages.
 */
static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn,
						unsigned int *target)
{
	unsigned long status, dummy = 0;
	u32 pfn32;

	pfn32 = (u32)pfn;
	if (pfn32 != pfn)
		return false;

	STATS_INC(b->stats.unlock[false]);

	status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy, *target);
	if (vmballoon_check_status(b, status))
		return true;

	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.unlock_fail[false]);
	return false;
}

static bool vmballoon_send_batched_unlock(struct vmballoon *b,
		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
{
	unsigned long status;
	unsigned long pfn = page_to_pfn(b->page);

	STATS_INC(b->stats.unlock[is_2m_pages]);

	if (is_2m_pages)
		status = VMWARE_BALLOON_CMD(BATCHED_2M_UNLOCK, pfn, num_pages,
				*target);
	else
		status = VMWARE_BALLOON_CMD(BATCHED_UNLOCK, pfn, num_pages,
				*target);

	if (vmballoon_check_status(b, status))
		return true;

	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.unlock_fail[is_2m_pages]);
	return false;
}

static struct page *vmballoon_alloc_page(gfp_t flags, bool is_2m_page)
{
	if (is_2m_page)
		return alloc_pages(flags, VMW_BALLOON_2M_SHIFT);

	return alloc_page(flags);
}

static void vmballoon_free_page(struct page *page, bool is_2m_page)
{
	if (is_2m_page)
		__free_pages(page, VMW_BALLOON_2M_SHIFT);
	else
		__free_page(page);
}

/*
 * Quickly release all pages allocated for the balloon. This function is
 * called when host decides to "reset" balloon for one reason or another.
 * Unlike normal "deflate" we do not (shall not) notify host of the pages
 * being released.
 */
static void vmballoon_pop(struct vmballoon *b)
{
	struct page *page, *next;
	unsigned is_2m_pages;

	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
			is_2m_pages++) {
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];
		u16 size_per_page = vmballoon_page_size(is_2m_pages);

		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
			list_del(&page->lru);
			vmballoon_free_page(page, is_2m_pages);
			STATS_INC(b->stats.free[is_2m_pages]);
			b->size -= size_per_page;
			cond_resched();
		}
	}

	if (b->batch_page) {
		vunmap(b->batch_page);
		b->batch_page = NULL;
	}

	if (b->page) {
		__free_page(b->page);
		b->page = NULL;
	}
}
/*
 * Notify the host of a ballooned page. If host rejects the page put it on the
 * refuse list; those refused pages are then released at the end of the
 * inflation cycle.
 */
static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages,
				bool is_2m_pages, unsigned int *target)
{
	int locked, hv_status;
	struct page *page = b->page;
	struct vmballoon_page_size *page_size = &b->page_sizes[false];

	/* is_2m_pages can never happen as 2m pages support implies batching */

	locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status,
								target);
	if (locked > 0) {
		STATS_INC(b->stats.refused_alloc[false]);

		if (hv_status == VMW_BALLOON_ERROR_RESET ||
				hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED) {
			vmballoon_free_page(page, false);
			return -EIO;
		}

		/*
		 * Place page on the list of non-balloonable pages
		 * and retry allocation, unless we already accumulated
		 * too many of them, in which case take a breather.
		 */
		if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
			page_size->n_refused_pages++;
			list_add(&page->lru, &page_size->refused_pages);
		} else {
			vmballoon_free_page(page, false);
		}
		return -EIO;
	}

	/* track allocated page */
	list_add(&page->lru, &page_size->pages);

	/* update balloon size */
	b->size++;

	return 0;
}

static int vmballoon_lock_batched_page(struct vmballoon *b,
		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
{
	int locked, i;
	u16 size_per_page = vmballoon_page_size(is_2m_pages);

	locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages,
			target);
	if (locked > 0) {
		for (i = 0; i < num_pages; i++) {
			u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
			struct page *p = pfn_to_page(pa >> PAGE_SHIFT);

			vmballoon_free_page(p, is_2m_pages);
		}

		return -EIO;
	}

	for (i = 0; i < num_pages; i++) {
		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		locked = vmballoon_batch_get_status(b->batch_page, i);

		switch (locked) {
		case VMW_BALLOON_SUCCESS:
			list_add(&p->lru, &page_size->pages);
			b->size += size_per_page;
			break;
		case VMW_BALLOON_ERROR_PPN_PINNED:
		case VMW_BALLOON_ERROR_PPN_INVALID:
			if (page_size->n_refused_pages
					< VMW_BALLOON_MAX_REFUSED) {
				list_add(&p->lru, &page_size->refused_pages);
				page_size->n_refused_pages++;
				break;
			}
			/* Fallthrough */
		case VMW_BALLOON_ERROR_RESET:
		case VMW_BALLOON_ERROR_PPN_NOTNEEDED:
			vmballoon_free_page(p, is_2m_pages);
			break;
		default:
			/* This should never happen */
			WARN_ON_ONCE(true);
		}
	}

	return 0;
}
/*
 * Release the page allocated for the balloon. Note that we first notify
 * the host so it can make sure the page will be available for the guest
 * to use, if needed.
 */
static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages,
		bool is_2m_pages, unsigned int *target)
{
	struct page *page = b->page;
	struct vmballoon_page_size *page_size = &b->page_sizes[false];

	/* is_2m_pages can never happen as 2m pages support implies batching */

	if (!vmballoon_send_unlock_page(b, page_to_pfn(page), target)) {
		list_add(&page->lru, &page_size->pages);
		return -EIO;
	}

	/* deallocate page */
	vmballoon_free_page(page, false);
	STATS_INC(b->stats.free[false]);

	/* update balloon size */
	b->size--;

	return 0;
}

static int vmballoon_unlock_batched_page(struct vmballoon *b,
				unsigned int num_pages, bool is_2m_pages,
				unsigned int *target)
{
	int locked, i, ret = 0;
	bool hv_success;
	u16 size_per_page = vmballoon_page_size(is_2m_pages);

	hv_success = vmballoon_send_batched_unlock(b, num_pages, is_2m_pages,
			target);
	if (!hv_success)
		ret = -EIO;

	for (i = 0; i < num_pages; i++) {
		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		locked = vmballoon_batch_get_status(b->batch_page, i);
		if (!hv_success || locked != VMW_BALLOON_SUCCESS) {
			/*
			 * That page wasn't successfully unlocked by the
			 * hypervisor, re-add it to the list of pages owned by
			 * the balloon driver.
			 */
			list_add(&p->lru, &page_size->pages);
		} else {
			/* deallocate page */
			vmballoon_free_page(p, is_2m_pages);
			STATS_INC(b->stats.free[is_2m_pages]);

			/* update balloon size */
			b->size -= size_per_page;
		}
	}

	return ret;
}

/*
 * Release pages that were allocated while attempting to inflate the
 * balloon but were refused by the host for one reason or another.
 */
static void vmballoon_release_refused_pages(struct vmballoon *b,
		bool is_2m_pages)
{
	struct page *page, *next;
	struct vmballoon_page_size *page_size =
			&b->page_sizes[is_2m_pages];

	list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) {
		list_del(&page->lru);
		vmballoon_free_page(page, is_2m_pages);
		STATS_INC(b->stats.refused_free[is_2m_pages]);
	}

	page_size->n_refused_pages = 0;
}

static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
{
	b->page = p;
}

static void vmballoon_add_batched_page(struct vmballoon *b, int idx,
				struct page *p)
{
	vmballoon_batch_set_pa(b->batch_page, idx,
			(u64)page_to_pfn(p) << PAGE_SHIFT);
}
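/*
 * Informal sketch of one inflation cycle (a summary of the loop below,
 * not an additional code path):
 *
 *	1. Prefer 2m pages when batching and 2m commands are available,
 *	   otherwise use 4k pages.
 *	2. Allocate with NOSLEEP flags first; on failure fall back from
 *	   2m to 4k pages, then from NOSLEEP to CANSLEEP allocations at
 *	   the reduced b->rate_alloc rate.
 *	3. Hand pages to the hypervisor via ops->lock(), either one at
 *	   a time or in batches of up to batch_max_pages.
 */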
/*
 * Inflate the balloon towards its target size. Note that we try to limit
 * the rate of allocation to make sure we are not choking the rest of the
 * system.
 */
static void vmballoon_inflate(struct vmballoon *b)
{
	unsigned rate;
	unsigned int allocations = 0;
	unsigned int num_pages = 0;
	int error = 0;
	gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP;
	bool is_2m_pages;

	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);

	/*
	 * First try NOSLEEP page allocations to inflate balloon.
	 *
	 * If we do not throttle nosleep allocations, we can drain all
	 * free pages in the guest quickly (if the balloon target is high).
	 * As a side-effect, draining free pages helps to inform (force)
	 * the guest to start swapping if balloon target is not met yet,
	 * which is a desired behavior. However, balloon driver can consume
	 * all available CPU cycles if too many pages are allocated in a
	 * second. Therefore, we throttle nosleep allocations even when
	 * the guest is not under memory pressure. OTOH, if we have already
	 * predicted that the guest is under memory pressure, then we
	 * slow down page allocations considerably.
	 */

	/*
	 * Start with the no-sleep allocation rate, which may be higher
	 * than the sleeping allocation rate.
	 */
	if (b->slow_allocation_cycles) {
		rate = b->rate_alloc;
		is_2m_pages = false;
	} else {
		rate = UINT_MAX;
		is_2m_pages =
			b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES;
	}

	pr_debug("%s - goal: %d, no-sleep rate: %u, sleep rate: %d\n",
		 __func__, b->target - b->size, rate, b->rate_alloc);

	while (!b->reset_required &&
		b->size + num_pages * vmballoon_page_size(is_2m_pages)
		< b->target) {
		struct page *page;

		if (flags == VMW_PAGE_ALLOC_NOSLEEP)
			STATS_INC(b->stats.alloc[is_2m_pages]);
		else
			STATS_INC(b->stats.sleep_alloc);

		page = vmballoon_alloc_page(flags, is_2m_pages);
		if (!page) {
			STATS_INC(b->stats.alloc_fail[is_2m_pages]);

			if (is_2m_pages) {
				b->ops->lock(b, num_pages, true, &b->target);

				/*
				 * ignore errors from locking as we now switch
				 * to 4k pages and we might get different
				 * errors.
				 */

				num_pages = 0;
				is_2m_pages = false;
				continue;
			}

			if (flags == VMW_PAGE_ALLOC_CANSLEEP) {
				/*
				 * CANSLEEP page allocation failed, so guest
				 * is under severe memory pressure. Quickly
				 * decrease allocation rate.
				 */
				b->rate_alloc = max(b->rate_alloc / 2,
						    VMW_BALLOON_RATE_ALLOC_MIN);
				STATS_INC(b->stats.sleep_alloc_fail);
				break;
			}

			/*
			 * NOSLEEP page allocation failed, so the guest is
			 * under memory pressure. Let us slow down page
			 * allocations for next few cycles so that the guest
			 * gets out of memory pressure. Also, if we already
			 * allocated b->rate_alloc pages, let's pause,
			 * otherwise switch to sleeping allocations.
			 */
			b->slow_allocation_cycles = VMW_BALLOON_SLOW_CYCLES;

			if (allocations >= b->rate_alloc)
				break;

			flags = VMW_PAGE_ALLOC_CANSLEEP;
			/* Lower rate for sleeping allocations. */
			rate = b->rate_alloc;
			continue;
		}

		/* count successful allocations so the rate checks work */
		allocations++;

		b->ops->add_page(b, num_pages++, page);
		if (num_pages == b->batch_max_pages) {
			error = b->ops->lock(b, num_pages, is_2m_pages,
					&b->target);
			num_pages = 0;
			if (error)
				break;
		}

		cond_resched();

		if (allocations >= rate) {
			/* We allocated enough pages, let's take a break. */
			break;
		}
	}

	if (num_pages > 0)
		b->ops->lock(b, num_pages, is_2m_pages, &b->target);

	/*
	 * We reached our goal without failures so try increasing
	 * allocation rate.
	 */
	if (error == 0 && allocations >= b->rate_alloc) {
		unsigned int mult = allocations / b->rate_alloc;

		b->rate_alloc =
			min(b->rate_alloc + mult * VMW_BALLOON_RATE_ALLOC_INC,
			    VMW_BALLOON_RATE_ALLOC_MAX);
	}

	vmballoon_release_refused_pages(b, true);
	vmballoon_release_refused_pages(b, false);
}
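/*
 * Rate adjustment example (arithmetic only): with rate_alloc == 512
 * and 1040 pages allocated in one cycle, mult == 1040 / 512 == 2, so
 * the rate grows by 2 * VMW_BALLOON_RATE_ALLOC_INC == 32 pages/sec to
 * 544, capped at VMW_BALLOON_RATE_ALLOC_MAX (2048). A failed CANSLEEP
 * allocation instead halves the rate, with a floor of
 * VMW_BALLOON_RATE_ALLOC_MIN (512).
 */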
/*
 * Decrease the size of the balloon allowing guest to use more memory.
 */
static void vmballoon_deflate(struct vmballoon *b)
{
	unsigned is_2m_pages;

	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);

	/* free pages to reach target */
	for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes;
			is_2m_pages++) {
		struct page *page, *next;
		unsigned int num_pages = 0;
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
			if (b->reset_required ||
				(b->target > 0 &&
					b->size - num_pages
					* vmballoon_page_size(is_2m_pages)
				< b->target + vmballoon_page_size(true)))
				break;

			list_del(&page->lru);
			b->ops->add_page(b, num_pages++, page);

			if (num_pages == b->batch_max_pages) {
				int error;

				error = b->ops->unlock(b, num_pages,
						is_2m_pages, &b->target);
				num_pages = 0;
				if (error)
					return;
			}

			cond_resched();
		}

		if (num_pages > 0)
			b->ops->unlock(b, num_pages, is_2m_pages, &b->target);
	}
}

static const struct vmballoon_ops vmballoon_basic_ops = {
	.add_page = vmballoon_add_page,
	.lock = vmballoon_lock_page,
	.unlock = vmballoon_unlock_page
};

static const struct vmballoon_ops vmballoon_batched_ops = {
	.add_page = vmballoon_add_batched_page,
	.lock = vmballoon_lock_batched_page,
	.unlock = vmballoon_unlock_batched_page
};

static bool vmballoon_init_batching(struct vmballoon *b)
{
	b->page = alloc_page(VMW_PAGE_ALLOC_NOSLEEP);
	if (!b->page)
		return false;

	b->batch_page = vmap(&b->page, 1, VM_MAP, PAGE_KERNEL);
	if (!b->batch_page) {
		__free_page(b->page);
		return false;
	}

	return true;
}

/*
 * Receive notification and resize balloon
 */
static void vmballoon_doorbell(void *client_data)
{
	struct vmballoon *b = client_data;

	STATS_INC(b->stats.doorbell);

	mod_delayed_work(system_freezable_wq, &b->dwork, 0);
}

/*
 * Clean up vmci doorbell
 */
static void vmballoon_vmci_cleanup(struct vmballoon *b)
{
	int error;

	VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, VMCI_INVALID_ID,
			VMCI_INVALID_ID, error);
	STATS_INC(b->stats.doorbell_unset);

	if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
		vmci_doorbell_destroy(b->vmci_doorbell);
		b->vmci_doorbell = VMCI_INVALID_HANDLE;
	}
}

/*
 * Initialize vmci doorbell, to get notified as soon as balloon changes
 */
static int vmballoon_vmci_init(struct vmballoon *b)
{
	int error = 0;

	if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) != 0) {
		error = vmci_doorbell_create(&b->vmci_doorbell,
				VMCI_FLAG_DELAYED_CB,
				VMCI_PRIVILEGE_FLAG_RESTRICTED,
				vmballoon_doorbell, b);

		if (error == VMCI_SUCCESS) {
			VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET,
					b->vmci_doorbell.context,
					b->vmci_doorbell.resource, error);
			STATS_INC(b->stats.doorbell_set);
		}
	}

	if (error != 0) {
		vmballoon_vmci_cleanup(b);

		return -EIO;
	}

	return 0;
}
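/*
 * Design note: without the doorbell the driver only notices target
 * changes on its one-second timer (see vmballoon_work() below). The
 * doorbell callback passes a zero delay to mod_delayed_work(), so a
 * host-signalled resize is picked up immediately instead of waiting
 * for the next tick.
 */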
/*
 * Perform standard reset sequence by popping the balloon (in case it
 * is not empty) and then restarting protocol. This operation normally
 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
 */
static void vmballoon_reset(struct vmballoon *b)
{
	int error;

	vmballoon_vmci_cleanup(b);

	/* free all pages, skipping monitor unlock */
	vmballoon_pop(b);

	if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
		return;

	if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
		b->ops = &vmballoon_batched_ops;
		b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
		if (!vmballoon_init_batching(b)) {
			/*
			 * We failed to initialize batching, inform the monitor
			 * about it by sending a null capability.
			 *
			 * The guest will retry in one second.
			 */
			vmballoon_send_start(b, 0);
			return;
		}
	} else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
		b->ops = &vmballoon_basic_ops;
		b->batch_max_pages = 1;
	}

	b->reset_required = false;

	error = vmballoon_vmci_init(b);
	if (error)
		pr_err("failed to initialize vmci doorbell\n");

	if (!vmballoon_send_guest_id(b))
		pr_err("failed to send guest ID to the host\n");
}

/*
 * Balloon work function: reset protocol, if needed, get the new size and
 * adjust balloon as needed. Repeat in 1 sec.
 */
static void vmballoon_work(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
	unsigned int target;

	STATS_INC(b->stats.timer);

	if (b->reset_required)
		vmballoon_reset(b);

	if (b->slow_allocation_cycles > 0)
		b->slow_allocation_cycles--;

	if (!b->reset_required && vmballoon_send_get_target(b, &target)) {
		/* update target, adjust size */
		b->target = target;

		if (b->size < target)
			vmballoon_inflate(b);
		else if (target == 0 ||
				b->size > target + vmballoon_page_size(true))
			vmballoon_deflate(b);
	}

	/*
	 * We are using a freezable workqueue so that balloon operations are
	 * stopped while the system transitions to/from sleep/hibernation.
	 */
	queue_delayed_work(system_freezable_wq,
			   dwork, round_jiffies_relative(HZ));
}
/*
 * DEBUGFS Interface
 */
#ifdef CONFIG_DEBUG_FS

static int vmballoon_debug_show(struct seq_file *f, void *offset)
{
	struct vmballoon *b = f->private;
	struct vmballoon_stats *stats = &b->stats;

	/* format capabilities info */
	seq_printf(f,
		   "balloon capabilities:   %#4x\n"
		   "used capabilities:      %#4lx\n"
		   "is resetting:           %c\n",
		   VMW_BALLOON_CAPABILITIES, b->capabilities,
		   b->reset_required ? 'y' : 'n');

	/* format size info */
	seq_printf(f,
		   "target:             %8d pages\n"
		   "current:            %8d pages\n",
		   b->target, b->size);

	/* format rate info */
	seq_printf(f,
		   "rateSleepAlloc:     %8d pages/sec\n",
		   b->rate_alloc);

	seq_printf(f,
		   "\n"
		   "timer:              %8u\n"
		   "doorbell:           %8u\n"
		   "start:              %8u (%4u failed)\n"
		   "guestType:          %8u (%4u failed)\n"
		   "2m-lock:            %8u (%4u failed)\n"
		   "lock:               %8u (%4u failed)\n"
		   "2m-unlock:          %8u (%4u failed)\n"
		   "unlock:             %8u (%4u failed)\n"
		   "target:             %8u (%4u failed)\n"
		   "prim2mAlloc:        %8u (%4u failed)\n"
		   "primNoSleepAlloc:   %8u (%4u failed)\n"
		   "primCanSleepAlloc:  %8u (%4u failed)\n"
		   "prim2mFree:         %8u\n"
		   "primFree:           %8u\n"
		   "err2mAlloc:         %8u\n"
		   "errAlloc:           %8u\n"
		   "err2mFree:          %8u\n"
		   "errFree:            %8u\n"
		   "doorbellSet:        %8u\n"
		   "doorbellUnset:      %8u\n",
		   stats->timer,
		   stats->doorbell,
		   stats->start, stats->start_fail,
		   stats->guest_type, stats->guest_type_fail,
		   stats->lock[true], stats->lock_fail[true],
		   stats->lock[false], stats->lock_fail[false],
		   stats->unlock[true], stats->unlock_fail[true],
		   stats->unlock[false], stats->unlock_fail[false],
		   stats->target, stats->target_fail,
		   stats->alloc[true], stats->alloc_fail[true],
		   stats->alloc[false], stats->alloc_fail[false],
		   stats->sleep_alloc, stats->sleep_alloc_fail,
		   stats->free[true],
		   stats->free[false],
		   stats->refused_alloc[true], stats->refused_alloc[false],
		   stats->refused_free[true], stats->refused_free[false],
		   stats->doorbell_set, stats->doorbell_unset);

	return 0;
}

static int vmballoon_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, vmballoon_debug_show, inode->i_private);
}

static const struct file_operations vmballoon_debug_fops = {
	.owner		= THIS_MODULE,
	.open		= vmballoon_debug_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init vmballoon_debugfs_init(struct vmballoon *b)
{
	int error;

	b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
					   &vmballoon_debug_fops);
	if (IS_ERR(b->dbg_entry)) {
		error = PTR_ERR(b->dbg_entry);
		pr_err("failed to create debugfs entry, error: %d\n", error);
		return error;
	}

	return 0;
}

static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
{
	debugfs_remove(b->dbg_entry);
}

#else

static inline int vmballoon_debugfs_init(struct vmballoon *b)
{
	return 0;
}

static inline void vmballoon_debugfs_exit(struct vmballoon *b)
{
}

#endif	/* CONFIG_DEBUG_FS */
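/*
 * Usage note (assuming debugfs is mounted in its usual location): the
 * statistics above can be read from inside the guest with
 *
 *	# cat /sys/kernel/debug/vmmemctl
 *
 * The counters are only compiled in when CONFIG_DEBUG_FS is enabled.
 */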
static int __init vmballoon_init(void)
{
	int error;
	unsigned is_2m_pages;

	/*
	 * Check if we are running on VMware's hypervisor and bail out
	 * if we are not.
	 */
	if (x86_hyper_type != X86_HYPER_VMWARE)
		return -ENODEV;

	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
			is_2m_pages++) {
		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages);
		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages);
	}

	/* initialize rates */
	balloon.rate_alloc = VMW_BALLOON_RATE_ALLOC_MAX;

	INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);

	error = vmballoon_debugfs_init(&balloon);
	if (error)
		return error;

	balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
	balloon.batch_page = NULL;
	balloon.page = NULL;
	balloon.reset_required = true;

	queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);

	return 0;
}
module_init(vmballoon_init);

static void __exit vmballoon_exit(void)
{
	vmballoon_vmci_cleanup(&balloon);
	cancel_delayed_work_sync(&balloon.dwork);

	vmballoon_debugfs_exit(&balloon);

	/*
	 * Deallocate all reserved memory, and reset connection with monitor.
	 * Reset connection before deallocating memory to avoid potential for
	 * additional spurious resets from guest touching deallocated pages.
	 */
	vmballoon_send_start(&balloon, 0);
	vmballoon_pop(&balloon);
}
module_exit(vmballoon_exit);