1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Virtio balloon implementation, inspired by Dor Laor and Marcelo 4 * Tosatti's implementations. 5 * 6 * Copyright 2008 Rusty Russell IBM Corporation 7 */ 8 9 #include <linux/virtio.h> 10 #include <linux/virtio_balloon.h> 11 #include <linux/swap.h> 12 #include <linux/workqueue.h> 13 #include <linux/delay.h> 14 #include <linux/slab.h> 15 #include <linux/module.h> 16 #include <linux/balloon_compaction.h> 17 #include <linux/wait.h> 18 #include <linux/mm.h> 19 #include <linux/mount.h> 20 #include <linux/magic.h> 21 22 /* 23 * Balloon device works in 4K page units. So each page is pointed to by 24 * multiple balloon pages. All memory counters in this driver are in balloon 25 * page units. 26 */ 27 #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT) 28 #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256 29 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80 30 31 #define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \ 32 __GFP_NOMEMALLOC) 33 /* The order of free page blocks to report to host */ 34 #define VIRTIO_BALLOON_FREE_PAGE_ORDER (MAX_ORDER - 1) 35 /* The size of a free page block in bytes */ 36 #define VIRTIO_BALLOON_FREE_PAGE_SIZE \ 37 (1 << (VIRTIO_BALLOON_FREE_PAGE_ORDER + PAGE_SHIFT)) 38 39 #ifdef CONFIG_BALLOON_COMPACTION 40 static struct vfsmount *balloon_mnt; 41 #endif 42 43 enum virtio_balloon_vq { 44 VIRTIO_BALLOON_VQ_INFLATE, 45 VIRTIO_BALLOON_VQ_DEFLATE, 46 VIRTIO_BALLOON_VQ_STATS, 47 VIRTIO_BALLOON_VQ_FREE_PAGE, 48 VIRTIO_BALLOON_VQ_MAX 49 }; 50 51 enum virtio_balloon_config_read { 52 VIRTIO_BALLOON_CONFIG_READ_CMD_ID = 0, 53 }; 54 55 struct virtio_balloon { 56 struct virtio_device *vdev; 57 struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; 58 59 /* Balloon's own wq for cpu-intensive work items */ 60 struct workqueue_struct *balloon_wq; 61 /* The free page reporting work item submitted to the balloon wq */ 62 struct work_struct report_free_page_work; 63 64 /* The balloon servicing is delegated to a freezable workqueue. */ 65 struct work_struct update_balloon_stats_work; 66 struct work_struct update_balloon_size_work; 67 68 /* Prevent updating balloon when it is being canceled. */ 69 spinlock_t stop_update_lock; 70 bool stop_update; 71 /* Bitmap to indicate if reading the related config fields are needed */ 72 unsigned long config_read_bitmap; 73 74 /* The list of allocated free pages, waiting to be given back to mm */ 75 struct list_head free_page_list; 76 spinlock_t free_page_list_lock; 77 /* The number of free page blocks on the above list */ 78 unsigned long num_free_page_blocks; 79 /* 80 * The cmd id received from host. 81 * Read it via virtio_balloon_cmd_id_received to get the latest value 82 * sent from host. 83 */ 84 u32 cmd_id_received_cache; 85 /* The cmd id that is actively in use */ 86 __virtio32 cmd_id_active; 87 /* Buffer to store the stop sign */ 88 __virtio32 cmd_id_stop; 89 90 /* Waiting for host to ack the pages we released. */ 91 wait_queue_head_t acked; 92 93 /* Number of balloon pages we've told the Host we're not using. */ 94 unsigned int num_pages; 95 /* 96 * The pages we've told the Host we're not using are enqueued 97 * at vb_dev_info->pages list. 98 * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE 99 * to num_pages above. 100 */ 101 struct balloon_dev_info vb_dev_info; 102 103 /* Synchronize access/update to this struct virtio_balloon elements */ 104 struct mutex balloon_lock; 105 106 /* The array of pfns we tell the Host about. */ 107 unsigned int num_pfns; 108 __virtio32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX]; 109 110 /* Memory statistics */ 111 struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; 112 113 /* To register a shrinker to shrink memory upon memory pressure */ 114 struct shrinker shrinker; 115 }; 116 117 static struct virtio_device_id id_table[] = { 118 { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID }, 119 { 0 }, 120 }; 121 122 static u32 page_to_balloon_pfn(struct page *page) 123 { 124 unsigned long pfn = page_to_pfn(page); 125 126 BUILD_BUG_ON(PAGE_SHIFT < VIRTIO_BALLOON_PFN_SHIFT); 127 /* Convert pfn from Linux page size to balloon page size. */ 128 return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE; 129 } 130 131 static void balloon_ack(struct virtqueue *vq) 132 { 133 struct virtio_balloon *vb = vq->vdev->priv; 134 135 wake_up(&vb->acked); 136 } 137 138 static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq) 139 { 140 struct scatterlist sg; 141 unsigned int len; 142 143 sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns); 144 145 /* We should always be able to add one buffer to an empty queue. */ 146 virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); 147 virtqueue_kick(vq); 148 149 /* When host has read buffer, this completes via balloon_ack */ 150 wait_event(vb->acked, virtqueue_get_buf(vq, &len)); 151 152 } 153 154 static void set_page_pfns(struct virtio_balloon *vb, 155 __virtio32 pfns[], struct page *page) 156 { 157 unsigned int i; 158 159 /* 160 * Set balloon pfns pointing at this page. 161 * Note that the first pfn points at start of the page. 162 */ 163 for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++) 164 pfns[i] = cpu_to_virtio32(vb->vdev, 165 page_to_balloon_pfn(page) + i); 166 } 167 168 static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) 169 { 170 unsigned num_allocated_pages; 171 unsigned num_pfns; 172 struct page *page; 173 LIST_HEAD(pages); 174 175 /* We can only do one array worth at a time. */ 176 num = min(num, ARRAY_SIZE(vb->pfns)); 177 178 for (num_pfns = 0; num_pfns < num; 179 num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { 180 struct page *page = balloon_page_alloc(); 181 182 if (!page) { 183 dev_info_ratelimited(&vb->vdev->dev, 184 "Out of puff! Can't get %u pages\n", 185 VIRTIO_BALLOON_PAGES_PER_PAGE); 186 /* Sleep for at least 1/5 of a second before retry. */ 187 msleep(200); 188 break; 189 } 190 191 balloon_page_push(&pages, page); 192 } 193 194 mutex_lock(&vb->balloon_lock); 195 196 vb->num_pfns = 0; 197 198 while ((page = balloon_page_pop(&pages))) { 199 balloon_page_enqueue(&vb->vb_dev_info, page); 200 201 set_page_pfns(vb, vb->pfns + vb->num_pfns, page); 202 vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE; 203 if (!virtio_has_feature(vb->vdev, 204 VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) 205 adjust_managed_page_count(page, -1); 206 vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE; 207 } 208 209 num_allocated_pages = vb->num_pfns; 210 /* Did we get any? */ 211 if (vb->num_pfns != 0) 212 tell_host(vb, vb->inflate_vq); 213 mutex_unlock(&vb->balloon_lock); 214 215 return num_allocated_pages; 216 } 217 218 static void release_pages_balloon(struct virtio_balloon *vb, 219 struct list_head *pages) 220 { 221 struct page *page, *next; 222 223 list_for_each_entry_safe(page, next, pages, lru) { 224 if (!virtio_has_feature(vb->vdev, 225 VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) 226 adjust_managed_page_count(page, 1); 227 list_del(&page->lru); 228 put_page(page); /* balloon reference */ 229 } 230 } 231 232 static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) 233 { 234 unsigned num_freed_pages; 235 struct page *page; 236 struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; 237 LIST_HEAD(pages); 238 239 /* We can only do one array worth at a time. */ 240 num = min(num, ARRAY_SIZE(vb->pfns)); 241 242 mutex_lock(&vb->balloon_lock); 243 /* We can't release more pages than taken */ 244 num = min(num, (size_t)vb->num_pages); 245 for (vb->num_pfns = 0; vb->num_pfns < num; 246 vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { 247 page = balloon_page_dequeue(vb_dev_info); 248 if (!page) 249 break; 250 set_page_pfns(vb, vb->pfns + vb->num_pfns, page); 251 list_add(&page->lru, &pages); 252 vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE; 253 } 254 255 num_freed_pages = vb->num_pfns; 256 /* 257 * Note that if 258 * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); 259 * is true, we *have* to do it in this order 260 */ 261 if (vb->num_pfns != 0) 262 tell_host(vb, vb->deflate_vq); 263 release_pages_balloon(vb, &pages); 264 mutex_unlock(&vb->balloon_lock); 265 return num_freed_pages; 266 } 267 268 static inline void update_stat(struct virtio_balloon *vb, int idx, 269 u16 tag, u64 val) 270 { 271 BUG_ON(idx >= VIRTIO_BALLOON_S_NR); 272 vb->stats[idx].tag = cpu_to_virtio16(vb->vdev, tag); 273 vb->stats[idx].val = cpu_to_virtio64(vb->vdev, val); 274 } 275 276 #define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT) 277 278 static unsigned int update_balloon_stats(struct virtio_balloon *vb) 279 { 280 unsigned long events[NR_VM_EVENT_ITEMS]; 281 struct sysinfo i; 282 unsigned int idx = 0; 283 long available; 284 unsigned long caches; 285 286 all_vm_events(events); 287 si_meminfo(&i); 288 289 available = si_mem_available(); 290 caches = global_node_page_state(NR_FILE_PAGES); 291 292 #ifdef CONFIG_VM_EVENT_COUNTERS 293 update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, 294 pages_to_bytes(events[PSWPIN])); 295 update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, 296 pages_to_bytes(events[PSWPOUT])); 297 update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]); 298 update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]); 299 #ifdef CONFIG_HUGETLB_PAGE 300 update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC, 301 events[HTLB_BUDDY_PGALLOC]); 302 update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGFAIL, 303 events[HTLB_BUDDY_PGALLOC_FAIL]); 304 #endif 305 #endif 306 update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, 307 pages_to_bytes(i.freeram)); 308 update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, 309 pages_to_bytes(i.totalram)); 310 update_stat(vb, idx++, VIRTIO_BALLOON_S_AVAIL, 311 pages_to_bytes(available)); 312 update_stat(vb, idx++, VIRTIO_BALLOON_S_CACHES, 313 pages_to_bytes(caches)); 314 315 return idx; 316 } 317 318 /* 319 * While most virtqueues communicate guest-initiated requests to the hypervisor, 320 * the stats queue operates in reverse. The driver initializes the virtqueue 321 * with a single buffer. From that point forward, all conversations consist of 322 * a hypervisor request (a call to this function) which directs us to refill 323 * the virtqueue with a fresh stats buffer. Since stats collection can sleep, 324 * we delegate the job to a freezable workqueue that will do the actual work via 325 * stats_handle_request(). 326 */ 327 static void stats_request(struct virtqueue *vq) 328 { 329 struct virtio_balloon *vb = vq->vdev->priv; 330 331 spin_lock(&vb->stop_update_lock); 332 if (!vb->stop_update) 333 queue_work(system_freezable_wq, &vb->update_balloon_stats_work); 334 spin_unlock(&vb->stop_update_lock); 335 } 336 337 static void stats_handle_request(struct virtio_balloon *vb) 338 { 339 struct virtqueue *vq; 340 struct scatterlist sg; 341 unsigned int len, num_stats; 342 343 num_stats = update_balloon_stats(vb); 344 345 vq = vb->stats_vq; 346 if (!virtqueue_get_buf(vq, &len)) 347 return; 348 sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); 349 virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); 350 virtqueue_kick(vq); 351 } 352 353 static inline s64 towards_target(struct virtio_balloon *vb) 354 { 355 s64 target; 356 u32 num_pages; 357 358 virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages, 359 &num_pages); 360 361 /* Legacy balloon config space is LE, unlike all other devices. */ 362 if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1)) 363 num_pages = le32_to_cpu((__force __le32)num_pages); 364 365 target = num_pages; 366 return target - vb->num_pages; 367 } 368 369 /* Gives back @num_to_return blocks of free pages to mm. */ 370 static unsigned long return_free_pages_to_mm(struct virtio_balloon *vb, 371 unsigned long num_to_return) 372 { 373 struct page *page; 374 unsigned long num_returned; 375 376 spin_lock_irq(&vb->free_page_list_lock); 377 for (num_returned = 0; num_returned < num_to_return; num_returned++) { 378 page = balloon_page_pop(&vb->free_page_list); 379 if (!page) 380 break; 381 free_pages((unsigned long)page_address(page), 382 VIRTIO_BALLOON_FREE_PAGE_ORDER); 383 } 384 vb->num_free_page_blocks -= num_returned; 385 spin_unlock_irq(&vb->free_page_list_lock); 386 387 return num_returned; 388 } 389 390 static void virtio_balloon_queue_free_page_work(struct virtio_balloon *vb) 391 { 392 if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) 393 return; 394 395 /* No need to queue the work if the bit was already set. */ 396 if (test_and_set_bit(VIRTIO_BALLOON_CONFIG_READ_CMD_ID, 397 &vb->config_read_bitmap)) 398 return; 399 400 queue_work(vb->balloon_wq, &vb->report_free_page_work); 401 } 402 403 static void virtballoon_changed(struct virtio_device *vdev) 404 { 405 struct virtio_balloon *vb = vdev->priv; 406 unsigned long flags; 407 408 spin_lock_irqsave(&vb->stop_update_lock, flags); 409 if (!vb->stop_update) { 410 queue_work(system_freezable_wq, 411 &vb->update_balloon_size_work); 412 virtio_balloon_queue_free_page_work(vb); 413 } 414 spin_unlock_irqrestore(&vb->stop_update_lock, flags); 415 } 416 417 static void update_balloon_size(struct virtio_balloon *vb) 418 { 419 u32 actual = vb->num_pages; 420 421 /* Legacy balloon config space is LE, unlike all other devices. */ 422 if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1)) 423 actual = (__force u32)cpu_to_le32(actual); 424 425 virtio_cwrite(vb->vdev, struct virtio_balloon_config, actual, 426 &actual); 427 } 428 429 static void update_balloon_stats_func(struct work_struct *work) 430 { 431 struct virtio_balloon *vb; 432 433 vb = container_of(work, struct virtio_balloon, 434 update_balloon_stats_work); 435 stats_handle_request(vb); 436 } 437 438 static void update_balloon_size_func(struct work_struct *work) 439 { 440 struct virtio_balloon *vb; 441 s64 diff; 442 443 vb = container_of(work, struct virtio_balloon, 444 update_balloon_size_work); 445 diff = towards_target(vb); 446 447 if (!diff) 448 return; 449 450 if (diff > 0) 451 diff -= fill_balloon(vb, diff); 452 else 453 diff += leak_balloon(vb, -diff); 454 update_balloon_size(vb); 455 456 if (diff) 457 queue_work(system_freezable_wq, work); 458 } 459 460 static int init_vqs(struct virtio_balloon *vb) 461 { 462 struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX]; 463 vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX]; 464 const char *names[VIRTIO_BALLOON_VQ_MAX]; 465 int err; 466 467 /* 468 * Inflateq and deflateq are used unconditionally. The names[] 469 * will be NULL if the related feature is not enabled, which will 470 * cause no allocation for the corresponding virtqueue in find_vqs. 471 */ 472 callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack; 473 names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate"; 474 callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack; 475 names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate"; 476 names[VIRTIO_BALLOON_VQ_STATS] = NULL; 477 names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; 478 479 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { 480 names[VIRTIO_BALLOON_VQ_STATS] = "stats"; 481 callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request; 482 } 483 484 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { 485 names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq"; 486 callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; 487 } 488 489 err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX, 490 vqs, callbacks, names, NULL, NULL); 491 if (err) 492 return err; 493 494 vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE]; 495 vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE]; 496 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { 497 struct scatterlist sg; 498 unsigned int num_stats; 499 vb->stats_vq = vqs[VIRTIO_BALLOON_VQ_STATS]; 500 501 /* 502 * Prime this virtqueue with one buffer so the hypervisor can 503 * use it to signal us later (it can't be broken yet!). 504 */ 505 num_stats = update_balloon_stats(vb); 506 507 sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); 508 err = virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, 509 GFP_KERNEL); 510 if (err) { 511 dev_warn(&vb->vdev->dev, "%s: add stat_vq failed\n", 512 __func__); 513 return err; 514 } 515 virtqueue_kick(vb->stats_vq); 516 } 517 518 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) 519 vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE]; 520 521 return 0; 522 } 523 524 static u32 virtio_balloon_cmd_id_received(struct virtio_balloon *vb) 525 { 526 if (test_and_clear_bit(VIRTIO_BALLOON_CONFIG_READ_CMD_ID, 527 &vb->config_read_bitmap)) 528 virtio_cread(vb->vdev, struct virtio_balloon_config, 529 free_page_report_cmd_id, 530 &vb->cmd_id_received_cache); 531 532 return vb->cmd_id_received_cache; 533 } 534 535 static int send_cmd_id_start(struct virtio_balloon *vb) 536 { 537 struct scatterlist sg; 538 struct virtqueue *vq = vb->free_page_vq; 539 int err, unused; 540 541 /* Detach all the used buffers from the vq */ 542 while (virtqueue_get_buf(vq, &unused)) 543 ; 544 545 vb->cmd_id_active = virtio32_to_cpu(vb->vdev, 546 virtio_balloon_cmd_id_received(vb)); 547 sg_init_one(&sg, &vb->cmd_id_active, sizeof(vb->cmd_id_active)); 548 err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_active, GFP_KERNEL); 549 if (!err) 550 virtqueue_kick(vq); 551 return err; 552 } 553 554 static int send_cmd_id_stop(struct virtio_balloon *vb) 555 { 556 struct scatterlist sg; 557 struct virtqueue *vq = vb->free_page_vq; 558 int err, unused; 559 560 /* Detach all the used buffers from the vq */ 561 while (virtqueue_get_buf(vq, &unused)) 562 ; 563 564 sg_init_one(&sg, &vb->cmd_id_stop, sizeof(vb->cmd_id_stop)); 565 err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_stop, GFP_KERNEL); 566 if (!err) 567 virtqueue_kick(vq); 568 return err; 569 } 570 571 static int get_free_page_and_send(struct virtio_balloon *vb) 572 { 573 struct virtqueue *vq = vb->free_page_vq; 574 struct page *page; 575 struct scatterlist sg; 576 int err, unused; 577 void *p; 578 579 /* Detach all the used buffers from the vq */ 580 while (virtqueue_get_buf(vq, &unused)) 581 ; 582 583 page = alloc_pages(VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG, 584 VIRTIO_BALLOON_FREE_PAGE_ORDER); 585 /* 586 * When the allocation returns NULL, it indicates that we have got all 587 * the possible free pages, so return -EINTR to stop. 588 */ 589 if (!page) 590 return -EINTR; 591 592 p = page_address(page); 593 sg_init_one(&sg, p, VIRTIO_BALLOON_FREE_PAGE_SIZE); 594 /* There is always 1 entry reserved for the cmd id to use. */ 595 if (vq->num_free > 1) { 596 err = virtqueue_add_inbuf(vq, &sg, 1, p, GFP_KERNEL); 597 if (unlikely(err)) { 598 free_pages((unsigned long)p, 599 VIRTIO_BALLOON_FREE_PAGE_ORDER); 600 return err; 601 } 602 virtqueue_kick(vq); 603 spin_lock_irq(&vb->free_page_list_lock); 604 balloon_page_push(&vb->free_page_list, page); 605 vb->num_free_page_blocks++; 606 spin_unlock_irq(&vb->free_page_list_lock); 607 } else { 608 /* 609 * The vq has no available entry to add this page block, so 610 * just free it. 611 */ 612 free_pages((unsigned long)p, VIRTIO_BALLOON_FREE_PAGE_ORDER); 613 } 614 615 return 0; 616 } 617 618 static int send_free_pages(struct virtio_balloon *vb) 619 { 620 int err; 621 u32 cmd_id_active; 622 623 while (1) { 624 /* 625 * If a stop id or a new cmd id was just received from host, 626 * stop the reporting. 627 */ 628 cmd_id_active = virtio32_to_cpu(vb->vdev, vb->cmd_id_active); 629 if (unlikely(cmd_id_active != 630 virtio_balloon_cmd_id_received(vb))) 631 break; 632 633 /* 634 * The free page blocks are allocated and sent to host one by 635 * one. 636 */ 637 err = get_free_page_and_send(vb); 638 if (err == -EINTR) 639 break; 640 else if (unlikely(err)) 641 return err; 642 } 643 644 return 0; 645 } 646 647 static void virtio_balloon_report_free_page(struct virtio_balloon *vb) 648 { 649 int err; 650 struct device *dev = &vb->vdev->dev; 651 652 /* Start by sending the received cmd id to host with an outbuf. */ 653 err = send_cmd_id_start(vb); 654 if (unlikely(err)) 655 dev_err(dev, "Failed to send a start id, err = %d\n", err); 656 657 err = send_free_pages(vb); 658 if (unlikely(err)) 659 dev_err(dev, "Failed to send a free page, err = %d\n", err); 660 661 /* End by sending a stop id to host with an outbuf. */ 662 err = send_cmd_id_stop(vb); 663 if (unlikely(err)) 664 dev_err(dev, "Failed to send a stop id, err = %d\n", err); 665 } 666 667 static void report_free_page_func(struct work_struct *work) 668 { 669 struct virtio_balloon *vb = container_of(work, struct virtio_balloon, 670 report_free_page_work); 671 u32 cmd_id_received; 672 673 cmd_id_received = virtio_balloon_cmd_id_received(vb); 674 if (cmd_id_received == VIRTIO_BALLOON_CMD_ID_DONE) { 675 /* Pass ULONG_MAX to give back all the free pages */ 676 return_free_pages_to_mm(vb, ULONG_MAX); 677 } else if (cmd_id_received != VIRTIO_BALLOON_CMD_ID_STOP && 678 cmd_id_received != 679 virtio32_to_cpu(vb->vdev, vb->cmd_id_active)) { 680 virtio_balloon_report_free_page(vb); 681 } 682 } 683 684 #ifdef CONFIG_BALLOON_COMPACTION 685 /* 686 * virtballoon_migratepage - perform the balloon page migration on behalf of 687 * a compation thread. (called under page lock) 688 * @vb_dev_info: the balloon device 689 * @newpage: page that will replace the isolated page after migration finishes. 690 * @page : the isolated (old) page that is about to be migrated to newpage. 691 * @mode : compaction mode -- not used for balloon page migration. 692 * 693 * After a ballooned page gets isolated by compaction procedures, this is the 694 * function that performs the page migration on behalf of a compaction thread 695 * The page migration for virtio balloon is done in a simple swap fashion which 696 * follows these two macro steps: 697 * 1) insert newpage into vb->pages list and update the host about it; 698 * 2) update the host about the old page removed from vb->pages list; 699 * 700 * This function preforms the balloon page migration task. 701 * Called through balloon_mapping->a_ops->migratepage 702 */ 703 static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, 704 struct page *newpage, struct page *page, enum migrate_mode mode) 705 { 706 struct virtio_balloon *vb = container_of(vb_dev_info, 707 struct virtio_balloon, vb_dev_info); 708 unsigned long flags; 709 710 /* 711 * In order to avoid lock contention while migrating pages concurrently 712 * to leak_balloon() or fill_balloon() we just give up the balloon_lock 713 * this turn, as it is easier to retry the page migration later. 714 * This also prevents fill_balloon() getting stuck into a mutex 715 * recursion in the case it ends up triggering memory compaction 716 * while it is attempting to inflate the ballon. 717 */ 718 if (!mutex_trylock(&vb->balloon_lock)) 719 return -EAGAIN; 720 721 get_page(newpage); /* balloon reference */ 722 723 /* balloon's page migration 1st step -- inflate "newpage" */ 724 spin_lock_irqsave(&vb_dev_info->pages_lock, flags); 725 balloon_page_insert(vb_dev_info, newpage); 726 vb_dev_info->isolated_pages--; 727 __count_vm_event(BALLOON_MIGRATE); 728 spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags); 729 vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; 730 set_page_pfns(vb, vb->pfns, newpage); 731 tell_host(vb, vb->inflate_vq); 732 733 /* balloon's page migration 2nd step -- deflate "page" */ 734 spin_lock_irqsave(&vb_dev_info->pages_lock, flags); 735 balloon_page_delete(page); 736 spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags); 737 vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; 738 set_page_pfns(vb, vb->pfns, page); 739 tell_host(vb, vb->deflate_vq); 740 741 mutex_unlock(&vb->balloon_lock); 742 743 put_page(page); /* balloon reference */ 744 745 return MIGRATEPAGE_SUCCESS; 746 } 747 748 static struct dentry *balloon_mount(struct file_system_type *fs_type, 749 int flags, const char *dev_name, void *data) 750 { 751 static const struct dentry_operations ops = { 752 .d_dname = simple_dname, 753 }; 754 755 return mount_pseudo(fs_type, "balloon-kvm:", NULL, &ops, 756 BALLOON_KVM_MAGIC); 757 } 758 759 static struct file_system_type balloon_fs = { 760 .name = "balloon-kvm", 761 .mount = balloon_mount, 762 .kill_sb = kill_anon_super, 763 }; 764 765 #endif /* CONFIG_BALLOON_COMPACTION */ 766 767 static unsigned long shrink_free_pages(struct virtio_balloon *vb, 768 unsigned long pages_to_free) 769 { 770 unsigned long blocks_to_free, blocks_freed; 771 772 pages_to_free = round_up(pages_to_free, 773 1 << VIRTIO_BALLOON_FREE_PAGE_ORDER); 774 blocks_to_free = pages_to_free >> VIRTIO_BALLOON_FREE_PAGE_ORDER; 775 blocks_freed = return_free_pages_to_mm(vb, blocks_to_free); 776 777 return blocks_freed << VIRTIO_BALLOON_FREE_PAGE_ORDER; 778 } 779 780 static unsigned long shrink_balloon_pages(struct virtio_balloon *vb, 781 unsigned long pages_to_free) 782 { 783 unsigned long pages_freed = 0; 784 785 /* 786 * One invocation of leak_balloon can deflate at most 787 * VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it 788 * multiple times to deflate pages till reaching pages_to_free. 789 */ 790 while (vb->num_pages && pages_to_free) { 791 pages_freed += leak_balloon(vb, pages_to_free) / 792 VIRTIO_BALLOON_PAGES_PER_PAGE; 793 pages_to_free -= pages_freed; 794 } 795 update_balloon_size(vb); 796 797 return pages_freed; 798 } 799 800 static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker, 801 struct shrink_control *sc) 802 { 803 unsigned long pages_to_free, pages_freed = 0; 804 struct virtio_balloon *vb = container_of(shrinker, 805 struct virtio_balloon, shrinker); 806 807 pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE; 808 809 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) 810 pages_freed = shrink_free_pages(vb, pages_to_free); 811 812 if (pages_freed >= pages_to_free) 813 return pages_freed; 814 815 pages_freed += shrink_balloon_pages(vb, pages_to_free - pages_freed); 816 817 return pages_freed; 818 } 819 820 static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker, 821 struct shrink_control *sc) 822 { 823 struct virtio_balloon *vb = container_of(shrinker, 824 struct virtio_balloon, shrinker); 825 unsigned long count; 826 827 count = vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE; 828 count += vb->num_free_page_blocks >> VIRTIO_BALLOON_FREE_PAGE_ORDER; 829 830 return count; 831 } 832 833 static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb) 834 { 835 unregister_shrinker(&vb->shrinker); 836 } 837 838 static int virtio_balloon_register_shrinker(struct virtio_balloon *vb) 839 { 840 vb->shrinker.scan_objects = virtio_balloon_shrinker_scan; 841 vb->shrinker.count_objects = virtio_balloon_shrinker_count; 842 vb->shrinker.seeks = DEFAULT_SEEKS; 843 844 return register_shrinker(&vb->shrinker); 845 } 846 847 static int virtballoon_probe(struct virtio_device *vdev) 848 { 849 struct virtio_balloon *vb; 850 __u32 poison_val; 851 int err; 852 853 if (!vdev->config->get) { 854 dev_err(&vdev->dev, "%s failure: config access disabled\n", 855 __func__); 856 return -EINVAL; 857 } 858 859 vdev->priv = vb = kzalloc(sizeof(*vb), GFP_KERNEL); 860 if (!vb) { 861 err = -ENOMEM; 862 goto out; 863 } 864 865 INIT_WORK(&vb->update_balloon_stats_work, update_balloon_stats_func); 866 INIT_WORK(&vb->update_balloon_size_work, update_balloon_size_func); 867 spin_lock_init(&vb->stop_update_lock); 868 mutex_init(&vb->balloon_lock); 869 init_waitqueue_head(&vb->acked); 870 vb->vdev = vdev; 871 872 balloon_devinfo_init(&vb->vb_dev_info); 873 874 err = init_vqs(vb); 875 if (err) 876 goto out_free_vb; 877 878 #ifdef CONFIG_BALLOON_COMPACTION 879 balloon_mnt = kern_mount(&balloon_fs); 880 if (IS_ERR(balloon_mnt)) { 881 err = PTR_ERR(balloon_mnt); 882 goto out_del_vqs; 883 } 884 885 vb->vb_dev_info.migratepage = virtballoon_migratepage; 886 vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); 887 if (IS_ERR(vb->vb_dev_info.inode)) { 888 err = PTR_ERR(vb->vb_dev_info.inode); 889 kern_unmount(balloon_mnt); 890 goto out_del_vqs; 891 } 892 vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops; 893 #endif 894 if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { 895 /* 896 * There is always one entry reserved for cmd id, so the ring 897 * size needs to be at least two to report free page hints. 898 */ 899 if (virtqueue_get_vring_size(vb->free_page_vq) < 2) { 900 err = -ENOSPC; 901 goto out_del_vqs; 902 } 903 vb->balloon_wq = alloc_workqueue("balloon-wq", 904 WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0); 905 if (!vb->balloon_wq) { 906 err = -ENOMEM; 907 goto out_del_vqs; 908 } 909 INIT_WORK(&vb->report_free_page_work, report_free_page_func); 910 vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP; 911 vb->cmd_id_active = cpu_to_virtio32(vb->vdev, 912 VIRTIO_BALLOON_CMD_ID_STOP); 913 vb->cmd_id_stop = cpu_to_virtio32(vb->vdev, 914 VIRTIO_BALLOON_CMD_ID_STOP); 915 spin_lock_init(&vb->free_page_list_lock); 916 INIT_LIST_HEAD(&vb->free_page_list); 917 if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) { 918 memset(&poison_val, PAGE_POISON, sizeof(poison_val)); 919 virtio_cwrite(vb->vdev, struct virtio_balloon_config, 920 poison_val, &poison_val); 921 } 922 } 923 /* 924 * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a 925 * shrinker needs to be registered to relieve memory pressure. 926 */ 927 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) { 928 err = virtio_balloon_register_shrinker(vb); 929 if (err) 930 goto out_del_balloon_wq; 931 } 932 virtio_device_ready(vdev); 933 934 if (towards_target(vb)) 935 virtballoon_changed(vdev); 936 return 0; 937 938 out_del_balloon_wq: 939 if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) 940 destroy_workqueue(vb->balloon_wq); 941 out_del_vqs: 942 vdev->config->del_vqs(vdev); 943 out_free_vb: 944 kfree(vb); 945 out: 946 return err; 947 } 948 949 static void remove_common(struct virtio_balloon *vb) 950 { 951 /* There might be pages left in the balloon: free them. */ 952 while (vb->num_pages) 953 leak_balloon(vb, vb->num_pages); 954 update_balloon_size(vb); 955 956 /* Now we reset the device so we can clean up the queues. */ 957 vb->vdev->config->reset(vb->vdev); 958 959 vb->vdev->config->del_vqs(vb->vdev); 960 } 961 962 static void virtballoon_remove(struct virtio_device *vdev) 963 { 964 struct virtio_balloon *vb = vdev->priv; 965 966 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) 967 virtio_balloon_unregister_shrinker(vb); 968 spin_lock_irq(&vb->stop_update_lock); 969 vb->stop_update = true; 970 spin_unlock_irq(&vb->stop_update_lock); 971 cancel_work_sync(&vb->update_balloon_size_work); 972 cancel_work_sync(&vb->update_balloon_stats_work); 973 974 if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { 975 cancel_work_sync(&vb->report_free_page_work); 976 destroy_workqueue(vb->balloon_wq); 977 } 978 979 remove_common(vb); 980 #ifdef CONFIG_BALLOON_COMPACTION 981 if (vb->vb_dev_info.inode) 982 iput(vb->vb_dev_info.inode); 983 984 kern_unmount(balloon_mnt); 985 #endif 986 kfree(vb); 987 } 988 989 #ifdef CONFIG_PM_SLEEP 990 static int virtballoon_freeze(struct virtio_device *vdev) 991 { 992 struct virtio_balloon *vb = vdev->priv; 993 994 /* 995 * The workqueue is already frozen by the PM core before this 996 * function is called. 997 */ 998 remove_common(vb); 999 return 0; 1000 } 1001 1002 static int virtballoon_restore(struct virtio_device *vdev) 1003 { 1004 struct virtio_balloon *vb = vdev->priv; 1005 int ret; 1006 1007 ret = init_vqs(vdev->priv); 1008 if (ret) 1009 return ret; 1010 1011 virtio_device_ready(vdev); 1012 1013 if (towards_target(vb)) 1014 virtballoon_changed(vdev); 1015 update_balloon_size(vb); 1016 return 0; 1017 } 1018 #endif 1019 1020 static int virtballoon_validate(struct virtio_device *vdev) 1021 { 1022 if (!page_poisoning_enabled()) 1023 __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_POISON); 1024 1025 __virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM); 1026 return 0; 1027 } 1028 1029 static unsigned int features[] = { 1030 VIRTIO_BALLOON_F_MUST_TELL_HOST, 1031 VIRTIO_BALLOON_F_STATS_VQ, 1032 VIRTIO_BALLOON_F_DEFLATE_ON_OOM, 1033 VIRTIO_BALLOON_F_FREE_PAGE_HINT, 1034 VIRTIO_BALLOON_F_PAGE_POISON, 1035 }; 1036 1037 static struct virtio_driver virtio_balloon_driver = { 1038 .feature_table = features, 1039 .feature_table_size = ARRAY_SIZE(features), 1040 .driver.name = KBUILD_MODNAME, 1041 .driver.owner = THIS_MODULE, 1042 .id_table = id_table, 1043 .validate = virtballoon_validate, 1044 .probe = virtballoon_probe, 1045 .remove = virtballoon_remove, 1046 .config_changed = virtballoon_changed, 1047 #ifdef CONFIG_PM_SLEEP 1048 .freeze = virtballoon_freeze, 1049 .restore = virtballoon_restore, 1050 #endif 1051 }; 1052 1053 module_virtio_driver(virtio_balloon_driver); 1054 MODULE_DEVICE_TABLE(virtio, id_table); 1055 MODULE_DESCRIPTION("Virtio balloon driver"); 1056 MODULE_LICENSE("GPL"); 1057