1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Virtio balloon implementation, inspired by Dor Laor and Marcelo 4 * Tosatti's implementations. 5 * 6 * Copyright 2008 Rusty Russell IBM Corporation 7 */ 8 9 #include <linux/virtio.h> 10 #include <linux/virtio_balloon.h> 11 #include <linux/swap.h> 12 #include <linux/workqueue.h> 13 #include <linux/delay.h> 14 #include <linux/slab.h> 15 #include <linux/module.h> 16 #include <linux/balloon_compaction.h> 17 #include <linux/wait.h> 18 #include <linux/mm.h> 19 #include <linux/mount.h> 20 #include <linux/magic.h> 21 #include <linux/pseudo_fs.h> 22 23 /* 24 * Balloon device works in 4K page units. So each page is pointed to by 25 * multiple balloon pages. All memory counters in this driver are in balloon 26 * page units. 27 */ 28 #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT) 29 #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256 30 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80 31 32 #define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \ 33 __GFP_NOMEMALLOC) 34 /* The order of free page blocks to report to host */ 35 #define VIRTIO_BALLOON_HINT_BLOCK_ORDER (MAX_ORDER - 1) 36 /* The size of a free page block in bytes */ 37 #define VIRTIO_BALLOON_HINT_BLOCK_BYTES \ 38 (1 << (VIRTIO_BALLOON_HINT_BLOCK_ORDER + PAGE_SHIFT)) 39 #define VIRTIO_BALLOON_HINT_BLOCK_PAGES (1 << VIRTIO_BALLOON_HINT_BLOCK_ORDER) 40 41 #ifdef CONFIG_BALLOON_COMPACTION 42 static struct vfsmount *balloon_mnt; 43 #endif 44 45 enum virtio_balloon_vq { 46 VIRTIO_BALLOON_VQ_INFLATE, 47 VIRTIO_BALLOON_VQ_DEFLATE, 48 VIRTIO_BALLOON_VQ_STATS, 49 VIRTIO_BALLOON_VQ_FREE_PAGE, 50 VIRTIO_BALLOON_VQ_MAX 51 }; 52 53 enum virtio_balloon_config_read { 54 VIRTIO_BALLOON_CONFIG_READ_CMD_ID = 0, 55 }; 56 57 struct virtio_balloon { 58 struct virtio_device *vdev; 59 struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; 60 61 /* Balloon's own wq for cpu-intensive work items */ 62 struct workqueue_struct *balloon_wq; 63 /* The free page reporting work item submitted to the balloon wq */ 64 struct work_struct report_free_page_work; 65 66 /* The balloon servicing is delegated to a freezable workqueue. */ 67 struct work_struct update_balloon_stats_work; 68 struct work_struct update_balloon_size_work; 69 70 /* Prevent updating balloon when it is being canceled. */ 71 spinlock_t stop_update_lock; 72 bool stop_update; 73 /* Bitmap to indicate if reading the related config fields are needed */ 74 unsigned long config_read_bitmap; 75 76 /* The list of allocated free pages, waiting to be given back to mm */ 77 struct list_head free_page_list; 78 spinlock_t free_page_list_lock; 79 /* The number of free page blocks on the above list */ 80 unsigned long num_free_page_blocks; 81 /* 82 * The cmd id received from host. 83 * Read it via virtio_balloon_cmd_id_received to get the latest value 84 * sent from host. 85 */ 86 u32 cmd_id_received_cache; 87 /* The cmd id that is actively in use */ 88 __virtio32 cmd_id_active; 89 /* Buffer to store the stop sign */ 90 __virtio32 cmd_id_stop; 91 92 /* Waiting for host to ack the pages we released. */ 93 wait_queue_head_t acked; 94 95 /* Number of balloon pages we've told the Host we're not using. */ 96 unsigned int num_pages; 97 /* 98 * The pages we've told the Host we're not using are enqueued 99 * at vb_dev_info->pages list. 100 * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE 101 * to num_pages above. 102 */ 103 struct balloon_dev_info vb_dev_info; 104 105 /* Synchronize access/update to this struct virtio_balloon elements */ 106 struct mutex balloon_lock; 107 108 /* The array of pfns we tell the Host about. */ 109 unsigned int num_pfns; 110 __virtio32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX]; 111 112 /* Memory statistics */ 113 struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; 114 115 /* To register a shrinker to shrink memory upon memory pressure */ 116 struct shrinker shrinker; 117 }; 118 119 static struct virtio_device_id id_table[] = { 120 { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID }, 121 { 0 }, 122 }; 123 124 static u32 page_to_balloon_pfn(struct page *page) 125 { 126 unsigned long pfn = page_to_pfn(page); 127 128 BUILD_BUG_ON(PAGE_SHIFT < VIRTIO_BALLOON_PFN_SHIFT); 129 /* Convert pfn from Linux page size to balloon page size. */ 130 return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE; 131 } 132 133 static void balloon_ack(struct virtqueue *vq) 134 { 135 struct virtio_balloon *vb = vq->vdev->priv; 136 137 wake_up(&vb->acked); 138 } 139 140 static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq) 141 { 142 struct scatterlist sg; 143 unsigned int len; 144 145 sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns); 146 147 /* We should always be able to add one buffer to an empty queue. */ 148 virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); 149 virtqueue_kick(vq); 150 151 /* When host has read buffer, this completes via balloon_ack */ 152 wait_event(vb->acked, virtqueue_get_buf(vq, &len)); 153 154 } 155 156 static void set_page_pfns(struct virtio_balloon *vb, 157 __virtio32 pfns[], struct page *page) 158 { 159 unsigned int i; 160 161 BUILD_BUG_ON(VIRTIO_BALLOON_PAGES_PER_PAGE > VIRTIO_BALLOON_ARRAY_PFNS_MAX); 162 163 /* 164 * Set balloon pfns pointing at this page. 165 * Note that the first pfn points at start of the page. 166 */ 167 for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++) 168 pfns[i] = cpu_to_virtio32(vb->vdev, 169 page_to_balloon_pfn(page) + i); 170 } 171 172 static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) 173 { 174 unsigned num_allocated_pages; 175 unsigned num_pfns; 176 struct page *page; 177 LIST_HEAD(pages); 178 179 /* We can only do one array worth at a time. */ 180 num = min(num, ARRAY_SIZE(vb->pfns)); 181 182 for (num_pfns = 0; num_pfns < num; 183 num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { 184 struct page *page = balloon_page_alloc(); 185 186 if (!page) { 187 dev_info_ratelimited(&vb->vdev->dev, 188 "Out of puff! Can't get %u pages\n", 189 VIRTIO_BALLOON_PAGES_PER_PAGE); 190 /* Sleep for at least 1/5 of a second before retry. */ 191 msleep(200); 192 break; 193 } 194 195 balloon_page_push(&pages, page); 196 } 197 198 mutex_lock(&vb->balloon_lock); 199 200 vb->num_pfns = 0; 201 202 while ((page = balloon_page_pop(&pages))) { 203 balloon_page_enqueue(&vb->vb_dev_info, page); 204 205 set_page_pfns(vb, vb->pfns + vb->num_pfns, page); 206 vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE; 207 if (!virtio_has_feature(vb->vdev, 208 VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) 209 adjust_managed_page_count(page, -1); 210 vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE; 211 } 212 213 num_allocated_pages = vb->num_pfns; 214 /* Did we get any? */ 215 if (vb->num_pfns != 0) 216 tell_host(vb, vb->inflate_vq); 217 mutex_unlock(&vb->balloon_lock); 218 219 return num_allocated_pages; 220 } 221 222 static void release_pages_balloon(struct virtio_balloon *vb, 223 struct list_head *pages) 224 { 225 struct page *page, *next; 226 227 list_for_each_entry_safe(page, next, pages, lru) { 228 if (!virtio_has_feature(vb->vdev, 229 VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) 230 adjust_managed_page_count(page, 1); 231 list_del(&page->lru); 232 put_page(page); /* balloon reference */ 233 } 234 } 235 236 static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) 237 { 238 unsigned num_freed_pages; 239 struct page *page; 240 struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; 241 LIST_HEAD(pages); 242 243 /* We can only do one array worth at a time. */ 244 num = min(num, ARRAY_SIZE(vb->pfns)); 245 246 mutex_lock(&vb->balloon_lock); 247 /* We can't release more pages than taken */ 248 num = min(num, (size_t)vb->num_pages); 249 for (vb->num_pfns = 0; vb->num_pfns < num; 250 vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { 251 page = balloon_page_dequeue(vb_dev_info); 252 if (!page) 253 break; 254 set_page_pfns(vb, vb->pfns + vb->num_pfns, page); 255 list_add(&page->lru, &pages); 256 vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE; 257 } 258 259 num_freed_pages = vb->num_pfns; 260 /* 261 * Note that if 262 * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); 263 * is true, we *have* to do it in this order 264 */ 265 if (vb->num_pfns != 0) 266 tell_host(vb, vb->deflate_vq); 267 release_pages_balloon(vb, &pages); 268 mutex_unlock(&vb->balloon_lock); 269 return num_freed_pages; 270 } 271 272 static inline void update_stat(struct virtio_balloon *vb, int idx, 273 u16 tag, u64 val) 274 { 275 BUG_ON(idx >= VIRTIO_BALLOON_S_NR); 276 vb->stats[idx].tag = cpu_to_virtio16(vb->vdev, tag); 277 vb->stats[idx].val = cpu_to_virtio64(vb->vdev, val); 278 } 279 280 #define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT) 281 282 static unsigned int update_balloon_stats(struct virtio_balloon *vb) 283 { 284 unsigned long events[NR_VM_EVENT_ITEMS]; 285 struct sysinfo i; 286 unsigned int idx = 0; 287 long available; 288 unsigned long caches; 289 290 all_vm_events(events); 291 si_meminfo(&i); 292 293 available = si_mem_available(); 294 caches = global_node_page_state(NR_FILE_PAGES); 295 296 #ifdef CONFIG_VM_EVENT_COUNTERS 297 update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, 298 pages_to_bytes(events[PSWPIN])); 299 update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, 300 pages_to_bytes(events[PSWPOUT])); 301 update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]); 302 update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]); 303 #ifdef CONFIG_HUGETLB_PAGE 304 update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC, 305 events[HTLB_BUDDY_PGALLOC]); 306 update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGFAIL, 307 events[HTLB_BUDDY_PGALLOC_FAIL]); 308 #endif 309 #endif 310 update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, 311 pages_to_bytes(i.freeram)); 312 update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, 313 pages_to_bytes(i.totalram)); 314 update_stat(vb, idx++, VIRTIO_BALLOON_S_AVAIL, 315 pages_to_bytes(available)); 316 update_stat(vb, idx++, VIRTIO_BALLOON_S_CACHES, 317 pages_to_bytes(caches)); 318 319 return idx; 320 } 321 322 /* 323 * While most virtqueues communicate guest-initiated requests to the hypervisor, 324 * the stats queue operates in reverse. The driver initializes the virtqueue 325 * with a single buffer. From that point forward, all conversations consist of 326 * a hypervisor request (a call to this function) which directs us to refill 327 * the virtqueue with a fresh stats buffer. Since stats collection can sleep, 328 * we delegate the job to a freezable workqueue that will do the actual work via 329 * stats_handle_request(). 330 */ 331 static void stats_request(struct virtqueue *vq) 332 { 333 struct virtio_balloon *vb = vq->vdev->priv; 334 335 spin_lock(&vb->stop_update_lock); 336 if (!vb->stop_update) 337 queue_work(system_freezable_wq, &vb->update_balloon_stats_work); 338 spin_unlock(&vb->stop_update_lock); 339 } 340 341 static void stats_handle_request(struct virtio_balloon *vb) 342 { 343 struct virtqueue *vq; 344 struct scatterlist sg; 345 unsigned int len, num_stats; 346 347 num_stats = update_balloon_stats(vb); 348 349 vq = vb->stats_vq; 350 if (!virtqueue_get_buf(vq, &len)) 351 return; 352 sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); 353 virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); 354 virtqueue_kick(vq); 355 } 356 357 static inline s64 towards_target(struct virtio_balloon *vb) 358 { 359 s64 target; 360 u32 num_pages; 361 362 virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages, 363 &num_pages); 364 365 /* Legacy balloon config space is LE, unlike all other devices. */ 366 if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1)) 367 num_pages = le32_to_cpu((__force __le32)num_pages); 368 369 target = num_pages; 370 return target - vb->num_pages; 371 } 372 373 /* Gives back @num_to_return blocks of free pages to mm. */ 374 static unsigned long return_free_pages_to_mm(struct virtio_balloon *vb, 375 unsigned long num_to_return) 376 { 377 struct page *page; 378 unsigned long num_returned; 379 380 spin_lock_irq(&vb->free_page_list_lock); 381 for (num_returned = 0; num_returned < num_to_return; num_returned++) { 382 page = balloon_page_pop(&vb->free_page_list); 383 if (!page) 384 break; 385 free_pages((unsigned long)page_address(page), 386 VIRTIO_BALLOON_HINT_BLOCK_ORDER); 387 } 388 vb->num_free_page_blocks -= num_returned; 389 spin_unlock_irq(&vb->free_page_list_lock); 390 391 return num_returned; 392 } 393 394 static void virtio_balloon_queue_free_page_work(struct virtio_balloon *vb) 395 { 396 if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) 397 return; 398 399 /* No need to queue the work if the bit was already set. */ 400 if (test_and_set_bit(VIRTIO_BALLOON_CONFIG_READ_CMD_ID, 401 &vb->config_read_bitmap)) 402 return; 403 404 queue_work(vb->balloon_wq, &vb->report_free_page_work); 405 } 406 407 static void virtballoon_changed(struct virtio_device *vdev) 408 { 409 struct virtio_balloon *vb = vdev->priv; 410 unsigned long flags; 411 412 spin_lock_irqsave(&vb->stop_update_lock, flags); 413 if (!vb->stop_update) { 414 queue_work(system_freezable_wq, 415 &vb->update_balloon_size_work); 416 virtio_balloon_queue_free_page_work(vb); 417 } 418 spin_unlock_irqrestore(&vb->stop_update_lock, flags); 419 } 420 421 static void update_balloon_size(struct virtio_balloon *vb) 422 { 423 u32 actual = vb->num_pages; 424 425 /* Legacy balloon config space is LE, unlike all other devices. */ 426 if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1)) 427 actual = (__force u32)cpu_to_le32(actual); 428 429 virtio_cwrite(vb->vdev, struct virtio_balloon_config, actual, 430 &actual); 431 } 432 433 static void update_balloon_stats_func(struct work_struct *work) 434 { 435 struct virtio_balloon *vb; 436 437 vb = container_of(work, struct virtio_balloon, 438 update_balloon_stats_work); 439 stats_handle_request(vb); 440 } 441 442 static void update_balloon_size_func(struct work_struct *work) 443 { 444 struct virtio_balloon *vb; 445 s64 diff; 446 447 vb = container_of(work, struct virtio_balloon, 448 update_balloon_size_work); 449 diff = towards_target(vb); 450 451 if (!diff) 452 return; 453 454 if (diff > 0) 455 diff -= fill_balloon(vb, diff); 456 else 457 diff += leak_balloon(vb, -diff); 458 update_balloon_size(vb); 459 460 if (diff) 461 queue_work(system_freezable_wq, work); 462 } 463 464 static int init_vqs(struct virtio_balloon *vb) 465 { 466 struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX]; 467 vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX]; 468 const char *names[VIRTIO_BALLOON_VQ_MAX]; 469 int err; 470 471 /* 472 * Inflateq and deflateq are used unconditionally. The names[] 473 * will be NULL if the related feature is not enabled, which will 474 * cause no allocation for the corresponding virtqueue in find_vqs. 475 */ 476 callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack; 477 names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate"; 478 callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack; 479 names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate"; 480 callbacks[VIRTIO_BALLOON_VQ_STATS] = NULL; 481 names[VIRTIO_BALLOON_VQ_STATS] = NULL; 482 callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; 483 names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; 484 485 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { 486 names[VIRTIO_BALLOON_VQ_STATS] = "stats"; 487 callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request; 488 } 489 490 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { 491 names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq"; 492 callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; 493 } 494 495 err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX, 496 vqs, callbacks, names, NULL, NULL); 497 if (err) 498 return err; 499 500 vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE]; 501 vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE]; 502 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { 503 struct scatterlist sg; 504 unsigned int num_stats; 505 vb->stats_vq = vqs[VIRTIO_BALLOON_VQ_STATS]; 506 507 /* 508 * Prime this virtqueue with one buffer so the hypervisor can 509 * use it to signal us later (it can't be broken yet!). 510 */ 511 num_stats = update_balloon_stats(vb); 512 513 sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); 514 err = virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, 515 GFP_KERNEL); 516 if (err) { 517 dev_warn(&vb->vdev->dev, "%s: add stat_vq failed\n", 518 __func__); 519 return err; 520 } 521 virtqueue_kick(vb->stats_vq); 522 } 523 524 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) 525 vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE]; 526 527 return 0; 528 } 529 530 static u32 virtio_balloon_cmd_id_received(struct virtio_balloon *vb) 531 { 532 if (test_and_clear_bit(VIRTIO_BALLOON_CONFIG_READ_CMD_ID, 533 &vb->config_read_bitmap)) 534 virtio_cread(vb->vdev, struct virtio_balloon_config, 535 free_page_report_cmd_id, 536 &vb->cmd_id_received_cache); 537 538 return vb->cmd_id_received_cache; 539 } 540 541 static int send_cmd_id_start(struct virtio_balloon *vb) 542 { 543 struct scatterlist sg; 544 struct virtqueue *vq = vb->free_page_vq; 545 int err, unused; 546 547 /* Detach all the used buffers from the vq */ 548 while (virtqueue_get_buf(vq, &unused)) 549 ; 550 551 vb->cmd_id_active = virtio32_to_cpu(vb->vdev, 552 virtio_balloon_cmd_id_received(vb)); 553 sg_init_one(&sg, &vb->cmd_id_active, sizeof(vb->cmd_id_active)); 554 err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_active, GFP_KERNEL); 555 if (!err) 556 virtqueue_kick(vq); 557 return err; 558 } 559 560 static int send_cmd_id_stop(struct virtio_balloon *vb) 561 { 562 struct scatterlist sg; 563 struct virtqueue *vq = vb->free_page_vq; 564 int err, unused; 565 566 /* Detach all the used buffers from the vq */ 567 while (virtqueue_get_buf(vq, &unused)) 568 ; 569 570 sg_init_one(&sg, &vb->cmd_id_stop, sizeof(vb->cmd_id_stop)); 571 err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_stop, GFP_KERNEL); 572 if (!err) 573 virtqueue_kick(vq); 574 return err; 575 } 576 577 static int get_free_page_and_send(struct virtio_balloon *vb) 578 { 579 struct virtqueue *vq = vb->free_page_vq; 580 struct page *page; 581 struct scatterlist sg; 582 int err, unused; 583 void *p; 584 585 /* Detach all the used buffers from the vq */ 586 while (virtqueue_get_buf(vq, &unused)) 587 ; 588 589 page = alloc_pages(VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG, 590 VIRTIO_BALLOON_HINT_BLOCK_ORDER); 591 /* 592 * When the allocation returns NULL, it indicates that we have got all 593 * the possible free pages, so return -EINTR to stop. 594 */ 595 if (!page) 596 return -EINTR; 597 598 p = page_address(page); 599 sg_init_one(&sg, p, VIRTIO_BALLOON_HINT_BLOCK_BYTES); 600 /* There is always 1 entry reserved for the cmd id to use. */ 601 if (vq->num_free > 1) { 602 err = virtqueue_add_inbuf(vq, &sg, 1, p, GFP_KERNEL); 603 if (unlikely(err)) { 604 free_pages((unsigned long)p, 605 VIRTIO_BALLOON_HINT_BLOCK_ORDER); 606 return err; 607 } 608 virtqueue_kick(vq); 609 spin_lock_irq(&vb->free_page_list_lock); 610 balloon_page_push(&vb->free_page_list, page); 611 vb->num_free_page_blocks++; 612 spin_unlock_irq(&vb->free_page_list_lock); 613 } else { 614 /* 615 * The vq has no available entry to add this page block, so 616 * just free it. 617 */ 618 free_pages((unsigned long)p, VIRTIO_BALLOON_HINT_BLOCK_ORDER); 619 } 620 621 return 0; 622 } 623 624 static int send_free_pages(struct virtio_balloon *vb) 625 { 626 int err; 627 u32 cmd_id_active; 628 629 while (1) { 630 /* 631 * If a stop id or a new cmd id was just received from host, 632 * stop the reporting. 633 */ 634 cmd_id_active = virtio32_to_cpu(vb->vdev, vb->cmd_id_active); 635 if (unlikely(cmd_id_active != 636 virtio_balloon_cmd_id_received(vb))) 637 break; 638 639 /* 640 * The free page blocks are allocated and sent to host one by 641 * one. 642 */ 643 err = get_free_page_and_send(vb); 644 if (err == -EINTR) 645 break; 646 else if (unlikely(err)) 647 return err; 648 } 649 650 return 0; 651 } 652 653 static void virtio_balloon_report_free_page(struct virtio_balloon *vb) 654 { 655 int err; 656 struct device *dev = &vb->vdev->dev; 657 658 /* Start by sending the received cmd id to host with an outbuf. */ 659 err = send_cmd_id_start(vb); 660 if (unlikely(err)) 661 dev_err(dev, "Failed to send a start id, err = %d\n", err); 662 663 err = send_free_pages(vb); 664 if (unlikely(err)) 665 dev_err(dev, "Failed to send a free page, err = %d\n", err); 666 667 /* End by sending a stop id to host with an outbuf. */ 668 err = send_cmd_id_stop(vb); 669 if (unlikely(err)) 670 dev_err(dev, "Failed to send a stop id, err = %d\n", err); 671 } 672 673 static void report_free_page_func(struct work_struct *work) 674 { 675 struct virtio_balloon *vb = container_of(work, struct virtio_balloon, 676 report_free_page_work); 677 u32 cmd_id_received; 678 679 cmd_id_received = virtio_balloon_cmd_id_received(vb); 680 if (cmd_id_received == VIRTIO_BALLOON_CMD_ID_DONE) { 681 /* Pass ULONG_MAX to give back all the free pages */ 682 return_free_pages_to_mm(vb, ULONG_MAX); 683 } else if (cmd_id_received != VIRTIO_BALLOON_CMD_ID_STOP && 684 cmd_id_received != 685 virtio32_to_cpu(vb->vdev, vb->cmd_id_active)) { 686 virtio_balloon_report_free_page(vb); 687 } 688 } 689 690 #ifdef CONFIG_BALLOON_COMPACTION 691 /* 692 * virtballoon_migratepage - perform the balloon page migration on behalf of 693 * a compation thread. (called under page lock) 694 * @vb_dev_info: the balloon device 695 * @newpage: page that will replace the isolated page after migration finishes. 696 * @page : the isolated (old) page that is about to be migrated to newpage. 697 * @mode : compaction mode -- not used for balloon page migration. 698 * 699 * After a ballooned page gets isolated by compaction procedures, this is the 700 * function that performs the page migration on behalf of a compaction thread 701 * The page migration for virtio balloon is done in a simple swap fashion which 702 * follows these two macro steps: 703 * 1) insert newpage into vb->pages list and update the host about it; 704 * 2) update the host about the old page removed from vb->pages list; 705 * 706 * This function preforms the balloon page migration task. 707 * Called through balloon_mapping->a_ops->migratepage 708 */ 709 static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, 710 struct page *newpage, struct page *page, enum migrate_mode mode) 711 { 712 struct virtio_balloon *vb = container_of(vb_dev_info, 713 struct virtio_balloon, vb_dev_info); 714 unsigned long flags; 715 716 /* 717 * In order to avoid lock contention while migrating pages concurrently 718 * to leak_balloon() or fill_balloon() we just give up the balloon_lock 719 * this turn, as it is easier to retry the page migration later. 720 * This also prevents fill_balloon() getting stuck into a mutex 721 * recursion in the case it ends up triggering memory compaction 722 * while it is attempting to inflate the ballon. 723 */ 724 if (!mutex_trylock(&vb->balloon_lock)) 725 return -EAGAIN; 726 727 get_page(newpage); /* balloon reference */ 728 729 /* 730 * When we migrate a page to a different zone and adjusted the 731 * managed page count when inflating, we have to fixup the count of 732 * both involved zones. 733 */ 734 if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM) && 735 page_zone(page) != page_zone(newpage)) { 736 adjust_managed_page_count(page, 1); 737 adjust_managed_page_count(newpage, -1); 738 } 739 740 /* balloon's page migration 1st step -- inflate "newpage" */ 741 spin_lock_irqsave(&vb_dev_info->pages_lock, flags); 742 balloon_page_insert(vb_dev_info, newpage); 743 vb_dev_info->isolated_pages--; 744 __count_vm_event(BALLOON_MIGRATE); 745 spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags); 746 vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; 747 set_page_pfns(vb, vb->pfns, newpage); 748 tell_host(vb, vb->inflate_vq); 749 750 /* balloon's page migration 2nd step -- deflate "page" */ 751 spin_lock_irqsave(&vb_dev_info->pages_lock, flags); 752 balloon_page_delete(page); 753 spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags); 754 vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE; 755 set_page_pfns(vb, vb->pfns, page); 756 tell_host(vb, vb->deflate_vq); 757 758 mutex_unlock(&vb->balloon_lock); 759 760 put_page(page); /* balloon reference */ 761 762 return MIGRATEPAGE_SUCCESS; 763 } 764 765 static int balloon_init_fs_context(struct fs_context *fc) 766 { 767 return init_pseudo(fc, BALLOON_KVM_MAGIC) ? 0 : -ENOMEM; 768 } 769 770 static struct file_system_type balloon_fs = { 771 .name = "balloon-kvm", 772 .init_fs_context = balloon_init_fs_context, 773 .kill_sb = kill_anon_super, 774 }; 775 776 #endif /* CONFIG_BALLOON_COMPACTION */ 777 778 static unsigned long shrink_free_pages(struct virtio_balloon *vb, 779 unsigned long pages_to_free) 780 { 781 unsigned long blocks_to_free, blocks_freed; 782 783 pages_to_free = round_up(pages_to_free, 784 VIRTIO_BALLOON_HINT_BLOCK_PAGES); 785 blocks_to_free = pages_to_free / VIRTIO_BALLOON_HINT_BLOCK_PAGES; 786 blocks_freed = return_free_pages_to_mm(vb, blocks_to_free); 787 788 return blocks_freed * VIRTIO_BALLOON_HINT_BLOCK_PAGES; 789 } 790 791 static unsigned long leak_balloon_pages(struct virtio_balloon *vb, 792 unsigned long pages_to_free) 793 { 794 return leak_balloon(vb, pages_to_free * VIRTIO_BALLOON_PAGES_PER_PAGE) / 795 VIRTIO_BALLOON_PAGES_PER_PAGE; 796 } 797 798 static unsigned long shrink_balloon_pages(struct virtio_balloon *vb, 799 unsigned long pages_to_free) 800 { 801 unsigned long pages_freed = 0; 802 803 /* 804 * One invocation of leak_balloon can deflate at most 805 * VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it 806 * multiple times to deflate pages till reaching pages_to_free. 807 */ 808 while (vb->num_pages && pages_freed < pages_to_free) 809 pages_freed += leak_balloon_pages(vb, 810 pages_to_free - pages_freed); 811 812 update_balloon_size(vb); 813 814 return pages_freed; 815 } 816 817 static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker, 818 struct shrink_control *sc) 819 { 820 unsigned long pages_to_free, pages_freed = 0; 821 struct virtio_balloon *vb = container_of(shrinker, 822 struct virtio_balloon, shrinker); 823 824 pages_to_free = sc->nr_to_scan; 825 826 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) 827 pages_freed = shrink_free_pages(vb, pages_to_free); 828 829 if (pages_freed >= pages_to_free) 830 return pages_freed; 831 832 pages_freed += shrink_balloon_pages(vb, pages_to_free - pages_freed); 833 834 return pages_freed; 835 } 836 837 static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker, 838 struct shrink_control *sc) 839 { 840 struct virtio_balloon *vb = container_of(shrinker, 841 struct virtio_balloon, shrinker); 842 unsigned long count; 843 844 count = vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE; 845 count += vb->num_free_page_blocks * VIRTIO_BALLOON_HINT_BLOCK_PAGES; 846 847 return count; 848 } 849 850 static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb) 851 { 852 unregister_shrinker(&vb->shrinker); 853 } 854 855 static int virtio_balloon_register_shrinker(struct virtio_balloon *vb) 856 { 857 vb->shrinker.scan_objects = virtio_balloon_shrinker_scan; 858 vb->shrinker.count_objects = virtio_balloon_shrinker_count; 859 vb->shrinker.seeks = DEFAULT_SEEKS; 860 861 return register_shrinker(&vb->shrinker); 862 } 863 864 static int virtballoon_probe(struct virtio_device *vdev) 865 { 866 struct virtio_balloon *vb; 867 __u32 poison_val; 868 int err; 869 870 if (!vdev->config->get) { 871 dev_err(&vdev->dev, "%s failure: config access disabled\n", 872 __func__); 873 return -EINVAL; 874 } 875 876 vdev->priv = vb = kzalloc(sizeof(*vb), GFP_KERNEL); 877 if (!vb) { 878 err = -ENOMEM; 879 goto out; 880 } 881 882 INIT_WORK(&vb->update_balloon_stats_work, update_balloon_stats_func); 883 INIT_WORK(&vb->update_balloon_size_work, update_balloon_size_func); 884 spin_lock_init(&vb->stop_update_lock); 885 mutex_init(&vb->balloon_lock); 886 init_waitqueue_head(&vb->acked); 887 vb->vdev = vdev; 888 889 balloon_devinfo_init(&vb->vb_dev_info); 890 891 err = init_vqs(vb); 892 if (err) 893 goto out_free_vb; 894 895 #ifdef CONFIG_BALLOON_COMPACTION 896 balloon_mnt = kern_mount(&balloon_fs); 897 if (IS_ERR(balloon_mnt)) { 898 err = PTR_ERR(balloon_mnt); 899 goto out_del_vqs; 900 } 901 902 vb->vb_dev_info.migratepage = virtballoon_migratepage; 903 vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); 904 if (IS_ERR(vb->vb_dev_info.inode)) { 905 err = PTR_ERR(vb->vb_dev_info.inode); 906 goto out_kern_unmount; 907 } 908 vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops; 909 #endif 910 if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { 911 /* 912 * There is always one entry reserved for cmd id, so the ring 913 * size needs to be at least two to report free page hints. 914 */ 915 if (virtqueue_get_vring_size(vb->free_page_vq) < 2) { 916 err = -ENOSPC; 917 goto out_iput; 918 } 919 vb->balloon_wq = alloc_workqueue("balloon-wq", 920 WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0); 921 if (!vb->balloon_wq) { 922 err = -ENOMEM; 923 goto out_iput; 924 } 925 INIT_WORK(&vb->report_free_page_work, report_free_page_func); 926 vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP; 927 vb->cmd_id_active = cpu_to_virtio32(vb->vdev, 928 VIRTIO_BALLOON_CMD_ID_STOP); 929 vb->cmd_id_stop = cpu_to_virtio32(vb->vdev, 930 VIRTIO_BALLOON_CMD_ID_STOP); 931 spin_lock_init(&vb->free_page_list_lock); 932 INIT_LIST_HEAD(&vb->free_page_list); 933 if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) { 934 memset(&poison_val, PAGE_POISON, sizeof(poison_val)); 935 virtio_cwrite(vb->vdev, struct virtio_balloon_config, 936 poison_val, &poison_val); 937 } 938 } 939 /* 940 * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a 941 * shrinker needs to be registered to relieve memory pressure. 942 */ 943 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) { 944 err = virtio_balloon_register_shrinker(vb); 945 if (err) 946 goto out_del_balloon_wq; 947 } 948 virtio_device_ready(vdev); 949 950 if (towards_target(vb)) 951 virtballoon_changed(vdev); 952 return 0; 953 954 out_del_balloon_wq: 955 if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) 956 destroy_workqueue(vb->balloon_wq); 957 out_iput: 958 #ifdef CONFIG_BALLOON_COMPACTION 959 iput(vb->vb_dev_info.inode); 960 out_kern_unmount: 961 kern_unmount(balloon_mnt); 962 #endif 963 out_del_vqs: 964 vdev->config->del_vqs(vdev); 965 out_free_vb: 966 kfree(vb); 967 out: 968 return err; 969 } 970 971 static void remove_common(struct virtio_balloon *vb) 972 { 973 /* There might be pages left in the balloon: free them. */ 974 while (vb->num_pages) 975 leak_balloon(vb, vb->num_pages); 976 update_balloon_size(vb); 977 978 /* There might be free pages that are being reported: release them. */ 979 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) 980 return_free_pages_to_mm(vb, ULONG_MAX); 981 982 /* Now we reset the device so we can clean up the queues. */ 983 vb->vdev->config->reset(vb->vdev); 984 985 vb->vdev->config->del_vqs(vb->vdev); 986 } 987 988 static void virtballoon_remove(struct virtio_device *vdev) 989 { 990 struct virtio_balloon *vb = vdev->priv; 991 992 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) 993 virtio_balloon_unregister_shrinker(vb); 994 spin_lock_irq(&vb->stop_update_lock); 995 vb->stop_update = true; 996 spin_unlock_irq(&vb->stop_update_lock); 997 cancel_work_sync(&vb->update_balloon_size_work); 998 cancel_work_sync(&vb->update_balloon_stats_work); 999 1000 if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { 1001 cancel_work_sync(&vb->report_free_page_work); 1002 destroy_workqueue(vb->balloon_wq); 1003 } 1004 1005 remove_common(vb); 1006 #ifdef CONFIG_BALLOON_COMPACTION 1007 if (vb->vb_dev_info.inode) 1008 iput(vb->vb_dev_info.inode); 1009 1010 kern_unmount(balloon_mnt); 1011 #endif 1012 kfree(vb); 1013 } 1014 1015 #ifdef CONFIG_PM_SLEEP 1016 static int virtballoon_freeze(struct virtio_device *vdev) 1017 { 1018 struct virtio_balloon *vb = vdev->priv; 1019 1020 /* 1021 * The workqueue is already frozen by the PM core before this 1022 * function is called. 1023 */ 1024 remove_common(vb); 1025 return 0; 1026 } 1027 1028 static int virtballoon_restore(struct virtio_device *vdev) 1029 { 1030 struct virtio_balloon *vb = vdev->priv; 1031 int ret; 1032 1033 ret = init_vqs(vdev->priv); 1034 if (ret) 1035 return ret; 1036 1037 virtio_device_ready(vdev); 1038 1039 if (towards_target(vb)) 1040 virtballoon_changed(vdev); 1041 update_balloon_size(vb); 1042 return 0; 1043 } 1044 #endif 1045 1046 static int virtballoon_validate(struct virtio_device *vdev) 1047 { 1048 if (!page_poisoning_enabled()) 1049 __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_POISON); 1050 1051 __virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM); 1052 return 0; 1053 } 1054 1055 static unsigned int features[] = { 1056 VIRTIO_BALLOON_F_MUST_TELL_HOST, 1057 VIRTIO_BALLOON_F_STATS_VQ, 1058 VIRTIO_BALLOON_F_DEFLATE_ON_OOM, 1059 VIRTIO_BALLOON_F_FREE_PAGE_HINT, 1060 VIRTIO_BALLOON_F_PAGE_POISON, 1061 }; 1062 1063 static struct virtio_driver virtio_balloon_driver = { 1064 .feature_table = features, 1065 .feature_table_size = ARRAY_SIZE(features), 1066 .driver.name = KBUILD_MODNAME, 1067 .driver.owner = THIS_MODULE, 1068 .id_table = id_table, 1069 .validate = virtballoon_validate, 1070 .probe = virtballoon_probe, 1071 .remove = virtballoon_remove, 1072 .config_changed = virtballoon_changed, 1073 #ifdef CONFIG_PM_SLEEP 1074 .freeze = virtballoon_freeze, 1075 .restore = virtballoon_restore, 1076 #endif 1077 }; 1078 1079 module_virtio_driver(virtio_balloon_driver); 1080 MODULE_DEVICE_TABLE(virtio, id_table); 1081 MODULE_DESCRIPTION("Virtio balloon driver"); 1082 MODULE_LICENSE("GPL"); 1083