/*
 * Virtio balloon implementation, inspired by Dor Laor and Marcelo
 * Tosatti's implementations.
 *
 * Copyright 2008 Rusty Russell IBM Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 */

#include <linux/virtio.h>
#include <linux/virtio_balloon.h>
#include <linux/swap.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/balloon_compaction.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/mount.h>
#include <linux/magic.h>

/*
 * Balloon device works in 4K page units.  So each page is pointed to by
 * multiple balloon pages.  All memory counters in this driver are in balloon
 * page units.
 */
#define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
#define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
#define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80

#define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
					     __GFP_NOMEMALLOC)
/* The order of free page blocks to report to host */
#define VIRTIO_BALLOON_FREE_PAGE_ORDER (MAX_ORDER - 1)
/* The size of a free page block in bytes */
#define VIRTIO_BALLOON_FREE_PAGE_SIZE \
	(1 << (VIRTIO_BALLOON_FREE_PAGE_ORDER + PAGE_SHIFT))

#ifdef CONFIG_BALLOON_COMPACTION
static struct vfsmount *balloon_mnt;
#endif

enum virtio_balloon_vq {
	VIRTIO_BALLOON_VQ_INFLATE,
	VIRTIO_BALLOON_VQ_DEFLATE,
	VIRTIO_BALLOON_VQ_STATS,
	VIRTIO_BALLOON_VQ_FREE_PAGE,
	VIRTIO_BALLOON_VQ_MAX
};
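/*
 * Illustrative unit math (added commentary, assuming the conventional
 * 4KiB balloon page, i.e. VIRTIO_BALLOON_PFN_SHIFT == 12):
 *
 *   4KiB kernel pages:  VIRTIO_BALLOON_PAGES_PER_PAGE == 1, so one
 *	struct page corresponds to exactly one balloon PFN.
 *   64KiB kernel pages: VIRTIO_BALLOON_PAGES_PER_PAGE == 16, so one
 *	struct page is reported as 16 consecutive balloon PFNs, and the
 *	256-entry pfns[] array below covers 16 kernel pages per round.
 */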
struct virtio_balloon {
	struct virtio_device *vdev;
	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq;

	/* Balloon's own wq for cpu-intensive work items */
	struct workqueue_struct *balloon_wq;
	/* The free page reporting work item submitted to the balloon wq */
	struct work_struct report_free_page_work;

	/* The balloon servicing is delegated to a freezable workqueue. */
	struct work_struct update_balloon_stats_work;
	struct work_struct update_balloon_size_work;

	/* Prevent updating balloon when it is being canceled. */
	spinlock_t stop_update_lock;
	bool stop_update;

	/* The list of allocated free pages, waiting to be given back to mm */
	struct list_head free_page_list;
	spinlock_t free_page_list_lock;
	/* The number of free page blocks on the above list */
	unsigned long num_free_page_blocks;
	/* The cmd id received from host */
	u32 cmd_id_received;
	/* The cmd id that is actively in use */
	__virtio32 cmd_id_active;
	/* Buffer to store the stop sign */
	__virtio32 cmd_id_stop;

	/* Waiting for host to ack the pages we released. */
	wait_queue_head_t acked;

	/* Number of balloon pages we've told the Host we're not using. */
	unsigned int num_pages;

	/*
	 * The pages we've told the Host we're not using are enqueued
	 * at vb_dev_info->pages list.
	 * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE
	 * to num_pages above.
	 */
	struct balloon_dev_info vb_dev_info;

	/* Synchronize access/update to this struct virtio_balloon elements */
	struct mutex balloon_lock;

	/* The array of pfns we tell the Host about. */
	unsigned int num_pfns;
	__virtio32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX];

	/* Memory statistics */
	struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];

	/* To register a shrinker to shrink memory upon memory pressure */
	struct shrinker shrinker;
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static u32 page_to_balloon_pfn(struct page *page)
{
	unsigned long pfn = page_to_pfn(page);

	BUILD_BUG_ON(PAGE_SHIFT < VIRTIO_BALLOON_PFN_SHIFT);
	/* Convert pfn from Linux page size to balloon page size. */
	return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE;
}

static void balloon_ack(struct virtqueue *vq)
{
	struct virtio_balloon *vb = vq->vdev->priv;

	wake_up(&vb->acked);
}

static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
{
	struct scatterlist sg;
	unsigned int len;

	sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);

	/* We should always be able to add one buffer to an empty queue. */
	virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
	virtqueue_kick(vq);

	/* When host has read buffer, this completes via balloon_ack */
	wait_event(vb->acked, virtqueue_get_buf(vq, &len));
}

static void set_page_pfns(struct virtio_balloon *vb,
			  __virtio32 pfns[], struct page *page)
{
	unsigned int i;

	/*
	 * Set balloon pfns pointing at this page.
	 * Note that the first pfn points at start of the page.
	 */
	for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++)
		pfns[i] = cpu_to_virtio32(vb->vdev,
					  page_to_balloon_pfn(page) + i);
}

static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
{
	unsigned num_allocated_pages;
	unsigned num_pfns;
	struct page *page;
	LIST_HEAD(pages);

	/* We can only do one array worth at a time. */
	num = min(num, ARRAY_SIZE(vb->pfns));

	for (num_pfns = 0; num_pfns < num;
	     num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
		struct page *page = balloon_page_alloc();

		if (!page) {
			dev_info_ratelimited(&vb->vdev->dev,
					     "Out of puff! Can't get %u pages\n",
					     VIRTIO_BALLOON_PAGES_PER_PAGE);
			/* Sleep for at least 1/5 of a second before retry. */
			msleep(200);
			break;
		}

		balloon_page_push(&pages, page);
	}

	mutex_lock(&vb->balloon_lock);

	vb->num_pfns = 0;

	while ((page = balloon_page_pop(&pages))) {
		balloon_page_enqueue(&vb->vb_dev_info, page);

		set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
		vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
		if (!virtio_has_feature(vb->vdev,
					VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
			adjust_managed_page_count(page, -1);
		vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE;
	}

	num_allocated_pages = vb->num_pfns;
	/* Did we get any? */
	if (vb->num_pfns != 0)
		tell_host(vb, vb->inflate_vq);
	mutex_unlock(&vb->balloon_lock);

	return num_allocated_pages;
}
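/*
 * Design note on fill_balloon() (added commentary): pages are allocated
 * into a local list before balloon_lock is taken, so the slow allocation
 * path (which may reclaim, or msleep() on failure) never runs under the
 * mutex; only the enqueue and the tell_host() round trip are serialized
 * against deflation and migration.
 */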
static void release_pages_balloon(struct virtio_balloon *vb,
				  struct list_head *pages)
{
	struct page *page, *next;

	list_for_each_entry_safe(page, next, pages, lru) {
		if (!virtio_has_feature(vb->vdev,
					VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
			adjust_managed_page_count(page, 1);
		list_del(&page->lru);
		put_page(page); /* balloon reference */
	}
}

static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
{
	unsigned num_freed_pages;
	struct page *page;
	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
	LIST_HEAD(pages);

	/* We can only do one array worth at a time. */
	num = min(num, ARRAY_SIZE(vb->pfns));

	mutex_lock(&vb->balloon_lock);
	/* We can't release more pages than taken */
	num = min(num, (size_t)vb->num_pages);
	for (vb->num_pfns = 0; vb->num_pfns < num;
	     vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
		page = balloon_page_dequeue(vb_dev_info);
		if (!page)
			break;
		set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
		list_add(&page->lru, &pages);
		vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
	}

	num_freed_pages = vb->num_pfns;
	/*
	 * Note that if
	 * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
	 * is true, we *have* to do it in this order
	 */
	if (vb->num_pfns != 0)
		tell_host(vb, vb->deflate_vq);
	release_pages_balloon(vb, &pages);
	mutex_unlock(&vb->balloon_lock);
	return num_freed_pages;
}

static inline void update_stat(struct virtio_balloon *vb, int idx,
			       u16 tag, u64 val)
{
	BUG_ON(idx >= VIRTIO_BALLOON_S_NR);
	vb->stats[idx].tag = cpu_to_virtio16(vb->vdev, tag);
	vb->stats[idx].val = cpu_to_virtio64(vb->vdev, val);
}

#define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT)

static unsigned int update_balloon_stats(struct virtio_balloon *vb)
{
	unsigned long events[NR_VM_EVENT_ITEMS];
	struct sysinfo i;
	unsigned int idx = 0;
	long available;
	unsigned long caches;

	all_vm_events(events);
	si_meminfo(&i);

	available = si_mem_available();
	caches = global_node_page_state(NR_FILE_PAGES);

#ifdef CONFIG_VM_EVENT_COUNTERS
	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN,
		    pages_to_bytes(events[PSWPIN]));
	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT,
		    pages_to_bytes(events[PSWPOUT]));
	update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
	update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
#ifdef CONFIG_HUGETLB_PAGE
	update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC,
		    events[HTLB_BUDDY_PGALLOC]);
	update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGFAIL,
		    events[HTLB_BUDDY_PGALLOC_FAIL]);
#endif
#endif
	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE,
		    pages_to_bytes(i.freeram));
	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT,
		    pages_to_bytes(i.totalram));
	update_stat(vb, idx++, VIRTIO_BALLOON_S_AVAIL,
		    pages_to_bytes(available));
	update_stat(vb, idx++, VIRTIO_BALLOON_S_CACHES,
		    pages_to_bytes(caches));

	return idx;
}
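/*
 * Note on stat units (added commentary): counters passed through
 * pages_to_bytes() above (swap in/out, memfree, memtot, avail, caches)
 * reach the host in bytes, while the fault and hugetlb allocation stats
 * are raw event counts.
 */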
/*
 * While most virtqueues communicate guest-initiated requests to the hypervisor,
 * the stats queue operates in reverse.  The driver initializes the virtqueue
 * with a single buffer.  From that point forward, all conversations consist of
 * a hypervisor request (a call to this function) which directs us to refill
 * the virtqueue with a fresh stats buffer.  Since stats collection can sleep,
 * we delegate the job to a freezable workqueue that will do the actual work via
 * stats_handle_request().
 */
static void stats_request(struct virtqueue *vq)
{
	struct virtio_balloon *vb = vq->vdev->priv;

	spin_lock(&vb->stop_update_lock);
	if (!vb->stop_update)
		queue_work(system_freezable_wq, &vb->update_balloon_stats_work);
	spin_unlock(&vb->stop_update_lock);
}

static void stats_handle_request(struct virtio_balloon *vb)
{
	struct virtqueue *vq;
	struct scatterlist sg;
	unsigned int len, num_stats;

	num_stats = update_balloon_stats(vb);

	vq = vb->stats_vq;
	if (!virtqueue_get_buf(vq, &len))
		return;
	sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
	virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
	virtqueue_kick(vq);
}

static inline s64 towards_target(struct virtio_balloon *vb)
{
	s64 target;
	u32 num_pages;

	virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages,
		     &num_pages);

	/* Legacy balloon config space is LE, unlike all other devices. */
	if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1))
		num_pages = le32_to_cpu((__force __le32)num_pages);

	target = num_pages;
	return target - vb->num_pages;
}

/* Gives back @num_to_return blocks of free pages to mm. */
static unsigned long return_free_pages_to_mm(struct virtio_balloon *vb,
					     unsigned long num_to_return)
{
	struct page *page;
	unsigned long num_returned;

	spin_lock_irq(&vb->free_page_list_lock);
	for (num_returned = 0; num_returned < num_to_return; num_returned++) {
		page = balloon_page_pop(&vb->free_page_list);
		if (!page)
			break;
		free_pages((unsigned long)page_address(page),
			   VIRTIO_BALLOON_FREE_PAGE_ORDER);
	}
	vb->num_free_page_blocks -= num_returned;
	spin_unlock_irq(&vb->free_page_list_lock);

	return num_returned;
}

static void virtballoon_changed(struct virtio_device *vdev)
{
	struct virtio_balloon *vb = vdev->priv;
	unsigned long flags;
	s64 diff = towards_target(vb);

	if (diff) {
		spin_lock_irqsave(&vb->stop_update_lock, flags);
		if (!vb->stop_update)
			queue_work(system_freezable_wq,
				   &vb->update_balloon_size_work);
		spin_unlock_irqrestore(&vb->stop_update_lock, flags);
	}

	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
		virtio_cread(vdev, struct virtio_balloon_config,
			     free_page_report_cmd_id, &vb->cmd_id_received);
		if (vb->cmd_id_received == VIRTIO_BALLOON_CMD_ID_DONE) {
			/* Pass ULONG_MAX to give back all the free pages */
			return_free_pages_to_mm(vb, ULONG_MAX);
		} else if (vb->cmd_id_received != VIRTIO_BALLOON_CMD_ID_STOP &&
			   vb->cmd_id_received !=
				virtio32_to_cpu(vdev, vb->cmd_id_active)) {
			spin_lock_irqsave(&vb->stop_update_lock, flags);
			if (!vb->stop_update) {
				queue_work(vb->balloon_wq,
					   &vb->report_free_page_work);
			}
			spin_unlock_irqrestore(&vb->stop_update_lock, flags);
		}
	}
}
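/*
 * Summary of the free page hint handshake driven above (added
 * commentary): the host bumps free_page_report_cmd_id in the config
 * space; report_free_page_func() (below) echoes that id back on the
 * free_page_vq, streams free page blocks as inbufs, and terminates the
 * run with VIRTIO_BALLOON_CMD_ID_STOP.  A config value of
 * VIRTIO_BALLOON_CMD_ID_DONE means the host is finished with the hints,
 * so the blocks are handed back to the mm via return_free_pages_to_mm().
 */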
static void update_balloon_size(struct virtio_balloon *vb)
{
	u32 actual = vb->num_pages;

	/* Legacy balloon config space is LE, unlike all other devices. */
	if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1))
		actual = (__force u32)cpu_to_le32(actual);

	virtio_cwrite(vb->vdev, struct virtio_balloon_config, actual,
		      &actual);
}

static void update_balloon_stats_func(struct work_struct *work)
{
	struct virtio_balloon *vb;

	vb = container_of(work, struct virtio_balloon,
			  update_balloon_stats_work);
	stats_handle_request(vb);
}

static void update_balloon_size_func(struct work_struct *work)
{
	struct virtio_balloon *vb;
	s64 diff;

	vb = container_of(work, struct virtio_balloon,
			  update_balloon_size_work);
	diff = towards_target(vb);

	if (diff > 0)
		diff -= fill_balloon(vb, diff);
	else if (diff < 0)
		diff += leak_balloon(vb, -diff);
	update_balloon_size(vb);

	if (diff)
		queue_work(system_freezable_wq, work);
}
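/*
 * Worked example for the resize loop above (added commentary): if the
 * host raises num_pages by 512 balloon pages, towards_target() returns
 * +512; fill_balloon() handles at most VIRTIO_BALLOON_ARRAY_PFNS_MAX
 * (256) PFNs per call, so the work item requeues itself once and
 * converges on the second pass.
 */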
static int init_vqs(struct virtio_balloon *vb)
{
	struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX];
	vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX];
	const char *names[VIRTIO_BALLOON_VQ_MAX];
	int err;

	/*
	 * Inflateq and deflateq are used unconditionally.  The names[]
	 * will be NULL if the related feature is not enabled, which will
	 * cause no allocation for the corresponding virtqueue in find_vqs.
	 */
	callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack;
	names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate";
	callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack;
	names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
	names[VIRTIO_BALLOON_VQ_STATS] = NULL;
	names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;

	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
		names[VIRTIO_BALLOON_VQ_STATS] = "stats";
		callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request;
	}

	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
		names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq";
		callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
	}

	err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
					 vqs, callbacks, names, NULL, NULL);
	if (err)
		return err;

	vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
	vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
		struct scatterlist sg;
		unsigned int num_stats;

		vb->stats_vq = vqs[VIRTIO_BALLOON_VQ_STATS];

		/*
		 * Prime this virtqueue with one buffer so the hypervisor can
		 * use it to signal us later (it can't be broken yet!).
		 */
		num_stats = update_balloon_stats(vb);

		sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
		err = virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb,
					   GFP_KERNEL);
		if (err) {
			dev_warn(&vb->vdev->dev, "%s: add stat_vq failed\n",
				 __func__);
			return err;
		}
		virtqueue_kick(vb->stats_vq);
	}

	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
		vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE];

	return 0;
}

static int send_cmd_id_start(struct virtio_balloon *vb)
{
	struct scatterlist sg;
	struct virtqueue *vq = vb->free_page_vq;
	int err, unused;

	/* Detach all the used buffers from the vq */
	while (virtqueue_get_buf(vq, &unused))
		;

	vb->cmd_id_active = cpu_to_virtio32(vb->vdev, vb->cmd_id_received);
	sg_init_one(&sg, &vb->cmd_id_active, sizeof(vb->cmd_id_active));
	err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_active, GFP_KERNEL);
	if (!err)
		virtqueue_kick(vq);
	return err;
}

static int send_cmd_id_stop(struct virtio_balloon *vb)
{
	struct scatterlist sg;
	struct virtqueue *vq = vb->free_page_vq;
	int err, unused;

	/* Detach all the used buffers from the vq */
	while (virtqueue_get_buf(vq, &unused))
		;

	sg_init_one(&sg, &vb->cmd_id_stop, sizeof(vb->cmd_id_stop));
	err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_stop, GFP_KERNEL);
	if (!err)
		virtqueue_kick(vq);
	return err;
}

static int get_free_page_and_send(struct virtio_balloon *vb)
{
	struct virtqueue *vq = vb->free_page_vq;
	struct page *page;
	struct scatterlist sg;
	int err, unused;
	void *p;

	/* Detach all the used buffers from the vq */
	while (virtqueue_get_buf(vq, &unused))
		;

	page = alloc_pages(VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG,
			   VIRTIO_BALLOON_FREE_PAGE_ORDER);
	/*
	 * When the allocation returns NULL, it indicates that we have got all
	 * the possible free pages, so return -EINTR to stop.
	 */
	if (!page)
		return -EINTR;

	p = page_address(page);
	sg_init_one(&sg, p, VIRTIO_BALLOON_FREE_PAGE_SIZE);
	/* There is always 1 entry reserved for the cmd id to use. */
	if (vq->num_free > 1) {
		err = virtqueue_add_inbuf(vq, &sg, 1, p, GFP_KERNEL);
		if (unlikely(err)) {
			free_pages((unsigned long)p,
				   VIRTIO_BALLOON_FREE_PAGE_ORDER);
			return err;
		}
		virtqueue_kick(vq);
		spin_lock_irq(&vb->free_page_list_lock);
		balloon_page_push(&vb->free_page_list, page);
		vb->num_free_page_blocks++;
		spin_unlock_irq(&vb->free_page_list_lock);
	} else {
		/*
		 * The vq has no available entry to add this page block, so
		 * just free it.
		 */
		free_pages((unsigned long)p, VIRTIO_BALLOON_FREE_PAGE_ORDER);
	}

	return 0;
}
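/*
 * Illustrative sizing for the hinting path above (added commentary):
 * with 4KiB pages and the common MAX_ORDER of 11,
 * VIRTIO_BALLOON_FREE_PAGE_ORDER is 10, so each
 * get_free_page_and_send() round hands the host one 4MiB block
 * (VIRTIO_BALLOON_FREE_PAGE_SIZE == 1 << (10 + 12) bytes).
 */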
static int send_free_pages(struct virtio_balloon *vb)
{
	int err;
	u32 cmd_id_active;

	while (1) {
		/*
		 * If a stop id or a new cmd id was just received from host,
		 * stop the reporting.
		 */
		cmd_id_active = virtio32_to_cpu(vb->vdev, vb->cmd_id_active);
		if (cmd_id_active != vb->cmd_id_received)
			break;

		/*
		 * The free page blocks are allocated and sent to host one by
		 * one.
		 */
		err = get_free_page_and_send(vb);
		if (err == -EINTR)
			break;
		else if (unlikely(err))
			return err;
	}

	return 0;
}

static void report_free_page_func(struct work_struct *work)
{
	int err;
	struct virtio_balloon *vb = container_of(work, struct virtio_balloon,
						 report_free_page_work);
	struct device *dev = &vb->vdev->dev;

	/* Start by sending the received cmd id to host with an outbuf. */
	err = send_cmd_id_start(vb);
	if (unlikely(err))
		dev_err(dev, "Failed to send a start id, err = %d\n", err);

	err = send_free_pages(vb);
	if (unlikely(err))
		dev_err(dev, "Failed to send a free page, err = %d\n", err);

	/* End by sending a stop id to host with an outbuf. */
	err = send_cmd_id_stop(vb);
	if (unlikely(err))
		dev_err(dev, "Failed to send a stop id, err = %d\n", err);
}
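/*
 * Lifetime note (added commentary): blocks reported above remain
 * allocated on vb->free_page_list; they return to the page allocator
 * only when the host signals VIRTIO_BALLOON_CMD_ID_DONE (see
 * virtballoon_changed()) or when the shrinker reclaims them through
 * shrink_free_pages() below.
 */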
#ifdef CONFIG_BALLOON_COMPACTION
/*
 * virtballoon_migratepage - perform the balloon page migration on behalf of
 *			     a compaction thread.     (called under page lock)
 * @vb_dev_info: the balloon device
 * @newpage: page that will replace the isolated page after migration finishes.
 * @page   : the isolated (old) page that is about to be migrated to newpage.
 * @mode   : compaction mode -- not used for balloon page migration.
 *
 * After a ballooned page gets isolated by compaction procedures, this is the
 * function that performs the page migration on behalf of a compaction thread.
 * The page migration for virtio balloon is done in a simple swap fashion which
 * follows these two macro steps:
 *  1) insert newpage into vb->pages list and update the host about it;
 *  2) update the host about the old page removed from vb->pages list;
 *
 * This function performs the balloon page migration task.
 * Called through balloon_mapping->a_ops->migratepage
 */
static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
		struct page *newpage, struct page *page, enum migrate_mode mode)
{
	struct virtio_balloon *vb = container_of(vb_dev_info,
			struct virtio_balloon, vb_dev_info);
	unsigned long flags;

	/*
	 * In order to avoid lock contention while migrating pages concurrently
	 * to leak_balloon() or fill_balloon() we just give up the balloon_lock
	 * this turn, as it is easier to retry the page migration later.
	 * This also prevents fill_balloon() getting stuck into a mutex
	 * recursion in the case it ends up triggering memory compaction
	 * while it is attempting to inflate the balloon.
	 */
	if (!mutex_trylock(&vb->balloon_lock))
		return -EAGAIN;

	get_page(newpage); /* balloon reference */

	/* balloon's page migration 1st step -- inflate "newpage" */
	spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
	balloon_page_insert(vb_dev_info, newpage);
	vb_dev_info->isolated_pages--;
	__count_vm_event(BALLOON_MIGRATE);
	spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
	set_page_pfns(vb, vb->pfns, newpage);
	tell_host(vb, vb->inflate_vq);

	/* balloon's page migration 2nd step -- deflate "page" */
	spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
	balloon_page_delete(page);
	spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
	set_page_pfns(vb, vb->pfns, page);
	tell_host(vb, vb->deflate_vq);

	mutex_unlock(&vb->balloon_lock);

	put_page(page); /* balloon reference */

	return MIGRATEPAGE_SUCCESS;
}

static struct dentry *balloon_mount(struct file_system_type *fs_type,
		int flags, const char *dev_name, void *data)
{
	static const struct dentry_operations ops = {
		.d_dname = simple_dname,
	};

	return mount_pseudo(fs_type, "balloon-kvm:", NULL, &ops,
			    BALLOON_KVM_MAGIC);
}

static struct file_system_type balloon_fs = {
	.name = "balloon-kvm",
	.mount = balloon_mount,
	.kill_sb = kill_anon_super,
};

#endif /* CONFIG_BALLOON_COMPACTION */

static unsigned long shrink_free_pages(struct virtio_balloon *vb,
				       unsigned long pages_to_free)
{
	unsigned long blocks_to_free, blocks_freed;

	pages_to_free = round_up(pages_to_free,
				 1 << VIRTIO_BALLOON_FREE_PAGE_ORDER);
	blocks_to_free = pages_to_free >> VIRTIO_BALLOON_FREE_PAGE_ORDER;
	blocks_freed = return_free_pages_to_mm(vb, blocks_to_free);

	return blocks_freed << VIRTIO_BALLOON_FREE_PAGE_ORDER;
}
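/*
 * Worked example for shrink_free_pages() (added commentary): requests
 * are rounded up to whole blocks, so with order-10 blocks a request of
 * pages_to_free == 1 becomes 1024 pages, one block is popped off the
 * list, and 1024 (blocks_freed << order) pages are reported back.
 */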
static unsigned long shrink_balloon_pages(struct virtio_balloon *vb,
					  unsigned long pages_to_free)
{
	unsigned long pages_freed = 0;

	/*
	 * One invocation of leak_balloon can deflate at most
	 * VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it
	 * multiple times to deflate pages until reaching pages_to_free.
	 */
	while (vb->num_pages && pages_to_free) {
		unsigned long freed;

		freed = leak_balloon(vb, pages_to_free) /
					VIRTIO_BALLOON_PAGES_PER_PAGE;
		/*
		 * Bail out if nothing could be deflated this round (e.g. the
		 * remaining pages are all isolated for migration), otherwise
		 * we would loop here forever.  Also, subtract only this
		 * round's progress: subtracting the cumulative pages_freed
		 * would over-decrement pages_to_free.
		 */
		if (!freed)
			break;
		pages_freed += freed;
		pages_to_free -= freed;
	}
	update_balloon_size(vb);

	return pages_freed;
}

static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
						  struct shrink_control *sc)
{
	unsigned long pages_to_free, pages_freed = 0;
	struct virtio_balloon *vb = container_of(shrinker,
					struct virtio_balloon, shrinker);

	pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE;

	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
		pages_freed = shrink_free_pages(vb, pages_to_free);

	if (pages_freed >= pages_to_free)
		return pages_freed;

	pages_freed += shrink_balloon_pages(vb, pages_to_free - pages_freed);

	return pages_freed;
}

static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
						   struct shrink_control *sc)
{
	struct virtio_balloon *vb = container_of(shrinker,
					struct virtio_balloon, shrinker);
	unsigned long count;

	count = vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
	/* Blocks convert to pages by shifting left, matching shrink_free_pages(). */
	count += vb->num_free_page_blocks << VIRTIO_BALLOON_FREE_PAGE_ORDER;

	return count;
}

static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb)
{
	unregister_shrinker(&vb->shrinker);
}

static int virtio_balloon_register_shrinker(struct virtio_balloon *vb)
{
	vb->shrinker.scan_objects = virtio_balloon_shrinker_scan;
	vb->shrinker.count_objects = virtio_balloon_shrinker_count;
	vb->shrinker.seeks = DEFAULT_SEEKS;

	return register_shrinker(&vb->shrinker);
}
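/*
 * Shrinker accounting, for illustration (added commentary): the counts
 * above are in kernel-page units.  With 64KiB kernel pages (16 balloon
 * pages each), 4096 ballooned 4KiB pages count as 4096 / 16 == 256
 * reclaimable objects, plus (with order-10 blocks)
 * num_free_page_blocks << 10 pages of hinted free page blocks.
 */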
static int virtballoon_probe(struct virtio_device *vdev)
{
	struct virtio_balloon *vb;
	__u32 poison_val;
	int err;

	if (!vdev->config->get) {
		dev_err(&vdev->dev, "%s failure: config access disabled\n",
			__func__);
		return -EINVAL;
	}

	vdev->priv = vb = kzalloc(sizeof(*vb), GFP_KERNEL);
	if (!vb) {
		err = -ENOMEM;
		goto out;
	}

	INIT_WORK(&vb->update_balloon_stats_work, update_balloon_stats_func);
	INIT_WORK(&vb->update_balloon_size_work, update_balloon_size_func);
	spin_lock_init(&vb->stop_update_lock);
	mutex_init(&vb->balloon_lock);
	init_waitqueue_head(&vb->acked);
	vb->vdev = vdev;

	balloon_devinfo_init(&vb->vb_dev_info);

	err = init_vqs(vb);
	if (err)
		goto out_free_vb;

#ifdef CONFIG_BALLOON_COMPACTION
	balloon_mnt = kern_mount(&balloon_fs);
	if (IS_ERR(balloon_mnt)) {
		err = PTR_ERR(balloon_mnt);
		goto out_del_vqs;
	}

	vb->vb_dev_info.migratepage = virtballoon_migratepage;
	vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
	if (IS_ERR(vb->vb_dev_info.inode)) {
		err = PTR_ERR(vb->vb_dev_info.inode);
		kern_unmount(balloon_mnt);
		goto out_del_vqs;
	}
	vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops;
#endif
	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
		/*
		 * There is always one entry reserved for cmd id, so the ring
		 * size needs to be at least two to report free page hints.
		 */
		if (virtqueue_get_vring_size(vb->free_page_vq) < 2) {
			err = -ENOSPC;
			goto out_del_vqs;
		}
		vb->balloon_wq = alloc_workqueue("balloon-wq",
					WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0);
		if (!vb->balloon_wq) {
			err = -ENOMEM;
			goto out_del_vqs;
		}
		INIT_WORK(&vb->report_free_page_work, report_free_page_func);
		vb->cmd_id_received = VIRTIO_BALLOON_CMD_ID_STOP;
		vb->cmd_id_active = cpu_to_virtio32(vb->vdev,
						    VIRTIO_BALLOON_CMD_ID_STOP);
		vb->cmd_id_stop = cpu_to_virtio32(vb->vdev,
						  VIRTIO_BALLOON_CMD_ID_STOP);
		vb->num_free_page_blocks = 0;
		spin_lock_init(&vb->free_page_list_lock);
		INIT_LIST_HEAD(&vb->free_page_list);
		if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
			memset(&poison_val, PAGE_POISON, sizeof(poison_val));
			virtio_cwrite(vb->vdev, struct virtio_balloon_config,
				      poison_val, &poison_val);
		}
	}
	/*
	 * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a
	 * shrinker needs to be registered to relieve memory pressure.
	 */
	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
		err = virtio_balloon_register_shrinker(vb);
		if (err)
			goto out_del_balloon_wq;
	}
	virtio_device_ready(vdev);

	if (towards_target(vb))
		virtballoon_changed(vdev);
	return 0;

out_del_balloon_wq:
	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
		destroy_workqueue(vb->balloon_wq);
out_del_vqs:
	vdev->config->del_vqs(vdev);
out_free_vb:
	kfree(vb);
out:
	return err;
}

static void remove_common(struct virtio_balloon *vb)
{
	/* There might be pages left in the balloon: free them. */
	while (vb->num_pages)
		leak_balloon(vb, vb->num_pages);
	update_balloon_size(vb);

	/* Now we reset the device so we can clean up the queues. */
	vb->vdev->config->reset(vb->vdev);

	vb->vdev->config->del_vqs(vb->vdev);
}

static void virtballoon_remove(struct virtio_device *vdev)
{
	struct virtio_balloon *vb = vdev->priv;

	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
		virtio_balloon_unregister_shrinker(vb);
	spin_lock_irq(&vb->stop_update_lock);
	vb->stop_update = true;
	spin_unlock_irq(&vb->stop_update_lock);
	cancel_work_sync(&vb->update_balloon_size_work);
	cancel_work_sync(&vb->update_balloon_stats_work);

	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
		cancel_work_sync(&vb->report_free_page_work);
		destroy_workqueue(vb->balloon_wq);
	}

	remove_common(vb);
#ifdef CONFIG_BALLOON_COMPACTION
	if (vb->vb_dev_info.inode)
		iput(vb->vb_dev_info.inode);

	kern_unmount(balloon_mnt);
#endif
	kfree(vb);
}
#ifdef CONFIG_PM_SLEEP
static int virtballoon_freeze(struct virtio_device *vdev)
{
	struct virtio_balloon *vb = vdev->priv;

	/*
	 * The workqueue is already frozen by the PM core before this
	 * function is called.
	 */
	remove_common(vb);
	return 0;
}

static int virtballoon_restore(struct virtio_device *vdev)
{
	struct virtio_balloon *vb = vdev->priv;
	int ret;

	ret = init_vqs(vdev->priv);
	if (ret)
		return ret;

	virtio_device_ready(vdev);

	if (towards_target(vb))
		virtballoon_changed(vdev);
	update_balloon_size(vb);
	return 0;
}
#endif

static int virtballoon_validate(struct virtio_device *vdev)
{
	if (!page_poisoning_enabled())
		__virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_POISON);

	__virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM);
	return 0;
}

static unsigned int features[] = {
	VIRTIO_BALLOON_F_MUST_TELL_HOST,
	VIRTIO_BALLOON_F_STATS_VQ,
	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
	VIRTIO_BALLOON_F_FREE_PAGE_HINT,
	VIRTIO_BALLOON_F_PAGE_POISON,
};

static struct virtio_driver virtio_balloon_driver = {
	.feature_table = features,
	.feature_table_size = ARRAY_SIZE(features),
	.driver.name = KBUILD_MODNAME,
	.driver.owner = THIS_MODULE,
	.id_table = id_table,
	.validate = virtballoon_validate,
	.probe = virtballoon_probe,
	.remove = virtballoon_remove,
	.config_changed = virtballoon_changed,
#ifdef CONFIG_PM_SLEEP
	.freeze = virtballoon_freeze,
	.restore = virtballoon_restore,
#endif
};

module_virtio_driver(virtio_balloon_driver);
MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio balloon driver");
MODULE_LICENSE("GPL");