/*
 * QEMU Hyper-V Dynamic Memory Protocol driver
 *
 * Copyright (C) 2020-2023 Oracle and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "hv-balloon-internal.h"

#include "exec/address-spaces.h"
#include "exec/cpu-common.h"
#include "exec/ramblock.h"
#include "hw/boards.h"
#include "hw/hyperv/dynmem-proto.h"
#include "hw/hyperv/hv-balloon.h"
#include "hw/hyperv/vmbus.h"
#include "hw/mem/memory-device.h"
#include "hw/mem/pc-dimm.h"
#include "hw/qdev-core.h"
#include "hw/qdev-properties.h"
#include "monitor/qdev.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-machine.h"
#include "qapi/qapi-events-machine.h"
#include "qapi/qapi-types-machine.h"
#include "qapi/qmp/qdict.h"
#include "qapi/visitor.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
#include "qemu/units.h"
#include "qemu/timer.h"
#include "sysemu/balloon.h"
#include "sysemu/hostmem.h"
#include "sysemu/reset.h"
#include "hv-balloon-our_range_memslots.h"
#include "hv-balloon-page_range_tree.h"
#include "trace.h"

#define HV_BALLOON_ADDR_PROP "addr"
#define HV_BALLOON_MEMDEV_PROP "memdev"
#define HV_BALLOON_GUID "525074DC-8985-46e2-8057-A307DC18A502"

/*
 * Some Windows versions (at least Server 2019) will crash with various
 * error codes when receiving DM protocol requests (at least
 * DM_MEM_HOT_ADD_REQUEST) immediately after boot.
 *
 * It looks like Hyper-V from Server 2016 uses a 50-second after-boot
 * delay, probably to work around this issue, so we'll use this value, too.
 */
#define HV_BALLOON_POST_INIT_WAIT (50 * 1000)

#define HV_BALLOON_HA_CHUNK_SIZE (2 * GiB)
#define HV_BALLOON_HA_CHUNK_PAGES (HV_BALLOON_HA_CHUNK_SIZE / HV_BALLOON_PAGE_SIZE)
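/*
 * Worked out (assuming the protocol's 4-KiB HV_BALLOON_PAGE_SIZE):
 * 2 GiB / 4 KiB = 524288 pages per hot-add chunk.
 */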

#define HV_BALLOON_HA_MEMSLOT_SIZE_ALIGN (128 * MiB)

/*
 * HV_BALLOON_HR_CHUNK_PAGES is the maximum number of pages that Windows
 * returns in one hot remove response.
 *
 * If the number requested is too high Windows will no longer honor
 * these requests.
 */
#define HV_BALLOON_HR_CHUNK_PAGES 585728
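/*
 * For scale (again assuming 4-KiB protocol pages): 585728 pages * 4 KiB
 * = 2288 MiB, so one hot-remove response covers a bit over 2 GiB at most.
 */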

typedef struct HvBalloonClass {
    VMBusDeviceClass parent_class;
} HvBalloonClass;

typedef enum State {
    /* not a real state */
    S_NO_CHANGE = 0,

    S_WAIT_RESET,
    S_POST_RESET_CLOSED,

    /* init flow */
    S_VERSION,
    S_CAPS,
    S_POST_INIT_WAIT,

    S_IDLE,

    /* balloon op flow */
    S_BALLOON_POSTING,
    S_BALLOON_RB_WAIT,
    S_BALLOON_REPLY_WAIT,

    /* unballoon + hot add ops flow */
    S_UNBALLOON_POSTING,
    S_UNBALLOON_RB_WAIT,
    S_UNBALLOON_REPLY_WAIT,
    S_HOT_ADD_SETUP,
    S_HOT_ADD_RB_WAIT,
    S_HOT_ADD_POSTING,
    S_HOT_ADD_REPLY_WAIT,
} State;
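
/*
 * Typical state flow:
 *
 *   S_WAIT_RESET -> S_POST_RESET_CLOSED -> S_VERSION -> S_CAPS
 *       -> S_POST_INIT_WAIT -> S_IDLE
 *
 * From S_IDLE, processing a new target enters either the balloon flow
 * (S_BALLOON_RB_WAIT -> S_BALLOON_POSTING -> S_BALLOON_REPLY_WAIT) or the
 * unballoon / hot-add flow (the S_UNBALLOON_* and S_HOT_ADD_* states),
 * returning to S_IDLE once the guest has replied.
 */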

typedef struct StateDesc {
    State state;
    const char *desc;
} StateDesc;

typedef struct HvBalloon {
    VMBusDevice parent;
    State state;

    union dm_version version;
    union dm_caps caps;

    QEMUTimer post_init_timer;

    unsigned int trans_id;

    struct {
        bool enabled;
        bool received;
        uint64_t committed;
        uint64_t available;
    } status_report;

    /* Guest target size */
    uint64_t target;
    bool target_changed;

    /* Current (un)balloon / hot-add operation parameters */
    union {
        uint64_t balloon_diff;

        struct {
            uint64_t unballoon_diff;
            uint64_t hot_add_diff;
        };

        struct {
            PageRange hot_add_range;
            uint64_t ha_current_count;
        };
    };

    OurRangeMemslots *our_range;

    /* Count of memslots covering our memory */
    unsigned int memslot_count;

    /* Nominal size of each memslot (the last one might be smaller) */
    uint64_t memslot_size;

    /* Non-ours removed memory */
    PageRangeTree removed_guest, removed_both;

    /* Grand totals of removed memory (both ours and non-ours) */
    uint64_t removed_guest_ctr, removed_both_ctr;

    /* MEMORY_DEVICE props */
    uint64_t addr;
    HostMemoryBackend *hostmem;
    MemoryRegion *mr;
} HvBalloon;

OBJECT_DEFINE_TYPE_WITH_INTERFACES(HvBalloon, hv_balloon, HV_BALLOON, VMBUS_DEVICE, \
                                   { TYPE_MEMORY_DEVICE }, { })

#define HV_BALLOON_SET_STATE(hvb, news)             \
    do {                                            \
        assert(news != S_NO_CHANGE);                \
        hv_balloon_state_set(hvb, news, # news);    \
    } while (0)

#define HV_BALLOON_STATE_DESC_SET(stdesc, news)         \
    _hv_balloon_state_desc_set(stdesc, news, # news)

#define HV_BALLOON_STATE_DESC_INIT \
    {                              \
        .state = S_NO_CHANGE,      \
    }
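
/*
 * Pattern used throughout: a handler receives a StateDesc initialized to
 * HV_BALLOON_STATE_DESC_INIT and may fill it, at most once, with the
 * requested next state via HV_BALLOON_STATE_DESC_SET(). S_NO_CHANGE left
 * in the descriptor means the handler requested no transition.
 */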

typedef struct HvBalloonReq {
    VMBusChanReq vmreq;
} HvBalloonReq;

/* total of our memory, including parts currently removed from the guest */
static uint64_t hv_balloon_total_our_ram(HvBalloon *balloon)
{
    if (!balloon->our_range) {
        return 0;
    }

    return balloon->our_range->range.added;
}

/* TODO: unify the code below with virtio-balloon and cache the value */
static int build_dimm_list(Object *obj, void *opaque)
{
    GSList **list = opaque;

    if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
        DeviceState *dev = DEVICE(obj);
        if (dev->realized) { /* only realized DIMMs matter */
            *list = g_slist_prepend(*list, dev);
        }
    }

    object_child_foreach(obj, build_dimm_list, opaque);
    return 0;
}

static ram_addr_t get_current_ram_size(void)
{
    GSList *list = NULL, *item;
    ram_addr_t size = current_machine->ram_size;

    build_dimm_list(qdev_get_machine(), &list);
    for (item = list; item; item = g_slist_next(item)) {
        Object *obj = OBJECT(item->data);
        if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM)) {
            size += object_property_get_int(obj, PC_DIMM_SIZE_PROP,
                                            &error_abort);
        }
    }
    g_slist_free(list);

    return size;
}

/* total RAM includes memory currently removed from the guest */
static uint64_t hv_balloon_total_ram(HvBalloon *balloon)
{
    ram_addr_t ram_size = get_current_ram_size();
    uint64_t ram_size_pages = ram_size >> HV_BALLOON_PFN_SHIFT;
    uint64_t our_ram_size_pages = hv_balloon_total_our_ram(balloon);

    assert(ram_size_pages > 0);

    return SUM_SATURATE_U64(ram_size_pages, our_ram_size_pages);
}

/*
 * calculating the total RAM size is a slow operation,
 * avoid it as much as possible
 */
static uint64_t hv_balloon_total_removed_rs(HvBalloon *balloon,
                                            uint64_t ram_size_pages)
{
    uint64_t total_removed;

    total_removed = SUM_SATURATE_U64(balloon->removed_guest_ctr,
                                     balloon->removed_both_ctr);

    /* possible if guest returns pages outside actual RAM */
    if (total_removed > ram_size_pages) {
        total_removed = ram_size_pages;
    }

    return total_removed;
}

/* Returns whether the state has actually changed */
static bool hv_balloon_state_set(HvBalloon *balloon,
                                 State newst, const char *newststr)
{
    if (newst == S_NO_CHANGE || balloon->state == newst) {
        return false;
    }

    balloon->state = newst;
    trace_hv_balloon_state_change(newststr);
    return true;
}

static void _hv_balloon_state_desc_set(StateDesc *stdesc,
                                       State newst, const char *newststr)
{
    /* state setting is only permitted on a freshly initialized desc */
    assert(stdesc->state == S_NO_CHANGE);

    assert(newst != S_NO_CHANGE);

    stdesc->state = newst;
    stdesc->desc = newststr;
}

static VMBusChannel *hv_balloon_get_channel_maybe(HvBalloon *balloon)
{
    return vmbus_device_channel(&balloon->parent, 0);
}

static VMBusChannel *hv_balloon_get_channel(HvBalloon *balloon)
{
    VMBusChannel *chan;

    chan = hv_balloon_get_channel_maybe(balloon);
    assert(chan != NULL);
    return chan;
}

static ssize_t hv_balloon_send_packet(VMBusChannel *chan,
                                      struct dm_message *msg)
{
    int ret;

    ret = vmbus_channel_reserve(chan, 0, msg->hdr.size);
    if (ret < 0) {
        return ret;
    }

    return vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND,
                              NULL, 0, msg, msg->hdr.size, false,
                              msg->hdr.trans_id);
}

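/*
 * Picks the tree to unballoon from next, in a fixed priority order: the
 * boot memory trees (removed_guest, then removed_both) first, then the
 * corresponding removed-page trees of our hot-added range. Returns false
 * when nothing is currently ballooned out.
 */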
static bool hv_balloon_unballoon_get_source(HvBalloon *balloon,
                                            PageRangeTree *dtree,
                                            uint64_t **dctr,
                                            bool *is_our_range)
{
    OurRange *our_range = OUR_RANGE(balloon->our_range);

    /* Try the boot memory first */
    if (g_tree_nnodes(balloon->removed_guest.t) > 0) {
        *dtree = balloon->removed_guest;
        *dctr = &balloon->removed_guest_ctr;
        *is_our_range = false;
    } else if (g_tree_nnodes(balloon->removed_both.t) > 0) {
        *dtree = balloon->removed_both;
        *dctr = &balloon->removed_both_ctr;
        *is_our_range = false;
    } else if (!our_range) {
        return false;
    } else if (!our_range_is_removed_tree_empty(our_range, false)) {
        *dtree = our_range_get_removed_tree(our_range, false);
        *dctr = &balloon->removed_guest_ctr;
        *is_our_range = true;
    } else if (!our_range_is_removed_tree_empty(our_range, true)) {
        *dtree = our_range_get_removed_tree(our_range, true);
        *dctr = &balloon->removed_both_ctr;
        *is_our_range = true;
    } else {
        return false;
    }

    return true;
}

static void hv_balloon_unballoon_rb_wait(HvBalloon *balloon, StateDesc *stdesc)
{
    VMBusChannel *chan = hv_balloon_get_channel(balloon);
    struct dm_unballoon_request *ur;
    size_t ur_size = sizeof(*ur) + sizeof(ur->range_array[0]);

    assert(balloon->state == S_UNBALLOON_RB_WAIT);

    if (vmbus_channel_reserve(chan, 0, ur_size) < 0) {
        return;
    }

    HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_POSTING);
}

static void hv_balloon_unballoon_posting(HvBalloon *balloon, StateDesc *stdesc)
{
    VMBusChannel *chan = hv_balloon_get_channel(balloon);
    PageRangeTree dtree;
    uint64_t *dctr;
    bool our_range;
    g_autofree struct dm_unballoon_request *ur = NULL;
    size_t ur_size = sizeof(*ur) + sizeof(ur->range_array[0]);
    PageRange range;
    bool bret;
    ssize_t ret;

    assert(balloon->state == S_UNBALLOON_POSTING);
    assert(balloon->unballoon_diff > 0);

    if (!hv_balloon_unballoon_get_source(balloon, &dtree, &dctr, &our_range)) {
        error_report("trying to unballoon but nothing seems to be ballooned");
        /*
         * there is little we can do as we might have already
         * sent the guest a partial request we can't cancel
         */
        return;
    }

    assert(balloon->our_range || !our_range);
    assert(dtree.t);
    assert(dctr);

    ur = g_malloc0(ur_size);
    ur->hdr.type = DM_UNBALLOON_REQUEST;
    ur->hdr.size = ur_size;
    ur->hdr.trans_id = balloon->trans_id;

    bret = hvb_page_range_tree_pop(dtree, &range, MIN(balloon->unballoon_diff,
                                                      HV_BALLOON_HA_CHUNK_PAGES));
    assert(bret);
    /* TODO: madvise? */

    *dctr -= range.count;
    balloon->unballoon_diff -= range.count;

    ur->range_count = 1;
    ur->range_array[0].finfo.start_page = range.start;
    ur->range_array[0].finfo.page_cnt = range.count;
    ur->more_pages = balloon->unballoon_diff > 0;

    trace_hv_balloon_outgoing_unballoon(ur->hdr.trans_id,
                                        range.count, range.start,
                                        balloon->unballoon_diff);

    if (ur->more_pages) {
        HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_RB_WAIT);
    } else {
        HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_REPLY_WAIT);
    }

    ret = vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND,
                             NULL, 0, ur, ur_size, false,
                             ur->hdr.trans_id);
    if (ret <= 0) {
        error_report("error %zd when posting unballoon msg, expect problems",
                     ret);
    }
}

static bool hv_balloon_our_range_ensure(HvBalloon *balloon)
{
    uint64_t align;
    MemoryRegion *hostmem_mr;
    g_autoptr(OurRangeMemslots) our_range_memslots = NULL;
    OurRange *our_range;

    if (balloon->our_range) {
        return true;
    }

    if (!balloon->hostmem) {
        return false;
    }

    align = (1 << balloon->caps.cap_bits.hot_add_alignment) * MiB;
    assert(QEMU_IS_ALIGNED(balloon->addr, align));

    hostmem_mr = host_memory_backend_get_memory(balloon->hostmem);

    our_range_memslots = hvb_our_range_memslots_new(balloon->addr,
                                                    balloon->mr, hostmem_mr,
                                                    OBJECT(balloon),
                                                    balloon->memslot_count,
                                                    balloon->memslot_size);
    our_range = OUR_RANGE(our_range_memslots);

    if (hvb_page_range_tree_intree_any(balloon->removed_guest,
                                       our_range->range.start,
                                       our_range->range.count) ||
        hvb_page_range_tree_intree_any(balloon->removed_both,
                                       our_range->range.start,
                                       our_range->range.count)) {
        error_report("some parts of the memory backend were already returned by the guest. this should not happen, please reboot the guest and try again");
        return false;
    }

    trace_hv_balloon_our_range_add(our_range->range.count,
                                   our_range->range.start);

    balloon->our_range = g_steal_pointer(&our_range_memslots);
    return true;
}

static void hv_balloon_hot_add_setup(HvBalloon *balloon, StateDesc *stdesc)
{
    /* need to make a copy since it is in a union with hot_add_range */
    uint64_t hot_add_diff = balloon->hot_add_diff;
    PageRange *hot_add_range = &balloon->hot_add_range;
    uint64_t align, our_range_remaining;
    OurRange *our_range;

    assert(balloon->state == S_HOT_ADD_SETUP);
    assert(hot_add_diff > 0);

    if (!hv_balloon_our_range_ensure(balloon)) {
        goto ret_idle;
    }

    our_range = OUR_RANGE(balloon->our_range);

    align = (1 << balloon->caps.cap_bits.hot_add_alignment) *
        (MiB / HV_BALLOON_PAGE_SIZE);

    /* Absolute GPA in pages */
    hot_add_range->start = our_range_get_remaining_start(our_range);
    assert(QEMU_IS_ALIGNED(hot_add_range->start, align));

    our_range_remaining = our_range_get_remaining_size(our_range);
    hot_add_range->count = MIN(our_range_remaining, hot_add_diff);
    hot_add_range->count = QEMU_ALIGN_DOWN(hot_add_range->count, align);
    if (hot_add_range->count == 0) {
        goto ret_idle;
    }

    hvb_our_range_memslots_ensure_mapped_additional(balloon->our_range,
                                                    hot_add_range->count);

    HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_RB_WAIT);
    return;

ret_idle:
    HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE);
}

static void hv_balloon_hot_add_rb_wait(HvBalloon *balloon, StateDesc *stdesc)
{
    VMBusChannel *chan = hv_balloon_get_channel(balloon);
    struct dm_hot_add_with_region *ha;
    size_t ha_size = sizeof(*ha);

    assert(balloon->state == S_HOT_ADD_RB_WAIT);

    if (vmbus_channel_reserve(chan, 0, ha_size) < 0) {
        return;
    }

    HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_POSTING);
}

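/*
 * Posts a single DM_MEM_HOT_ADD_REQUEST covering the next chunk of
 * hot_add_range, at most chunk_max_size pages at a time, then waits for
 * the guest's response.
 */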
static void hv_balloon_hot_add_posting(HvBalloon *balloon, StateDesc *stdesc)
{
    PageRange *hot_add_range = &balloon->hot_add_range;
    uint64_t *current_count = &balloon->ha_current_count;
    VMBusChannel *chan = hv_balloon_get_channel(balloon);
    g_autofree struct dm_hot_add_with_region *ha = NULL;
    size_t ha_size = sizeof(*ha);
    union dm_mem_page_range *ha_region;
    uint64_t align, chunk_max_size;
    ssize_t ret;

    assert(balloon->state == S_HOT_ADD_POSTING);
    assert(hot_add_range->count > 0);

    align = (1 << balloon->caps.cap_bits.hot_add_alignment) *
        (MiB / HV_BALLOON_PAGE_SIZE);
    if (align >= HV_BALLOON_HA_CHUNK_PAGES) {
        /*
         * If the required alignment is higher than the chunk size, we let it
         * override that size.
         */
        chunk_max_size = align;
    } else {
        chunk_max_size = QEMU_ALIGN_DOWN(HV_BALLOON_HA_CHUNK_PAGES, align);
    }

    /*
     * hot_add_range->count starts aligned in hv_balloon_hot_add_setup(),
     * then it is either reduced by subtracting aligned current_count or
     * further hot-adds are prevented by marking the whole remaining our range
     * as unusable in hv_balloon_handle_hot_add_response().
     */
    *current_count = MIN(hot_add_range->count, chunk_max_size);

    ha = g_malloc0(ha_size);
    ha_region = &ha->region;
    ha->hdr.type = DM_MEM_HOT_ADD_REQUEST;
    ha->hdr.size = ha_size;
    ha->hdr.trans_id = balloon->trans_id;

    ha->range.finfo.start_page = hot_add_range->start;
    ha->range.finfo.page_cnt = *current_count;
    ha_region->finfo.start_page = hot_add_range->start;
    ha_region->finfo.page_cnt = ha->range.finfo.page_cnt;

    trace_hv_balloon_outgoing_hot_add(ha->hdr.trans_id,
                                      *current_count, hot_add_range->start);

    ret = vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND,
                             NULL, 0, ha, ha_size, false,
                             ha->hdr.trans_id);
    if (ret <= 0) {
        error_report("error %zd when posting hot add msg, expect problems",
                     ret);
    }

    HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_REPLY_WAIT);
}

static void hv_balloon_balloon_rb_wait(HvBalloon *balloon, StateDesc *stdesc)
{
    VMBusChannel *chan = hv_balloon_get_channel(balloon);
    size_t bl_size = sizeof(struct dm_balloon);

    assert(balloon->state == S_BALLOON_RB_WAIT);

    if (vmbus_channel_reserve(chan, 0, bl_size) < 0) {
        return;
    }

    HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_POSTING);
}

static void hv_balloon_balloon_posting(HvBalloon *balloon, StateDesc *stdesc)
{
    VMBusChannel *chan = hv_balloon_get_channel(balloon);
    struct dm_balloon bl;
    size_t bl_size = sizeof(bl);
    ssize_t ret;

    assert(balloon->state == S_BALLOON_POSTING);
    assert(balloon->balloon_diff > 0);

    memset(&bl, 0, sizeof(bl));
    bl.hdr.type = DM_BALLOON_REQUEST;
    bl.hdr.size = bl_size;
    bl.hdr.trans_id = balloon->trans_id;
    bl.num_pages = MIN(balloon->balloon_diff, HV_BALLOON_HR_CHUNK_PAGES);

    trace_hv_balloon_outgoing_balloon(bl.hdr.trans_id, bl.num_pages,
                                      balloon->balloon_diff);

    ret = vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND,
                             NULL, 0, &bl, bl_size, false,
                             bl.hdr.trans_id);
    if (ret <= 0) {
        error_report("error %zd when posting balloon msg, expect problems",
                     ret);
    }

    HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_REPLY_WAIT);
}

static void hv_balloon_idle_state_process_target(HvBalloon *balloon,
                                                 StateDesc *stdesc)
{
    bool can_balloon = balloon->caps.cap_bits.balloon;
    uint64_t ram_size_pages, total_removed;

    ram_size_pages = hv_balloon_total_ram(balloon);
    total_removed = hv_balloon_total_removed_rs(balloon, ram_size_pages);

    /*
     * we need to cache the values computed from the balloon target when
     * starting the adjustment procedure, in case the target changes while
     * the procedure is in progress
     */
    if (balloon->target > ram_size_pages - total_removed) {
        bool can_hot_add = balloon->caps.cap_bits.hot_add;
        uint64_t target_diff = balloon->target -
            (ram_size_pages - total_removed);

        balloon->unballoon_diff = MIN(target_diff, total_removed);

        if (can_hot_add) {
            balloon->hot_add_diff = target_diff - balloon->unballoon_diff;
        } else {
            balloon->hot_add_diff = 0;
        }

        if (balloon->unballoon_diff > 0) {
            assert(can_balloon);
            HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_RB_WAIT);
        } else if (balloon->hot_add_diff > 0) {
            HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_SETUP);
        }
    } else if (can_balloon &&
               balloon->target < ram_size_pages - total_removed) {
        balloon->balloon_diff = ram_size_pages - total_removed -
            balloon->target;
        HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_RB_WAIT);
    }
}

static void hv_balloon_idle_state(HvBalloon *balloon,
                                  StateDesc *stdesc)
{
    assert(balloon->state == S_IDLE);

    if (balloon->target_changed) {
        balloon->target_changed = false;
        hv_balloon_idle_state_process_target(balloon, stdesc);
        return;
    }
}

static const struct {
    void (*handler)(HvBalloon *balloon, StateDesc *stdesc);
} state_handlers[] = {
    [S_IDLE].handler = hv_balloon_idle_state,
    [S_BALLOON_POSTING].handler = hv_balloon_balloon_posting,
    [S_BALLOON_RB_WAIT].handler = hv_balloon_balloon_rb_wait,
    [S_UNBALLOON_POSTING].handler = hv_balloon_unballoon_posting,
    [S_UNBALLOON_RB_WAIT].handler = hv_balloon_unballoon_rb_wait,
    [S_HOT_ADD_SETUP].handler = hv_balloon_hot_add_setup,
    [S_HOT_ADD_RB_WAIT].handler = hv_balloon_hot_add_rb_wait,
    [S_HOT_ADD_POSTING].handler = hv_balloon_hot_add_posting,
};
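
/*
 * States missing from the table above (the S_*_REPLY_WAIT states, the init
 * flow states and the reset-related ones) have no polled handler; they
 * advance only on external events: incoming VMBus messages, channel
 * open/close, or the post-init timer.
 */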

static void hv_balloon_handle_state(HvBalloon *balloon, StateDesc *stdesc)
{
    if (balloon->state >= ARRAY_SIZE(state_handlers) ||
        !state_handlers[balloon->state].handler) {
        return;
    }

    state_handlers[balloon->state].handler(balloon, stdesc);
}

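/*
 * Inserts the range into the given tree and bumps the removal counters by
 * the number of pages actually inserted (range->count minus the pages
 * already present in the tree); ctr3 is optional.
 */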
static void hv_balloon_remove_response_insert_range(PageRangeTree tree,
                                                    const PageRange *range,
                                                    uint64_t *ctr1,
                                                    uint64_t *ctr2,
                                                    uint64_t *ctr3)
{
    uint64_t dupcount, effcount;

    if (range->count == 0) {
        return;
    }

    dupcount = 0;
    hvb_page_range_tree_insert(tree, range->start, range->count, &dupcount);

    assert(dupcount <= range->count);
    effcount = range->count - dupcount;

    *ctr1 += effcount;
    *ctr2 += effcount;
    if (ctr3) {
        *ctr3 += effcount;
    }
}

static void hv_balloon_remove_response_handle_range(HvBalloon *balloon,
                                                    PageRange *range,
                                                    bool both,
                                                    uint64_t *removedctr)
{
    OurRange *our_range = OUR_RANGE(balloon->our_range);
    PageRangeTree globaltree =
        both ? balloon->removed_both : balloon->removed_guest;
    uint64_t *globalctr =
        both ? &balloon->removed_both_ctr : &balloon->removed_guest_ctr;
    PageRange rangeeff;

    if (range->count == 0) {
        return;
    }

    trace_hv_balloon_remove_response(range->count, range->start, both);

    if (our_range) {
        /* Includes the not-yet-hot-added and unusable parts. */
        rangeeff = our_range->range;
    } else {
        rangeeff.start = rangeeff.count = 0;
    }

    if (page_range_intersection_size(range, rangeeff.start, rangeeff.count) > 0) {
        PageRangeTree ourtree = our_range_get_removed_tree(our_range, both);
        PageRange rangehole, rangecommon;
        uint64_t ourremoved = 0;

        /* process the hole before our range, if it exists */
        page_range_part_before(range, rangeeff.start, &rangehole);
        hv_balloon_remove_response_insert_range(globaltree, &rangehole,
                                                globalctr, removedctr, NULL);
        if (rangehole.count > 0) {
            trace_hv_balloon_remove_response_hole(rangehole.count,
                                                  rangehole.start,
                                                  range->count, range->start,
                                                  rangeeff.start, both);
        }

        /* process our part */
        page_range_intersect(range, rangeeff.start, rangeeff.count,
                             &rangecommon);
        hv_balloon_remove_response_insert_range(ourtree, &rangecommon,
                                                globalctr, removedctr,
                                                &ourremoved);
        if (rangecommon.count > 0) {
            trace_hv_balloon_remove_response_common(rangecommon.count,
                                                    rangecommon.start,
                                                    range->count, range->start,
                                                    rangeeff.count,
                                                    rangeeff.start, ourremoved,
                                                    both);
        }

        /* calculate what's left after our range */
        rangecommon = *range;
        page_range_part_after(&rangecommon, rangeeff.start, rangeeff.count,
                              range);
    }

    /* process the remainder of the range that lies after our range */
    if (range->count > 0) {
        hv_balloon_remove_response_insert_range(globaltree, range,
                                                globalctr, removedctr, NULL);
        trace_hv_balloon_remove_response_remainder(range->count, range->start,
                                                   both);
        range->count = 0;
    }
}

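/*
 * Accumulates contiguous removed pages into *range; when the new pages
 * can't be joined to the pending range the latter is flushed into the
 * appropriate tree(s) first.
 */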
static void hv_balloon_remove_response_handle_pages(HvBalloon *balloon,
                                                    PageRange *range,
                                                    uint64_t start,
                                                    uint64_t count,
                                                    bool both,
                                                    uint64_t *removedctr)
{
    assert(count > 0);

    /*
     * if there is an existing range that the new range can't be joined to,
     * dump it into the tree(s)
     */
    if (range->count > 0 && !page_range_joinable(range, start, count)) {
        hv_balloon_remove_response_handle_range(balloon, range, both,
                                                removedctr);
    }

    if (range->count == 0) {
        range->start = start;
        range->count = count;
    } else if (page_range_joinable_left(range, start, count)) {
        range->start = start;
        range->count += count;
    } else { /* page_range_joinable_right() */
        range->count += count;
    }
}

static gboolean hv_balloon_handle_remove_host_addr_node(gpointer key,
                                                        gpointer value,
                                                        gpointer data)
{
    PageRange *range = value;
    uint64_t pageoff;

    for (pageoff = 0; pageoff < range->count; ) {
        uint64_t addr_64 = (range->start + pageoff) * HV_BALLOON_PAGE_SIZE;
        void *addr;
        RAMBlock *rb;
        ram_addr_t rb_offset;
        size_t rb_page_size;
        size_t discard_size;

        assert(addr_64 <= UINTPTR_MAX);
        addr = (void *)((uintptr_t)addr_64);
        rb = qemu_ram_block_from_host(addr, false, &rb_offset);
        rb_page_size = qemu_ram_pagesize(rb);

        if (rb_page_size != HV_BALLOON_PAGE_SIZE) {
            /* TODO: these should end in "removed_guest" */
            warn_report("guest reported removed page backed by unsupported page size %zu",
                        rb_page_size);
            pageoff++;
            continue;
        }

        discard_size = MIN(range->count - pageoff,
                           (rb->max_length - rb_offset) /
                           HV_BALLOON_PAGE_SIZE);
        discard_size = MAX(discard_size, 1);

        if (ram_block_discard_range(rb, rb_offset, discard_size *
                                    HV_BALLOON_PAGE_SIZE) != 0) {
            warn_report("guest reported removed page failed discard");
        }

        pageoff += discard_size;
    }

    return false;
}

static void hv_balloon_handle_remove_host_addr_tree(PageRangeTree tree)
{
    g_tree_foreach(tree.t, hv_balloon_handle_remove_host_addr_node, NULL);
}

static int hv_balloon_handle_remove_section(PageRangeTree tree,
                                            const MemoryRegionSection *section,
                                            uint64_t count)
{
    void *addr = memory_region_get_ram_ptr(section->mr) +
        section->offset_within_region;
    uint64_t addr_page;

    assert(count > 0);

    if ((uintptr_t)addr % HV_BALLOON_PAGE_SIZE) {
        warn_report("guest reported removed pages at an unaligned host addr %p",
                    addr);
        return -EINVAL;
    }

    addr_page = (uintptr_t)addr / HV_BALLOON_PAGE_SIZE;
    hvb_page_range_tree_insert(tree, addr_page, count, NULL);

    return 0;
}

static void hv_balloon_handle_remove_ranges(HvBalloon *balloon,
                                            union dm_mem_page_range ranges[],
                                            uint32_t count)
{
    uint64_t removedcnt;
    PageRangeTree removed_host_addr;
    PageRange range_guest, range_both;

    hvb_page_range_tree_init(&removed_host_addr);
    range_guest.count = range_both.count = removedcnt = 0;
    for (unsigned int ctr = 0; ctr < count; ctr++) {
        union dm_mem_page_range *mr = &ranges[ctr];
        hwaddr pa;
        MemoryRegionSection section;

        for (unsigned int offset = 0; offset < mr->finfo.page_cnt; ) {
            int ret;
            uint64_t pageno = mr->finfo.start_page + offset;
            uint64_t pagecnt = 1;

            pa = (hwaddr)pageno << HV_BALLOON_PFN_SHIFT;
            section = memory_region_find(get_system_memory(), pa,
                                         (mr->finfo.page_cnt - offset) *
                                         HV_BALLOON_PAGE_SIZE);
            if (!section.mr) {
                warn_report("guest reported removed page %"PRIu64" not found in RAM",
                            pageno);
                ret = -EINVAL;
                goto finish_page;
            }

            pagecnt = int128_get64(section.size) / HV_BALLOON_PAGE_SIZE;
            if (pagecnt <= 0) {
                warn_report("guest reported removed page %"PRIu64" in a section smaller than page size",
                            pageno);
                pagecnt = 1; /* skip the whole page */
                ret = -EINVAL;
                goto finish_page;
            }

            if (!memory_region_is_ram(section.mr) ||
                memory_region_is_rom(section.mr) ||
                memory_region_is_romd(section.mr)) {
                warn_report("guest reported removed page %"PRIu64" in a section that is not an ordinary RAM",
                            pageno);
                ret = -EINVAL;
                goto finish_page;
            }

            ret = hv_balloon_handle_remove_section(removed_host_addr, &section,
                                                   pagecnt);

        finish_page:
            if (ret == 0) {
                hv_balloon_remove_response_handle_pages(balloon,
                                                        &range_both,
                                                        pageno, pagecnt,
                                                        true, &removedcnt);
            } else {
                hv_balloon_remove_response_handle_pages(balloon,
                                                        &range_guest,
                                                        pageno, pagecnt,
                                                        false, &removedcnt);
            }

            if (section.mr) {
                memory_region_unref(section.mr);
            }

            offset += pagecnt;
        }
    }

    hv_balloon_remove_response_handle_range(balloon, &range_both, true,
                                            &removedcnt);
    hv_balloon_remove_response_handle_range(balloon, &range_guest, false,
                                            &removedcnt);

    hv_balloon_handle_remove_host_addr_tree(removed_host_addr);
    hvb_page_range_tree_destroy(&removed_host_addr);

    if (removedcnt > balloon->balloon_diff) {
        warn_report("guest reported more pages removed than currently pending (%"PRIu64" vs %"PRIu64")",
                    removedcnt, balloon->balloon_diff);
        balloon->balloon_diff = 0;
    } else {
        balloon->balloon_diff -= removedcnt;
    }
}

static bool hv_balloon_handle_msg_size(HvBalloonReq *req, size_t minsize,
                                       const char *msgname)
{
    VMBusChanReq *vmreq = &req->vmreq;
    uint32_t msglen = vmreq->msglen;

    if (msglen >= minsize) {
        return true;
    }

    warn_report("%s message too short (%u vs %zu), ignoring", msgname,
                (unsigned int)msglen, minsize);
    return false;
}

static void hv_balloon_handle_version_request(HvBalloon *balloon,
                                              HvBalloonReq *req,
                                              StateDesc *stdesc)
{
    VMBusChanReq *vmreq = &req->vmreq;
    struct dm_version_request *msgVr = vmreq->msg;
    struct dm_version_response respVr;

    if (balloon->state != S_VERSION) {
        warn_report("unexpected DM_VERSION_REQUEST in %d state",
                    balloon->state);
        return;
    }

    if (!hv_balloon_handle_msg_size(req, sizeof(*msgVr),
                                    "DM_VERSION_REQUEST")) {
        return;
    }

    trace_hv_balloon_incoming_version(msgVr->version.major_version,
                                      msgVr->version.minor_version);

    memset(&respVr, 0, sizeof(respVr));
    respVr.hdr.type = DM_VERSION_RESPONSE;
    respVr.hdr.size = sizeof(respVr);
    respVr.hdr.trans_id = msgVr->hdr.trans_id;
    respVr.is_accepted = msgVr->version.version >= DYNMEM_PROTOCOL_VERSION_1 &&
        msgVr->version.version <= DYNMEM_PROTOCOL_VERSION_3;

    hv_balloon_send_packet(vmreq->chan, (struct dm_message *)&respVr);

    if (respVr.is_accepted) {
        HV_BALLOON_STATE_DESC_SET(stdesc, S_CAPS);
    }
}

static void hv_balloon_handle_caps_report(HvBalloon *balloon,
                                          HvBalloonReq *req,
                                          StateDesc *stdesc)
{
    VMBusChanReq *vmreq = &req->vmreq;
    struct dm_capabilities *msgCap = vmreq->msg;
    struct dm_capabilities_resp_msg respCap;

    if (balloon->state != S_CAPS) {
        warn_report("unexpected DM_CAPABILITIES_REPORT in %d state",
                    balloon->state);
        return;
    }

    if (!hv_balloon_handle_msg_size(req, sizeof(*msgCap),
                                    "DM_CAPABILITIES_REPORT")) {
        return;
    }

    trace_hv_balloon_incoming_caps(msgCap->caps.caps);
    balloon->caps = msgCap->caps;

    memset(&respCap, 0, sizeof(respCap));
    respCap.hdr.type = DM_CAPABILITIES_RESPONSE;
    respCap.hdr.size = sizeof(respCap);
    respCap.hdr.trans_id = msgCap->hdr.trans_id;
    respCap.is_accepted = 1;
    respCap.hot_remove = 1;
    respCap.suppress_pressure_reports = !balloon->status_report.enabled;
    hv_balloon_send_packet(vmreq->chan, (struct dm_message *)&respCap);

    timer_mod(&balloon->post_init_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
              HV_BALLOON_POST_INIT_WAIT);

    HV_BALLOON_STATE_DESC_SET(stdesc, S_POST_INIT_WAIT);
}

static void hv_balloon_handle_status_report(HvBalloon *balloon,
                                            HvBalloonReq *req)
{
    VMBusChanReq *vmreq = &req->vmreq;
    struct dm_status *msgStatus = vmreq->msg;

    if (!hv_balloon_handle_msg_size(req, sizeof(*msgStatus),
                                    "DM_STATUS_REPORT")) {
        return;
    }

    if (!balloon->status_report.enabled) {
        return;
    }

    balloon->status_report.committed = msgStatus->num_committed;
    balloon->status_report.committed *= HV_BALLOON_PAGE_SIZE;
    balloon->status_report.available = msgStatus->num_avail;
    balloon->status_report.available *= HV_BALLOON_PAGE_SIZE;
    balloon->status_report.received = true;

    qapi_event_send_hv_balloon_status_report(balloon->status_report.committed,
                                             balloon->status_report.available);
}

HvBalloonInfo *qmp_query_hv_balloon_status_report(Error **errp)
{
    HvBalloon *balloon;
    HvBalloonInfo *info;

    balloon = HV_BALLOON(object_resolve_path_type("", TYPE_HV_BALLOON, NULL));
    if (!balloon) {
        error_setg(errp, "no %s device present", TYPE_HV_BALLOON);
        return NULL;
    }

    if (!balloon->status_report.enabled) {
        error_setg(errp, "guest memory status reporting not enabled");
        return NULL;
    }

    if (!balloon->status_report.received) {
        error_setg(errp, "no guest memory status report received yet");
        return NULL;
    }

    info = g_malloc0(sizeof(*info));
    info->committed = balloon->status_report.committed;
    info->available = balloon->status_report.available;
    return info;
}

static void hv_balloon_handle_unballoon_response(HvBalloon *balloon,
                                                 HvBalloonReq *req,
                                                 StateDesc *stdesc)
{
    VMBusChanReq *vmreq = &req->vmreq;
    struct dm_unballoon_response *msgUrR = vmreq->msg;

    if (balloon->state != S_UNBALLOON_REPLY_WAIT) {
        warn_report("unexpected DM_UNBALLOON_RESPONSE in %d state",
                    balloon->state);
        return;
    }

    if (!hv_balloon_handle_msg_size(req, sizeof(*msgUrR),
                                    "DM_UNBALLOON_RESPONSE")) {
        return;
    }

    trace_hv_balloon_incoming_unballoon(msgUrR->hdr.trans_id);

    balloon->trans_id++;

    if (balloon->hot_add_diff > 0) {
        bool can_hot_add = balloon->caps.cap_bits.hot_add;

        assert(can_hot_add);
        HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_SETUP);
    } else {
        HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE);
    }
}

static void hv_balloon_handle_hot_add_response(HvBalloon *balloon,
                                               HvBalloonReq *req,
                                               StateDesc *stdesc)
{
    PageRange *hot_add_range = &balloon->hot_add_range;
    VMBusChanReq *vmreq = &req->vmreq;
    struct dm_hot_add_response *msgHaR = vmreq->msg;
    OurRange *our_range;

    if (balloon->state != S_HOT_ADD_REPLY_WAIT) {
        warn_report("unexpected DM_HOT_ADD_RESPONSE in %d state",
                    balloon->state);
        return;
    }

    assert(balloon->our_range);
    our_range = OUR_RANGE(balloon->our_range);

    if (!hv_balloon_handle_msg_size(req, sizeof(*msgHaR),
                                    "DM_HOT_ADD_RESPONSE")) {
        return;
    }

    trace_hv_balloon_incoming_hot_add(msgHaR->hdr.trans_id, msgHaR->result,
                                      msgHaR->page_count);

    balloon->trans_id++;

    if (msgHaR->result) {
        if (msgHaR->page_count > balloon->ha_current_count) {
            warn_report("DM_HOT_ADD_RESPONSE page count higher than requested (%"PRIu32" vs %"PRIu64")",
                        msgHaR->page_count, balloon->ha_current_count);
            msgHaR->page_count = balloon->ha_current_count;
        }

        hvb_our_range_mark_added(our_range, msgHaR->page_count);
        hot_add_range->start += msgHaR->page_count;
        hot_add_range->count -= msgHaR->page_count;
    }

    if (!msgHaR->result || msgHaR->page_count < balloon->ha_current_count) {
        /*
         * the current planned range was only partially hot-added, take note
         * of how much of it remains and don't attempt any further hot adds
         */
        our_range_mark_remaining_unusable(our_range);

        goto ret_idle;
    }

    /* any pages remaining to hot-add in our range? */
    if (hot_add_range->count > 0) {
        HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_RB_WAIT);
        return;
    }

ret_idle:
    HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE);
}

static void hv_balloon_handle_balloon_response(HvBalloon *balloon,
                                               HvBalloonReq *req,
                                               StateDesc *stdesc)
{
    VMBusChanReq *vmreq = &req->vmreq;
    struct dm_balloon_response *msgBR = vmreq->msg;

    if (balloon->state != S_BALLOON_REPLY_WAIT) {
        warn_report("unexpected DM_BALLOON_RESPONSE in %d state",
                    balloon->state);
        return;
    }

    if (!hv_balloon_handle_msg_size(req, sizeof(*msgBR),
                                    "DM_BALLOON_RESPONSE")) {
        return;
    }

    trace_hv_balloon_incoming_balloon(msgBR->hdr.trans_id, msgBR->range_count,
                                      msgBR->more_pages);

    if (vmreq->msglen < sizeof(*msgBR) +
        (uint64_t)sizeof(msgBR->range_array[0]) * msgBR->range_count) {
        warn_report("DM_BALLOON_RESPONSE too short for the range count");
        return;
    }

    if (msgBR->range_count == 0) {
        /* The guest is already at its minimum size */
        balloon->balloon_diff = 0;
        goto ret_end_trans;
    } else {
        hv_balloon_handle_remove_ranges(balloon,
                                        msgBR->range_array,
                                        msgBR->range_count);
    }

    /* More responses expected? */
    if (msgBR->more_pages) {
        return;
    }

ret_end_trans:
    balloon->trans_id++;

    if (balloon->balloon_diff > 0) {
        HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_RB_WAIT);
    } else {
        HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE);
    }
}

static void hv_balloon_handle_packet(HvBalloon *balloon, HvBalloonReq *req,
                                     StateDesc *stdesc)
{
    VMBusChanReq *vmreq = &req->vmreq;
    struct dm_message *msg = vmreq->msg;

    if (vmreq->msglen < sizeof(msg->hdr)) {
        return;
    }

    switch (msg->hdr.type) {
    case DM_VERSION_REQUEST:
        hv_balloon_handle_version_request(balloon, req, stdesc);
        break;

    case DM_CAPABILITIES_REPORT:
        hv_balloon_handle_caps_report(balloon, req, stdesc);
        break;

    case DM_STATUS_REPORT:
        hv_balloon_handle_status_report(balloon, req);
        break;

    case DM_MEM_HOT_ADD_RESPONSE:
        hv_balloon_handle_hot_add_response(balloon, req, stdesc);
        break;

    case DM_UNBALLOON_RESPONSE:
        hv_balloon_handle_unballoon_response(balloon, req, stdesc);
        break;

    case DM_BALLOON_RESPONSE:
        hv_balloon_handle_balloon_response(balloon, req, stdesc);
        break;

    default:
        warn_report("unknown DM message %u", msg->hdr.type);
        break;
    }
}

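/*
 * Drains incoming packets from the channel, but stops early once a handler
 * requests a state transition so the new state gets processed before any
 * further packets. Returns true when packet data was consumed, prompting
 * another event-loop pass.
 */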
static bool hv_balloon_recv_channel(HvBalloon *balloon, StateDesc *stdesc)
{
    VMBusChannel *chan;
    HvBalloonReq *req;

    if (balloon->state == S_WAIT_RESET ||
        balloon->state == S_POST_RESET_CLOSED) {
        return false;
    }

    chan = hv_balloon_get_channel(balloon);
    if (vmbus_channel_recv_start(chan)) {
        return false;
    }

    while ((req = vmbus_channel_recv_peek(chan, sizeof(*req)))) {
        hv_balloon_handle_packet(balloon, req, stdesc);
        vmbus_free_req(req);
        vmbus_channel_recv_pop(chan);

        if (stdesc->state != S_NO_CHANGE) {
            break;
        }
    }

    return vmbus_channel_recv_done(chan) > 0;
}

/* old state handler -> new state transition (potential) */
static bool hv_balloon_event_loop_state(HvBalloon *balloon)
{
    StateDesc state_new = HV_BALLOON_STATE_DESC_INIT;

    hv_balloon_handle_state(balloon, &state_new);
    return hv_balloon_state_set(balloon, state_new.state, state_new.desc);
}

/* VMBus message -> new state transition (potential) */
static bool hv_balloon_event_loop_recv(HvBalloon *balloon)
{
    StateDesc state_new = HV_BALLOON_STATE_DESC_INIT;
    bool any_recv, state_changed;

    any_recv = hv_balloon_recv_channel(balloon, &state_new);
    state_changed = hv_balloon_state_set(balloon,
                                         state_new.state, state_new.desc);

    return state_changed || any_recv;
}

static void hv_balloon_event_loop(HvBalloon *balloon)
{
    bool state_repeat, recv_repeat;

    do {
        state_repeat = hv_balloon_event_loop_state(balloon);
        recv_repeat = hv_balloon_event_loop_recv(balloon);
    } while (state_repeat || recv_repeat);
}
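
/*
 * The event loop above alternates between running the current state's
 * handler and receiving channel packets until neither causes a state
 * change nor consumes any more data, i.e. until a fixed point is reached.
 */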

static void hv_balloon_vmdev_chan_notify(VMBusChannel *chan)
{
    HvBalloon *balloon = HV_BALLOON(vmbus_channel_device(chan));

    hv_balloon_event_loop(balloon);
}

static void hv_balloon_stat(void *opaque, BalloonInfo *info)
{
    HvBalloon *balloon = opaque;
    info->actual = (hv_balloon_total_ram(balloon) - balloon->removed_both_ctr)
        << HV_BALLOON_PFN_SHIFT;
}
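
/*
 * The reported "actual" size is thus the total RAM (including our
 * hot-added range) minus the pages removed from both the guest and the
 * host, converted from pages back to bytes via HV_BALLOON_PFN_SHIFT.
 */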

static void hv_balloon_to_target(void *opaque, ram_addr_t target)
{
    HvBalloon *balloon = opaque;
    uint64_t target_pages = target >> HV_BALLOON_PFN_SHIFT;

    if (!target_pages) {
        return;
    }

    /*
     * always set target_changed, even with an unchanged target, as the user
     * might be asking us to try reaching it again
     */
    balloon->target = target_pages;
    balloon->target_changed = true;

    hv_balloon_event_loop(balloon);
}
1409  
hv_balloon_vmdev_open_channel(VMBusChannel * chan)1410  static int hv_balloon_vmdev_open_channel(VMBusChannel *chan)
1411  {
1412      HvBalloon *balloon = HV_BALLOON(vmbus_channel_device(chan));
1413  
1414      if (balloon->state != S_POST_RESET_CLOSED) {
1415          warn_report("guest trying to open a DM channel in invalid %d state",
1416                      balloon->state);
1417          return -EINVAL;
1418      }
1419  
1420      HV_BALLOON_SET_STATE(balloon, S_VERSION);
1421      hv_balloon_event_loop(balloon);
1422  
1423      return 0;
1424  }
1425  
hv_balloon_vmdev_close_channel(VMBusChannel * chan)1426  static void hv_balloon_vmdev_close_channel(VMBusChannel *chan)
1427  {
1428      HvBalloon *balloon = HV_BALLOON(vmbus_channel_device(chan));
1429  
1430      timer_del(&balloon->post_init_timer);
1431  
1432      /* Don't report stale data */
1433      balloon->status_report.received = false;
1434  
1435      HV_BALLOON_SET_STATE(balloon, S_WAIT_RESET);
1436      hv_balloon_event_loop(balloon);
1437  }
1438  
1439  static void hv_balloon_post_init_timer(void *opaque)
1440  {
1441      HvBalloon *balloon = opaque;
1442  
1443      if (balloon->state != S_POST_INIT_WAIT) {
1444          return;
1445      }
1446  
1447      HV_BALLOON_SET_STATE(balloon, S_IDLE);
1448      hv_balloon_event_loop(balloon);
1449  }
1450  
1451  static void hv_balloon_system_reset_unrealize_common(HvBalloon *balloon)
1452  {
1453      g_clear_pointer(&balloon->our_range, hvb_our_range_memslots_free);
1454  }
1455  
1456  static void hv_balloon_system_reset(void *opaque)
1457  {
1458      HvBalloon *balloon = HV_BALLOON(opaque);
1459  
1460      hv_balloon_system_reset_unrealize_common(balloon);
1461  }
1462  
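/*
 * Create the memory-device region lazily, mirroring the memdev size:
 * both realize and the memory-device get_memory_region() callback can be
 * the first caller, so whichever runs first allocates it and later
 * callers return early.
 */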
1463  static void hv_balloon_ensure_mr(HvBalloon *balloon)
1464  {
1465      MemoryRegion *hostmem_mr;
1466  
1467      assert(balloon->hostmem);
1468  
1469      if (balloon->mr) {
1470          return;
1471      }
1472  
1473      hostmem_mr = host_memory_backend_get_memory(balloon->hostmem);
1474  
1475      balloon->mr = g_new0(MemoryRegion, 1);
1476      memory_region_init(balloon->mr, OBJECT(balloon), TYPE_HV_BALLOON,
1477                         memory_region_size(hostmem_mr));
1478      balloon->mr->align = memory_region_get_alignment(hostmem_mr);
1479  }
1480  
1481  static void hv_balloon_free_mr(HvBalloon *balloon)
1482  {
1483      if (!balloon->mr) {
1484          return;
1485      }
1486  
1487      object_unparent(OBJECT(balloon->mr));
1488      g_clear_pointer(&balloon->mr, g_free);
1489  }
1490  
1491  static void hv_balloon_vmdev_realize(VMBusDevice *vdev, Error **errp)
1492  {
1493      ERRP_GUARD();
1494      HvBalloon *balloon = HV_BALLOON(vdev);
1495      int ret;
1496  
1497      balloon->state = S_WAIT_RESET;
1498  
1499      ret = qemu_add_balloon_handler(hv_balloon_to_target, hv_balloon_stat,
1500                                     balloon);
1501      if (ret < 0) {
1502          /* This also protects against having multiple hv-balloon instances */
1503          error_setg(errp, "Only one balloon device is supported");
1504          return;
1505      }
1506  
1507      if (balloon->hostmem) {
1508          if (host_memory_backend_is_mapped(balloon->hostmem)) {
1509              Object *obj = OBJECT(balloon->hostmem);
1510  
1511              error_setg(errp, "'%s' property specifies a busy memdev: %s",
1512                         HV_BALLOON_MEMDEV_PROP,
1513                         object_get_canonical_path_component(obj));
1514              goto out_balloon_handler;
1515          }
1516  
1517          hv_balloon_ensure_mr(balloon);
1518  
1519          /* This is rather unlikely to happen, but let's still check for it. */
1520          if (!QEMU_IS_ALIGNED(memory_region_size(balloon->mr),
1521                               HV_BALLOON_PAGE_SIZE)) {
1522              error_setg(errp, "'%s' property memdev size has to be a multiple of 0x%" PRIx64,
1523                         HV_BALLOON_MEMDEV_PROP, (uint64_t)HV_BALLOON_PAGE_SIZE);
1524              goto out_balloon_handler;
1525          }
1526  
1527          host_memory_backend_set_mapped(balloon->hostmem, true);
1528          vmstate_register_ram(host_memory_backend_get_memory(balloon->hostmem),
1529                               DEVICE(balloon));
1530      } else if (balloon->addr) {
1531          error_setg(errp, "'%s' property must not be set without a memdev",
1532                     HV_BALLOON_MEMDEV_PROP);
1533          goto out_balloon_handler;
1534      }
1535  
1536      timer_init_ms(&balloon->post_init_timer, QEMU_CLOCK_VIRTUAL,
1537                    hv_balloon_post_init_timer, balloon);
1538  
1539      qemu_register_reset(hv_balloon_system_reset, balloon);
1540  
1541      return;
1542  
1543  out_balloon_handler:
1544      qemu_remove_balloon_handler(balloon);
1545  }
1546  
1547  /*
1548   * VMBus device reset has to be implemented in case the guest decides to
1549   * disconnect and reconnect to the VMBus without rebooting the whole system.
1550   *
1551   * However, the hot-added memory can't be removed here as Windows keeps on using
1552   * it until the system is restarted, even after disconnecting from the VMBus.
1553   */
1554  static void hv_balloon_vmdev_reset(VMBusDevice *vdev)
1555  {
1556      HvBalloon *balloon = HV_BALLOON(vdev);
1557  
1558      if (balloon->state == S_POST_RESET_CLOSED) {
1559          return;
1560      }
1561  
1562      if (balloon->our_range) {
1563          hvb_our_range_clear_removed_trees(OUR_RANGE(balloon->our_range));
1564      }
1565  
1566      hvb_page_range_tree_destroy(&balloon->removed_guest);
1567      hvb_page_range_tree_destroy(&balloon->removed_both);
1568      hvb_page_range_tree_init(&balloon->removed_guest);
1569      hvb_page_range_tree_init(&balloon->removed_both);
1570  
1571      balloon->trans_id = 0;
1572      balloon->removed_guest_ctr = 0;
1573      balloon->removed_both_ctr = 0;
1574  
1575      HV_BALLOON_SET_STATE(balloon, S_POST_RESET_CLOSED);
1576      hv_balloon_event_loop(balloon);
1577  }
1578  
1579  /*
1580   * Clean up things that were (possibly) allocated pre-realization, for example
1581   * from memory_device_pre_plug(), so we don't leak them if the device doesn't
1582   * actually get realized in the end.
1583   */
1584  static void hv_balloon_unrealize_finalize_common(HvBalloon *balloon)
1585  {
1586      hv_balloon_free_mr(balloon);
1587      balloon->addr = 0;
1588  
1589      balloon->memslot_count = 0;
1590  }
1591  
1592  static void hv_balloon_vmdev_unrealize(VMBusDevice *vdev)
1593  {
1594      HvBalloon *balloon = HV_BALLOON(vdev);
1595  
1596      qemu_unregister_reset(hv_balloon_system_reset, balloon);
1597  
1598      hv_balloon_system_reset_unrealize_common(balloon);
1599  
1600      qemu_remove_balloon_handler(balloon);
1601  
1602      if (balloon->hostmem) {
1603          vmstate_unregister_ram(host_memory_backend_get_memory(balloon->hostmem),
1604                                 DEVICE(balloon));
1605          host_memory_backend_set_mapped(balloon->hostmem, false);
1606      }
1607  
1608      hvb_page_range_tree_destroy(&balloon->removed_guest);
1609      hvb_page_range_tree_destroy(&balloon->removed_both);
1610  
1611      hv_balloon_unrealize_finalize_common(balloon);
1612  }
1613  
1614  static uint64_t hv_balloon_md_get_addr(const MemoryDeviceState *md)
1615  {
1616      return object_property_get_uint(OBJECT(md), HV_BALLOON_ADDR_PROP,
1617                                      &error_abort);
1618  }
1619  
1620  static void hv_balloon_md_set_addr(MemoryDeviceState *md, uint64_t addr,
1621                                     Error **errp)
1622  {
1623      object_property_set_uint(OBJECT(md), HV_BALLOON_ADDR_PROP, addr, errp);
1624  }
1625  
1626  static MemoryRegion *hv_balloon_md_get_memory_region(MemoryDeviceState *md,
1627                                                       Error **errp)
1628  {
1629      HvBalloon *balloon = HV_BALLOON(md);
1630  
1631      if (!balloon->hostmem) {
1632          return NULL;
1633      }
1634  
1635      hv_balloon_ensure_mr(balloon);
1636  
1637      return balloon->mr;
1638  }
1639  
1640  static uint64_t hv_balloon_md_get_min_alignment(const MemoryDeviceState *md)
1641  {
1642      /*
1643       * The VM can indicate an alignment up to 32 GiB. Memory device core can
1644       * usually only handle/guarantee 1 GiB alignment. The user will have to
1645       * specify a larger maxmem eventually.
1646       *
1647       * The memory device core will warn the user in case maxmem might have to be
1648       * increased and will fail plugging the device if there is not sufficient
1649       * space after alignment.
1650       *
1651       * TODO: we could do the alignment ourselves in a slightly bigger region.
1652       * But this feels better, although the warning might be annoying. Maybe
1653       * we can optimize that in the future (e.g., with such a device on the
1654   * cmdline, place/size the device memory region differently).
1655       */
1656      return 32 * GiB;
1657  }
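/*
 * Illustrative example (hypothetical addresses): if the device memory
 * area starts at 5 GiB (0x140000000), the 32 GiB minimum alignment
 * returned above forces the plug address up to 0x800000000, leaving a
 * 27 GiB hole that the "maxmem" machine option must still cover -- this
 * is the case the memory device core warns about.
 */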
1658  
1659  static void hv_balloon_md_fill_device_info(const MemoryDeviceState *md,
1660                                             MemoryDeviceInfo *info)
1661  {
1662      HvBalloonDeviceInfo *hi = g_new0(HvBalloonDeviceInfo, 1);
1663      const HvBalloon *balloon = HV_BALLOON(md);
1664      DeviceState *dev = DEVICE(md);
1665  
1666      if (dev->id) {
1667          hi->id = g_strdup(dev->id);
1668      }
1669  
1670      if (balloon->hostmem) {
1671          hi->memdev = object_get_canonical_path(OBJECT(balloon->hostmem));
1672          hi->memaddr = balloon->addr;
1673          hi->has_memaddr = true;
1674          hi->max_size = memory_region_size(balloon->mr);
1675          /* TODO: expose current provided size or something else? */
1676      } else {
1677          hi->max_size = 0;
1678      }
1679  
1680      info->u.hv_balloon.data = hi;
1681      info->type = MEMORY_DEVICE_INFO_KIND_HV_BALLOON;
1682  }
1683  
1684  static void hv_balloon_decide_memslots(MemoryDeviceState *md,
1685                                         unsigned int limit)
1686  {
1687      HvBalloon *balloon = HV_BALLOON(md);
1688      MemoryRegion *hostmem_mr;
1689      uint64_t region_size, memslot_size, memslots;
1690  
1691      /* We're called exactly once, before realizing the device. */
1692      assert(!balloon->memslot_count);
1693  
1694      /* We should not be called if we don't have a memory backend */
1695      assert(balloon->hostmem);
1696  
1697      hostmem_mr = host_memory_backend_get_memory(balloon->hostmem);
1698      region_size = memory_region_size(hostmem_mr);
1699  
1700      assert(region_size > 0);
1701      memslot_size = QEMU_ALIGN_UP(region_size / limit,
1702                                   HV_BALLOON_HA_MEMSLOT_SIZE_ALIGN);
1703      memslots = QEMU_ALIGN_UP(region_size, memslot_size) / memslot_size;
1704  
1705      if (memslots > 1) {
1706          balloon->memslot_size = memslot_size;
1707      } else {
1708          balloon->memslot_size = region_size;
1709      }
1710  
1711      assert(memslots <= UINT_MAX);
1712      balloon->memslot_count = memslots;
1713  }
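/*
 * Worked example of the computation above (hypothetical sizes):
 * region_size == 2 GiB and limit == 3 give
 *   memslot_size = ALIGN_UP(2 GiB / 3, 128 MiB) = 768 MiB
 *   memslots     = ALIGN_UP(2 GiB, 768 MiB) / 768 MiB = 3
 * so three 768 MiB memslots are used, with the last one only partially
 * covered by the backing region.
 */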
1714  
1715  static unsigned int hv_balloon_get_memslots(MemoryDeviceState *md)
1716  {
1717      const HvBalloon *balloon = HV_BALLOON(md);
1718  
1719      /* We're called after setting the suggested limit. */
1720      assert(balloon->memslot_count > 0);
1721  
1722      return balloon->memslot_count;
1723  }
1724  
1725  static void hv_balloon_init(Object *obj)
1726  {
1727  }
1728  
1729  static void hv_balloon_finalize(Object *obj)
1730  {
1731      HvBalloon *balloon = HV_BALLOON(obj);
1732  
1733      hv_balloon_unrealize_finalize_common(balloon);
1734  }
1735  
1736  static Property hv_balloon_properties[] = {
1737      DEFINE_PROP_BOOL("status-report", HvBalloon,
1738                       status_report.enabled, false),
1739  
1740      /* MEMORY_DEVICE props */
1741      DEFINE_PROP_LINK(HV_BALLOON_MEMDEV_PROP, HvBalloon, hostmem,
1742                       TYPE_MEMORY_BACKEND, HostMemoryBackend *),
1743      DEFINE_PROP_UINT64(HV_BALLOON_ADDR_PROP, HvBalloon, addr, 0),
1744  
1745      DEFINE_PROP_END_OF_LIST(),
1746  };
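/*
 * A minimal invocation sketch using these properties (illustrative, not
 * a complete configuration: a VMBus-capable machine with a vmbus-bridge
 * device and the required Hyper-V enlightenments is assumed):
 *
 *   qemu-system-x86_64 ... \
 *     -device vmbus-bridge \
 *     -object memory-backend-ram,id=mem0,size=8G \
 *     -device hv-balloon,id=hvb0,memdev=mem0,status-report=on
 */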
1747  
1748  static void hv_balloon_class_init(ObjectClass *klass, void *data)
1749  {
1750      DeviceClass *dc = DEVICE_CLASS(klass);
1751      VMBusDeviceClass *vdc = VMBUS_DEVICE_CLASS(klass);
1752      MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(klass);
1753  
1754      device_class_set_props(dc, hv_balloon_properties);
1755      qemu_uuid_parse(HV_BALLOON_GUID, &vdc->classid);
1756      set_bit(DEVICE_CATEGORY_MISC, dc->categories);
1757  
1758      vdc->vmdev_realize = hv_balloon_vmdev_realize;
1759      vdc->vmdev_unrealize = hv_balloon_vmdev_unrealize;
1760      vdc->vmdev_reset = hv_balloon_vmdev_reset;
1761      vdc->open_channel = hv_balloon_vmdev_open_channel;
1762      vdc->close_channel = hv_balloon_vmdev_close_channel;
1763      vdc->chan_notify_cb = hv_balloon_vmdev_chan_notify;
1764  
1765      mdc->get_addr = hv_balloon_md_get_addr;
1766      mdc->set_addr = hv_balloon_md_set_addr;
1767      mdc->get_plugged_size = memory_device_get_region_size;
1768      mdc->get_memory_region = hv_balloon_md_get_memory_region;
1769      mdc->decide_memslots = hv_balloon_decide_memslots;
1770      mdc->get_memslots = hv_balloon_get_memslots;
1771      mdc->get_min_alignment = hv_balloon_md_get_min_alignment;
1772      mdc->fill_device_info = hv_balloon_md_fill_device_info;
1773  }
1774