/*
 * Virtio MEM device
 *
 * Copyright (C) 2020 Red Hat, Inc.
 *
 * Authors:
 *  David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/iov.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/units.h"
#include "sysemu/numa.h"
#include "sysemu/sysemu.h"
#include "sysemu/reset.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"
#include "hw/virtio/virtio-mem.h"
#include "qapi/error.h"
#include "qapi/visitor.h"
#include "exec/ram_addr.h"
#include "migration/misc.h"
#include "hw/boards.h"
#include "hw/qdev-properties.h"
#include CONFIG_DEVICES
#include "trace.h"

/*
 * We only had legacy x86 guests that did not support
 * VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE. Other targets don't have legacy guests.
 */
#if defined(TARGET_X86_64) || defined(TARGET_I386)
#define VIRTIO_MEM_HAS_LEGACY_GUESTS
#endif

/*
 * Let's not allow blocks smaller than 1 MiB, for example, to keep the tracking
 * bitmap small.
 */
#define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)(1 * MiB))

#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || \
    defined(__powerpc64__)
#define VIRTIO_MEM_DEFAULT_THP_SIZE ((uint32_t)(2 * MiB))
#else
        /* fallback to 1 MiB (e.g., the THP size on s390x) */
#define VIRTIO_MEM_DEFAULT_THP_SIZE VIRTIO_MEM_MIN_BLOCK_SIZE
#endif

/*
 * We want to have a reasonable default block size such that
 * 1. We avoid splitting THPs when unplugging memory, which degrades
 *    performance.
 * 2. We avoid placing THPs for plugged blocks that also cover unplugged
 *    blocks.
 *
 * The actual THP size might differ between Linux kernels, so we try to probe
 * it. In the future (if we ever run into issues regarding 2.), we might want
 * to disable THP in case we fail to properly probe the THP size, or if the
 * block size is configured smaller than the THP size.
 */
static uint32_t thp_size;

#define HPAGE_PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
static uint32_t virtio_mem_thp_size(void)
{
    gchar *content = NULL;
    const char *endptr;
    uint64_t tmp;

    if (thp_size) {
        return thp_size;
    }

    /*
     * Try to probe the actual THP size, fall back to (sane but possibly
     * incorrect) default sizes.
     */
    if (g_file_get_contents(HPAGE_PMD_SIZE_PATH, &content, NULL, NULL) &&
        !qemu_strtou64(content, &endptr, 0, &tmp) &&
        (!endptr || *endptr == '\n')) {
        /*
         * Sanity-check the value: if it's too big (e.g., aarch64 with 64k base
         * pages) or weird, fall back to something smaller.
         */
        if (!tmp || !is_power_of_2(tmp) || tmp > 16 * MiB) {
            warn_report("Read unsupported THP size: %" PRIx64, tmp);
        } else {
            thp_size = tmp;
        }
    }

    if (!thp_size) {
        thp_size = VIRTIO_MEM_DEFAULT_THP_SIZE;
        warn_report("Could not detect THP size, falling back to %" PRIx64
                    " MiB.", thp_size / MiB);
    }

    g_free(content);
    return thp_size;
}
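
/*
 * Example (assumed typical x86-64 host): HPAGE_PMD_SIZE_PATH usually contains
 * "2097152\n", so the probe above yields 2 MiB. If the file is missing (e.g.,
 * THP disabled in the kernel), we fall back to VIRTIO_MEM_DEFAULT_THP_SIZE.
 */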

static uint64_t virtio_mem_default_block_size(RAMBlock *rb)
{
    const uint64_t page_size = qemu_ram_pagesize(rb);

    /* We can have hugetlbfs with a page size smaller than the THP size. */
    if (page_size == qemu_real_host_page_size) {
        return MAX(page_size, virtio_mem_thp_size());
    }
    return MAX(page_size, VIRTIO_MEM_MIN_BLOCK_SIZE);
}
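
/*
 * Examples: an anonymous memory backend on a 4 KiB base-page host yields
 * MAX(4 KiB, thp_size) (typically 2 MiB); a 1 GiB hugetlbfs backend yields
 * MAX(1 GiB, 1 MiB) == 1 GiB.
 */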

#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
static bool virtio_mem_has_shared_zeropage(RAMBlock *rb)
{
    /*
     * We only have a guaranteed shared zeropage on ordinary MAP_PRIVATE
     * anonymous RAM. In any other case, reading unplugged memory *can*
     * populate a fresh page, consuming actual memory.
     */
    return !qemu_ram_is_shared(rb) && rb->fd < 0 &&
           qemu_ram_pagesize(rb) == qemu_real_host_page_size;
}
#endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */

/*
 * Size the usable region bigger than the requested size, if possible.
 * Especially, Linux guests will only add (aligned) memory blocks in case they
 * fully fit into the usable region, but plug+online only a subset of the
 * pages. The memory block size corresponds mostly to the section size.
 *
 * This allows e.g., to add 20MB with a section size of 128MB on x86_64, and
 * a section size of 1GB on arm64 (as long as the start address is properly
 * aligned, similar to ordinary DIMMs).
 *
 * We can change this at any time and maybe even make it configurable if
 * necessary (as the section size can change). But it's more likely that the
 * section size will rather get smaller and not bigger over time.
 */
#if defined(TARGET_X86_64) || defined(TARGET_I386)
#define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB))
#else
#error VIRTIO_MEM_USABLE_EXTENT not defined
#endif

static bool virtio_mem_is_busy(void)
{
    /*
     * Postcopy cannot handle concurrent discards and we don't want to migrate
     * pages on-demand with stale content when plugging new blocks.
     *
     * For precopy, we don't want unplugged blocks in our migration stream, and
     * when plugging new blocks, the page content might differ between source
     * and destination (observable by the guest when not initializing pages
     * after plugging them) until we're running on the destination (as we didn't
     * migrate these blocks when they were unplugged).
     */
    return migration_in_incoming_postcopy() || !migration_is_idle();
}

typedef int (*virtio_mem_range_cb)(const VirtIOMEM *vmem, void *arg,
                                   uint64_t offset, uint64_t size);

static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
                                               virtio_mem_range_cb cb)
{
    unsigned long first_zero_bit, last_zero_bit;
    uint64_t offset, size;
    int ret = 0;

    first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size);
    while (first_zero_bit < vmem->bitmap_size) {
        offset = first_zero_bit * vmem->block_size;
        last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
                                      first_zero_bit + 1) - 1;
        size = (last_zero_bit - first_zero_bit + 1) * vmem->block_size;

        ret = cb(vmem, arg, offset, size);
        if (ret) {
            break;
        }
        first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
                                            last_zero_bit + 2);
    }
    return ret;
}
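
/*
 * Worked example: with block_size = 2 MiB and bitmap 0b10011 (bit 0 = first
 * block, set = plugged), the only unplugged range reported to the callback is
 * offset = 4 MiB, size = 4 MiB (bits 2 and 3). Searching continues at
 * last_zero_bit + 2 because the bit at last_zero_bit + 1 is known to be set.
 */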

/*
 * Adjust the memory section to cover the intersection with the given range.
 *
 * Returns false if the intersection is empty, otherwise returns true.
 */
static bool virtio_mem_intersect_memory_section(MemoryRegionSection *s,
                                                uint64_t offset, uint64_t size)
{
    uint64_t start = MAX(s->offset_within_region, offset);
    uint64_t end = MIN(s->offset_within_region + int128_get64(s->size),
                       offset + size);

    if (end <= start) {
        return false;
    }

    s->offset_within_address_space += start - s->offset_within_region;
    s->offset_within_region = start;
    s->size = int128_make64(end - start);
    return true;
}
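
/*
 * Example: a section spanning region offsets [0x0, 0x400000) intersected with
 * offset = 0x200000, size = 0x400000 is adjusted to offset_within_region =
 * 0x200000 and size = 0x200000; offset_within_address_space advances by the
 * same 0x200000.
 */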

typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg);

static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
                                               MemoryRegionSection *s,
                                               void *arg,
                                               virtio_mem_section_cb cb)
{
    unsigned long first_bit, last_bit;
    uint64_t offset, size;
    int ret = 0;

    first_bit = s->offset_within_region / vmem->block_size;
    first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
    while (first_bit < vmem->bitmap_size) {
        MemoryRegionSection tmp = *s;

        offset = first_bit * vmem->block_size;
        last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
                                      first_bit + 1) - 1;
        size = (last_bit - first_bit + 1) * vmem->block_size;

        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
            break;
        }
        ret = cb(&tmp, arg);
        if (ret) {
            break;
        }
        first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
                                  last_bit + 2);
    }
    return ret;
}

static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem,
                                                 MemoryRegionSection *s,
                                                 void *arg,
                                                 virtio_mem_section_cb cb)
{
    unsigned long first_bit, last_bit;
    uint64_t offset, size;
    int ret = 0;

    first_bit = s->offset_within_region / vmem->block_size;
    first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
    while (first_bit < vmem->bitmap_size) {
        MemoryRegionSection tmp = *s;

        offset = first_bit * vmem->block_size;
        last_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
                                 first_bit + 1) - 1;
        size = (last_bit - first_bit + 1) * vmem->block_size;

        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
            break;
        }
        ret = cb(&tmp, arg);
        if (ret) {
            break;
        }
        first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
                                       last_bit + 2);
    }
    return ret;
}
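
/*
 * Both iterators above mirror each other: they chop the section into maximal
 * runs of plugged (resp. unplugged) blocks and hand each intersection to the
 * callback, skipping ahead by last_bit + 2 since the bit at last_bit + 1 is
 * known to have the opposite state.
 */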

static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg)
{
    RamDiscardListener *rdl = arg;

    return rdl->notify_populate(rdl, s);
}

static int virtio_mem_notify_discard_cb(MemoryRegionSection *s, void *arg)
{
    RamDiscardListener *rdl = arg;

    rdl->notify_discard(rdl, s);
    return 0;
}

static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
                                     uint64_t size)
{
    RamDiscardListener *rdl;

    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
        MemoryRegionSection tmp = *rdl->section;

        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
            continue;
        }
        rdl->notify_discard(rdl, &tmp);
    }
}

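/*
 * Notify all listeners about a newly plugged range. If any listener fails,
 * roll back by discarding the range for the listeners that were already
 * notified, so that no listener is left with a half-populated view.
 */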
static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
                                  uint64_t size)
{
    RamDiscardListener *rdl, *rdl2;
    int ret = 0;

    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
        MemoryRegionSection tmp = *rdl->section;

        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
            continue;
        }
        ret = rdl->notify_populate(rdl, &tmp);
        if (ret) {
            break;
        }
    }

    if (ret) {
        /* Notify all already-notified listeners. */
        QLIST_FOREACH(rdl2, &vmem->rdl_list, next) {
            MemoryRegionSection tmp = *rdl2->section;

            if (rdl2 == rdl) {
                break;
            }
            if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
                continue;
            }
            rdl2->notify_discard(rdl2, &tmp);
        }
    }
    return ret;
}

static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem)
{
    RamDiscardListener *rdl;

    if (!vmem->size) {
        return;
    }

    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
        if (rdl->double_discard_supported) {
            rdl->notify_discard(rdl, rdl->section);
        } else {
            virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
                                                virtio_mem_notify_discard_cb);
        }
    }
}

static bool virtio_mem_test_bitmap(const VirtIOMEM *vmem, uint64_t start_gpa,
                                   uint64_t size, bool plugged)
{
    const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size;
    const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1;
    unsigned long found_bit;

    /* We fake a shorter bitmap to avoid searching too far. */
    if (plugged) {
        found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit);
    } else {
        found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit);
    }
    return found_bit > last_bit;
}
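
/*
 * Example: with block_size = 2 MiB, start_gpa - addr = 4 MiB and size = 4 MiB,
 * we check bits [2, 3]; for plugged == true, the range only qualifies if no
 * zero bit is found before bit 4 (the faked bitmap end).
 */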

static void virtio_mem_set_bitmap(VirtIOMEM *vmem, uint64_t start_gpa,
                                  uint64_t size, bool plugged)
{
    const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size;
    const unsigned long nbits = size / vmem->block_size;

    if (plugged) {
        bitmap_set(vmem->bitmap, bit, nbits);
    } else {
        bitmap_clear(vmem->bitmap, bit, nbits);
    }
}

static void virtio_mem_send_response(VirtIOMEM *vmem, VirtQueueElement *elem,
                                     struct virtio_mem_resp *resp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(vmem);
    VirtQueue *vq = vmem->vq;

    trace_virtio_mem_send_response(le16_to_cpu(resp->type));
    iov_from_buf(elem->in_sg, elem->in_num, 0, resp, sizeof(*resp));

    virtqueue_push(vq, elem, sizeof(*resp));
    virtio_notify(vdev, vq);
}

static void virtio_mem_send_response_simple(VirtIOMEM *vmem,
                                            VirtQueueElement *elem,
                                            uint16_t type)
{
    struct virtio_mem_resp resp = {
        .type = cpu_to_le16(type),
    };

    virtio_mem_send_response(vmem, elem, &resp);
}

static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa,
                                   uint64_t size)
{
    if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) {
        return false;
    }
    if (gpa + size < gpa || !size) {
        return false;
    }
    if (gpa < vmem->addr || gpa >= vmem->addr + vmem->usable_region_size) {
        return false;
    }
    if (gpa + size > vmem->addr + vmem->usable_region_size) {
        return false;
    }
    return true;
}
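
/*
 * Example: with addr = 4 GiB, usable_region_size = 276 MiB and block_size =
 * 2 MiB, a request for gpa = 4 GiB, size = 4 MiB is valid; an unaligned gpa,
 * a zero or overflowing size, or a range crossing the end of the usable
 * region is rejected.
 */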

static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
                                      uint64_t size, bool plug)
{
    const uint64_t offset = start_gpa - vmem->addr;
    RAMBlock *rb = vmem->memdev->mr.ram_block;

    if (virtio_mem_is_busy()) {
        return -EBUSY;
    }

    if (!plug) {
        if (ram_block_discard_range(rb, offset, size)) {
            return -EBUSY;
        }
        virtio_mem_notify_unplug(vmem, offset, size);
    } else {
        int ret = 0;

        if (vmem->prealloc) {
            void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
            int fd = memory_region_get_fd(&vmem->memdev->mr);
            Error *local_err = NULL;

            os_mem_prealloc(fd, area, size, 1, &local_err);
            if (local_err) {
                static bool warned;

                /*
                 * Warn only once, we don't want to fill the log with these
                 * warnings.
                 */
                if (!warned) {
                    warn_report_err(local_err);
                    warned = true;
                } else {
                    error_free(local_err);
                }
                ret = -EBUSY;
            }
        }
        if (!ret) {
            ret = virtio_mem_notify_plug(vmem, offset, size);
        }

        if (ret) {
            /*
             * Could be a failed preallocation or a notifier that populated
             * memory.
             */
            ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size);
            return -EBUSY;
        }
    }
    virtio_mem_set_bitmap(vmem, start_gpa, size, plug);
    return 0;
}

static int virtio_mem_state_change_request(VirtIOMEM *vmem, uint64_t gpa,
                                           uint16_t nb_blocks, bool plug)
{
    const uint64_t size = nb_blocks * vmem->block_size;
    int ret;

    if (!virtio_mem_valid_range(vmem, gpa, size)) {
        return VIRTIO_MEM_RESP_ERROR;
    }

    if (plug && (vmem->size + size > vmem->requested_size)) {
        return VIRTIO_MEM_RESP_NACK;
    }

    /* Test whether all blocks are really in the opposite state. */
    if (!virtio_mem_test_bitmap(vmem, gpa, size, !plug)) {
        return VIRTIO_MEM_RESP_ERROR;
    }

    ret = virtio_mem_set_block_state(vmem, gpa, size, plug);
    if (ret) {
        return VIRTIO_MEM_RESP_BUSY;
    }
    if (plug) {
        vmem->size += size;
    } else {
        vmem->size -= size;
    }
    notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
    return VIRTIO_MEM_RESP_ACK;
}

static void virtio_mem_plug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
                                    struct virtio_mem_req *req)
{
    const uint64_t gpa = le64_to_cpu(req->u.plug.addr);
    const uint16_t nb_blocks = le16_to_cpu(req->u.plug.nb_blocks);
    uint16_t type;

    trace_virtio_mem_plug_request(gpa, nb_blocks);
    type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, true);
    virtio_mem_send_response_simple(vmem, elem, type);
}

static void virtio_mem_unplug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
                                      struct virtio_mem_req *req)
{
    const uint64_t gpa = le64_to_cpu(req->u.unplug.addr);
    const uint16_t nb_blocks = le16_to_cpu(req->u.unplug.nb_blocks);
    uint16_t type;

    trace_virtio_mem_unplug_request(gpa, nb_blocks);
    type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, false);
    virtio_mem_send_response_simple(vmem, elem, type);
}

static void virtio_mem_resize_usable_region(VirtIOMEM *vmem,
                                            uint64_t requested_size,
                                            bool can_shrink)
{
    uint64_t newsize = MIN(memory_region_size(&vmem->memdev->mr),
                           requested_size + VIRTIO_MEM_USABLE_EXTENT);

    /* The usable region size always has to be a multiple of the block size. */
    newsize = QEMU_ALIGN_UP(newsize, vmem->block_size);

    if (!requested_size) {
        newsize = 0;
    }

    if (newsize < vmem->usable_region_size && !can_shrink) {
        return;
    }

    trace_virtio_mem_resized_usable_region(vmem->usable_region_size, newsize);
    vmem->usable_region_size = newsize;
}
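
/*
 * Worked example (x86-64): with a 1 GiB memdev, requested_size = 20 MiB and
 * VIRTIO_MEM_USABLE_EXTENT = 256 MiB, the usable region grows to
 * MIN(1 GiB, 276 MiB) = 276 MiB, aligned up to the block size.
 */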

static int virtio_mem_unplug_all(VirtIOMEM *vmem)
{
    RAMBlock *rb = vmem->memdev->mr.ram_block;

    if (virtio_mem_is_busy()) {
        return -EBUSY;
    }

    if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) {
        return -EBUSY;
    }
    virtio_mem_notify_unplug_all(vmem);

    bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size);
    if (vmem->size) {
        vmem->size = 0;
        notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
    }
    trace_virtio_mem_unplugged_all();
    virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);
    return 0;
}

static void virtio_mem_unplug_all_request(VirtIOMEM *vmem,
                                          VirtQueueElement *elem)
{
    trace_virtio_mem_unplug_all_request();
    if (virtio_mem_unplug_all(vmem)) {
        virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_BUSY);
    } else {
        virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ACK);
    }
}

static void virtio_mem_state_request(VirtIOMEM *vmem, VirtQueueElement *elem,
                                     struct virtio_mem_req *req)
{
    const uint16_t nb_blocks = le16_to_cpu(req->u.state.nb_blocks);
    const uint64_t gpa = le64_to_cpu(req->u.state.addr);
    const uint64_t size = nb_blocks * vmem->block_size;
    struct virtio_mem_resp resp = {
        .type = cpu_to_le16(VIRTIO_MEM_RESP_ACK),
    };

    trace_virtio_mem_state_request(gpa, nb_blocks);
    if (!virtio_mem_valid_range(vmem, gpa, size)) {
        virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ERROR);
        return;
    }

    if (virtio_mem_test_bitmap(vmem, gpa, size, true)) {
        resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_PLUGGED);
    } else if (virtio_mem_test_bitmap(vmem, gpa, size, false)) {
        resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_UNPLUGGED);
    } else {
        resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_MIXED);
    }
    trace_virtio_mem_state_response(le16_to_cpu(resp.u.state.state));
    virtio_mem_send_response(vmem, elem, &resp);
}

static void virtio_mem_handle_request(VirtIODevice *vdev, VirtQueue *vq)
{
    const int len = sizeof(struct virtio_mem_req);
    VirtIOMEM *vmem = VIRTIO_MEM(vdev);
    VirtQueueElement *elem;
    struct virtio_mem_req req;
    uint16_t type;

    while (true) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            return;
        }

        if (iov_to_buf(elem->out_sg, elem->out_num, 0, &req, len) < len) {
            virtio_error(vdev, "virtio-mem protocol violation: invalid request"
                         " size: %d", len);
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            return;
        }

        if (iov_size(elem->in_sg, elem->in_num) <
            sizeof(struct virtio_mem_resp)) {
            virtio_error(vdev, "virtio-mem protocol violation: not enough space"
                         " for response: %zu",
                         iov_size(elem->in_sg, elem->in_num));
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            return;
        }

        type = le16_to_cpu(req.type);
        switch (type) {
        case VIRTIO_MEM_REQ_PLUG:
            virtio_mem_plug_request(vmem, elem, &req);
            break;
        case VIRTIO_MEM_REQ_UNPLUG:
            virtio_mem_unplug_request(vmem, elem, &req);
            break;
        case VIRTIO_MEM_REQ_UNPLUG_ALL:
            virtio_mem_unplug_all_request(vmem, elem);
            break;
        case VIRTIO_MEM_REQ_STATE:
            virtio_mem_state_request(vmem, elem, &req);
            break;
        default:
            virtio_error(vdev, "virtio-mem protocol violation: unknown request"
                         " type: %d", type);
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            return;
        }

        g_free(elem);
    }
}

static void virtio_mem_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
    VirtIOMEM *vmem = VIRTIO_MEM(vdev);
    struct virtio_mem_config *config = (void *) config_data;

    config->block_size = cpu_to_le64(vmem->block_size);
    config->node_id = cpu_to_le16(vmem->node);
    config->requested_size = cpu_to_le64(vmem->requested_size);
    config->plugged_size = cpu_to_le64(vmem->size);
    config->addr = cpu_to_le64(vmem->addr);
    config->region_size = cpu_to_le64(memory_region_size(&vmem->memdev->mr));
    config->usable_region_size = cpu_to_le64(vmem->usable_region_size);
}

static uint64_t virtio_mem_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    VirtIOMEM *vmem = VIRTIO_MEM(vdev);

    if (ms->numa_state) {
#if defined(CONFIG_ACPI)
        virtio_add_feature(&features, VIRTIO_MEM_F_ACPI_PXM);
#endif
    }
    assert(vmem->unplugged_inaccessible != ON_OFF_AUTO_AUTO);
    if (vmem->unplugged_inaccessible == ON_OFF_AUTO_ON) {
        virtio_add_feature(&features, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE);
    }
    return features;
}

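/*
 * If the device offers VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, the driver has to
 * accept it: without the feature negotiated, a (legacy) guest might access
 * unplugged memory, which the device does not support in this configuration.
 */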
static int virtio_mem_validate_features(VirtIODevice *vdev)
{
    if (virtio_host_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE)) {
        return -EFAULT;
    }
    return 0;
}

static void virtio_mem_system_reset(void *opaque)
{
    VirtIOMEM *vmem = VIRTIO_MEM(opaque);

    /*
     * During usual resets, we will unplug all memory and shrink the usable
     * region size. This is, however, not possible in all scenarios. Then,
     * the guest has to deal with this manually (VIRTIO_MEM_REQ_UNPLUG_ALL).
     */
    virtio_mem_unplug_all(vmem);
}

static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    int nb_numa_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0;
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOMEM *vmem = VIRTIO_MEM(dev);
    uint64_t page_size;
    RAMBlock *rb;
    int ret;

    if (!vmem->memdev) {
        error_setg(errp, "'%s' property is not set", VIRTIO_MEM_MEMDEV_PROP);
        return;
    } else if (host_memory_backend_is_mapped(vmem->memdev)) {
        error_setg(errp, "'%s' property specifies a busy memdev: %s",
                   VIRTIO_MEM_MEMDEV_PROP,
                   object_get_canonical_path_component(OBJECT(vmem->memdev)));
        return;
    } else if (!memory_region_is_ram(&vmem->memdev->mr) ||
        memory_region_is_rom(&vmem->memdev->mr) ||
        !vmem->memdev->mr.ram_block) {
        error_setg(errp, "'%s' property specifies an unsupported memdev",
                   VIRTIO_MEM_MEMDEV_PROP);
        return;
    }

    if ((nb_numa_nodes && vmem->node >= nb_numa_nodes) ||
        (!nb_numa_nodes && vmem->node)) {
        error_setg(errp, "'%s' property has value '%" PRIu32 "', which exceeds"
                   " the number of numa nodes: %d", VIRTIO_MEM_NODE_PROP,
                   vmem->node, nb_numa_nodes ? nb_numa_nodes : 1);
        return;
    }

    if (enable_mlock) {
        error_setg(errp, "Incompatible with mlock");
        return;
    }

    rb = vmem->memdev->mr.ram_block;
    page_size = qemu_ram_pagesize(rb);

#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
    switch (vmem->unplugged_inaccessible) {
    case ON_OFF_AUTO_AUTO:
        if (virtio_mem_has_shared_zeropage(rb)) {
            vmem->unplugged_inaccessible = ON_OFF_AUTO_OFF;
        } else {
            vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
        }
        break;
    case ON_OFF_AUTO_OFF:
        if (!virtio_mem_has_shared_zeropage(rb)) {
            warn_report("'%s' property set to 'off' with a memdev that does"
                        " not support the shared zeropage.",
                        VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP);
        }
        break;
    default:
        break;
    }
#else /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
    vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
#endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */

    /*
     * If the block size wasn't configured by the user, use a sane default. This
     * allows using hugetlbfs backends of any page size without manual
     * intervention.
     */
    if (!vmem->block_size) {
        vmem->block_size = virtio_mem_default_block_size(rb);
    }

    if (vmem->block_size < page_size) {
        error_setg(errp, "'%s' property has to be at least the page size (0x%"
                   PRIx64 ")", VIRTIO_MEM_BLOCK_SIZE_PROP, page_size);
        return;
    } else if (vmem->block_size < virtio_mem_default_block_size(rb)) {
        warn_report("'%s' property is smaller than the default block size (%"
                    PRIx64 " MiB)", VIRTIO_MEM_BLOCK_SIZE_PROP,
                    virtio_mem_default_block_size(rb) / MiB);
    }
    if (!QEMU_IS_ALIGNED(vmem->requested_size, vmem->block_size)) {
        error_setg(errp, "'%s' property has to be a multiple of '%s' (0x%"
                   PRIx64 ")", VIRTIO_MEM_REQUESTED_SIZE_PROP,
                   VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
        return;
    } else if (!QEMU_IS_ALIGNED(vmem->addr, vmem->block_size)) {
        error_setg(errp, "'%s' property has to be a multiple of '%s' (0x%"
                   PRIx64 ")", VIRTIO_MEM_ADDR_PROP, VIRTIO_MEM_BLOCK_SIZE_PROP,
                   vmem->block_size);
        return;
    } else if (!QEMU_IS_ALIGNED(memory_region_size(&vmem->memdev->mr),
                                vmem->block_size)) {
        error_setg(errp, "'%s' property memdev size has to be a multiple of"
                   " '%s' (0x%" PRIx64 ")", VIRTIO_MEM_MEMDEV_PROP,
                   VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
        return;
    }

    if (ram_block_coordinated_discard_require(true)) {
        error_setg(errp, "Discarding RAM is disabled");
        return;
    }

    ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
    if (ret) {
        error_setg_errno(errp, -ret, "Unexpected error discarding RAM");
        ram_block_coordinated_discard_require(false);
        return;
    }

    virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);

    vmem->bitmap_size = memory_region_size(&vmem->memdev->mr) /
                        vmem->block_size;
    vmem->bitmap = bitmap_new(vmem->bitmap_size);

    virtio_init(vdev, TYPE_VIRTIO_MEM, VIRTIO_ID_MEM,
                sizeof(struct virtio_mem_config));
    vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request);

    host_memory_backend_set_mapped(vmem->memdev, true);
    vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
    qemu_register_reset(virtio_mem_system_reset, vmem);

    /*
     * Set ourselves as RamDiscardManager before the plug handler maps the
     * memory region and exposes it via an address space.
     */
    memory_region_set_ram_discard_manager(&vmem->memdev->mr,
                                          RAM_DISCARD_MANAGER(vmem));
}

static void virtio_mem_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOMEM *vmem = VIRTIO_MEM(dev);

    /*
     * The unplug handler unmapped the memory region, so it can no longer be
     * found via an address space. Unset ourselves as RamDiscardManager.
     */
    memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
    qemu_unregister_reset(virtio_mem_system_reset, vmem);
    vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem));
    host_memory_backend_set_mapped(vmem->memdev, false);
    virtio_del_queue(vdev, 0);
    virtio_cleanup(vdev);
    g_free(vmem->bitmap);
    ram_block_coordinated_discard_require(false);
}

static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg,
                                       uint64_t offset, uint64_t size)
{
    RAMBlock *rb = vmem->memdev->mr.ram_block;

    return ram_block_discard_range(rb, offset, size) ? -EINVAL : 0;
}

static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)
{
    /* Make sure all memory is really discarded after migration. */
    return virtio_mem_for_each_unplugged_range(vmem, NULL,
                                               virtio_mem_discard_range_cb);
}

static int virtio_mem_post_load(void *opaque, int version_id)
{
    VirtIOMEM *vmem = VIRTIO_MEM(opaque);
    RamDiscardListener *rdl;
    int ret;

    /*
     * We started out with all memory discarded and our memory region is mapped
     * into an address space. Replay the plugged state now that we updated the
     * bitmap.
     */
    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
        ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
                                                 virtio_mem_notify_populate_cb);
        if (ret) {
            return ret;
        }
    }

    if (migration_in_incoming_postcopy()) {
        return 0;
    }

    return virtio_mem_restore_unplugged(vmem);
}

typedef struct VirtIOMEMMigSanityChecks {
    VirtIOMEM *parent;
    uint64_t addr;
    uint64_t region_size;
    uint64_t block_size;
    uint32_t node;
} VirtIOMEMMigSanityChecks;

static int virtio_mem_mig_sanity_checks_pre_save(void *opaque)
{
    VirtIOMEMMigSanityChecks *tmp = opaque;
    VirtIOMEM *vmem = tmp->parent;

    tmp->addr = vmem->addr;
    tmp->region_size = memory_region_size(&vmem->memdev->mr);
    tmp->block_size = vmem->block_size;
    tmp->node = vmem->node;
    return 0;
}

static int virtio_mem_mig_sanity_checks_post_load(void *opaque, int version_id)
{
    VirtIOMEMMigSanityChecks *tmp = opaque;
    VirtIOMEM *vmem = tmp->parent;
    const uint64_t new_region_size = memory_region_size(&vmem->memdev->mr);

    if (tmp->addr != vmem->addr) {
        error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
                     VIRTIO_MEM_ADDR_PROP, tmp->addr, vmem->addr);
        return -EINVAL;
    }
    /*
     * Note: Preparation for resizeable memory regions. The maximum size
     * of the memory region must not change during migration.
     */
    if (tmp->region_size != new_region_size) {
        error_report("Property '%s' size changed from 0x%" PRIx64 " to 0x%"
                     PRIx64, VIRTIO_MEM_MEMDEV_PROP, tmp->region_size,
                     new_region_size);
        return -EINVAL;
    }
    if (tmp->block_size != vmem->block_size) {
        error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
                     VIRTIO_MEM_BLOCK_SIZE_PROP, tmp->block_size,
                     vmem->block_size);
        return -EINVAL;
    }
    if (tmp->node != vmem->node) {
        error_report("Property '%s' changed from %" PRIu32 " to %" PRIu32,
                     VIRTIO_MEM_NODE_PROP, tmp->node, vmem->node);
        return -EINVAL;
    }
    return 0;
}

static const VMStateDescription vmstate_virtio_mem_sanity_checks = {
    .name = "virtio-mem-device/sanity-checks",
    .pre_save = virtio_mem_mig_sanity_checks_pre_save,
    .post_load = virtio_mem_mig_sanity_checks_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(addr, VirtIOMEMMigSanityChecks),
        VMSTATE_UINT64(region_size, VirtIOMEMMigSanityChecks),
        VMSTATE_UINT64(block_size, VirtIOMEMMigSanityChecks),
        VMSTATE_UINT32(node, VirtIOMEMMigSanityChecks),
        VMSTATE_END_OF_LIST(),
    },
};

static const VMStateDescription vmstate_virtio_mem_device = {
    .name = "virtio-mem-device",
    .minimum_version_id = 1,
    .version_id = 1,
    .priority = MIG_PRI_VIRTIO_MEM,
    .post_load = virtio_mem_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks,
                         vmstate_virtio_mem_sanity_checks),
        VMSTATE_UINT64(usable_region_size, VirtIOMEM),
        VMSTATE_UINT64(size, VirtIOMEM),
        VMSTATE_UINT64(requested_size, VirtIOMEM),
        VMSTATE_BITMAP(bitmap, VirtIOMEM, 0, bitmap_size),
        VMSTATE_END_OF_LIST()
    },
};

static const VMStateDescription vmstate_virtio_mem = {
    .name = "virtio-mem",
    .minimum_version_id = 1,
    .version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
};

static void virtio_mem_fill_device_info(const VirtIOMEM *vmem,
                                        VirtioMEMDeviceInfo *vi)
{
    vi->memaddr = vmem->addr;
    vi->node = vmem->node;
    vi->requested_size = vmem->requested_size;
    vi->size = vmem->size;
    vi->max_size = memory_region_size(&vmem->memdev->mr);
    vi->block_size = vmem->block_size;
    vi->memdev = object_get_canonical_path(OBJECT(vmem->memdev));
}

static MemoryRegion *virtio_mem_get_memory_region(VirtIOMEM *vmem, Error **errp)
{
    if (!vmem->memdev) {
        error_setg(errp, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP);
        return NULL;
    }

    return &vmem->memdev->mr;
}

static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem,
                                                Notifier *notifier)
{
    notifier_list_add(&vmem->size_change_notifiers, notifier);
}

static void virtio_mem_remove_size_change_notifier(VirtIOMEM *vmem,
                                                   Notifier *notifier)
{
    notifier_remove(notifier);
}

static void virtio_mem_get_size(Object *obj, Visitor *v, const char *name,
                                void *opaque, Error **errp)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(obj);
    uint64_t value = vmem->size;

    visit_type_size(v, name, &value, errp);
}

static void virtio_mem_get_requested_size(Object *obj, Visitor *v,
                                          const char *name, void *opaque,
                                          Error **errp)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(obj);
    uint64_t value = vmem->requested_size;

    visit_type_size(v, name, &value, errp);
}

static void virtio_mem_set_requested_size(Object *obj, Visitor *v,
                                          const char *name, void *opaque,
                                          Error **errp)
{
    VirtIOMEM *vmem = VIRTIO_MEM(obj);
    Error *err = NULL;
    uint64_t value;

    visit_type_size(v, name, &value, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }

    /*
     * The block size and memory backend are not fixed until the device is
     * realized; realize() will verify these properties then.
     */
    if (DEVICE(obj)->realized) {
        if (!QEMU_IS_ALIGNED(value, vmem->block_size)) {
            error_setg(errp, "'%s' has to be a multiple of '%s' (0x%" PRIx64
                       ")", name, VIRTIO_MEM_BLOCK_SIZE_PROP,
                       vmem->block_size);
            return;
        } else if (value > memory_region_size(&vmem->memdev->mr)) {
            error_setg(errp, "'%s' cannot exceed the memory backend size"
                       " (0x%" PRIx64 ")", name,
                       memory_region_size(&vmem->memdev->mr));
            return;
        }

        if (value != vmem->requested_size) {
            virtio_mem_resize_usable_region(vmem, value, false);
            vmem->requested_size = value;
        }
        /*
         * Trigger a config update so the guest gets notified. We trigger
         * even if the size didn't change (especially helpful for debugging).
         */
        virtio_notify_config(VIRTIO_DEVICE(vmem));
    } else {
        vmem->requested_size = value;
    }
}

static void virtio_mem_get_block_size(Object *obj, Visitor *v, const char *name,
                                      void *opaque, Error **errp)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(obj);
    uint64_t value = vmem->block_size;

    /*
     * If not configured by the user (and we're not realized yet), use the
     * default block size we would use with the current memory backend.
     */
    if (!value) {
        if (vmem->memdev && memory_region_is_ram(&vmem->memdev->mr)) {
            value = virtio_mem_default_block_size(vmem->memdev->mr.ram_block);
        } else {
            value = virtio_mem_thp_size();
        }
    }

    visit_type_size(v, name, &value, errp);
}

static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name,
                                      void *opaque, Error **errp)
{
    VirtIOMEM *vmem = VIRTIO_MEM(obj);
    Error *err = NULL;
    uint64_t value;

    if (DEVICE(obj)->realized) {
        error_setg(errp, "'%s' cannot be changed", name);
        return;
    }

    visit_type_size(v, name, &value, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }

    if (value < VIRTIO_MEM_MIN_BLOCK_SIZE) {
        error_setg(errp, "'%s' property has to be at least 0x%" PRIx32, name,
                   VIRTIO_MEM_MIN_BLOCK_SIZE);
        return;
    } else if (!is_power_of_2(value)) {
        error_setg(errp, "'%s' property has to be a power of two", name);
        return;
    }
    vmem->block_size = value;
}

static void virtio_mem_instance_init(Object *obj)
{
    VirtIOMEM *vmem = VIRTIO_MEM(obj);

    notifier_list_init(&vmem->size_change_notifiers);
    QLIST_INIT(&vmem->rdl_list);

    object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size,
                        NULL, NULL, NULL);
    object_property_add(obj, VIRTIO_MEM_REQUESTED_SIZE_PROP, "size",
                        virtio_mem_get_requested_size,
                        virtio_mem_set_requested_size, NULL, NULL);
    object_property_add(obj, VIRTIO_MEM_BLOCK_SIZE_PROP, "size",
                        virtio_mem_get_block_size, virtio_mem_set_block_size,
                        NULL, NULL);
}

static Property virtio_mem_properties[] = {
    DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0),
    DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0),
    DEFINE_PROP_BOOL(VIRTIO_MEM_PREALLOC_PROP, VirtIOMEM, prealloc, false),
    DEFINE_PROP_LINK(VIRTIO_MEM_MEMDEV_PROP, VirtIOMEM, memdev,
                     TYPE_MEMORY_BACKEND, HostMemoryBackend *),
#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
    DEFINE_PROP_ON_OFF_AUTO(VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP, VirtIOMEM,
                            unplugged_inaccessible, ON_OFF_AUTO_AUTO),
#endif
    DEFINE_PROP_END_OF_LIST(),
};
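
/*
 * Illustrative (not exhaustive) command line wiring these properties up via
 * the PCI proxy device; exact option names depend on the machine setup:
 *
 *   -object memory-backend-ram,id=vmem0,size=4G
 *   -device virtio-mem-pci,id=vm0,memdev=vmem0,node=0,requested-size=1G
 */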

static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm,
                                                   const MemoryRegion *mr)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);

    g_assert(mr == &vmem->memdev->mr);
    return vmem->block_size;
}

static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
                                        const MemoryRegionSection *s)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
    uint64_t start_gpa = vmem->addr + s->offset_within_region;
    uint64_t end_gpa = start_gpa + int128_get64(s->size);

    g_assert(s->mr == &vmem->memdev->mr);

    start_gpa = QEMU_ALIGN_DOWN(start_gpa, vmem->block_size);
    end_gpa = QEMU_ALIGN_UP(end_gpa, vmem->block_size);

    if (!virtio_mem_valid_range(vmem, start_gpa, end_gpa - start_gpa)) {
        return false;
    }

    return virtio_mem_test_bitmap(vmem, start_gpa, end_gpa - start_gpa, true);
}

struct VirtIOMEMReplayData {
    void *fn;
    void *opaque;
};

static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg)
{
    struct VirtIOMEMReplayData *data = arg;

    return ((ReplayRamPopulate)data->fn)(s, data->opaque);
}

static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
                                           MemoryRegionSection *s,
                                           ReplayRamPopulate replay_fn,
                                           void *opaque)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
    struct VirtIOMEMReplayData data = {
        .fn = replay_fn,
        .opaque = opaque,
    };

    g_assert(s->mr == &vmem->memdev->mr);
    return virtio_mem_for_each_plugged_section(vmem, s, &data,
                                            virtio_mem_rdm_replay_populated_cb);
}

static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s,
                                              void *arg)
{
    struct VirtIOMEMReplayData *data = arg;

    ((ReplayRamDiscard)data->fn)(s, data->opaque);
    return 0;
}

static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
                                            MemoryRegionSection *s,
                                            ReplayRamDiscard replay_fn,
                                            void *opaque)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
    struct VirtIOMEMReplayData data = {
        .fn = replay_fn,
        .opaque = opaque,
    };

    g_assert(s->mr == &vmem->memdev->mr);
    virtio_mem_for_each_unplugged_section(vmem, s, &data,
                                          virtio_mem_rdm_replay_discarded_cb);
}

static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
                                             RamDiscardListener *rdl,
                                             MemoryRegionSection *s)
{
    VirtIOMEM *vmem = VIRTIO_MEM(rdm);
    int ret;

    g_assert(s->mr == &vmem->memdev->mr);
    rdl->section = memory_region_section_new_copy(s);

    QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next);
    ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
                                              virtio_mem_notify_populate_cb);
    if (ret) {
        error_report("%s: Replaying plugged ranges failed: %s", __func__,
                     strerror(-ret));
    }
}

static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm,
                                               RamDiscardListener *rdl)
{
    VirtIOMEM *vmem = VIRTIO_MEM(rdm);

    g_assert(rdl->section->mr == &vmem->memdev->mr);
    if (vmem->size) {
        if (rdl->double_discard_supported) {
            rdl->notify_discard(rdl, rdl->section);
        } else {
            virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
                                                virtio_mem_notify_discard_cb);
        }
    }

    memory_region_section_free_copy(rdl->section);
    rdl->section = NULL;
    QLIST_REMOVE(rdl, next);
}

static void virtio_mem_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
    VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass);
    RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass);

    device_class_set_props(dc, virtio_mem_properties);
    dc->vmsd = &vmstate_virtio_mem;

    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    vdc->realize = virtio_mem_device_realize;
    vdc->unrealize = virtio_mem_device_unrealize;
    vdc->get_config = virtio_mem_get_config;
    vdc->get_features = virtio_mem_get_features;
    vdc->validate_features = virtio_mem_validate_features;
    vdc->vmsd = &vmstate_virtio_mem_device;

    vmc->fill_device_info = virtio_mem_fill_device_info;
    vmc->get_memory_region = virtio_mem_get_memory_region;
    vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier;
    vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier;

    rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity;
    rdmc->is_populated = virtio_mem_rdm_is_populated;
    rdmc->replay_populated = virtio_mem_rdm_replay_populated;
    rdmc->replay_discarded = virtio_mem_rdm_replay_discarded;
    rdmc->register_listener = virtio_mem_rdm_register_listener;
    rdmc->unregister_listener = virtio_mem_rdm_unregister_listener;
}

static const TypeInfo virtio_mem_info = {
    .name = TYPE_VIRTIO_MEM,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIOMEM),
    .instance_init = virtio_mem_instance_init,
    .class_init = virtio_mem_class_init,
    .class_size = sizeof(VirtIOMEMClass),
    .interfaces = (InterfaceInfo[]) {
        { TYPE_RAM_DISCARD_MANAGER },
        { }
    },
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_mem_info);
}

type_init(virtio_register_types)