xref: /openbmc/qemu/hw/virtio/virtio-mem.c (revision 6538692e)
1 /*
2  * Virtio MEM device
3  *
4  * Copyright (C) 2020 Red Hat, Inc.
5  *
6  * Authors:
7  *  David Hildenbrand <david@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.
10  * See the COPYING file in the top-level directory.
11  */
12 
13 #include "qemu/osdep.h"
14 #include "qemu-common.h"
15 #include "qemu/iov.h"
16 #include "qemu/cutils.h"
17 #include "qemu/error-report.h"
18 #include "qemu/units.h"
19 #include "sysemu/numa.h"
20 #include "sysemu/sysemu.h"
21 #include "sysemu/reset.h"
22 #include "hw/virtio/virtio.h"
23 #include "hw/virtio/virtio-bus.h"
24 #include "hw/virtio/virtio-access.h"
25 #include "hw/virtio/virtio-mem.h"
26 #include "qapi/error.h"
27 #include "qapi/visitor.h"
28 #include "exec/ram_addr.h"
29 #include "migration/misc.h"
30 #include "hw/boards.h"
31 #include "hw/qdev-properties.h"
32 #include CONFIG_DEVICES
33 #include "trace.h"
34 
/*
 * We only had legacy x86 guests that did not support
 * VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE. Other targets don't have legacy guests.
 *
 * When this macro is defined, realizing the device has to resolve the
 * 'unplugged_inaccessible' setting based on the memory backend; otherwise
 * the setting is unconditionally forced to "on".
 */
#if defined(TARGET_X86_64) || defined(TARGET_I386)
#define VIRTIO_MEM_HAS_LEGACY_GUESTS
#endif
42 
/*
 * Let's not allow blocks smaller than 1 MiB, for example, to keep the tracking
 * bitmap small.
 *
 * This value also serves as the fallback THP size guess and as the lower
 * bound of the default block size for backends whose page size differs from
 * the host page size.
 */
#define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)(1 * MiB))
48 
49 static uint32_t virtio_mem_default_thp_size(void)
50 {
51     uint32_t default_thp_size = VIRTIO_MEM_MIN_BLOCK_SIZE;
52 
53 #if defined(__x86_64__) || defined(__arm__) || defined(__powerpc64__)
54     default_thp_size = 2 * MiB;
55 #elif defined(__aarch64__)
56     if (qemu_real_host_page_size == 4 * KiB) {
57         default_thp_size = 2 * MiB;
58     } else if (qemu_real_host_page_size == 16 * KiB) {
59         default_thp_size = 32 * MiB;
60     } else if (qemu_real_host_page_size == 64 * KiB) {
61         default_thp_size = 512 * MiB;
62     }
63 #endif
64 
65     return default_thp_size;
66 }
67 
68 /*
69  * We want to have a reasonable default block size such that
70  * 1. We avoid splitting THPs when unplugging memory, which degrades
71  *    performance.
72  * 2. We avoid placing THPs for plugged blocks that also cover unplugged
73  *    blocks.
74  *
75  * The actual THP size might differ between Linux kernels, so we try to probe
76  * it. In the future (if we ever run into issues regarding 2.), we might want
77  * to disable THP in case we fail to properly probe the THP size, or if the
78  * block size is configured smaller than the THP size.
79  */
80 static uint32_t thp_size;
81 
82 #define HPAGE_PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
83 static uint32_t virtio_mem_thp_size(void)
84 {
85     gchar *content = NULL;
86     const char *endptr;
87     uint64_t tmp;
88 
89     if (thp_size) {
90         return thp_size;
91     }
92 
93     /*
94      * Try to probe the actual THP size, fallback to (sane but eventually
95      * incorrect) default sizes.
96      */
97     if (g_file_get_contents(HPAGE_PMD_SIZE_PATH, &content, NULL, NULL) &&
98         !qemu_strtou64(content, &endptr, 0, &tmp) &&
99         (!endptr || *endptr == '\n')) {
100         /* Sanity-check the value and fallback to something reasonable. */
101         if (!tmp || !is_power_of_2(tmp)) {
102             warn_report("Read unsupported THP size: %" PRIx64, tmp);
103         } else {
104             thp_size = tmp;
105         }
106     }
107 
108     if (!thp_size) {
109         thp_size = virtio_mem_default_thp_size();
110         warn_report("Could not detect THP size, falling back to %" PRIx64
111                     "  MiB.", thp_size / MiB);
112     }
113 
114     g_free(content);
115     return thp_size;
116 }
117 
118 static uint64_t virtio_mem_default_block_size(RAMBlock *rb)
119 {
120     const uint64_t page_size = qemu_ram_pagesize(rb);
121 
122     /* We can have hugetlbfs with a page size smaller than the THP size. */
123     if (page_size == qemu_real_host_page_size) {
124         return MAX(page_size, virtio_mem_thp_size());
125     }
126     return MAX(page_size, VIRTIO_MEM_MIN_BLOCK_SIZE);
127 }
128 
#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
/*
 * Check whether reading unplugged memory of this RAM block is guaranteed to
 * hit the shared zeropage.
 *
 * That is only the case for ordinary MAP_PRIVATE anonymous RAM. In any other
 * case, reading unplugged memory *can* populate a fresh page, consuming
 * actual memory.
 */
static bool virtio_mem_has_shared_zeropage(RAMBlock *rb)
{
    if (qemu_ram_is_shared(rb)) {
        return false;
    }
    /* A valid fd means the RAM block is file-backed, not anonymous. */
    if (rb->fd >= 0) {
        return false;
    }
    return qemu_ram_pagesize(rb) == qemu_real_host_page_size;
}
#endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
141 
/*
 * Size the usable region bigger than the requested size if possible. Esp.
 * Linux guests will only add (aligned) memory blocks in case they fully
 * fit into the usable region, but plug+online only a subset of the pages.
 * The memory block size corresponds mostly to the section size.
 *
 * This allows e.g., to add 20MB with a section size of 128MB on x86_64, and
 * a section size of 512MB on arm64 (as long as the start address is properly
 * aligned, similar to ordinary DIMMs).
 *
 * We can change this at any time and maybe even make it configurable if
 * necessary (as the section size can change). But it's more likely that the
 * section size will rather get smaller and not bigger over time.
 *
 * The extent is added on top of the requested size when computing the usable
 * region size; building for any other target fails at compile time.
 */
#if defined(TARGET_X86_64) || defined(TARGET_I386)
/* Twice the 128 MiB section size mentioned above. */
#define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB))
#elif defined(TARGET_ARM)
/* Twice the 512 MiB section size mentioned above. */
#define VIRTIO_MEM_USABLE_EXTENT (2 * (512 * MiB))
#else
#error VIRTIO_MEM_USABLE_EXTENT not defined
#endif
163 
/*
 * Tell whether plug/unplug requests currently have to be refused because a
 * migration is in progress.
 */
static bool virtio_mem_is_busy(void)
{
    /*
     * Postcopy cannot handle concurrent discards and we don't want to migrate
     * pages on-demand with stale content when plugging new blocks.
     */
    if (migration_in_incoming_postcopy()) {
        return true;
    }

    /*
     * For precopy, we don't want unplugged blocks in our migration stream, and
     * when plugging new blocks, the page content might differ between source
     * and destination (observable by the guest when not initializing pages
     * after plugging them) until we're running on the destination (as we didn't
     * migrate these blocks when they were unplugged).
     */
    return !migration_is_idle();
}
178 
179 typedef int (*virtio_mem_range_cb)(const VirtIOMEM *vmem, void *arg,
180                                    uint64_t offset, uint64_t size);
181 
182 static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
183                                                virtio_mem_range_cb cb)
184 {
185     unsigned long first_zero_bit, last_zero_bit;
186     uint64_t offset, size;
187     int ret = 0;
188 
189     first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size);
190     while (first_zero_bit < vmem->bitmap_size) {
191         offset = first_zero_bit * vmem->block_size;
192         last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
193                                       first_zero_bit + 1) - 1;
194         size = (last_zero_bit - first_zero_bit + 1) * vmem->block_size;
195 
196         ret = cb(vmem, arg, offset, size);
197         if (ret) {
198             break;
199         }
200         first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
201                                             last_zero_bit + 2);
202     }
203     return ret;
204 }
205 
206 /*
207  * Adjust the memory section to cover the intersection with the given range.
208  *
209  * Returns false if the intersection is empty, otherwise returns true.
210  */
211 static bool virito_mem_intersect_memory_section(MemoryRegionSection *s,
212                                                 uint64_t offset, uint64_t size)
213 {
214     uint64_t start = MAX(s->offset_within_region, offset);
215     uint64_t end = MIN(s->offset_within_region + int128_get64(s->size),
216                        offset + size);
217 
218     if (end <= start) {
219         return false;
220     }
221 
222     s->offset_within_address_space += start - s->offset_within_region;
223     s->offset_within_region = start;
224     s->size = int128_make64(end - start);
225     return true;
226 }
227 
228 typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg);
229 
230 static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
231                                                MemoryRegionSection *s,
232                                                void *arg,
233                                                virtio_mem_section_cb cb)
234 {
235     unsigned long first_bit, last_bit;
236     uint64_t offset, size;
237     int ret = 0;
238 
239     first_bit = s->offset_within_region / vmem->bitmap_size;
240     first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
241     while (first_bit < vmem->bitmap_size) {
242         MemoryRegionSection tmp = *s;
243 
244         offset = first_bit * vmem->block_size;
245         last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
246                                       first_bit + 1) - 1;
247         size = (last_bit - first_bit + 1) * vmem->block_size;
248 
249         if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
250             break;
251         }
252         ret = cb(&tmp, arg);
253         if (ret) {
254             break;
255         }
256         first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
257                                   last_bit + 2);
258     }
259     return ret;
260 }
261 
262 static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem,
263                                                  MemoryRegionSection *s,
264                                                  void *arg,
265                                                  virtio_mem_section_cb cb)
266 {
267     unsigned long first_bit, last_bit;
268     uint64_t offset, size;
269     int ret = 0;
270 
271     first_bit = s->offset_within_region / vmem->bitmap_size;
272     first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
273     while (first_bit < vmem->bitmap_size) {
274         MemoryRegionSection tmp = *s;
275 
276         offset = first_bit * vmem->block_size;
277         last_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
278                                  first_bit + 1) - 1;
279         size = (last_bit - first_bit + 1) * vmem->block_size;
280 
281         if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
282             break;
283         }
284         ret = cb(&tmp, arg);
285         if (ret) {
286             break;
287         }
288         first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
289                                        last_bit + 2);
290     }
291     return ret;
292 }
293 
294 static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg)
295 {
296     RamDiscardListener *rdl = arg;
297 
298     return rdl->notify_populate(rdl, s);
299 }
300 
301 static int virtio_mem_notify_discard_cb(MemoryRegionSection *s, void *arg)
302 {
303     RamDiscardListener *rdl = arg;
304 
305     rdl->notify_discard(rdl, s);
306     return 0;
307 }
308 
309 static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
310                                      uint64_t size)
311 {
312     RamDiscardListener *rdl;
313 
314     QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
315         MemoryRegionSection tmp = *rdl->section;
316 
317         if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
318             continue;
319         }
320         rdl->notify_discard(rdl, &tmp);
321     }
322 }
323 
324 static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
325                                   uint64_t size)
326 {
327     RamDiscardListener *rdl, *rdl2;
328     int ret = 0;
329 
330     QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
331         MemoryRegionSection tmp = *rdl->section;
332 
333         if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
334             continue;
335         }
336         ret = rdl->notify_populate(rdl, &tmp);
337         if (ret) {
338             break;
339         }
340     }
341 
342     if (ret) {
343         /* Notify all already-notified listeners. */
344         QLIST_FOREACH(rdl2, &vmem->rdl_list, next) {
345             MemoryRegionSection tmp = *rdl->section;
346 
347             if (rdl2 == rdl) {
348                 break;
349             }
350             if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
351                 continue;
352             }
353             rdl2->notify_discard(rdl2, &tmp);
354         }
355     }
356     return ret;
357 }
358 
359 static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem)
360 {
361     RamDiscardListener *rdl;
362 
363     if (!vmem->size) {
364         return;
365     }
366 
367     QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
368         if (rdl->double_discard_supported) {
369             rdl->notify_discard(rdl, rdl->section);
370         } else {
371             virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
372                                                 virtio_mem_notify_discard_cb);
373         }
374     }
375 }
376 
377 static bool virtio_mem_test_bitmap(const VirtIOMEM *vmem, uint64_t start_gpa,
378                                    uint64_t size, bool plugged)
379 {
380     const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size;
381     const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1;
382     unsigned long found_bit;
383 
384     /* We fake a shorter bitmap to avoid searching too far. */
385     if (plugged) {
386         found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit);
387     } else {
388         found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit);
389     }
390     return found_bit > last_bit;
391 }
392 
393 static void virtio_mem_set_bitmap(VirtIOMEM *vmem, uint64_t start_gpa,
394                                   uint64_t size, bool plugged)
395 {
396     const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size;
397     const unsigned long nbits = size / vmem->block_size;
398 
399     if (plugged) {
400         bitmap_set(vmem->bitmap, bit, nbits);
401     } else {
402         bitmap_clear(vmem->bitmap, bit, nbits);
403     }
404 }
405 
406 static void virtio_mem_send_response(VirtIOMEM *vmem, VirtQueueElement *elem,
407                                      struct virtio_mem_resp *resp)
408 {
409     VirtIODevice *vdev = VIRTIO_DEVICE(vmem);
410     VirtQueue *vq = vmem->vq;
411 
412     trace_virtio_mem_send_response(le16_to_cpu(resp->type));
413     iov_from_buf(elem->in_sg, elem->in_num, 0, resp, sizeof(*resp));
414 
415     virtqueue_push(vq, elem, sizeof(*resp));
416     virtio_notify(vdev, vq);
417 }
418 
419 static void virtio_mem_send_response_simple(VirtIOMEM *vmem,
420                                             VirtQueueElement *elem,
421                                             uint16_t type)
422 {
423     struct virtio_mem_resp resp = {
424         .type = cpu_to_le16(type),
425     };
426 
427     virtio_mem_send_response(vmem, elem, &resp);
428 }
429 
430 static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa,
431                                    uint64_t size)
432 {
433     if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) {
434         return false;
435     }
436     if (gpa + size < gpa || !size) {
437         return false;
438     }
439     if (gpa < vmem->addr || gpa >= vmem->addr + vmem->usable_region_size) {
440         return false;
441     }
442     if (gpa + size > vmem->addr + vmem->usable_region_size) {
443         return false;
444     }
445     return true;
446 }
447 
448 static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
449                                       uint64_t size, bool plug)
450 {
451     const uint64_t offset = start_gpa - vmem->addr;
452     RAMBlock *rb = vmem->memdev->mr.ram_block;
453 
454     if (virtio_mem_is_busy()) {
455         return -EBUSY;
456     }
457 
458     if (!plug) {
459         if (ram_block_discard_range(rb, offset, size)) {
460             return -EBUSY;
461         }
462         virtio_mem_notify_unplug(vmem, offset, size);
463     } else {
464         int ret = 0;
465 
466         if (vmem->prealloc) {
467             void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
468             int fd = memory_region_get_fd(&vmem->memdev->mr);
469             Error *local_err = NULL;
470 
471             os_mem_prealloc(fd, area, size, 1, &local_err);
472             if (local_err) {
473                 static bool warned;
474 
475                 /*
476                  * Warn only once, we don't want to fill the log with these
477                  * warnings.
478                  */
479                 if (!warned) {
480                     warn_report_err(local_err);
481                     warned = true;
482                 } else {
483                     error_free(local_err);
484                 }
485                 ret = -EBUSY;
486             }
487         }
488         if (!ret) {
489             ret = virtio_mem_notify_plug(vmem, offset, size);
490         }
491 
492         if (ret) {
493             /* Could be preallocation or a notifier populated memory. */
494             ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size);
495             return -EBUSY;
496         }
497     }
498     virtio_mem_set_bitmap(vmem, start_gpa, size, plug);
499     return 0;
500 }
501 
502 static int virtio_mem_state_change_request(VirtIOMEM *vmem, uint64_t gpa,
503                                            uint16_t nb_blocks, bool plug)
504 {
505     const uint64_t size = nb_blocks * vmem->block_size;
506     int ret;
507 
508     if (!virtio_mem_valid_range(vmem, gpa, size)) {
509         return VIRTIO_MEM_RESP_ERROR;
510     }
511 
512     if (plug && (vmem->size + size > vmem->requested_size)) {
513         return VIRTIO_MEM_RESP_NACK;
514     }
515 
516     /* test if really all blocks are in the opposite state */
517     if (!virtio_mem_test_bitmap(vmem, gpa, size, !plug)) {
518         return VIRTIO_MEM_RESP_ERROR;
519     }
520 
521     ret = virtio_mem_set_block_state(vmem, gpa, size, plug);
522     if (ret) {
523         return VIRTIO_MEM_RESP_BUSY;
524     }
525     if (plug) {
526         vmem->size += size;
527     } else {
528         vmem->size -= size;
529     }
530     notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
531     return VIRTIO_MEM_RESP_ACK;
532 }
533 
534 static void virtio_mem_plug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
535                                     struct virtio_mem_req *req)
536 {
537     const uint64_t gpa = le64_to_cpu(req->u.plug.addr);
538     const uint16_t nb_blocks = le16_to_cpu(req->u.plug.nb_blocks);
539     uint16_t type;
540 
541     trace_virtio_mem_plug_request(gpa, nb_blocks);
542     type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, true);
543     virtio_mem_send_response_simple(vmem, elem, type);
544 }
545 
546 static void virtio_mem_unplug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
547                                       struct virtio_mem_req *req)
548 {
549     const uint64_t gpa = le64_to_cpu(req->u.unplug.addr);
550     const uint16_t nb_blocks = le16_to_cpu(req->u.unplug.nb_blocks);
551     uint16_t type;
552 
553     trace_virtio_mem_unplug_request(gpa, nb_blocks);
554     type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, false);
555     virtio_mem_send_response_simple(vmem, elem, type);
556 }
557 
558 static void virtio_mem_resize_usable_region(VirtIOMEM *vmem,
559                                             uint64_t requested_size,
560                                             bool can_shrink)
561 {
562     uint64_t newsize = MIN(memory_region_size(&vmem->memdev->mr),
563                            requested_size + VIRTIO_MEM_USABLE_EXTENT);
564 
565     /* The usable region size always has to be multiples of the block size. */
566     newsize = QEMU_ALIGN_UP(newsize, vmem->block_size);
567 
568     if (!requested_size) {
569         newsize = 0;
570     }
571 
572     if (newsize < vmem->usable_region_size && !can_shrink) {
573         return;
574     }
575 
576     trace_virtio_mem_resized_usable_region(vmem->usable_region_size, newsize);
577     vmem->usable_region_size = newsize;
578 }
579 
580 static int virtio_mem_unplug_all(VirtIOMEM *vmem)
581 {
582     RAMBlock *rb = vmem->memdev->mr.ram_block;
583 
584     if (virtio_mem_is_busy()) {
585         return -EBUSY;
586     }
587 
588     if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) {
589         return -EBUSY;
590     }
591     virtio_mem_notify_unplug_all(vmem);
592 
593     bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size);
594     if (vmem->size) {
595         vmem->size = 0;
596         notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
597     }
598     trace_virtio_mem_unplugged_all();
599     virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);
600     return 0;
601 }
602 
603 static void virtio_mem_unplug_all_request(VirtIOMEM *vmem,
604                                           VirtQueueElement *elem)
605 {
606     trace_virtio_mem_unplug_all_request();
607     if (virtio_mem_unplug_all(vmem)) {
608         virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_BUSY);
609     } else {
610         virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ACK);
611     }
612 }
613 
614 static void virtio_mem_state_request(VirtIOMEM *vmem, VirtQueueElement *elem,
615                                      struct virtio_mem_req *req)
616 {
617     const uint16_t nb_blocks = le16_to_cpu(req->u.state.nb_blocks);
618     const uint64_t gpa = le64_to_cpu(req->u.state.addr);
619     const uint64_t size = nb_blocks * vmem->block_size;
620     struct virtio_mem_resp resp = {
621         .type = cpu_to_le16(VIRTIO_MEM_RESP_ACK),
622     };
623 
624     trace_virtio_mem_state_request(gpa, nb_blocks);
625     if (!virtio_mem_valid_range(vmem, gpa, size)) {
626         virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ERROR);
627         return;
628     }
629 
630     if (virtio_mem_test_bitmap(vmem, gpa, size, true)) {
631         resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_PLUGGED);
632     } else if (virtio_mem_test_bitmap(vmem, gpa, size, false)) {
633         resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_UNPLUGGED);
634     } else {
635         resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_MIXED);
636     }
637     trace_virtio_mem_state_response(le16_to_cpu(resp.u.state.state));
638     virtio_mem_send_response(vmem, elem, &resp);
639 }
640 
641 static void virtio_mem_handle_request(VirtIODevice *vdev, VirtQueue *vq)
642 {
643     const int len = sizeof(struct virtio_mem_req);
644     VirtIOMEM *vmem = VIRTIO_MEM(vdev);
645     VirtQueueElement *elem;
646     struct virtio_mem_req req;
647     uint16_t type;
648 
649     while (true) {
650         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
651         if (!elem) {
652             return;
653         }
654 
655         if (iov_to_buf(elem->out_sg, elem->out_num, 0, &req, len) < len) {
656             virtio_error(vdev, "virtio-mem protocol violation: invalid request"
657                          " size: %d", len);
658             virtqueue_detach_element(vq, elem, 0);
659             g_free(elem);
660             return;
661         }
662 
663         if (iov_size(elem->in_sg, elem->in_num) <
664             sizeof(struct virtio_mem_resp)) {
665             virtio_error(vdev, "virtio-mem protocol violation: not enough space"
666                          " for response: %zu",
667                          iov_size(elem->in_sg, elem->in_num));
668             virtqueue_detach_element(vq, elem, 0);
669             g_free(elem);
670             return;
671         }
672 
673         type = le16_to_cpu(req.type);
674         switch (type) {
675         case VIRTIO_MEM_REQ_PLUG:
676             virtio_mem_plug_request(vmem, elem, &req);
677             break;
678         case VIRTIO_MEM_REQ_UNPLUG:
679             virtio_mem_unplug_request(vmem, elem, &req);
680             break;
681         case VIRTIO_MEM_REQ_UNPLUG_ALL:
682             virtio_mem_unplug_all_request(vmem, elem);
683             break;
684         case VIRTIO_MEM_REQ_STATE:
685             virtio_mem_state_request(vmem, elem, &req);
686             break;
687         default:
688             virtio_error(vdev, "virtio-mem protocol violation: unknown request"
689                          " type: %d", type);
690             virtqueue_detach_element(vq, elem, 0);
691             g_free(elem);
692             return;
693         }
694 
695         g_free(elem);
696     }
697 }
698 
699 static void virtio_mem_get_config(VirtIODevice *vdev, uint8_t *config_data)
700 {
701     VirtIOMEM *vmem = VIRTIO_MEM(vdev);
702     struct virtio_mem_config *config = (void *) config_data;
703 
704     config->block_size = cpu_to_le64(vmem->block_size);
705     config->node_id = cpu_to_le16(vmem->node);
706     config->requested_size = cpu_to_le64(vmem->requested_size);
707     config->plugged_size = cpu_to_le64(vmem->size);
708     config->addr = cpu_to_le64(vmem->addr);
709     config->region_size = cpu_to_le64(memory_region_size(&vmem->memdev->mr));
710     config->usable_region_size = cpu_to_le64(vmem->usable_region_size);
711 }
712 
713 static uint64_t virtio_mem_get_features(VirtIODevice *vdev, uint64_t features,
714                                         Error **errp)
715 {
716     MachineState *ms = MACHINE(qdev_get_machine());
717     VirtIOMEM *vmem = VIRTIO_MEM(vdev);
718 
719     if (ms->numa_state) {
720 #if defined(CONFIG_ACPI)
721         virtio_add_feature(&features, VIRTIO_MEM_F_ACPI_PXM);
722 #endif
723     }
724     assert(vmem->unplugged_inaccessible != ON_OFF_AUTO_AUTO);
725     if (vmem->unplugged_inaccessible == ON_OFF_AUTO_ON) {
726         virtio_add_feature(&features, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE);
727     }
728     return features;
729 }
730 
731 static int virtio_mem_validate_features(VirtIODevice *vdev)
732 {
733     if (virtio_host_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE) &&
734         !virtio_vdev_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE)) {
735         return -EFAULT;
736     }
737     return 0;
738 }
739 
/*
 * System reset handler; opaque is the VirtIOMEM device that was registered
 * together with this handler.
 */
static void virtio_mem_system_reset(void *opaque)
{
    /*
     * During usual resets, we will unplug all memory and shrink the usable
     * region size. This is, however, not possible in all scenarios. Then,
     * the guest has to deal with this manually (VIRTIO_MEM_REQ_UNPLUG_ALL).
     * Consequently, a failure is deliberately ignored here.
     */
    (void)virtio_mem_unplug_all(VIRTIO_MEM(opaque));
}
751 
752 static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
753 {
754     MachineState *ms = MACHINE(qdev_get_machine());
755     int nb_numa_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0;
756     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
757     VirtIOMEM *vmem = VIRTIO_MEM(dev);
758     uint64_t page_size;
759     RAMBlock *rb;
760     int ret;
761 
762     if (!vmem->memdev) {
763         error_setg(errp, "'%s' property is not set", VIRTIO_MEM_MEMDEV_PROP);
764         return;
765     } else if (host_memory_backend_is_mapped(vmem->memdev)) {
766         error_setg(errp, "'%s' property specifies a busy memdev: %s",
767                    VIRTIO_MEM_MEMDEV_PROP,
768                    object_get_canonical_path_component(OBJECT(vmem->memdev)));
769         return;
770     } else if (!memory_region_is_ram(&vmem->memdev->mr) ||
771         memory_region_is_rom(&vmem->memdev->mr) ||
772         !vmem->memdev->mr.ram_block) {
773         error_setg(errp, "'%s' property specifies an unsupported memdev",
774                    VIRTIO_MEM_MEMDEV_PROP);
775         return;
776     }
777 
778     if ((nb_numa_nodes && vmem->node >= nb_numa_nodes) ||
779         (!nb_numa_nodes && vmem->node)) {
780         error_setg(errp, "'%s' property has value '%" PRIu32 "', which exceeds"
781                    "the number of numa nodes: %d", VIRTIO_MEM_NODE_PROP,
782                    vmem->node, nb_numa_nodes ? nb_numa_nodes : 1);
783         return;
784     }
785 
786     if (enable_mlock) {
787         error_setg(errp, "Incompatible with mlock");
788         return;
789     }
790 
791     rb = vmem->memdev->mr.ram_block;
792     page_size = qemu_ram_pagesize(rb);
793 
794 #if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
795     switch (vmem->unplugged_inaccessible) {
796     case ON_OFF_AUTO_AUTO:
797         if (virtio_mem_has_shared_zeropage(rb)) {
798             vmem->unplugged_inaccessible = ON_OFF_AUTO_OFF;
799         } else {
800             vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
801         }
802         break;
803     case ON_OFF_AUTO_OFF:
804         if (!virtio_mem_has_shared_zeropage(rb)) {
805             warn_report("'%s' property set to 'off' with a memdev that does"
806                         " not support the shared zeropage.",
807                         VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP);
808         }
809         break;
810     default:
811         break;
812     }
813 #else /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
814     vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
815 #endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
816 
817     /*
818      * If the block size wasn't configured by the user, use a sane default. This
819      * allows using hugetlbfs backends of any page size without manual
820      * intervention.
821      */
822     if (!vmem->block_size) {
823         vmem->block_size = virtio_mem_default_block_size(rb);
824     }
825 
826     if (vmem->block_size < page_size) {
827         error_setg(errp, "'%s' property has to be at least the page size (0x%"
828                    PRIx64 ")", VIRTIO_MEM_BLOCK_SIZE_PROP, page_size);
829         return;
830     } else if (vmem->block_size < virtio_mem_default_block_size(rb)) {
831         warn_report("'%s' property is smaller than the default block size (%"
832                     PRIx64 " MiB)", VIRTIO_MEM_BLOCK_SIZE_PROP,
833                     virtio_mem_default_block_size(rb) / MiB);
834     }
835     if (!QEMU_IS_ALIGNED(vmem->requested_size, vmem->block_size)) {
836         error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64
837                    ")", VIRTIO_MEM_REQUESTED_SIZE_PROP,
838                    VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
839         return;
840     } else if (!QEMU_IS_ALIGNED(vmem->addr, vmem->block_size)) {
841         error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64
842                    ")", VIRTIO_MEM_ADDR_PROP, VIRTIO_MEM_BLOCK_SIZE_PROP,
843                    vmem->block_size);
844         return;
845     } else if (!QEMU_IS_ALIGNED(memory_region_size(&vmem->memdev->mr),
846                                 vmem->block_size)) {
847         error_setg(errp, "'%s' property memdev size has to be multiples of"
848                    "'%s' (0x%" PRIx64 ")", VIRTIO_MEM_MEMDEV_PROP,
849                    VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
850         return;
851     }
852 
853     if (ram_block_coordinated_discard_require(true)) {
854         error_setg(errp, "Discarding RAM is disabled");
855         return;
856     }
857 
858     ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
859     if (ret) {
860         error_setg_errno(errp, -ret, "Unexpected error discarding RAM");
861         ram_block_coordinated_discard_require(false);
862         return;
863     }
864 
865     virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);
866 
867     vmem->bitmap_size = memory_region_size(&vmem->memdev->mr) /
868                         vmem->block_size;
869     vmem->bitmap = bitmap_new(vmem->bitmap_size);
870 
871     virtio_init(vdev, TYPE_VIRTIO_MEM, VIRTIO_ID_MEM,
872                 sizeof(struct virtio_mem_config));
873     vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request);
874 
875     host_memory_backend_set_mapped(vmem->memdev, true);
876     vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
877     qemu_register_reset(virtio_mem_system_reset, vmem);
878 
879     /*
880      * Set ourselves as RamDiscardManager before the plug handler maps the
881      * memory region and exposes it via an address space.
882      */
883     memory_region_set_ram_discard_manager(&vmem->memdev->mr,
884                                           RAM_DISCARD_MANAGER(vmem));
885 }
886 
887 static void virtio_mem_device_unrealize(DeviceState *dev)
888 {
889     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
890     VirtIOMEM *vmem = VIRTIO_MEM(dev);
891 
892     /*
893      * The unplug handler unmapped the memory region, it cannot be
894      * found via an address space anymore. Unset ourselves.
895      */
896     memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
897     qemu_unregister_reset(virtio_mem_system_reset, vmem);
898     vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem));
899     host_memory_backend_set_mapped(vmem->memdev, false);
900     virtio_del_queue(vdev, 0);
901     virtio_cleanup(vdev);
902     g_free(vmem->bitmap);
903     ram_block_coordinated_discard_require(false);
904 }
905 
906 static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg,
907                                        uint64_t offset, uint64_t size)
908 {
909     RAMBlock *rb = vmem->memdev->mr.ram_block;
910 
911     return ram_block_discard_range(rb, offset, size) ? -EINVAL : 0;
912 }
913 
914 static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)
915 {
916     /* Make sure all memory is really discarded after migration. */
917     return virtio_mem_for_each_unplugged_range(vmem, NULL,
918                                                virtio_mem_discard_range_cb);
919 }
920 
921 static int virtio_mem_post_load(void *opaque, int version_id)
922 {
923     VirtIOMEM *vmem = VIRTIO_MEM(opaque);
924     RamDiscardListener *rdl;
925     int ret;
926 
927     /*
928      * We started out with all memory discarded and our memory region is mapped
929      * into an address space. Replay, now that we updated the bitmap.
930      */
931     QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
932         ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
933                                                  virtio_mem_notify_populate_cb);
934         if (ret) {
935             return ret;
936         }
937     }
938 
939     if (migration_in_incoming_postcopy()) {
940         return 0;
941     }
942 
943     return virtio_mem_restore_unplugged(vmem);
944 }
945 
/*
 * Temporary migration state used to verify that source and destination were
 * configured identically. It is filled from the device in
 * virtio_mem_mig_sanity_checks_pre_save() and compared against the device in
 * virtio_mem_mig_sanity_checks_post_load().
 */
typedef struct VirtIOMEMMigSanityChecks {
    VirtIOMEM *parent;      /* device the values are taken from/checked against */
    uint64_t addr;          /* copy of vmem->addr */
    uint64_t region_size;   /* size of the memory backend's memory region */
    uint64_t block_size;    /* copy of vmem->block_size */
    uint32_t node;          /* copy of vmem->node */
} VirtIOMEMMigSanityChecks;
953 
954 static int virtio_mem_mig_sanity_checks_pre_save(void *opaque)
955 {
956     VirtIOMEMMigSanityChecks *tmp = opaque;
957     VirtIOMEM *vmem = tmp->parent;
958 
959     tmp->addr = vmem->addr;
960     tmp->region_size = memory_region_size(&vmem->memdev->mr);
961     tmp->block_size = vmem->block_size;
962     tmp->node = vmem->node;
963     return 0;
964 }
965 
966 static int virtio_mem_mig_sanity_checks_post_load(void *opaque, int version_id)
967 {
968     VirtIOMEMMigSanityChecks *tmp = opaque;
969     VirtIOMEM *vmem = tmp->parent;
970     const uint64_t new_region_size = memory_region_size(&vmem->memdev->mr);
971 
972     if (tmp->addr != vmem->addr) {
973         error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
974                      VIRTIO_MEM_ADDR_PROP, tmp->addr, vmem->addr);
975         return -EINVAL;
976     }
977     /*
978      * Note: Preparation for resizeable memory regions. The maximum size
979      * of the memory region must not change during migration.
980      */
981     if (tmp->region_size != new_region_size) {
982         error_report("Property '%s' size changed from 0x%" PRIx64 " to 0x%"
983                      PRIx64, VIRTIO_MEM_MEMDEV_PROP, tmp->region_size,
984                      new_region_size);
985         return -EINVAL;
986     }
987     if (tmp->block_size != vmem->block_size) {
988         error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
989                      VIRTIO_MEM_BLOCK_SIZE_PROP, tmp->block_size,
990                      vmem->block_size);
991         return -EINVAL;
992     }
993     if (tmp->node != vmem->node) {
994         error_report("Property '%s' changed from %" PRIu32 " to %" PRIu32,
995                      VIRTIO_MEM_NODE_PROP, tmp->node, vmem->node);
996         return -EINVAL;
997     }
998     return 0;
999 }
1000 
1001 static const VMStateDescription vmstate_virtio_mem_sanity_checks = {
1002     .name = "virtio-mem-device/sanity-checks",
1003     .pre_save = virtio_mem_mig_sanity_checks_pre_save,
1004     .post_load = virtio_mem_mig_sanity_checks_post_load,
1005     .fields = (VMStateField[]) {
1006         VMSTATE_UINT64(addr, VirtIOMEMMigSanityChecks),
1007         VMSTATE_UINT64(region_size, VirtIOMEMMigSanityChecks),
1008         VMSTATE_UINT64(block_size, VirtIOMEMMigSanityChecks),
1009         VMSTATE_UINT32(node, VirtIOMEMMigSanityChecks),
1010         VMSTATE_END_OF_LIST(),
1011     },
1012 };
1013 
1014 static const VMStateDescription vmstate_virtio_mem_device = {
1015     .name = "virtio-mem-device",
1016     .minimum_version_id = 1,
1017     .version_id = 1,
1018     .priority = MIG_PRI_VIRTIO_MEM,
1019     .post_load = virtio_mem_post_load,
1020     .fields = (VMStateField[]) {
1021         VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks,
1022                          vmstate_virtio_mem_sanity_checks),
1023         VMSTATE_UINT64(usable_region_size, VirtIOMEM),
1024         VMSTATE_UINT64(size, VirtIOMEM),
1025         VMSTATE_UINT64(requested_size, VirtIOMEM),
1026         VMSTATE_BITMAP(bitmap, VirtIOMEM, 0, bitmap_size),
1027         VMSTATE_END_OF_LIST()
1028     },
1029 };
1030 
1031 static const VMStateDescription vmstate_virtio_mem = {
1032     .name = "virtio-mem",
1033     .minimum_version_id = 1,
1034     .version_id = 1,
1035     .fields = (VMStateField[]) {
1036         VMSTATE_VIRTIO_DEVICE,
1037         VMSTATE_END_OF_LIST()
1038     },
1039 };
1040 
1041 static void virtio_mem_fill_device_info(const VirtIOMEM *vmem,
1042                                         VirtioMEMDeviceInfo *vi)
1043 {
1044     vi->memaddr = vmem->addr;
1045     vi->node = vmem->node;
1046     vi->requested_size = vmem->requested_size;
1047     vi->size = vmem->size;
1048     vi->max_size = memory_region_size(&vmem->memdev->mr);
1049     vi->block_size = vmem->block_size;
1050     vi->memdev = object_get_canonical_path(OBJECT(vmem->memdev));
1051 }
1052 
1053 static MemoryRegion *virtio_mem_get_memory_region(VirtIOMEM *vmem, Error **errp)
1054 {
1055     if (!vmem->memdev) {
1056         error_setg(errp, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP);
1057         return NULL;
1058     }
1059 
1060     return &vmem->memdev->mr;
1061 }
1062 
1063 static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem,
1064                                                 Notifier *notifier)
1065 {
1066     notifier_list_add(&vmem->size_change_notifiers, notifier);
1067 }
1068 
/*
 * Remove a previously added size-change notifier.
 *
 * @vmem is not used here: the notifier is removed via notifier_remove()
 * without referencing the device's list.
 */
static void virtio_mem_remove_size_change_notifier(VirtIOMEM *vmem,
                                                   Notifier *notifier)
{
    notifier_remove(notifier);
}
1074 
1075 static void virtio_mem_get_size(Object *obj, Visitor *v, const char *name,
1076                                 void *opaque, Error **errp)
1077 {
1078     const VirtIOMEM *vmem = VIRTIO_MEM(obj);
1079     uint64_t value = vmem->size;
1080 
1081     visit_type_size(v, name, &value, errp);
1082 }
1083 
1084 static void virtio_mem_get_requested_size(Object *obj, Visitor *v,
1085                                           const char *name, void *opaque,
1086                                           Error **errp)
1087 {
1088     const VirtIOMEM *vmem = VIRTIO_MEM(obj);
1089     uint64_t value = vmem->requested_size;
1090 
1091     visit_type_size(v, name, &value, errp);
1092 }
1093 
1094 static void virtio_mem_set_requested_size(Object *obj, Visitor *v,
1095                                           const char *name, void *opaque,
1096                                           Error **errp)
1097 {
1098     VirtIOMEM *vmem = VIRTIO_MEM(obj);
1099     Error *err = NULL;
1100     uint64_t value;
1101 
1102     visit_type_size(v, name, &value, &err);
1103     if (err) {
1104         error_propagate(errp, err);
1105         return;
1106     }
1107 
1108     /*
1109      * The block size and memory backend are not fixed until the device was
1110      * realized. realize() will verify these properties then.
1111      */
1112     if (DEVICE(obj)->realized) {
1113         if (!QEMU_IS_ALIGNED(value, vmem->block_size)) {
1114             error_setg(errp, "'%s' has to be multiples of '%s' (0x%" PRIx64
1115                        ")", name, VIRTIO_MEM_BLOCK_SIZE_PROP,
1116                        vmem->block_size);
1117             return;
1118         } else if (value > memory_region_size(&vmem->memdev->mr)) {
1119             error_setg(errp, "'%s' cannot exceed the memory backend size"
1120                        "(0x%" PRIx64 ")", name,
1121                        memory_region_size(&vmem->memdev->mr));
1122             return;
1123         }
1124 
1125         if (value != vmem->requested_size) {
1126             virtio_mem_resize_usable_region(vmem, value, false);
1127             vmem->requested_size = value;
1128         }
1129         /*
1130          * Trigger a config update so the guest gets notified. We trigger
1131          * even if the size didn't change (especially helpful for debugging).
1132          */
1133         virtio_notify_config(VIRTIO_DEVICE(vmem));
1134     } else {
1135         vmem->requested_size = value;
1136     }
1137 }
1138 
1139 static void virtio_mem_get_block_size(Object *obj, Visitor *v, const char *name,
1140                                       void *opaque, Error **errp)
1141 {
1142     const VirtIOMEM *vmem = VIRTIO_MEM(obj);
1143     uint64_t value = vmem->block_size;
1144 
1145     /*
1146      * If not configured by the user (and we're not realized yet), use the
1147      * default block size we would use with the current memory backend.
1148      */
1149     if (!value) {
1150         if (vmem->memdev && memory_region_is_ram(&vmem->memdev->mr)) {
1151             value = virtio_mem_default_block_size(vmem->memdev->mr.ram_block);
1152         } else {
1153             value = virtio_mem_thp_size();
1154         }
1155     }
1156 
1157     visit_type_size(v, name, &value, errp);
1158 }
1159 
1160 static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name,
1161                                       void *opaque, Error **errp)
1162 {
1163     VirtIOMEM *vmem = VIRTIO_MEM(obj);
1164     Error *err = NULL;
1165     uint64_t value;
1166 
1167     if (DEVICE(obj)->realized) {
1168         error_setg(errp, "'%s' cannot be changed", name);
1169         return;
1170     }
1171 
1172     visit_type_size(v, name, &value, &err);
1173     if (err) {
1174         error_propagate(errp, err);
1175         return;
1176     }
1177 
1178     if (value < VIRTIO_MEM_MIN_BLOCK_SIZE) {
1179         error_setg(errp, "'%s' property has to be at least 0x%" PRIx32, name,
1180                    VIRTIO_MEM_MIN_BLOCK_SIZE);
1181         return;
1182     } else if (!is_power_of_2(value)) {
1183         error_setg(errp, "'%s' property has to be a power of two", name);
1184         return;
1185     }
1186     vmem->block_size = value;
1187 }
1188 
1189 static void virtio_mem_instance_init(Object *obj)
1190 {
1191     VirtIOMEM *vmem = VIRTIO_MEM(obj);
1192 
1193     notifier_list_init(&vmem->size_change_notifiers);
1194     QLIST_INIT(&vmem->rdl_list);
1195 
1196     object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size,
1197                         NULL, NULL, NULL);
1198     object_property_add(obj, VIRTIO_MEM_REQUESTED_SIZE_PROP, "size",
1199                         virtio_mem_get_requested_size,
1200                         virtio_mem_set_requested_size, NULL, NULL);
1201     object_property_add(obj, VIRTIO_MEM_BLOCK_SIZE_PROP, "size",
1202                         virtio_mem_get_block_size, virtio_mem_set_block_size,
1203                         NULL, NULL);
1204 }
1205 
/*
 * Statically defined device properties, installed via
 * device_class_set_props() in virtio_mem_class_init(). The size-related
 * properties are not listed here; they are added with custom accessors in
 * virtio_mem_instance_init().
 */
static Property virtio_mem_properties[] = {
    DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0),
    DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0),
    DEFINE_PROP_BOOL(VIRTIO_MEM_PREALLOC_PROP, VirtIOMEM, prealloc, false),
    DEFINE_PROP_LINK(VIRTIO_MEM_MEMDEV_PROP, VirtIOMEM, memdev,
                     TYPE_MEMORY_BACKEND, HostMemoryBackend *),
    /* Only exists on targets that had legacy guests (x86). */
#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
    DEFINE_PROP_ON_OFF_AUTO(VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP, VirtIOMEM,
                            unplugged_inaccessible, ON_OFF_AUTO_AUTO),
#endif
    DEFINE_PROP_END_OF_LIST(),
};
1218 
1219 static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm,
1220                                                    const MemoryRegion *mr)
1221 {
1222     const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1223 
1224     g_assert(mr == &vmem->memdev->mr);
1225     return vmem->block_size;
1226 }
1227 
1228 static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
1229                                         const MemoryRegionSection *s)
1230 {
1231     const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1232     uint64_t start_gpa = vmem->addr + s->offset_within_region;
1233     uint64_t end_gpa = start_gpa + int128_get64(s->size);
1234 
1235     g_assert(s->mr == &vmem->memdev->mr);
1236 
1237     start_gpa = QEMU_ALIGN_DOWN(start_gpa, vmem->block_size);
1238     end_gpa = QEMU_ALIGN_UP(end_gpa, vmem->block_size);
1239 
1240     if (!virtio_mem_valid_range(vmem, start_gpa, end_gpa - start_gpa)) {
1241         return false;
1242     }
1243 
1244     return virtio_mem_test_bitmap(vmem, start_gpa, end_gpa - start_gpa, true);
1245 }
1246 
/*
 * Context handed to the replay trampolines
 * (virtio_mem_rdm_replay_populated_cb()/virtio_mem_rdm_replay_discarded_cb()).
 */
struct VirtIOMEMReplayData {
    void *fn;       /* caller's replay function; cast back to its real type */
    void *opaque;   /* caller's opaque pointer, passed through to @fn */
};
1251 
1252 static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg)
1253 {
1254     struct VirtIOMEMReplayData *data = arg;
1255 
1256     return ((ReplayRamPopulate)data->fn)(s, data->opaque);
1257 }
1258 
1259 static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
1260                                            MemoryRegionSection *s,
1261                                            ReplayRamPopulate replay_fn,
1262                                            void *opaque)
1263 {
1264     const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1265     struct VirtIOMEMReplayData data = {
1266         .fn = replay_fn,
1267         .opaque = opaque,
1268     };
1269 
1270     g_assert(s->mr == &vmem->memdev->mr);
1271     return virtio_mem_for_each_plugged_section(vmem, s, &data,
1272                                             virtio_mem_rdm_replay_populated_cb);
1273 }
1274 
1275 static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s,
1276                                               void *arg)
1277 {
1278     struct VirtIOMEMReplayData *data = arg;
1279 
1280     ((ReplayRamDiscard)data->fn)(s, data->opaque);
1281     return 0;
1282 }
1283 
1284 static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
1285                                             MemoryRegionSection *s,
1286                                             ReplayRamDiscard replay_fn,
1287                                             void *opaque)
1288 {
1289     const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1290     struct VirtIOMEMReplayData data = {
1291         .fn = replay_fn,
1292         .opaque = opaque,
1293     };
1294 
1295     g_assert(s->mr == &vmem->memdev->mr);
1296     virtio_mem_for_each_unplugged_section(vmem, s, &data,
1297                                           virtio_mem_rdm_replay_discarded_cb);
1298 }
1299 
1300 static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
1301                                              RamDiscardListener *rdl,
1302                                              MemoryRegionSection *s)
1303 {
1304     VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1305     int ret;
1306 
1307     g_assert(s->mr == &vmem->memdev->mr);
1308     rdl->section = memory_region_section_new_copy(s);
1309 
1310     QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next);
1311     ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
1312                                               virtio_mem_notify_populate_cb);
1313     if (ret) {
1314         error_report("%s: Replaying plugged ranges failed: %s", __func__,
1315                      strerror(-ret));
1316     }
1317 }
1318 
1319 static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm,
1320                                                RamDiscardListener *rdl)
1321 {
1322     VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1323 
1324     g_assert(rdl->section->mr == &vmem->memdev->mr);
1325     if (vmem->size) {
1326         if (rdl->double_discard_supported) {
1327             rdl->notify_discard(rdl, rdl->section);
1328         } else {
1329             virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
1330                                                 virtio_mem_notify_discard_cb);
1331         }
1332     }
1333 
1334     memory_region_section_free_copy(rdl->section);
1335     rdl->section = NULL;
1336     QLIST_REMOVE(rdl, next);
1337 }
1338 
1339 static void virtio_mem_class_init(ObjectClass *klass, void *data)
1340 {
1341     DeviceClass *dc = DEVICE_CLASS(klass);
1342     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
1343     VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass);
1344     RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass);
1345 
1346     device_class_set_props(dc, virtio_mem_properties);
1347     dc->vmsd = &vmstate_virtio_mem;
1348 
1349     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
1350     vdc->realize = virtio_mem_device_realize;
1351     vdc->unrealize = virtio_mem_device_unrealize;
1352     vdc->get_config = virtio_mem_get_config;
1353     vdc->get_features = virtio_mem_get_features;
1354     vdc->validate_features = virtio_mem_validate_features;
1355     vdc->vmsd = &vmstate_virtio_mem_device;
1356 
1357     vmc->fill_device_info = virtio_mem_fill_device_info;
1358     vmc->get_memory_region = virtio_mem_get_memory_region;
1359     vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier;
1360     vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier;
1361 
1362     rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity;
1363     rdmc->is_populated = virtio_mem_rdm_is_populated;
1364     rdmc->replay_populated = virtio_mem_rdm_replay_populated;
1365     rdmc->replay_discarded = virtio_mem_rdm_replay_discarded;
1366     rdmc->register_listener = virtio_mem_rdm_register_listener;
1367     rdmc->unregister_listener = virtio_mem_rdm_unregister_listener;
1368 }
1369 
1370 static const TypeInfo virtio_mem_info = {
1371     .name = TYPE_VIRTIO_MEM,
1372     .parent = TYPE_VIRTIO_DEVICE,
1373     .instance_size = sizeof(VirtIOMEM),
1374     .instance_init = virtio_mem_instance_init,
1375     .class_init = virtio_mem_class_init,
1376     .class_size = sizeof(VirtIOMEMClass),
1377     .interfaces = (InterfaceInfo[]) {
1378         { TYPE_RAM_DISCARD_MANAGER },
1379         { }
1380     },
1381 };
1382 
/* Register the virtio-mem type description (virtio_mem_info). */
static void virtio_register_types(void)
{
    type_register_static(&virtio_mem_info);
}

/* Hand virtio_register_types() to the type_init() hook. */
type_init(virtio_register_types)
1389