xref: /openbmc/qemu/hw/virtio/virtio-mem.c (revision 3b95a71b22827d261786b84f38b1e9109f6bf57b)
/*
 * Virtio MEM device
 *
 * Copyright (C) 2020 Red Hat, Inc.
 *
 * Authors:
 *  David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/units.h"
#include "sysemu/numa.h"
#include "sysemu/sysemu.h"
#include "sysemu/reset.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"
#include "hw/virtio/virtio-mem.h"
#include "qapi/error.h"
#include "qapi/visitor.h"
#include "exec/ram_addr.h"
#include "migration/misc.h"
#include "hw/boards.h"
#include "hw/qdev-properties.h"
#include CONFIG_DEVICES
#include "trace.h"

static const VMStateDescription vmstate_virtio_mem_device_early;

/*
 * We only had legacy x86 guests that did not support
 * VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE. Other targets don't have legacy guests.
 */
#if defined(TARGET_X86_64) || defined(TARGET_I386)
#define VIRTIO_MEM_HAS_LEGACY_GUESTS
#endif

/*
 * Let's not allow blocks smaller than 1 MiB, for example, to keep the tracking
 * bitmap small.
 */
#define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)(1 * MiB))
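
/*
 * For example (illustrative numbers, not a requirement): a 1 TiB memdev
 * tracked at the 1 MiB minimum block size needs a bitmap of
 * 1 TiB / 1 MiB = 1048576 bits, i.e. only 128 KiB of bitmap memory.
 */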

static uint32_t virtio_mem_default_thp_size(void)
{
    uint32_t default_thp_size = VIRTIO_MEM_MIN_BLOCK_SIZE;

#if defined(__x86_64__) || defined(__arm__) || defined(__powerpc64__)
    default_thp_size = 2 * MiB;
#elif defined(__aarch64__)
    if (qemu_real_host_page_size() == 4 * KiB) {
        default_thp_size = 2 * MiB;
    } else if (qemu_real_host_page_size() == 16 * KiB) {
        default_thp_size = 32 * MiB;
    } else if (qemu_real_host_page_size() == 64 * KiB) {
        default_thp_size = 512 * MiB;
    }
#endif

    return default_thp_size;
}

/*
 * We want to have a reasonable default block size such that
 * 1. We avoid splitting THPs when unplugging memory, which degrades
 *    performance.
 * 2. We avoid placing THPs for plugged blocks that also cover unplugged
 *    blocks.
 *
 * The actual THP size might differ between Linux kernels, so we try to probe
 * it. In the future (if we ever run into issues regarding 2.), we might want
 * to disable THP in case we fail to properly probe the THP size, or if the
 * block size is configured smaller than the THP size.
 */
static uint32_t thp_size;

#define HPAGE_PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
static uint32_t virtio_mem_thp_size(void)
{
    gchar *content = NULL;
    const char *endptr;
    uint64_t tmp;

    if (thp_size) {
        return thp_size;
    }

    /*
     * Try to probe the actual THP size, and fall back to (sane but possibly
     * incorrect) default sizes.
     */
    if (g_file_get_contents(HPAGE_PMD_SIZE_PATH, &content, NULL, NULL) &&
        !qemu_strtou64(content, &endptr, 0, &tmp) &&
        (!endptr || *endptr == '\n')) {
        /* Sanity-check the value and fall back to something reasonable. */
        if (!tmp || !is_power_of_2(tmp)) {
            warn_report("Read unsupported THP size: %" PRIx64, tmp);
        } else {
            thp_size = tmp;
        }
    }

    if (!thp_size) {
        thp_size = virtio_mem_default_thp_size();
        warn_report("Could not detect THP size, falling back to %" PRIu64
                    " MiB.", (uint64_t)(thp_size / MiB));
    }

    g_free(content);
    return thp_size;
}

static uint64_t virtio_mem_default_block_size(RAMBlock *rb)
{
    const uint64_t page_size = qemu_ram_pagesize(rb);

    /* We can have hugetlbfs with a page size smaller than the THP size. */
    if (page_size == qemu_real_host_page_size()) {
        return MAX(page_size, virtio_mem_thp_size());
    }
    return MAX(page_size, VIRTIO_MEM_MIN_BLOCK_SIZE);
}

#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
static bool virtio_mem_has_shared_zeropage(RAMBlock *rb)
{
    /*
     * We only have a guaranteed shared zeropage on ordinary MAP_PRIVATE
     * anonymous RAM. In any other case, reading unplugged *can* populate a
     * fresh page, consuming actual memory.
     */
    return !qemu_ram_is_shared(rb) && rb->fd < 0 &&
           qemu_ram_pagesize(rb) == qemu_real_host_page_size();
}
#endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */

/*
 * Size the usable region bigger than the requested size, if possible. In
 * particular, Linux guests will only add (aligned) memory blocks in case they
 * fully fit into the usable region, but plug+online only a subset of the
 * pages. The memory block size corresponds mostly to the section size.
 *
 * This allows, e.g., adding 20 MiB with a section size of 128 MiB on x86-64,
 * and a section size of 512 MiB on arm64 (as long as the start address is
 * properly aligned, similar to ordinary DIMMs).
 *
 * We can change this at any time and maybe even make it configurable if
 * necessary (as the section size can change). But it's more likely that the
 * section size will rather get smaller and not bigger over time.
 */
#if defined(TARGET_X86_64) || defined(TARGET_I386)
#define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB))
#elif defined(TARGET_ARM)
#define VIRTIO_MEM_USABLE_EXTENT (2 * (512 * MiB))
#else
#error VIRTIO_MEM_USABLE_EXTENT not defined
#endif
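
/*
 * A worked example (illustrative values): with an x86-64 memdev of 1 GiB and
 * requested-size=20M, the usable region is sized
 * MIN(1 GiB, 20 MiB + 2 * 128 MiB) = 276 MiB (then aligned up to the block
 * size), so a Linux guest with 128 MiB sections has room to add fully fitting
 * memory blocks while plugging+onlining only the requested 20 MiB.
 */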

static bool virtio_mem_is_busy(void)
{
    /*
     * Postcopy cannot handle concurrent discards and we don't want to migrate
     * pages on-demand with stale content when plugging new blocks.
     *
     * For precopy, we don't want unplugged blocks in our migration stream, and
     * when plugging new blocks, the page content might differ between source
     * and destination (observable by the guest when not initializing pages
     * after plugging them) until we're running on the destination (as we didn't
     * migrate these blocks when they were unplugged).
     */
    return migration_in_incoming_postcopy() || !migration_is_idle();
}

typedef int (*virtio_mem_range_cb)(const VirtIOMEM *vmem, void *arg,
                                   uint64_t offset, uint64_t size);

static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
                                               virtio_mem_range_cb cb)
{
    unsigned long first_zero_bit, last_zero_bit;
    uint64_t offset, size;
    int ret = 0;

    first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size);
    while (first_zero_bit < vmem->bitmap_size) {
        offset = first_zero_bit * vmem->block_size;
        last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
                                      first_zero_bit + 1) - 1;
        size = (last_zero_bit - first_zero_bit + 1) * vmem->block_size;

        ret = cb(vmem, arg, offset, size);
        if (ret) {
            break;
        }
        first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
                                            last_zero_bit + 2);
    }
    return ret;
}
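
/*
 * For illustration: assuming a 6-bit bitmap 1 1 0 0 1 0 (bit 0 first) and a
 * 2 MiB block size, the walk above reports the unplugged ranges
 * offset=4 MiB/size=4 MiB (bits 2-3) and offset=10 MiB/size=2 MiB (bit 5)
 * to the callback.
 */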

/*
 * Adjust the memory section to cover the intersection with the given range.
 *
 * Returns false if the intersection is empty, otherwise returns true.
 */
static bool virtio_mem_intersect_memory_section(MemoryRegionSection *s,
                                                uint64_t offset, uint64_t size)
{
    uint64_t start = MAX(s->offset_within_region, offset);
    uint64_t end = MIN(s->offset_within_region + int128_get64(s->size),
                       offset + size);

    if (end <= start) {
        return false;
    }

    s->offset_within_address_space += start - s->offset_within_region;
    s->offset_within_region = start;
    s->size = int128_make64(end - start);
    return true;
}
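
/*
 * For example: a section covering [0x100000, 0x400000) within the region,
 * intersected with offset=0x200000/size=0x400000, is adjusted to
 * offset_within_region=0x200000 and size=0x200000, with
 * offset_within_address_space shifted forward by the same 0x100000.
 */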

typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg);

static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
                                               MemoryRegionSection *s,
                                               void *arg,
                                               virtio_mem_section_cb cb)
{
    unsigned long first_bit, last_bit;
    uint64_t offset, size;
    int ret = 0;

    first_bit = s->offset_within_region / vmem->block_size;
    first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
    while (first_bit < vmem->bitmap_size) {
        MemoryRegionSection tmp = *s;

        offset = first_bit * vmem->block_size;
        last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
                                      first_bit + 1) - 1;
        size = (last_bit - first_bit + 1) * vmem->block_size;

        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
            break;
        }
        ret = cb(&tmp, arg);
        if (ret) {
            break;
        }
        first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
                                  last_bit + 2);
    }
    return ret;
}

static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem,
                                                 MemoryRegionSection *s,
                                                 void *arg,
                                                 virtio_mem_section_cb cb)
{
    unsigned long first_bit, last_bit;
    uint64_t offset, size;
    int ret = 0;

    first_bit = s->offset_within_region / vmem->block_size;
    first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
    while (first_bit < vmem->bitmap_size) {
        MemoryRegionSection tmp = *s;

        offset = first_bit * vmem->block_size;
        last_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
                                 first_bit + 1) - 1;
        size = (last_bit - first_bit + 1) * vmem->block_size;

        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
            break;
        }
        ret = cb(&tmp, arg);
        if (ret) {
            break;
        }
        first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
                                       last_bit + 2);
    }
    return ret;
}

static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg)
{
    RamDiscardListener *rdl = arg;

    return rdl->notify_populate(rdl, s);
}

static int virtio_mem_notify_discard_cb(MemoryRegionSection *s, void *arg)
{
    RamDiscardListener *rdl = arg;

    rdl->notify_discard(rdl, s);
    return 0;
}

static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
                                     uint64_t size)
{
    RamDiscardListener *rdl;

    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
        MemoryRegionSection tmp = *rdl->section;

        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
            continue;
        }
        rdl->notify_discard(rdl, &tmp);
    }
}

static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
                                  uint64_t size)
{
    RamDiscardListener *rdl, *rdl2;
    int ret = 0;

    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
        MemoryRegionSection tmp = *rdl->section;

        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
            continue;
        }
        ret = rdl->notify_populate(rdl, &tmp);
        if (ret) {
            break;
        }
    }

    if (ret) {
        /* Roll back: notify already-notified listeners to discard again. */
        QLIST_FOREACH(rdl2, &vmem->rdl_list, next) {
            MemoryRegionSection tmp = *rdl2->section;

            if (rdl2 == rdl) {
                break;
            }
            if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
                continue;
            }
            rdl2->notify_discard(rdl2, &tmp);
        }
    }
    return ret;
}

static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem)
{
    RamDiscardListener *rdl;

    if (!vmem->size) {
        return;
    }

    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
        if (rdl->double_discard_supported) {
            rdl->notify_discard(rdl, rdl->section);
        } else {
            virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
                                                virtio_mem_notify_discard_cb);
        }
    }
}

static bool virtio_mem_test_bitmap(const VirtIOMEM *vmem, uint64_t start_gpa,
                                   uint64_t size, bool plugged)
{
    const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size;
    const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1;
    unsigned long found_bit;

    /* We fake a shorter bitmap to avoid searching too far. */
    if (plugged) {
        found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit);
    } else {
        found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit);
    }
    return found_bit > last_bit;
}
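
/*
 * Example: with plugged=true, first_bit=2 and last_bit=4, the search is
 * limited to bits [2, 4] by pretending the bitmap ends at bit 5;
 * find_next_zero_bit() then returns 5 (> last_bit) iff bits 2-4 are all set,
 * i.e. iff the whole range is plugged.
 */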

static void virtio_mem_set_bitmap(VirtIOMEM *vmem, uint64_t start_gpa,
                                  uint64_t size, bool plugged)
{
    const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size;
    const unsigned long nbits = size / vmem->block_size;

    if (plugged) {
        bitmap_set(vmem->bitmap, bit, nbits);
    } else {
        bitmap_clear(vmem->bitmap, bit, nbits);
    }
}

static void virtio_mem_send_response(VirtIOMEM *vmem, VirtQueueElement *elem,
                                     struct virtio_mem_resp *resp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(vmem);
    VirtQueue *vq = vmem->vq;

    trace_virtio_mem_send_response(le16_to_cpu(resp->type));
    iov_from_buf(elem->in_sg, elem->in_num, 0, resp, sizeof(*resp));

    virtqueue_push(vq, elem, sizeof(*resp));
    virtio_notify(vdev, vq);
}

static void virtio_mem_send_response_simple(VirtIOMEM *vmem,
                                            VirtQueueElement *elem,
                                            uint16_t type)
{
    struct virtio_mem_resp resp = {
        .type = cpu_to_le16(type),
    };

    virtio_mem_send_response(vmem, elem, &resp);
}

static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa,
                                   uint64_t size)
{
    if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) {
        return false;
    }
    if (gpa + size < gpa || !size) {
        return false;
    }
    if (gpa < vmem->addr || gpa >= vmem->addr + vmem->usable_region_size) {
        return false;
    }
    if (gpa + size > vmem->addr + vmem->usable_region_size) {
        return false;
    }
    return true;
}
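
/*
 * Note that the "gpa + size < gpa" test above catches wrap-around: e.g.,
 * gpa=0xfffffffffff00000 with size=0x200000 overflows to 0x100000 and is
 * rejected even though both values look individually sane.
 */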

static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
                                      uint64_t size, bool plug)
{
    const uint64_t offset = start_gpa - vmem->addr;
    RAMBlock *rb = vmem->memdev->mr.ram_block;

    if (virtio_mem_is_busy()) {
        return -EBUSY;
    }

    if (!plug) {
        if (ram_block_discard_range(rb, offset, size)) {
            return -EBUSY;
        }
        virtio_mem_notify_unplug(vmem, offset, size);
    } else {
        int ret = 0;

        if (vmem->prealloc) {
            void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
            int fd = memory_region_get_fd(&vmem->memdev->mr);
            Error *local_err = NULL;

            qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err);
            if (local_err) {
                static bool warned;

                /*
                 * Warn only once, we don't want to fill the log with these
                 * warnings.
                 */
                if (!warned) {
                    warn_report_err(local_err);
                    warned = true;
                } else {
                    error_free(local_err);
                }
                ret = -EBUSY;
            }
        }
        if (!ret) {
            ret = virtio_mem_notify_plug(vmem, offset, size);
        }

        if (ret) {
            /*
             * Preallocation or a notifier might already have populated
             * memory; discard it again.
             */
            ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size);
            return -EBUSY;
        }
    }
    virtio_mem_set_bitmap(vmem, start_gpa, size, plug);
    return 0;
}

static int virtio_mem_state_change_request(VirtIOMEM *vmem, uint64_t gpa,
                                           uint16_t nb_blocks, bool plug)
{
    const uint64_t size = nb_blocks * vmem->block_size;
    int ret;

    if (!virtio_mem_valid_range(vmem, gpa, size)) {
        return VIRTIO_MEM_RESP_ERROR;
    }

    if (plug && (vmem->size + size > vmem->requested_size)) {
        return VIRTIO_MEM_RESP_NACK;
    }

    /* Test that all blocks are really in the opposite state. */
    if (!virtio_mem_test_bitmap(vmem, gpa, size, !plug)) {
        return VIRTIO_MEM_RESP_ERROR;
    }

    ret = virtio_mem_set_block_state(vmem, gpa, size, plug);
    if (ret) {
        return VIRTIO_MEM_RESP_BUSY;
    }
    if (plug) {
        vmem->size += size;
    } else {
        vmem->size -= size;
    }
    notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
    return VIRTIO_MEM_RESP_ACK;
}

static void virtio_mem_plug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
                                    struct virtio_mem_req *req)
{
    const uint64_t gpa = le64_to_cpu(req->u.plug.addr);
    const uint16_t nb_blocks = le16_to_cpu(req->u.plug.nb_blocks);
    uint16_t type;

    trace_virtio_mem_plug_request(gpa, nb_blocks);
    type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, true);
    virtio_mem_send_response_simple(vmem, elem, type);
}

static void virtio_mem_unplug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
                                      struct virtio_mem_req *req)
{
    const uint64_t gpa = le64_to_cpu(req->u.unplug.addr);
    const uint16_t nb_blocks = le16_to_cpu(req->u.unplug.nb_blocks);
    uint16_t type;

    trace_virtio_mem_unplug_request(gpa, nb_blocks);
    type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, false);
    virtio_mem_send_response_simple(vmem, elem, type);
}

static void virtio_mem_resize_usable_region(VirtIOMEM *vmem,
                                            uint64_t requested_size,
                                            bool can_shrink)
{
    uint64_t newsize = MIN(memory_region_size(&vmem->memdev->mr),
                           requested_size + VIRTIO_MEM_USABLE_EXTENT);

    /* The usable region size always has to be a multiple of the block size. */
    newsize = QEMU_ALIGN_UP(newsize, vmem->block_size);

    if (!requested_size) {
        newsize = 0;
    }

    if (newsize < vmem->usable_region_size && !can_shrink) {
        return;
    }

    trace_virtio_mem_resized_usable_region(vmem->usable_region_size, newsize);
    vmem->usable_region_size = newsize;
}

static int virtio_mem_unplug_all(VirtIOMEM *vmem)
{
    RAMBlock *rb = vmem->memdev->mr.ram_block;

    if (virtio_mem_is_busy()) {
        return -EBUSY;
    }

    if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) {
        return -EBUSY;
    }
    virtio_mem_notify_unplug_all(vmem);

    bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size);
    if (vmem->size) {
        vmem->size = 0;
        notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
    }
    trace_virtio_mem_unplugged_all();
    virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);
    return 0;
}

static void virtio_mem_unplug_all_request(VirtIOMEM *vmem,
                                          VirtQueueElement *elem)
{
    trace_virtio_mem_unplug_all_request();
    if (virtio_mem_unplug_all(vmem)) {
        virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_BUSY);
    } else {
        virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ACK);
    }
}

static void virtio_mem_state_request(VirtIOMEM *vmem, VirtQueueElement *elem,
                                     struct virtio_mem_req *req)
{
    const uint16_t nb_blocks = le16_to_cpu(req->u.state.nb_blocks);
    const uint64_t gpa = le64_to_cpu(req->u.state.addr);
    const uint64_t size = nb_blocks * vmem->block_size;
    struct virtio_mem_resp resp = {
        .type = cpu_to_le16(VIRTIO_MEM_RESP_ACK),
    };

    trace_virtio_mem_state_request(gpa, nb_blocks);
    if (!virtio_mem_valid_range(vmem, gpa, size)) {
        virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ERROR);
        return;
    }

    if (virtio_mem_test_bitmap(vmem, gpa, size, true)) {
        resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_PLUGGED);
    } else if (virtio_mem_test_bitmap(vmem, gpa, size, false)) {
        resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_UNPLUGGED);
    } else {
        resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_MIXED);
    }
    trace_virtio_mem_state_response(le16_to_cpu(resp.u.state.state));
    virtio_mem_send_response(vmem, elem, &resp);
}

static void virtio_mem_handle_request(VirtIODevice *vdev, VirtQueue *vq)
{
    const int len = sizeof(struct virtio_mem_req);
    VirtIOMEM *vmem = VIRTIO_MEM(vdev);
    VirtQueueElement *elem;
    struct virtio_mem_req req;
    uint16_t type;

    while (true) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            return;
        }

        if (iov_to_buf(elem->out_sg, elem->out_num, 0, &req, len) < len) {
            virtio_error(vdev, "virtio-mem protocol violation: invalid request"
                         " size: %d", len);
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            return;
        }

        if (iov_size(elem->in_sg, elem->in_num) <
            sizeof(struct virtio_mem_resp)) {
            virtio_error(vdev, "virtio-mem protocol violation: not enough space"
                         " for response: %zu",
                         iov_size(elem->in_sg, elem->in_num));
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            return;
        }

        type = le16_to_cpu(req.type);
        switch (type) {
        case VIRTIO_MEM_REQ_PLUG:
            virtio_mem_plug_request(vmem, elem, &req);
            break;
        case VIRTIO_MEM_REQ_UNPLUG:
            virtio_mem_unplug_request(vmem, elem, &req);
            break;
        case VIRTIO_MEM_REQ_UNPLUG_ALL:
            virtio_mem_unplug_all_request(vmem, elem);
            break;
        case VIRTIO_MEM_REQ_STATE:
            virtio_mem_state_request(vmem, elem, &req);
            break;
        default:
            virtio_error(vdev, "virtio-mem protocol violation: unknown request"
                         " type: %d", type);
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            return;
        }

        g_free(elem);
    }
}
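
/*
 * As a sketch of the guest-visible flow: to plug 2 MiB at GPA 0x140000000
 * with a 1 MiB block size, the driver queues a request with
 * type=VIRTIO_MEM_REQ_PLUG, u.plug.addr=0x140000000 and u.plug.nb_blocks=2,
 * and the loop above answers with a single struct virtio_mem_resp (here,
 * VIRTIO_MEM_RESP_ACK on success).
 */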

static void virtio_mem_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
    VirtIOMEM *vmem = VIRTIO_MEM(vdev);
    struct virtio_mem_config *config = (void *) config_data;

    config->block_size = cpu_to_le64(vmem->block_size);
    config->node_id = cpu_to_le16(vmem->node);
    config->requested_size = cpu_to_le64(vmem->requested_size);
    config->plugged_size = cpu_to_le64(vmem->size);
    config->addr = cpu_to_le64(vmem->addr);
    config->region_size = cpu_to_le64(memory_region_size(&vmem->memdev->mr));
    config->usable_region_size = cpu_to_le64(vmem->usable_region_size);
}

static uint64_t virtio_mem_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    VirtIOMEM *vmem = VIRTIO_MEM(vdev);

    if (ms->numa_state) {
#if defined(CONFIG_ACPI)
        virtio_add_feature(&features, VIRTIO_MEM_F_ACPI_PXM);
#endif
    }
    assert(vmem->unplugged_inaccessible != ON_OFF_AUTO_AUTO);
    if (vmem->unplugged_inaccessible == ON_OFF_AUTO_ON) {
        virtio_add_feature(&features, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE);
    }
    return features;
}

static int virtio_mem_validate_features(VirtIODevice *vdev)
{
    if (virtio_host_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE)) {
        return -EFAULT;
    }
    return 0;
}

static void virtio_mem_system_reset(void *opaque)
{
    VirtIOMEM *vmem = VIRTIO_MEM(opaque);

    /*
     * During usual resets, we will unplug all memory and shrink the usable
     * region size. This is, however, not possible in all scenarios. Then,
     * the guest has to deal with this manually (VIRTIO_MEM_REQ_UNPLUG_ALL).
     */
    virtio_mem_unplug_all(vmem);
}

static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    int nb_numa_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0;
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOMEM *vmem = VIRTIO_MEM(dev);
    uint64_t page_size;
    RAMBlock *rb;
    int ret;

    if (!vmem->memdev) {
        error_setg(errp, "'%s' property is not set", VIRTIO_MEM_MEMDEV_PROP);
        return;
    } else if (host_memory_backend_is_mapped(vmem->memdev)) {
        error_setg(errp, "'%s' property specifies a busy memdev: %s",
                   VIRTIO_MEM_MEMDEV_PROP,
                   object_get_canonical_path_component(OBJECT(vmem->memdev)));
        return;
    } else if (!memory_region_is_ram(&vmem->memdev->mr) ||
        memory_region_is_rom(&vmem->memdev->mr) ||
        !vmem->memdev->mr.ram_block) {
        error_setg(errp, "'%s' property specifies an unsupported memdev",
                   VIRTIO_MEM_MEMDEV_PROP);
        return;
    } else if (vmem->memdev->prealloc) {
        error_setg(errp, "'%s' property specifies a memdev with preallocation"
                   " enabled: %s. Instead, specify 'prealloc=on' for the"
                   " virtio-mem device.", VIRTIO_MEM_MEMDEV_PROP,
                   object_get_canonical_path_component(OBJECT(vmem->memdev)));
        return;
    }

    if ((nb_numa_nodes && vmem->node >= nb_numa_nodes) ||
        (!nb_numa_nodes && vmem->node)) {
        error_setg(errp, "'%s' property has value '%" PRIu32 "', which exceeds"
                   " the number of numa nodes: %d", VIRTIO_MEM_NODE_PROP,
                   vmem->node, nb_numa_nodes ? nb_numa_nodes : 1);
        return;
    }

    if (enable_mlock) {
        error_setg(errp, "Incompatible with mlock");
        return;
    }

    rb = vmem->memdev->mr.ram_block;
    page_size = qemu_ram_pagesize(rb);

#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
    switch (vmem->unplugged_inaccessible) {
    case ON_OFF_AUTO_AUTO:
        if (virtio_mem_has_shared_zeropage(rb)) {
            vmem->unplugged_inaccessible = ON_OFF_AUTO_OFF;
        } else {
            vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
        }
        break;
    case ON_OFF_AUTO_OFF:
        if (!virtio_mem_has_shared_zeropage(rb)) {
            warn_report("'%s' property set to 'off' with a memdev that does"
                        " not support the shared zeropage.",
                        VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP);
        }
        break;
    default:
        break;
    }
#else /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
    vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
#endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */

    /*
     * If the block size wasn't configured by the user, use a sane default. This
     * allows using hugetlbfs backends of any page size without manual
     * intervention.
     */
    if (!vmem->block_size) {
        vmem->block_size = virtio_mem_default_block_size(rb);
    }

    if (vmem->block_size < page_size) {
        error_setg(errp, "'%s' property has to be at least the page size (0x%"
                   PRIx64 ")", VIRTIO_MEM_BLOCK_SIZE_PROP, page_size);
        return;
    } else if (vmem->block_size < virtio_mem_default_block_size(rb)) {
        warn_report("'%s' property is smaller than the default block size (%"
                    PRIu64 " MiB)", VIRTIO_MEM_BLOCK_SIZE_PROP,
                    virtio_mem_default_block_size(rb) / MiB);
    }
    if (!QEMU_IS_ALIGNED(vmem->requested_size, vmem->block_size)) {
        error_setg(errp, "'%s' property has to be a multiple of '%s' (0x%"
                   PRIx64 ")", VIRTIO_MEM_REQUESTED_SIZE_PROP,
                   VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
        return;
    } else if (!QEMU_IS_ALIGNED(vmem->addr, vmem->block_size)) {
        error_setg(errp, "'%s' property has to be a multiple of '%s' (0x%"
                   PRIx64 ")", VIRTIO_MEM_ADDR_PROP,
                   VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
        return;
    } else if (!QEMU_IS_ALIGNED(memory_region_size(&vmem->memdev->mr),
                                vmem->block_size)) {
        error_setg(errp, "'%s' property memdev size has to be a multiple of"
                   " '%s' (0x%" PRIx64 ")", VIRTIO_MEM_MEMDEV_PROP,
                   VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
        return;
    }

    if (ram_block_coordinated_discard_require(true)) {
        error_setg(errp, "Discarding RAM is disabled");
        return;
    }

    ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
    if (ret) {
        error_setg_errno(errp, -ret, "Unexpected error discarding RAM");
        ram_block_coordinated_discard_require(false);
        return;
    }

    virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);

    vmem->bitmap_size = memory_region_size(&vmem->memdev->mr) /
                        vmem->block_size;
    vmem->bitmap = bitmap_new(vmem->bitmap_size);

    virtio_init(vdev, VIRTIO_ID_MEM, sizeof(struct virtio_mem_config));
    vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request);

    host_memory_backend_set_mapped(vmem->memdev, true);
    vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
    if (vmem->early_migration) {
        vmstate_register(VMSTATE_IF(vmem), VMSTATE_INSTANCE_ID_ANY,
                         &vmstate_virtio_mem_device_early, vmem);
    }
    qemu_register_reset(virtio_mem_system_reset, vmem);

    /*
     * Set ourselves as RamDiscardManager before the plug handler maps the
     * memory region and exposes it via an address space.
     */
    memory_region_set_ram_discard_manager(&vmem->memdev->mr,
                                          RAM_DISCARD_MANAGER(vmem));
}

static void virtio_mem_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOMEM *vmem = VIRTIO_MEM(dev);

    /*
     * The unplug handler unmapped the memory region, it cannot be
     * found via an address space anymore. Unset ourselves.
     */
    memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
    qemu_unregister_reset(virtio_mem_system_reset, vmem);
    if (vmem->early_migration) {
        vmstate_unregister(VMSTATE_IF(vmem), &vmstate_virtio_mem_device_early,
                           vmem);
    }
    vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem));
    host_memory_backend_set_mapped(vmem->memdev, false);
    virtio_del_queue(vdev, 0);
    virtio_cleanup(vdev);
    g_free(vmem->bitmap);
    ram_block_coordinated_discard_require(false);
}

static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg,
                                       uint64_t offset, uint64_t size)
{
    RAMBlock *rb = vmem->memdev->mr.ram_block;

    return ram_block_discard_range(rb, offset, size) ? -EINVAL : 0;
}

static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)
{
    /* Make sure all memory is really discarded after migration. */
    return virtio_mem_for_each_unplugged_range(vmem, NULL,
                                               virtio_mem_discard_range_cb);
}

static int virtio_mem_post_load(void *opaque, int version_id)
{
    VirtIOMEM *vmem = VIRTIO_MEM(opaque);
    RamDiscardListener *rdl;
    int ret;

    /*
     * We started out with all memory discarded and our memory region is mapped
     * into an address space. Replay, now that we updated the bitmap.
     */
    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
        ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
                                                 virtio_mem_notify_populate_cb);
        if (ret) {
            return ret;
        }
    }

    if (migration_in_incoming_postcopy()) {
        return 0;
    }

    return virtio_mem_restore_unplugged(vmem);
}

typedef struct VirtIOMEMMigSanityChecks {
    VirtIOMEM *parent;
    uint64_t addr;
    uint64_t region_size;
    uint64_t block_size;
    uint32_t node;
} VirtIOMEMMigSanityChecks;

static int virtio_mem_mig_sanity_checks_pre_save(void *opaque)
{
    VirtIOMEMMigSanityChecks *tmp = opaque;
    VirtIOMEM *vmem = tmp->parent;

    tmp->addr = vmem->addr;
    tmp->region_size = memory_region_size(&vmem->memdev->mr);
    tmp->block_size = vmem->block_size;
    tmp->node = vmem->node;
    return 0;
}

static int virtio_mem_mig_sanity_checks_post_load(void *opaque, int version_id)
{
    VirtIOMEMMigSanityChecks *tmp = opaque;
    VirtIOMEM *vmem = tmp->parent;
    const uint64_t new_region_size = memory_region_size(&vmem->memdev->mr);

    if (tmp->addr != vmem->addr) {
        error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
                     VIRTIO_MEM_ADDR_PROP, tmp->addr, vmem->addr);
        return -EINVAL;
    }
    /*
     * Note: Preparation for resizeable memory regions. The maximum size
     * of the memory region must not change during migration.
     */
    if (tmp->region_size != new_region_size) {
        error_report("Property '%s' size changed from 0x%" PRIx64 " to 0x%"
                     PRIx64, VIRTIO_MEM_MEMDEV_PROP, tmp->region_size,
                     new_region_size);
        return -EINVAL;
    }
    if (tmp->block_size != vmem->block_size) {
        error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
                     VIRTIO_MEM_BLOCK_SIZE_PROP, tmp->block_size,
                     vmem->block_size);
        return -EINVAL;
    }
    if (tmp->node != vmem->node) {
        error_report("Property '%s' changed from %" PRIu32 " to %" PRIu32,
                     VIRTIO_MEM_NODE_PROP, tmp->node, vmem->node);
        return -EINVAL;
    }
    return 0;
}

static const VMStateDescription vmstate_virtio_mem_sanity_checks = {
    .name = "virtio-mem-device/sanity-checks",
    .pre_save = virtio_mem_mig_sanity_checks_pre_save,
    .post_load = virtio_mem_mig_sanity_checks_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(addr, VirtIOMEMMigSanityChecks),
        VMSTATE_UINT64(region_size, VirtIOMEMMigSanityChecks),
        VMSTATE_UINT64(block_size, VirtIOMEMMigSanityChecks),
        VMSTATE_UINT32(node, VirtIOMEMMigSanityChecks),
        VMSTATE_END_OF_LIST(),
    },
};

static bool virtio_mem_vmstate_field_exists(void *opaque, int version_id)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(opaque);

    /* With early migration, these fields were already migrated. */
    return !vmem->early_migration;
}

static const VMStateDescription vmstate_virtio_mem_device = {
    .name = "virtio-mem-device",
    .minimum_version_id = 1,
    .version_id = 1,
    .priority = MIG_PRI_VIRTIO_MEM,
    .post_load = virtio_mem_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_WITH_TMP_TEST(VirtIOMEM, virtio_mem_vmstate_field_exists,
                              VirtIOMEMMigSanityChecks,
                              vmstate_virtio_mem_sanity_checks),
        VMSTATE_UINT64(usable_region_size, VirtIOMEM),
        VMSTATE_UINT64_TEST(size, VirtIOMEM, virtio_mem_vmstate_field_exists),
        VMSTATE_UINT64(requested_size, VirtIOMEM),
        VMSTATE_BITMAP_TEST(bitmap, VirtIOMEM, virtio_mem_vmstate_field_exists,
                            0, bitmap_size),
        VMSTATE_END_OF_LIST()
    },
};

/*
 * Transfer properties that are immutable while migration is active early,
 * such that we have this information around before migrating any RAM
 * content.
 *
 * Note that virtio_mem_is_busy() makes sure these properties can no longer
 * change on the migration source until migration has completed.
 *
 * With QEMU compat machines, we transmit these properties later, via
 * vmstate_virtio_mem_device instead -- see virtio_mem_vmstate_field_exists().
 */
static const VMStateDescription vmstate_virtio_mem_device_early = {
    .name = "virtio-mem-device-early",
    .minimum_version_id = 1,
    .version_id = 1,
    .early_setup = true,
    .fields = (VMStateField[]) {
        VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks,
                         vmstate_virtio_mem_sanity_checks),
        VMSTATE_UINT64(size, VirtIOMEM),
        VMSTATE_BITMAP(bitmap, VirtIOMEM, 0, bitmap_size),
        VMSTATE_END_OF_LIST()
    },
};

static const VMStateDescription vmstate_virtio_mem = {
    .name = "virtio-mem",
    .minimum_version_id = 1,
    .version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
};

static void virtio_mem_fill_device_info(const VirtIOMEM *vmem,
                                        VirtioMEMDeviceInfo *vi)
{
    vi->memaddr = vmem->addr;
    vi->node = vmem->node;
    vi->requested_size = vmem->requested_size;
    vi->size = vmem->size;
    vi->max_size = memory_region_size(&vmem->memdev->mr);
    vi->block_size = vmem->block_size;
    vi->memdev = object_get_canonical_path(OBJECT(vmem->memdev));
}

static MemoryRegion *virtio_mem_get_memory_region(VirtIOMEM *vmem, Error **errp)
{
    if (!vmem->memdev) {
        error_setg(errp, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP);
        return NULL;
    }

    return &vmem->memdev->mr;
}

static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem,
                                                Notifier *notifier)
{
    notifier_list_add(&vmem->size_change_notifiers, notifier);
}

static void virtio_mem_remove_size_change_notifier(VirtIOMEM *vmem,
                                                   Notifier *notifier)
{
    notifier_remove(notifier);
}

static void virtio_mem_get_size(Object *obj, Visitor *v, const char *name,
                                void *opaque, Error **errp)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(obj);
    uint64_t value = vmem->size;

    visit_type_size(v, name, &value, errp);
}

static void virtio_mem_get_requested_size(Object *obj, Visitor *v,
                                          const char *name, void *opaque,
                                          Error **errp)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(obj);
    uint64_t value = vmem->requested_size;

    visit_type_size(v, name, &value, errp);
}

static void virtio_mem_set_requested_size(Object *obj, Visitor *v,
                                          const char *name, void *opaque,
                                          Error **errp)
{
    VirtIOMEM *vmem = VIRTIO_MEM(obj);
    uint64_t value;

    if (!visit_type_size(v, name, &value, errp)) {
        return;
    }

    /*
     * The block size and memory backend are not fixed until the device is
     * realized. realize() will verify these properties then.
     */
    if (DEVICE(obj)->realized) {
        if (!QEMU_IS_ALIGNED(value, vmem->block_size)) {
            error_setg(errp, "'%s' has to be a multiple of '%s' (0x%" PRIx64
                       ")", name, VIRTIO_MEM_BLOCK_SIZE_PROP,
                       vmem->block_size);
            return;
        } else if (value > memory_region_size(&vmem->memdev->mr)) {
            error_setg(errp, "'%s' cannot exceed the memory backend size"
                       " (0x%" PRIx64 ")", name,
                       memory_region_size(&vmem->memdev->mr));
            return;
        }

        if (value != vmem->requested_size) {
            virtio_mem_resize_usable_region(vmem, value, false);
            vmem->requested_size = value;
        }
        /*
         * Trigger a config update so the guest gets notified. We trigger
         * even if the size didn't change (especially helpful for debugging).
         */
        virtio_notify_config(VIRTIO_DEVICE(vmem));
    } else {
        vmem->requested_size = value;
    }
}

static void virtio_mem_get_block_size(Object *obj, Visitor *v, const char *name,
                                      void *opaque, Error **errp)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(obj);
    uint64_t value = vmem->block_size;

    /*
     * If not configured by the user (and we're not realized yet), use the
     * default block size we would use with the current memory backend.
     */
    if (!value) {
        if (vmem->memdev && memory_region_is_ram(&vmem->memdev->mr)) {
            value = virtio_mem_default_block_size(vmem->memdev->mr.ram_block);
        } else {
            value = virtio_mem_thp_size();
        }
    }

    visit_type_size(v, name, &value, errp);
}

static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name,
                                      void *opaque, Error **errp)
{
    VirtIOMEM *vmem = VIRTIO_MEM(obj);
    uint64_t value;

    if (DEVICE(obj)->realized) {
        error_setg(errp, "'%s' cannot be changed", name);
        return;
    }

    if (!visit_type_size(v, name, &value, errp)) {
        return;
    }

    if (value < VIRTIO_MEM_MIN_BLOCK_SIZE) {
        error_setg(errp, "'%s' property has to be at least 0x%" PRIx32, name,
                   VIRTIO_MEM_MIN_BLOCK_SIZE);
        return;
    } else if (!is_power_of_2(value)) {
        error_setg(errp, "'%s' property has to be a power of two", name);
        return;
    }
    vmem->block_size = value;
}

static void virtio_mem_instance_init(Object *obj)
{
    VirtIOMEM *vmem = VIRTIO_MEM(obj);

    notifier_list_init(&vmem->size_change_notifiers);
    QLIST_INIT(&vmem->rdl_list);

    object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size,
                        NULL, NULL, NULL);
    object_property_add(obj, VIRTIO_MEM_REQUESTED_SIZE_PROP, "size",
                        virtio_mem_get_requested_size,
                        virtio_mem_set_requested_size, NULL, NULL);
    object_property_add(obj, VIRTIO_MEM_BLOCK_SIZE_PROP, "size",
                        virtio_mem_get_block_size, virtio_mem_set_block_size,
                        NULL, NULL);
}

static Property virtio_mem_properties[] = {
    DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0),
    DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0),
    DEFINE_PROP_BOOL(VIRTIO_MEM_PREALLOC_PROP, VirtIOMEM, prealloc, false),
    DEFINE_PROP_LINK(VIRTIO_MEM_MEMDEV_PROP, VirtIOMEM, memdev,
                     TYPE_MEMORY_BACKEND, HostMemoryBackend *),
#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
    DEFINE_PROP_ON_OFF_AUTO(VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP, VirtIOMEM,
                            unplugged_inaccessible, ON_OFF_AUTO_AUTO),
#endif
    DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM,
                     early_migration, true),
    DEFINE_PROP_END_OF_LIST(),
};

static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm,
                                                   const MemoryRegion *mr)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);

    g_assert(mr == &vmem->memdev->mr);
    return vmem->block_size;
}

static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
                                        const MemoryRegionSection *s)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
    uint64_t start_gpa = vmem->addr + s->offset_within_region;
    uint64_t end_gpa = start_gpa + int128_get64(s->size);

    g_assert(s->mr == &vmem->memdev->mr);

    start_gpa = QEMU_ALIGN_DOWN(start_gpa, vmem->block_size);
    end_gpa = QEMU_ALIGN_UP(end_gpa, vmem->block_size);

    if (!virtio_mem_valid_range(vmem, start_gpa, end_gpa - start_gpa)) {
        return false;
    }

    return virtio_mem_test_bitmap(vmem, start_gpa, end_gpa - start_gpa, true);
}

struct VirtIOMEMReplayData {
    void *fn;
    void *opaque;
};

static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg)
{
    struct VirtIOMEMReplayData *data = arg;

    return ((ReplayRamPopulate)data->fn)(s, data->opaque);
}

static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
                                           MemoryRegionSection *s,
                                           ReplayRamPopulate replay_fn,
                                           void *opaque)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
    struct VirtIOMEMReplayData data = {
        .fn = replay_fn,
        .opaque = opaque,
    };

    g_assert(s->mr == &vmem->memdev->mr);
    return virtio_mem_for_each_plugged_section(vmem, s, &data,
                                            virtio_mem_rdm_replay_populated_cb);
}

static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s,
                                              void *arg)
{
    struct VirtIOMEMReplayData *data = arg;

    ((ReplayRamDiscard)data->fn)(s, data->opaque);
    return 0;
}

static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
                                            MemoryRegionSection *s,
                                            ReplayRamDiscard replay_fn,
                                            void *opaque)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
    struct VirtIOMEMReplayData data = {
        .fn = replay_fn,
        .opaque = opaque,
    };

    g_assert(s->mr == &vmem->memdev->mr);
    virtio_mem_for_each_unplugged_section(vmem, s, &data,
                                          virtio_mem_rdm_replay_discarded_cb);
}

static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
                                             RamDiscardListener *rdl,
                                             MemoryRegionSection *s)
{
    VirtIOMEM *vmem = VIRTIO_MEM(rdm);
    int ret;

    g_assert(s->mr == &vmem->memdev->mr);
    rdl->section = memory_region_section_new_copy(s);

    QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next);
    ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
                                              virtio_mem_notify_populate_cb);
    if (ret) {
        error_report("%s: Replaying plugged ranges failed: %s", __func__,
                     strerror(-ret));
    }
}

static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm,
                                               RamDiscardListener *rdl)
{
    VirtIOMEM *vmem = VIRTIO_MEM(rdm);

    g_assert(rdl->section->mr == &vmem->memdev->mr);
    if (vmem->size) {
        if (rdl->double_discard_supported) {
            rdl->notify_discard(rdl, rdl->section);
        } else {
            virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
                                                virtio_mem_notify_discard_cb);
        }
    }

    memory_region_section_free_copy(rdl->section);
    rdl->section = NULL;
    QLIST_REMOVE(rdl, next);
}

static void virtio_mem_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
    VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass);
    RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass);

    device_class_set_props(dc, virtio_mem_properties);
    dc->vmsd = &vmstate_virtio_mem;

    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    vdc->realize = virtio_mem_device_realize;
    vdc->unrealize = virtio_mem_device_unrealize;
    vdc->get_config = virtio_mem_get_config;
    vdc->get_features = virtio_mem_get_features;
    vdc->validate_features = virtio_mem_validate_features;
    vdc->vmsd = &vmstate_virtio_mem_device;

    vmc->fill_device_info = virtio_mem_fill_device_info;
    vmc->get_memory_region = virtio_mem_get_memory_region;
    vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier;
    vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier;

    rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity;
    rdmc->is_populated = virtio_mem_rdm_is_populated;
    rdmc->replay_populated = virtio_mem_rdm_replay_populated;
    rdmc->replay_discarded = virtio_mem_rdm_replay_discarded;
    rdmc->register_listener = virtio_mem_rdm_register_listener;
    rdmc->unregister_listener = virtio_mem_rdm_unregister_listener;
}

static const TypeInfo virtio_mem_info = {
    .name = TYPE_VIRTIO_MEM,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIOMEM),
    .instance_init = virtio_mem_instance_init,
    .class_init = virtio_mem_class_init,
    .class_size = sizeof(VirtIOMEMClass),
    .interfaces = (InterfaceInfo[]) {
        { TYPE_RAM_DISCARD_MANAGER },
        { }
    },
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_mem_info);
}

type_init(virtio_register_types)