xref: /openbmc/qemu/hw/virtio/virtio-mem.c (revision 6a0e10b7)
1 /*
2  * Virtio MEM device
3  *
4  * Copyright (C) 2020 Red Hat, Inc.
5  *
6  * Authors:
7  *  David Hildenbrand <david@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.
10  * See the COPYING file in the top-level directory.
11  */
12 
13 #include "qemu/osdep.h"
14 #include "qemu/iov.h"
15 #include "qemu/cutils.h"
16 #include "qemu/error-report.h"
17 #include "qemu/units.h"
18 #include "sysemu/numa.h"
19 #include "sysemu/sysemu.h"
20 #include "sysemu/reset.h"
21 #include "sysemu/runstate.h"
22 #include "hw/virtio/virtio.h"
23 #include "hw/virtio/virtio-bus.h"
24 #include "hw/virtio/virtio-mem.h"
25 #include "qapi/error.h"
26 #include "qapi/visitor.h"
27 #include "exec/ram_addr.h"
28 #include "migration/misc.h"
29 #include "hw/boards.h"
30 #include "hw/qdev-properties.h"
31 #include CONFIG_DEVICES
32 #include "trace.h"
33 
34 static const VMStateDescription vmstate_virtio_mem_device_early;
35 
36 /*
37  * We only had legacy x86 guests that did not support
38  * VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE. Other targets don't have legacy guests.
39  */
40 #if defined(TARGET_X86_64) || defined(TARGET_I386)
41 #define VIRTIO_MEM_HAS_LEGACY_GUESTS
42 #endif
43 
44 /*
45  * Let's not allow blocks smaller than 1 MiB, for example, to keep the tracking
46  * bitmap small.
47  */
48 #define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)(1 * MiB))
49 
50 static uint32_t virtio_mem_default_thp_size(void)
51 {
52     uint32_t default_thp_size = VIRTIO_MEM_MIN_BLOCK_SIZE;
53 
54 #if defined(__x86_64__) || defined(__arm__) || defined(__powerpc64__)
55     default_thp_size = 2 * MiB;
56 #elif defined(__aarch64__)
57     if (qemu_real_host_page_size() == 4 * KiB) {
58         default_thp_size = 2 * MiB;
59     } else if (qemu_real_host_page_size() == 16 * KiB) {
60         default_thp_size = 32 * MiB;
61     } else if (qemu_real_host_page_size() == 64 * KiB) {
62         default_thp_size = 512 * MiB;
63     }
64 #endif
65 
66     return default_thp_size;
67 }
68 
69 /*
70  * The minimum memslot size depends on this setting ("sane default"), the
71  * device block size, and the memory backend page size. The last (or single)
72  * memslot might be smaller than this constant.
73  */
74 #define VIRTIO_MEM_MIN_MEMSLOT_SIZE (1 * GiB)
75 
76 /*
77  * We want to have a reasonable default block size such that
78  * 1. We avoid splitting THPs when unplugging memory, which degrades
79  *    performance.
80  * 2. We avoid placing THPs for plugged blocks that also cover unplugged
81  *    blocks.
82  *
83  * The actual THP size might differ between Linux kernels, so we try to probe
84  * it. In the future (if we ever run into issues regarding 2.), we might want
85  * to disable THP in case we fail to properly probe the THP size, or if the
86  * block size is configured smaller than the THP size.
87  */
88 static uint32_t thp_size;
89 
90 #define HPAGE_PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
91 #define HPAGE_PATH "/sys/kernel/mm/transparent_hugepage/"
92 static uint32_t virtio_mem_thp_size(void)
93 {
94     gchar *content = NULL;
95     const char *endptr;
96     uint64_t tmp;
97 
98     if (thp_size) {
99         return thp_size;
100     }
101 
102     /* No THP -> no restrictions. */
103     if (!g_file_test(HPAGE_PATH, G_FILE_TEST_EXISTS)) {
104         thp_size = VIRTIO_MEM_MIN_BLOCK_SIZE;
105         return thp_size;
106     }
107 
108     /*
109      * Try to probe the actual THP size; fall back to (sane but possibly
110      * incorrect) default sizes.
111      */
112     if (g_file_get_contents(HPAGE_PMD_SIZE_PATH, &content, NULL, NULL) &&
113         !qemu_strtou64(content, &endptr, 0, &tmp) &&
114         (!endptr || *endptr == '\n')) {
115         /* Sanity-check the value and fall back to something reasonable. */
116         if (!tmp || !is_power_of_2(tmp)) {
117             warn_report("Read unsupported THP size: %" PRIx64, tmp);
118         } else {
119             thp_size = tmp;
120         }
121     }
122 
123     if (!thp_size) {
124         thp_size = virtio_mem_default_thp_size();
125         warn_report("Could not detect THP size, falling back to %" PRIu64
126                     " MiB.", thp_size / MiB);
127     }
128 
129     g_free(content);
130     return thp_size;
131 }
132 
133 static uint64_t virtio_mem_default_block_size(RAMBlock *rb)
134 {
135     const uint64_t page_size = qemu_ram_pagesize(rb);
136 
137     /* We can have hugetlbfs with a page size smaller than the THP size. */
138     if (page_size == qemu_real_host_page_size()) {
139         return MAX(page_size, virtio_mem_thp_size());
140     }
141     return MAX(page_size, VIRTIO_MEM_MIN_BLOCK_SIZE);
142 }
143 
144 #if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
145 static bool virtio_mem_has_shared_zeropage(RAMBlock *rb)
146 {
147     /*
148      * We only have a guaranteed shared zeropage on ordinary MAP_PRIVATE
149      * anonymous RAM. In any other case, reading unplugged memory *can*
150      * populate a fresh page, consuming actual memory.
151      */
152     return !qemu_ram_is_shared(rb) && qemu_ram_get_fd(rb) < 0 &&
153            qemu_ram_pagesize(rb) == qemu_real_host_page_size();
154 }
155 #endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
156 
157 /*
158  * Size the usable region bigger than the requested size, if possible. In
159  * particular, Linux guests will only add (aligned) memory blocks if they
160  * fully fit into the usable region, but plug+online only a subset of the
161  * pages. The memory block size corresponds mostly to the section size.
162  *
163  * This allows, e.g., adding 20 MiB with a section size of 128 MiB on x86_64,
164  * and with a section size of 512 MiB on arm64 (as long as the start address
165  * is properly aligned, similar to ordinary DIMMs).
166  *
167  * We can change this at any time, and maybe even make it configurable if
168  * necessary (as the section size can change). But it's more likely that the
169  * section size will get smaller rather than bigger over time.
170  */
171 #if defined(TARGET_X86_64) || defined(TARGET_I386)
172 #define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB))
173 #elif defined(TARGET_ARM)
174 #define VIRTIO_MEM_USABLE_EXTENT (2 * (512 * MiB))
175 #else
176 #error VIRTIO_MEM_USABLE_EXTENT not defined
177 #endif
178 
179 static bool virtio_mem_is_busy(void)
180 {
181     /*
182      * Postcopy cannot handle concurrent discards and we don't want to migrate
183      * pages on-demand with stale content when plugging new blocks.
184      *
185      * For precopy, we don't want unplugged blocks in our migration stream, and
186      * when plugging new blocks, the page content might differ between source
187      * and destination (observable by the guest when not initializing pages
188      * after plugging them) until we're running on the destination (as we didn't
189      * migrate these blocks when they were unplugged).
190      */
191     return migration_in_incoming_postcopy() || !migration_is_idle();
192 }
193 
194 typedef int (*virtio_mem_range_cb)(VirtIOMEM *vmem, void *arg,
195                                    uint64_t offset, uint64_t size);
196 
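/*
 * Walk all contiguous unplugged (cleared-bit) ranges in the bitmap and invoke
 * the callback for each of them; stop early on the first callback error.
 */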
197 static int virtio_mem_for_each_unplugged_range(VirtIOMEM *vmem, void *arg,
198                                                virtio_mem_range_cb cb)
199 {
200     unsigned long first_zero_bit, last_zero_bit;
201     uint64_t offset, size;
202     int ret = 0;
203 
204     first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size);
205     while (first_zero_bit < vmem->bitmap_size) {
206         offset = first_zero_bit * vmem->block_size;
207         last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
208                                       first_zero_bit + 1) - 1;
209         size = (last_zero_bit - first_zero_bit + 1) * vmem->block_size;
210 
211         ret = cb(vmem, arg, offset, size);
212         if (ret) {
213             break;
214         }
215         first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
216                                             last_zero_bit + 2);
217     }
218     return ret;
219 }
220 
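/*
 * Same as above, but walking contiguous plugged (set-bit) ranges.
 */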
221 static int virtio_mem_for_each_plugged_range(VirtIOMEM *vmem, void *arg,
222                                              virtio_mem_range_cb cb)
223 {
224     unsigned long first_bit, last_bit;
225     uint64_t offset, size;
226     int ret = 0;
227 
228     first_bit = find_first_bit(vmem->bitmap, vmem->bitmap_size);
229     while (first_bit < vmem->bitmap_size) {
230         offset = first_bit * vmem->block_size;
231         last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
232                                       first_bit + 1) - 1;
233         size = (last_bit - first_bit + 1) * vmem->block_size;
234 
235         ret = cb(vmem, arg, offset, size);
236         if (ret) {
237             break;
238         }
239         first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
240                                   last_bit + 2);
241     }
242     return ret;
243 }
244 
245 /*
246  * Adjust the memory section to cover the intersection with the given range.
247  *
248  * Returns false if the intersection is empty, otherwise returns true.
249  */
250 static bool virtio_mem_intersect_memory_section(MemoryRegionSection *s,
251                                                 uint64_t offset, uint64_t size)
252 {
253     uint64_t start = MAX(s->offset_within_region, offset);
254     uint64_t end = MIN(s->offset_within_region + int128_get64(s->size),
255                        offset + size);
256 
257     if (end <= start) {
258         return false;
259     }
260 
261     s->offset_within_address_space += start - s->offset_within_region;
262     s->offset_within_region = start;
263     s->size = int128_make64(end - start);
264     return true;
265 }
266 
267 typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg);
268 
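/*
 * Invoke the callback for each plugged range intersecting the given memory
 * region section; virtio_mem_for_each_unplugged_section() below does the same
 * for unplugged ranges.
 */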
269 static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
270                                                MemoryRegionSection *s,
271                                                void *arg,
272                                                virtio_mem_section_cb cb)
273 {
274     unsigned long first_bit, last_bit;
275     uint64_t offset, size;
276     int ret = 0;
277 
278     first_bit = s->offset_within_region / vmem->block_size;
279     first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
280     while (first_bit < vmem->bitmap_size) {
281         MemoryRegionSection tmp = *s;
282 
283         offset = first_bit * vmem->block_size;
284         last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
285                                       first_bit + 1) - 1;
286         size = (last_bit - first_bit + 1) * vmem->block_size;
287 
288         if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
289             break;
290         }
291         ret = cb(&tmp, arg);
292         if (ret) {
293             break;
294         }
295         first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
296                                   last_bit + 2);
297     }
298     return ret;
299 }
300 
301 static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem,
302                                                  MemoryRegionSection *s,
303                                                  void *arg,
304                                                  virtio_mem_section_cb cb)
305 {
306     unsigned long first_bit, last_bit;
307     uint64_t offset, size;
308     int ret = 0;
309 
310     first_bit = s->offset_within_region / vmem->block_size;
311     first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
312     while (first_bit < vmem->bitmap_size) {
313         MemoryRegionSection tmp = *s;
314 
315         offset = first_bit * vmem->block_size;
316         last_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
317                                  first_bit + 1) - 1;
318         size = (last_bit - first_bit + 1) * vmem->block_size;
319 
320         if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
321             break;
322         }
323         ret = cb(&tmp, arg);
324         if (ret) {
325             break;
326         }
327         first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
328                                        last_bit + 2);
329     }
330     return ret;
331 }
332 
333 static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg)
334 {
335     RamDiscardListener *rdl = arg;
336 
337     return rdl->notify_populate(rdl, s);
338 }
339 
340 static int virtio_mem_notify_discard_cb(MemoryRegionSection *s, void *arg)
341 {
342     RamDiscardListener *rdl = arg;
343 
344     rdl->notify_discard(rdl, s);
345     return 0;
346 }
347 
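/*
 * Notify all registered RamDiscardListeners about an unplugged (discarded)
 * range; offset and size are relative to the device memory region.
 */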
348 static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
349                                      uint64_t size)
350 {
351     RamDiscardListener *rdl;
352 
353     QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
354         MemoryRegionSection tmp = *rdl->section;
355 
356         if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
357             continue;
358         }
359         rdl->notify_discard(rdl, &tmp);
360     }
361 }
362 
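/*
 * Notify all registered RamDiscardListeners about a newly plugged range; on
 * error, roll back by notifying discard to the listeners already notified.
 */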
363 static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
364                                   uint64_t size)
365 {
366     RamDiscardListener *rdl, *rdl2;
367     int ret = 0;
368 
369     QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
370         MemoryRegionSection tmp = *rdl->section;
371 
372         if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
373             continue;
374         }
375         ret = rdl->notify_populate(rdl, &tmp);
376         if (ret) {
377             break;
378         }
379     }
380 
381     if (ret) {
382         /* Roll back: notify discard for all listeners we already notified. */
383         QLIST_FOREACH(rdl2, &vmem->rdl_list, next) {
384             MemoryRegionSection tmp = *rdl2->section;
385 
386             if (rdl2 == rdl) {
387                 break;
388             }
389             if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
390                 continue;
391             }
392             rdl2->notify_discard(rdl2, &tmp);
393         }
394     }
395     return ret;
396 }
397 
398 static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem)
399 {
400     RamDiscardListener *rdl;
401 
402     if (!vmem->size) {
403         return;
404     }
405 
406     QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
407         if (rdl->double_discard_supported) {
408             rdl->notify_discard(rdl, rdl->section);
409         } else {
410             virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
411                                                 virtio_mem_notify_discard_cb);
412         }
413     }
414 }
415 
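/* Return true if all blocks in the given GPA range are currently plugged. */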
416 static bool virtio_mem_is_range_plugged(const VirtIOMEM *vmem,
417                                         uint64_t start_gpa, uint64_t size)
418 {
419     const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size;
420     const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1;
421     unsigned long found_bit;
422 
423     /* We fake a shorter bitmap to avoid searching too far. */
424     found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit);
425     return found_bit > last_bit;
426 }
427 
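/* Return true if all blocks in the given GPA range are currently unplugged. */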
428 static bool virtio_mem_is_range_unplugged(const VirtIOMEM *vmem,
429                                           uint64_t start_gpa, uint64_t size)
430 {
431     const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size;
432     const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1;
433     unsigned long found_bit;
434 
435     /* We fake a shorter bitmap to avoid searching too far. */
436     found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit);
437     return found_bit > last_bit;
438 }
439 
440 static void virtio_mem_set_range_plugged(VirtIOMEM *vmem, uint64_t start_gpa,
441                                          uint64_t size)
442 {
443     const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size;
444     const unsigned long nbits = size / vmem->block_size;
445 
446     bitmap_set(vmem->bitmap, bit, nbits);
447 }
448 
449 static void virtio_mem_set_range_unplugged(VirtIOMEM *vmem, uint64_t start_gpa,
450                                            uint64_t size)
451 {
452     const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size;
453     const unsigned long nbits = size / vmem->block_size;
454 
455     bitmap_clear(vmem->bitmap, bit, nbits);
456 }
457 
458 static void virtio_mem_send_response(VirtIOMEM *vmem, VirtQueueElement *elem,
459                                      struct virtio_mem_resp *resp)
460 {
461     VirtIODevice *vdev = VIRTIO_DEVICE(vmem);
462     VirtQueue *vq = vmem->vq;
463 
464     trace_virtio_mem_send_response(le16_to_cpu(resp->type));
465     iov_from_buf(elem->in_sg, elem->in_num, 0, resp, sizeof(*resp));
466 
467     virtqueue_push(vq, elem, sizeof(*resp));
468     virtio_notify(vdev, vq);
469 }
470 
471 static void virtio_mem_send_response_simple(VirtIOMEM *vmem,
472                                             VirtQueueElement *elem,
473                                             uint16_t type)
474 {
475     struct virtio_mem_resp resp = {
476         .type = cpu_to_le16(type),
477     };
478 
479     virtio_mem_send_response(vmem, elem, &resp);
480 }
481 
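/*
 * Check that a guest-requested range is aligned to the block size, does not
 * wrap around and lies completely within the usable region.
 */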
482 static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa,
483                                    uint64_t size)
484 {
485     if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) {
486         return false;
487     }
488     if (gpa + size < gpa || !size) {
489         return false;
490     }
491     if (gpa < vmem->addr || gpa >= vmem->addr + vmem->usable_region_size) {
492         return false;
493     }
494     if (gpa + size > vmem->addr + vmem->usable_region_size) {
495         return false;
496     }
497     return true;
498 }
499 
500 static void virtio_mem_activate_memslot(VirtIOMEM *vmem, unsigned int idx)
501 {
502     const uint64_t memslot_offset = idx * vmem->memslot_size;
503 
504     assert(vmem->memslots);
505 
506     /*
507      * Instead of enabling/disabling memslots, we add/remove them. This should
508      * make address space updates faster, because we don't have to loop over
509      * many disabled subregions.
510      */
511     if (memory_region_is_mapped(&vmem->memslots[idx])) {
512         return;
513     }
514     memory_region_add_subregion(vmem->mr, memslot_offset, &vmem->memslots[idx]);
515 }
516 
517 static void virtio_mem_deactivate_memslot(VirtIOMEM *vmem, unsigned int idx)
518 {
519     assert(vmem->memslots);
520 
521     if (!memory_region_is_mapped(&vmem->memslots[idx])) {
522         return;
523     }
524     memory_region_del_subregion(vmem->mr, &vmem->memslots[idx]);
525 }
526 
527 static void virtio_mem_activate_memslots_to_plug(VirtIOMEM *vmem,
528                                                  uint64_t offset, uint64_t size)
529 {
530     const unsigned int start_idx = offset / vmem->memslot_size;
531     const unsigned int end_idx = (offset + size + vmem->memslot_size - 1) /
532                                  vmem->memslot_size;
533     unsigned int idx;
534 
535     assert(vmem->dynamic_memslots);
536 
537     /* Activate all involved memslots in a single transaction. */
538     memory_region_transaction_begin();
539     for (idx = start_idx; idx < end_idx; idx++) {
540         virtio_mem_activate_memslot(vmem, idx);
541     }
542     memory_region_transaction_commit();
543 }
544 
545 static void virtio_mem_deactivate_unplugged_memslots(VirtIOMEM *vmem,
546                                                      uint64_t offset,
547                                                      uint64_t size)
548 {
549     const uint64_t region_size = memory_region_size(&vmem->memdev->mr);
550     const unsigned int start_idx = offset / vmem->memslot_size;
551     const unsigned int end_idx = (offset + size + vmem->memslot_size - 1) /
552                                  vmem->memslot_size;
553     unsigned int idx;
554 
555     assert(vmem->dynamic_memslots);
556 
557     /* Deactivate all memslots with unplugged blocks in a single transaction. */
558     memory_region_transaction_begin();
559     for (idx = start_idx; idx < end_idx; idx++) {
560         const uint64_t memslot_offset = idx * vmem->memslot_size;
561         uint64_t memslot_size = vmem->memslot_size;
562 
563         /* The size of the last memslot might be smaller. */
564         if (idx == vmem->nb_memslots - 1) {
565             memslot_size = region_size - memslot_offset;
566         }
567 
568         /*
569          * Partially covered memslots might still have some blocks plugged and
570          * have to remain active if that's the case.
571          */
572         if (offset > memslot_offset ||
573             offset + size < memslot_offset + memslot_size) {
574             const uint64_t gpa = vmem->addr + memslot_offset;
575 
576             if (!virtio_mem_is_range_unplugged(vmem, gpa, memslot_size)) {
577                 continue;
578             }
579         }
580 
581         virtio_mem_deactivate_memslot(vmem, idx);
582     }
583     memory_region_transaction_commit();
584 }
585 
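/*
 * Plug or unplug the given block range: discard or preallocate the backing
 * memory, (de)activate dynamic memslots, notify RamDiscardListeners and
 * update the bitmap. Returns -EBUSY while migration is active or when the
 * backend refuses.
 */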
586 static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
587                                       uint64_t size, bool plug)
588 {
589     const uint64_t offset = start_gpa - vmem->addr;
590     RAMBlock *rb = vmem->memdev->mr.ram_block;
591     int ret = 0;
592 
593     if (virtio_mem_is_busy()) {
594         return -EBUSY;
595     }
596 
597     if (!plug) {
598         if (ram_block_discard_range(rb, offset, size)) {
599             return -EBUSY;
600         }
601         virtio_mem_notify_unplug(vmem, offset, size);
602         virtio_mem_set_range_unplugged(vmem, start_gpa, size);
603         /* Deactivate completely unplugged memslots after updating the state. */
604         if (vmem->dynamic_memslots) {
605             virtio_mem_deactivate_unplugged_memslots(vmem, offset, size);
606         }
607         return 0;
608     }
609 
610     if (vmem->prealloc) {
611         void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
612         int fd = memory_region_get_fd(&vmem->memdev->mr);
613         Error *local_err = NULL;
614 
615         if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) {
616             static bool warned;
617 
618             /*
619              * Warn only once; we don't want to fill the log with these
620              * warnings.
621              */
622             if (!warned) {
623                 warn_report_err(local_err);
624                 warned = true;
625             } else {
626                 error_free(local_err);
627             }
628             ret = -EBUSY;
629         }
630     }
631 
632     if (!ret) {
633         /*
634          * Activate before notifying and roll back in case of any errors.
635          *
636          * When activating a yet inactive memslot, memory notifiers will get
637          * notified about the added memory region and can register with the
638          * RamDiscardManager; this will traverse all plugged blocks and skip the
639          * blocks we are plugging here. The following notification will inform
640          * registered listeners about the blocks we're plugging.
641          */
642         if (vmem->dynamic_memslots) {
643             virtio_mem_activate_memslots_to_plug(vmem, offset, size);
644         }
645         ret = virtio_mem_notify_plug(vmem, offset, size);
646         if (ret && vmem->dynamic_memslots) {
647             virtio_mem_deactivate_unplugged_memslots(vmem, offset, size);
648         }
649     }
650     if (ret) {
651         /* Preallocation or a notifier might have populated memory; discard it. */
652         ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size);
653         return -EBUSY;
654     }
655 
656     virtio_mem_set_range_plugged(vmem, start_gpa, size);
657     return 0;
658 }
659 
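/*
 * Validate and process a guest (un)plug request; on success, update the
 * plugged size and notify size-change notifiers. Returns the VIRTIO_MEM_RESP_*
 * type to send back to the guest.
 */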
660 static int virtio_mem_state_change_request(VirtIOMEM *vmem, uint64_t gpa,
661                                            uint16_t nb_blocks, bool plug)
662 {
663     const uint64_t size = nb_blocks * vmem->block_size;
664     int ret;
665 
666     if (!virtio_mem_valid_range(vmem, gpa, size)) {
667         return VIRTIO_MEM_RESP_ERROR;
668     }
669 
670     if (plug && (vmem->size + size > vmem->requested_size)) {
671         return VIRTIO_MEM_RESP_NACK;
672     }
673 
674     /* Test that really all blocks are in the opposite state. */
675     if ((plug && !virtio_mem_is_range_unplugged(vmem, gpa, size)) ||
676         (!plug && !virtio_mem_is_range_plugged(vmem, gpa, size))) {
677         return VIRTIO_MEM_RESP_ERROR;
678     }
679 
680     ret = virtio_mem_set_block_state(vmem, gpa, size, plug);
681     if (ret) {
682         return VIRTIO_MEM_RESP_BUSY;
683     }
684     if (plug) {
685         vmem->size += size;
686     } else {
687         vmem->size -= size;
688     }
689     notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
690     return VIRTIO_MEM_RESP_ACK;
691 }
692 
693 static void virtio_mem_plug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
694                                     struct virtio_mem_req *req)
695 {
696     const uint64_t gpa = le64_to_cpu(req->u.plug.addr);
697     const uint16_t nb_blocks = le16_to_cpu(req->u.plug.nb_blocks);
698     uint16_t type;
699 
700     trace_virtio_mem_plug_request(gpa, nb_blocks);
701     type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, true);
702     virtio_mem_send_response_simple(vmem, elem, type);
703 }
704 
705 static void virtio_mem_unplug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
706                                       struct virtio_mem_req *req)
707 {
708     const uint64_t gpa = le64_to_cpu(req->u.unplug.addr);
709     const uint16_t nb_blocks = le16_to_cpu(req->u.unplug.nb_blocks);
710     uint16_t type;
711 
712     trace_virtio_mem_unplug_request(gpa, nb_blocks);
713     type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, false);
714     virtio_mem_send_response_simple(vmem, elem, type);
715 }
716 
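/*
 * Resize the usable region to cover the requested size plus some extra room
 * (VIRTIO_MEM_USABLE_EXTENT), limited by the memdev size; only shrink it when
 * explicitly allowed.
 */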
717 static void virtio_mem_resize_usable_region(VirtIOMEM *vmem,
718                                             uint64_t requested_size,
719                                             bool can_shrink)
720 {
721     uint64_t newsize = MIN(memory_region_size(&vmem->memdev->mr),
722                            requested_size + VIRTIO_MEM_USABLE_EXTENT);
723 
724     /* The usable region size always has to be a multiple of the block size. */
725     newsize = QEMU_ALIGN_UP(newsize, vmem->block_size);
726 
727     if (!requested_size) {
728         newsize = 0;
729     }
730 
731     if (newsize < vmem->usable_region_size && !can_shrink) {
732         return;
733     }
734 
735     trace_virtio_mem_resized_usable_region(vmem->usable_region_size, newsize);
736     vmem->usable_region_size = newsize;
737 }
738 
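/*
 * Unplug all blocks: discard the whole backing memory, clear the bitmap,
 * reset the plugged size, notify listeners and shrink the usable region again.
 */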
739 static int virtio_mem_unplug_all(VirtIOMEM *vmem)
740 {
741     const uint64_t region_size = memory_region_size(&vmem->memdev->mr);
742     RAMBlock *rb = vmem->memdev->mr.ram_block;
743 
744     if (vmem->size) {
745         if (virtio_mem_is_busy()) {
746             return -EBUSY;
747         }
748         if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) {
749             return -EBUSY;
750         }
751         virtio_mem_notify_unplug_all(vmem);
752 
753         bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size);
754         vmem->size = 0;
755         notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
756 
757         /* Deactivate all memslots after updating the state. */
758         if (vmem->dynamic_memslots) {
759             virtio_mem_deactivate_unplugged_memslots(vmem, 0, region_size);
760         }
761     }
762 
763     trace_virtio_mem_unplugged_all();
764     virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);
765     return 0;
766 }
767 
768 static void virtio_mem_unplug_all_request(VirtIOMEM *vmem,
769                                           VirtQueueElement *elem)
770 {
771     trace_virtio_mem_unplug_all_request();
772     if (virtio_mem_unplug_all(vmem)) {
773         virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_BUSY);
774     } else {
775         virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ACK);
776     }
777 }
778 
779 static void virtio_mem_state_request(VirtIOMEM *vmem, VirtQueueElement *elem,
780                                      struct virtio_mem_req *req)
781 {
782     const uint16_t nb_blocks = le16_to_cpu(req->u.state.nb_blocks);
783     const uint64_t gpa = le64_to_cpu(req->u.state.addr);
784     const uint64_t size = nb_blocks * vmem->block_size;
785     struct virtio_mem_resp resp = {
786         .type = cpu_to_le16(VIRTIO_MEM_RESP_ACK),
787     };
788 
789     trace_virtio_mem_state_request(gpa, nb_blocks);
790     if (!virtio_mem_valid_range(vmem, gpa, size)) {
791         virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ERROR);
792         return;
793     }
794 
795     if (virtio_mem_is_range_plugged(vmem, gpa, size)) {
796         resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_PLUGGED);
797     } else if (virtio_mem_is_range_unplugged(vmem, gpa, size)) {
798         resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_UNPLUGGED);
799     } else {
800         resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_MIXED);
801     }
802     trace_virtio_mem_state_response(le16_to_cpu(resp.u.state.state));
803     virtio_mem_send_response(vmem, elem, &resp);
804 }
805 
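/*
 * Virtqueue handler: pop guest requests, sanity-check request and response
 * buffer sizes and dispatch to the individual request handlers.
 */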
806 static void virtio_mem_handle_request(VirtIODevice *vdev, VirtQueue *vq)
807 {
808     const int len = sizeof(struct virtio_mem_req);
809     VirtIOMEM *vmem = VIRTIO_MEM(vdev);
810     VirtQueueElement *elem;
811     struct virtio_mem_req req;
812     uint16_t type;
813 
814     while (true) {
815         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
816         if (!elem) {
817             return;
818         }
819 
820         if (iov_to_buf(elem->out_sg, elem->out_num, 0, &req, len) < len) {
821             virtio_error(vdev, "virtio-mem protocol violation: invalid request"
822                          " size: %d", len);
823             virtqueue_detach_element(vq, elem, 0);
824             g_free(elem);
825             return;
826         }
827 
828         if (iov_size(elem->in_sg, elem->in_num) <
829             sizeof(struct virtio_mem_resp)) {
830             virtio_error(vdev, "virtio-mem protocol violation: not enough space"
831                          " for response: %zu",
832                          iov_size(elem->in_sg, elem->in_num));
833             virtqueue_detach_element(vq, elem, 0);
834             g_free(elem);
835             return;
836         }
837 
838         type = le16_to_cpu(req.type);
839         switch (type) {
840         case VIRTIO_MEM_REQ_PLUG:
841             virtio_mem_plug_request(vmem, elem, &req);
842             break;
843         case VIRTIO_MEM_REQ_UNPLUG:
844             virtio_mem_unplug_request(vmem, elem, &req);
845             break;
846         case VIRTIO_MEM_REQ_UNPLUG_ALL:
847             virtio_mem_unplug_all_request(vmem, elem);
848             break;
849         case VIRTIO_MEM_REQ_STATE:
850             virtio_mem_state_request(vmem, elem, &req);
851             break;
852         default:
853             virtio_error(vdev, "virtio-mem protocol violation: unknown request"
854                          " type: %d", type);
855             virtqueue_detach_element(vq, elem, 0);
856             g_free(elem);
857             return;
858         }
859 
860         g_free(elem);
861     }
862 }
863 
864 static void virtio_mem_get_config(VirtIODevice *vdev, uint8_t *config_data)
865 {
866     VirtIOMEM *vmem = VIRTIO_MEM(vdev);
867     struct virtio_mem_config *config = (void *) config_data;
868 
869     config->block_size = cpu_to_le64(vmem->block_size);
870     config->node_id = cpu_to_le16(vmem->node);
871     config->requested_size = cpu_to_le64(vmem->requested_size);
872     config->plugged_size = cpu_to_le64(vmem->size);
873     config->addr = cpu_to_le64(vmem->addr);
874     config->region_size = cpu_to_le64(memory_region_size(&vmem->memdev->mr));
875     config->usable_region_size = cpu_to_le64(vmem->usable_region_size);
876 }
877 
878 static uint64_t virtio_mem_get_features(VirtIODevice *vdev, uint64_t features,
879                                         Error **errp)
880 {
881     MachineState *ms = MACHINE(qdev_get_machine());
882     VirtIOMEM *vmem = VIRTIO_MEM(vdev);
883 
884     if (ms->numa_state) {
885 #if defined(CONFIG_ACPI)
886         virtio_add_feature(&features, VIRTIO_MEM_F_ACPI_PXM);
887 #endif
888     }
889     assert(vmem->unplugged_inaccessible != ON_OFF_AUTO_AUTO);
890     if (vmem->unplugged_inaccessible == ON_OFF_AUTO_ON) {
891         virtio_add_feature(&features, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE);
892     }
893     return features;
894 }
895 
896 static int virtio_mem_validate_features(VirtIODevice *vdev)
897 {
898     if (virtio_host_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE) &&
899         !virtio_vdev_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE)) {
900         return -EFAULT;
901     }
902     return 0;
903 }
904 
905 static void virtio_mem_system_reset(void *opaque)
906 {
907     VirtIOMEM *vmem = VIRTIO_MEM(opaque);
908 
909     /*
910      * During usual resets, we will unplug all memory and shrink the usable
911      * region size. This is, however, not possible in all scenarios. Then,
912      * the guest has to deal with this manually (VIRTIO_MEM_REQ_UNPLUG_ALL).
913      */
914     virtio_mem_unplug_all(vmem);
915 }
916 
917 static void virtio_mem_prepare_mr(VirtIOMEM *vmem)
918 {
919     const uint64_t region_size = memory_region_size(&vmem->memdev->mr);
920 
921     assert(!vmem->mr && vmem->dynamic_memslots);
922     vmem->mr = g_new0(MemoryRegion, 1);
923     memory_region_init(vmem->mr, OBJECT(vmem), "virtio-mem",
924                        region_size);
925     vmem->mr->align = memory_region_get_alignment(&vmem->memdev->mr);
926 }
927 
928 static void virtio_mem_prepare_memslots(VirtIOMEM *vmem)
929 {
930     const uint64_t region_size = memory_region_size(&vmem->memdev->mr);
931     unsigned int idx;
932 
933     g_assert(!vmem->memslots && vmem->nb_memslots && vmem->dynamic_memslots);
934     vmem->memslots = g_new0(MemoryRegion, vmem->nb_memslots);
935 
936     /* Initialize our memslots, but don't map them yet. */
937     for (idx = 0; idx < vmem->nb_memslots; idx++) {
938         const uint64_t memslot_offset = idx * vmem->memslot_size;
939         uint64_t memslot_size = vmem->memslot_size;
940         char name[20];
941 
942         /* The size of the last memslot might be smaller. */
943         if (idx == vmem->nb_memslots - 1) {
944             memslot_size = region_size - memslot_offset;
945         }
946 
947         snprintf(name, sizeof(name), "memslot-%u", idx);
948         memory_region_init_alias(&vmem->memslots[idx], OBJECT(vmem), name,
949                                  &vmem->memdev->mr, memslot_offset,
950                                  memslot_size);
951         /*
952          * We want to be able to atomically and efficiently activate/deactivate
953          * individual memslots without affecting adjacent memslots in memory
954          * notifiers.
955          */
956         memory_region_set_unmergeable(&vmem->memslots[idx], true);
957     }
958 }
959 
960 static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
961 {
962     MachineState *ms = MACHINE(qdev_get_machine());
963     int nb_numa_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0;
964     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
965     VirtIOMEM *vmem = VIRTIO_MEM(dev);
966     uint64_t page_size;
967     RAMBlock *rb;
968     int ret;
969 
970     if (!vmem->memdev) {
971         error_setg(errp, "'%s' property is not set", VIRTIO_MEM_MEMDEV_PROP);
972         return;
973     } else if (host_memory_backend_is_mapped(vmem->memdev)) {
974         error_setg(errp, "'%s' property specifies a busy memdev: %s",
975                    VIRTIO_MEM_MEMDEV_PROP,
976                    object_get_canonical_path_component(OBJECT(vmem->memdev)));
977         return;
978     } else if (!memory_region_is_ram(&vmem->memdev->mr) ||
979         memory_region_is_rom(&vmem->memdev->mr) ||
980         !vmem->memdev->mr.ram_block) {
981         error_setg(errp, "'%s' property specifies an unsupported memdev",
982                    VIRTIO_MEM_MEMDEV_PROP);
983         return;
984     } else if (vmem->memdev->prealloc) {
985         error_setg(errp, "'%s' property specifies a memdev with preallocation"
986                    " enabled: %s. Instead, specify 'prealloc=on' for the"
987                    " virtio-mem device. ", VIRTIO_MEM_MEMDEV_PROP,
988                    object_get_canonical_path_component(OBJECT(vmem->memdev)));
989         return;
990     }
991 
992     if ((nb_numa_nodes && vmem->node >= nb_numa_nodes) ||
993         (!nb_numa_nodes && vmem->node)) {
994         error_setg(errp, "'%s' property has value '%" PRIu32 "', which exceeds"
995                    "the number of numa nodes: %d", VIRTIO_MEM_NODE_PROP,
996                    vmem->node, nb_numa_nodes ? nb_numa_nodes : 1);
997         return;
998     }
999 
1000     if (enable_mlock) {
1001         error_setg(errp, "Incompatible with mlock");
1002         return;
1003     }
1004 
1005     rb = vmem->memdev->mr.ram_block;
1006     page_size = qemu_ram_pagesize(rb);
1007 
1008 #if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
1009     switch (vmem->unplugged_inaccessible) {
1010     case ON_OFF_AUTO_AUTO:
1011         if (virtio_mem_has_shared_zeropage(rb)) {
1012             vmem->unplugged_inaccessible = ON_OFF_AUTO_OFF;
1013         } else {
1014             vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
1015         }
1016         break;
1017     case ON_OFF_AUTO_OFF:
1018         if (!virtio_mem_has_shared_zeropage(rb)) {
1019             warn_report("'%s' property set to 'off' with a memdev that does"
1020                         " not support the shared zeropage.",
1021                         VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP);
1022         }
1023         break;
1024     default:
1025         break;
1026     }
1027 #else /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
1028     vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
1029 #endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
1030 
1031     if (vmem->dynamic_memslots &&
1032         vmem->unplugged_inaccessible != ON_OFF_AUTO_ON) {
1033         error_setg(errp, "'%s' property set to 'on' requires '%s' to be 'on'",
1034                    VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP,
1035                    VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP);
1036         return;
1037     }
1038 
1039     /*
1040      * If the block size wasn't configured by the user, use a sane default. This
1041      * allows using hugetlbfs backends of any page size without manual
1042      * intervention.
1043      */
1044     if (!vmem->block_size) {
1045         vmem->block_size = virtio_mem_default_block_size(rb);
1046     }
1047 
1048     if (vmem->block_size < page_size) {
1049         error_setg(errp, "'%s' property has to be at least the page size (0x%"
1050                    PRIx64 ")", VIRTIO_MEM_BLOCK_SIZE_PROP, page_size);
1051         return;
1052     } else if (vmem->block_size < virtio_mem_default_block_size(rb)) {
1053         warn_report("'%s' property is smaller than the default block size (%"
1054                     PRIx64 " MiB)", VIRTIO_MEM_BLOCK_SIZE_PROP,
1055                     virtio_mem_default_block_size(rb) / MiB);
1056     }
1057     if (!QEMU_IS_ALIGNED(vmem->requested_size, vmem->block_size)) {
1058         error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64
1059                    ")", VIRTIO_MEM_REQUESTED_SIZE_PROP,
1060                    VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
1061         return;
1062     } else if (!QEMU_IS_ALIGNED(vmem->addr, vmem->block_size)) {
1063         error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64
1064                    ")", VIRTIO_MEM_ADDR_PROP, VIRTIO_MEM_BLOCK_SIZE_PROP,
1065                    vmem->block_size);
1066         return;
1067     } else if (!QEMU_IS_ALIGNED(memory_region_size(&vmem->memdev->mr),
1068                                 vmem->block_size)) {
1069         error_setg(errp, "'%s' property memdev size has to be multiples of"
1070                    "'%s' (0x%" PRIx64 ")", VIRTIO_MEM_MEMDEV_PROP,
1071                    VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
1072         return;
1073     }
1074 
1075     if (ram_block_coordinated_discard_require(true)) {
1076         error_setg(errp, "Discarding RAM is disabled");
1077         return;
1078     }
1079 
1080     /*
1081      * We don't know at this point whether shared RAM is migrated using
1082      * QEMU or migrated using the file content. "x-ignore-shared" will be
1083      * configured after realizing the device. So in case we have an
1084      * incoming migration, simply always skip the discard step.
1085      *
1086      * Otherwise, make sure that we start with a clean slate: either the
1087      * memory backend might get reused or the shared file might still have
1088      * memory allocated.
1089      */
1090     if (!runstate_check(RUN_STATE_INMIGRATE)) {
1091         ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
1092         if (ret) {
1093             error_setg_errno(errp, -ret, "Unexpected error discarding RAM");
1094             ram_block_coordinated_discard_require(false);
1095             return;
1096         }
1097     }
1098 
1099     virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);
1100 
1101     vmem->bitmap_size = memory_region_size(&vmem->memdev->mr) /
1102                         vmem->block_size;
1103     vmem->bitmap = bitmap_new(vmem->bitmap_size);
1104 
1105     virtio_init(vdev, VIRTIO_ID_MEM, sizeof(struct virtio_mem_config));
1106     vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request);
1107 
1108     /*
1109      * With "dynamic-memslots=off" (old behavior) we always map the whole
1110      * RAM memory region directly.
1111      */
1112     if (vmem->dynamic_memslots) {
1113         if (!vmem->mr) {
1114             virtio_mem_prepare_mr(vmem);
1115         }
1116         if (vmem->nb_memslots <= 1) {
1117             vmem->nb_memslots = 1;
1118             vmem->memslot_size = memory_region_size(&vmem->memdev->mr);
1119         }
1120         if (!vmem->memslots) {
1121             virtio_mem_prepare_memslots(vmem);
1122         }
1123     } else {
1124         assert(!vmem->mr && !vmem->nb_memslots && !vmem->memslots);
1125     }
1126 
1127     host_memory_backend_set_mapped(vmem->memdev, true);
1128     vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
1129     if (vmem->early_migration) {
1130         vmstate_register_any(VMSTATE_IF(vmem),
1131                              &vmstate_virtio_mem_device_early, vmem);
1132     }
1133     qemu_register_reset(virtio_mem_system_reset, vmem);
1134 
1135     /*
1136      * Set ourselves as RamDiscardManager before the plug handler maps the
1137      * memory region and exposes it via an address space.
1138      */
1139     memory_region_set_ram_discard_manager(&vmem->memdev->mr,
1140                                           RAM_DISCARD_MANAGER(vmem));
1141 }
1142 
1143 static void virtio_mem_device_unrealize(DeviceState *dev)
1144 {
1145     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1146     VirtIOMEM *vmem = VIRTIO_MEM(dev);
1147 
1148     /*
1149      * The unplug handler unmapped the memory region, it cannot be
1150      * found via an address space anymore. Unset ourselves.
1151      */
1152     memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
1153     qemu_unregister_reset(virtio_mem_system_reset, vmem);
1154     if (vmem->early_migration) {
1155         vmstate_unregister(VMSTATE_IF(vmem), &vmstate_virtio_mem_device_early,
1156                            vmem);
1157     }
1158     vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem));
1159     host_memory_backend_set_mapped(vmem->memdev, false);
1160     virtio_del_queue(vdev, 0);
1161     virtio_cleanup(vdev);
1162     g_free(vmem->bitmap);
1163     ram_block_coordinated_discard_require(false);
1164 }
1165 
1166 static int virtio_mem_discard_range_cb(VirtIOMEM *vmem, void *arg,
1167                                        uint64_t offset, uint64_t size)
1168 {
1169     RAMBlock *rb = vmem->memdev->mr.ram_block;
1170 
1171     return ram_block_discard_range(rb, offset, size) ? -EINVAL : 0;
1172 }
1173 
1174 static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)
1175 {
1176     /* Make sure all memory is really discarded after migration. */
1177     return virtio_mem_for_each_unplugged_range(vmem, NULL,
1178                                                virtio_mem_discard_range_cb);
1179 }
1180 
1181 static int virtio_mem_activate_memslot_range_cb(VirtIOMEM *vmem, void *arg,
1182                                                 uint64_t offset, uint64_t size)
1183 {
1184     virtio_mem_activate_memslots_to_plug(vmem, offset, size);
1185     return 0;
1186 }
1187 
1188 static int virtio_mem_post_load_bitmap(VirtIOMEM *vmem)
1189 {
1190     RamDiscardListener *rdl;
1191     int ret;
1192 
1193     /*
1194      * We restored the bitmap and updated the requested size; activate all
1195      * memslots (so listeners register) before notifying about plugged blocks.
1196      */
1197     if (vmem->dynamic_memslots) {
1198         /*
1199          * We don't expect any active memslots at this point to deactivate: no
1200          * memory was plugged on the migration destination.
1201          */
1202         virtio_mem_for_each_plugged_range(vmem, NULL,
1203                                           virtio_mem_activate_memslot_range_cb);
1204     }
1205 
1206     /*
1207      * We started out with all memory discarded and our memory region is mapped
1208      * into an address space. Replay, now that we updated the bitmap.
1209      */
1210     QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
1211         ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
1212                                                  virtio_mem_notify_populate_cb);
1213         if (ret) {
1214             return ret;
1215         }
1216     }
1217     return 0;
1218 }
1219 
1220 static int virtio_mem_post_load(void *opaque, int version_id)
1221 {
1222     VirtIOMEM *vmem = VIRTIO_MEM(opaque);
1223     int ret;
1224 
1225     if (!vmem->early_migration) {
1226         ret = virtio_mem_post_load_bitmap(vmem);
1227         if (ret) {
1228             return ret;
1229         }
1230     }
1231 
1232     /*
1233      * If shared RAM is migrated using the file content and not using QEMU,
1234      * don't mess with preallocation and postcopy.
1235      */
1236     if (migrate_ram_is_ignored(vmem->memdev->mr.ram_block)) {
1237         return 0;
1238     }
1239 
1240     if (vmem->prealloc && !vmem->early_migration) {
1241         warn_report("Proper preallocation with migration requires a newer QEMU machine");
1242     }
1243 
1244     if (migration_in_incoming_postcopy()) {
1245         return 0;
1246     }
1247 
1248     return virtio_mem_restore_unplugged(vmem);
1249 }
1250 
1251 static int virtio_mem_prealloc_range_cb(VirtIOMEM *vmem, void *arg,
1252                                         uint64_t offset, uint64_t size)
1253 {
1254     void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
1255     int fd = memory_region_get_fd(&vmem->memdev->mr);
1256     Error *local_err = NULL;
1257 
1258     if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) {
1259         error_report_err(local_err);
1260         return -ENOMEM;
1261     }
1262     return 0;
1263 }
1264 
1265 static int virtio_mem_post_load_early(void *opaque, int version_id)
1266 {
1267     VirtIOMEM *vmem = VIRTIO_MEM(opaque);
1268     RAMBlock *rb = vmem->memdev->mr.ram_block;
1269     int ret;
1270 
1271     if (!vmem->prealloc) {
1272         goto post_load_bitmap;
1273     }
1274 
1275     /*
1276      * If shared RAM is migrated using the file content and not using QEMU,
1277      * don't mess with preallocation and postcopy.
1278      */
1279     if (migrate_ram_is_ignored(rb)) {
1280         goto post_load_bitmap;
1281     }
1282 
1283     /*
1284      * We restored the bitmap and verified that the basic properties
1285      * match on source and destination, so we can go ahead and preallocate
1286      * memory for all plugged memory blocks, before actual RAM migration starts
1287      * touching this memory.
1288      */
1289     ret = virtio_mem_for_each_plugged_range(vmem, NULL,
1290                                             virtio_mem_prealloc_range_cb);
1291     if (ret) {
1292         return ret;
1293     }
1294 
1295     /*
1296      * This is tricky: postcopy wants to start with a clean slate. On
1297      * POSTCOPY_INCOMING_ADVISE, postcopy code discards all (ordinarily
1298      * preallocated) RAM such that postcopy will work as expected later.
1299      *
1300      * However, we run after POSTCOPY_INCOMING_ADVISE -- but before actual
1301      * RAM migration. So let's discard all memory again. This looks like an
1302      * expensive NOP, but actually serves a purpose: we made sure that we
1303      * were able to allocate all required backend memory once. We cannot
1304      * guarantee that the backend memory we will free will remain free
1305      * until we need it during postcopy, but at least we can catch the
1306      * obvious setup issues this way.
1307      */
1308     if (migration_incoming_postcopy_advised()) {
1309         if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) {
1310             return -EBUSY;
1311         }
1312     }
1313 
1314 post_load_bitmap:
1315     /* Finally, update any other state to be consistent with the new bitmap. */
1316     return virtio_mem_post_load_bitmap(vmem);
1317 }
1318 
1319 typedef struct VirtIOMEMMigSanityChecks {
1320     VirtIOMEM *parent;
1321     uint64_t addr;
1322     uint64_t region_size;
1323     uint64_t block_size;
1324     uint32_t node;
1325 } VirtIOMEMMigSanityChecks;
1326 
1327 static int virtio_mem_mig_sanity_checks_pre_save(void *opaque)
1328 {
1329     VirtIOMEMMigSanityChecks *tmp = opaque;
1330     VirtIOMEM *vmem = tmp->parent;
1331 
1332     tmp->addr = vmem->addr;
1333     tmp->region_size = memory_region_size(&vmem->memdev->mr);
1334     tmp->block_size = vmem->block_size;
1335     tmp->node = vmem->node;
1336     return 0;
1337 }
1338 
1339 static int virtio_mem_mig_sanity_checks_post_load(void *opaque, int version_id)
1340 {
1341     VirtIOMEMMigSanityChecks *tmp = opaque;
1342     VirtIOMEM *vmem = tmp->parent;
1343     const uint64_t new_region_size = memory_region_size(&vmem->memdev->mr);
1344 
1345     if (tmp->addr != vmem->addr) {
1346         error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
1347                      VIRTIO_MEM_ADDR_PROP, tmp->addr, vmem->addr);
1348         return -EINVAL;
1349     }
1350     /*
1351      * Note: Preparation for resizable memory regions. The maximum size
1352      * of the memory region must not change during migration.
1353      */
1354     if (tmp->region_size != new_region_size) {
1355         error_report("Property '%s' size changed from 0x%" PRIx64 " to 0x%"
1356                      PRIx64, VIRTIO_MEM_MEMDEV_PROP, tmp->region_size,
1357                      new_region_size);
1358         return -EINVAL;
1359     }
1360     if (tmp->block_size != vmem->block_size) {
1361         error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
1362                      VIRTIO_MEM_BLOCK_SIZE_PROP, tmp->block_size,
1363                      vmem->block_size);
1364         return -EINVAL;
1365     }
1366     if (tmp->node != vmem->node) {
1367         error_report("Property '%s' changed from %" PRIu32 " to %" PRIu32,
1368                      VIRTIO_MEM_NODE_PROP, tmp->node, vmem->node);
1369         return -EINVAL;
1370     }
1371     return 0;
1372 }
1373 
1374 static const VMStateDescription vmstate_virtio_mem_sanity_checks = {
1375     .name = "virtio-mem-device/sanity-checks",
1376     .pre_save = virtio_mem_mig_sanity_checks_pre_save,
1377     .post_load = virtio_mem_mig_sanity_checks_post_load,
1378     .fields = (const VMStateField[]) {
1379         VMSTATE_UINT64(addr, VirtIOMEMMigSanityChecks),
1380         VMSTATE_UINT64(region_size, VirtIOMEMMigSanityChecks),
1381         VMSTATE_UINT64(block_size, VirtIOMEMMigSanityChecks),
1382         VMSTATE_UINT32(node, VirtIOMEMMigSanityChecks),
1383         VMSTATE_END_OF_LIST(),
1384     },
1385 };
1386 
1387 static bool virtio_mem_vmstate_field_exists(void *opaque, int version_id)
1388 {
1389     const VirtIOMEM *vmem = VIRTIO_MEM(opaque);
1390 
1391     /* With early migration, these fields were already migrated. */
1392     return !vmem->early_migration;
1393 }
1394 
1395 static const VMStateDescription vmstate_virtio_mem_device = {
1396     .name = "virtio-mem-device",
1397     .minimum_version_id = 1,
1398     .version_id = 1,
1399     .priority = MIG_PRI_VIRTIO_MEM,
1400     .post_load = virtio_mem_post_load,
1401     .fields = (const VMStateField[]) {
1402         VMSTATE_WITH_TMP_TEST(VirtIOMEM, virtio_mem_vmstate_field_exists,
1403                               VirtIOMEMMigSanityChecks,
1404                               vmstate_virtio_mem_sanity_checks),
1405         VMSTATE_UINT64(usable_region_size, VirtIOMEM),
1406         VMSTATE_UINT64_TEST(size, VirtIOMEM, virtio_mem_vmstate_field_exists),
1407         VMSTATE_UINT64(requested_size, VirtIOMEM),
1408         VMSTATE_BITMAP_TEST(bitmap, VirtIOMEM, virtio_mem_vmstate_field_exists,
1409                             0, bitmap_size),
1410         VMSTATE_END_OF_LIST()
1411     },
1412 };
1413 
1414 /*
1415  * Transfer properties that are immutable while migration is active early,
1416  * such that we have this information around before migrating any RAM
1417  * content.
1418  *
1419  * Note that virtio_mem_is_busy() makes sure these properties can no longer
1420  * change on the migration source until migration completed.
1421  *
1422  * With QEMU compat machines, we transmit these properties later, via
1423  * vmstate_virtio_mem_device instead -- see virtio_mem_vmstate_field_exists().
1424  */
1425 static const VMStateDescription vmstate_virtio_mem_device_early = {
1426     .name = "virtio-mem-device-early",
1427     .minimum_version_id = 1,
1428     .version_id = 1,
1429     .early_setup = true,
1430     .post_load = virtio_mem_post_load_early,
1431     .fields = (const VMStateField[]) {
1432         VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks,
1433                          vmstate_virtio_mem_sanity_checks),
1434         VMSTATE_UINT64(size, VirtIOMEM),
1435         VMSTATE_BITMAP(bitmap, VirtIOMEM, 0, bitmap_size),
1436         VMSTATE_END_OF_LIST()
1437     },
1438 };
1439 
1440 static const VMStateDescription vmstate_virtio_mem = {
1441     .name = "virtio-mem",
1442     .minimum_version_id = 1,
1443     .version_id = 1,
1444     .fields = (const VMStateField[]) {
1445         VMSTATE_VIRTIO_DEVICE,
1446         VMSTATE_END_OF_LIST()
1447     },
1448 };
1449 
1450 static void virtio_mem_fill_device_info(const VirtIOMEM *vmem,
1451                                         VirtioMEMDeviceInfo *vi)
1452 {
1453     vi->memaddr = vmem->addr;
1454     vi->node = vmem->node;
1455     vi->requested_size = vmem->requested_size;
1456     vi->size = vmem->size;
1457     vi->max_size = memory_region_size(&vmem->memdev->mr);
1458     vi->block_size = vmem->block_size;
1459     vi->memdev = object_get_canonical_path(OBJECT(vmem->memdev));
1460 }
1461 
1462 static MemoryRegion *virtio_mem_get_memory_region(VirtIOMEM *vmem, Error **errp)
1463 {
1464     if (!vmem->memdev) {
1465         error_setg(errp, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP);
1466         return NULL;
1467     } else if (vmem->dynamic_memslots) {
1468         if (!vmem->mr) {
1469             virtio_mem_prepare_mr(vmem);
1470         }
1471         return vmem->mr;
1472     }
1473 
1474     return &vmem->memdev->mr;
1475 }
1476 
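/*
 * Called once before realizing the device: with dynamic memslots, decide how
 * many memslots to use based on the given limit, the region size and the
 * minimum memslot size.
 */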
1477 static void virtio_mem_decide_memslots(VirtIOMEM *vmem, unsigned int limit)
1478 {
1479     uint64_t region_size, memslot_size, min_memslot_size;
1480     unsigned int memslots;
1481     RAMBlock *rb;
1482 
1483     if (!vmem->dynamic_memslots) {
1484         return;
1485     }
1486 
1487     /* We're called exactly once, before realizing the device. */
1488     assert(!vmem->nb_memslots);
1489 
1490     /* If realizing the device will fail, just assume a single memslot. */
1491     if (limit <= 1 || !vmem->memdev || !vmem->memdev->mr.ram_block) {
1492         vmem->nb_memslots = 1;
1493         return;
1494     }
1495 
1496     rb = vmem->memdev->mr.ram_block;
1497     region_size = memory_region_size(&vmem->memdev->mr);
1498 
1499     /*
1500      * Determine the default block size now, so we can derive the minimum memslot
1501      * size. We want the minimum slot size to be at least the device block size.
1502      */
1503     if (!vmem->block_size) {
1504         vmem->block_size = virtio_mem_default_block_size(rb);
1505     }
1506     /* If realizing the device will fail, just assume a single memslot. */
1507     if (vmem->block_size < qemu_ram_pagesize(rb) ||
1508         !QEMU_IS_ALIGNED(region_size, vmem->block_size)) {
1509         vmem->nb_memslots = 1;
1510         return;
1511     }
1512 
1513     /*
1514      * All memslots except the last one have a reasonable minimum size, and
1515      * all memslot sizes are aligned to the device block size.
1516      */
1517     memslot_size = QEMU_ALIGN_UP(region_size / limit, vmem->block_size);
1518     min_memslot_size = MAX(vmem->block_size, VIRTIO_MEM_MIN_MEMSLOT_SIZE);
1519     memslot_size = MAX(memslot_size, min_memslot_size);
1520 
1521     memslots = QEMU_ALIGN_UP(region_size, memslot_size) / memslot_size;
1522     if (memslots != 1) {
1523         vmem->memslot_size = memslot_size;
1524     }
1525     vmem->nb_memslots = memslots;
1526 }
1527 
1528 static unsigned int virtio_mem_get_memslots(VirtIOMEM *vmem)
1529 {
1530     if (!vmem->dynamic_memslots) {
1531         /* Exactly one static RAM memory region. */
1532         return 1;
1533     }
1534 
1535     /* We're called only after having been instructed to make a decision. */
1536     g_assert(vmem->nb_memslots);
1537     return vmem->nb_memslots;
1538 }
1539 
1540 static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem,
1541                                                 Notifier *notifier)
1542 {
1543     notifier_list_add(&vmem->size_change_notifiers, notifier);
1544 }
1545 
1546 static void virtio_mem_remove_size_change_notifier(VirtIOMEM *vmem,
1547                                                    Notifier *notifier)
1548 {
1549     notifier_remove(notifier);
1550 }
1551 
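     /*
      * Getters/setters for the VIRTIO_MEM_SIZE_PROP,
      * VIRTIO_MEM_REQUESTED_SIZE_PROP and VIRTIO_MEM_BLOCK_SIZE_PROP
      * properties registered in virtio_mem_instance_init() below.
      */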
1552 static void virtio_mem_get_size(Object *obj, Visitor *v, const char *name,
1553                                 void *opaque, Error **errp)
1554 {
1555     const VirtIOMEM *vmem = VIRTIO_MEM(obj);
1556     uint64_t value = vmem->size;
1557 
1558     visit_type_size(v, name, &value, errp);
1559 }
1560 
1561 static void virtio_mem_get_requested_size(Object *obj, Visitor *v,
1562                                           const char *name, void *opaque,
1563                                           Error **errp)
1564 {
1565     const VirtIOMEM *vmem = VIRTIO_MEM(obj);
1566     uint64_t value = vmem->requested_size;
1567 
1568     visit_type_size(v, name, &value, errp);
1569 }
1570 
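     /*
      * Setter for the requested size: once the device is realized, the value
      * must be aligned to the block size and must not exceed the memory
      * backend size; a config update then notifies the guest, so it can
      * (un)plug memory towards the new target.
      */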
1571 static void virtio_mem_set_requested_size(Object *obj, Visitor *v,
1572                                           const char *name, void *opaque,
1573                                           Error **errp)
1574 {
1575     VirtIOMEM *vmem = VIRTIO_MEM(obj);
1576     uint64_t value;
1577 
1578     if (!visit_type_size(v, name, &value, errp)) {
1579         return;
1580     }
1581 
1582     /*
1583      * The block size and memory backend are not fixed until the device is
1584      * realized. realize() will verify these properties then.
1585      */
1586     if (DEVICE(obj)->realized) {
1587         if (!QEMU_IS_ALIGNED(value, vmem->block_size)) {
1588             error_setg(errp, "'%s' has to be a multiple of '%s' (0x%" PRIx64
1589                        ")", name, VIRTIO_MEM_BLOCK_SIZE_PROP,
1590                        vmem->block_size);
1591             return;
1592         } else if (value > memory_region_size(&vmem->memdev->mr)) {
1593             error_setg(errp, "'%s' cannot exceed the memory backend size "
1594                        "(0x%" PRIx64 ")", name,
1595                        memory_region_size(&vmem->memdev->mr));
1596             return;
1597         }
1598 
1599         if (value != vmem->requested_size) {
1600             virtio_mem_resize_usable_region(vmem, value, false);
1601             vmem->requested_size = value;
1602         }
1603         /*
1604          * Trigger a config update so the guest gets notified. We do that even
1605          * if the requested size didn't change (especially helpful for debugging).
1606          */
1607         virtio_notify_config(VIRTIO_DEVICE(vmem));
1608     } else {
1609         vmem->requested_size = value;
1610     }
1611 }
1612 
1613 static void virtio_mem_get_block_size(Object *obj, Visitor *v, const char *name,
1614                                       void *opaque, Error **errp)
1615 {
1616     const VirtIOMEM *vmem = VIRTIO_MEM(obj);
1617     uint64_t value = vmem->block_size;
1618 
1619     /*
1620      * If not configured by the user (and we're not realized yet), use the
1621      * default block size we would use with the current memory backend.
1622      */
1623     if (!value) {
1624         if (vmem->memdev && memory_region_is_ram(&vmem->memdev->mr)) {
1625             value = virtio_mem_default_block_size(vmem->memdev->mr.ram_block);
1626         } else {
1627             value = virtio_mem_thp_size();
1628         }
1629     }
1630 
1631     visit_type_size(v, name, &value, errp);
1632 }
1633 
1634 static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name,
1635                                       void *opaque, Error **errp)
1636 {
1637     VirtIOMEM *vmem = VIRTIO_MEM(obj);
1638     uint64_t value;
1639 
1640     if (DEVICE(obj)->realized) {
1641         error_setg(errp, "'%s' cannot be changed", name);
1642         return;
1643     }
1644 
1645     if (!visit_type_size(v, name, &value, errp)) {
1646         return;
1647     }
1648 
1649     if (value < VIRTIO_MEM_MIN_BLOCK_SIZE) {
1650         error_setg(errp, "'%s' property has to be at least 0x%" PRIx32, name,
1651                    VIRTIO_MEM_MIN_BLOCK_SIZE);
1652         return;
1653     } else if (!is_power_of_2(value)) {
1654         error_setg(errp, "'%s' property has to be a power of two", name);
1655         return;
1656     }
1657     vmem->block_size = value;
1658 }
1659 
1660 static void virtio_mem_instance_init(Object *obj)
1661 {
1662     VirtIOMEM *vmem = VIRTIO_MEM(obj);
1663 
1664     notifier_list_init(&vmem->size_change_notifiers);
1665     QLIST_INIT(&vmem->rdl_list);
1666 
1667     object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size,
1668                         NULL, NULL, NULL);
1669     object_property_add(obj, VIRTIO_MEM_REQUESTED_SIZE_PROP, "size",
1670                         virtio_mem_get_requested_size,
1671                         virtio_mem_set_requested_size, NULL, NULL);
1672     object_property_add(obj, VIRTIO_MEM_BLOCK_SIZE_PROP, "size",
1673                         virtio_mem_get_block_size, virtio_mem_set_block_size,
1674                         NULL, NULL);
1675 }
1676 
1677 static void virtio_mem_instance_finalize(Object *obj)
1678 {
1679     VirtIOMEM *vmem = VIRTIO_MEM(obj);
1680 
1681     /*
1682      * Note: the core already dropped the references on all memory regions
1683      * (the device is passed as the owner to memory_region_init_*()) and finalized
1684      * these objects. We can simply free the memory.
1685      */
1686     g_free(vmem->memslots);
1687     vmem->memslots = NULL;
1688     g_free(vmem->mr);
1689     vmem->mr = NULL;
1690 }
1691 
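     /*
      * Device properties. As a rough usage sketch (the exact device name and
      * additional machine options, such as 'maxmem', depend on the
      * machine/transport), a virtio-mem device is typically wired up along
      * the lines of:
      *
      *   -object memory-backend-ram,id=vmem0,size=8G \
      *   -device virtio-mem-pci,memdev=vmem0,node=0,requested-size=1G
      *
      * where "memdev", "node" and "requested-size" correspond to the
      * properties defined below.
      */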
1692 static Property virtio_mem_properties[] = {
1693     DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0),
1694     DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0),
1695     DEFINE_PROP_BOOL(VIRTIO_MEM_PREALLOC_PROP, VirtIOMEM, prealloc, false),
1696     DEFINE_PROP_LINK(VIRTIO_MEM_MEMDEV_PROP, VirtIOMEM, memdev,
1697                      TYPE_MEMORY_BACKEND, HostMemoryBackend *),
1698 #if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
1699     DEFINE_PROP_ON_OFF_AUTO(VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP, VirtIOMEM,
1700                             unplugged_inaccessible, ON_OFF_AUTO_ON),
1701 #endif
1702     DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM,
1703                      early_migration, true),
1704     DEFINE_PROP_BOOL(VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP, VirtIOMEM,
1705                      dynamic_memslots, false),
1706     DEFINE_PROP_END_OF_LIST(),
1707 };
1708 
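     /*
      * RamDiscardManager interface implementation: the device acts as the
      * discard manager for its memory region, so that listeners can stay in
      * sync with which ranges are currently plugged (populated) vs. unplugged
      * (discarded). The minimum granularity at which that state can change is
      * the device block size.
      */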
1709 static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm,
1710                                                    const MemoryRegion *mr)
1711 {
1712     const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1713 
1714     g_assert(mr == &vmem->memdev->mr);
1715     return vmem->block_size;
1716 }
1717 
1718 static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
1719                                         const MemoryRegionSection *s)
1720 {
1721     const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1722     uint64_t start_gpa = vmem->addr + s->offset_within_region;
1723     uint64_t end_gpa = start_gpa + int128_get64(s->size);
1724 
1725     g_assert(s->mr == &vmem->memdev->mr);
1726 
1727     start_gpa = QEMU_ALIGN_DOWN(start_gpa, vmem->block_size);
1728     end_gpa = QEMU_ALIGN_UP(end_gpa, vmem->block_size);
1729 
1730     if (!virtio_mem_valid_range(vmem, start_gpa, end_gpa - start_gpa)) {
1731         return false;
1732     }
1733 
1734     return virtio_mem_is_range_plugged(vmem, start_gpa, end_gpa - start_gpa);
1735 }
1736 
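     /*
      * Helper state for the replay callbacks below: carries the caller's
      * callback and opaque pointer through virtio_mem_for_each_*_section().
      */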
1737 struct VirtIOMEMReplayData {
1738     void *fn;
1739     void *opaque;
1740 };
1741 
1742 static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg)
1743 {
1744     struct VirtIOMEMReplayData *data = arg;
1745 
1746     return ((ReplayRamPopulate)data->fn)(s, data->opaque);
1747 }
1748 
1749 static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
1750                                            MemoryRegionSection *s,
1751                                            ReplayRamPopulate replay_fn,
1752                                            void *opaque)
1753 {
1754     const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1755     struct VirtIOMEMReplayData data = {
1756         .fn = replay_fn,
1757         .opaque = opaque,
1758     };
1759 
1760     g_assert(s->mr == &vmem->memdev->mr);
1761     return virtio_mem_for_each_plugged_section(vmem, s, &data,
1762                                             virtio_mem_rdm_replay_populated_cb);
1763 }
1764 
1765 static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s,
1766                                               void *arg)
1767 {
1768     struct VirtIOMEMReplayData *data = arg;
1769 
1770     ((ReplayRamDiscard)data->fn)(s, data->opaque);
1771     return 0;
1772 }
1773 
1774 static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
1775                                             MemoryRegionSection *s,
1776                                             ReplayRamDiscard replay_fn,
1777                                             void *opaque)
1778 {
1779     const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1780     struct VirtIOMEMReplayData data = {
1781         .fn = replay_fn,
1782         .opaque = opaque,
1783     };
1784 
1785     g_assert(s->mr == &vmem->memdev->mr);
1786     virtio_mem_for_each_unplugged_section(vmem, s, &data,
1787                                           virtio_mem_rdm_replay_discarded_cb);
1788 }
1789 
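     /*
      * When a listener registers, replay all currently plugged sections so it
      * starts out in sync with the device state. On unregister, replay discard
      * notifications (or issue a single discard of the whole section, if the
      * listener supports discarding already-discarded parts) so the listener
      * can clean up.
      */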
1790 static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
1791                                              RamDiscardListener *rdl,
1792                                              MemoryRegionSection *s)
1793 {
1794     VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1795     int ret;
1796 
1797     g_assert(s->mr == &vmem->memdev->mr);
1798     rdl->section = memory_region_section_new_copy(s);
1799 
1800     QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next);
1801     ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
1802                                               virtio_mem_notify_populate_cb);
1803     if (ret) {
1804         error_report("%s: Replaying plugged ranges failed: %s", __func__,
1805                      strerror(-ret));
1806     }
1807 }
1808 
1809 static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm,
1810                                                RamDiscardListener *rdl)
1811 {
1812     VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1813 
1814     g_assert(rdl->section->mr == &vmem->memdev->mr);
1815     if (vmem->size) {
1816         if (rdl->double_discard_supported) {
1817             rdl->notify_discard(rdl, rdl->section);
1818         } else {
1819             virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
1820                                                 virtio_mem_notify_discard_cb);
1821         }
1822     }
1823 
1824     memory_region_section_free_copy(rdl->section);
1825     rdl->section = NULL;
1826     QLIST_REMOVE(rdl, next);
1827 }
1828 
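     /*
      * Device unplug is only possible once no memory is plugged anymore
      * (size == 0), the requested size is 0, and we're not running with the
      * legacy VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP=off setting.
      */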
1829 static void virtio_mem_unplug_request_check(VirtIOMEM *vmem, Error **errp)
1830 {
1831     if (vmem->unplugged_inaccessible == ON_OFF_AUTO_OFF) {
1832         /*
1833          * We could allow it with a usable region size of 0, but let's just
1834          * not care about that legacy setting.
1835          */
1836         error_setg(errp, "virtio-mem device cannot get unplugged while"
1837                    " '" VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP "' != 'on'");
1838         return;
1839     }
1840 
1841     if (vmem->size) {
1842         error_setg(errp, "virtio-mem device cannot get unplugged while some"
1843                    " of its memory is still plugged");
1844         return;
1845     }
1846     if (vmem->requested_size) {
1847         error_setg(errp, "virtio-mem device cannot get unplugged while"
1848                    " '" VIRTIO_MEM_REQUESTED_SIZE_PROP "' != '0'");
1849         return;
1850     }
1851 }
1852 
1853 static void virtio_mem_class_init(ObjectClass *klass, void *data)
1854 {
1855     DeviceClass *dc = DEVICE_CLASS(klass);
1856     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
1857     VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass);
1858     RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass);
1859 
1860     device_class_set_props(dc, virtio_mem_properties);
1861     dc->vmsd = &vmstate_virtio_mem;
1862 
1863     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
1864     vdc->realize = virtio_mem_device_realize;
1865     vdc->unrealize = virtio_mem_device_unrealize;
1866     vdc->get_config = virtio_mem_get_config;
1867     vdc->get_features = virtio_mem_get_features;
1868     vdc->validate_features = virtio_mem_validate_features;
1869     vdc->vmsd = &vmstate_virtio_mem_device;
1870 
1871     vmc->fill_device_info = virtio_mem_fill_device_info;
1872     vmc->get_memory_region = virtio_mem_get_memory_region;
1873     vmc->decide_memslots = virtio_mem_decide_memslots;
1874     vmc->get_memslots = virtio_mem_get_memslots;
1875     vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier;
1876     vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier;
1877     vmc->unplug_request_check = virtio_mem_unplug_request_check;
1878 
1879     rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity;
1880     rdmc->is_populated = virtio_mem_rdm_is_populated;
1881     rdmc->replay_populated = virtio_mem_rdm_replay_populated;
1882     rdmc->replay_discarded = virtio_mem_rdm_replay_discarded;
1883     rdmc->register_listener = virtio_mem_rdm_register_listener;
1884     rdmc->unregister_listener = virtio_mem_rdm_unregister_listener;
1885 }
1886 
1887 static const TypeInfo virtio_mem_info = {
1888     .name = TYPE_VIRTIO_MEM,
1889     .parent = TYPE_VIRTIO_DEVICE,
1890     .instance_size = sizeof(VirtIOMEM),
1891     .instance_init = virtio_mem_instance_init,
1892     .instance_finalize = virtio_mem_instance_finalize,
1893     .class_init = virtio_mem_class_init,
1894     .class_size = sizeof(VirtIOMEMClass),
1895     .interfaces = (InterfaceInfo[]) {
1896         { TYPE_RAM_DISCARD_MANAGER },
1897         { }
1898     },
1899 };
1900 
1901 static void virtio_register_types(void)
1902 {
1903     type_register_static(&virtio_mem_info);
1904 }
1905 
1906 type_init(virtio_register_types)
1907