xref: /openbmc/qemu/migration/multifd.h (revision a66f28df650166ae8b50c992eea45e7b247f4143)
1d32ca5adSJuan Quintela /*
2d32ca5adSJuan Quintela  * Multifd common functions
3d32ca5adSJuan Quintela  *
4d32ca5adSJuan Quintela  * Copyright (c) 2019-2020 Red Hat Inc
5d32ca5adSJuan Quintela  *
6d32ca5adSJuan Quintela  * Authors:
7d32ca5adSJuan Quintela  *  Juan Quintela <quintela@redhat.com>
8d32ca5adSJuan Quintela  *
9d32ca5adSJuan Quintela  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10d32ca5adSJuan Quintela  * See the COPYING file in the top-level directory.
11d32ca5adSJuan Quintela  */
12d32ca5adSJuan Quintela 
13d32ca5adSJuan Quintela #ifndef QEMU_MIGRATION_MULTIFD_H
14d32ca5adSJuan Quintela #define QEMU_MIGRATION_MULTIFD_H
15d32ca5adSJuan Quintela 
1690fa121cSFabiano Rosas #include "exec/target_page.h"
17a49d15a3SFabiano Rosas #include "ram.h"
18a49d15a3SFabiano Rosas 
19d117ed06SFabiano Rosas typedef struct MultiFDRecvData MultiFDRecvData;
20addd7d15SFabiano Rosas typedef struct MultiFDSendData MultiFDSendData;
21d117ed06SFabiano Rosas 
22bd8b0a8fSFabiano Rosas bool multifd_send_setup(void);
23cde85c37SPeter Xu void multifd_send_shutdown(void);
24a8a3e710SFabiano Rosas void multifd_send_channel_created(void);
25cde85c37SPeter Xu int multifd_recv_setup(Error **errp);
26cde85c37SPeter Xu void multifd_recv_cleanup(void);
27cde85c37SPeter Xu void multifd_recv_shutdown(void);
28d32ca5adSJuan Quintela bool multifd_recv_all_channels_created(void);
296720c2b3Smanish.mishra void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
30d32ca5adSJuan Quintela void multifd_recv_sync_main(void);
319346fa18SFabiano Rosas int multifd_send_sync_main(void);
32d6556d17SPeter Xu bool multifd_queue_page(RAMBlock *block, ram_addr_t offset);
33d117ed06SFabiano Rosas bool multifd_recv(void);
34d117ed06SFabiano Rosas MultiFDRecvData *multifd_get_recv_data(void);
35d32ca5adSJuan Quintela 
/* Multifd packet flags */

/* bit 0 */
#define MULTIFD_FLAG_SYNC (1 << 0)

/*
 * Bits 1-5 (5 bits) are reserved for the compression method. Each
 * method defined below occupies one bit of that field.
 */
#define MULTIFD_FLAG_COMPRESSION_MASK (0x1f << 1)
/* Has to stay 0 for compatibility: before compression the value was 0 */
#define MULTIFD_FLAG_NOCOMP (0x00 << 1)
#define MULTIFD_FLAG_ZLIB   (0x01 << 1)
#define MULTIFD_FLAG_ZSTD   (0x02 << 1)
#define MULTIFD_FLAG_QPL    (0x04 << 1)
#define MULTIFD_FLAG_UADK   (0x08 << 1)
#define MULTIFD_FLAG_QATZIP (0x10 << 1)

/* Has to be a multiple of qemu_target_page_size() */
#define MULTIFD_PACKET_SIZE (512 * 1024)
51d32ca5adSJuan Quintela 
/*
 * Multifd packet header.
 *
 * The struct is packed, so the fields sit back to back with no padding:
 * a fixed part followed by the variable-length offset[] array.
 */
typedef struct {
    uint32_t magic;
    uint32_t version;
    uint32_t flags;
    /* upper bound on the number of pages allocated */
    uint32_t pages_alloc;
    /* count of non-zero (normal) pages */
    uint32_t normal_pages;
    /* size of the next packet that carries pages */
    uint32_t next_packet_size;
    uint64_t packet_num;
    /* count of zero pages */
    uint32_t zero_pages;
    /* spare room kept for future extensions */
    uint32_t unused32[1];
    uint64_t unused64[3];
    char ramblock[256];
    /*
     * One entry per page, in this order:
     *  - the first normal_pages entries describe the normal pages
     *  - the next zero_pages entries describe the zero pages
     */
    uint64_t offset[];
} __attribute__((packed)) MultiFDPacket_t;
75d32ca5adSJuan Quintela 
76d32ca5adSJuan Quintela typedef struct {
77d32ca5adSJuan Quintela     /* number of used pages */
7890a3d2f9SJuan Quintela     uint32_t num;
79303e6f54SHao Xiang     /* number of normal pages */
80303e6f54SHao Xiang     uint32_t normal_num;
81d32ca5adSJuan Quintela     RAMBlock *block;
820e427da0SFabiano Rosas     /* offset of each page */
830e427da0SFabiano Rosas     ram_addr_t offset[];
84d32ca5adSJuan Quintela } MultiFDPages_t;
85d32ca5adSJuan Quintela 
/* Receive-side data descriptor: a pointer, a size and a file offset. */
struct MultiFDRecvData {
    void *opaque;
    size_t size;
    /* consumed by preadv */
    off_t file_offset;
};
92d117ed06SFabiano Rosas 
/* Kinds of payload a MultiFDSendData can be tagged with. */
typedef enum {
    MULTIFD_PAYLOAD_NONE = 0,   /* tested for by multifd_payload_empty() */
    MULTIFD_PAYLOAD_RAM,
} MultiFDPayloadType;
97addd7d15SFabiano Rosas 
98addd7d15SFabiano Rosas typedef union MultiFDPayload {
99addd7d15SFabiano Rosas     MultiFDPages_t ram;
100addd7d15SFabiano Rosas } MultiFDPayload;
101addd7d15SFabiano Rosas 
102addd7d15SFabiano Rosas struct MultiFDSendData {
103addd7d15SFabiano Rosas     MultiFDPayloadType type;
104addd7d15SFabiano Rosas     MultiFDPayload u;
105addd7d15SFabiano Rosas };
106addd7d15SFabiano Rosas 
/* Returns true when @data is tagged MULTIFD_PAYLOAD_NONE. */
static inline bool multifd_payload_empty(MultiFDSendData *data)
{
    const MultiFDPayloadType current = data->type;

    return current == MULTIFD_PAYLOAD_NONE;
}
111addd7d15SFabiano Rosas 
/* Stores @type as the payload tag of @data; nothing else is touched. */
static inline void multifd_set_payload_type(MultiFDSendData *data,
                                            MultiFDPayloadType type)
{
    MultiFDSendData *const target = data;

    target->type = type;
}
117addd7d15SFabiano Rosas 
118d32ca5adSJuan Quintela typedef struct {
1194a8f19c9SJuan Quintela     /* Fields are only written at creating/deletion time */
1204a8f19c9SJuan Quintela     /* No lock required for them, they are read only */
1214a8f19c9SJuan Quintela 
122d32ca5adSJuan Quintela     /* channel number */
123d32ca5adSJuan Quintela     uint8_t id;
124d32ca5adSJuan Quintela     /* channel thread name */
125d32ca5adSJuan Quintela     char *name;
126d32ca5adSJuan Quintela     /* channel thread id */
127d32ca5adSJuan Quintela     QemuThread thread;
128a2a63c4aSFabiano Rosas     bool thread_created;
129e1921f10SFabiano Rosas     QemuThread tls_thread;
130e1921f10SFabiano Rosas     bool tls_thread_created;
131d32ca5adSJuan Quintela     /* communication channel */
132d32ca5adSJuan Quintela     QIOChannel *c;
1334a8f19c9SJuan Quintela     /* packet allocated len */
1344a8f19c9SJuan Quintela     uint32_t packet_len;
1354a8f19c9SJuan Quintela     /* multifd flags for sending ram */
1364a8f19c9SJuan Quintela     int write_flags;
1374a8f19c9SJuan Quintela 
138d32ca5adSJuan Quintela     /* sem where to wait for more work */
139d32ca5adSJuan Quintela     QemuSemaphore sem;
1404a8f19c9SJuan Quintela     /* syncs main thread and channels */
1414a8f19c9SJuan Quintela     QemuSemaphore sem_sync;
1424a8f19c9SJuan Quintela 
143d32ca5adSJuan Quintela     /* multifd flags for each packet */
144d32ca5adSJuan Quintela     uint32_t flags;
145f5f48a78SPeter Xu     /*
146f5f48a78SPeter Xu      * The sender thread has work to do if either of below boolean is set.
147f5f48a78SPeter Xu      *
148f5f48a78SPeter Xu      * @pending_job:  a job is pending
149f5f48a78SPeter Xu      * @pending_sync: a sync request is pending
150f5f48a78SPeter Xu      *
151f5f48a78SPeter Xu      * For both of these fields, they're only set by the requesters, and
152f5f48a78SPeter Xu      * cleared by the multifd sender threads.
153f5f48a78SPeter Xu      */
154f5f48a78SPeter Xu     bool pending_job;
155f5f48a78SPeter Xu     bool pending_sync;
1569f0e1089SFabiano Rosas     MultiFDSendData *data;
1574a8f19c9SJuan Quintela 
1584a8f19c9SJuan Quintela     /* thread local variables. No locking required */
1594a8f19c9SJuan Quintela 
1604a8f19c9SJuan Quintela     /* pointer to the packet */
1614a8f19c9SJuan Quintela     MultiFDPacket_t *packet;
1624a8f19c9SJuan Quintela     /* size of the next packet that contains pages */
1634a8f19c9SJuan Quintela     uint32_t next_packet_size;
164d32ca5adSJuan Quintela     /* packets sent through this channel */
16505b7ec18SPeter Xu     uint64_t packets_sent;
166226468baSJuan Quintela     /* buffers to send */
167226468baSJuan Quintela     struct iovec *iov;
168226468baSJuan Quintela     /* number of iovs used */
169226468baSJuan Quintela     uint32_t iovs_num;
170ab7cbb0bSJuan Quintela     /* used for compression methods */
171402dd7acSFabiano Rosas     void *compress_data;
172d32ca5adSJuan Quintela }  MultiFDSendParams;
173d32ca5adSJuan Quintela 
174d32ca5adSJuan Quintela typedef struct {
1754a8f19c9SJuan Quintela     /* Fields are only written at creating/deletion time */
1764a8f19c9SJuan Quintela     /* No lock required for them, they are read only */
1774a8f19c9SJuan Quintela 
178d32ca5adSJuan Quintela     /* channel number */
179d32ca5adSJuan Quintela     uint8_t id;
180d32ca5adSJuan Quintela     /* channel thread name */
181d32ca5adSJuan Quintela     char *name;
182d32ca5adSJuan Quintela     /* channel thread id */
183d32ca5adSJuan Quintela     QemuThread thread;
184a2a63c4aSFabiano Rosas     bool thread_created;
185d32ca5adSJuan Quintela     /* communication channel */
186d32ca5adSJuan Quintela     QIOChannel *c;
1874a8f19c9SJuan Quintela     /* packet allocated len */
1884a8f19c9SJuan Quintela     uint32_t packet_len;
1894a8f19c9SJuan Quintela 
1904a8f19c9SJuan Quintela     /* syncs main thread and channels */
1914a8f19c9SJuan Quintela     QemuSemaphore sem_sync;
192d117ed06SFabiano Rosas     /* sem where to wait for more work */
193d117ed06SFabiano Rosas     QemuSemaphore sem;
1944a8f19c9SJuan Quintela 
195d32ca5adSJuan Quintela     /* this mutex protects the following parameters */
196d32ca5adSJuan Quintela     QemuMutex mutex;
197d32ca5adSJuan Quintela     /* should this thread finish */
198d32ca5adSJuan Quintela     bool quit;
199d32ca5adSJuan Quintela     /* multifd flags for each packet */
200d32ca5adSJuan Quintela     uint32_t flags;
201d32ca5adSJuan Quintela     /* global number of generated multifd packets */
202d32ca5adSJuan Quintela     uint64_t packet_num;
203d117ed06SFabiano Rosas     int pending_job;
204d117ed06SFabiano Rosas     MultiFDRecvData *data;
2054a8f19c9SJuan Quintela 
2064a8f19c9SJuan Quintela     /* thread local variables. No locking required */
2074a8f19c9SJuan Quintela 
2084a8f19c9SJuan Quintela     /* pointer to the packet */
2094a8f19c9SJuan Quintela     MultiFDPacket_t *packet;
210d32ca5adSJuan Quintela     /* size of the next packet that contains pages */
211d32ca5adSJuan Quintela     uint32_t next_packet_size;
21205b7ec18SPeter Xu     /* packets received through this channel */
21305b7ec18SPeter Xu     uint64_t packets_recved;
2145d1d1fcfSLukas Straub     /* ramblock */
2155d1d1fcfSLukas Straub     RAMBlock *block;
2164a8f19c9SJuan Quintela     /* ramblock host address */
2174a8f19c9SJuan Quintela     uint8_t *host;
218226468baSJuan Quintela     /* buffers to recv */
219226468baSJuan Quintela     struct iovec *iov;
220cf2d4aa8SJuan Quintela     /* Pages that are not zero */
221cf2d4aa8SJuan Quintela     ram_addr_t *normal;
222cf2d4aa8SJuan Quintela     /* num of non zero pages */
223cf2d4aa8SJuan Quintela     uint32_t normal_num;
224303e6f54SHao Xiang     /* Pages that are zero */
225303e6f54SHao Xiang     ram_addr_t *zero;
226303e6f54SHao Xiang     /* num of zero pages */
227303e6f54SHao Xiang     uint32_t zero_num;
228ab7cbb0bSJuan Quintela     /* used for de-compression methods */
229402dd7acSFabiano Rosas     void *compress_data;
230d32ca5adSJuan Quintela } MultiFDRecvParams;
231d32ca5adSJuan Quintela 
232ab7cbb0bSJuan Quintela typedef struct {
23362e1af13SFabiano Rosas     /*
23462e1af13SFabiano Rosas      * The send_setup, send_cleanup, send_prepare are only called on
23562e1af13SFabiano Rosas      * the QEMU instance at the migration source.
23662e1af13SFabiano Rosas      */
23762e1af13SFabiano Rosas 
23862e1af13SFabiano Rosas     /*
23962e1af13SFabiano Rosas      * Setup for sending side. Called once per channel during channel
24062e1af13SFabiano Rosas      * setup phase.
24162e1af13SFabiano Rosas      *
24262e1af13SFabiano Rosas      * Must allocate p->iov. If packets are in use (default), one
24362e1af13SFabiano Rosas      * extra iovec must be allocated for the packet header. Any memory
24462e1af13SFabiano Rosas      * allocated in this hook must be released at send_cleanup.
24562e1af13SFabiano Rosas      *
24662e1af13SFabiano Rosas      * p->write_flags may be used for passing flags to the QIOChannel.
24762e1af13SFabiano Rosas      *
24862e1af13SFabiano Rosas      * p->compression_data may be used by compression methods to store
24962e1af13SFabiano Rosas      * compression data.
25062e1af13SFabiano Rosas      */
251ab7cbb0bSJuan Quintela     int (*send_setup)(MultiFDSendParams *p, Error **errp);
25262e1af13SFabiano Rosas 
25362e1af13SFabiano Rosas     /*
25462e1af13SFabiano Rosas      * Cleanup for sending side. Called once per channel during
25562e1af13SFabiano Rosas      * channel cleanup phase.
25662e1af13SFabiano Rosas      */
257ab7cbb0bSJuan Quintela     void (*send_cleanup)(MultiFDSendParams *p, Error **errp);
25862e1af13SFabiano Rosas 
25962e1af13SFabiano Rosas     /*
26062e1af13SFabiano Rosas      * Prepare the send packet. Called as a result of multifd_send()
26162e1af13SFabiano Rosas      * on the client side, with p pointing to the MultiFDSendParams of
26262e1af13SFabiano Rosas      * a channel that is currently idle.
26362e1af13SFabiano Rosas      *
26462e1af13SFabiano Rosas      * Must populate p->iov with the data to be sent, increment
26562e1af13SFabiano Rosas      * p->iovs_num to match the amount of iovecs used and set
26662e1af13SFabiano Rosas      * p->next_packet_size with the amount of data currently present
26762e1af13SFabiano Rosas      * in p->iov.
26862e1af13SFabiano Rosas      *
26962e1af13SFabiano Rosas      * Must indicate whether this is a compression packet by setting
27062e1af13SFabiano Rosas      * p->flags.
27162e1af13SFabiano Rosas      *
27262e1af13SFabiano Rosas      * As a last step, if packets are in use (default), must prepare
27362e1af13SFabiano Rosas      * the packet by calling multifd_send_fill_packet().
27462e1af13SFabiano Rosas      */
27502fb8104SJuan Quintela     int (*send_prepare)(MultiFDSendParams *p, Error **errp);
27662e1af13SFabiano Rosas 
27762e1af13SFabiano Rosas     /*
27862e1af13SFabiano Rosas      * The recv_setup, recv_cleanup, recv are only called on the QEMU
27962e1af13SFabiano Rosas      * instance at the migration destination.
28062e1af13SFabiano Rosas      */
28162e1af13SFabiano Rosas 
28262e1af13SFabiano Rosas     /*
28362e1af13SFabiano Rosas      * Setup for receiving side. Called once per channel during
28462e1af13SFabiano Rosas      * channel setup phase. May be empty.
28562e1af13SFabiano Rosas      *
28662e1af13SFabiano Rosas      * May allocate data structures for the receiving of data. May use
28762e1af13SFabiano Rosas      * p->iov. Compression methods may use p->compress_data.
28862e1af13SFabiano Rosas      */
289ab7cbb0bSJuan Quintela     int (*recv_setup)(MultiFDRecvParams *p, Error **errp);
29062e1af13SFabiano Rosas 
29162e1af13SFabiano Rosas     /*
29262e1af13SFabiano Rosas      * Cleanup for receiving side. Called once per channel during
29362e1af13SFabiano Rosas      * channel cleanup phase. May be empty.
29462e1af13SFabiano Rosas      */
295ab7cbb0bSJuan Quintela     void (*recv_cleanup)(MultiFDRecvParams *p);
29662e1af13SFabiano Rosas 
29762e1af13SFabiano Rosas     /*
29862e1af13SFabiano Rosas      * Data receive method. Called as a result of multifd_recv() on
29962e1af13SFabiano Rosas      * the client side, with p pointing to the MultiFDRecvParams of a
30062e1af13SFabiano Rosas      * channel that is currently idle. Only called if there is data
30162e1af13SFabiano Rosas      * available to receive.
30262e1af13SFabiano Rosas      *
30362e1af13SFabiano Rosas      * Must validate p->flags according to what was set at
30462e1af13SFabiano Rosas      * send_prepare.
30562e1af13SFabiano Rosas      *
30662e1af13SFabiano Rosas      * Must read the data from the QIOChannel p->c.
30762e1af13SFabiano Rosas      */
3089db19125SFabiano Rosas     int (*recv)(MultiFDRecvParams *p, Error **errp);
309ab7cbb0bSJuan Quintela } MultiFDMethods;
310ab7cbb0bSJuan Quintela 
311308d165cSFabiano Rosas void multifd_register_ops(int method, const MultiFDMethods *ops);
31225a1f878SPeter Xu void multifd_send_fill_packet(MultiFDSendParams *p);
313303e6f54SHao Xiang bool multifd_send_prepare_common(MultiFDSendParams *p);
314303e6f54SHao Xiang void multifd_send_zero_page_detect(MultiFDSendParams *p);
315303e6f54SHao Xiang void multifd_recv_zero_page_process(MultiFDRecvParams *p);
3167ec2c2b3SJuan Quintela 
/*
 * Points the first iovec of @p at the packet buffer (p->packet,
 * p->packet_len bytes) and counts one more iovec as used.
 *
 * Slot 0 is written whatever p->iovs_num currently holds.
 * NOTE(review): callers presumably invoke this while iovs_num is 0 - confirm.
 */
static inline void multifd_send_prepare_header(MultiFDSendParams *p)
{
    struct iovec *hdr = &p->iov[0];

    hdr->iov_base = p->packet;
    hdr->iov_len = p->packet_len;
    p->iovs_num++;
}
323452b2057SPeter Xu 
324b7b03eb6SFabiano Rosas void multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc);
32540c9471eSFabiano Rosas bool multifd_send(MultiFDSendData **send_data);
32640c9471eSFabiano Rosas MultiFDSendData *multifd_send_data_alloc(void);
327452b2057SPeter Xu 
/* Page size used for multifd RAM: whatever qemu_target_page_size() reports. */
static inline uint32_t multifd_ram_page_size(void)
{
    const uint32_t page_size = qemu_target_page_size();

    return page_size;
}
33290fa121cSFabiano Rosas 
multifd_ram_page_count(void)33390fa121cSFabiano Rosas static inline uint32_t multifd_ram_page_count(void)
33490fa121cSFabiano Rosas {
33590fa121cSFabiano Rosas     return MULTIFD_PACKET_SIZE / qemu_target_page_size();
33690fa121cSFabiano Rosas }
337a71ef5c7SFabiano Rosas 
338a71ef5c7SFabiano Rosas void multifd_ram_save_setup(void);
339a71ef5c7SFabiano Rosas void multifd_ram_save_cleanup(void);
340a0c78d81SFabiano Rosas int multifd_ram_flush_and_sync(void);
34140c9471eSFabiano Rosas size_t multifd_ram_payload_size(void);
34240c9471eSFabiano Rosas void multifd_ram_fill_packet(MultiFDSendParams *p);
34340c9471eSFabiano Rosas int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp);
344d32ca5adSJuan Quintela #endif
345