xref: /openbmc/qemu/migration/multifd.h (revision be06edd3edac94406a5b121c151d72fa493a0ae1)
1 /*
2  * Multifd common functions
3  *
4  * Copyright (c) 2019-2020 Red Hat Inc
5  *
6  * Authors:
7  *  Juan Quintela <quintela@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 
13 #ifndef QEMU_MIGRATION_MULTIFD_H
14 #define QEMU_MIGRATION_MULTIFD_H
15 
16 #include "exec/target_page.h"
17 #include "ram.h"
18 
/*
 * Forward declarations.  The struct bodies are defined further down in
 * this header; the typedefs let the prototypes below refer to the types
 * by pointer before that point.
 */
typedef struct MultiFDRecvData MultiFDRecvData;
typedef struct MultiFDSendData MultiFDSendData;
21 
/*
 * Kind of synchronisation that can be requested from the multifd sender
 * threads.  The enumerator values are spelled out so that "no request"
 * being zero is visible at a glance.
 */
typedef enum {
    /* Nothing to synchronise */
    MULTIFD_SYNC_NONE  = 0,

    /* Sender threads sync locally; no message is pushed to the wire */
    MULTIFD_SYNC_LOCAL = 1,

    /*
     * Local sync plus a MULTIFD_FLAG_SYNC message pushed to the wire on
     * every iochannel, i.e. a remote sync.
     *
     * A remote sync has to be paired with a follow up
     * RAM_SAVE_FLAG_EOS / RAM_SAVE_FLAG_MULTIFD_FLUSH message on the main
     * channel.
     */
    MULTIFD_SYNC_ALL   = 2,
} MultiFDSyncReq;
37 
/*
 * Multifd setup / teardown / data-path entry points.
 *
 * Only the prototypes are visible in this header; the notes below state
 * what the signatures show and nothing more.
 */

/* Send side setup; reports its outcome as a bool. */
bool multifd_send_setup(void);
void multifd_send_shutdown(void);
void multifd_send_channel_created(void);
/* Receive side setup; takes an Error ** for reporting failures. */
int multifd_recv_setup(Error **errp);
void multifd_recv_cleanup(void);
void multifd_recv_shutdown(void);
bool multifd_recv_all_channels_created(void);
/* Hand a newly arrived channel @ioc to multifd; errors go to @errp. */
void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
void multifd_recv_sync_main(void);
/* @req selects the kind of sync, see MultiFDSyncReq. */
int multifd_send_sync_main(MultiFDSyncReq req);
/* Queue the page at @offset inside @block for sending. */
bool multifd_queue_page(RAMBlock *block, ram_addr_t offset);
bool multifd_recv(void);
MultiFDRecvData *multifd_get_recv_data(void);
51 
/* Multiple fd's */

/* Protocol identification constants */
#define MULTIFD_MAGIC 0x11223344U
#define MULTIFD_VERSION 1

/*
 * Multifd packet flags.
 *
 * Bit layout, as established by the definitions below:
 *   bit  0     MULTIFD_FLAG_SYNC
 *   bits 1-5   compression method (MULTIFD_FLAG_COMPRESSION_MASK)
 *   bit  6     MULTIFD_FLAG_DEVICE_STATE
 */

/* Sync flag (not a compression flag) */
#define MULTIFD_FLAG_SYNC (1 << 0)

/* We reserve 5 bits for compression methods */
#define MULTIFD_FLAG_COMPRESSION_MASK (0x1f << 1)
/*
 * Compression method values, stored inside the masked field above.
 * NOCOMP has to stay 0 for compatibility: before compression existed the
 * value was 0.
 */
#define MULTIFD_FLAG_NOCOMP (0 << 1)
#define MULTIFD_FLAG_ZLIB (1 << 1)
#define MULTIFD_FLAG_ZSTD (2 << 1)
#define MULTIFD_FLAG_QPL (4 << 1)
#define MULTIFD_FLAG_UADK (8 << 1)
#define MULTIFD_FLAG_QATZIP (16 << 1)

/*
 * If set it means that this packet contains device state
 * (MultiFDPacketDeviceState_t), not RAM data (MultiFDPacket_t).
 * This is the first bit above the compression field.
 */
#define MULTIFD_FLAG_DEVICE_STATE (32 << 1)

/*
 * 512 KiB.  This value needs to be a multiple of qemu_target_page_size();
 * multifd_ram_page_count() divides it by the page size.
 */
#define MULTIFD_PACKET_SIZE (512 * 1024)
78 
/*
 * Common header: it is the first member of both packet layouts declared
 * in this file (MultiFDPacket_t and MultiFDPacketDeviceState_t).
 *
 * The struct is packed; do not reorder or resize the fields.
 */
typedef struct {
    /* NOTE(review): presumably MULTIFD_MAGIC - confirm against the sender */
    uint32_t magic;
    /* NOTE(review): presumably MULTIFD_VERSION - confirm against the sender */
    uint32_t version;
    /* NOTE(review): presumably MULTIFD_FLAG_* bits - confirm */
    uint32_t flags;
} __attribute__((packed)) MultiFDPacketHdr_t;
84 
/*
 * Packet describing a batch of RAM pages.
 *
 * The struct is packed and ends in a flexible array member, so its real
 * size depends on how many offset[] entries are allocated after it.  Do
 * not reorder or resize the fields.
 */
typedef struct {
    MultiFDPacketHdr_t hdr;

    /* maximum number of allocated pages */
    uint32_t pages_alloc;
    /* non zero pages */
    uint32_t normal_pages;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    uint64_t packet_num;
    /* zero pages */
    uint32_t zero_pages;
    uint32_t unused32[1];    /* Reserved for future use */
    uint64_t unused64[3];    /* Reserved for future use */
    /* NOTE(review): presumably the RAMBlock id string - confirm */
    char ramblock[256];
    /*
     * This array contains one 64-bit entry per page (the original comment
     * calls them "pointers"; the element type is uint64_t):
     *  - normal pages (initial normal_pages entries)
     *  - zero pages (following zero_pages entries)
     */
    uint64_t offset[];
} __attribute__((packed)) MultiFDPacket_t;
107 
/*
 * Packet describing a chunk of device state.  Per the comment on
 * MULTIFD_FLAG_DEVICE_STATE, that flag tells this layout apart from
 * MultiFDPacket_t.
 *
 * The struct is packed; do not reorder or resize the fields.
 */
typedef struct {
    MultiFDPacketHdr_t hdr;

    /*
     * Device identification.
     * NOTE(review): presumably mirrors idstr/instance_id of
     * MultiFDDeviceState_t - confirm against the fill code.
     */
    char idstr[256];
    uint32_t instance_id;

    /* size of the next packet that contains the actual data */
    uint32_t next_packet_size;
} __attribute__((packed)) MultiFDPacketDeviceState_t;
117 
/*
 * RAM payload: a set of pages, all belonging to one RAMBlock, identified
 * by their offsets.
 */
typedef struct {
    /* number of used pages */
    uint32_t num;
    /*
     * number of normal pages
     * NOTE(review): presumably normal_num <= num, the remainder being zero
     * pages - confirm against the zero page detection code.
     */
    uint32_t normal_num;
    /*
     * Pointer to the ramblock.  NOTE: it's caller's responsibility to make
     * sure the pointer is always valid!
     */
    RAMBlock *block;
    /*
     * offset array of each page, managed by multifd
     * (see multifd_ram_payload_alloc() / multifd_ram_payload_free(),
     * which take a MultiFDPages_t *)
     */
    ram_addr_t *offset;
} MultiFDPages_t;
131 
/*
 * Description of one receive request; multifd_get_recv_data() returns a
 * pointer to an object of this type.
 */
struct MultiFDRecvData {
    /* NOTE(review): presumably the destination buffer - confirm */
    void *opaque;
    /* NOTE(review): presumably the number of bytes to receive - confirm */
    size_t size;
    /* for preadv */
    off_t file_offset;
};
138 
/*
 * Device state payload: an identified device plus a data buffer.
 *
 * NOTE(review): ownership of idstr and buf is not visible in this header;
 * see multifd_send_data_clear_device_state() for the release side.
 */
typedef struct {
    /* device identification */
    char *idstr;
    uint32_t instance_id;
    /* data buffer and its length in bytes */
    char *buf;
    size_t buf_len;
} MultiFDDeviceState_t;
145 
/*
 * Discriminator telling which payload a MultiFDSendData currently
 * carries.  Values are written out explicitly: NONE is zero, so a
 * zero-filled MultiFDSendData reads as "empty".
 */
typedef enum {
    MULTIFD_PAYLOAD_NONE         = 0,
    MULTIFD_PAYLOAD_RAM          = 1,
    MULTIFD_PAYLOAD_DEVICE_STATE = 2,
} MultiFDPayloadType;
151 
/*
 * Storage for every payload kind.
 *
 * This is a struct, not a union: the RAM and device state members each
 * have their own storage and do not overlap, even though the field that
 * holds this type in MultiFDSendData is named 'u'.
 * NOTE(review): ram.offset is a pointer "managed by multifd"; overlapping
 * it with device_state would presumably clobber it - keep this a struct
 * unless that is re-checked.
 */
typedef struct MultiFDPayload {
    MultiFDPages_t ram;
    MultiFDDeviceState_t device_state;
} MultiFDPayload;
156 
/*
 * One unit of work for a send channel: a payload plus the tag saying
 * which member of it is in use.
 */
struct MultiFDSendData {
    /* MULTIFD_PAYLOAD_NONE means no payload is set */
    MultiFDPayloadType type;
    /* payload storage; despite the name, MultiFDPayload is a struct */
    MultiFDPayload u;
};
161 
162 static inline bool multifd_payload_empty(MultiFDSendData *data)
163 {
164     return data->type == MULTIFD_PAYLOAD_NONE;
165 }
166 
167 static inline bool multifd_payload_device_state(MultiFDSendData *data)
168 {
169     return data->type == MULTIFD_PAYLOAD_DEVICE_STATE;
170 }
171 
/*
 * Tag @data as carrying a payload of kind @type.
 *
 * @data: send data to tag; must currently be empty.
 * @type: new payload kind; must not be MULTIFD_PAYLOAD_NONE.
 *
 * Both preconditions are enforced with assert(), so this can only move
 * @data from "empty" to "has a payload", never the other way round and
 * never from one payload kind to another.
 */
static inline void multifd_set_payload_type(MultiFDSendData *data,
                                            MultiFDPayloadType type)
{
    /* refuse to overwrite a payload that is already set */
    assert(multifd_payload_empty(data));
    /* this helper is not the way to empty the data again */
    assert(type != MULTIFD_PAYLOAD_NONE);

    data->type = type;
}
180 
/*
 * Per-channel state of a multifd send channel.
 *
 * The fields come in three groups, delimited by the comments below:
 * fields written only at creation/deletion time, fields shared between
 * the requesters and the sender thread, and fields local to the sender
 * thread.
 */
typedef struct {
    /* Fields are only written at creating/deletion time */
    /* No lock required for them, they are read only */

    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* whether 'thread' has been created */
    bool thread_created;
    /* NOTE(review): presumably the thread running the TLS setup - confirm */
    QemuThread tls_thread;
    /* whether 'tls_thread' has been created */
    bool tls_thread_created;
    /* communication channel */
    QIOChannel *c;
    /* packet allocated len */
    uint32_t packet_len;
    /* multifd flags for sending ram */
    int write_flags;

    /* sem where to wait for more work */
    QemuSemaphore sem;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;

    /* multifd flags for each packet */
    uint32_t flags;
    /*
     * The sender thread has work to do if either of below field is set.
     *
     * @pending_job:  a job is pending
     * @pending_sync: a sync request is pending
     *
     * For both of these fields, they're only set by the requesters, and
     * cleared by the multifd sender threads.
     */
    bool pending_job;
    MultiFDSyncReq pending_sync;

    /* payload of the pending job, see struct MultiFDSendData */
    MultiFDSendData *data;

    /* thread local variables. No locking required */

    /* pointers to the possible packet types */
    MultiFDPacket_t *packet;
    MultiFDPacketDeviceState_t *packet_device_state;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    /* packets sent through this channel */
    uint64_t packets_sent;
    /* buffers to send */
    struct iovec *iov;
    /* number of iovs used */
    uint32_t iovs_num;
    /* used for compression methods */
    void *compress_data;
}  MultiFDSendParams;
238 
/*
 * Per-channel state of a multifd receive channel.
 *
 * The fields come in three groups, delimited by the comments below:
 * fields written only at creation/deletion time, fields protected by
 * 'mutex', and fields local to the receive thread.
 */
typedef struct {
    /* Fields are only written at creating/deletion time */
    /* No lock required for them, they are read only */

    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    /* whether 'thread' has been created */
    bool thread_created;
    /* communication channel */
    QIOChannel *c;
    /* packet allocated len */
    uint32_t packet_len;

    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
    /* sem where to wait for more work */
    QemuSemaphore sem;

    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* should this thread finish */
    bool quit;
    /* multifd flags for each packet */
    uint32_t flags;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /*
     * NOTE(review): an int here, whereas MultiFDSendParams uses a bool for
     * the field of the same name - check the users before relying on it
     * being 0/1.
     */
    int pending_job;
    /* description of the pending receive, see struct MultiFDRecvData */
    MultiFDRecvData *data;

    /* thread local variables. No locking required */

    /* pointers to the possible packet types */
    MultiFDPacket_t *packet;
    MultiFDPacketDeviceState_t *packet_dev_state;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    /* packets received through this channel */
    uint64_t packets_recved;
    /* ramblock */
    RAMBlock *block;
    /* ramblock host address */
    uint8_t *host;
    /* buffers to recv */
    struct iovec *iov;
    /* Pages that are not zero */
    ram_addr_t *normal;
    /* num of non zero pages */
    uint32_t normal_num;
    /* Pages that are zero */
    ram_addr_t *zero;
    /* num of zero pages */
    uint32_t zero_num;
    /* used for de-compression methods */
    void *compress_data;
    /* Flags for the QIOChannel */
    int read_flags;
} MultiFDRecvParams;
299 
/*
 * Table of hooks implementing one way of sending and receiving multifd
 * data.  A table is handed to multifd via multifd_register_ops().
 */
typedef struct {
    /*
     * The send_setup, send_cleanup, send_prepare are only called on
     * the QEMU instance at the migration source.
     */

    /*
     * Setup for sending side. Called once per channel during channel
     * setup phase.
     *
     * Must allocate p->iov. If packets are in use (default), one
     * extra iovec must be allocated for the packet header. Any memory
     * allocated in this hook must be released at send_cleanup.
     *
     * p->write_flags may be used for passing flags to the QIOChannel.
     *
     * p->compress_data may be used by compression methods to store
     * compression data.
     */
    int (*send_setup)(MultiFDSendParams *p, Error **errp);

    /*
     * Cleanup for sending side. Called once per channel during
     * channel cleanup phase.
     */
    void (*send_cleanup)(MultiFDSendParams *p, Error **errp);

    /*
     * Prepare the send packet. Called as a result of multifd_send()
     * on the client side, with p pointing to the MultiFDSendParams of
     * a channel that is currently idle.
     *
     * Must populate p->iov with the data to be sent, increment
     * p->iovs_num to match the amount of iovecs used and set
     * p->next_packet_size with the amount of data currently present
     * in p->iov.
     *
     * Must indicate whether this is a compression packet by setting
     * p->flags.
     *
     * As a last step, if packets are in use (default), must prepare
     * the packet by calling multifd_send_fill_packet().
     */
    int (*send_prepare)(MultiFDSendParams *p, Error **errp);

    /*
     * The recv_setup, recv_cleanup, recv are only called on the QEMU
     * instance at the migration destination.
     */

    /*
     * Setup for receiving side. Called once per channel during
     * channel setup phase. May be empty.
     *
     * May allocate data structures for the receiving of data. May use
     * p->iov. Compression methods may use p->compress_data.
     */
    int (*recv_setup)(MultiFDRecvParams *p, Error **errp);

    /*
     * Cleanup for receiving side. Called once per channel during
     * channel cleanup phase. May be empty.
     *
     * Unlike send_cleanup, this hook takes no Error ** argument.
     */
    void (*recv_cleanup)(MultiFDRecvParams *p);

    /*
     * Data receive method. Called as a result of multifd_recv() on
     * the client side, with p pointing to the MultiFDRecvParams of a
     * channel that is currently idle. Only called if there is data
     * available to receive.
     *
     * Must validate p->flags according to what was set at
     * send_prepare.
     *
     * Must read the data from the QIOChannel p->c.
     */
    int (*recv)(MultiFDRecvParams *p, Error **errp);
} MultiFDMethods;
378 
/*
 * Helpers for MultiFDMethods implementations and for users of the send
 * path.  Only the prototypes are visible in this header.
 */

/*
 * Register the hook table @ops under @method.
 * NOTE(review): @method is presumably a compression method id - confirm.
 */
void multifd_register_ops(int method, const MultiFDMethods *ops);
/* Per the MultiFDMethods comments, the last step of send_prepare. */
void multifd_send_fill_packet(MultiFDSendParams *p);
bool multifd_send_prepare_common(MultiFDSendParams *p);
void multifd_send_zero_page_detect(MultiFDSendParams *p);
void multifd_recv_zero_page_process(MultiFDRecvParams *p);

void multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc);
/*
 * Takes a pointer to the caller's MultiFDSendData pointer.
 * NOTE(review): presumably so the pointer can be replaced - confirm.
 */
bool multifd_send(MultiFDSendData **send_data);
/* Allocation, reset and release of MultiFDSendData objects. */
MultiFDSendData *multifd_send_data_alloc(void);
void multifd_send_data_clear(MultiFDSendData *data);
void multifd_send_data_free(MultiFDSendData *data);
390 
/*
 * Size in bytes of one RAM page as handled by multifd: the target page
 * size, narrowed to the uint32_t used by the multifd code.
 */
static inline uint32_t multifd_ram_page_size(void)
{
    uint32_t page_bytes = qemu_target_page_size();

    return page_bytes;
}
395 
396 static inline uint32_t multifd_ram_page_count(void)
397 {
398     return MULTIFD_PACKET_SIZE / qemu_target_page_size();
399 }
400 
/*
 * RAM payload interface.  Only the prototypes are visible in this header.
 */
void multifd_ram_save_setup(void);
void multifd_ram_save_cleanup(void);
/* @f: NOTE(review): presumably the main migration channel - confirm. */
int multifd_ram_flush_and_sync(QEMUFile *f);
bool multifd_ram_sync_per_round(void);
bool multifd_ram_sync_per_section(void);
/* Allocate / release the resources held by a MultiFDPages_t. */
void multifd_ram_payload_alloc(MultiFDPages_t *pages);
void multifd_ram_payload_free(MultiFDPages_t *pages);
void multifd_ram_fill_packet(MultiFDSendParams *p);
/* Errors are reported through @errp. */
int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp);

/*
 * Device state payload interface.
 */
void multifd_send_data_clear_device_state(MultiFDDeviceState_t *device_state);

void multifd_device_state_send_setup(void);
void multifd_device_state_send_cleanup(void);

void multifd_device_state_send_prepare(MultiFDSendParams *p);
417 
418 #endif
419