1 /*
2 * Multifd common functions
3 *
4 * Copyright (c) 2019-2020 Red Hat Inc
5 *
6 * Authors:
7 * Juan Quintela <quintela@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12
13 #ifndef QEMU_MIGRATION_MULTIFD_H
14 #define QEMU_MIGRATION_MULTIFD_H
15
16 #include "exec/target_page.h"
17 #include "ram.h"
18
19 typedef struct MultiFDRecvData MultiFDRecvData;
20 typedef struct MultiFDSendData MultiFDSendData;
21
/*
 * Kind of synchronisation that can be requested from the multifd sender
 * threads.  The values are ordered from "nothing" to "local and remote";
 * see the per-value comments for what each one implies.
 */
typedef enum {
    /* No sync request */
    MULTIFD_SYNC_NONE = 0,
    /* Sync locally on the sender threads without pushing messages */
    MULTIFD_SYNC_LOCAL,
    /*
     * Sync not only on the sender threads, but also push MULTIFD_FLAG_SYNC
     * message to the wire for each iochannel (which is for a remote sync).
     *
     * When remote sync is used, need to be paired with a follow up
     * RAM_SAVE_FLAG_EOS / RAM_SAVE_FLAG_MULTIFD_FLUSH message on the main
     * channel.
     */
    MULTIFD_SYNC_ALL,
} MultiFDSyncReq;
37
/*
 * Multifd entry points.  These are declarations only; the definitions live
 * outside this header.  The multifd_send_ and multifd_recv_ name prefixes
 * indicate which side of the migration each function belongs to.
 */
bool multifd_send_setup(void);
void multifd_send_shutdown(void);
void multifd_send_channel_created(void);
/* Reports failures through @errp in addition to the int return value. */
int multifd_recv_setup(Error **errp);
void multifd_recv_cleanup(void);
void multifd_recv_shutdown(void);
bool multifd_recv_all_channels_created(void);
void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
void multifd_recv_sync_main(void);
/* @req is one of the MultiFDSyncReq values. */
int multifd_send_sync_main(MultiFDSyncReq req);
bool multifd_queue_page(RAMBlock *block, ram_addr_t offset);
bool multifd_recv(void);
MultiFDRecvData *multifd_get_recv_data(void);
51
/*
 * Multifd packet flags.
 *
 * Bit layout as defined below:
 *   bit 0     - MULTIFD_FLAG_SYNC
 *   bits 1-5  - compression method (MULTIFD_FLAG_COMPRESSION_MASK)
 *   bit 6     - MULTIFD_FLAG_DEVICE_STATE
 */
#define MULTIFD_FLAG_SYNC (1 << 0)

/* We reserve 5 bits for compression methods */
#define MULTIFD_FLAG_COMPRESSION_MASK (0x1f << 1)
/* we need to be compatible. Before compression value was 0 */
#define MULTIFD_FLAG_NOCOMP (0 << 1)
/* Each method below occupies its own bit inside the compression mask */
#define MULTIFD_FLAG_ZLIB (1 << 1)
#define MULTIFD_FLAG_ZSTD (2 << 1)
#define MULTIFD_FLAG_QPL (4 << 1)
#define MULTIFD_FLAG_UADK (8 << 1)
#define MULTIFD_FLAG_QATZIP (16 << 1)

/*
 * If set it means that this packet contains device state
 * (MultiFDPacketDeviceState_t), not RAM data (MultiFDPacket_t).
 *
 * This is the first bit above MULTIFD_FLAG_COMPRESSION_MASK.
 */
#define MULTIFD_FLAG_DEVICE_STATE (32 << 1)

/*
 * This value needs to be a multiple of qemu_target_page_size().
 * multifd_ram_page_count() divides it by the target page size.
 */
#define MULTIFD_PACKET_SIZE (512 * 1024)
73
/*
 * Header shared by both packet types (MultiFDPacket_t and
 * MultiFDPacketDeviceState_t), each of which embeds it as its first
 * member.  The struct is packed, so the three fields are laid out
 * back-to-back without padding.
 */
typedef struct {
    uint32_t magic;
    uint32_t version;
    /* presumably a combination of the MULTIFD_FLAG_* bits - confirm */
    uint32_t flags;
} __attribute__((packed)) MultiFDPacketHdr_t;
79
/*
 * Packet describing RAM data.  Packed: the field order and sizes define
 * the layout, so fields must not be reordered or resized.  The fixed part
 * is followed by a variable number of offset[] entries.
 */
typedef struct {
    MultiFDPacketHdr_t hdr;

    /* maximum number of allocated pages */
    uint32_t pages_alloc;
    /* non zero pages */
    uint32_t normal_pages;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    uint64_t packet_num;
    /* zero pages */
    uint32_t zero_pages;
    uint32_t unused32[1];    /* Reserved for future use */
    uint64_t unused64[3];    /* Reserved for future use */
    char ramblock[256];
    /*
     * This array contains one entry per page, in this order:
     *  - normal pages (initial normal_pages entries)
     *  - zero pages (following zero_pages entries)
     *
     * NOTE(review): the field name suggests each entry is a page offset
     * rather than a host pointer - confirm against the fill/unfill code.
     */
    uint64_t offset[];
} __attribute__((packed)) MultiFDPacket_t;
102
/*
 * Packet describing device state; selected instead of MultiFDPacket_t
 * when MULTIFD_FLAG_DEVICE_STATE is set.  Packed, so the field order and
 * sizes define the layout.
 */
typedef struct {
    MultiFDPacketHdr_t hdr;

    /* identifies the device together with instance_id (TODO confirm) */
    char idstr[256];
    uint32_t instance_id;

    /* size of the next packet that contains the actual data */
    uint32_t next_packet_size;
} __attribute__((packed)) MultiFDPacketDeviceState_t;
112
/*
 * RAM payload: a set of pages of one ramblock, described by their offsets.
 */
typedef struct {
    /* number of used pages */
    uint32_t num;
    /* number of normal pages */
    uint32_t normal_num;
    /*
     * Pointer to the ramblock.  NOTE: it's caller's responsibility to make
     * sure the pointer is always valid!
     */
    RAMBlock *block;
    /* offset array of each page, managed by multifd */
    ram_addr_t *offset;
} MultiFDPages_t;
126
/*
 * Receive-side data descriptor, handed out by multifd_get_recv_data().
 */
struct MultiFDRecvData {
    /* NOTE(review): untyped pointer; presumably the destination buffer */
    void *opaque;
    size_t size;
    /* for preadv */
    off_t file_offset;
};
133
/*
 * Device state payload: an id string / instance id pair plus a buffer of
 * buf_len bytes.
 *
 * NOTE(review): ownership of idstr and buf is not visible here; see
 * multifd_send_data_clear_device_state() for how they are released.
 */
typedef struct {
    char *idstr;
    uint32_t instance_id;
    char *buf;
    size_t buf_len;
} MultiFDDeviceState_t;
140
/*
 * Tag stored in MultiFDSendData.type telling which kind of payload, if
 * any, the send data currently carries.
 */
typedef enum {
    /* no payload; the state multifd_payload_empty() tests for */
    MULTIFD_PAYLOAD_NONE,
    MULTIFD_PAYLOAD_RAM,
    MULTIFD_PAYLOAD_DEVICE_STATE,
} MultiFDPayloadType;
146
/*
 * Storage for every payload kind.  This is a struct, not a union, so the
 * ram and device_state members do not overlap in memory.
 */
typedef struct MultiFDPayload {
    MultiFDPages_t ram;
    MultiFDDeviceState_t device_state;
} MultiFDPayload;
151
/*
 * Unit of work on the send side: a payload type tag plus the payload
 * storage.  Set the tag via multifd_set_payload_type() and query it via
 * multifd_payload_empty() / multifd_payload_device_state().
 */
struct MultiFDSendData {
    MultiFDPayloadType type;
    MultiFDPayload u;
};
156
/*
 * Report whether @data currently carries no payload, i.e. whether its
 * type tag is MULTIFD_PAYLOAD_NONE.
 */
static inline bool multifd_payload_empty(MultiFDSendData *data)
{
    if (data->type != MULTIFD_PAYLOAD_NONE) {
        return false;
    }

    return true;
}
161
/*
 * Report whether @data is tagged as carrying a device state payload
 * (MULTIFD_PAYLOAD_DEVICE_STATE).
 */
static inline bool multifd_payload_device_state(MultiFDSendData *data)
{
    if (data->type == MULTIFD_PAYLOAD_DEVICE_STATE) {
        return true;
    }

    return false;
}
166
/*
 * Tag an empty @data with the payload @type it is about to carry.
 *
 * @data: send data whose type is currently MULTIFD_PAYLOAD_NONE
 * @type: new payload type; must not be MULTIFD_PAYLOAD_NONE
 *
 * Both preconditions are enforced with assert(), so this cannot be used
 * to re-tag data that already has a payload, nor to reset the tag back to
 * "none".
 */
static inline void multifd_set_payload_type(MultiFDSendData *data,
                                            MultiFDPayloadType type)
{
    /* the previous payload, if any, must have been cleared first */
    assert(multifd_payload_empty(data));
    assert(type != MULTIFD_PAYLOAD_NONE);

    data->type = type;
}
175
/*
 * Per-channel state of the sending side.  The fields fall into three
 * groups, delimited by the comments below: fields written only at
 * creation/deletion time, fields shared between the requesters and the
 * sender thread, and fields private to the sender thread.
 */
typedef struct {
    /* Fields are only written at creating/deletion time */
    /* No lock required for them, they are read only */

    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    bool thread_created;
    QemuThread tls_thread;
    bool tls_thread_created;
    /* communication channel */
    QIOChannel *c;
    /* packet allocated len */
    uint32_t packet_len;
    /* multifd flags for sending ram */
    int write_flags;

    /* sem where to wait for more work */
    QemuSemaphore sem;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;

    /* multifd flags for each packet */
    uint32_t flags;
    /*
     * The sender thread has work to do if either of below field is set.
     *
     * @pending_job: a job is pending
     * @pending_sync: a sync request is pending
     *
     * For both of these fields, they're only set by the requesters, and
     * cleared by the multifd sender threads.
     */
    bool pending_job;
    MultiFDSyncReq pending_sync;

    /* NOTE(review): presumably the payload of the pending job - confirm */
    MultiFDSendData *data;

    /* thread local variables. No locking required */

    /* pointers to the possible packet types */
    MultiFDPacket_t *packet;
    MultiFDPacketDeviceState_t *packet_device_state;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    /* packets sent through this channel */
    uint64_t packets_sent;
    /* buffers to send */
    struct iovec *iov;
    /* number of iovs used */
    uint32_t iovs_num;
    /* used for compression methods */
    void *compress_data;
} MultiFDSendParams;
233
/*
 * Per-channel state of the receiving side.  The fields fall into three
 * groups, delimited by the comments below: fields written only at
 * creation/deletion time, fields protected by @mutex, and fields private
 * to the receiving thread.
 */
typedef struct {
    /* Fields are only written at creating/deletion time */
    /* No lock required for them, they are read only */

    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    bool thread_created;
    /* communication channel */
    QIOChannel *c;
    /* packet allocated len */
    uint32_t packet_len;

    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
    /* sem where to wait for more work */
    QemuSemaphore sem;

    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* should this thread finish */
    bool quit;
    /* multifd flags for each packet */
    uint32_t flags;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* note: an int here, whereas MultiFDSendParams.pending_job is a bool */
    int pending_job;
    MultiFDRecvData *data;

    /* thread local variables. No locking required */

    /* pointers to the possible packet types */
    MultiFDPacket_t *packet;
    MultiFDPacketDeviceState_t *packet_dev_state;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    /* packets received through this channel */
    uint64_t packets_recved;
    /* ramblock */
    RAMBlock *block;
    /* ramblock host address */
    uint8_t *host;
    /* buffers to recv */
    struct iovec *iov;
    /* Pages that are not zero */
    ram_addr_t *normal;
    /* num of non zero pages */
    uint32_t normal_num;
    /* Pages that are zero */
    ram_addr_t *zero;
    /* num of zero pages */
    uint32_t zero_num;
    /* used for de-compression methods */
    void *compress_data;
    /* Flags for the QIOChannel */
    int read_flags;
} MultiFDRecvParams;
294
/*
 * Table of hooks implemented by a multifd method.  A table is handed to
 * multifd_register_ops() together with the method number it serves.
 */
typedef struct {
    /*
     * The send_setup, send_cleanup, send_prepare are only called on
     * the QEMU instance at the migration source.
     */

    /*
     * Setup for sending side. Called once per channel during channel
     * setup phase.
     *
     * Must allocate p->iov. If packets are in use (default), one
     * extra iovec must be allocated for the packet header. Any memory
     * allocated in this hook must be released at send_cleanup.
     *
     * p->write_flags may be used for passing flags to the QIOChannel.
     *
     * p->compress_data may be used by compression methods to store
     * compression data.
     */
    int (*send_setup)(MultiFDSendParams *p, Error **errp);

    /*
     * Cleanup for sending side. Called once per channel during
     * channel cleanup phase. Must release whatever send_setup
     * allocated.
     */
    void (*send_cleanup)(MultiFDSendParams *p, Error **errp);

    /*
     * Prepare the send packet. Called as a result of multifd_send()
     * on the client side, with p pointing to the MultiFDSendParams of
     * a channel that is currently idle.
     *
     * Must populate p->iov with the data to be sent, increment
     * p->iovs_num to match the amount of iovecs used and set
     * p->next_packet_size with the amount of data currently present
     * in p->iov.
     *
     * Must indicate whether this is a compression packet by setting
     * p->flags.
     *
     * As a last step, if packets are in use (default), must prepare
     * the packet by calling multifd_send_fill_packet().
     */
    int (*send_prepare)(MultiFDSendParams *p, Error **errp);

    /*
     * The recv_setup, recv_cleanup, recv are only called on the QEMU
     * instance at the migration destination.
     */

    /*
     * Setup for receiving side. Called once per channel during
     * channel setup phase. May be empty.
     *
     * May allocate data structures for the receiving of data. May use
     * p->iov. Compression methods may use p->compress_data.
     */
    int (*recv_setup)(MultiFDRecvParams *p, Error **errp);

    /*
     * Cleanup for receiving side. Called once per channel during
     * channel cleanup phase. May be empty.
     */
    void (*recv_cleanup)(MultiFDRecvParams *p);

    /*
     * Data receive method. Called as a result of multifd_recv() on
     * the client side, with p pointing to the MultiFDRecvParams of a
     * channel that is currently idle. Only called if there is data
     * available to receive.
     *
     * Must validate p->flags according to what was set at
     * send_prepare.
     *
     * Must read the data from the QIOChannel p->c.
     */
    int (*recv)(MultiFDRecvParams *p, Error **errp);
} MultiFDMethods;
373
/*
 * Registers @ops as the MultiFDMethods hook table for @method.
 * NOTE(review): @method is presumably a compression method id - confirm.
 */
void multifd_register_ops(int method, const MultiFDMethods *ops);
/* Helpers operating on a single send/receive channel. */
void multifd_send_fill_packet(MultiFDSendParams *p);
bool multifd_send_prepare_common(MultiFDSendParams *p);
void multifd_send_zero_page_detect(MultiFDSendParams *p);
void multifd_recv_zero_page_process(MultiFDRecvParams *p);

void multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc);
/*
 * Takes a pointer to the caller's MultiFDSendData pointer.
 * NOTE(review): the double indirection suggests the callee may replace
 * *send_data - confirm against the implementation.
 */
bool multifd_send(MultiFDSendData **send_data);
/* Allocation, reset and release of MultiFDSendData objects. */
MultiFDSendData *multifd_send_data_alloc(void);
void multifd_send_data_clear(MultiFDSendData *data);
void multifd_send_data_free(MultiFDSendData *data);
385
/*
 * Page size used for multifd RAM payloads: the value reported by
 * qemu_target_page_size(), returned as a uint32_t.
 */
static inline uint32_t multifd_ram_page_size(void)
{
    uint32_t page_size = qemu_target_page_size();

    return page_size;
}
390
multifd_ram_page_count(void)391 static inline uint32_t multifd_ram_page_count(void)
392 {
393 return MULTIFD_PACKET_SIZE / qemu_target_page_size();
394 }
395
/* RAM-specific multifd helpers; defined outside this header. */
void multifd_ram_save_setup(void);
void multifd_ram_save_cleanup(void);
int multifd_ram_flush_and_sync(QEMUFile *f);
bool multifd_ram_sync_per_round(void);
bool multifd_ram_sync_per_section(void);
/* Allocate / release the contents of a MultiFDPages_t payload. */
void multifd_ram_payload_alloc(MultiFDPages_t *pages);
void multifd_ram_payload_free(MultiFDPages_t *pages);
/* Fill (send side) and parse (receive side) a RAM packet. */
void multifd_ram_fill_packet(MultiFDSendParams *p);
int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp);

/* Device-state-specific multifd helpers; defined outside this header. */
void multifd_send_data_clear_device_state(MultiFDDeviceState_t *device_state);

void multifd_device_state_send_setup(void);
void multifd_device_state_send_cleanup(void);

void multifd_device_state_send_prepare(MultiFDSendParams *p);
412
413 #endif
414