1 /* 2 * Multifd RAM migration without compression 3 * 4 * Copyright (c) 2019-2020 Red Hat Inc 5 * 6 * Authors: 7 * Juan Quintela <quintela@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 13 #include "qemu/osdep.h" 14 #include "exec/ramblock.h" 15 #include "exec/target_page.h" 16 #include "file.h" 17 #include "multifd.h" 18 #include "options.h" 19 #include "qapi/error.h" 20 #include "qemu/error-report.h" 21 #include "trace.h" 22 23 static MultiFDSendData *multifd_ram_send; 24 25 size_t multifd_ram_payload_size(void) 26 { 27 uint32_t n = multifd_ram_page_count(); 28 29 /* 30 * We keep an array of page offsets at the end of MultiFDPages_t, 31 * add space for it in the allocation. 32 */ 33 return sizeof(MultiFDPages_t) + n * sizeof(ram_addr_t); 34 } 35 36 void multifd_ram_save_setup(void) 37 { 38 multifd_ram_send = multifd_send_data_alloc(); 39 } 40 41 void multifd_ram_save_cleanup(void) 42 { 43 g_free(multifd_ram_send); 44 multifd_ram_send = NULL; 45 } 46 47 static void multifd_set_file_bitmap(MultiFDSendParams *p) 48 { 49 MultiFDPages_t *pages = &p->data->u.ram; 50 51 assert(pages->block); 52 53 for (int i = 0; i < pages->normal_num; i++) { 54 ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], true); 55 } 56 57 for (int i = pages->normal_num; i < pages->num; i++) { 58 ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], false); 59 } 60 } 61 62 static int multifd_nocomp_send_setup(MultiFDSendParams *p, Error **errp) 63 { 64 uint32_t page_count = multifd_ram_page_count(); 65 66 if (migrate_zero_copy_send()) { 67 p->write_flags |= QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; 68 } 69 70 if (!migrate_mapped_ram()) { 71 /* We need one extra place for the packet header */ 72 p->iov = g_new0(struct iovec, page_count + 1); 73 } else { 74 p->iov = g_new0(struct iovec, page_count); 75 } 76 77 return 0; 78 } 79 80 static void multifd_nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) 81 { 82 g_free(p->iov); 83 p->iov = NULL; 84 return; 85 } 86 87 static void multifd_send_prepare_iovs(MultiFDSendParams *p) 88 { 89 MultiFDPages_t *pages = &p->data->u.ram; 90 uint32_t page_size = multifd_ram_page_size(); 91 92 for (int i = 0; i < pages->normal_num; i++) { 93 p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; 94 p->iov[p->iovs_num].iov_len = page_size; 95 p->iovs_num++; 96 } 97 98 p->next_packet_size = pages->normal_num * page_size; 99 } 100 101 static int multifd_nocomp_send_prepare(MultiFDSendParams *p, Error **errp) 102 { 103 bool use_zero_copy_send = migrate_zero_copy_send(); 104 int ret; 105 106 multifd_send_zero_page_detect(p); 107 108 if (migrate_mapped_ram()) { 109 multifd_send_prepare_iovs(p); 110 multifd_set_file_bitmap(p); 111 112 return 0; 113 } 114 115 if (!use_zero_copy_send) { 116 /* 117 * Only !zerocopy needs the header in IOV; zerocopy will 118 * send it separately. 119 */ 120 multifd_send_prepare_header(p); 121 } 122 123 multifd_send_prepare_iovs(p); 124 p->flags |= MULTIFD_FLAG_NOCOMP; 125 126 multifd_send_fill_packet(p); 127 128 if (use_zero_copy_send) { 129 /* Send header first, without zerocopy */ 130 ret = qio_channel_write_all(p->c, (void *)p->packet, 131 p->packet_len, errp); 132 if (ret != 0) { 133 return -1; 134 } 135 } 136 137 return 0; 138 } 139 140 static int multifd_nocomp_recv_setup(MultiFDRecvParams *p, Error **errp) 141 { 142 p->iov = g_new0(struct iovec, multifd_ram_page_count()); 143 return 0; 144 } 145 146 static void multifd_nocomp_recv_cleanup(MultiFDRecvParams *p) 147 { 148 g_free(p->iov); 149 p->iov = NULL; 150 } 151 152 static int multifd_nocomp_recv(MultiFDRecvParams *p, Error **errp) 153 { 154 uint32_t flags; 155 156 if (migrate_mapped_ram()) { 157 return multifd_file_recv_data(p, errp); 158 } 159 160 flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; 161 162 if (flags != MULTIFD_FLAG_NOCOMP) { 163 error_setg(errp, "multifd %u: flags received %x flags expected %x", 164 p->id, flags, MULTIFD_FLAG_NOCOMP); 165 return -1; 166 } 167 168 multifd_recv_zero_page_process(p); 169 170 if (!p->normal_num) { 171 return 0; 172 } 173 174 for (int i = 0; i < p->normal_num; i++) { 175 p->iov[i].iov_base = p->host + p->normal[i]; 176 p->iov[i].iov_len = multifd_ram_page_size(); 177 ramblock_recv_bitmap_set_offset(p->block, p->normal[i]); 178 } 179 return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp); 180 } 181 182 static void multifd_pages_reset(MultiFDPages_t *pages) 183 { 184 /* 185 * We don't need to touch offset[] array, because it will be 186 * overwritten later when reused. 187 */ 188 pages->num = 0; 189 pages->normal_num = 0; 190 pages->block = NULL; 191 } 192 193 void multifd_ram_fill_packet(MultiFDSendParams *p) 194 { 195 MultiFDPacket_t *packet = p->packet; 196 MultiFDPages_t *pages = &p->data->u.ram; 197 uint32_t zero_num = pages->num - pages->normal_num; 198 199 packet->pages_alloc = cpu_to_be32(multifd_ram_page_count()); 200 packet->normal_pages = cpu_to_be32(pages->normal_num); 201 packet->zero_pages = cpu_to_be32(zero_num); 202 203 if (pages->block) { 204 strncpy(packet->ramblock, pages->block->idstr, 256); 205 } 206 207 for (int i = 0; i < pages->num; i++) { 208 /* there are architectures where ram_addr_t is 32 bit */ 209 uint64_t temp = pages->offset[i]; 210 211 packet->offset[i] = cpu_to_be64(temp); 212 } 213 214 trace_multifd_send_ram_fill(p->id, pages->normal_num, 215 zero_num); 216 } 217 218 int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp) 219 { 220 MultiFDPacket_t *packet = p->packet; 221 uint32_t page_count = multifd_ram_page_count(); 222 uint32_t page_size = multifd_ram_page_size(); 223 int i; 224 225 packet->pages_alloc = be32_to_cpu(packet->pages_alloc); 226 /* 227 * If we received a packet that is 100 times bigger than expected 228 * just stop migration. It is a magic number. 229 */ 230 if (packet->pages_alloc > page_count) { 231 error_setg(errp, "multifd: received packet " 232 "with size %u and expected a size of %u", 233 packet->pages_alloc, page_count) ; 234 return -1; 235 } 236 237 p->normal_num = be32_to_cpu(packet->normal_pages); 238 if (p->normal_num > packet->pages_alloc) { 239 error_setg(errp, "multifd: received packet " 240 "with %u normal pages and expected maximum pages are %u", 241 p->normal_num, packet->pages_alloc) ; 242 return -1; 243 } 244 245 p->zero_num = be32_to_cpu(packet->zero_pages); 246 if (p->zero_num > packet->pages_alloc - p->normal_num) { 247 error_setg(errp, "multifd: received packet " 248 "with %u zero pages and expected maximum zero pages are %u", 249 p->zero_num, packet->pages_alloc - p->normal_num) ; 250 return -1; 251 } 252 253 if (p->normal_num == 0 && p->zero_num == 0) { 254 return 0; 255 } 256 257 /* make sure that ramblock is 0 terminated */ 258 packet->ramblock[255] = 0; 259 p->block = qemu_ram_block_by_name(packet->ramblock); 260 if (!p->block) { 261 error_setg(errp, "multifd: unknown ram block %s", 262 packet->ramblock); 263 return -1; 264 } 265 266 p->host = p->block->host; 267 for (i = 0; i < p->normal_num; i++) { 268 uint64_t offset = be64_to_cpu(packet->offset[i]); 269 270 if (offset > (p->block->used_length - page_size)) { 271 error_setg(errp, "multifd: offset too long %" PRIu64 272 " (max " RAM_ADDR_FMT ")", 273 offset, p->block->used_length); 274 return -1; 275 } 276 p->normal[i] = offset; 277 } 278 279 for (i = 0; i < p->zero_num; i++) { 280 uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]); 281 282 if (offset > (p->block->used_length - page_size)) { 283 error_setg(errp, "multifd: offset too long %" PRIu64 284 " (max " RAM_ADDR_FMT ")", 285 offset, p->block->used_length); 286 return -1; 287 } 288 p->zero[i] = offset; 289 } 290 291 return 0; 292 } 293 294 static inline bool multifd_queue_empty(MultiFDPages_t *pages) 295 { 296 return pages->num == 0; 297 } 298 299 static inline bool multifd_queue_full(MultiFDPages_t *pages) 300 { 301 return pages->num == multifd_ram_page_count(); 302 } 303 304 static inline void multifd_enqueue(MultiFDPages_t *pages, ram_addr_t offset) 305 { 306 pages->offset[pages->num++] = offset; 307 } 308 309 /* Returns true if enqueue successful, false otherwise */ 310 bool multifd_queue_page(RAMBlock *block, ram_addr_t offset) 311 { 312 MultiFDPages_t *pages; 313 314 retry: 315 pages = &multifd_ram_send->u.ram; 316 317 if (multifd_payload_empty(multifd_ram_send)) { 318 multifd_pages_reset(pages); 319 multifd_set_payload_type(multifd_ram_send, MULTIFD_PAYLOAD_RAM); 320 } 321 322 /* If the queue is empty, we can already enqueue now */ 323 if (multifd_queue_empty(pages)) { 324 pages->block = block; 325 multifd_enqueue(pages, offset); 326 return true; 327 } 328 329 /* 330 * Not empty, meanwhile we need a flush. It can because of either: 331 * 332 * (1) The page is not on the same ramblock of previous ones, or, 333 * (2) The queue is full. 334 * 335 * After flush, always retry. 336 */ 337 if (pages->block != block || multifd_queue_full(pages)) { 338 if (!multifd_send(&multifd_ram_send)) { 339 return false; 340 } 341 goto retry; 342 } 343 344 /* Not empty, and we still have space, do it! */ 345 multifd_enqueue(pages, offset); 346 return true; 347 } 348 349 int multifd_ram_flush_and_sync(void) 350 { 351 if (!migrate_multifd()) { 352 return 0; 353 } 354 355 if (!multifd_payload_empty(multifd_ram_send)) { 356 if (!multifd_send(&multifd_ram_send)) { 357 error_report("%s: multifd_send fail", __func__); 358 return -1; 359 } 360 } 361 362 return multifd_send_sync_main(); 363 } 364 365 bool multifd_send_prepare_common(MultiFDSendParams *p) 366 { 367 MultiFDPages_t *pages = &p->data->u.ram; 368 multifd_send_zero_page_detect(p); 369 370 if (!pages->normal_num) { 371 p->next_packet_size = 0; 372 return false; 373 } 374 375 multifd_send_prepare_header(p); 376 377 return true; 378 } 379 380 static MultiFDMethods multifd_nocomp_ops = { 381 .send_setup = multifd_nocomp_send_setup, 382 .send_cleanup = multifd_nocomp_send_cleanup, 383 .send_prepare = multifd_nocomp_send_prepare, 384 .recv_setup = multifd_nocomp_recv_setup, 385 .recv_cleanup = multifd_nocomp_recv_cleanup, 386 .recv = multifd_nocomp_recv 387 }; 388 389 static void multifd_nocomp_register(void) 390 { 391 multifd_register_ops(MULTIFD_COMPRESSION_NONE, &multifd_nocomp_ops); 392 } 393 394 migration_init(multifd_nocomp_register); 395