/*
 * Declarations for cpu physical memory functions
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Avi Kivity <avi@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 *
 */

/*
 * This header is for use by exec.c and memory.c ONLY.  Do not include it.
 * The functions declared here will be removed soon.
 */

#ifndef RAM_ADDR_H
#define RAM_ADDR_H

#ifndef CONFIG_USER_ONLY
#include "cpu.h"
#include "hw/xen/xen.h"
#include "sysemu/tcg.h"
#include "exec/ramlist.h"

struct RAMBlock {
    struct rcu_head rcu;
    struct MemoryRegion *mr;
    uint8_t *host;
    uint8_t *colo_cache; /* For colo, VM's ram cache */
    ram_addr_t offset;
    ram_addr_t used_length;
    ram_addr_t max_length;
    void (*resized)(const char*, uint64_t length, void *host);
    uint32_t flags;
    /* Protected by iothread lock.  */
    char idstr[256];
    /* RCU-enabled, writes protected by the ramlist lock */
    QLIST_ENTRY(RAMBlock) next;
    QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
    int fd;
    size_t page_size;
    /* dirty bitmap used during migration */
    unsigned long *bmap;
    /* bitmap of pages that haven't been sent even once; currently only
     * maintained and used in postcopy, where it is used to send the
     * dirty bitmap at the start of the postcopy phase
     */
    unsigned long *unsentmap;
    /* bitmap of already received pages in postcopy */
    unsigned long *receivedmap;

    /*
     * Bitmap tracking which chunks of the dirty bitmap still need a
     * log-clear: when a bit is set, the corresponding memory chunk needs
     * a log-clear.  Set this to non-NULL to enable postponing and
     * splitting the clearing of the dirty bitmap on the remote node
     * (e.g., KVM).  The bitmap is set only when doing a global sync.
     *
     * NOTE: this bitmap differs from the other bitmaps in that one bit
     * can represent multiple guest pages (the granularity is decided by
     * the `clear_bmap_shift' field below).  On the destination side this
     * should always be NULL, and `clear_bmap_shift' is meaningless.
     */
    unsigned long *clear_bmap;
    uint8_t clear_bmap_shift;
};

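/*
 * Illustrative sketch only (not part of this header's API): one clear_bmap
 * bit covers 2^clear_bmap_shift guest pages, so the bit responsible for a
 * given page is found by shifting the page number right.  The helper name
 * below is an assumption made up for the example; the real helpers follow.
 */
static inline uint64_t clear_bmap_bit_of_page_sketch(const RAMBlock *rb,
                                                     uint64_t page)
{
    /* e.g. with clear_bmap_shift == 6, pages 0..63 all map to bit 0 */
    return page >> rb->clear_bmap_shift;
}
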
/**
 * clear_bmap_size: calculate clear bitmap size
 *
 * @pages: number of guest pages
 * @shift: guest page number shift
 *
 * Returns: number of bits for the clear bitmap
 */
static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
{
    return DIV_ROUND_UP(pages, 1UL << shift);
}

/**
 * clear_bmap_set: set clear bitmap for the page range
 *
 * @rb: the ramblock to operate on
 * @start: the start page number
 * @npages: number of pages to set in the bitmap
 *
 * Returns: None
 */
static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
                                  uint64_t npages)
{
    uint8_t shift = rb->clear_bmap_shift;

    bitmap_set_atomic(rb->clear_bmap, start >> shift,
                      clear_bmap_size(npages, shift));
}

/**
 * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set
 *
 * @rb: the ramblock to operate on
 * @page: the page number to check
 *
 * Returns: true if the bit was set, false otherwise
 */
static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
{
    uint8_t shift = rb->clear_bmap_shift;

    return bitmap_test_and_clear_atomic(rb->clear_bmap, page >> shift, 1);
}

static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
{
    return (b && b->host && offset < b->used_length) ? true : false;
}

static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
{
    assert(offset_in_ramblock(block, offset));
    return (char *)block->host + offset;
}

static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
                                                            RAMBlock *rb)
{
    uint64_t host_addr_offset =
            (uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
    return host_addr_offset >> TARGET_PAGE_BITS;
}

bool ramblock_is_pmem(RAMBlock *rb);

long qemu_minrampagesize(void);
long qemu_maxrampagesize(void);

/**
 * qemu_ram_alloc_from_file,
 * qemu_ram_alloc_from_fd:  Allocate a ram block from the specified backing
 *                          file or device
 *
 * Parameters:
 *  @size: the size in bytes of the ram block
 *  @mr: the memory region where the ram block is
 *  @ram_flags: specify the properties of the ram block, which can be one
 *              of, or a bit-or of, the following values:
 *              - RAM_SHARED: mmap the backing file or device with MAP_SHARED
 *              - RAM_PMEM: the backend @mem_path or @fd is persistent memory
 *              Other bits are ignored.
 *  @mem_path or @fd: specify the backing file or device
 *  @errp: pointer to Error*, to store an error if one occurs
 *
 * Return:
 *  On success, return a pointer to the ram block.
 *  On failure, return NULL.
 */
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   uint32_t ram_flags, const char *mem_path,
                                   Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                 uint32_t ram_flags, int fd,
                                 Error **errp);

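/*
 * Illustrative sketch only, not a QEMU API: how a caller might back a
 * MemoryRegion's RAM with a shared file.  The wrapper name and the "path"
 * parameter are assumptions for the example; errors are simply propagated
 * through @errp as documented above.
 */
static inline RAMBlock *ram_block_from_file_sketch(MemoryRegion *mr,
                                                   ram_addr_t size,
                                                   const char *path,
                                                   Error **errp)
{
    /* RAM_SHARED: mmap the file with MAP_SHARED; add RAM_PMEM for pmem */
    return qemu_ram_alloc_from_file(size, mr, RAM_SHARED, path, errp);
}
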
RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr,
                         Error **errp);
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
                                    void (*resized)(const char*,
                                                    uint64_t length,
                                                    void *host),
                                    MemoryRegion *mr, Error **errp);
void qemu_ram_free(RAMBlock *block);

int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);

#define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
#define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))

void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end);

static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = false;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_bit(blocks->blocks[idx], num, offset);
        if (found < num) {
            dirty = true;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    rcu_read_unlock();

    return dirty;
}

static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = true;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_zero_bit(blocks->blocks[idx], num, offset);
        if (found < num) {
            dirty = false;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    rcu_read_unlock();

    return dirty;
}

static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    return cpu_physical_memory_get_dirty(addr, 1, client);
}

static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
{
    bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
    bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
    bool migration =
        cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    return !(vga && code && migration);
}

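/*
 * Illustrative sketch only: each dirty client's bitmap is stored as an
 * array of blocks of DIRTY_MEMORY_BLOCK_SIZE bits, so a physical address
 * is located by a (block index, bit offset) pair exactly as the loops
 * above compute it.  The helper name is an assumption for the example.
 */
static inline void dirty_bitmap_locate_sketch(ram_addr_t addr,
                                              unsigned long *idx,
                                              unsigned long *offset)
{
    unsigned long page = addr >> TARGET_PAGE_BITS;

    *idx = page / DIRTY_MEMORY_BLOCK_SIZE;    /* which bitmap block */
    *offset = page % DIRTY_MEMORY_BLOCK_SIZE; /* bit within that block */
}
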
static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
                                                               ram_addr_t length,
                                                               uint8_t mask)
{
    uint8_t ret = 0;

    if (mask & (1 << DIRTY_MEMORY_VGA) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) {
        ret |= (1 << DIRTY_MEMORY_VGA);
    }
    if (mask & (1 << DIRTY_MEMORY_CODE) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) {
        ret |= (1 << DIRTY_MEMORY_CODE);
    }
    if (mask & (1 << DIRTY_MEMORY_MIGRATION) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
        ret |= (1 << DIRTY_MEMORY_MIGRATION);
    }
    return ret;
}

static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    unsigned long page, idx, offset;
    DirtyMemoryBlocks *blocks;

    assert(client < DIRTY_MEMORY_NUM);

    page = addr >> TARGET_PAGE_BITS;
    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    set_bit_atomic(offset, blocks->blocks[idx]);

    rcu_read_unlock();
}

static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
                                                       ram_addr_t length,
                                                       uint8_t mask)
{
    DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
    unsigned long end, page;
    unsigned long idx, offset, base;
    int i;

    if (!mask && !xen_enabled()) {
        return;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
        blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i]);
    }

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);

        if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
            bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
                              offset, next - page);
        }
        if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
            bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
                              offset, next - page);
        }
        if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
            bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
                              offset, next - page);
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    rcu_read_unlock();

    xen_hvm_modified_memory(start, length);
}

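/*
 * Illustrative sketch only: a device model that writes guest RAM directly
 * (e.g. via DMA) and does not need TCG self-modifying-code tracking could
 * mark the written range dirty for the remaining clients like this.  The
 * function name is an assumption for the example.
 */
static inline void mark_dma_write_dirty_sketch(ram_addr_t start,
                                               ram_addr_t length)
{
    /* DIRTY_CLIENTS_NOCODE = migration + VGA, but not the code client */
    cpu_physical_memory_set_dirty_range(start, length, DIRTY_CLIENTS_NOCODE);
}
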
#if !defined(_WIN32)
static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
                                                          ram_addr_t start,
                                                          ram_addr_t pages)
{
    unsigned long i, j;
    unsigned long page_number, c;
    hwaddr addr;
    ram_addr_t ram_addr;
    unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
    unsigned long hpratio = getpagesize() / TARGET_PAGE_SIZE;
    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

    /* start address is aligned at the start of a word? */
    if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
        (hpratio == 1)) {
        unsigned long **blocks[DIRTY_MEMORY_NUM];
        unsigned long idx;
        unsigned long offset;
        long k;
        long nr = BITS_TO_LONGS(pages);

        idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
        offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
                          DIRTY_MEMORY_BLOCK_SIZE);

        rcu_read_lock();

        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
        }

        for (k = 0; k < nr; k++) {
            if (bitmap[k]) {
                unsigned long temp = leul_to_cpu(bitmap[k]);

                atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);

                if (global_dirty_log) {
                    atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
                              temp);
                }

                if (tcg_enabled()) {
                    atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], temp);
                }
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }

        rcu_read_unlock();

        xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
    } else {
        uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;

        if (!global_dirty_log) {
            clients &= ~(1 << DIRTY_MEMORY_MIGRATION);
        }

        /*
         * bitmap-traveling is faster than memory-traveling (for addr...)
         * especially when most of the memory is not dirty.
         */
        for (i = 0; i < len; i++) {
            if (bitmap[i] != 0) {
                c = leul_to_cpu(bitmap[i]);
                do {
                    j = ctzl(c);
                    c &= ~(1ul << j);
                    page_number = (i * HOST_LONG_BITS + j) * hpratio;
                    addr = page_number * TARGET_PAGE_SIZE;
                    ram_addr = start + addr;
                    cpu_physical_memory_set_dirty_range(ram_addr,
                                       TARGET_PAGE_SIZE * hpratio, clients);
                } while (c != 0);
            }
        }
    }
}
#endif /* not _WIN32 */

bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client);

DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
    (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client);

bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
                                            ram_addr_t start,
                                            ram_addr_t length);

static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
                                                         ram_addr_t length)
{
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
}

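/*
 * Illustrative sketch only: walking a range page by page, counting and
 * clearing the migration-dirty bits, in the same way as the unaligned
 * slow path of cpu_physical_memory_sync_dirty_bitmap() below.  The helper
 * name is an assumption for the example.
 */
static inline uint64_t count_and_clear_dirty_sketch(ram_addr_t start,
                                                    ram_addr_t length)
{
    uint64_t dirty = 0;
    ram_addr_t addr;

    for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
        if (cpu_physical_memory_test_and_clear_dirty(start + addr,
                                                     TARGET_PAGE_SIZE,
                                                     DIRTY_MEMORY_MIGRATION)) {
            dirty++;
        }
    }
    return dirty;
}
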
/* Called with RCU critical section */
static inline
uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
                                               ram_addr_t start,
                                               ram_addr_t length,
                                               uint64_t *real_dirty_pages)
{
    ram_addr_t addr;
    unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
    uint64_t num_dirty = 0;
    unsigned long *dest = rb->bmap;

    /* Are the start address and length aligned at the start of a word? */
    if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) ==
         (start + rb->offset) &&
        !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
        int k;
        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
        unsigned long * const *src;
        unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = BIT_WORD((word * BITS_PER_LONG) %
                                        DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

        src = atomic_rcu_read(
                &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks;

        for (k = page; k < page + nr; k++) {
            if (src[idx][offset]) {
                unsigned long bits = atomic_xchg(&src[idx][offset], 0);
                unsigned long new_dirty;
                *real_dirty_pages += ctpopl(bits);
                new_dirty = ~dest[k];
                dest[k] |= bits;
                new_dirty &= bits;
                num_dirty += ctpopl(new_dirty);
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }

        if (rb->clear_bmap) {
            /*
             * Postpone the dirty bitmap clear to the point where we
             * actually send the pages; we will also split the clearing
             * of the dirty bitmap into smaller chunks.
             */
            clear_bmap_set(rb, start >> TARGET_PAGE_BITS,
                           length >> TARGET_PAGE_BITS);
        } else {
            /* Slow path - still do that in a huge chunk */
            memory_region_clear_dirty_bitmap(rb->mr, start, length);
        }
    } else {
        ram_addr_t offset = rb->offset;

        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
            if (cpu_physical_memory_test_and_clear_dirty(
                        start + addr + offset,
                        TARGET_PAGE_SIZE,
                        DIRTY_MEMORY_MIGRATION)) {
                *real_dirty_pages += 1;
                long k = (start + addr) >> TARGET_PAGE_BITS;
                if (!test_and_set_bit(k, dest)) {
                    num_dirty++;
                }
            }
        }
    }

    return num_dirty;
}
#endif
#endif