/*
 * Declarations for cpu physical memory functions
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Avi Kivity <avi@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 *
 */

/*
 * This header is for use by exec.c and memory.c ONLY.  Do not include it.
 * The functions declared here will be removed soon.
 */

#ifndef RAM_ADDR_H
#define RAM_ADDR_H

#ifndef CONFIG_USER_ONLY
#include "cpu.h"
#include "hw/xen/xen.h"
#include "sysemu/tcg.h"
#include "exec/ramlist.h"

struct RAMBlock {
    struct rcu_head rcu;
    struct MemoryRegion *mr;
    uint8_t *host;
    uint8_t *colo_cache; /* For colo, VM's ram cache */
    ram_addr_t offset;
    ram_addr_t used_length;
    ram_addr_t max_length;
    void (*resized)(const char*, uint64_t length, void *host);
    uint32_t flags;
    /* Protected by iothread lock.  */
    char idstr[256];
    /* RCU-enabled, writes protected by the ramlist lock */
    QLIST_ENTRY(RAMBlock) next;
    QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
    int fd;
    size_t page_size;
    /* dirty bitmap used during migration */
    unsigned long *bmap;
    /* bitmap of already received pages in postcopy */
    unsigned long *receivedmap;

    /*
     * Bitmap used to track which chunks of the dirty bitmap still need
     * a log-clear: when a bit is set, the corresponding memory chunk
     * needs a log-clear.  Set this to non-NULL to enable postponing and
     * splitting the clearing of the dirty bitmap on the remote node
     * (e.g., KVM).  The bitmap is only set when doing a global sync.
     *
     * NOTE: this bitmap differs from the other bitmaps in that one bit
     * can represent multiple guest pages (as decided by the
     * `clear_bmap_shift' field below).  On the destination side this
     * should always be NULL, and `clear_bmap_shift' is meaningless.
     */
    unsigned long *clear_bmap;
    uint8_t clear_bmap_shift;
};

/**
 * clear_bmap_size: calculate clear bitmap size
 *
 * @pages: number of guest pages
 * @shift: guest page number shift
 *
 * Returns: number of bits for the clear bitmap
 */
static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
{
    return DIV_ROUND_UP(pages, 1UL << shift);
}

/**
 * clear_bmap_set: set clear bitmap for the page range
 *
 * @rb: the ramblock to operate on
 * @start: the start page number
 * @npages: number of pages to set in the bitmap
 *
 * Returns: None
 */
static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
                                  uint64_t npages)
{
    uint8_t shift = rb->clear_bmap_shift;

    bitmap_set_atomic(rb->clear_bmap, start >> shift,
                      clear_bmap_size(npages, shift));
}

/**
 * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set
 *
 * @rb: the ramblock to operate on
 * @page: the page number to check
 *
 * Returns: true if the bit was set, false otherwise
 */
static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
{
    uint8_t shift = rb->clear_bmap_shift;

    return bitmap_test_and_clear_atomic(rb->clear_bmap, page >> shift, 1);
}
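/*
 * Illustrative sketch of the clear_bmap arithmetic (the shift value is
 * only an assumed example; the real one is chosen by the code that
 * allocates the bitmap): with clear_bmap_shift == 18, one clear_bmap
 * bit covers 2^18 guest pages, i.e. 1 GiB of guest RAM with 4 KiB
 * target pages, so a 16 GiB RAMBlock needs only 16 bits:
 *
 *     uint64_t pages = rb->used_length >> TARGET_PAGE_BITS;
 *
 *     rb->clear_bmap_shift = 18;
 *     rb->clear_bmap = bitmap_new(clear_bmap_size(pages, 18));
 *     clear_bmap_set(rb, 0, pages);
 *
 * After that, clear_bmap_test_and_clear(rb, page) returns true exactly
 * once per 1 GiB chunk, telling the caller that the chunk still needs a
 * log-clear on the kernel side.
 */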
static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
{
    return b && b->host && offset < b->used_length;
}

static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
{
    assert(offset_in_ramblock(block, offset));
    return (char *)block->host + offset;
}

static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
                                                            RAMBlock *rb)
{
    uint64_t host_addr_offset =
            (uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
    return host_addr_offset >> TARGET_PAGE_BITS;
}

bool ramblock_is_pmem(RAMBlock *rb);

long qemu_minrampagesize(void);
long qemu_maxrampagesize(void);

/**
 * qemu_ram_alloc_from_file,
 * qemu_ram_alloc_from_fd:  Allocate a ram block from the specified backing
 *                          file or device
 *
 * Parameters:
 *  @size: the size in bytes of the ram block
 *  @mr: the memory region where the ram block is
 *  @ram_flags: the properties of the ram block, which can be one of the
 *              following values or a bit-OR combination of them:
 *              - RAM_SHARED: mmap the backing file or device with MAP_SHARED
 *              - RAM_PMEM: the backend @mem_path or @fd is persistent memory
 *              Other bits are ignored.
 *  @mem_path or @fd: specify the backing file or device
 *  @errp: pointer to Error*, to store an error if it happens
 *
 * Return:
 *  On success, return a pointer to the ram block.
 *  On failure, return NULL.
 */
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   uint32_t ram_flags, const char *mem_path,
                                   Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                 uint32_t ram_flags, int fd,
                                 Error **errp);
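/*
 * Illustrative usage sketch (assumed caller code, not part of this
 * header): a memory backend that wants a shared, file-backed block
 * would do something along the lines of
 *
 *     Error *err = NULL;
 *     RAMBlock *rb = qemu_ram_alloc_from_file(size, mr, RAM_SHARED,
 *                                             "/dev/hugepages/guest.ram",
 *                                             &err);
 *     if (!rb) {
 *         error_propagate(errp, err);
 *     }
 *
 * The path above is made up for the example; real callers take it from
 * the memory backend configuration (e.g. memory-backend-file).
 */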
RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr,
                         Error **errp);
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
                                    void (*resized)(const char*,
                                                    uint64_t length,
                                                    void *host),
                                    MemoryRegion *mr, Error **errp);
void qemu_ram_free(RAMBlock *block);

int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);

void qemu_ram_writeback(RAMBlock *block, ram_addr_t start, ram_addr_t length);

/* Write back the whole block of memory */
static inline void qemu_ram_block_writeback(RAMBlock *block)
{
    qemu_ram_writeback(block, 0, block->used_length);
}

#define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
#define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))

void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end);

static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = false;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
            unsigned long num = next - base;
            unsigned long found = find_next_bit(blocks->blocks[idx],
                                                num, offset);
            if (found < num) {
                dirty = true;
                break;
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    return dirty;
}
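/*
 * Worked example of the chunked layout used by the dirty bitmap helpers
 * in this file (values are illustrative only): each per-client dirty
 * bitmap is split into chunks of DIRTY_MEMORY_BLOCK_SIZE pages, so a
 * page number decomposes into a chunk index and a bit offset inside it:
 *
 *     unsigned long page   = addr >> TARGET_PAGE_BITS;
 *     unsigned long idx    = page / DIRTY_MEMORY_BLOCK_SIZE;
 *     unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
 *     bool dirty = test_bit(offset, blocks->blocks[idx]);
 *
 * The range walkers above and below reset offset to 0 and advance idx
 * each time they cross into the next chunk.
 */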
static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = true;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    RCU_READ_LOCK_GUARD();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_zero_bit(blocks->blocks[idx],
                                                 num, offset);
        if (found < num) {
            dirty = false;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    return dirty;
}

static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    return cpu_physical_memory_get_dirty(addr, 1, client);
}

static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
{
    bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
    bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
    bool migration =
        cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    return !(vga && code && migration);
}

static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
                                                               ram_addr_t length,
                                                               uint8_t mask)
{
    uint8_t ret = 0;

    if (mask & (1 << DIRTY_MEMORY_VGA) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) {
        ret |= (1 << DIRTY_MEMORY_VGA);
    }
    if (mask & (1 << DIRTY_MEMORY_CODE) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) {
        ret |= (1 << DIRTY_MEMORY_CODE);
    }
    if (mask & (1 << DIRTY_MEMORY_MIGRATION) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
        ret |= (1 << DIRTY_MEMORY_MIGRATION);
    }
    return ret;
}

static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    unsigned long page, idx, offset;
    DirtyMemoryBlocks *blocks;

    assert(client < DIRTY_MEMORY_NUM);

    page = addr >> TARGET_PAGE_BITS;
    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;

    RCU_READ_LOCK_GUARD();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    set_bit_atomic(offset, blocks->blocks[idx]);
}

static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
                                                       ram_addr_t length,
                                                       uint8_t mask)
{
    DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
    unsigned long end, page;
    unsigned long idx, offset, base;
    int i;

    if (!mask && !xen_enabled()) {
        return;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i]);
        }

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);

            if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
                                  offset, next - page);
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    xen_hvm_modified_memory(start, length);
}
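/*
 * Illustrative example of the client mask (assumed caller code, not part
 * of this header): device DMA into guest RAM typically dirties every
 * client except the TCG code bitmap when TCG is not in use:
 *
 *     cpu_physical_memory_set_dirty_range(addr, len,
 *                                         tcg_enabled() ?
 *                                         DIRTY_CLIENTS_ALL :
 *                                         DIRTY_CLIENTS_NOCODE);
 *
 * The same convention is used by cpu_physical_memory_set_dirty_lebitmap()
 * below when it falls back to the per-page slow path.
 */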
#if !defined(_WIN32)
static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
                                                          ram_addr_t start,
                                                          ram_addr_t pages)
{
    unsigned long i, j;
    unsigned long page_number, c;
    hwaddr addr;
    ram_addr_t ram_addr;
    unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
    unsigned long hpratio = qemu_real_host_page_size / TARGET_PAGE_SIZE;
    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

    /* start address is aligned at the start of a word? */
    if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
        (hpratio == 1)) {
        unsigned long **blocks[DIRTY_MEMORY_NUM];
        unsigned long idx;
        unsigned long offset;
        long k;
        long nr = BITS_TO_LONGS(pages);

        idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
        offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
                          DIRTY_MEMORY_BLOCK_SIZE);

        WITH_RCU_READ_LOCK_GUARD() {
            for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
                blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
            }

            for (k = 0; k < nr; k++) {
                if (bitmap[k]) {
                    unsigned long temp = leul_to_cpu(bitmap[k]);

                    atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);

                    if (global_dirty_log) {
                        atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
                                  temp);
                    }

                    if (tcg_enabled()) {
                        atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset],
                                  temp);
                    }
                }

                if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                    offset = 0;
                    idx++;
                }
            }
        }

        xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
    } else {
        uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;

        if (!global_dirty_log) {
            clients &= ~(1 << DIRTY_MEMORY_MIGRATION);
        }

        /*
         * bitmap-traveling is faster than memory-traveling (for addr...)
         * especially when most of the memory is not dirty.
         */
        for (i = 0; i < len; i++) {
            if (bitmap[i] != 0) {
                c = leul_to_cpu(bitmap[i]);
                do {
                    j = ctzl(c);
                    c &= ~(1ul << j);
                    page_number = (i * HOST_LONG_BITS + j) * hpratio;
                    addr = page_number * TARGET_PAGE_SIZE;
                    ram_addr = start + addr;
                    cpu_physical_memory_set_dirty_range(ram_addr,
                                       TARGET_PAGE_SIZE * hpratio, clients);
                } while (c != 0);
            }
        }
    }
}
#endif /* not _WIN32 */

bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client);

DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
    (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client);

bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
                                            ram_addr_t start,
                                            ram_addr_t length);

static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
                                                         ram_addr_t length)
{
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
}


/* Called with RCU critical section */
static inline
uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
                                               ram_addr_t start,
                                               ram_addr_t length,
                                               uint64_t *real_dirty_pages)
{
    ram_addr_t addr;
    unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
    uint64_t num_dirty = 0;
    unsigned long *dest = rb->bmap;

    /* start address and length are aligned at the start of a word? */
    if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) ==
         (start + rb->offset) &&
        !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
        int k;
        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
        unsigned long * const *src;
        unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = BIT_WORD((word * BITS_PER_LONG) %
                                        DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

        src = atomic_rcu_read(
                &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks;

        for (k = page; k < page + nr; k++) {
            if (src[idx][offset]) {
                unsigned long bits = atomic_xchg(&src[idx][offset], 0);
                unsigned long new_dirty;
                *real_dirty_pages += ctpopl(bits);
                new_dirty = ~dest[k];
                dest[k] |= bits;
                new_dirty &= bits;
                num_dirty += ctpopl(new_dirty);
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }

        if (rb->clear_bmap) {
            /*
             * Postpone the dirty bitmap clear to the point right before
             * we really send the pages; we also split the clearing into
             * smaller chunks.
             */
            clear_bmap_set(rb, start >> TARGET_PAGE_BITS,
                           length >> TARGET_PAGE_BITS);
        } else {
            /* Slow path - still do that in a huge chunk */
            memory_region_clear_dirty_bitmap(rb->mr, start, length);
        }
    } else {
        ram_addr_t offset = rb->offset;

        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
            if (cpu_physical_memory_test_and_clear_dirty(
                        start + addr + offset,
                        TARGET_PAGE_SIZE,
                        DIRTY_MEMORY_MIGRATION)) {
                *real_dirty_pages += 1;
                long k = (start + addr) >> TARGET_PAGE_BITS;
                if (!test_and_set_bit(k, dest)) {
                    num_dirty++;
                }
            }
        }
    }

    return num_dirty;
}
#endif
#endif