/*
 * Declarations for cpu physical memory functions
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Avi Kivity <avi@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 *
 */

/*
 * This header is for use by exec.c and memory.c ONLY.  Do not include it.
 * The functions declared here will be removed soon.
 */

#ifndef RAM_ADDR_H
#define RAM_ADDR_H

#ifndef CONFIG_USER_ONLY
#include "cpu.h"
#include "hw/xen/xen.h"
#include "sysemu/tcg.h"
#include "exec/ramlist.h"

struct RAMBlock {
    struct rcu_head rcu;
    struct MemoryRegion *mr;
    uint8_t *host;
    uint8_t *colo_cache; /* For colo, VM's ram cache */
    ram_addr_t offset;
    ram_addr_t used_length;
    ram_addr_t max_length;
    void (*resized)(const char*, uint64_t length, void *host);
    uint32_t flags;
    /* Protected by iothread lock. */
    char idstr[256];
    /* RCU-enabled, writes protected by the ramlist lock */
    QLIST_ENTRY(RAMBlock) next;
    QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
    int fd;
    size_t page_size;
    /* dirty bitmap used during migration */
    unsigned long *bmap;
    /* bitmap of already received pages in postcopy */
    unsigned long *receivedmap;

    /*
     * Bitmap to track already cleared dirty bitmap.  When the bit is
     * set, it means the corresponding memory chunk needs a log-clear.
     * Set this field to non-NULL to enable the capability to postpone
     * and split clearing of the dirty bitmap on the remote node (e.g.,
     * KVM).  The bitmap will be set only when doing global sync.
     *
     * NOTE: this bitmap is different from the other bitmaps in that one
     * bit can represent multiple guest pages (as decided by the
     * `clear_bmap_shift' field below).  On the destination side, this
     * should always be NULL, and `clear_bmap_shift' is meaningless.
     */
    unsigned long *clear_bmap;
    uint8_t clear_bmap_shift;
};

/**
 * clear_bmap_size: calculate clear bitmap size
 *
 * @pages: number of guest pages
 * @shift: guest page number shift
 *
 * Returns: number of bits for the clear bitmap
 */
static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
{
    return DIV_ROUND_UP(pages, 1UL << shift);
}

/**
 * clear_bmap_set: set clear bitmap for the page range
 *
 * @rb: the ramblock to operate on
 * @start: the start page number
 * @npages: number of pages to set in the bitmap
 *
 * Returns: None
 */
static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
                                  uint64_t npages)
{
    uint8_t shift = rb->clear_bmap_shift;

    bitmap_set_atomic(rb->clear_bmap, start >> shift,
                      clear_bmap_size(npages, shift));
}

/**
 * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set
 *
 * @rb: the ramblock to operate on
 * @page: the page number to check
 *
 * Returns: true if the bit was set, false otherwise
 */
static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
{
    uint8_t shift = rb->clear_bmap_shift;

    return bitmap_test_and_clear_atomic(rb->clear_bmap, page >> shift, 1);
}

static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
{
    return (b && b->host && offset < b->used_length) ? true : false;
}
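
/*
 * Illustrative sketch, not part of the original header: how the
 * clear_bmap helpers above are meant to be combined.  Assumes a
 * hypothetical RAMBlock *rb whose clear_bmap was already allocated with
 * clear_bmap_size() bits and a non-zero @npages; the function name is an
 * assumption for illustration only.
 */
static inline bool clear_bmap_usage_sketch(RAMBlock *rb, uint64_t npages)
{
    /* Mark guest pages [0, npages) as needing a deferred log-clear. */
    clear_bmap_set(rb, 0, npages);
    /*
     * Consume the mark covering page 0: returns true now and false on a
     * second call, since the chunk's bit is cleared atomically.
     */
    return clear_bmap_test_and_clear(rb, 0);
}
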
static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
{
    assert(offset_in_ramblock(block, offset));
    return (char *)block->host + offset;
}

static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
                                                            RAMBlock *rb)
{
    uint64_t host_addr_offset =
        (uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
    return host_addr_offset >> TARGET_PAGE_BITS;
}

bool ramblock_is_pmem(RAMBlock *rb);

long qemu_minrampagesize(void);
long qemu_maxrampagesize(void);

/**
 * qemu_ram_alloc_from_file,
 * qemu_ram_alloc_from_fd:  Allocate a ram block from the specified backing
 *                          file or device
 *
 * Parameters:
 *  @size: the size in bytes of the ram block
 *  @mr: the memory region where the ram block is
 *  @ram_flags: specify the properties of the ram block, which can be one
 *              or a bit-or combination of the following values
 *              - RAM_SHARED: mmap the backing file or device with MAP_SHARED
 *              - RAM_PMEM: the backend @mem_path or @fd is persistent memory
 *              Other bits are ignored.
 *  @mem_path or @fd: specify the backing file or device
 *  @errp: pointer to Error*, to store an error if it happens
 *
 * Return:
 *  On success, return a pointer to the ram block.
 *  On failure, return NULL.
 */
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   uint32_t ram_flags, const char *mem_path,
                                   Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                 uint32_t ram_flags, int fd,
                                 Error **errp);

RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr,
                         Error **errp);
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
                                    void (*resized)(const char*,
                                                    uint64_t length,
                                                    void *host),
                                    MemoryRegion *mr, Error **errp);
void qemu_ram_free(RAMBlock *block);

int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);

#define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
#define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))

void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end);

static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = false;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
            unsigned long num = next - base;
            unsigned long found = find_next_bit(blocks->blocks[idx],
                                                num, offset);
            if (found < num) {
                dirty = true;
                break;
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    return dirty;
}
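
/*
 * Minimal sketch, not part of the original header: the idx/offset
 * decomposition used by the dirty-tracking helpers above and below.
 * Given a guest page number, compute which DirtyMemoryBlocks sub-block
 * holds its bit and the bit position inside that sub-block.  The helper
 * name and out-parameters are assumptions for illustration only.
 */
static inline void dirty_page_locate_sketch(unsigned long page,
                                            unsigned long *idx,
                                            unsigned long *offset)
{
    *idx = page / DIRTY_MEMORY_BLOCK_SIZE;    /* which sub-block */
    *offset = page % DIRTY_MEMORY_BLOCK_SIZE; /* bit within that sub-block */
}
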
static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = true;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    RCU_READ_LOCK_GUARD();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_zero_bit(blocks->blocks[idx],
                                                 num, offset);
        if (found < num) {
            dirty = false;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    return dirty;
}

static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    return cpu_physical_memory_get_dirty(addr, 1, client);
}

static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
{
    bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
    bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
    bool migration =
        cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    return !(vga && code && migration);
}

static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
                                                               ram_addr_t length,
                                                               uint8_t mask)
{
    uint8_t ret = 0;

    if (mask & (1 << DIRTY_MEMORY_VGA) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) {
        ret |= (1 << DIRTY_MEMORY_VGA);
    }
    if (mask & (1 << DIRTY_MEMORY_CODE) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) {
        ret |= (1 << DIRTY_MEMORY_CODE);
    }
    if (mask & (1 << DIRTY_MEMORY_MIGRATION) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
        ret |= (1 << DIRTY_MEMORY_MIGRATION);
    }
    return ret;
}

static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    unsigned long page, idx, offset;
    DirtyMemoryBlocks *blocks;

    assert(client < DIRTY_MEMORY_NUM);

    page = addr >> TARGET_PAGE_BITS;
    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;

    RCU_READ_LOCK_GUARD();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    set_bit_atomic(offset, blocks->blocks[idx]);
}
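
/*
 * Usage sketch, not part of the original header: a caller that has just
 * modified one guest page would typically check whether any client still
 * considers it clean and, if so, flag it for the clients it cares about.
 * The wrapper name is an assumption for illustration only.
 */
static inline void mark_page_dirty_sketch(ram_addr_t addr)
{
    if (cpu_physical_memory_is_clean(addr)) {
        /* Flag the page for the display and migration dirty trackers. */
        cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
        cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    }
}
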
static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
                                                       ram_addr_t length,
                                                       uint8_t mask)
{
    DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
    unsigned long end, page;
    unsigned long idx, offset, base;
    int i;

    if (!mask && !xen_enabled()) {
        return;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i]);
        }

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);

            if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
                                  offset, next - page);
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    xen_hvm_modified_memory(start, length);
}
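
/*
 * Usage sketch, not part of the original header, loosely modelled on how
 * DMA-style RAM writes are flagged elsewhere in QEMU: after copying @len
 * bytes into guest RAM at @addr, mark the touched pages dirty for every
 * client except the TCG code client (which is handled separately by TB
 * invalidation).  The helper name is an assumption for illustration only.
 */
static inline void dma_write_mark_dirty_sketch(ram_addr_t addr, ram_addr_t len)
{
    cpu_physical_memory_set_dirty_range(addr, len, DIRTY_CLIENTS_NOCODE);
}
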
#if !defined(_WIN32)
static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
                                                          ram_addr_t start,
                                                          ram_addr_t pages)
{
    unsigned long i, j;
    unsigned long page_number, c;
    hwaddr addr;
    ram_addr_t ram_addr;
    unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
    unsigned long hpratio = qemu_real_host_page_size / TARGET_PAGE_SIZE;
    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

    /* start address is aligned at the start of a word? */
    if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
        (hpratio == 1)) {
        unsigned long **blocks[DIRTY_MEMORY_NUM];
        unsigned long idx;
        unsigned long offset;
        long k;
        long nr = BITS_TO_LONGS(pages);

        idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
        offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
                          DIRTY_MEMORY_BLOCK_SIZE);

        WITH_RCU_READ_LOCK_GUARD() {
            for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
                blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
            }

            for (k = 0; k < nr; k++) {
                if (bitmap[k]) {
                    unsigned long temp = leul_to_cpu(bitmap[k]);

                    atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);

                    if (global_dirty_log) {
                        atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
                                  temp);
                    }

                    if (tcg_enabled()) {
                        atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset],
                                  temp);
                    }
                }

                if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                    offset = 0;
                    idx++;
                }
            }
        }

        xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
    } else {
        uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;

        if (!global_dirty_log) {
            clients &= ~(1 << DIRTY_MEMORY_MIGRATION);
        }

        /*
         * bitmap-traveling is faster than memory-traveling (for addr...)
         * especially when most of the memory is not dirty.
         */
        for (i = 0; i < len; i++) {
            if (bitmap[i] != 0) {
                c = leul_to_cpu(bitmap[i]);
                do {
                    j = ctzl(c);
                    c &= ~(1ul << j);
                    page_number = (i * HOST_LONG_BITS + j) * hpratio;
                    addr = page_number * TARGET_PAGE_SIZE;
                    ram_addr = start + addr;
                    cpu_physical_memory_set_dirty_range(ram_addr,
                                       TARGET_PAGE_SIZE * hpratio, clients);
                } while (c != 0);
            }
        }
    }
}
#endif /* not _WIN32 */

bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client);

DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
    (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client);

bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
                                            ram_addr_t start,
                                            ram_addr_t length);

static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
                                                         ram_addr_t length)
{
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
}


/* Called with RCU critical section */
static inline
uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
                                               ram_addr_t start,
                                               ram_addr_t length,
                                               uint64_t *real_dirty_pages)
{
    ram_addr_t addr;
    unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
    uint64_t num_dirty = 0;
    unsigned long *dest = rb->bmap;

    /* start address and length are aligned at the start of a word? */
    if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) ==
         (start + rb->offset) &&
        !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
        int k;
        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
        unsigned long * const *src;
        unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = BIT_WORD((word * BITS_PER_LONG) %
                                        DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

        src = atomic_rcu_read(
                &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks;

        for (k = page; k < page + nr; k++) {
            if (src[idx][offset]) {
                unsigned long bits = atomic_xchg(&src[idx][offset], 0);
                unsigned long new_dirty;
                *real_dirty_pages += ctpopl(bits);
                new_dirty = ~dest[k];
                dest[k] |= bits;
                new_dirty &= bits;
                num_dirty += ctpopl(new_dirty);
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }

        if (rb->clear_bmap) {
            /*
             * Postpone the dirty bitmap clear to the point before we
             * really send the pages, also we will split the clear
             * dirty procedure into smaller chunks.
             */
            clear_bmap_set(rb, start >> TARGET_PAGE_BITS,
                           length >> TARGET_PAGE_BITS);
        } else {
            /* Slow path - still do that in a huge chunk */
            memory_region_clear_dirty_bitmap(rb->mr, start, length);
        }
    } else {
        ram_addr_t offset = rb->offset;

        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
            if (cpu_physical_memory_test_and_clear_dirty(
                        start + addr + offset,
                        TARGET_PAGE_SIZE,
                        DIRTY_MEMORY_MIGRATION)) {
                *real_dirty_pages += 1;
                long k = (start + addr) >> TARGET_PAGE_BITS;
                if (!test_and_set_bit(k, dest)) {
                    num_dirty++;
                }
            }
        }
    }

    return num_dirty;
}
#endif
#endif
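
/*
 * Usage sketch, not part of the original header: during migration the
 * ram-save code typically walks every RAMBlock inside an RCU read-side
 * critical section and funnels the global dirty log into the block's
 * private bitmap roughly like
 *
 *     uint64_t real_dirty = 0;
 *     uint64_t newly_dirty =
 *         cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length,
 *                                               &real_dirty);
 *
 * after which rb->bmap holds the pages still to be sent; when
 * rb->clear_bmap is in use, the expensive remote log-clear is deferred
 * and split into chunks just before the pages are actually transmitted.
 * Variable names here are illustrative only.
 */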