/*
 * Declarations for cpu physical memory functions
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Avi Kivity <avi@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 *
 */

/*
 * This header is for use by exec.c and memory.c ONLY.  Do not include it.
 * The functions declared here will be removed soon.
 */

#ifndef RAM_ADDR_H
#define RAM_ADDR_H

#ifndef CONFIG_USER_ONLY
#include "hw/xen/xen.h"
#include "sysemu/tcg.h"
#include "exec/ramlist.h"

struct RAMBlock {
    struct rcu_head rcu;
    struct MemoryRegion *mr;
    uint8_t *host;
    uint8_t *colo_cache; /* For colo, VM's ram cache */
    ram_addr_t offset;
    ram_addr_t used_length;
    ram_addr_t max_length;
    void (*resized)(const char*, uint64_t length, void *host);
    uint32_t flags;
    /* Protected by iothread lock.  */
    char idstr[256];
    /* RCU-enabled, writes protected by the ramlist lock */
    QLIST_ENTRY(RAMBlock) next;
    QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
    int fd;
    size_t page_size;
    /* dirty bitmap used during migration */
    unsigned long *bmap;
    /* bitmap of pages that haven't been sent even once;
     * only maintained and used in postcopy at the moment,
     * where it's used to send the dirtymap at the start
     * of the postcopy phase
     */
    unsigned long *unsentmap;
    /* bitmap of already received pages in postcopy */
    unsigned long *receivedmap;

    /*
     * bitmap to track already cleared dirty bitmap.  When the bit is
     * set, it means the corresponding memory chunk needs a log-clear.
     * Set this field to non-NULL to enable the capability to postpone
     * and split clearing of the dirty bitmap on the remote node (e.g.,
     * KVM).  The bitmap will be set only when doing global sync.
     *
     * NOTE: this bitmap is different from the other bitmaps in that
     * one bit can represent multiple guest pages (which is decided by
     * the `clear_bmap_shift' variable below).  On the destination
     * side, this should always be NULL, and the variable
     * `clear_bmap_shift' is meaningless.
     */
    unsigned long *clear_bmap;
    uint8_t clear_bmap_shift;
};
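
/*
 * Worked example for clear_bmap granularity (illustrative, not
 * normative): one clear_bmap bit covers (1 << clear_bmap_shift) guest
 * pages.  Assuming, for example, a shift of 18 and 4KiB target pages,
 * a single bit covers 1GiB of guest memory, so a 16GiB RAMBlock needs
 * clear_bmap_size(16GiB / 4KiB, 18) == 16 bits in clear_bmap.
 */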

/**
 * clear_bmap_size: calculate clear bitmap size
 *
 * @pages: number of guest pages
 * @shift: guest page number shift
 *
 * Returns: number of bits for the clear bitmap
 */
static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
{
    return DIV_ROUND_UP(pages, 1UL << shift);
}

/**
 * clear_bmap_set: set clear bitmap for the page range
 *
 * @rb: the ramblock to operate on
 * @start: the start page number
 * @npages: number of pages to set in the bitmap
 *
 * Returns: None
 */
static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
                                  uint64_t npages)
{
    uint8_t shift = rb->clear_bmap_shift;

    bitmap_set_atomic(rb->clear_bmap, start >> shift,
                      clear_bmap_size(npages, shift));
}

/**
 * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set
 *
 * @rb: the ramblock to operate on
 * @page: the page number to check
 *
 * Returns: true if the bit was set, false otherwise
 */
static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
{
    uint8_t shift = rb->clear_bmap_shift;

    return bitmap_test_and_clear_atomic(rb->clear_bmap, page >> shift, 1);
}

static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
{
    return b && b->host && offset < b->used_length;
}

static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
{
    assert(offset_in_ramblock(block, offset));
    return (char *)block->host + offset;
}

static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
                                                            RAMBlock *rb)
{
    uint64_t host_addr_offset =
            (uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
    return host_addr_offset >> TARGET_PAGE_BITS;
}
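
/*
 * Usage sketch for the accessors above (illustrative only): callers are
 * expected to validate an offset before touching host memory, e.g.
 *
 *     if (offset_in_ramblock(block, offset)) {
 *         void *hva = ramblock_ptr(block, offset);
 *         ...read or write at most used_length - offset bytes at hva...
 *     }
 *
 * ramblock_recv_bitmap_offset() goes the other way round: it maps a host
 * virtual address inside block->host back to a target-page index, as used
 * by the postcopy receivedmap.
 */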

bool ramblock_is_pmem(RAMBlock *rb);

long qemu_minrampagesize(void);
long qemu_maxrampagesize(void);

/**
 * qemu_ram_alloc_from_file,
 * qemu_ram_alloc_from_fd:  Allocate a ram block from the specified backing
 *                          file or device
 *
 * Parameters:
 *  @size: the size in bytes of the ram block
 *  @mr: the memory region where the ram block is
 *  @ram_flags: specify the properties of the ram block, which can be one
 *              of the following values, or a bitwise OR of them
 *      - RAM_SHARED: mmap the backing file or device with MAP_SHARED
 *      - RAM_PMEM: the backend @mem_path or @fd is persistent memory
 *      Other bits are ignored.
 *  @mem_path or @fd: specify the backing file or device
 *  @errp: pointer to Error*, to store an error if it happens
 *
 * Return:
 *  On success, return a pointer to the ram block.
 *  On failure, return NULL.
 */
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   uint32_t ram_flags, const char *mem_path,
                                   Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                 uint32_t ram_flags, int fd,
                                 Error **errp);

RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr,
                         Error **errp);
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
                                    void (*resized)(const char*,
                                                    uint64_t length,
                                                    void *host),
                                    MemoryRegion *mr, Error **errp);
void qemu_ram_free(RAMBlock *block);

int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);

#define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
#define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))

void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end);

static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = false;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_bit(blocks->blocks[idx], num, offset);
        if (found < num) {
            dirty = true;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    rcu_read_unlock();

    return dirty;
}

static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = true;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_zero_bit(blocks->blocks[idx], num, offset);
        if (found < num) {
            dirty = false;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    rcu_read_unlock();

    return dirty;
}

static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    return cpu_physical_memory_get_dirty(addr, 1, client);
}

static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
{
    bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
    bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
    bool migration =
        cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    return !(vga && code && migration);
}
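
/*
 * Illustrative note on the dirty-client helpers above (not normative):
 * a page counts as "clean" as soon as at least one of the three clients
 * (VGA, CODE, MIGRATION) no longer has its dirty bit set, e.g.
 *
 *     cpu_physical_memory_set_dirty_range(addr, 1, DIRTY_CLIENTS_ALL);
 *     assert(!cpu_physical_memory_is_clean(addr));
 *     cpu_physical_memory_test_and_clear_dirty(addr, 1, DIRTY_MEMORY_VGA);
 *     assert(cpu_physical_memory_is_clean(addr));
 */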

static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
                                                               ram_addr_t length,
                                                               uint8_t mask)
{
    uint8_t ret = 0;

    if (mask & (1 << DIRTY_MEMORY_VGA) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) {
        ret |= (1 << DIRTY_MEMORY_VGA);
    }
    if (mask & (1 << DIRTY_MEMORY_CODE) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) {
        ret |= (1 << DIRTY_MEMORY_CODE);
    }
    if (mask & (1 << DIRTY_MEMORY_MIGRATION) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
        ret |= (1 << DIRTY_MEMORY_MIGRATION);
    }
    return ret;
}

static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    unsigned long page, idx, offset;
    DirtyMemoryBlocks *blocks;

    assert(client < DIRTY_MEMORY_NUM);

    page = addr >> TARGET_PAGE_BITS;
    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    set_bit_atomic(offset, blocks->blocks[idx]);

    rcu_read_unlock();
}

static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
                                                       ram_addr_t length,
                                                       uint8_t mask)
{
    DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
    unsigned long end, page;
    unsigned long idx, offset, base;
    int i;

    if (!mask && !xen_enabled()) {
        return;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
        blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i]);
    }

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);

        if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
            bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
                              offset, next - page);
        }
        if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
            bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
                              offset, next - page);
        }
        if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
            bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
                              offset, next - page);
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    rcu_read_unlock();

    xen_hvm_modified_memory(start, length);
}
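
/*
 * Usage sketch (illustrative only): after writing to guest RAM through a
 * host pointer, a caller marks the written range dirty with the client
 * mask it is responsible for, e.g.
 *
 *     cpu_physical_memory_set_dirty_range(ram_offset, len,
 *                                         DIRTY_CLIENTS_NOCODE);
 *
 * DIRTY_CLIENTS_NOCODE leaves out the CODE client, which is appropriate
 * when translated-code invalidation has been handled separately;
 * DIRTY_CLIENTS_ALL sets every client.
 */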

#if !defined(_WIN32)
static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
                                                          ram_addr_t start,
                                                          ram_addr_t pages)
{
    unsigned long i, j;
    unsigned long page_number, c;
    hwaddr addr;
    ram_addr_t ram_addr;
    unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
    unsigned long hpratio = getpagesize() / TARGET_PAGE_SIZE;
    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

    /* start address is aligned at the start of a word? */
    if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
        (hpratio == 1)) {
        unsigned long **blocks[DIRTY_MEMORY_NUM];
        unsigned long idx;
        unsigned long offset;
        long k;
        long nr = BITS_TO_LONGS(pages);

        idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
        offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
                          DIRTY_MEMORY_BLOCK_SIZE);

        rcu_read_lock();

        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
        }

        for (k = 0; k < nr; k++) {
            if (bitmap[k]) {
                unsigned long temp = leul_to_cpu(bitmap[k]);

                atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);

                if (global_dirty_log) {
                    atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
                              temp);
                }

                if (tcg_enabled()) {
                    atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], temp);
                }
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }

        rcu_read_unlock();

        xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
    } else {
        uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;

        if (!global_dirty_log) {
            clients &= ~(1 << DIRTY_MEMORY_MIGRATION);
        }

        /*
         * bitmap-traveling is faster than memory-traveling (for addr...)
         * especially when most of the memory is not dirty.
         */
        for (i = 0; i < len; i++) {
            if (bitmap[i] != 0) {
                c = leul_to_cpu(bitmap[i]);
                do {
                    j = ctzl(c);
                    c &= ~(1ul << j);
                    page_number = (i * HOST_LONG_BITS + j) * hpratio;
                    addr = page_number * TARGET_PAGE_SIZE;
                    ram_addr = start + addr;
                    cpu_physical_memory_set_dirty_range(ram_addr,
                                       TARGET_PAGE_SIZE * hpratio, clients);
                } while (c != 0);
            }
        }
    }
}
#endif /* not _WIN32 */

bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client);

DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
    (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client);

bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
                                            ram_addr_t start,
                                            ram_addr_t length);

static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
                                                         ram_addr_t length)
{
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
}
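
/*
 * Note on cpu_physical_memory_sync_dirty_bitmap() below (summary, not a
 * contract): it transfers the MIGRATION client's dirty bits for
 * [start, start + length) into rb->bmap.  The return value counts pages
 * that become newly dirty in rb->bmap, while *real_dirty_pages is
 * incremented for every dirty page seen in the global bitmap, including
 * pages that were already set in rb->bmap.
 */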

/* Called with RCU critical section */
static inline
uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
                                               ram_addr_t start,
                                               ram_addr_t length,
                                               uint64_t *real_dirty_pages)
{
    ram_addr_t addr;
    unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
    uint64_t num_dirty = 0;
    unsigned long *dest = rb->bmap;

    /* start address and length are aligned at the start of a word? */
    if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) ==
         (start + rb->offset) &&
        !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
        int k;
        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
        unsigned long * const *src;
        unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = BIT_WORD((word * BITS_PER_LONG) %
                                        DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

        src = atomic_rcu_read(
                &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks;

        for (k = page; k < page + nr; k++) {
            if (src[idx][offset]) {
                unsigned long bits = atomic_xchg(&src[idx][offset], 0);
                unsigned long new_dirty;
                *real_dirty_pages += ctpopl(bits);
                new_dirty = ~dest[k];
                dest[k] |= bits;
                new_dirty &= bits;
                num_dirty += ctpopl(new_dirty);
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }

        if (rb->clear_bmap) {
            /*
             * Postpone the dirty bitmap clear to the point before we
             * really send the pages; we will also split the clearing
             * into smaller chunks.
             */
            clear_bmap_set(rb, start >> TARGET_PAGE_BITS,
                           length >> TARGET_PAGE_BITS);
        } else {
            /* Slow path - still do that in a huge chunk */
            memory_region_clear_dirty_bitmap(rb->mr, start, length);
        }
    } else {
        ram_addr_t offset = rb->offset;

        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
            if (cpu_physical_memory_test_and_clear_dirty(
                        start + addr + offset,
                        TARGET_PAGE_SIZE,
                        DIRTY_MEMORY_MIGRATION)) {
                *real_dirty_pages += 1;
                long k = (start + addr) >> TARGET_PAGE_BITS;
                if (!test_and_set_bit(k, dest)) {
                    num_dirty++;
                }
            }
        }
    }

    return num_dirty;
}
#endif /* !CONFIG_USER_ONLY */
#endif /* RAM_ADDR_H */