/*
 * Declarations for cpu physical memory functions
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Avi Kivity <avi@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 *
 */

/*
 * This header is for use by exec.c and memory.c ONLY.  Do not include it.
 * The functions declared here will be removed soon.
 */

#ifndef RAM_ADDR_H
#define RAM_ADDR_H

#ifndef CONFIG_USER_ONLY
#include "cpu.h"
#include "hw/xen/xen.h"
#include "sysemu/tcg.h"
#include "exec/ramlist.h"

struct RAMBlock {
    struct rcu_head rcu;
    struct MemoryRegion *mr;
    uint8_t *host;
    uint8_t *colo_cache; /* For colo, VM's ram cache */
    ram_addr_t offset;
    ram_addr_t used_length;
    ram_addr_t max_length;
    void (*resized)(const char*, uint64_t length, void *host);
    uint32_t flags;
    /* Protected by iothread lock. */
    char idstr[256];
    /* RCU-enabled, writes protected by the ramlist lock */
    QLIST_ENTRY(RAMBlock) next;
    QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
    int fd;
    size_t page_size;
    /* dirty bitmap used during migration */
    unsigned long *bmap;
    /* bitmap of already received pages in postcopy */
    unsigned long *receivedmap;

    /*
     * bitmap to track already cleared dirty bitmap.  When the bit is
     * set, it means the corresponding memory chunk needs a log-clear.
     * Set this to non-NULL to enable the capability to postpone and
     * split clearing of dirty bitmap on the remote node (e.g., KVM).
     * The bitmap will be set only when doing global sync.
     *
     * NOTE: this bitmap is different from the other bitmaps in that
     * one bit can represent multiple guest pages (which is decided by
     * the `clear_bmap_shift' variable below).  On destination side,
     * this should always be NULL, and the variable `clear_bmap_shift'
     * is meaningless.
     */
    unsigned long *clear_bmap;
    uint8_t clear_bmap_shift;
};

/**
 * clear_bmap_size: calculate clear bitmap size
 *
 * @pages: number of guest pages
 * @shift: guest page number shift
 *
 * Returns: number of bits for the clear bitmap
 */
static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
{
    return DIV_ROUND_UP(pages, 1UL << shift);
}

/**
 * clear_bmap_set: set clear bitmap for the page range
 *
 * @rb: the ramblock to operate on
 * @start: the start page number
 * @npages: number of pages to set in the bitmap
 *
 * Returns: None
 */
static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
                                  uint64_t npages)
{
    uint8_t shift = rb->clear_bmap_shift;

    bitmap_set_atomic(rb->clear_bmap, start >> shift,
                      clear_bmap_size(npages, shift));
}

/**
 * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set
 *
 * @rb: the ramblock to operate on
 * @page: the page number to check
 *
 * Returns: true if the bit was set, false otherwise
 */
static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
{
    uint8_t shift = rb->clear_bmap_shift;

    return bitmap_test_and_clear_atomic(rb->clear_bmap, page >> shift, 1);
}
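
/*
 * Illustrative sketch only, not used by exec.c or memory.c: one
 * clear_bmap bit covers 2^clear_bmap_shift guest pages, i.e.
 * (TARGET_PAGE_SIZE << clear_bmap_shift) bytes.  Assuming 4 KiB target
 * pages and a shift of 18 (a typical dirty-log clearing granularity),
 * one bit stands for 1 GiB of guest RAM.  The helper name below is
 * made up for illustration.
 */
static inline uint64_t clear_bmap_chunk_bytes_example(RAMBlock *rb)
{
    /* Bytes of guest RAM covered by a single clear_bmap bit */
    return (uint64_t)TARGET_PAGE_SIZE << rb->clear_bmap_shift;
}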
static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
{
    return b && b->host && offset < b->used_length;
}

static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
{
    assert(offset_in_ramblock(block, offset));
    return (char *)block->host + offset;
}

static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
                                                            RAMBlock *rb)
{
    uint64_t host_addr_offset =
            (uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
    return host_addr_offset >> TARGET_PAGE_BITS;
}

bool ramblock_is_pmem(RAMBlock *rb);

long qemu_minrampagesize(void);
long qemu_maxrampagesize(void);

/**
 * qemu_ram_alloc_from_file,
 * qemu_ram_alloc_from_fd:  Allocate a ram block from the specified backing
 *                          file or device
 *
 * Parameters:
 *  @size: the size in bytes of the ram block
 *  @mr: the memory region where the ram block is
 *  @ram_flags: specify the properties of the ram block, which can be one
 *              or a bit-or of the following values
 *              - RAM_SHARED: mmap the backing file or device with MAP_SHARED
 *              - RAM_PMEM: the backend @mem_path or @fd is persistent memory
 *              Other bits are ignored.
 *  @mem_path or @fd: specify the backing file or device
 *  @errp: pointer to Error*, to store an error if it happens
 *
 * Return:
 *  On success, return a pointer to the ram block.
 *  On failure, return NULL.
 */
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   uint32_t ram_flags, const char *mem_path,
                                   Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                 uint32_t ram_flags, int fd,
                                 Error **errp);

RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr,
                         Error **errp);
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
                                    void (*resized)(const char*,
                                                    uint64_t length,
                                                    void *host),
                                    MemoryRegion *mr, Error **errp);
void qemu_ram_free(RAMBlock *block);

int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);

#define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
#define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))

void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end);

static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = false;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_bit(blocks->blocks[idx], num, offset);
        if (found < num) {
            dirty = true;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    rcu_read_unlock();

    return dirty;
}
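
/*
 * Illustrative sketch only (a made-up helper, not part of this header's
 * API): the dirty bitmap of each client is a sparse array of
 * DIRTY_MEMORY_BLOCK_SIZE-bit blocks, so a global page number is split
 * into a block index and a bit offset, exactly as the loop above does.
 */
static inline void dirty_memory_locate_example(ram_addr_t addr,
                                               unsigned long *idx,
                                               unsigned long *offset)
{
    unsigned long page = addr >> TARGET_PAGE_BITS;

    *idx = page / DIRTY_MEMORY_BLOCK_SIZE;    /* which blocks[] entry */
    *offset = page % DIRTY_MEMORY_BLOCK_SIZE; /* bit within that entry */
}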
static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = true;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_zero_bit(blocks->blocks[idx], num, offset);
        if (found < num) {
            dirty = false;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    rcu_read_unlock();

    return dirty;
}

static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    return cpu_physical_memory_get_dirty(addr, 1, client);
}

static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
{
    bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
    bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
    bool migration =
        cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    return !(vga && code && migration);
}

static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
                                                               ram_addr_t length,
                                                               uint8_t mask)
{
    uint8_t ret = 0;

    if (mask & (1 << DIRTY_MEMORY_VGA) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) {
        ret |= (1 << DIRTY_MEMORY_VGA);
    }
    if (mask & (1 << DIRTY_MEMORY_CODE) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) {
        ret |= (1 << DIRTY_MEMORY_CODE);
    }
    if (mask & (1 << DIRTY_MEMORY_MIGRATION) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
        ret |= (1 << DIRTY_MEMORY_MIGRATION);
    }
    return ret;
}

static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    unsigned long page, idx, offset;
    DirtyMemoryBlocks *blocks;

    assert(client < DIRTY_MEMORY_NUM);

    page = addr >> TARGET_PAGE_BITS;
    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    set_bit_atomic(offset, blocks->blocks[idx]);

    rcu_read_unlock();
}

static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
                                                       ram_addr_t length,
                                                       uint8_t mask)
{
    DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
    unsigned long end, page;
    unsigned long idx, offset, base;
    int i;

    if (!mask && !xen_enabled()) {
        return;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
        blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i]);
    }

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);

        if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
            bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
                              offset, next - page);
        }
        if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
            bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
                              offset, next - page);
        }
        if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
            bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
                              offset, next - page);
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    rcu_read_unlock();

    xen_hvm_modified_memory(start, length);
}
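
/*
 * Usage sketch with a made-up caller, not part of this header: a device
 * model that has just written @len bytes of guest RAM at @addr through a
 * host pointer would mark the range dirty for every client, so that
 * migration, VGA and TCG all notice the change.
 */
static inline void mark_guest_write_dirty_example(ram_addr_t addr,
                                                  ram_addr_t len)
{
    cpu_physical_memory_set_dirty_range(addr, len, DIRTY_CLIENTS_ALL);
}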
#if !defined(_WIN32)
static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
                                                          ram_addr_t start,
                                                          ram_addr_t pages)
{
    unsigned long i, j;
    unsigned long page_number, c;
    hwaddr addr;
    ram_addr_t ram_addr;
    unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
    unsigned long hpratio = getpagesize() / TARGET_PAGE_SIZE;
    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

    /* start address is aligned at the start of a word? */
    if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
        (hpratio == 1)) {
        unsigned long **blocks[DIRTY_MEMORY_NUM];
        unsigned long idx;
        unsigned long offset;
        long k;
        long nr = BITS_TO_LONGS(pages);

        idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
        offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
                          DIRTY_MEMORY_BLOCK_SIZE);

        rcu_read_lock();

        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
        }

        for (k = 0; k < nr; k++) {
            if (bitmap[k]) {
                unsigned long temp = leul_to_cpu(bitmap[k]);

                atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);

                if (global_dirty_log) {
                    atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
                              temp);
                }

                if (tcg_enabled()) {
                    atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], temp);
                }
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }

        rcu_read_unlock();

        xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
    } else {
        uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;

        if (!global_dirty_log) {
            clients &= ~(1 << DIRTY_MEMORY_MIGRATION);
        }

        /*
         * bitmap-traveling is faster than memory-traveling (for addr...)
         * especially when most of the memory is not dirty.
         */
        for (i = 0; i < len; i++) {
            if (bitmap[i] != 0) {
                c = leul_to_cpu(bitmap[i]);
                do {
                    j = ctzl(c);
                    c &= ~(1ul << j);
                    page_number = (i * HOST_LONG_BITS + j) * hpratio;
                    addr = page_number * TARGET_PAGE_SIZE;
                    ram_addr = start + addr;
                    cpu_physical_memory_set_dirty_range(ram_addr,
                                       TARGET_PAGE_SIZE * hpratio, clients);
                } while (c != 0);
            }
        }
    }
}
#endif /* not _WIN32 */
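
/*
 * Illustrative sketch only (made-up helper): the slow path above decodes
 * a little-endian dirty log word by word; bit @bit of word @word covers
 * hpratio consecutive target pages starting at target page
 * (word * HOST_LONG_BITS + bit) * hpratio.  A hypervisor-provided dirty
 * log would typically be handed to cpu_physical_memory_set_dirty_lebitmap()
 * as-is rather than decoded by callers.
 */
static inline ram_addr_t dirty_lebitmap_bit_to_addr_example(ram_addr_t start,
                                                            unsigned long word,
                                                            unsigned long bit,
                                                            unsigned long hpratio)
{
    unsigned long page_number = (word * HOST_LONG_BITS + bit) * hpratio;

    /* First target-page address covered by this dirty-log bit */
    return start + (ram_addr_t)page_number * TARGET_PAGE_SIZE;
}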
bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client);

DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
    (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client);

bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
                                            ram_addr_t start,
                                            ram_addr_t length);

static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
                                                         ram_addr_t length)
{
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
}


/* Called within an RCU critical section */
static inline
uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
                                               ram_addr_t start,
                                               ram_addr_t length,
                                               uint64_t *real_dirty_pages)
{
    ram_addr_t addr;
    unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
    uint64_t num_dirty = 0;
    unsigned long *dest = rb->bmap;

    /* start address and length are aligned at the start of a word? */
    if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) ==
         (start + rb->offset) &&
        !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
        int k;
        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
        unsigned long * const *src;
        unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = BIT_WORD((word * BITS_PER_LONG) %
                                        DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

        src = atomic_rcu_read(
                &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks;

        for (k = page; k < page + nr; k++) {
            if (src[idx][offset]) {
                unsigned long bits = atomic_xchg(&src[idx][offset], 0);
                unsigned long new_dirty;
                *real_dirty_pages += ctpopl(bits);
                new_dirty = ~dest[k];
                dest[k] |= bits;
                new_dirty &= bits;
                num_dirty += ctpopl(new_dirty);
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }

        if (rb->clear_bmap) {
            /*
             * Postpone the dirty bitmap clear to the point before we
             * really send the pages; also we will split the clear
             * dirty procedure into smaller chunks.
             */
            clear_bmap_set(rb, start >> TARGET_PAGE_BITS,
                           length >> TARGET_PAGE_BITS);
        } else {
            /* Slow path - still do that in a huge chunk */
            memory_region_clear_dirty_bitmap(rb->mr, start, length);
        }
    } else {
        ram_addr_t offset = rb->offset;

        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
            if (cpu_physical_memory_test_and_clear_dirty(
                        start + addr + offset,
                        TARGET_PAGE_SIZE,
                        DIRTY_MEMORY_MIGRATION)) {
                *real_dirty_pages += 1;
                long k = (start + addr) >> TARGET_PAGE_BITS;
                if (!test_and_set_bit(k, dest)) {
                    num_dirty++;
                }
            }
        }
    }

    return num_dirty;
}
#endif /* !CONFIG_USER_ONLY */
#endif /* RAM_ADDR_H */
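
/*
 * Usage sketch (illustrative only, variable names made up): a
 * migration-side caller holding the RCU read lock would typically sync
 * one RAMBlock at a time into rb->bmap, e.g.:
 *
 *     uint64_t real_dirty = 0;
 *     uint64_t newly_dirty;
 *
 *     newly_dirty = cpu_physical_memory_sync_dirty_bitmap(rb, 0,
 *                                                         rb->used_length,
 *                                                         &real_dirty);
 *
 * real_dirty then counts every page found dirty in the global migration
 * bitmap, while the return value counts only the pages that were not
 * already set in rb->bmap.
 */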