/*
 * Declarations for cpu physical memory functions
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Avi Kivity <avi@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later. See the COPYING file in the top-level directory.
 *
 */

/*
 * This header is for use by exec.c and memory.c ONLY. Do not include it.
 * The functions declared here will be removed soon.
 */

#ifndef RAM_ADDR_H
#define RAM_ADDR_H

#ifndef CONFIG_USER_ONLY
#include "cpu.h"
#include "system/xen.h"
#include "system/tcg.h"
#include "exec/ramlist.h"
#include "exec/ramblock.h"
#include "exec/exec-all.h"
#include "qemu/rcu.h"

#include "exec/hwaddr.h"
#include "exec/cpu-common.h"

extern uint64_t total_dirty_pages;

/**
 * clear_bmap_size: calculate clear bitmap size
 *
 * @pages: number of guest pages
 * @shift: guest page number shift
 *
 * Returns: number of bits for the clear bitmap
 */
static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
{
    return DIV_ROUND_UP(pages, 1UL << shift);
}

/**
 * clear_bmap_set: set clear bitmap for the page range.  Must be called
 * with bitmap_mutex held.
 *
 * @rb: the ramblock to operate on
 * @start: the start page number
 * @npages: number of pages to set in the bitmap
 *
 * Returns: None
 */
static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
                                  uint64_t npages)
{
    uint8_t shift = rb->clear_bmap_shift;

    bitmap_set(rb->clear_bmap, start >> shift, clear_bmap_size(npages, shift));
}

/**
 * clear_bmap_test_and_clear: test clear bitmap for the page; clear it if set.
 * Must be called with bitmap_mutex held.
 *
 * @rb: the ramblock to operate on
 * @page: the page number to check
 *
 * Returns: true if the bit was set, false otherwise
 */
static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
{
    uint8_t shift = rb->clear_bmap_shift;

    return bitmap_test_and_clear(rb->clear_bmap, page >> shift, 1);
}
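
/*
 * Illustration only (not part of this header's API): how the two helpers
 * above cooperate.  A minimal sketch, assuming the block's clear_bmap has
 * been allocated and bitmap_mutex is held by the caller; the function name
 * and page numbers are hypothetical.
 */
#if 0
static void clear_bmap_usage_sketch(RAMBlock *rb)
{
    /* Mark guest pages [0, 100) as "needs a dirty bitmap clear later". */
    clear_bmap_set(rb, 0, 100);

    /*
     * Each clear_bmap bit covers 2^clear_bmap_shift pages, so the first
     * lookup for any page in the covering chunk reports (and clears) it...
     */
    assert(clear_bmap_test_and_clear(rb, 0));

    /* ...and a second lookup sees the chunk as already handled. */
    assert(!clear_bmap_test_and_clear(rb, 0));
}
#endif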

static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
{
    return (b && b->host && offset < b->used_length) ? true : false;
}

static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
{
    assert(offset_in_ramblock(block, offset));
    return (char *)block->host + offset;
}

static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
                                                            RAMBlock *rb)
{
    uint64_t host_addr_offset =
            (uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
    return host_addr_offset >> TARGET_PAGE_BITS;
}

bool ramblock_is_pmem(RAMBlock *rb);

long qemu_minrampagesize(void);
long qemu_maxrampagesize(void);

/**
 * qemu_ram_alloc_from_file,
 * qemu_ram_alloc_from_fd:  Allocate a ram block from the specified backing
 *                          file or device
 *
 * Parameters:
 *  @size: the size in bytes of the ram block
 *  @max_size: the maximum size of the block after resizing
 *  @mr: the memory region where the ram block is
 *  @resized: callback after calls to qemu_ram_resize
 *  @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
 *              RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
 *              RAM_READONLY_FD, RAM_GUEST_MEMFD
 *  @mem_path or @fd: specify the backing file or device
 *  @offset: Offset into target file
 *  @grow: extend file if necessary (but an empty file is always extended).
 *  @errp: pointer to Error*, to store an error if it happens
 *
 * Return:
 *  On success, return a pointer to the ram block.
 *  On failure, return NULL.
 */
typedef void (*qemu_ram_resize_cb)(const char *, uint64_t length, void *host);

RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   uint32_t ram_flags, const char *mem_path,
                                   off_t offset, Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, ram_addr_t max_size,
                                 qemu_ram_resize_cb resized, MemoryRegion *mr,
                                 uint32_t ram_flags, int fd, off_t offset,
                                 bool grow,
                                 Error **errp);

RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, MemoryRegion *mr,
                         Error **errp);
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
                                    qemu_ram_resize_cb resized,
                                    MemoryRegion *mr, Error **errp);
void qemu_ram_free(RAMBlock *block);

int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);

void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length);

/* Write back (msync) the whole used range of the block */
static inline void qemu_ram_block_writeback(RAMBlock *block)
{
    qemu_ram_msync(block, 0, block->used_length);
}

#define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
#define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))

static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = false;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
            unsigned long num = next - base;
            unsigned long found = find_next_bit(blocks->blocks[idx],
                                                num, offset);
            if (found < num) {
                dirty = true;
                break;
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    return dirty;
}

static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = true;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    RCU_READ_LOCK_GUARD();

    blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_zero_bit(blocks->blocks[idx], num, offset);
        if (found < num) {
            dirty = false;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    return dirty;
}
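
/*
 * Illustration only: the page -> (block index, bit offset) decomposition
 * that both walkers above rely on.  A minimal sketch of a single-page
 * lookup, assuming a valid @client; the function name is hypothetical.
 */
#if 0
static bool dirty_bit_for_page(ram_addr_t addr, unsigned client)
{
    unsigned long page = addr >> TARGET_PAGE_BITS;
    unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;    /* which block  */
    unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE; /* bit in block */
    DirtyMemoryBlocks *blocks;

    RCU_READ_LOCK_GUARD();
    blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);
    return test_bit(offset, blocks->blocks[idx]);
}
#endif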

static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    return cpu_physical_memory_get_dirty(addr, 1, client);
}

static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
{
    bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
    bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
    bool migration =
        cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    return !(vga && code && migration);
}

static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
                                                               ram_addr_t length,
                                                               uint8_t mask)
{
    uint8_t ret = 0;

    if (mask & (1 << DIRTY_MEMORY_VGA) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) {
        ret |= (1 << DIRTY_MEMORY_VGA);
    }
    if (mask & (1 << DIRTY_MEMORY_CODE) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) {
        ret |= (1 << DIRTY_MEMORY_CODE);
    }
    if (mask & (1 << DIRTY_MEMORY_MIGRATION) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
        ret |= (1 << DIRTY_MEMORY_MIGRATION);
    }
    return ret;
}

static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    unsigned long page, idx, offset;
    DirtyMemoryBlocks *blocks;

    assert(client < DIRTY_MEMORY_NUM);

    page = addr >> TARGET_PAGE_BITS;
    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;

    RCU_READ_LOCK_GUARD();

    blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

    set_bit_atomic(offset, blocks->blocks[idx]);
}

static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
                                                       ram_addr_t length,
                                                       uint8_t mask)
{
    DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
    unsigned long end, page;
    unsigned long idx, offset, base;
    int i;

    if (!mask && !xen_enabled()) {
        return;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            blocks[i] = qatomic_rcu_read(&ram_list.dirty_memory[i]);
        }

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);

            if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
                                  offset, next - page);
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    xen_hvm_modified_memory(start, length);
}
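
/*
 * Illustration only: a typical write path first asks which clients still
 * consider the range clean and then dirties just those.  A minimal sketch,
 * assuming @addr/@length describe guest RAM that was just written; the
 * function name is hypothetical.
 */
#if 0
static void mark_range_written(ram_addr_t addr, ram_addr_t length)
{
    uint8_t mask = cpu_physical_memory_range_includes_clean(addr, length,
                                                            DIRTY_CLIENTS_ALL);
    if (mask) {
        cpu_physical_memory_set_dirty_range(addr, length, mask);
    }
}
#endif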
353 */ 354 static inline 355 uint64_t cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, 356 ram_addr_t start, 357 ram_addr_t pages) 358 { 359 unsigned long i, j; 360 unsigned long page_number, c, nbits; 361 hwaddr addr; 362 ram_addr_t ram_addr; 363 uint64_t num_dirty = 0; 364 unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS; 365 unsigned long hpratio = qemu_real_host_page_size() / TARGET_PAGE_SIZE; 366 unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS); 367 368 /* start address is aligned at the start of a word? */ 369 if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) && 370 (hpratio == 1)) { 371 unsigned long **blocks[DIRTY_MEMORY_NUM]; 372 unsigned long idx; 373 unsigned long offset; 374 long k; 375 long nr = BITS_TO_LONGS(pages); 376 377 idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE; 378 offset = BIT_WORD((start >> TARGET_PAGE_BITS) % 379 DIRTY_MEMORY_BLOCK_SIZE); 380 381 WITH_RCU_READ_LOCK_GUARD() { 382 for (i = 0; i < DIRTY_MEMORY_NUM; i++) { 383 blocks[i] = 384 qatomic_rcu_read(&ram_list.dirty_memory[i])->blocks; 385 } 386 387 for (k = 0; k < nr; k++) { 388 if (bitmap[k]) { 389 unsigned long temp = leul_to_cpu(bitmap[k]); 390 391 nbits = ctpopl(temp); 392 qatomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp); 393 394 if (global_dirty_tracking) { 395 qatomic_or( 396 &blocks[DIRTY_MEMORY_MIGRATION][idx][offset], 397 temp); 398 if (unlikely( 399 global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) { 400 total_dirty_pages += nbits; 401 } 402 } 403 404 num_dirty += nbits; 405 406 if (tcg_enabled()) { 407 qatomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], 408 temp); 409 } 410 } 411 412 if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) { 413 offset = 0; 414 idx++; 415 } 416 } 417 } 418 419 xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS); 420 } else { 421 uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE; 422 423 if (!global_dirty_tracking) { 424 clients &= ~(1 << DIRTY_MEMORY_MIGRATION); 425 } 426 427 /* 428 * bitmap-traveling is faster than memory-traveling (for addr...) 429 * especially when most of the memory is not dirty. 
430 */ 431 for (i = 0; i < len; i++) { 432 if (bitmap[i] != 0) { 433 c = leul_to_cpu(bitmap[i]); 434 nbits = ctpopl(c); 435 if (unlikely(global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) { 436 total_dirty_pages += nbits; 437 } 438 num_dirty += nbits; 439 do { 440 j = ctzl(c); 441 c &= ~(1ul << j); 442 page_number = (i * HOST_LONG_BITS + j) * hpratio; 443 addr = page_number * TARGET_PAGE_SIZE; 444 ram_addr = start + addr; 445 cpu_physical_memory_set_dirty_range(ram_addr, 446 TARGET_PAGE_SIZE * hpratio, clients); 447 } while (c != 0); 448 } 449 } 450 } 451 452 return num_dirty; 453 } 454 #endif /* not _WIN32 */ 455 456 static inline void cpu_physical_memory_dirty_bits_cleared(ram_addr_t start, 457 ram_addr_t length) 458 { 459 if (tcg_enabled()) { 460 tlb_reset_dirty_range_all(start, length); 461 } 462 463 } 464 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, 465 ram_addr_t length, 466 unsigned client); 467 468 DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty 469 (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client); 470 471 bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap, 472 ram_addr_t start, 473 ram_addr_t length); 474 475 static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start, 476 ram_addr_t length) 477 { 478 cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION); 479 cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA); 480 cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE); 481 } 482 483 484 /* Called with RCU critical section */ 485 static inline 486 uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb, 487 ram_addr_t start, 488 ram_addr_t length) 489 { 490 ram_addr_t addr; 491 unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS); 492 uint64_t num_dirty = 0; 493 unsigned long *dest = rb->bmap; 494 495 /* start address and length is aligned at the start of a word? */ 496 if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) == 497 (start + rb->offset) && 498 !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) { 499 int k; 500 int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS); 501 unsigned long * const *src; 502 unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE; 503 unsigned long offset = BIT_WORD((word * BITS_PER_LONG) % 504 DIRTY_MEMORY_BLOCK_SIZE); 505 unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS); 506 507 src = qatomic_rcu_read( 508 &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks; 509 510 for (k = page; k < page + nr; k++) { 511 if (src[idx][offset]) { 512 unsigned long bits = qatomic_xchg(&src[idx][offset], 0); 513 unsigned long new_dirty; 514 new_dirty = ~dest[k]; 515 dest[k] |= bits; 516 new_dirty &= bits; 517 num_dirty += ctpopl(new_dirty); 518 } 519 520 if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) { 521 offset = 0; 522 idx++; 523 } 524 } 525 if (num_dirty) { 526 cpu_physical_memory_dirty_bits_cleared(start, length); 527 } 528 529 if (rb->clear_bmap) { 530 /* 531 * Postpone the dirty bitmap clear to the point before we 532 * really send the pages, also we will split the clear 533 * dirty procedure into smaller chunks. 
534 */ 535 clear_bmap_set(rb, start >> TARGET_PAGE_BITS, 536 length >> TARGET_PAGE_BITS); 537 } else { 538 /* Slow path - still do that in a huge chunk */ 539 memory_region_clear_dirty_bitmap(rb->mr, start, length); 540 } 541 } else { 542 ram_addr_t offset = rb->offset; 543 544 for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) { 545 if (cpu_physical_memory_test_and_clear_dirty( 546 start + addr + offset, 547 TARGET_PAGE_SIZE, 548 DIRTY_MEMORY_MIGRATION)) { 549 long k = (start + addr) >> TARGET_PAGE_BITS; 550 if (!test_and_set_bit(k, dest)) { 551 num_dirty++; 552 } 553 } 554 } 555 } 556 557 return num_dirty; 558 } 559 #endif 560 #endif 561