/*
 * Declarations for cpu physical memory functions
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Avi Kivity <avi@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 *
 */

/*
 * This header is for use by exec.c and memory.c ONLY.  Do not include it.
 * The functions declared here will be removed soon.
 */

#ifndef RAM_ADDR_H
#define RAM_ADDR_H

#ifndef CONFIG_USER_ONLY
#include "cpu.h"
#include "sysemu/xen.h"
#include "sysemu/tcg.h"
#include "exec/ramlist.h"
#include "exec/ramblock.h"
#include "exec/exec-all.h"
#include "qemu/rcu.h"

extern uint64_t total_dirty_pages;

/**
 * clear_bmap_size: calculate clear bitmap size
 *
 * @pages: number of guest pages
 * @shift: guest page number shift
 *
 * Returns: number of bits for the clear bitmap
 */
static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
{
    return DIV_ROUND_UP(pages, 1UL << shift);
}

/**
 * clear_bmap_set: set clear bitmap for the page range.  Must be called with
 * bitmap_mutex held.
 *
 * @rb: the ramblock to operate on
 * @start: the start page number
 * @npages: number of pages to set in the bitmap
 *
 * Returns: None
 */
static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
                                  uint64_t npages)
{
    uint8_t shift = rb->clear_bmap_shift;

    bitmap_set(rb->clear_bmap, start >> shift, clear_bmap_size(npages, shift));
}

/**
 * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set.
 * Must be called with bitmap_mutex held.
 *
 * @rb: the ramblock to operate on
 * @page: the page number to check
 *
 * Returns: true if the bit was set, false otherwise
 */
static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
{
    uint8_t shift = rb->clear_bmap_shift;

    return bitmap_test_and_clear(rb->clear_bmap, page >> shift, 1);
}

static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
{
    return (b && b->host && offset < b->used_length) ? true : false;
}

static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
{
    assert(offset_in_ramblock(block, offset));
    return (char *)block->host + offset;
}

static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
                                                            RAMBlock *rb)
{
    uint64_t host_addr_offset =
            (uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
    return host_addr_offset >> TARGET_PAGE_BITS;
}

bool ramblock_is_pmem(RAMBlock *rb);

long qemu_minrampagesize(void);
long qemu_maxrampagesize(void);

/**
 * qemu_ram_alloc_from_file,
 * qemu_ram_alloc_from_fd:  Allocate a ram block from the specified backing
 *                          file or device
 *
 * Parameters:
 *  @size: the size in bytes of the ram block
 *  @mr: the memory region where the ram block is
 *  @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
 *              RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
 *              RAM_READONLY_FD, RAM_GUEST_MEMFD
 *  @mem_path or @fd: specify the backing file or device
 *  @offset: Offset into target file
 *  @errp: pointer to Error*, to store an error if it happens
 *
 * Return:
 *  On success, return a pointer to the ram block.
 *  On failure, return NULL.
 */
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   uint32_t ram_flags, const char *mem_path,
                                   off_t offset, Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                 uint32_t ram_flags, int fd, off_t offset,
                                 Error **errp);
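
/*
 * Illustrative sketch only, not part of this header's API surface: a caller
 * that already owns a MemoryRegion could back it with a file roughly like
 * this.  The region pointer, path, size (MiB comes from "qemu/units.h") and
 * flags are placeholders; in practice this is reached through the
 * memory_region_*_from_file() / memory backend layers rather than called
 * directly.
 *
 *     Error *err = NULL;
 *     RAMBlock *rb;
 *
 *     rb = qemu_ram_alloc_from_file(64 * MiB, mr, RAM_SHARED,
 *                                   "/dev/shm/guest-ram", 0, &err);
 *     if (!rb) {
 *         error_report_err(err);
 *     }
 */
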
RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, MemoryRegion *mr,
                         Error **errp);
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
                                    void (*resized)(const char*,
                                                    uint64_t length,
                                                    void *host),
                                    MemoryRegion *mr, Error **errp);
void qemu_ram_free(RAMBlock *block);

int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);

void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length);

/* Clear whole block of mem */
static inline void qemu_ram_block_writeback(RAMBlock *block)
{
    qemu_ram_msync(block, 0, block->used_length);
}

#define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
#define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))

static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = false;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
            unsigned long num = next - base;
            unsigned long found = find_next_bit(blocks->blocks[idx],
                                                num, offset);
            if (found < num) {
                dirty = true;
                break;
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    return dirty;
}

static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = true;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    RCU_READ_LOCK_GUARD();

    blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_zero_bit(blocks->blocks[idx],
                                                 num, offset);
        if (found < num) {
            dirty = false;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    return dirty;
}

static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    return cpu_physical_memory_get_dirty(addr, 1, client);
}
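
/*
 * Worked example (illustrative only): the dirty bitmaps are split into
 * blocks of DIRTY_MEMORY_BLOCK_SIZE bits, so the helpers above decompose a
 * page number into a block index and a bit offset:
 *
 *     page   = addr >> TARGET_PAGE_BITS;
 *     idx    = page / DIRTY_MEMORY_BLOCK_SIZE;
 *     offset = page % DIRTY_MEMORY_BLOCK_SIZE;
 *
 * Assuming DIRTY_MEMORY_BLOCK_SIZE is 256 * 1024 * 8 bits (its definition in
 * "exec/ramlist.h" at the time of writing) and the target page is 4 KiB,
 * each block covers 8 GiB of guest RAM.  A display device could then poll a
 * framebuffer page with something like (fb_addr being a placeholder
 * ram_addr_t):
 *
 *     if (cpu_physical_memory_get_dirty(fb_addr, TARGET_PAGE_SIZE,
 *                                       DIRTY_MEMORY_VGA)) {
 *         ... repaint the scanlines backed by this page ...
 *     }
 */
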
static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
{
    bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
    bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
    bool migration =
        cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    return !(vga && code && migration);
}

static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
                                                               ram_addr_t length,
                                                               uint8_t mask)
{
    uint8_t ret = 0;

    if (mask & (1 << DIRTY_MEMORY_VGA) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) {
        ret |= (1 << DIRTY_MEMORY_VGA);
    }
    if (mask & (1 << DIRTY_MEMORY_CODE) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) {
        ret |= (1 << DIRTY_MEMORY_CODE);
    }
    if (mask & (1 << DIRTY_MEMORY_MIGRATION) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
        ret |= (1 << DIRTY_MEMORY_MIGRATION);
    }
    return ret;
}

static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    unsigned long page, idx, offset;
    DirtyMemoryBlocks *blocks;

    assert(client < DIRTY_MEMORY_NUM);

    page = addr >> TARGET_PAGE_BITS;
    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;

    RCU_READ_LOCK_GUARD();

    blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

    set_bit_atomic(offset, blocks->blocks[idx]);
}

static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
                                                       ram_addr_t length,
                                                       uint8_t mask)
{
    DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
    unsigned long end, page;
    unsigned long idx, offset, base;
    int i;

    if (!mask && !xen_enabled()) {
        return;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            blocks[i] = qatomic_rcu_read(&ram_list.dirty_memory[i]);
        }

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);

            if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
                                  offset, next - page);
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    xen_hvm_modified_memory(start, length);
}
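
/*
 * Illustrative sketch (hypothetical names): a device model that has just
 * DMA'd into guest RAM would mark the written range dirty for every client
 * except the TCG code client:
 *
 *     cpu_physical_memory_set_dirty_range(dma_ram_addr, dma_len,
 *                                         DIRTY_CLIENTS_NOCODE);
 *
 * With DIRTY_MEMORY_NUM currently 3, that mask is
 * 0x7 & ~(1 << DIRTY_MEMORY_CODE), i.e. VGA and MIGRATION but not CODE.
 * Device code normally reaches this path through the memory API
 * (address_space_write() and friends) rather than calling it directly.
 */
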
#if !defined(_WIN32)

/*
 * Contrary to cpu_physical_memory_sync_dirty_bitmap() this function returns
 * the number of dirty pages in @bitmap passed as argument.  On the other
 * hand, cpu_physical_memory_sync_dirty_bitmap() returns newly dirtied pages
 * that weren't set in the global migration bitmap.
 */
static inline
uint64_t cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
                                                ram_addr_t start,
                                                ram_addr_t pages)
{
    unsigned long i, j;
    unsigned long page_number, c, nbits;
    hwaddr addr;
    ram_addr_t ram_addr;
    uint64_t num_dirty = 0;
    unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
    unsigned long hpratio = qemu_real_host_page_size() / TARGET_PAGE_SIZE;
    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

    /* start address is aligned at the start of a word? */
    if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
        (hpratio == 1)) {
        unsigned long **blocks[DIRTY_MEMORY_NUM];
        unsigned long idx;
        unsigned long offset;
        long k;
        long nr = BITS_TO_LONGS(pages);

        idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
        offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
                          DIRTY_MEMORY_BLOCK_SIZE);

        WITH_RCU_READ_LOCK_GUARD() {
            for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
                blocks[i] =
                    qatomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
            }

            for (k = 0; k < nr; k++) {
                if (bitmap[k]) {
                    unsigned long temp = leul_to_cpu(bitmap[k]);

                    nbits = ctpopl(temp);
                    qatomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);

                    if (global_dirty_tracking) {
                        qatomic_or(
                            &blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
                            temp);
                        if (unlikely(
                            global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) {
                            total_dirty_pages += nbits;
                        }
                    }

                    num_dirty += nbits;

                    if (tcg_enabled()) {
                        qatomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset],
                                   temp);
                    }
                }

                if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                    offset = 0;
                    idx++;
                }
            }
        }

        xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
    } else {
        uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;

        if (!global_dirty_tracking) {
            clients &= ~(1 << DIRTY_MEMORY_MIGRATION);
        }

        /*
         * bitmap-traveling is faster than memory-traveling (for addr...)
         * especially when most of the memory is not dirty.
         */
        for (i = 0; i < len; i++) {
            if (bitmap[i] != 0) {
                c = leul_to_cpu(bitmap[i]);
                nbits = ctpopl(c);
                if (unlikely(global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) {
                    total_dirty_pages += nbits;
                }
                num_dirty += nbits;
                do {
                    j = ctzl(c);
                    c &= ~(1ul << j);
                    page_number = (i * HOST_LONG_BITS + j) * hpratio;
                    addr = page_number * TARGET_PAGE_SIZE;
                    ram_addr = start + addr;
                    cpu_physical_memory_set_dirty_range(ram_addr,
                                       TARGET_PAGE_SIZE * hpratio, clients);
                } while (c != 0);
            }
        }
    }

    return num_dirty;
}
#endif /* not _WIN32 */
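
/*
 * Illustrative sketch (variable names are hypothetical): an accelerator that
 * maintains a little-endian dirty log with one bit per host page, as KVM's
 * dirty log does, could fold one memory slot's log into QEMU's dirty bitmaps
 * roughly like this:
 *
 *     unsigned long *le_bitmap;    (points at the kernel-filled log)
 *     uint64_t dirty_in_log;
 *
 *     dirty_in_log = cpu_physical_memory_set_dirty_lebitmap(le_bitmap,
 *                                                           slot_ram_offset,
 *                                                           slot_host_pages);
 *
 * where slot_ram_offset is the ram_addr_t of the slot's first page and
 * slot_host_pages is the number of bits in the log.
 */
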
static inline void cpu_physical_memory_dirty_bits_cleared(ram_addr_t start,
                                                          ram_addr_t length)
{
    if (tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }
}

bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client);

DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
    (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client);

bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
                                            ram_addr_t start,
                                            ram_addr_t length);

static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
                                                         ram_addr_t length)
{
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
}

/* Called with RCU critical section */
static inline
uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
                                               ram_addr_t start,
                                               ram_addr_t length)
{
    ram_addr_t addr;
    unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
    uint64_t num_dirty = 0;
    unsigned long *dest = rb->bmap;

    /* start address and length are aligned at the start of a word? */
    if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) ==
         (start + rb->offset) &&
        !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
        int k;
        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
        unsigned long * const *src;
        unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = BIT_WORD((word * BITS_PER_LONG) %
                                        DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

        src = qatomic_rcu_read(
                &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks;

        for (k = page; k < page + nr; k++) {
            if (src[idx][offset]) {
                unsigned long bits = qatomic_xchg(&src[idx][offset], 0);
                unsigned long new_dirty;
                new_dirty = ~dest[k];
                dest[k] |= bits;
                new_dirty &= bits;
                num_dirty += ctpopl(new_dirty);
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }
        if (num_dirty) {
            cpu_physical_memory_dirty_bits_cleared(start, length);
        }

        if (rb->clear_bmap) {
            /*
             * Postpone the dirty bitmap clear to the point before we
             * really send the pages; also we will split the clear
             * dirty procedure into smaller chunks.
             */
            clear_bmap_set(rb, start >> TARGET_PAGE_BITS,
                           length >> TARGET_PAGE_BITS);
        } else {
            /* Slow path - still do that in a huge chunk */
            memory_region_clear_dirty_bitmap(rb->mr, start, length);
        }
    } else {
        ram_addr_t offset = rb->offset;

        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
            if (cpu_physical_memory_test_and_clear_dirty(
                        start + addr + offset,
                        TARGET_PAGE_SIZE,
                        DIRTY_MEMORY_MIGRATION)) {
                long k = (start + addr) >> TARGET_PAGE_BITS;
                if (!test_and_set_bit(k, dest)) {
                    num_dirty++;
                }
            }
        }
    }

    return num_dirty;
}
#endif
#endif