1 /* 2 * Declarations for cpu physical memory functions 3 * 4 * Copyright 2011 Red Hat, Inc. and/or its affiliates 5 * 6 * Authors: 7 * Avi Kivity <avi@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or 10 * later. See the COPYING file in the top-level directory. 11 * 12 */ 13 14 /* 15 * This header is for use by exec.c and memory.c ONLY. Do not include it. 16 * The functions declared here will be removed soon. 17 */ 18 19 #ifndef RAM_ADDR_H 20 #define RAM_ADDR_H 21 22 #ifndef CONFIG_USER_ONLY 23 #include "cpu.h" 24 #include "sysemu/xen.h" 25 #include "sysemu/tcg.h" 26 #include "exec/ramlist.h" 27 #include "exec/ramblock.h" 28 #include "exec/exec-all.h" 29 30 extern uint64_t total_dirty_pages; 31 32 /** 33 * clear_bmap_size: calculate clear bitmap size 34 * 35 * @pages: number of guest pages 36 * @shift: guest page number shift 37 * 38 * Returns: number of bits for the clear bitmap 39 */ 40 static inline long clear_bmap_size(uint64_t pages, uint8_t shift) 41 { 42 return DIV_ROUND_UP(pages, 1UL << shift); 43 } 44 45 /** 46 * clear_bmap_set: set clear bitmap for the page range. Must be with 47 * bitmap_mutex held. 48 * 49 * @rb: the ramblock to operate on 50 * @start: the start page number 51 * @size: number of pages to set in the bitmap 52 * 53 * Returns: None 54 */ 55 static inline void clear_bmap_set(RAMBlock *rb, uint64_t start, 56 uint64_t npages) 57 { 58 uint8_t shift = rb->clear_bmap_shift; 59 60 bitmap_set(rb->clear_bmap, start >> shift, clear_bmap_size(npages, shift)); 61 } 62 63 /** 64 * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set. 65 * Must be with bitmap_mutex held. 66 * 67 * @rb: the ramblock to operate on 68 * @page: the page number to check 69 * 70 * Returns: true if the bit was set, false otherwise 71 */ 72 static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page) 73 { 74 uint8_t shift = rb->clear_bmap_shift; 75 76 return bitmap_test_and_clear(rb->clear_bmap, page >> shift, 1); 77 } 78 79 static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset) 80 { 81 return (b && b->host && offset < b->used_length) ? true : false; 82 } 83 84 static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset) 85 { 86 assert(offset_in_ramblock(block, offset)); 87 return (char *)block->host + offset; 88 } 89 90 static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr, 91 RAMBlock *rb) 92 { 93 uint64_t host_addr_offset = 94 (uint64_t)(uintptr_t)(host_addr - (void *)rb->host); 95 return host_addr_offset >> TARGET_PAGE_BITS; 96 } 97 98 bool ramblock_is_pmem(RAMBlock *rb); 99 100 long qemu_minrampagesize(void); 101 long qemu_maxrampagesize(void); 102 103 /** 104 * qemu_ram_alloc_from_file, 105 * qemu_ram_alloc_from_fd: Allocate a ram block from the specified backing 106 * file or device 107 * 108 * Parameters: 109 * @size: the size in bytes of the ram block 110 * @mr: the memory region where the ram block is 111 * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, 112 * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, 113 * RAM_READONLY_FD, RAM_GUEST_MEMFD 114 * @mem_path or @fd: specify the backing file or device 115 * @offset: Offset into target file 116 * @errp: pointer to Error*, to store an error if it happens 117 * 118 * Return: 119 * On success, return a pointer to the ram block. 120 * On failure, return NULL. 121 */ 122 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, 123 uint32_t ram_flags, const char *mem_path, 124 off_t offset, Error **errp); 125 RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, 126 uint32_t ram_flags, int fd, off_t offset, 127 Error **errp); 128 129 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, 130 MemoryRegion *mr, Error **errp); 131 RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, MemoryRegion *mr, 132 Error **errp); 133 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size, 134 void (*resized)(const char*, 135 uint64_t length, 136 void *host), 137 MemoryRegion *mr, Error **errp); 138 void qemu_ram_free(RAMBlock *block); 139 140 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp); 141 142 void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length); 143 144 /* Clear whole block of mem */ 145 static inline void qemu_ram_block_writeback(RAMBlock *block) 146 { 147 qemu_ram_msync(block, 0, block->used_length); 148 } 149 150 #define DIRTY_CLIENTS_ALL ((1 << DIRTY_MEMORY_NUM) - 1) 151 #define DIRTY_CLIENTS_NOCODE (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE)) 152 153 static inline bool cpu_physical_memory_get_dirty(ram_addr_t start, 154 ram_addr_t length, 155 unsigned client) 156 { 157 DirtyMemoryBlocks *blocks; 158 unsigned long end, page; 159 unsigned long idx, offset, base; 160 bool dirty = false; 161 162 assert(client < DIRTY_MEMORY_NUM); 163 164 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; 165 page = start >> TARGET_PAGE_BITS; 166 167 WITH_RCU_READ_LOCK_GUARD() { 168 blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]); 169 170 idx = page / DIRTY_MEMORY_BLOCK_SIZE; 171 offset = page % DIRTY_MEMORY_BLOCK_SIZE; 172 base = page - offset; 173 while (page < end) { 174 unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE); 175 unsigned long num = next - base; 176 unsigned long found = find_next_bit(blocks->blocks[idx], 177 num, offset); 178 if (found < num) { 179 dirty = true; 180 break; 181 } 182 183 page = next; 184 idx++; 185 offset = 0; 186 base += DIRTY_MEMORY_BLOCK_SIZE; 187 } 188 } 189 190 return dirty; 191 } 192 193 static inline bool cpu_physical_memory_all_dirty(ram_addr_t start, 194 ram_addr_t length, 195 unsigned client) 196 { 197 DirtyMemoryBlocks *blocks; 198 unsigned long end, page; 199 unsigned long idx, offset, base; 200 bool dirty = true; 201 202 assert(client < DIRTY_MEMORY_NUM); 203 204 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; 205 page = start >> TARGET_PAGE_BITS; 206 207 RCU_READ_LOCK_GUARD(); 208 209 blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]); 210 211 idx = page / DIRTY_MEMORY_BLOCK_SIZE; 212 offset = page % DIRTY_MEMORY_BLOCK_SIZE; 213 base = page - offset; 214 while (page < end) { 215 unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE); 216 unsigned long num = next - base; 217 unsigned long found = find_next_zero_bit(blocks->blocks[idx], num, offset); 218 if (found < num) { 219 dirty = false; 220 break; 221 } 222 223 page = next; 224 idx++; 225 offset = 0; 226 base += DIRTY_MEMORY_BLOCK_SIZE; 227 } 228 229 return dirty; 230 } 231 232 static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr, 233 unsigned client) 234 { 235 return cpu_physical_memory_get_dirty(addr, 1, client); 236 } 237 238 static inline bool cpu_physical_memory_is_clean(ram_addr_t addr) 239 { 240 bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA); 241 bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE); 242 bool migration = 243 cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION); 244 return !(vga && code && migration); 245 } 246 247 static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start, 248 ram_addr_t length, 249 uint8_t mask) 250 { 251 uint8_t ret = 0; 252 253 if (mask & (1 << DIRTY_MEMORY_VGA) && 254 !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) { 255 ret |= (1 << DIRTY_MEMORY_VGA); 256 } 257 if (mask & (1 << DIRTY_MEMORY_CODE) && 258 !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) { 259 ret |= (1 << DIRTY_MEMORY_CODE); 260 } 261 if (mask & (1 << DIRTY_MEMORY_MIGRATION) && 262 !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) { 263 ret |= (1 << DIRTY_MEMORY_MIGRATION); 264 } 265 return ret; 266 } 267 268 static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr, 269 unsigned client) 270 { 271 unsigned long page, idx, offset; 272 DirtyMemoryBlocks *blocks; 273 274 assert(client < DIRTY_MEMORY_NUM); 275 276 page = addr >> TARGET_PAGE_BITS; 277 idx = page / DIRTY_MEMORY_BLOCK_SIZE; 278 offset = page % DIRTY_MEMORY_BLOCK_SIZE; 279 280 RCU_READ_LOCK_GUARD(); 281 282 blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]); 283 284 set_bit_atomic(offset, blocks->blocks[idx]); 285 } 286 287 static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start, 288 ram_addr_t length, 289 uint8_t mask) 290 { 291 DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM]; 292 unsigned long end, page; 293 unsigned long idx, offset, base; 294 int i; 295 296 if (!mask && !xen_enabled()) { 297 return; 298 } 299 300 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; 301 page = start >> TARGET_PAGE_BITS; 302 303 WITH_RCU_READ_LOCK_GUARD() { 304 for (i = 0; i < DIRTY_MEMORY_NUM; i++) { 305 blocks[i] = qatomic_rcu_read(&ram_list.dirty_memory[i]); 306 } 307 308 idx = page / DIRTY_MEMORY_BLOCK_SIZE; 309 offset = page % DIRTY_MEMORY_BLOCK_SIZE; 310 base = page - offset; 311 while (page < end) { 312 unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE); 313 314 if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) { 315 bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx], 316 offset, next - page); 317 } 318 if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) { 319 bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx], 320 offset, next - page); 321 } 322 if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) { 323 bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx], 324 offset, next - page); 325 } 326 327 page = next; 328 idx++; 329 offset = 0; 330 base += DIRTY_MEMORY_BLOCK_SIZE; 331 } 332 } 333 334 xen_hvm_modified_memory(start, length); 335 } 336 337 #if !defined(_WIN32) 338 339 /* 340 * Contrary to cpu_physical_memory_sync_dirty_bitmap() this function returns 341 * the number of dirty pages in @bitmap passed as argument. On the other hand, 342 * cpu_physical_memory_sync_dirty_bitmap() returns newly dirtied pages that 343 * weren't set in the global migration bitmap. 344 */ 345 static inline 346 uint64_t cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, 347 ram_addr_t start, 348 ram_addr_t pages) 349 { 350 unsigned long i, j; 351 unsigned long page_number, c, nbits; 352 hwaddr addr; 353 ram_addr_t ram_addr; 354 uint64_t num_dirty = 0; 355 unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS; 356 unsigned long hpratio = qemu_real_host_page_size() / TARGET_PAGE_SIZE; 357 unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS); 358 359 /* start address is aligned at the start of a word? */ 360 if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) && 361 (hpratio == 1)) { 362 unsigned long **blocks[DIRTY_MEMORY_NUM]; 363 unsigned long idx; 364 unsigned long offset; 365 long k; 366 long nr = BITS_TO_LONGS(pages); 367 368 idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE; 369 offset = BIT_WORD((start >> TARGET_PAGE_BITS) % 370 DIRTY_MEMORY_BLOCK_SIZE); 371 372 WITH_RCU_READ_LOCK_GUARD() { 373 for (i = 0; i < DIRTY_MEMORY_NUM; i++) { 374 blocks[i] = 375 qatomic_rcu_read(&ram_list.dirty_memory[i])->blocks; 376 } 377 378 for (k = 0; k < nr; k++) { 379 if (bitmap[k]) { 380 unsigned long temp = leul_to_cpu(bitmap[k]); 381 382 nbits = ctpopl(temp); 383 qatomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp); 384 385 if (global_dirty_tracking) { 386 qatomic_or( 387 &blocks[DIRTY_MEMORY_MIGRATION][idx][offset], 388 temp); 389 if (unlikely( 390 global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) { 391 total_dirty_pages += nbits; 392 } 393 } 394 395 num_dirty += nbits; 396 397 if (tcg_enabled()) { 398 qatomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], 399 temp); 400 } 401 } 402 403 if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) { 404 offset = 0; 405 idx++; 406 } 407 } 408 } 409 410 xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS); 411 } else { 412 uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE; 413 414 if (!global_dirty_tracking) { 415 clients &= ~(1 << DIRTY_MEMORY_MIGRATION); 416 } 417 418 /* 419 * bitmap-traveling is faster than memory-traveling (for addr...) 420 * especially when most of the memory is not dirty. 421 */ 422 for (i = 0; i < len; i++) { 423 if (bitmap[i] != 0) { 424 c = leul_to_cpu(bitmap[i]); 425 nbits = ctpopl(c); 426 if (unlikely(global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) { 427 total_dirty_pages += nbits; 428 } 429 num_dirty += nbits; 430 do { 431 j = ctzl(c); 432 c &= ~(1ul << j); 433 page_number = (i * HOST_LONG_BITS + j) * hpratio; 434 addr = page_number * TARGET_PAGE_SIZE; 435 ram_addr = start + addr; 436 cpu_physical_memory_set_dirty_range(ram_addr, 437 TARGET_PAGE_SIZE * hpratio, clients); 438 } while (c != 0); 439 } 440 } 441 } 442 443 return num_dirty; 444 } 445 #endif /* not _WIN32 */ 446 447 static inline void cpu_physical_memory_dirty_bits_cleared(ram_addr_t start, 448 ram_addr_t length) 449 { 450 if (tcg_enabled()) { 451 tlb_reset_dirty_range_all(start, length); 452 } 453 454 } 455 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, 456 ram_addr_t length, 457 unsigned client); 458 459 DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty 460 (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client); 461 462 bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap, 463 ram_addr_t start, 464 ram_addr_t length); 465 466 static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start, 467 ram_addr_t length) 468 { 469 cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION); 470 cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA); 471 cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE); 472 } 473 474 475 /* Called with RCU critical section */ 476 static inline 477 uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb, 478 ram_addr_t start, 479 ram_addr_t length) 480 { 481 ram_addr_t addr; 482 unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS); 483 uint64_t num_dirty = 0; 484 unsigned long *dest = rb->bmap; 485 486 /* start address and length is aligned at the start of a word? */ 487 if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) == 488 (start + rb->offset) && 489 !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) { 490 int k; 491 int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS); 492 unsigned long * const *src; 493 unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE; 494 unsigned long offset = BIT_WORD((word * BITS_PER_LONG) % 495 DIRTY_MEMORY_BLOCK_SIZE); 496 unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS); 497 498 src = qatomic_rcu_read( 499 &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks; 500 501 for (k = page; k < page + nr; k++) { 502 if (src[idx][offset]) { 503 unsigned long bits = qatomic_xchg(&src[idx][offset], 0); 504 unsigned long new_dirty; 505 new_dirty = ~dest[k]; 506 dest[k] |= bits; 507 new_dirty &= bits; 508 num_dirty += ctpopl(new_dirty); 509 } 510 511 if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) { 512 offset = 0; 513 idx++; 514 } 515 } 516 if (num_dirty) { 517 cpu_physical_memory_dirty_bits_cleared(start, length); 518 } 519 520 if (rb->clear_bmap) { 521 /* 522 * Postpone the dirty bitmap clear to the point before we 523 * really send the pages, also we will split the clear 524 * dirty procedure into smaller chunks. 525 */ 526 clear_bmap_set(rb, start >> TARGET_PAGE_BITS, 527 length >> TARGET_PAGE_BITS); 528 } else { 529 /* Slow path - still do that in a huge chunk */ 530 memory_region_clear_dirty_bitmap(rb->mr, start, length); 531 } 532 } else { 533 ram_addr_t offset = rb->offset; 534 535 for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) { 536 if (cpu_physical_memory_test_and_clear_dirty( 537 start + addr + offset, 538 TARGET_PAGE_SIZE, 539 DIRTY_MEMORY_MIGRATION)) { 540 long k = (start + addr) >> TARGET_PAGE_BITS; 541 if (!test_and_set_bit(k, dest)) { 542 num_dirty++; 543 } 544 } 545 } 546 } 547 548 return num_dirty; 549 } 550 #endif 551 #endif 552