/*
 * Declarations for cpu physical memory functions
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Avi Kivity <avi@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 *
 */

/*
 * This header is for use by exec.c and memory.c ONLY.  Do not include it.
 * The functions declared here will be removed soon.
 */

#ifndef RAM_ADDR_H
#define RAM_ADDR_H

#ifndef CONFIG_USER_ONLY
#include "cpu.h"
#include "sysemu/xen.h"
#include "sysemu/tcg.h"
#include "exec/ramlist.h"
#include "exec/ramblock.h"

extern uint64_t total_dirty_pages;

/**
 * clear_bmap_size: calculate clear bitmap size
 *
 * @pages: number of guest pages
 * @shift: guest page number shift
 *
 * Returns: number of bits for the clear bitmap
 */
static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
{
    return DIV_ROUND_UP(pages, 1UL << shift);
}

/**
 * clear_bmap_set: set clear bitmap for the page range.  Must be with
 * bitmap_mutex held.
 *
 * @rb: the ramblock to operate on
 * @start: the start page number
 * @npages: number of pages to set in the bitmap
 *
 * Returns: None
 */
static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
                                  uint64_t npages)
{
    uint8_t shift = rb->clear_bmap_shift;

    bitmap_set(rb->clear_bmap, start >> shift, clear_bmap_size(npages, shift));
}

/**
 * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set.
 * Must be with bitmap_mutex held.
 *
 * @rb: the ramblock to operate on
 * @page: the page number to check
 *
 * Returns: true if the bit was set, false otherwise
 */
static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
{
    uint8_t shift = rb->clear_bmap_shift;

    return bitmap_test_and_clear(rb->clear_bmap, page >> shift, 1);
}

static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
{
    return (b && b->host && offset < b->used_length) ? true : false;
}

static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
{
    assert(offset_in_ramblock(block, offset));
    return (char *)block->host + offset;
}

static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
                                                            RAMBlock *rb)
{
    uint64_t host_addr_offset =
        (uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
    return host_addr_offset >> TARGET_PAGE_BITS;
}

bool ramblock_is_pmem(RAMBlock *rb);

long qemu_minrampagesize(void);
long qemu_maxrampagesize(void);

/**
 * qemu_ram_alloc_from_file,
 * qemu_ram_alloc_from_fd: Allocate a ram block from the specified backing
 *                         file or device
 *
 * Parameters:
 *  @size: the size in bytes of the ram block
 *  @mr: the memory region where the ram block is
 *  @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
 *              RAM_NORESERVE.
 *  @mem_path or @fd: specify the backing file or device
 *  @offset: Offset into target file
 *  @readonly: true to open @mem_path (or @fd) for reading, false for
 *             read/write
 *  @errp: pointer to Error*, to store an error if it happens
 *
 * Return:
 *  On success, return a pointer to the ram block.
 *  On failure, return NULL.
 */
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   uint32_t ram_flags, const char *mem_path,
                                   off_t offset, bool readonly, Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                 uint32_t ram_flags, int fd, off_t offset,
                                 bool readonly, Error **errp);
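
/*
 * Usage sketch (illustrative only; the region, size and path below are
 * hypothetical):
 *
 *     Error *err = NULL;
 *     RAMBlock *rb = qemu_ram_alloc_from_file(ram_size, mr, RAM_SHARED,
 *                                             "/dev/shm/guest-ram", 0,
 *                                             false, &err);
 *     if (!rb) {
 *         error_report_err(err);
 *     }
 */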

RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, MemoryRegion *mr,
                         Error **errp);
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
                                    void (*resized)(const char*,
                                                    uint64_t length,
                                                    void *host),
                                    MemoryRegion *mr, Error **errp);
void qemu_ram_free(RAMBlock *block);

int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);

void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length);

/* Write back the whole block of mem to its backing store */
static inline void qemu_ram_block_writeback(RAMBlock *block)
{
    qemu_ram_msync(block, 0, block->used_length);
}

#define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
#define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))

static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = false;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
            unsigned long num = next - base;
            unsigned long found = find_next_bit(blocks->blocks[idx],
                                                num, offset);
            if (found < num) {
                dirty = true;
                break;
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    return dirty;
}

static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = true;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    RCU_READ_LOCK_GUARD();

    blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_zero_bit(blocks->blocks[idx], num, offset);
        if (found < num) {
            dirty = false;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    return dirty;
}
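
/*
 * The dirty bitmap of each client is split into blocks of
 * DIRTY_MEMORY_BLOCK_SIZE bits.  The helpers above therefore turn a page
 * number into a block index plus an offset within that block:
 *
 *     page   = addr >> TARGET_PAGE_BITS;
 *     idx    = page / DIRTY_MEMORY_BLOCK_SIZE;   (which block)
 *     offset = page % DIRTY_MEMORY_BLOCK_SIZE;   (bit within the block)
 *
 * and then scan or set bits one block at a time under the RCU read lock.
 */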

static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    return cpu_physical_memory_get_dirty(addr, 1, client);
}

static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
{
    bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
    bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
    bool migration =
        cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    return !(vga && code && migration);
}

static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
                                                               ram_addr_t length,
                                                               uint8_t mask)
{
    uint8_t ret = 0;

    if (mask & (1 << DIRTY_MEMORY_VGA) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) {
        ret |= (1 << DIRTY_MEMORY_VGA);
    }
    if (mask & (1 << DIRTY_MEMORY_CODE) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) {
        ret |= (1 << DIRTY_MEMORY_CODE);
    }
    if (mask & (1 << DIRTY_MEMORY_MIGRATION) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
        ret |= (1 << DIRTY_MEMORY_MIGRATION);
    }
    return ret;
}

static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    unsigned long page, idx, offset;
    DirtyMemoryBlocks *blocks;

    assert(client < DIRTY_MEMORY_NUM);

    page = addr >> TARGET_PAGE_BITS;
    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;

    RCU_READ_LOCK_GUARD();

    blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

    set_bit_atomic(offset, blocks->blocks[idx]);
}

static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
                                                       ram_addr_t length,
                                                       uint8_t mask)
{
    DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
    unsigned long end, page;
    unsigned long idx, offset, base;
    int i;

    if (!mask && !xen_enabled()) {
        return;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            blocks[i] = qatomic_rcu_read(&ram_list.dirty_memory[i]);
        }

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);

            if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
                                  offset, next - page);
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    xen_hvm_modified_memory(start, length);
}
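
/*
 * The @mask argument above is a bitmask of dirty memory clients.  For
 * instance, a caller that wants to dirty a range for the VGA and migration
 * clients but not for self-modifying-code tracking could pass:
 *
 *     cpu_physical_memory_set_dirty_range(addr, size,
 *                                         (1 << DIRTY_MEMORY_VGA) |
 *                                         (1 << DIRTY_MEMORY_MIGRATION));
 *
 * which, with the three clients defined today, equals DIRTY_CLIENTS_NOCODE.
 */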

#if !defined(_WIN32)

/*
 * Contrary to cpu_physical_memory_sync_dirty_bitmap(), this function returns
 * the number of dirty pages in @bitmap passed as argument.  On the other
 * hand, cpu_physical_memory_sync_dirty_bitmap() returns newly dirtied pages
 * that weren't set in the global migration bitmap.
 */
static inline
uint64_t cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
                                                ram_addr_t start,
                                                ram_addr_t pages)
{
    unsigned long i, j;
    unsigned long page_number, c, nbits;
    hwaddr addr;
    ram_addr_t ram_addr;
    uint64_t num_dirty = 0;
    unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
    unsigned long hpratio = qemu_real_host_page_size() / TARGET_PAGE_SIZE;
    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

    /* start address is aligned at the start of a word? */
    if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
        (hpratio == 1)) {
        unsigned long **blocks[DIRTY_MEMORY_NUM];
        unsigned long idx;
        unsigned long offset;
        long k;
        long nr = BITS_TO_LONGS(pages);

        idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
        offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
                          DIRTY_MEMORY_BLOCK_SIZE);

        WITH_RCU_READ_LOCK_GUARD() {
            for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
                blocks[i] =
                    qatomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
            }

            for (k = 0; k < nr; k++) {
                if (bitmap[k]) {
                    unsigned long temp = leul_to_cpu(bitmap[k]);

                    nbits = ctpopl(temp);
                    qatomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);

                    if (global_dirty_tracking) {
                        qatomic_or(
                            &blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
                            temp);
                        if (unlikely(
                            global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) {
                            total_dirty_pages += nbits;
                        }
                    }

                    num_dirty += nbits;

                    if (tcg_enabled()) {
                        qatomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset],
                                   temp);
                    }
                }

                if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                    offset = 0;
                    idx++;
                }
            }
        }

        xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
    } else {
        uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;

        if (!global_dirty_tracking) {
            clients &= ~(1 << DIRTY_MEMORY_MIGRATION);
        }

        /*
         * bitmap-traveling is faster than memory-traveling (for addr...)
         * especially when most of the memory is not dirty.
         */
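        /*
         * Each bit in @bitmap covers one host page, i.e. hpratio target
         * pages.  For example, with 64KiB host pages and 4KiB target pages,
         * hpratio == 16 and every set bit found below dirties 16 consecutive
         * target pages through cpu_physical_memory_set_dirty_range().
         */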
        for (i = 0; i < len; i++) {
            if (bitmap[i] != 0) {
                c = leul_to_cpu(bitmap[i]);
                nbits = ctpopl(c);
                if (unlikely(global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) {
                    total_dirty_pages += nbits;
                }
                num_dirty += nbits;
                do {
                    j = ctzl(c);
                    c &= ~(1ul << j);
                    page_number = (i * HOST_LONG_BITS + j) * hpratio;
                    addr = page_number * TARGET_PAGE_SIZE;
                    ram_addr = start + addr;
                    cpu_physical_memory_set_dirty_range(ram_addr,
                                       TARGET_PAGE_SIZE * hpratio, clients);
                } while (c != 0);
            }
        }
    }

    return num_dirty;
}
#endif /* not _WIN32 */

bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client);

DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
    (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client);

bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
                                            ram_addr_t start,
                                            ram_addr_t length);
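
/*
 * Usage sketch (illustrative only): a display-style caller typically takes a
 * snapshot of the dirty state for a region and then queries it page by page,
 * where @addr lies inside the snapshotted range:
 *
 *     DirtyBitmapSnapshot *snap =
 *         cpu_physical_memory_snapshot_and_clear_dirty(mr, 0, size,
 *                                                      DIRTY_MEMORY_VGA);
 *     if (cpu_physical_memory_snapshot_get_dirty(snap, addr,
 *                                                TARGET_PAGE_SIZE)) {
 *         ... redraw the page at addr ...
 *     }
 *     g_free(snap);
 */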

static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
                                                         ram_addr_t length)
{
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
}


/* Called with RCU critical section */
static inline
uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
                                               ram_addr_t start,
                                               ram_addr_t length)
{
    ram_addr_t addr;
    unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
    uint64_t num_dirty = 0;
    unsigned long *dest = rb->bmap;

    /* start address and length are aligned at the start of a word? */
    if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) ==
         (start + rb->offset) &&
        !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
        int k;
        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
        unsigned long * const *src;
        unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = BIT_WORD((word * BITS_PER_LONG) %
                                        DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

        src = qatomic_rcu_read(
                &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks;

        for (k = page; k < page + nr; k++) {
            if (src[idx][offset]) {
                unsigned long bits = qatomic_xchg(&src[idx][offset], 0);
                unsigned long new_dirty;
                new_dirty = ~dest[k];
                dest[k] |= bits;
                new_dirty &= bits;
                num_dirty += ctpopl(new_dirty);
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }

        if (rb->clear_bmap) {
            /*
             * Postpone the dirty bitmap clear to the point before we
             * really send the pages, also we will split the clear
             * dirty procedure into smaller chunks.
             */
            clear_bmap_set(rb, start >> TARGET_PAGE_BITS,
                           length >> TARGET_PAGE_BITS);
        } else {
            /* Slow path - still do that in a huge chunk */
            memory_region_clear_dirty_bitmap(rb->mr, start, length);
        }
    } else {
        ram_addr_t offset = rb->offset;

        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
            if (cpu_physical_memory_test_and_clear_dirty(
                        start + addr + offset,
                        TARGET_PAGE_SIZE,
                        DIRTY_MEMORY_MIGRATION)) {
                long k = (start + addr) >> TARGET_PAGE_BITS;
                if (!test_and_set_bit(k, dest)) {
                    num_dirty++;
                }
            }
        }
    }

    return num_dirty;
}
#endif
#endif