1 /* 2 * Declarations for cpu physical memory functions 3 * 4 * Copyright 2011 Red Hat, Inc. and/or its affiliates 5 * 6 * Authors: 7 * Avi Kivity <avi@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or 10 * later. See the COPYING file in the top-level directory. 11 * 12 */ 13 14 /* 15 * This header is for use by exec.c and memory.c ONLY. Do not include it. 16 * The functions declared here will be removed soon. 17 */ 18 19 #ifndef RAM_ADDR_H 20 #define RAM_ADDR_H 21 22 #ifndef CONFIG_USER_ONLY 23 #include "cpu.h" 24 #include "sysemu/xen.h" 25 #include "sysemu/tcg.h" 26 #include "exec/ramlist.h" 27 #include "exec/ramblock.h" 28 29 extern uint64_t total_dirty_pages; 30 31 /** 32 * clear_bmap_size: calculate clear bitmap size 33 * 34 * @pages: number of guest pages 35 * @shift: guest page number shift 36 * 37 * Returns: number of bits for the clear bitmap 38 */ 39 static inline long clear_bmap_size(uint64_t pages, uint8_t shift) 40 { 41 return DIV_ROUND_UP(pages, 1UL << shift); 42 } 43 44 /** 45 * clear_bmap_set: set clear bitmap for the page range. Must be with 46 * bitmap_mutex held. 47 * 48 * @rb: the ramblock to operate on 49 * @start: the start page number 50 * @size: number of pages to set in the bitmap 51 * 52 * Returns: None 53 */ 54 static inline void clear_bmap_set(RAMBlock *rb, uint64_t start, 55 uint64_t npages) 56 { 57 uint8_t shift = rb->clear_bmap_shift; 58 59 bitmap_set(rb->clear_bmap, start >> shift, clear_bmap_size(npages, shift)); 60 } 61 62 /** 63 * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set. 64 * Must be with bitmap_mutex held. 65 * 66 * @rb: the ramblock to operate on 67 * @page: the page number to check 68 * 69 * Returns: true if the bit was set, false otherwise 70 */ 71 static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page) 72 { 73 uint8_t shift = rb->clear_bmap_shift; 74 75 return bitmap_test_and_clear(rb->clear_bmap, page >> shift, 1); 76 } 77 78 static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset) 79 { 80 return (b && b->host && offset < b->used_length) ? true : false; 81 } 82 83 static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset) 84 { 85 assert(offset_in_ramblock(block, offset)); 86 return (char *)block->host + offset; 87 } 88 89 static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr, 90 RAMBlock *rb) 91 { 92 uint64_t host_addr_offset = 93 (uint64_t)(uintptr_t)(host_addr - (void *)rb->host); 94 return host_addr_offset >> TARGET_PAGE_BITS; 95 } 96 97 bool ramblock_is_pmem(RAMBlock *rb); 98 99 long qemu_minrampagesize(void); 100 long qemu_maxrampagesize(void); 101 102 /** 103 * qemu_ram_alloc_from_file, 104 * qemu_ram_alloc_from_fd: Allocate a ram block from the specified backing 105 * file or device 106 * 107 * Parameters: 108 * @size: the size in bytes of the ram block 109 * @mr: the memory region where the ram block is 110 * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, 111 * RAM_NORESERVE. 112 * @mem_path or @fd: specify the backing file or device 113 * @offset: Offset into target file 114 * @readonly: true to open @path for reading, false for read/write. 115 * @errp: pointer to Error*, to store an error if it happens 116 * 117 * Return: 118 * On success, return a pointer to the ram block. 119 * On failure, return NULL. 120 */ 121 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, 122 uint32_t ram_flags, const char *mem_path, 123 off_t offset, bool readonly, Error **errp); 124 RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, 125 uint32_t ram_flags, int fd, off_t offset, 126 bool readonly, Error **errp); 127 128 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, 129 MemoryRegion *mr, Error **errp); 130 RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, MemoryRegion *mr, 131 Error **errp); 132 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size, 133 void (*resized)(const char*, 134 uint64_t length, 135 void *host), 136 MemoryRegion *mr, Error **errp); 137 void qemu_ram_free(RAMBlock *block); 138 139 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp); 140 141 void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length); 142 143 /* Clear whole block of mem */ 144 static inline void qemu_ram_block_writeback(RAMBlock *block) 145 { 146 qemu_ram_msync(block, 0, block->used_length); 147 } 148 149 #define DIRTY_CLIENTS_ALL ((1 << DIRTY_MEMORY_NUM) - 1) 150 #define DIRTY_CLIENTS_NOCODE (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE)) 151 152 static inline bool cpu_physical_memory_get_dirty(ram_addr_t start, 153 ram_addr_t length, 154 unsigned client) 155 { 156 DirtyMemoryBlocks *blocks; 157 unsigned long end, page; 158 unsigned long idx, offset, base; 159 bool dirty = false; 160 161 assert(client < DIRTY_MEMORY_NUM); 162 163 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; 164 page = start >> TARGET_PAGE_BITS; 165 166 WITH_RCU_READ_LOCK_GUARD() { 167 blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]); 168 169 idx = page / DIRTY_MEMORY_BLOCK_SIZE; 170 offset = page % DIRTY_MEMORY_BLOCK_SIZE; 171 base = page - offset; 172 while (page < end) { 173 unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE); 174 unsigned long num = next - base; 175 unsigned long found = find_next_bit(blocks->blocks[idx], 176 num, offset); 177 if (found < num) { 178 dirty = true; 179 break; 180 } 181 182 page = next; 183 idx++; 184 offset = 0; 185 base += DIRTY_MEMORY_BLOCK_SIZE; 186 } 187 } 188 189 return dirty; 190 } 191 192 static inline bool cpu_physical_memory_all_dirty(ram_addr_t start, 193 ram_addr_t length, 194 unsigned client) 195 { 196 DirtyMemoryBlocks *blocks; 197 unsigned long end, page; 198 unsigned long idx, offset, base; 199 bool dirty = true; 200 201 assert(client < DIRTY_MEMORY_NUM); 202 203 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; 204 page = start >> TARGET_PAGE_BITS; 205 206 RCU_READ_LOCK_GUARD(); 207 208 blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]); 209 210 idx = page / DIRTY_MEMORY_BLOCK_SIZE; 211 offset = page % DIRTY_MEMORY_BLOCK_SIZE; 212 base = page - offset; 213 while (page < end) { 214 unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE); 215 unsigned long num = next - base; 216 unsigned long found = find_next_zero_bit(blocks->blocks[idx], num, offset); 217 if (found < num) { 218 dirty = false; 219 break; 220 } 221 222 page = next; 223 idx++; 224 offset = 0; 225 base += DIRTY_MEMORY_BLOCK_SIZE; 226 } 227 228 return dirty; 229 } 230 231 static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr, 232 unsigned client) 233 { 234 return cpu_physical_memory_get_dirty(addr, 1, client); 235 } 236 237 static inline bool cpu_physical_memory_is_clean(ram_addr_t addr) 238 { 239 bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA); 240 bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE); 241 bool migration = 242 cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION); 243 return !(vga && code && migration); 244 } 245 246 static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start, 247 ram_addr_t length, 248 uint8_t mask) 249 { 250 uint8_t ret = 0; 251 252 if (mask & (1 << DIRTY_MEMORY_VGA) && 253 !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) { 254 ret |= (1 << DIRTY_MEMORY_VGA); 255 } 256 if (mask & (1 << DIRTY_MEMORY_CODE) && 257 !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) { 258 ret |= (1 << DIRTY_MEMORY_CODE); 259 } 260 if (mask & (1 << DIRTY_MEMORY_MIGRATION) && 261 !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) { 262 ret |= (1 << DIRTY_MEMORY_MIGRATION); 263 } 264 return ret; 265 } 266 267 static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr, 268 unsigned client) 269 { 270 unsigned long page, idx, offset; 271 DirtyMemoryBlocks *blocks; 272 273 assert(client < DIRTY_MEMORY_NUM); 274 275 page = addr >> TARGET_PAGE_BITS; 276 idx = page / DIRTY_MEMORY_BLOCK_SIZE; 277 offset = page % DIRTY_MEMORY_BLOCK_SIZE; 278 279 RCU_READ_LOCK_GUARD(); 280 281 blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]); 282 283 set_bit_atomic(offset, blocks->blocks[idx]); 284 } 285 286 static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start, 287 ram_addr_t length, 288 uint8_t mask) 289 { 290 DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM]; 291 unsigned long end, page; 292 unsigned long idx, offset, base; 293 int i; 294 295 if (!mask && !xen_enabled()) { 296 return; 297 } 298 299 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; 300 page = start >> TARGET_PAGE_BITS; 301 302 WITH_RCU_READ_LOCK_GUARD() { 303 for (i = 0; i < DIRTY_MEMORY_NUM; i++) { 304 blocks[i] = qatomic_rcu_read(&ram_list.dirty_memory[i]); 305 } 306 307 idx = page / DIRTY_MEMORY_BLOCK_SIZE; 308 offset = page % DIRTY_MEMORY_BLOCK_SIZE; 309 base = page - offset; 310 while (page < end) { 311 unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE); 312 313 if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) { 314 bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx], 315 offset, next - page); 316 } 317 if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) { 318 bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx], 319 offset, next - page); 320 } 321 if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) { 322 bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx], 323 offset, next - page); 324 } 325 326 page = next; 327 idx++; 328 offset = 0; 329 base += DIRTY_MEMORY_BLOCK_SIZE; 330 } 331 } 332 333 xen_hvm_modified_memory(start, length); 334 } 335 336 #if !defined(_WIN32) 337 static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, 338 ram_addr_t start, 339 ram_addr_t pages) 340 { 341 unsigned long i, j; 342 unsigned long page_number, c; 343 hwaddr addr; 344 ram_addr_t ram_addr; 345 unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS; 346 unsigned long hpratio = qemu_real_host_page_size() / TARGET_PAGE_SIZE; 347 unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS); 348 349 /* start address is aligned at the start of a word? */ 350 if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) && 351 (hpratio == 1)) { 352 unsigned long **blocks[DIRTY_MEMORY_NUM]; 353 unsigned long idx; 354 unsigned long offset; 355 long k; 356 long nr = BITS_TO_LONGS(pages); 357 358 idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE; 359 offset = BIT_WORD((start >> TARGET_PAGE_BITS) % 360 DIRTY_MEMORY_BLOCK_SIZE); 361 362 WITH_RCU_READ_LOCK_GUARD() { 363 for (i = 0; i < DIRTY_MEMORY_NUM; i++) { 364 blocks[i] = 365 qatomic_rcu_read(&ram_list.dirty_memory[i])->blocks; 366 } 367 368 for (k = 0; k < nr; k++) { 369 if (bitmap[k]) { 370 unsigned long temp = leul_to_cpu(bitmap[k]); 371 372 qatomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp); 373 374 if (global_dirty_tracking) { 375 qatomic_or( 376 &blocks[DIRTY_MEMORY_MIGRATION][idx][offset], 377 temp); 378 if (unlikely( 379 global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) { 380 total_dirty_pages += ctpopl(temp); 381 } 382 } 383 384 if (tcg_enabled()) { 385 qatomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], 386 temp); 387 } 388 } 389 390 if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) { 391 offset = 0; 392 idx++; 393 } 394 } 395 } 396 397 xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS); 398 } else { 399 uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE; 400 401 if (!global_dirty_tracking) { 402 clients &= ~(1 << DIRTY_MEMORY_MIGRATION); 403 } 404 405 /* 406 * bitmap-traveling is faster than memory-traveling (for addr...) 407 * especially when most of the memory is not dirty. 408 */ 409 for (i = 0; i < len; i++) { 410 if (bitmap[i] != 0) { 411 c = leul_to_cpu(bitmap[i]); 412 if (unlikely(global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) { 413 total_dirty_pages += ctpopl(c); 414 } 415 do { 416 j = ctzl(c); 417 c &= ~(1ul << j); 418 page_number = (i * HOST_LONG_BITS + j) * hpratio; 419 addr = page_number * TARGET_PAGE_SIZE; 420 ram_addr = start + addr; 421 cpu_physical_memory_set_dirty_range(ram_addr, 422 TARGET_PAGE_SIZE * hpratio, clients); 423 } while (c != 0); 424 } 425 } 426 } 427 } 428 #endif /* not _WIN32 */ 429 430 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, 431 ram_addr_t length, 432 unsigned client); 433 434 DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty 435 (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client); 436 437 bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap, 438 ram_addr_t start, 439 ram_addr_t length); 440 441 static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start, 442 ram_addr_t length) 443 { 444 cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION); 445 cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA); 446 cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE); 447 } 448 449 450 /* Called with RCU critical section */ 451 static inline 452 uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb, 453 ram_addr_t start, 454 ram_addr_t length) 455 { 456 ram_addr_t addr; 457 unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS); 458 uint64_t num_dirty = 0; 459 unsigned long *dest = rb->bmap; 460 461 /* start address and length is aligned at the start of a word? */ 462 if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) == 463 (start + rb->offset) && 464 !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) { 465 int k; 466 int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS); 467 unsigned long * const *src; 468 unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE; 469 unsigned long offset = BIT_WORD((word * BITS_PER_LONG) % 470 DIRTY_MEMORY_BLOCK_SIZE); 471 unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS); 472 473 src = qatomic_rcu_read( 474 &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks; 475 476 for (k = page; k < page + nr; k++) { 477 if (src[idx][offset]) { 478 unsigned long bits = qatomic_xchg(&src[idx][offset], 0); 479 unsigned long new_dirty; 480 new_dirty = ~dest[k]; 481 dest[k] |= bits; 482 new_dirty &= bits; 483 num_dirty += ctpopl(new_dirty); 484 } 485 486 if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) { 487 offset = 0; 488 idx++; 489 } 490 } 491 492 if (rb->clear_bmap) { 493 /* 494 * Postpone the dirty bitmap clear to the point before we 495 * really send the pages, also we will split the clear 496 * dirty procedure into smaller chunks. 497 */ 498 clear_bmap_set(rb, start >> TARGET_PAGE_BITS, 499 length >> TARGET_PAGE_BITS); 500 } else { 501 /* Slow path - still do that in a huge chunk */ 502 memory_region_clear_dirty_bitmap(rb->mr, start, length); 503 } 504 } else { 505 ram_addr_t offset = rb->offset; 506 507 for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) { 508 if (cpu_physical_memory_test_and_clear_dirty( 509 start + addr + offset, 510 TARGET_PAGE_SIZE, 511 DIRTY_MEMORY_MIGRATION)) { 512 long k = (start + addr) >> TARGET_PAGE_BITS; 513 if (!test_and_set_bit(k, dest)) { 514 num_dirty++; 515 } 516 } 517 } 518 } 519 520 return num_dirty; 521 } 522 #endif 523 #endif 524