1 /* 2 * Declarations for cpu physical memory functions 3 * 4 * Copyright 2011 Red Hat, Inc. and/or its affiliates 5 * 6 * Authors: 7 * Avi Kivity <avi@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or 10 * later. See the COPYING file in the top-level directory. 11 * 12 */ 13 14 /* 15 * This header is for use by exec.c and memory.c ONLY. Do not include it. 16 * The functions declared here will be removed soon. 17 */ 18 19 #ifndef RAM_ADDR_H 20 #define RAM_ADDR_H 21 22 #ifndef CONFIG_USER_ONLY 23 #include "cpu.h" 24 #include "sysemu/xen.h" 25 #include "sysemu/tcg.h" 26 #include "exec/ramlist.h" 27 #include "exec/ramblock.h" 28 29 /** 30 * clear_bmap_size: calculate clear bitmap size 31 * 32 * @pages: number of guest pages 33 * @shift: guest page number shift 34 * 35 * Returns: number of bits for the clear bitmap 36 */ 37 static inline long clear_bmap_size(uint64_t pages, uint8_t shift) 38 { 39 return DIV_ROUND_UP(pages, 1UL << shift); 40 } 41 42 /** 43 * clear_bmap_set: set clear bitmap for the page range 44 * 45 * @rb: the ramblock to operate on 46 * @start: the start page number 47 * @size: number of pages to set in the bitmap 48 * 49 * Returns: None 50 */ 51 static inline void clear_bmap_set(RAMBlock *rb, uint64_t start, 52 uint64_t npages) 53 { 54 uint8_t shift = rb->clear_bmap_shift; 55 56 bitmap_set_atomic(rb->clear_bmap, start >> shift, 57 clear_bmap_size(npages, shift)); 58 } 59 60 /** 61 * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set 62 * 63 * @rb: the ramblock to operate on 64 * @page: the page number to check 65 * 66 * Returns: true if the bit was set, false otherwise 67 */ 68 static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page) 69 { 70 uint8_t shift = rb->clear_bmap_shift; 71 72 return bitmap_test_and_clear_atomic(rb->clear_bmap, page >> shift, 1); 73 } 74 75 static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset) 76 { 77 return (b && b->host && offset < b->used_length) ? true : false; 78 } 79 80 static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset) 81 { 82 assert(offset_in_ramblock(block, offset)); 83 return (char *)block->host + offset; 84 } 85 86 static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr, 87 RAMBlock *rb) 88 { 89 uint64_t host_addr_offset = 90 (uint64_t)(uintptr_t)(host_addr - (void *)rb->host); 91 return host_addr_offset >> TARGET_PAGE_BITS; 92 } 93 94 bool ramblock_is_pmem(RAMBlock *rb); 95 96 long qemu_minrampagesize(void); 97 long qemu_maxrampagesize(void); 98 99 /** 100 * qemu_ram_alloc_from_file, 101 * qemu_ram_alloc_from_fd: Allocate a ram block from the specified backing 102 * file or device 103 * 104 * Parameters: 105 * @size: the size in bytes of the ram block 106 * @mr: the memory region where the ram block is 107 * @ram_flags: specify the properties of the ram block, which can be one 108 * or bit-or of following values 109 * - RAM_SHARED: mmap the backing file or device with MAP_SHARED 110 * - RAM_PMEM: the backend @mem_path or @fd is persistent memory 111 * Other bits are ignored. 112 * @mem_path or @fd: specify the backing file or device 113 * @errp: pointer to Error*, to store an error if it happens 114 * 115 * Return: 116 * On success, return a pointer to the ram block. 117 * On failure, return NULL. 118 */ 119 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, 120 uint32_t ram_flags, const char *mem_path, 121 Error **errp); 122 RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, 123 uint32_t ram_flags, int fd, 124 Error **errp); 125 126 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, 127 MemoryRegion *mr, Error **errp); 128 RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr, 129 Error **errp); 130 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size, 131 void (*resized)(const char*, 132 uint64_t length, 133 void *host), 134 MemoryRegion *mr, Error **errp); 135 void qemu_ram_free(RAMBlock *block); 136 137 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp); 138 139 void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length); 140 141 /* Clear whole block of mem */ 142 static inline void qemu_ram_block_writeback(RAMBlock *block) 143 { 144 qemu_ram_msync(block, 0, block->used_length); 145 } 146 147 #define DIRTY_CLIENTS_ALL ((1 << DIRTY_MEMORY_NUM) - 1) 148 #define DIRTY_CLIENTS_NOCODE (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE)) 149 150 void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end); 151 152 static inline bool cpu_physical_memory_get_dirty(ram_addr_t start, 153 ram_addr_t length, 154 unsigned client) 155 { 156 DirtyMemoryBlocks *blocks; 157 unsigned long end, page; 158 unsigned long idx, offset, base; 159 bool dirty = false; 160 161 assert(client < DIRTY_MEMORY_NUM); 162 163 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; 164 page = start >> TARGET_PAGE_BITS; 165 166 WITH_RCU_READ_LOCK_GUARD() { 167 blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]); 168 169 idx = page / DIRTY_MEMORY_BLOCK_SIZE; 170 offset = page % DIRTY_MEMORY_BLOCK_SIZE; 171 base = page - offset; 172 while (page < end) { 173 unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE); 174 unsigned long num = next - base; 175 unsigned long found = find_next_bit(blocks->blocks[idx], 176 num, offset); 177 if (found < num) { 178 dirty = true; 179 break; 180 } 181 182 page = next; 183 idx++; 184 offset = 0; 185 base += DIRTY_MEMORY_BLOCK_SIZE; 186 } 187 } 188 189 return dirty; 190 } 191 192 static inline bool cpu_physical_memory_all_dirty(ram_addr_t start, 193 ram_addr_t length, 194 unsigned client) 195 { 196 DirtyMemoryBlocks *blocks; 197 unsigned long end, page; 198 unsigned long idx, offset, base; 199 bool dirty = true; 200 201 assert(client < DIRTY_MEMORY_NUM); 202 203 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; 204 page = start >> TARGET_PAGE_BITS; 205 206 RCU_READ_LOCK_GUARD(); 207 208 blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]); 209 210 idx = page / DIRTY_MEMORY_BLOCK_SIZE; 211 offset = page % DIRTY_MEMORY_BLOCK_SIZE; 212 base = page - offset; 213 while (page < end) { 214 unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE); 215 unsigned long num = next - base; 216 unsigned long found = find_next_zero_bit(blocks->blocks[idx], num, offset); 217 if (found < num) { 218 dirty = false; 219 break; 220 } 221 222 page = next; 223 idx++; 224 offset = 0; 225 base += DIRTY_MEMORY_BLOCK_SIZE; 226 } 227 228 return dirty; 229 } 230 231 static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr, 232 unsigned client) 233 { 234 return cpu_physical_memory_get_dirty(addr, 1, client); 235 } 236 237 static inline bool cpu_physical_memory_is_clean(ram_addr_t addr) 238 { 239 bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA); 240 bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE); 241 bool migration = 242 cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION); 243 return !(vga && code && migration); 244 } 245 246 static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start, 247 ram_addr_t length, 248 uint8_t mask) 249 { 250 uint8_t ret = 0; 251 252 if (mask & (1 << DIRTY_MEMORY_VGA) && 253 !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) { 254 ret |= (1 << DIRTY_MEMORY_VGA); 255 } 256 if (mask & (1 << DIRTY_MEMORY_CODE) && 257 !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) { 258 ret |= (1 << DIRTY_MEMORY_CODE); 259 } 260 if (mask & (1 << DIRTY_MEMORY_MIGRATION) && 261 !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) { 262 ret |= (1 << DIRTY_MEMORY_MIGRATION); 263 } 264 return ret; 265 } 266 267 static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr, 268 unsigned client) 269 { 270 unsigned long page, idx, offset; 271 DirtyMemoryBlocks *blocks; 272 273 assert(client < DIRTY_MEMORY_NUM); 274 275 page = addr >> TARGET_PAGE_BITS; 276 idx = page / DIRTY_MEMORY_BLOCK_SIZE; 277 offset = page % DIRTY_MEMORY_BLOCK_SIZE; 278 279 RCU_READ_LOCK_GUARD(); 280 281 blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]); 282 283 set_bit_atomic(offset, blocks->blocks[idx]); 284 } 285 286 static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start, 287 ram_addr_t length, 288 uint8_t mask) 289 { 290 DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM]; 291 unsigned long end, page; 292 unsigned long idx, offset, base; 293 int i; 294 295 if (!mask && !xen_enabled()) { 296 return; 297 } 298 299 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; 300 page = start >> TARGET_PAGE_BITS; 301 302 WITH_RCU_READ_LOCK_GUARD() { 303 for (i = 0; i < DIRTY_MEMORY_NUM; i++) { 304 blocks[i] = qatomic_rcu_read(&ram_list.dirty_memory[i]); 305 } 306 307 idx = page / DIRTY_MEMORY_BLOCK_SIZE; 308 offset = page % DIRTY_MEMORY_BLOCK_SIZE; 309 base = page - offset; 310 while (page < end) { 311 unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE); 312 313 if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) { 314 bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx], 315 offset, next - page); 316 } 317 if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) { 318 bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx], 319 offset, next - page); 320 } 321 if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) { 322 bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx], 323 offset, next - page); 324 } 325 326 page = next; 327 idx++; 328 offset = 0; 329 base += DIRTY_MEMORY_BLOCK_SIZE; 330 } 331 } 332 333 xen_hvm_modified_memory(start, length); 334 } 335 336 #if !defined(_WIN32) 337 static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, 338 ram_addr_t start, 339 ram_addr_t pages) 340 { 341 unsigned long i, j; 342 unsigned long page_number, c; 343 hwaddr addr; 344 ram_addr_t ram_addr; 345 unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS; 346 unsigned long hpratio = qemu_real_host_page_size / TARGET_PAGE_SIZE; 347 unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS); 348 349 /* start address is aligned at the start of a word? */ 350 if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) && 351 (hpratio == 1)) { 352 unsigned long **blocks[DIRTY_MEMORY_NUM]; 353 unsigned long idx; 354 unsigned long offset; 355 long k; 356 long nr = BITS_TO_LONGS(pages); 357 358 idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE; 359 offset = BIT_WORD((start >> TARGET_PAGE_BITS) % 360 DIRTY_MEMORY_BLOCK_SIZE); 361 362 WITH_RCU_READ_LOCK_GUARD() { 363 for (i = 0; i < DIRTY_MEMORY_NUM; i++) { 364 blocks[i] = 365 qatomic_rcu_read(&ram_list.dirty_memory[i])->blocks; 366 } 367 368 for (k = 0; k < nr; k++) { 369 if (bitmap[k]) { 370 unsigned long temp = leul_to_cpu(bitmap[k]); 371 372 qatomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp); 373 374 if (global_dirty_log) { 375 qatomic_or( 376 &blocks[DIRTY_MEMORY_MIGRATION][idx][offset], 377 temp); 378 } 379 380 if (tcg_enabled()) { 381 qatomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], 382 temp); 383 } 384 } 385 386 if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) { 387 offset = 0; 388 idx++; 389 } 390 } 391 } 392 393 xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS); 394 } else { 395 uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE; 396 397 if (!global_dirty_log) { 398 clients &= ~(1 << DIRTY_MEMORY_MIGRATION); 399 } 400 401 /* 402 * bitmap-traveling is faster than memory-traveling (for addr...) 403 * especially when most of the memory is not dirty. 404 */ 405 for (i = 0; i < len; i++) { 406 if (bitmap[i] != 0) { 407 c = leul_to_cpu(bitmap[i]); 408 do { 409 j = ctzl(c); 410 c &= ~(1ul << j); 411 page_number = (i * HOST_LONG_BITS + j) * hpratio; 412 addr = page_number * TARGET_PAGE_SIZE; 413 ram_addr = start + addr; 414 cpu_physical_memory_set_dirty_range(ram_addr, 415 TARGET_PAGE_SIZE * hpratio, clients); 416 } while (c != 0); 417 } 418 } 419 } 420 } 421 #endif /* not _WIN32 */ 422 423 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, 424 ram_addr_t length, 425 unsigned client); 426 427 DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty 428 (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client); 429 430 bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap, 431 ram_addr_t start, 432 ram_addr_t length); 433 434 static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start, 435 ram_addr_t length) 436 { 437 cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION); 438 cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA); 439 cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE); 440 } 441 442 443 /* Called with RCU critical section */ 444 static inline 445 uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb, 446 ram_addr_t start, 447 ram_addr_t length) 448 { 449 ram_addr_t addr; 450 unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS); 451 uint64_t num_dirty = 0; 452 unsigned long *dest = rb->bmap; 453 454 /* start address and length is aligned at the start of a word? */ 455 if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) == 456 (start + rb->offset) && 457 !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) { 458 int k; 459 int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS); 460 unsigned long * const *src; 461 unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE; 462 unsigned long offset = BIT_WORD((word * BITS_PER_LONG) % 463 DIRTY_MEMORY_BLOCK_SIZE); 464 unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS); 465 466 src = qatomic_rcu_read( 467 &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks; 468 469 for (k = page; k < page + nr; k++) { 470 if (src[idx][offset]) { 471 unsigned long bits = qatomic_xchg(&src[idx][offset], 0); 472 unsigned long new_dirty; 473 new_dirty = ~dest[k]; 474 dest[k] |= bits; 475 new_dirty &= bits; 476 num_dirty += ctpopl(new_dirty); 477 } 478 479 if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) { 480 offset = 0; 481 idx++; 482 } 483 } 484 485 if (rb->clear_bmap) { 486 /* 487 * Postpone the dirty bitmap clear to the point before we 488 * really send the pages, also we will split the clear 489 * dirty procedure into smaller chunks. 490 */ 491 clear_bmap_set(rb, start >> TARGET_PAGE_BITS, 492 length >> TARGET_PAGE_BITS); 493 } else { 494 /* Slow path - still do that in a huge chunk */ 495 memory_region_clear_dirty_bitmap(rb->mr, start, length); 496 } 497 } else { 498 ram_addr_t offset = rb->offset; 499 500 for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) { 501 if (cpu_physical_memory_test_and_clear_dirty( 502 start + addr + offset, 503 TARGET_PAGE_SIZE, 504 DIRTY_MEMORY_MIGRATION)) { 505 long k = (start + addr) >> TARGET_PAGE_BITS; 506 if (!test_and_set_bit(k, dest)) { 507 num_dirty++; 508 } 509 } 510 } 511 } 512 513 return num_dirty; 514 } 515 #endif 516 #endif 517