/*
 * Declarations for cpu physical memory functions
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Avi Kivity <avi@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 *
 */

/*
 * This header is for use by exec.c and memory.c ONLY.  Do not include it.
 * The functions declared here will be removed soon.
 */

#ifndef RAM_ADDR_H
#define RAM_ADDR_H

#ifndef CONFIG_USER_ONLY
#include "cpu.h"
#include "system/xen.h"
#include "system/tcg.h"
#include "exec/cputlb.h"
#include "exec/ramlist.h"
#include "exec/ramblock.h"
#include "exec/exec-all.h"
#include "qemu/rcu.h"

#include "exec/hwaddr.h"
#include "exec/cpu-common.h"

extern uint64_t total_dirty_pages;

/**
 * clear_bmap_size: calculate clear bitmap size
 *
 * @pages: number of guest pages
 * @shift: guest page number shift
 *
 * Returns: number of bits for the clear bitmap
 */
static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
{
    return DIV_ROUND_UP(pages, 1UL << shift);
}
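
/*
 * Illustrative example (not part of the original header): assuming a
 * clear_bmap_shift of 18 and 4 KiB target pages, a RAMBlock with 1M guest
 * pages (4 GiB of RAM) needs
 *
 *     clear_bmap_size(1 << 20, 18) == DIV_ROUND_UP(1 << 20, 1 << 18) == 4
 *
 * bits in its clear bitmap, i.e. each bit tracks a 1 GiB chunk whose dirty
 * log has not been cleared yet.
 */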

/**
 * clear_bmap_set: set clear bitmap for the page range.  Must be with
 * bitmap_mutex held.
 *
 * @rb: the ramblock to operate on
 * @start: the start page number
 * @npages: number of pages to set in the bitmap
 *
 * Returns: None
 */
static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
                                  uint64_t npages)
{
    uint8_t shift = rb->clear_bmap_shift;

    bitmap_set(rb->clear_bmap, start >> shift, clear_bmap_size(npages, shift));
}

/**
 * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set.
 * Must be with bitmap_mutex held.
 *
 * @rb: the ramblock to operate on
 * @page: the page number to check
 *
 * Returns: true if the bit was set, false otherwise
 */
static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
{
    uint8_t shift = rb->clear_bmap_shift;

    return bitmap_test_and_clear(rb->clear_bmap, page >> shift, 1);
}
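
/*
 * Illustrative pairing (sketch only, not from the original header): with the
 * migration bitmap_mutex held, a producer marks the chunks covering a page
 * range and the consumer later clears the dirty log at most once per chunk:
 *
 *     clear_bmap_set(rb, page, npages);
 *     ...
 *     if (clear_bmap_test_and_clear(rb, page)) {
 *         memory_region_clear_dirty_bitmap(rb->mr, chunk_start, chunk_len);
 *     }
 *
 * chunk_start/chunk_len are hypothetical names for the chunk-aligned range.
 */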

static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
{
    return b && b->host && offset < b->used_length;
}

static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
{
    assert(offset_in_ramblock(block, offset));
    return (char *)block->host + offset;
}
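
/*
 * Illustrative use (sketch only): callers are expected to validate the
 * offset before mapping it to a host pointer, e.g.
 *
 *     if (offset_in_ramblock(block, offset)) {
 *         void *host = ramblock_ptr(block, offset);
 *         ...
 *     }
 *
 * ramblock_ptr() asserts on out-of-range offsets rather than returning NULL.
 */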

static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
                                                            RAMBlock *rb)
{
    uint64_t host_addr_offset =
            (uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
    return host_addr_offset >> TARGET_PAGE_BITS;
}

bool ramblock_is_pmem(RAMBlock *rb);

/**
 * qemu_ram_alloc_from_file,
 * qemu_ram_alloc_from_fd:  Allocate a ram block from the specified backing
 *                          file or device
 *
 * Parameters:
 *  @size: the size in bytes of the ram block
 *  @max_size: the maximum size of the block after resizing
 *  @mr: the memory region where the ram block is
 *  @resized: callback after calls to qemu_ram_resize
 *  @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
 *              RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
 *              RAM_READONLY_FD, RAM_GUEST_MEMFD
 *  @mem_path or @fd: specify the backing file or device
 *  @offset: Offset into target file
 *  @grow: extend file if necessary (but an empty file is always extended).
 *  @errp: pointer to Error*, to store an error if it happens
 *
 * Return:
 *  On success, return a pointer to the ram block.
 *  On failure, return NULL.
 */
typedef void (*qemu_ram_resize_cb)(const char *, uint64_t length, void *host);

RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   uint32_t ram_flags, const char *mem_path,
                                   off_t offset, Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, ram_addr_t max_size,
                                 qemu_ram_resize_cb resized, MemoryRegion *mr,
                                 uint32_t ram_flags, int fd, off_t offset,
                                 bool grow,
                                 Error **errp);
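
/*
 * Illustrative call (sketch only; the fd, MemoryRegion and size are assumed
 * to have been set up by the caller, and error handling is abbreviated):
 *
 *     Error *err = NULL;
 *     RAMBlock *rb = qemu_ram_alloc_from_fd(size, size, NULL, mr, RAM_SHARED,
 *                                           fd, 0, false, &err);
 *     if (!rb) {
 *         error_report_err(err);
 *     }
 */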

RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, MemoryRegion *mr,
                         Error **errp);
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
                                    qemu_ram_resize_cb resized,
                                    MemoryRegion *mr, Error **errp);
void qemu_ram_free(RAMBlock *block);

int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);

void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length);

/* Write back the whole RAM block to its backing storage */
static inline void qemu_ram_block_writeback(RAMBlock *block)
{
    qemu_ram_msync(block, 0, block->used_length);
}

#define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
#define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))
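
/*
 * With the three dirty memory clients used below (DIRTY_MEMORY_VGA,
 * DIRTY_MEMORY_CODE, DIRTY_MEMORY_MIGRATION, i.e. DIRTY_MEMORY_NUM == 3),
 * DIRTY_CLIENTS_ALL evaluates to 0b111 and DIRTY_CLIENTS_NOCODE is the same
 * mask with the DIRTY_MEMORY_CODE bit cleared.
 */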

static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = false;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
            unsigned long num = next - base;
            unsigned long found = find_next_bit(blocks->blocks[idx],
                                                num, offset);
            if (found < num) {
                dirty = true;
                break;
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    return dirty;
}
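
/*
 * Illustrative use (sketch only): a display frontend can poll whether any
 * page in its framebuffer's ram_addr_t range was written since the last
 * refresh, e.g.
 *
 *     if (cpu_physical_memory_get_dirty(fb_addr, fb_size, DIRTY_MEMORY_VGA)) {
 *         ... redraw, then clear the VGA dirty bits for that range ...
 *     }
 *
 * fb_addr and fb_size are hypothetical names for that range.
 */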

static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = true;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    RCU_READ_LOCK_GUARD();

    blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_zero_bit(blocks->blocks[idx], num, offset);
        if (found < num) {
            dirty = false;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    return dirty;
}

static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    return cpu_physical_memory_get_dirty(addr, 1, client);
}

static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
{
    bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
    bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
    bool migration =
        cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    return !(vga && code && migration);
}

static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
                                                               ram_addr_t length,
                                                               uint8_t mask)
{
    uint8_t ret = 0;

    if (mask & (1 << DIRTY_MEMORY_VGA) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) {
        ret |= (1 << DIRTY_MEMORY_VGA);
    }
    if (mask & (1 << DIRTY_MEMORY_CODE) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) {
        ret |= (1 << DIRTY_MEMORY_CODE);
    }
    if (mask & (1 << DIRTY_MEMORY_MIGRATION) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
        ret |= (1 << DIRTY_MEMORY_MIGRATION);
    }
    return ret;
}
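
/*
 * Illustrative use (sketch only): the return value is a sub-mask of @mask
 * with a bit set for every client that still has at least one clean page in
 * the range, e.g.
 *
 *     if (cpu_physical_memory_range_includes_clean(addr, len,
 *                                                  1 << DIRTY_MEMORY_VGA)) {
 *         ... at least one page is still clean for the VGA client ...
 *     }
 *
 * addr and len are hypothetical names for the range being written.
 */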

static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    unsigned long page, idx, offset;
    DirtyMemoryBlocks *blocks;

    assert(client < DIRTY_MEMORY_NUM);

    page = addr >> TARGET_PAGE_BITS;
    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;

    RCU_READ_LOCK_GUARD();

    blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

    set_bit_atomic(offset, blocks->blocks[idx]);
}

static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
                                                       ram_addr_t length,
                                                       uint8_t mask)
{
    DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
    unsigned long end, page;
    unsigned long idx, offset, base;
    int i;

    if (!mask && !xen_enabled()) {
        return;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            blocks[i] = qatomic_rcu_read(&ram_list.dirty_memory[i]);
        }

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);

            if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
                                  offset, next - page);
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    xen_hvm_modified_memory(start, length);
}
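
/*
 * Illustrative use (sketch only): after a device model writes len bytes of
 * guest RAM at ram_addr addr (e.g. on DMA completion), the range is dirtied
 * for every client except the TCG code client:
 *
 *     cpu_physical_memory_set_dirty_range(addr, len, DIRTY_CLIENTS_NOCODE);
 *
 * addr and len are hypothetical; real callers usually derive addr from
 * memory_region_get_ram_addr() plus the offset of the access.
 */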

#if !defined(_WIN32)

/*
 * Contrary to cpu_physical_memory_sync_dirty_bitmap(), this function returns
 * the number of dirty pages in @bitmap passed as argument. On the other hand,
 * cpu_physical_memory_sync_dirty_bitmap() returns newly dirtied pages that
 * weren't set in the global migration bitmap.
 */
static inline
uint64_t cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
                                                ram_addr_t start,
                                                ram_addr_t pages)
{
    unsigned long i, j;
    unsigned long page_number, c, nbits;
    hwaddr addr;
    ram_addr_t ram_addr;
    uint64_t num_dirty = 0;
    unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
    unsigned long hpratio = qemu_real_host_page_size() / TARGET_PAGE_SIZE;
    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

    /* start address is aligned at the start of a word? */
    if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
        (hpratio == 1)) {
        unsigned long **blocks[DIRTY_MEMORY_NUM];
        unsigned long idx;
        unsigned long offset;
        long k;
        long nr = BITS_TO_LONGS(pages);

        idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
        offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
                          DIRTY_MEMORY_BLOCK_SIZE);

        WITH_RCU_READ_LOCK_GUARD() {
            for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
                blocks[i] =
                    qatomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
            }

            for (k = 0; k < nr; k++) {
                if (bitmap[k]) {
                    unsigned long temp = leul_to_cpu(bitmap[k]);

                    nbits = ctpopl(temp);
                    qatomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);

                    if (global_dirty_tracking) {
                        qatomic_or(
                                &blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
                                temp);
                        if (unlikely(
                            global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) {
                            total_dirty_pages += nbits;
                        }
                    }

                    num_dirty += nbits;

                    if (tcg_enabled()) {
                        qatomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset],
                                   temp);
                    }
                }

                if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                    offset = 0;
                    idx++;
                }
            }
        }

        xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
    } else {
        uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;

        if (!global_dirty_tracking) {
            clients &= ~(1 << DIRTY_MEMORY_MIGRATION);
        }

        /*
         * bitmap-traveling is faster than memory-traveling (for addr...)
         * especially when most of the memory is not dirty.
         */
        for (i = 0; i < len; i++) {
            if (bitmap[i] != 0) {
                c = leul_to_cpu(bitmap[i]);
                nbits = ctpopl(c);
                if (unlikely(global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) {
                    total_dirty_pages += nbits;
                }
                num_dirty += nbits;
                do {
                    j = ctzl(c);
                    c &= ~(1ul << j);
                    page_number = (i * HOST_LONG_BITS + j) * hpratio;
                    addr = page_number * TARGET_PAGE_SIZE;
                    ram_addr = start + addr;
                    cpu_physical_memory_set_dirty_range(ram_addr,
                                       TARGET_PAGE_SIZE * hpratio, clients);
                } while (c != 0);
            }
        }
    }

    return num_dirty;
}
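
/*
 * Illustrative use (sketch only): a dirty-log consumer such as the KVM
 * accelerator's dirty-log sync passes the little-endian bitmap it read for a
 * memory slot and folds it into QEMU's own dirty tracking:
 *
 *     uint64_t newly_dirty =
 *         cpu_physical_memory_set_dirty_lebitmap(slot_bitmap, slot_ram_addr,
 *                                                slot_size >> TARGET_PAGE_BITS);
 *
 * slot_bitmap, slot_ram_addr and slot_size are hypothetical names for the
 * slot's data.
 */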
#endif /* not _WIN32 */

static inline void cpu_physical_memory_dirty_bits_cleared(ram_addr_t start,
                                                          ram_addr_t length)
{
    if (tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }
}

bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client);

DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
    (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client);

bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
                                            ram_addr_t start,
                                            ram_addr_t length);

static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
                                                         ram_addr_t length)
{
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
}

/* Called within an RCU critical section */
static inline
uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
                                               ram_addr_t start,
                                               ram_addr_t length)
{
    ram_addr_t addr;
    unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
    uint64_t num_dirty = 0;
    unsigned long *dest = rb->bmap;

    /* Are the start address and length aligned at the start of a word? */
    if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) ==
         (start + rb->offset) &&
        !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
        int k;
        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
        unsigned long * const *src;
        unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = BIT_WORD((word * BITS_PER_LONG) %
                                        DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

        src = qatomic_rcu_read(
                &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks;

        for (k = page; k < page + nr; k++) {
            if (src[idx][offset]) {
                unsigned long bits = qatomic_xchg(&src[idx][offset], 0);
                unsigned long new_dirty;
                new_dirty = ~dest[k];
                dest[k] |= bits;
                new_dirty &= bits;
                num_dirty += ctpopl(new_dirty);
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }
        if (num_dirty) {
            cpu_physical_memory_dirty_bits_cleared(start, length);
        }

        if (rb->clear_bmap) {
            /*
             * Postpone the dirty bitmap clear to the point before we
             * really send the pages, also we will split the clear
             * dirty procedure into smaller chunks.
             */
            clear_bmap_set(rb, start >> TARGET_PAGE_BITS,
                           length >> TARGET_PAGE_BITS);
        } else {
            /* Slow path - still do that in a huge chunk */
            memory_region_clear_dirty_bitmap(rb->mr, start, length);
        }
    } else {
        ram_addr_t offset = rb->offset;

        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
            if (cpu_physical_memory_test_and_clear_dirty(
                        start + addr + offset,
                        TARGET_PAGE_SIZE,
                        DIRTY_MEMORY_MIGRATION)) {
                long k = (start + addr) >> TARGET_PAGE_BITS;
                if (!test_and_set_bit(k, dest)) {
                    num_dirty++;
                }
            }
        }
    }

    return num_dirty;
}
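
/*
 * Illustrative use (sketch only): migration's bitmap sync walks the RAM
 * blocks under the RCU read lock and accumulates the newly dirtied pages,
 * roughly:
 *
 *     RCU_READ_LOCK_GUARD();
 *     RAMBLOCK_FOREACH(rb) {
 *         dirty_pages += cpu_physical_memory_sync_dirty_bitmap(rb, 0,
 *                                                              rb->used_length);
 *     }
 *
 * dirty_pages is a hypothetical accumulator; the real migration code also
 * skips blocks that are not migratable.
 */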
#endif /* !CONFIG_USER_ONLY */
#endif /* RAM_ADDR_H */