/*
 * Declarations for cpu physical memory functions
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Avi Kivity <avi@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 *
 */

/*
 * This header is for use by exec.c and memory.c ONLY.  Do not include it.
 * The functions declared here will be removed soon.
 */

#ifndef RAM_ADDR_H
#define RAM_ADDR_H

#ifndef CONFIG_USER_ONLY
#include "hw/xen/xen.h"
#include "sysemu/tcg.h"
#include "exec/ramlist.h"

struct RAMBlock {
    struct rcu_head rcu;
    struct MemoryRegion *mr;
    uint8_t *host;
    uint8_t *colo_cache; /* For colo, VM's ram cache */
    ram_addr_t offset;
    ram_addr_t used_length;
    ram_addr_t max_length;
    void (*resized)(const char*, uint64_t length, void *host);
    uint32_t flags;
    /* Protected by iothread lock.  */
    char idstr[256];
    /* RCU-enabled, writes protected by the ramlist lock */
    QLIST_ENTRY(RAMBlock) next;
    QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
    int fd;
    size_t page_size;
    /* dirty bitmap used during migration */
    unsigned long *bmap;
    /* Bitmap of pages that have not yet been sent even once.
     * Currently only maintained and used during postcopy, where it is
     * used to send the dirty map at the start of the postcopy phase.
     */
    unsigned long *unsentmap;
    /* bitmap of already received pages in postcopy */
    unsigned long *receivedmap;
};

static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
{
    return b && b->host && offset < b->used_length;
}

static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
{
    assert(offset_in_ramblock(block, offset));
    return (char *)block->host + offset;
}
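
/*
 * Illustrative use (not part of the original header): bounds-check an
 * offset before translating it to a host pointer.  "data" and "len" are
 * hypothetical; ramblock_ptr() itself asserts the bound.
 *
 *     if (offset_in_ramblock(block, offset)) {
 *         memcpy(ramblock_ptr(block, offset), data, len);
 *     }
 */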

static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
                                                            RAMBlock *rb)
{
    uint64_t host_addr_offset =
            (uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
    return host_addr_offset >> TARGET_PAGE_BITS;
}
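
/*
 * Worked example (illustrative): with 4 KiB target pages
 * (TARGET_PAGE_BITS == 12), host_addr == rb->host + 0x5000 yields
 * 0x5000 >> 12 == 5, i.e. the sixth page of the block.  That index is
 * what callers use to mark pages in rb->receivedmap, along the lines of:
 *
 *     set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb),
 *                    rb->receivedmap);
 */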

bool ramblock_is_pmem(RAMBlock *rb);

long qemu_minrampagesize(void);
long qemu_maxrampagesize(void);

/**
 * qemu_ram_alloc_from_file,
 * qemu_ram_alloc_from_fd:  Allocate a ram block from the specified backing
 *                          file or device
 *
 * Parameters:
 *  @size: the size in bytes of the ram block
 *  @mr: the memory region to which the ram block belongs
 *  @ram_flags: specify the properties of the ram block, which can be one
 *              or a bit-or of the following values
 *              - RAM_SHARED: mmap the backing file or device with MAP_SHARED
 *              - RAM_PMEM: the backend @mem_path or @fd is persistent memory
 *              Other bits are ignored.
 *  @mem_path or @fd: specify the backing file or device
 *  @errp: pointer to Error*, used to store an error if one occurs
 *
 * Return:
 *  On success, return a pointer to the ram block.
 *  On failure, return NULL.
 */
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   uint32_t ram_flags, const char *mem_path,
                                   Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                 uint32_t ram_flags, int fd,
                                 Error **errp);
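
/*
 * Usage sketch (illustrative; the backing path and surrounding variables
 * are hypothetical): allocate a shared RAM block backed by a file and
 * report failures through the Error API.
 *
 *     Error *err = NULL;
 *     RAMBlock *rb = qemu_ram_alloc_from_file(size, mr, RAM_SHARED,
 *                                             "/dev/hugepages/guest", &err);
 *     if (!rb) {
 *         error_report_err(err);
 *     }
 */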

RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr,
                         Error **errp);
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
                                    void (*resized)(const char*,
                                                    uint64_t length,
                                                    void *host),
                                    MemoryRegion *mr, Error **errp);
void qemu_ram_free(RAMBlock *block);

int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);

#define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
#define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))
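
/*
 * These masks are what the "mask"/"client" style arguments below expect:
 * one bit per dirty memory client (VGA, CODE, MIGRATION).  For example,
 * passing DIRTY_CLIENTS_NOCODE to cpu_physical_memory_set_dirty_range()
 * dirties a range for every client except DIRTY_MEMORY_CODE.
 */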

void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end);

static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = false;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_bit(blocks->blocks[idx], num, offset);
        if (found < num) {
            dirty = true;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    rcu_read_unlock();

    return dirty;
}
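
/*
 * Note on the walk above (it applies to the other helpers below as well):
 * the global dirty bitmap is split into DirtyMemoryBlocks chunks of
 * DIRTY_MEMORY_BLOCK_SIZE pages, so a page number decomposes into
 * idx = page / DIRTY_MEMORY_BLOCK_SIZE and
 * offset = page % DIRTY_MEMORY_BLOCK_SIZE.  If the block size were,
 * hypothetically, 0x40000 pages, page 0x60000 would land in idx 1 at
 * offset 0x20000; after the first chunk the loop continues with offset
 * reset to 0.
 */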

static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = true;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_zero_bit(blocks->blocks[idx], num, offset);
        if (found < num) {
            dirty = false;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    rcu_read_unlock();

    return dirty;
}

static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    return cpu_physical_memory_get_dirty(addr, 1, client);
}

static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
{
    bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
    bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
    bool migration =
        cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    return !(vga && code && migration);
}

static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
                                                               ram_addr_t length,
                                                               uint8_t mask)
{
    uint8_t ret = 0;

    if (mask & (1 << DIRTY_MEMORY_VGA) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) {
        ret |= (1 << DIRTY_MEMORY_VGA);
    }
    if (mask & (1 << DIRTY_MEMORY_CODE) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) {
        ret |= (1 << DIRTY_MEMORY_CODE);
    }
    if (mask & (1 << DIRTY_MEMORY_MIGRATION) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
        ret |= (1 << DIRTY_MEMORY_MIGRATION);
    }
    return ret;
}
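
/*
 * Sketch of the intended usage (loosely modelled on the RAM write path;
 * details may differ from the actual callers): narrow a dirty-log mask
 * to the clients that still see clean pages, invalidate translated code
 * if needed, then mark the range dirty for the remaining clients.
 *
 *     uint8_t mask = cpu_physical_memory_range_includes_clean(addr, len,
 *                                                             log_mask);
 *     if (mask & (1 << DIRTY_MEMORY_CODE)) {
 *         tb_invalidate_phys_range(addr, addr + len);
 *         mask &= ~(1 << DIRTY_MEMORY_CODE);
 *     }
 *     cpu_physical_memory_set_dirty_range(addr, len, mask);
 */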

static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    unsigned long page, idx, offset;
    DirtyMemoryBlocks *blocks;

    assert(client < DIRTY_MEMORY_NUM);

    page = addr >> TARGET_PAGE_BITS;
    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    set_bit_atomic(offset, blocks->blocks[idx]);

    rcu_read_unlock();
}

static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
                                                       ram_addr_t length,
                                                       uint8_t mask)
{
    DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
    unsigned long end, page;
    unsigned long idx, offset, base;
    int i;

    if (!mask && !xen_enabled()) {
        return;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
        blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i]);
    }

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);

        if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
            bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
                              offset, next - page);
        }
        if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
            bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
                              offset, next - page);
        }
        if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
            bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
                              offset, next - page);
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    rcu_read_unlock();

    xen_hvm_modified_memory(start, length);
}
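
/*
 * Illustrative caller (a simplified form of what the memory core does;
 * the exact code may differ): mark a range of a RAM-backed MemoryRegion
 * dirty for every client enabled in its dirty-log mask.
 *
 *     cpu_physical_memory_set_dirty_range(
 *             memory_region_get_ram_addr(mr) + addr, size,
 *             memory_region_get_dirty_log_mask(mr));
 */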

#if !defined(_WIN32)
static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
                                                          ram_addr_t start,
                                                          ram_addr_t pages)
{
    unsigned long i, j;
    unsigned long page_number, c;
    hwaddr addr;
    ram_addr_t ram_addr;
    unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
    unsigned long hpratio = getpagesize() / TARGET_PAGE_SIZE;
    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

    /* start address is aligned at the start of a word? */
    if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
        (hpratio == 1)) {
        unsigned long **blocks[DIRTY_MEMORY_NUM];
        unsigned long idx;
        unsigned long offset;
        long k;
        long nr = BITS_TO_LONGS(pages);

        idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
        offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
                          DIRTY_MEMORY_BLOCK_SIZE);

        rcu_read_lock();

        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
        }

        for (k = 0; k < nr; k++) {
            if (bitmap[k]) {
                unsigned long temp = leul_to_cpu(bitmap[k]);

                atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);

                if (global_dirty_log) {
                    atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
                              temp);
                }

                if (tcg_enabled()) {
                    atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], temp);
                }
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }

        rcu_read_unlock();

        xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
    } else {
        uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;

        if (!global_dirty_log) {
            clients &= ~(1 << DIRTY_MEMORY_MIGRATION);
        }

        /*
         * bitmap-traveling is faster than memory-traveling (for addr...)
         * especially when most of the memory is not dirty.
         */
        for (i = 0; i < len; i++) {
            if (bitmap[i] != 0) {
                c = leul_to_cpu(bitmap[i]);
                do {
                    j = ctzl(c);
                    c &= ~(1ul << j);
                    page_number = (i * HOST_LONG_BITS + j) * hpratio;
                    addr = page_number * TARGET_PAGE_SIZE;
                    ram_addr = start + addr;
                    cpu_physical_memory_set_dirty_range(ram_addr,
                                       TARGET_PAGE_SIZE * hpratio, clients);
                } while (c != 0);
            }
        }
    }
}
#endif /* not _WIN32 */
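
/*
 * Note on hpratio in the helper above (summarising the code, not adding
 * behaviour): when the host page size is larger than TARGET_PAGE_SIZE,
 * each bit of the little-endian bitmap (e.g. a KVM dirty log) covers
 * hpratio target pages, so the slow path dirties
 * TARGET_PAGE_SIZE * hpratio bytes per set bit.  The word-at-a-time fast
 * path is only taken when hpratio == 1 and start is word aligned in the
 * bitmap.
 */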

bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client);

DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
    (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client);

bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
                                            ram_addr_t start,
                                            ram_addr_t length);
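
/*
 * Usage sketch for the snapshot API (illustrative; display code normally
 * consumes it through memory_region_* wrappers, so details may vary):
 * take a snapshot that atomically clears the VGA dirty bits for a region,
 * then query the snapshot piecewise while redrawing.
 *
 *     DirtyBitmapSnapshot *snap =
 *         cpu_physical_memory_snapshot_and_clear_dirty(mr, offset, length,
 *                                                      DIRTY_MEMORY_VGA);
 *     if (cpu_physical_memory_snapshot_get_dirty(snap, start, len)) {
 *         ... redraw this chunk ...
 *     }
 *     g_free(snap);
 */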

static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
                                                         ram_addr_t length)
{
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
}


/* Called with RCU critical section */
static inline
uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
                                               ram_addr_t start,
                                               ram_addr_t length,
                                               uint64_t *real_dirty_pages)
{
    ram_addr_t addr;
    unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
    uint64_t num_dirty = 0;
    unsigned long *dest = rb->bmap;

    /* start address and length are aligned at the start of a word? */
    if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) ==
         (start + rb->offset) &&
        !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
        int k;
        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
        unsigned long * const *src;
        unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = BIT_WORD((word * BITS_PER_LONG) %
                                        DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

        src = atomic_rcu_read(
                &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks;

        for (k = page; k < page + nr; k++) {
            if (src[idx][offset]) {
                unsigned long bits = atomic_xchg(&src[idx][offset], 0);
                unsigned long new_dirty;
                *real_dirty_pages += ctpopl(bits);
                new_dirty = ~dest[k];
                dest[k] |= bits;
                new_dirty &= bits;
                num_dirty += ctpopl(new_dirty);
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }

        /* TODO: split the huge bitmap into smaller chunks */
        memory_region_clear_dirty_bitmap(rb->mr, start, length);
    } else {
        ram_addr_t offset = rb->offset;

        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
            if (cpu_physical_memory_test_and_clear_dirty(
                        start + addr + offset,
                        TARGET_PAGE_SIZE,
                        DIRTY_MEMORY_MIGRATION)) {
                *real_dirty_pages += 1;
                long k = (start + addr) >> TARGET_PAGE_BITS;
                if (!test_and_set_bit(k, dest)) {
                    num_dirty++;
                }
            }
        }
    }

    return num_dirty;
}
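
/*
 * Illustrative caller (a simplified version of the migration bitmap sync
 * loop; variable names are approximate): fold newly dirtied pages for one
 * RAMBlock into rb->bmap while counting how many pages were dirty at all.
 *
 *     uint64_t real_dirty = 0;
 *     uint64_t newly_dirty;
 *
 *     rcu_read_lock();
 *     newly_dirty = cpu_physical_memory_sync_dirty_bitmap(
 *             rb, 0, rb->used_length, &real_dirty);
 *     rcu_read_unlock();
 */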
#endif /* !CONFIG_USER_ONLY */
#endif /* RAM_ADDR_H */