xref: /openbmc/qemu/include/exec/ram_addr.h (revision 8e6fe6b8bab4716b4adf99a9ab52eaa82464b37e)
1 /*
2  * Declarations for cpu physical memory functions
3  *
4  * Copyright 2011 Red Hat, Inc. and/or its affiliates
5  *
6  * Authors:
7  *  Avi Kivity <avi@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or
10  * later.  See the COPYING file in the top-level directory.
11  *
12  */
13 
14 /*
15  * This header is for use by exec.c and memory.c ONLY.  Do not include it.
16  * The functions declared here will be removed soon.
17  */
18 
19 #ifndef RAM_ADDR_H
20 #define RAM_ADDR_H
21 
22 #ifndef CONFIG_USER_ONLY
23 #include "hw/xen/xen.h"
24 #include "sysemu/tcg.h"
25 #include "exec/ramlist.h"
26 
27 struct RAMBlock {
28     struct rcu_head rcu;
29     struct MemoryRegion *mr;
30     uint8_t *host;
31     uint8_t *colo_cache; /* For colo, VM's ram cache */
32     ram_addr_t offset;
33     ram_addr_t used_length;
34     ram_addr_t max_length;
35     void (*resized)(const char*, uint64_t length, void *host);
36     uint32_t flags;
37     /* Protected by iothread lock.  */
38     char idstr[256];
39     /* RCU-enabled, writes protected by the ramlist lock */
40     QLIST_ENTRY(RAMBlock) next;
41     QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
42     int fd;
43     size_t page_size;
44     /* dirty bitmap used during migration */
45     unsigned long *bmap;
46     /* bitmap of pages that haven't been sent even once
47      * only maintained and used in postcopy at the moment
48      * where it's used to send the dirtymap at the start
49      * of the postcopy phase
50      */
51     unsigned long *unsentmap;
52     /* bitmap of already received pages in postcopy */
53     unsigned long *receivedmap;
54 };
55 
56 static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
57 {
58     return (b && b->host && offset < b->used_length) ? true : false;
59 }
60 
61 static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
62 {
63     assert(offset_in_ramblock(block, offset));
64     return (char *)block->host + offset;
65 }
66 
67 static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
68                                                             RAMBlock *rb)
69 {
70     uint64_t host_addr_offset =
71             (uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
72     return host_addr_offset >> TARGET_PAGE_BITS;
73 }
74 
/*
 * NOTE(review): presumably reports whether @rb is backed by persistent
 * memory (cf. the RAM_PMEM flag described for qemu_ram_alloc_from_file());
 * the definition is not in this header, so confirm there.
 */
bool ramblock_is_pmem(RAMBlock *rb);

/*
 * NOTE(review): going by the names, these presumably return the smallest
 * and the largest RAM page size in use. The definitions are not visible
 * here; verify before relying on that.
 */
long qemu_minrampagesize(void);
long qemu_maxrampagesize(void);
79 
/**
 * qemu_ram_alloc_from_file,
 * qemu_ram_alloc_from_fd:  Allocate a ram block from the specified backing
 *                          file or device
 *
 * Parameters:
 *  @size: the size in bytes of the ram block
 *  @mr: the memory region where the ram block is
 *  @ram_flags: the properties of the ram block; either one of the
 *              following values or a bitwise OR of several of them:
 *              - RAM_SHARED: mmap the backing file or device with MAP_SHARED
 *              - RAM_PMEM: the backend @mem_path or @fd is persistent memory
 *              Other bits are ignored.
 *  @mem_path or @fd: the backing file or device, given as a path
 *                    (qemu_ram_alloc_from_file) or as a file descriptor
 *                    (qemu_ram_alloc_from_fd)
 *  @errp: pointer to Error*, used to store an error if one occurs
 *
 * Return:
 *  On success, return a pointer to the ram block.
 *  On failure, return NULL.
 */
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   uint32_t ram_flags, const char *mem_path,
                                   Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                 uint32_t ram_flags, int fd,
                                 Error **errp);
106 
/*
 * Further RAM block constructors, plus the matching free and resize calls.
 * Each constructor returns a RAMBlock pointer and takes an Error ** for
 * error reporting.
 *
 * NOTE(review): presumably they return NULL on failure like
 * qemu_ram_alloc_from_file(); this header does not say so -- confirm
 * against the definitions.
 */
RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr,
                         Error **errp);
/* The @resized callback has the same signature as RAMBlock.resized. */
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
                                    void (*resized)(const char*,
                                                    uint64_t length,
                                                    void *host),
                                    MemoryRegion *mr, Error **errp);
void qemu_ram_free(RAMBlock *block);

/*
 * NOTE(review): the meaning of the int return value (0 / negative errno?)
 * is not visible in this header -- check the definition.
 */
int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);
119 
/*
 * Client masks as taken by cpu_physical_memory_set_dirty_range(): bit N
 * stands for dirty-memory client N.
 *
 * DIRTY_CLIENTS_ALL has one bit set for every client index below
 * DIRTY_MEMORY_NUM; DIRTY_CLIENTS_NOCODE is the same mask with the
 * DIRTY_MEMORY_CODE bit cleared.
 */
#define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
#define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))

/*
 * NOTE(review): defined elsewhere; by its name it presumably invalidates
 * translated code covering the RAM range [start, end) -- confirm whether
 * @end is inclusive or exclusive at the definition.
 */
void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end);
124 
125 static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
126                                                  ram_addr_t length,
127                                                  unsigned client)
128 {
129     DirtyMemoryBlocks *blocks;
130     unsigned long end, page;
131     unsigned long idx, offset, base;
132     bool dirty = false;
133 
134     assert(client < DIRTY_MEMORY_NUM);
135 
136     end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
137     page = start >> TARGET_PAGE_BITS;
138 
139     rcu_read_lock();
140 
141     blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
142 
143     idx = page / DIRTY_MEMORY_BLOCK_SIZE;
144     offset = page % DIRTY_MEMORY_BLOCK_SIZE;
145     base = page - offset;
146     while (page < end) {
147         unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
148         unsigned long num = next - base;
149         unsigned long found = find_next_bit(blocks->blocks[idx], num, offset);
150         if (found < num) {
151             dirty = true;
152             break;
153         }
154 
155         page = next;
156         idx++;
157         offset = 0;
158         base += DIRTY_MEMORY_BLOCK_SIZE;
159     }
160 
161     rcu_read_unlock();
162 
163     return dirty;
164 }
165 
166 static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
167                                                  ram_addr_t length,
168                                                  unsigned client)
169 {
170     DirtyMemoryBlocks *blocks;
171     unsigned long end, page;
172     unsigned long idx, offset, base;
173     bool dirty = true;
174 
175     assert(client < DIRTY_MEMORY_NUM);
176 
177     end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
178     page = start >> TARGET_PAGE_BITS;
179 
180     rcu_read_lock();
181 
182     blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
183 
184     idx = page / DIRTY_MEMORY_BLOCK_SIZE;
185     offset = page % DIRTY_MEMORY_BLOCK_SIZE;
186     base = page - offset;
187     while (page < end) {
188         unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
189         unsigned long num = next - base;
190         unsigned long found = find_next_zero_bit(blocks->blocks[idx], num, offset);
191         if (found < num) {
192             dirty = false;
193             break;
194         }
195 
196         page = next;
197         idx++;
198         offset = 0;
199         base += DIRTY_MEMORY_BLOCK_SIZE;
200     }
201 
202     rcu_read_unlock();
203 
204     return dirty;
205 }
206 
207 static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
208                                                       unsigned client)
209 {
210     return cpu_physical_memory_get_dirty(addr, 1, client);
211 }
212 
213 static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
214 {
215     bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
216     bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
217     bool migration =
218         cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
219     return !(vga && code && migration);
220 }
221 
222 static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
223                                                                ram_addr_t length,
224                                                                uint8_t mask)
225 {
226     uint8_t ret = 0;
227 
228     if (mask & (1 << DIRTY_MEMORY_VGA) &&
229         !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) {
230         ret |= (1 << DIRTY_MEMORY_VGA);
231     }
232     if (mask & (1 << DIRTY_MEMORY_CODE) &&
233         !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) {
234         ret |= (1 << DIRTY_MEMORY_CODE);
235     }
236     if (mask & (1 << DIRTY_MEMORY_MIGRATION) &&
237         !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
238         ret |= (1 << DIRTY_MEMORY_MIGRATION);
239     }
240     return ret;
241 }
242 
243 static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
244                                                       unsigned client)
245 {
246     unsigned long page, idx, offset;
247     DirtyMemoryBlocks *blocks;
248 
249     assert(client < DIRTY_MEMORY_NUM);
250 
251     page = addr >> TARGET_PAGE_BITS;
252     idx = page / DIRTY_MEMORY_BLOCK_SIZE;
253     offset = page % DIRTY_MEMORY_BLOCK_SIZE;
254 
255     rcu_read_lock();
256 
257     blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
258 
259     set_bit_atomic(offset, blocks->blocks[idx]);
260 
261     rcu_read_unlock();
262 }
263 
264 static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
265                                                        ram_addr_t length,
266                                                        uint8_t mask)
267 {
268     DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
269     unsigned long end, page;
270     unsigned long idx, offset, base;
271     int i;
272 
273     if (!mask && !xen_enabled()) {
274         return;
275     }
276 
277     end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
278     page = start >> TARGET_PAGE_BITS;
279 
280     rcu_read_lock();
281 
282     for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
283         blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i]);
284     }
285 
286     idx = page / DIRTY_MEMORY_BLOCK_SIZE;
287     offset = page % DIRTY_MEMORY_BLOCK_SIZE;
288     base = page - offset;
289     while (page < end) {
290         unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
291 
292         if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
293             bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
294                               offset, next - page);
295         }
296         if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
297             bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
298                               offset, next - page);
299         }
300         if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
301             bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
302                               offset, next - page);
303         }
304 
305         page = next;
306         idx++;
307         offset = 0;
308         base += DIRTY_MEMORY_BLOCK_SIZE;
309     }
310 
311     rcu_read_unlock();
312 
313     xen_hvm_modified_memory(start, length);
314 }
315 
316 #if !defined(_WIN32)
317 static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
318                                                           ram_addr_t start,
319                                                           ram_addr_t pages)
320 {
321     unsigned long i, j;
322     unsigned long page_number, c;
323     hwaddr addr;
324     ram_addr_t ram_addr;
325     unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
326     unsigned long hpratio = getpagesize() / TARGET_PAGE_SIZE;
327     unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);
328 
329     /* start address is aligned at the start of a word? */
330     if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
331         (hpratio == 1)) {
332         unsigned long **blocks[DIRTY_MEMORY_NUM];
333         unsigned long idx;
334         unsigned long offset;
335         long k;
336         long nr = BITS_TO_LONGS(pages);
337 
338         idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
339         offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
340                           DIRTY_MEMORY_BLOCK_SIZE);
341 
342         rcu_read_lock();
343 
344         for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
345             blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
346         }
347 
348         for (k = 0; k < nr; k++) {
349             if (bitmap[k]) {
350                 unsigned long temp = leul_to_cpu(bitmap[k]);
351 
352                 atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset], temp);
353                 atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);
354                 if (tcg_enabled()) {
355                     atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], temp);
356                 }
357             }
358 
359             if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
360                 offset = 0;
361                 idx++;
362             }
363         }
364 
365         rcu_read_unlock();
366 
367         xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
368     } else {
369         uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;
370         /*
371          * bitmap-traveling is faster than memory-traveling (for addr...)
372          * especially when most of the memory is not dirty.
373          */
374         for (i = 0; i < len; i++) {
375             if (bitmap[i] != 0) {
376                 c = leul_to_cpu(bitmap[i]);
377                 do {
378                     j = ctzl(c);
379                     c &= ~(1ul << j);
380                     page_number = (i * HOST_LONG_BITS + j) * hpratio;
381                     addr = page_number * TARGET_PAGE_SIZE;
382                     ram_addr = start + addr;
383                     cpu_physical_memory_set_dirty_range(ram_addr,
384                                        TARGET_PAGE_SIZE * hpratio, clients);
385                 } while (c != 0);
386             }
387         }
388     }
389 }
390 #endif /* not _WIN32 */
391 
/*
 * Defined outside this header.  Within this file a true return value is
 * treated as "at least one page of the range was dirty for @client" (see
 * cpu_physical_memory_sync_dirty_bitmap()).
 * NOTE(review): that the dirty bits are cleared as a side effect is
 * implied by the name only -- confirm at the definition.
 */
bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client);

/*
 * NOTE(review): presumably captures the dirty state of the range for
 * @client into a snapshot object and clears it; ownership of the returned
 * DirtyBitmapSnapshot is not visible here -- check the definition.
 */
DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
    (ram_addr_t start, ram_addr_t length, unsigned client);

/*
 * NOTE(review): presumably queries a snapshot obtained from the function
 * above for dirty pages inside [start, start + length) -- confirm.
 */
bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
                                            ram_addr_t start,
                                            ram_addr_t length);
402 
403 static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
404                                                          ram_addr_t length)
405 {
406     cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION);
407     cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA);
408     cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
409 }
410 
411 
412 static inline
413 uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
414                                                ram_addr_t start,
415                                                ram_addr_t length,
416                                                uint64_t *real_dirty_pages)
417 {
418     ram_addr_t addr;
419     unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
420     uint64_t num_dirty = 0;
421     unsigned long *dest = rb->bmap;
422 
423     /* start address and length is aligned at the start of a word? */
424     if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) ==
425          (start + rb->offset) &&
426         !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
427         int k;
428         int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
429         unsigned long * const *src;
430         unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
431         unsigned long offset = BIT_WORD((word * BITS_PER_LONG) %
432                                         DIRTY_MEMORY_BLOCK_SIZE);
433         unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);
434 
435         rcu_read_lock();
436 
437         src = atomic_rcu_read(
438                 &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks;
439 
440         for (k = page; k < page + nr; k++) {
441             if (src[idx][offset]) {
442                 unsigned long bits = atomic_xchg(&src[idx][offset], 0);
443                 unsigned long new_dirty;
444                 *real_dirty_pages += ctpopl(bits);
445                 new_dirty = ~dest[k];
446                 dest[k] |= bits;
447                 new_dirty &= bits;
448                 num_dirty += ctpopl(new_dirty);
449             }
450 
451             if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
452                 offset = 0;
453                 idx++;
454             }
455         }
456 
457         rcu_read_unlock();
458     } else {
459         ram_addr_t offset = rb->offset;
460 
461         for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
462             if (cpu_physical_memory_test_and_clear_dirty(
463                         start + addr + offset,
464                         TARGET_PAGE_SIZE,
465                         DIRTY_MEMORY_MIGRATION)) {
466                 *real_dirty_pages += 1;
467                 long k = (start + addr) >> TARGET_PAGE_BITS;
468                 if (!test_and_set_bit(k, dest)) {
469                     num_dirty++;
470                 }
471             }
472         }
473     }
474 
475     return num_dirty;
476 }
477 #endif
478 #endif
479