/*
 * Declarations for cpu physical memory functions
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Avi Kivity <avi@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 *
 */

/*
 * This header is for use by exec.c and memory.c ONLY.  Do not include it.
 * The functions declared here will be removed soon.
 */

#ifndef RAM_ADDR_H
#define RAM_ADDR_H

#ifndef CONFIG_USER_ONLY
#include "hw/xen/xen.h"
#include "exec/ramlist.h"

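/*
 * A RAMBlock describes one contiguous chunk of guest RAM: where it is
 * mapped in the host address space (@host), its offset and size inside
 * the ram_addr_t address space, and the per-block state kept for
 * migration (dirty bitmap, postcopy bitmaps, COLO cache).
 */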
struct RAMBlock {
    struct rcu_head rcu;
    struct MemoryRegion *mr;
    uint8_t *host;
    uint8_t *colo_cache; /* For COLO, the VM's RAM cache */
    ram_addr_t offset;
    ram_addr_t used_length;
    ram_addr_t max_length;
    void (*resized)(const char*, uint64_t length, void *host);
    uint32_t flags;
    /* Protected by iothread lock.  */
    char idstr[256];
    /* RCU-enabled, writes protected by the ramlist lock */
    QLIST_ENTRY(RAMBlock) next;
    QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
    int fd;
    size_t page_size;
    /* dirty bitmap used during migration */
    unsigned long *bmap;
    /* Bitmap of pages that have not been sent even once.  Currently it is
     * only maintained and used by postcopy, which uses it to send the
     * dirty map at the start of the postcopy phase.
     */
    unsigned long *unsentmap;
    /* bitmap of already received pages in postcopy */
    unsigned long *receivedmap;
};

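/*
 * Return true if @offset lies within the used part of @b, i.e. @b exists,
 * has a host mapping and @offset < @b->used_length.
 */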
static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
{
    return (b && b->host && offset < b->used_length) ? true : false;
}

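/*
 * Return the host virtual address that corresponds to guest ram offset
 * @offset within @block; the offset is asserted to be valid.
 *
 * Usage sketch (the variables are hypothetical, not part of this API):
 *
 *     if (offset_in_ramblock(rb, off)) {
 *         void *hva = ramblock_ptr(rb, off);
 *         ...
 *     }
 */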
static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
{
    assert(offset_in_ramblock(block, offset));
    return (char *)block->host + offset;
}

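/*
 * Translate a host virtual address inside @rb into a target-page index,
 * suitable for indexing the block's postcopy @receivedmap bitmap.
 */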
static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
                                                            RAMBlock *rb)
{
    uint64_t host_addr_offset =
            (uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
    return host_addr_offset >> TARGET_PAGE_BITS;
}

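/* Return whether @rb is backed by persistent memory (RAM_PMEM). */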
bool ramblock_is_pmem(RAMBlock *rb);

long qemu_getrampagesize(void);

/**
 * qemu_ram_alloc_from_file,
 * qemu_ram_alloc_from_fd:  Allocate a ram block from the specified backing
 *                          file or device
 *
 * Parameters:
 *  @size: the size in bytes of the ram block
 *  @mr: the memory region where the ram block is
 *  @ram_flags: the properties of the ram block, which can be one of the
 *              following values, or a bit-wise OR of them:
 *              - RAM_SHARED: mmap the backing file or device with MAP_SHARED
 *              - RAM_PMEM: the backend @mem_path or @fd is persistent memory
 *              Other bits are ignored.
 *  @mem_path or @fd: the backing file or device
 *  @errp: pointer to Error*, to store an error if one occurs
 *
 * Return:
 *  On success, return a pointer to the ram block.
 *  On failure, return NULL.
 */
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   uint32_t ram_flags, const char *mem_path,
                                   Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                 uint32_t ram_flags, int fd,
                                 Error **errp);

RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr,
                         Error **errp);
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
                                    void (*resized)(const char*,
                                                    uint64_t length,
                                                    void *host),
                                    MemoryRegion *mr, Error **errp);
void qemu_ram_free(RAMBlock *block);

int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);

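/*
 * Bit masks selecting dirty memory clients: DIRTY_CLIENTS_ALL covers every
 * client, DIRTY_CLIENTS_NOCODE leaves out DIRTY_MEMORY_CODE (the client TCG
 * uses to detect writes to translated code).
 */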
#define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
#define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))

void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end);

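/*
 * Return true if at least one page in [@start, @start + @length) is dirty
 * for @client.  The dirty bitmaps are read under the RCU read lock.
 */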
static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = false;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_bit(blocks->blocks[idx], num, offset);
        if (found < num) {
            dirty = true;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    rcu_read_unlock();

    return dirty;
}

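/*
 * Return true only if every page in [@start, @start + @length) is dirty
 * for @client.
 */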
static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = true;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_zero_bit(blocks->blocks[idx], num, offset);
        if (found < num) {
            dirty = false;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    rcu_read_unlock();

    return dirty;
}

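/* Return the dirty state of the page containing @addr for @client. */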
static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    return cpu_physical_memory_get_dirty(addr, 1, client);
}

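/*
 * Return true if the page at @addr is still clean for at least one of the
 * VGA, CODE and MIGRATION clients, i.e. some client still has to be told
 * about writes to it.
 */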
static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
{
    bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
    bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
    bool migration =
        cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    return !(vga && code && migration);
}

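/*
 * Return the subset of @mask whose clients see at least one clean page in
 * [@start, @start + @length), i.e. the clients that must still be updated
 * when the range is written.
 */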
static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
                                                               ram_addr_t length,
                                                               uint8_t mask)
{
    uint8_t ret = 0;

    if (mask & (1 << DIRTY_MEMORY_VGA) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) {
        ret |= (1 << DIRTY_MEMORY_VGA);
    }
    if (mask & (1 << DIRTY_MEMORY_CODE) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) {
        ret |= (1 << DIRTY_MEMORY_CODE);
    }
    if (mask & (1 << DIRTY_MEMORY_MIGRATION) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
        ret |= (1 << DIRTY_MEMORY_MIGRATION);
    }
    return ret;
}

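/* Atomically mark the page containing @addr dirty for @client. */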
static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    unsigned long page, idx, offset;
    DirtyMemoryBlocks *blocks;

    assert(client < DIRTY_MEMORY_NUM);

    page = addr >> TARGET_PAGE_BITS;
    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    set_bit_atomic(offset, blocks->blocks[idx]);

    rcu_read_unlock();
}

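/*
 * Mark every page touching [@start, @start + @length) dirty for the clients
 * selected in @mask, using atomic bitmap updates under the RCU read lock,
 * and forward the range to Xen via xen_hvm_modified_memory().
 *
 * Usage sketch (hypothetical @addr/@len, only to illustrate the mask
 * argument):
 *
 *     cpu_physical_memory_set_dirty_range(addr, len, DIRTY_CLIENTS_ALL);
 */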
static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
                                                       ram_addr_t length,
                                                       uint8_t mask)
{
    DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
    unsigned long end, page;
    unsigned long idx, offset, base;
    int i;

    if (!mask && !xen_enabled()) {
        return;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
        blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i]);
    }

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);

        if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
            bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
                              offset, next - page);
        }
        if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
            bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
                              offset, next - page);
        }
        if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
            bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
                              offset, next - page);
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    rcu_read_unlock();

    xen_hvm_modified_memory(start, length);
}

#if !defined(_WIN32)
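/*
 * Merge a little-endian dirty bitmap of @pages bits (typically a dirty log
 * harvested from KVM) describing guest memory that starts at ram address
 * @start into the global dirty memory, for the MIGRATION and VGA clients
 * and, when TCG is enabled, the CODE client.  A word-aligned bitmap with
 * matching host and target page sizes is ORed in directly; otherwise each
 * dirty bit is expanded through cpu_physical_memory_set_dirty_range().
 */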
static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
                                                          ram_addr_t start,
                                                          ram_addr_t pages)
{
    unsigned long i, j;
    unsigned long page_number, c;
    hwaddr addr;
    ram_addr_t ram_addr;
    unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
    unsigned long hpratio = getpagesize() / TARGET_PAGE_SIZE;
    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

    /* Is the start address aligned at the start of a word? */
    if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
        (hpratio == 1)) {
        unsigned long **blocks[DIRTY_MEMORY_NUM];
        unsigned long idx;
        unsigned long offset;
        long k;
        long nr = BITS_TO_LONGS(pages);

        idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
        offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
                          DIRTY_MEMORY_BLOCK_SIZE);

        rcu_read_lock();

        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
        }

        for (k = 0; k < nr; k++) {
            if (bitmap[k]) {
                unsigned long temp = leul_to_cpu(bitmap[k]);

                atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset], temp);
                atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);
                if (tcg_enabled()) {
                    atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], temp);
                }
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }

        rcu_read_unlock();

        xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
    } else {
        uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;
        /*
         * Walking the bitmap is faster than walking memory page by page,
         * especially when most of the memory is not dirty.
         */
        for (i = 0; i < len; i++) {
            if (bitmap[i] != 0) {
                c = leul_to_cpu(bitmap[i]);
                do {
                    j = ctzl(c);
                    c &= ~(1ul << j);
                    page_number = (i * HOST_LONG_BITS + j) * hpratio;
                    addr = page_number * TARGET_PAGE_SIZE;
                    ram_addr = start + addr;
                    cpu_physical_memory_set_dirty_range(ram_addr,
                                       TARGET_PAGE_SIZE * hpratio, clients);
                } while (c != 0);
            }
        }
    }
}
#endif /* not _WIN32 */

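/*
 * Clear the @client dirty bits for [@start, @start + @length) and return
 * whether any page in the range was dirty.
 */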
bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client);

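/*
 * cpu_physical_memory_snapshot_and_clear_dirty() copies the @client dirty
 * state of a range into a DirtyBitmapSnapshot while clearing it;
 * cpu_physical_memory_snapshot_get_dirty() then queries that snapshot.
 */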
DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
    (ram_addr_t start, ram_addr_t length, unsigned client);

bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
                                            ram_addr_t start,
                                            ram_addr_t length);

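/*
 * Drop the dirty state of [@start, @start + @length) for all three clients;
 * the individual test-and-clear results are ignored.
 */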
static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
                                                         ram_addr_t length)
{
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
}


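/*
 * Move (test and clear) the DIRTY_MEMORY_MIGRATION bits for the range
 * [@start, @start + @length) of @rb from the global dirty memory into the
 * block's migration bitmap @rb->bmap.  *@real_dirty_pages is incremented by
 * the total number of dirty pages found; the return value counts only the
 * pages that were not already set in @rb->bmap.
 */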
static inline
uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
                                               ram_addr_t start,
                                               ram_addr_t length,
                                               uint64_t *real_dirty_pages)
{
    ram_addr_t addr;
    unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
    uint64_t num_dirty = 0;
    unsigned long *dest = rb->bmap;

    /* Are the start address and length aligned at the start of a word? */
    if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) ==
         (start + rb->offset) &&
        !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
        int k;
        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
        unsigned long * const *src;
        unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = BIT_WORD((word * BITS_PER_LONG) %
                                        DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

        rcu_read_lock();

        src = atomic_rcu_read(
                &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks;

        for (k = page; k < page + nr; k++) {
            if (src[idx][offset]) {
                unsigned long bits = atomic_xchg(&src[idx][offset], 0);
                unsigned long new_dirty;
                *real_dirty_pages += ctpopl(bits);
                new_dirty = ~dest[k];
                dest[k] |= bits;
                new_dirty &= bits;
                num_dirty += ctpopl(new_dirty);
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }

        rcu_read_unlock();
    } else {
        ram_addr_t offset = rb->offset;

        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
            if (cpu_physical_memory_test_and_clear_dirty(
                        start + addr + offset,
                        TARGET_PAGE_SIZE,
                        DIRTY_MEMORY_MIGRATION)) {
                *real_dirty_pages += 1;
                long k = (start + addr) >> TARGET_PAGE_BITS;
                if (!test_and_set_bit(k, dest)) {
                    num_dirty++;
                }
            }
        }
    }

    return num_dirty;
}
#endif
#endif