/*
 * Support for RAM backed by mmapped host memory.
 *
 * Copyright (c) 2015 Red Hat, Inc.
 *
 * Authors:
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later. See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/mmap-alloc.h"
#include "qemu/host-utils.h"

#define HUGETLBFS_MAGIC       0x958458f6

#ifdef CONFIG_LINUX
#include <sys/vfs.h>
#endif

size_t qemu_fd_getpagesize(int fd)
{
#ifdef CONFIG_LINUX
    struct statfs fs;
    int ret;

    if (fd != -1) {
        do {
            ret = fstatfs(fd, &fs);
        } while (ret != 0 && errno == EINTR);

        if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
            return fs.f_bsize;
        }
    }
#ifdef __sparc__
    /* SPARC Linux needs greater alignment than the pagesize */
    return QEMU_VMALLOC_ALIGN;
#endif
#endif

    return getpagesize();
}

size_t qemu_mempath_getpagesize(const char *mem_path)
{
#ifdef CONFIG_LINUX
    struct statfs fs;
    int ret;

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

    if (fs.f_type == HUGETLBFS_MAGIC) {
        /* The path is on hugetlbfs; return its huge page size */
        return fs.f_bsize;
    }
#ifdef __sparc__
    /* SPARC Linux needs greater alignment than the pagesize */
    return QEMU_VMALLOC_ALIGN;
#endif
#endif

    return getpagesize();
}
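/*
 * Usage sketch (illustrative, not part of the original file): choosing the
 * mmap alignment for a backing fd. On hugetlbfs, qemu_fd_getpagesize()
 * returns the mount's huge page size (fs.f_bsize); otherwise it falls back
 * to getpagesize(). The helper name below is a hypothetical example.
 */
#if 0
static size_t example_pick_align(int backing_fd)
{
    size_t fd_pagesize = qemu_fd_getpagesize(backing_fd);

    /* qemu_ram_mmap() asserts align >= getpagesize(), so clamp upward */
    return MAX(fd_pagesize, (size_t)getpagesize());
}
#endif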
void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
{
    /*
     * Note: this always allocates at least one extra page of virtual
     * address space, even if size is already aligned.
     */
    size_t total = size + align;
#if defined(__powerpc64__) && defined(__linux__)
    /* On ppc64, mappings in the same segment (aka slice) must share the
     * same page size. Since part of this reservation will be re-mapped
     * from the supplied fd, make sure the reservation itself uses the same
     * page size by mmapping the fd, with MAP_NORESERVE so no backing
     * store is allocated. This is skipped when the fd uses the system
     * page size, in which case anonymous memory is fine.
     */
    int anonfd = fd == -1 || qemu_fd_getpagesize(fd) == getpagesize() ? -1 : fd;
    int flags = anonfd == -1 ? MAP_ANONYMOUS : MAP_NORESERVE;
    void *ptr = mmap(0, total, PROT_NONE, flags | MAP_PRIVATE, anonfd, 0);
#else
    void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
#endif
    size_t offset;
    void *ptr1;

    if (ptr == MAP_FAILED) {
        return MAP_FAILED;
    }

    assert(is_power_of_2(align));
    /* Always align to the host page size */
    assert(align >= getpagesize());

    offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
    ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
                MAP_FIXED |
                (fd == -1 ? MAP_ANONYMOUS : 0) |
                (shared ? MAP_SHARED : MAP_PRIVATE),
                fd, 0);
    if (ptr1 == MAP_FAILED) {
        munmap(ptr, total);
        return MAP_FAILED;
    }

    if (offset > 0) {
        munmap(ptr, offset);
    }

    /*
     * Leave a single PROT_NONE page allocated after the RAM block, to
     * serve as a guard page against potential buffer overflows.
     */
    total -= offset;
    if (total > size + getpagesize()) {
        munmap(ptr1 + size + getpagesize(), total - size - getpagesize());
    }

    return ptr1;
}

void qemu_ram_munmap(void *ptr, size_t size)
{
    if (ptr) {
        /* Unmap both the RAM block and the guard page */
        munmap(ptr, size + getpagesize());
    }
}
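/*
 * End-to-end sketch (illustrative, not part of the original file): map a
 * shared RAM block backed by "backing_fd", touch it, then release it.
 * qemu_ram_munmap() must be passed the same size that was mapped so the
 * trailing PROT_NONE guard page is unmapped together with the block. The
 * function name and parameters are hypothetical.
 */
#if 0
static int example_ram_lifecycle(int backing_fd, size_t size)
{
    size_t align = MAX(qemu_fd_getpagesize(backing_fd),
                       (size_t)getpagesize());
    void *block = qemu_ram_mmap(backing_fd, size, align, true);

    if (block == MAP_FAILED) {
        return -errno;
    }

    memset(block, 0, size);       /* the mapping is now usable as RAM */

    qemu_ram_munmap(block, size); /* also unmaps the guard page */
    return 0;
}
#endif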