1 /* 2 * Copyright (C) 2011 Citrix Ltd. 3 * 4 * This work is licensed under the terms of the GNU GPL, version 2. See 5 * the COPYING file in the top-level directory. 6 * 7 * Contributions after 2012-01-13 are licensed under the terms of the 8 * GNU GPL, version 2 or (at your option) any later version. 9 */ 10 11 #include "qemu/osdep.h" 12 #include "qemu/units.h" 13 #include "qemu/error-report.h" 14 15 #include <sys/resource.h> 16 17 #include "hw/xen/xen_native.h" 18 #include "qemu/bitmap.h" 19 20 #include "sysemu/runstate.h" 21 #include "sysemu/xen-mapcache.h" 22 #include "trace.h" 23 24 25 //#define MAPCACHE_DEBUG 26 27 #ifdef MAPCACHE_DEBUG 28 # define DPRINTF(fmt, ...) do { \ 29 fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \ 30 } while (0) 31 #else 32 # define DPRINTF(fmt, ...) do { } while (0) 33 #endif 34 35 #if HOST_LONG_BITS == 32 36 # define MCACHE_BUCKET_SHIFT 16 37 # define MCACHE_MAX_SIZE (1UL<<31) /* 2GB Cap */ 38 #else 39 # define MCACHE_BUCKET_SHIFT 20 40 # define MCACHE_MAX_SIZE (1UL<<35) /* 32GB Cap */ 41 #endif 42 #define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT) 43 44 /* This is the size of the virtual address space reserve to QEMU that will not 45 * be use by MapCache. 46 * From empirical tests I observed that qemu use 75MB more than the 47 * max_mcache_size. 48 */ 49 #define NON_MCACHE_MEMORY_SIZE (80 * MiB) 50 51 typedef struct MapCacheEntry { 52 hwaddr paddr_index; 53 uint8_t *vaddr_base; 54 unsigned long *valid_mapping; 55 uint32_t lock; 56 #define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0) 57 uint8_t flags; 58 hwaddr size; 59 struct MapCacheEntry *next; 60 } MapCacheEntry; 61 62 typedef struct MapCacheRev { 63 uint8_t *vaddr_req; 64 hwaddr paddr_index; 65 hwaddr size; 66 QTAILQ_ENTRY(MapCacheRev) next; 67 bool dma; 68 } MapCacheRev; 69 70 typedef struct MapCache { 71 MapCacheEntry *entry; 72 unsigned long nr_buckets; 73 QTAILQ_HEAD(, MapCacheRev) locked_entries; 74 75 /* For most cases (>99.9%), the page address is the same. */ 76 MapCacheEntry *last_entry; 77 unsigned long max_mcache_size; 78 unsigned int mcache_bucket_shift; 79 80 phys_offset_to_gaddr_t phys_offset_to_gaddr; 81 QemuMutex lock; 82 void *opaque; 83 } MapCache; 84 85 static MapCache *mapcache; 86 87 static inline void mapcache_lock(void) 88 { 89 qemu_mutex_lock(&mapcache->lock); 90 } 91 92 static inline void mapcache_unlock(void) 93 { 94 qemu_mutex_unlock(&mapcache->lock); 95 } 96 97 static inline int test_bits(int nr, int size, const unsigned long *addr) 98 { 99 unsigned long res = find_next_zero_bit(addr, size + nr, nr); 100 if (res >= nr + size) 101 return 1; 102 else 103 return 0; 104 } 105 106 void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque) 107 { 108 unsigned long size; 109 struct rlimit rlimit_as; 110 111 mapcache = g_new0(MapCache, 1); 112 113 mapcache->phys_offset_to_gaddr = f; 114 mapcache->opaque = opaque; 115 qemu_mutex_init(&mapcache->lock); 116 117 QTAILQ_INIT(&mapcache->locked_entries); 118 119 if (geteuid() == 0) { 120 rlimit_as.rlim_cur = RLIM_INFINITY; 121 rlimit_as.rlim_max = RLIM_INFINITY; 122 mapcache->max_mcache_size = MCACHE_MAX_SIZE; 123 } else { 124 getrlimit(RLIMIT_AS, &rlimit_as); 125 rlimit_as.rlim_cur = rlimit_as.rlim_max; 126 127 if (rlimit_as.rlim_max != RLIM_INFINITY) { 128 warn_report("QEMU's maximum size of virtual" 129 " memory is not infinity"); 130 } 131 if (rlimit_as.rlim_max < MCACHE_MAX_SIZE + NON_MCACHE_MEMORY_SIZE) { 132 mapcache->max_mcache_size = rlimit_as.rlim_max - 133 NON_MCACHE_MEMORY_SIZE; 134 } else { 135 mapcache->max_mcache_size = MCACHE_MAX_SIZE; 136 } 137 } 138 139 setrlimit(RLIMIT_AS, &rlimit_as); 140 141 mapcache->nr_buckets = 142 (((mapcache->max_mcache_size >> XC_PAGE_SHIFT) + 143 (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >> 144 (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)); 145 146 size = mapcache->nr_buckets * sizeof (MapCacheEntry); 147 size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1); 148 DPRINTF("%s, nr_buckets = %lx size %lu\n", __func__, 149 mapcache->nr_buckets, size); 150 mapcache->entry = g_malloc0(size); 151 } 152 153 static void xen_remap_bucket(MapCacheEntry *entry, 154 void *vaddr, 155 hwaddr size, 156 hwaddr address_index, 157 bool dummy) 158 { 159 uint8_t *vaddr_base; 160 xen_pfn_t *pfns; 161 int *err; 162 unsigned int i; 163 hwaddr nb_pfn = size >> XC_PAGE_SHIFT; 164 165 trace_xen_remap_bucket(address_index); 166 167 pfns = g_new0(xen_pfn_t, nb_pfn); 168 err = g_new0(int, nb_pfn); 169 170 if (entry->vaddr_base != NULL) { 171 if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) { 172 ram_block_notify_remove(entry->vaddr_base, entry->size, 173 entry->size); 174 } 175 176 /* 177 * If an entry is being replaced by another mapping and we're using 178 * MAP_FIXED flag for it - there is possibility of a race for vaddr 179 * address with another thread doing an mmap call itself 180 * (see man 2 mmap). To avoid that we skip explicit unmapping here 181 * and allow the kernel to destroy the previous mappings by replacing 182 * them in mmap call later. 183 * 184 * Non-identical replacements are not allowed therefore. 185 */ 186 assert(!vaddr || (entry->vaddr_base == vaddr && entry->size == size)); 187 188 if (!vaddr && munmap(entry->vaddr_base, entry->size) != 0) { 189 perror("unmap fails"); 190 exit(-1); 191 } 192 } 193 g_free(entry->valid_mapping); 194 entry->valid_mapping = NULL; 195 196 for (i = 0; i < nb_pfn; i++) { 197 pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i; 198 } 199 200 /* 201 * If the caller has requested the mapping at a specific address use 202 * MAP_FIXED to make sure it's honored. 203 */ 204 if (!dummy) { 205 vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr, 206 PROT_READ | PROT_WRITE, 207 vaddr ? MAP_FIXED : 0, 208 nb_pfn, pfns, err); 209 if (vaddr_base == NULL) { 210 perror("xenforeignmemory_map2"); 211 exit(-1); 212 } 213 } else { 214 /* 215 * We create dummy mappings where we are unable to create a foreign 216 * mapping immediately due to certain circumstances (i.e. on resume now) 217 */ 218 vaddr_base = mmap(vaddr, size, PROT_READ | PROT_WRITE, 219 MAP_ANON | MAP_SHARED | (vaddr ? MAP_FIXED : 0), 220 -1, 0); 221 if (vaddr_base == MAP_FAILED) { 222 perror("mmap"); 223 exit(-1); 224 } 225 } 226 227 if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) { 228 ram_block_notify_add(vaddr_base, size, size); 229 } 230 231 entry->vaddr_base = vaddr_base; 232 entry->paddr_index = address_index; 233 entry->size = size; 234 entry->valid_mapping = g_new0(unsigned long, 235 BITS_TO_LONGS(size >> XC_PAGE_SHIFT)); 236 237 if (dummy) { 238 entry->flags |= XEN_MAPCACHE_ENTRY_DUMMY; 239 } else { 240 entry->flags &= ~(XEN_MAPCACHE_ENTRY_DUMMY); 241 } 242 243 bitmap_zero(entry->valid_mapping, nb_pfn); 244 for (i = 0; i < nb_pfn; i++) { 245 if (!err[i]) { 246 bitmap_set(entry->valid_mapping, i, 1); 247 } 248 } 249 250 g_free(pfns); 251 g_free(err); 252 } 253 254 static uint8_t *xen_map_cache_unlocked(hwaddr phys_addr, hwaddr size, 255 uint8_t lock, bool dma) 256 { 257 MapCacheEntry *entry, *pentry = NULL, 258 *free_entry = NULL, *free_pentry = NULL; 259 hwaddr address_index; 260 hwaddr address_offset; 261 hwaddr cache_size = size; 262 hwaddr test_bit_size; 263 bool translated G_GNUC_UNUSED = false; 264 bool dummy = false; 265 266 tryagain: 267 address_index = phys_addr >> MCACHE_BUCKET_SHIFT; 268 address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1); 269 270 trace_xen_map_cache(phys_addr); 271 272 /* test_bit_size is always a multiple of XC_PAGE_SIZE */ 273 if (size) { 274 test_bit_size = size + (phys_addr & (XC_PAGE_SIZE - 1)); 275 276 if (test_bit_size % XC_PAGE_SIZE) { 277 test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE); 278 } 279 } else { 280 test_bit_size = XC_PAGE_SIZE; 281 } 282 283 if (mapcache->last_entry != NULL && 284 mapcache->last_entry->paddr_index == address_index && 285 !lock && !size && 286 test_bits(address_offset >> XC_PAGE_SHIFT, 287 test_bit_size >> XC_PAGE_SHIFT, 288 mapcache->last_entry->valid_mapping)) { 289 trace_xen_map_cache_return(mapcache->last_entry->vaddr_base + address_offset); 290 return mapcache->last_entry->vaddr_base + address_offset; 291 } 292 293 /* size is always a multiple of MCACHE_BUCKET_SIZE */ 294 if (size) { 295 cache_size = size + address_offset; 296 if (cache_size % MCACHE_BUCKET_SIZE) { 297 cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE); 298 } 299 } else { 300 cache_size = MCACHE_BUCKET_SIZE; 301 } 302 303 entry = &mapcache->entry[address_index % mapcache->nr_buckets]; 304 305 while (entry && (lock || entry->lock) && entry->vaddr_base && 306 (entry->paddr_index != address_index || entry->size != cache_size || 307 !test_bits(address_offset >> XC_PAGE_SHIFT, 308 test_bit_size >> XC_PAGE_SHIFT, 309 entry->valid_mapping))) { 310 if (!free_entry && !entry->lock) { 311 free_entry = entry; 312 free_pentry = pentry; 313 } 314 pentry = entry; 315 entry = entry->next; 316 } 317 if (!entry && free_entry) { 318 entry = free_entry; 319 pentry = free_pentry; 320 } 321 if (!entry) { 322 entry = g_new0(MapCacheEntry, 1); 323 pentry->next = entry; 324 xen_remap_bucket(entry, NULL, cache_size, address_index, dummy); 325 } else if (!entry->lock) { 326 if (!entry->vaddr_base || entry->paddr_index != address_index || 327 entry->size != cache_size || 328 !test_bits(address_offset >> XC_PAGE_SHIFT, 329 test_bit_size >> XC_PAGE_SHIFT, 330 entry->valid_mapping)) { 331 xen_remap_bucket(entry, NULL, cache_size, address_index, dummy); 332 } 333 } 334 335 if(!test_bits(address_offset >> XC_PAGE_SHIFT, 336 test_bit_size >> XC_PAGE_SHIFT, 337 entry->valid_mapping)) { 338 mapcache->last_entry = NULL; 339 #ifdef XEN_COMPAT_PHYSMAP 340 if (!translated && mapcache->phys_offset_to_gaddr) { 341 phys_addr = mapcache->phys_offset_to_gaddr(phys_addr, size); 342 translated = true; 343 goto tryagain; 344 } 345 #endif 346 if (!dummy && runstate_check(RUN_STATE_INMIGRATE)) { 347 dummy = true; 348 goto tryagain; 349 } 350 trace_xen_map_cache_return(NULL); 351 return NULL; 352 } 353 354 mapcache->last_entry = entry; 355 if (lock) { 356 MapCacheRev *reventry = g_new0(MapCacheRev, 1); 357 entry->lock++; 358 if (entry->lock == 0) { 359 fprintf(stderr, 360 "mapcache entry lock overflow: "HWADDR_FMT_plx" -> %p\n", 361 entry->paddr_index, entry->vaddr_base); 362 abort(); 363 } 364 reventry->dma = dma; 365 reventry->vaddr_req = mapcache->last_entry->vaddr_base + address_offset; 366 reventry->paddr_index = mapcache->last_entry->paddr_index; 367 reventry->size = entry->size; 368 QTAILQ_INSERT_HEAD(&mapcache->locked_entries, reventry, next); 369 } 370 371 trace_xen_map_cache_return(mapcache->last_entry->vaddr_base + address_offset); 372 return mapcache->last_entry->vaddr_base + address_offset; 373 } 374 375 uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size, 376 uint8_t lock, bool dma) 377 { 378 uint8_t *p; 379 380 mapcache_lock(); 381 p = xen_map_cache_unlocked(phys_addr, size, lock, dma); 382 mapcache_unlock(); 383 return p; 384 } 385 386 ram_addr_t xen_ram_addr_from_mapcache(void *ptr) 387 { 388 MapCacheEntry *entry = NULL; 389 MapCacheRev *reventry; 390 hwaddr paddr_index; 391 hwaddr size; 392 ram_addr_t raddr; 393 int found = 0; 394 395 mapcache_lock(); 396 QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { 397 if (reventry->vaddr_req == ptr) { 398 paddr_index = reventry->paddr_index; 399 size = reventry->size; 400 found = 1; 401 break; 402 } 403 } 404 if (!found) { 405 fprintf(stderr, "%s, could not find %p\n", __func__, ptr); 406 QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { 407 DPRINTF(" "HWADDR_FMT_plx" -> %p is present\n", reventry->paddr_index, 408 reventry->vaddr_req); 409 } 410 abort(); 411 return 0; 412 } 413 414 entry = &mapcache->entry[paddr_index % mapcache->nr_buckets]; 415 while (entry && (entry->paddr_index != paddr_index || entry->size != size)) { 416 entry = entry->next; 417 } 418 if (!entry) { 419 DPRINTF("Trying to find address %p that is not in the mapcache!\n", ptr); 420 raddr = 0; 421 } else { 422 raddr = (reventry->paddr_index << MCACHE_BUCKET_SHIFT) + 423 ((unsigned long) ptr - (unsigned long) entry->vaddr_base); 424 } 425 mapcache_unlock(); 426 return raddr; 427 } 428 429 static void xen_invalidate_map_cache_entry_unlocked(uint8_t *buffer) 430 { 431 MapCacheEntry *entry = NULL, *pentry = NULL; 432 MapCacheRev *reventry; 433 hwaddr paddr_index; 434 hwaddr size; 435 int found = 0; 436 437 QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { 438 if (reventry->vaddr_req == buffer) { 439 paddr_index = reventry->paddr_index; 440 size = reventry->size; 441 found = 1; 442 break; 443 } 444 } 445 if (!found) { 446 DPRINTF("%s, could not find %p\n", __func__, buffer); 447 QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { 448 DPRINTF(" "HWADDR_FMT_plx" -> %p is present\n", reventry->paddr_index, reventry->vaddr_req); 449 } 450 return; 451 } 452 QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next); 453 g_free(reventry); 454 455 if (mapcache->last_entry != NULL && 456 mapcache->last_entry->paddr_index == paddr_index) { 457 mapcache->last_entry = NULL; 458 } 459 460 entry = &mapcache->entry[paddr_index % mapcache->nr_buckets]; 461 while (entry && (entry->paddr_index != paddr_index || entry->size != size)) { 462 pentry = entry; 463 entry = entry->next; 464 } 465 if (!entry) { 466 DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", buffer); 467 return; 468 } 469 entry->lock--; 470 if (entry->lock > 0 || pentry == NULL) { 471 return; 472 } 473 474 pentry->next = entry->next; 475 ram_block_notify_remove(entry->vaddr_base, entry->size, entry->size); 476 if (munmap(entry->vaddr_base, entry->size) != 0) { 477 perror("unmap fails"); 478 exit(-1); 479 } 480 g_free(entry->valid_mapping); 481 g_free(entry); 482 } 483 484 void xen_invalidate_map_cache_entry(uint8_t *buffer) 485 { 486 mapcache_lock(); 487 xen_invalidate_map_cache_entry_unlocked(buffer); 488 mapcache_unlock(); 489 } 490 491 void xen_invalidate_map_cache(void) 492 { 493 unsigned long i; 494 MapCacheRev *reventry; 495 496 /* Flush pending AIO before destroying the mapcache */ 497 bdrv_drain_all(); 498 499 mapcache_lock(); 500 501 QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { 502 if (!reventry->dma) { 503 continue; 504 } 505 fprintf(stderr, "Locked DMA mapping while invalidating mapcache!" 506 " "HWADDR_FMT_plx" -> %p is present\n", 507 reventry->paddr_index, reventry->vaddr_req); 508 } 509 510 for (i = 0; i < mapcache->nr_buckets; i++) { 511 MapCacheEntry *entry = &mapcache->entry[i]; 512 513 if (entry->vaddr_base == NULL) { 514 continue; 515 } 516 if (entry->lock > 0) { 517 continue; 518 } 519 520 if (munmap(entry->vaddr_base, entry->size) != 0) { 521 perror("unmap fails"); 522 exit(-1); 523 } 524 525 entry->paddr_index = 0; 526 entry->vaddr_base = NULL; 527 entry->size = 0; 528 g_free(entry->valid_mapping); 529 entry->valid_mapping = NULL; 530 } 531 532 mapcache->last_entry = NULL; 533 534 mapcache_unlock(); 535 } 536 537 static uint8_t *xen_replace_cache_entry_unlocked(hwaddr old_phys_addr, 538 hwaddr new_phys_addr, 539 hwaddr size) 540 { 541 MapCacheEntry *entry; 542 hwaddr address_index, address_offset; 543 hwaddr test_bit_size, cache_size = size; 544 545 address_index = old_phys_addr >> MCACHE_BUCKET_SHIFT; 546 address_offset = old_phys_addr & (MCACHE_BUCKET_SIZE - 1); 547 548 assert(size); 549 /* test_bit_size is always a multiple of XC_PAGE_SIZE */ 550 test_bit_size = size + (old_phys_addr & (XC_PAGE_SIZE - 1)); 551 if (test_bit_size % XC_PAGE_SIZE) { 552 test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE); 553 } 554 cache_size = size + address_offset; 555 if (cache_size % MCACHE_BUCKET_SIZE) { 556 cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE); 557 } 558 559 entry = &mapcache->entry[address_index % mapcache->nr_buckets]; 560 while (entry && !(entry->paddr_index == address_index && 561 entry->size == cache_size)) { 562 entry = entry->next; 563 } 564 if (!entry) { 565 DPRINTF("Trying to update an entry for "HWADDR_FMT_plx \ 566 "that is not in the mapcache!\n", old_phys_addr); 567 return NULL; 568 } 569 570 address_index = new_phys_addr >> MCACHE_BUCKET_SHIFT; 571 address_offset = new_phys_addr & (MCACHE_BUCKET_SIZE - 1); 572 573 fprintf(stderr, "Replacing a dummy mapcache entry for "HWADDR_FMT_plx \ 574 " with "HWADDR_FMT_plx"\n", old_phys_addr, new_phys_addr); 575 576 xen_remap_bucket(entry, entry->vaddr_base, 577 cache_size, address_index, false); 578 if (!test_bits(address_offset >> XC_PAGE_SHIFT, 579 test_bit_size >> XC_PAGE_SHIFT, 580 entry->valid_mapping)) { 581 DPRINTF("Unable to update a mapcache entry for "HWADDR_FMT_plx"!\n", 582 old_phys_addr); 583 return NULL; 584 } 585 586 return entry->vaddr_base + address_offset; 587 } 588 589 uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr, 590 hwaddr new_phys_addr, 591 hwaddr size) 592 { 593 uint8_t *p; 594 595 mapcache_lock(); 596 p = xen_replace_cache_entry_unlocked(old_phys_addr, new_phys_addr, size); 597 mapcache_unlock(); 598 return p; 599 } 600