/*
 * Copyright (C) 2011 Citrix Ltd.
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu/error-report.h"

#include <sys/resource.h>

#include "hw/xen/xen_native.h"
#include "qemu/bitmap.h"

#include "sysemu/runstate.h"
#include "sysemu/xen-mapcache.h"
#include "trace.h"


#if HOST_LONG_BITS == 32
# define MCACHE_BUCKET_SHIFT 16
# define MCACHE_MAX_SIZE     (1UL<<31) /* 2GB Cap */
#else
# define MCACHE_BUCKET_SHIFT 20
# define MCACHE_MAX_SIZE     (1UL<<35) /* 32GB Cap */
#endif
#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)

/*
 * This is the size of the virtual address space reserved for QEMU that
 * will not be used by the MapCache.
 * Empirical tests showed that QEMU uses about 75MB more than
 * max_mcache_size.
 */
#define NON_MCACHE_MEMORY_SIZE (80 * MiB)

typedef struct MapCacheEntry {
    hwaddr paddr_index;
    uint8_t *vaddr_base;
    unsigned long *valid_mapping;
    uint32_t lock;
#define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0)
    uint8_t flags;
    hwaddr size;
    struct MapCacheEntry *next;
} MapCacheEntry;

typedef struct MapCacheRev {
    uint8_t *vaddr_req;
    hwaddr paddr_index;
    hwaddr size;
    QTAILQ_ENTRY(MapCacheRev) next;
    bool dma;
} MapCacheRev;

typedef struct MapCache {
    MapCacheEntry *entry;
    unsigned long nr_buckets;
    QTAILQ_HEAD(, MapCacheRev) locked_entries;

    /* For most cases (>99.9%), the page address is the same. */
    MapCacheEntry *last_entry;
    unsigned long max_mcache_size;
    unsigned int mcache_bucket_shift;

    phys_offset_to_gaddr_t phys_offset_to_gaddr;
    QemuMutex lock;
    void *opaque;
} MapCache;

static MapCache *mapcache;

static inline void mapcache_lock(void)
{
    qemu_mutex_lock(&mapcache->lock);
}

static inline void mapcache_unlock(void)
{
    qemu_mutex_unlock(&mapcache->lock);
}

/* Return 1 if all bits in [nr, nr + size) of addr are set, 0 otherwise. */
static inline int test_bits(int nr, int size, const unsigned long *addr)
{
    unsigned long res = find_next_zero_bit(addr, size + nr, nr);
    if (res >= nr + size) {
        return 1;
    } else {
        return 0;
    }
}

void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque)
{
    unsigned long size;
    struct rlimit rlimit_as;

    mapcache = g_new0(MapCache, 1);

    mapcache->phys_offset_to_gaddr = f;
    mapcache->opaque = opaque;
    qemu_mutex_init(&mapcache->lock);

    QTAILQ_INIT(&mapcache->locked_entries);

    if (geteuid() == 0) {
        rlimit_as.rlim_cur = RLIM_INFINITY;
        rlimit_as.rlim_max = RLIM_INFINITY;
        mapcache->max_mcache_size = MCACHE_MAX_SIZE;
    } else {
        getrlimit(RLIMIT_AS, &rlimit_as);
        rlimit_as.rlim_cur = rlimit_as.rlim_max;

        if (rlimit_as.rlim_max != RLIM_INFINITY) {
            warn_report("QEMU's maximum size of virtual"
                        " memory is not infinity");
        }
        if (rlimit_as.rlim_max < MCACHE_MAX_SIZE + NON_MCACHE_MEMORY_SIZE) {
            mapcache->max_mcache_size = rlimit_as.rlim_max -
                NON_MCACHE_MEMORY_SIZE;
        } else {
            mapcache->max_mcache_size = MCACHE_MAX_SIZE;
        }
    }

    setrlimit(RLIMIT_AS, &rlimit_as);

    mapcache->nr_buckets =
        (((mapcache->max_mcache_size >> XC_PAGE_SHIFT) +
          (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >>
         (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT));
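    /*
     * The expression above is a rounded-up division: nr_buckets =
     * ceil(max_mcache_size / MCACHE_BUCKET_SIZE). For example, on a 64-bit
     * host capped at 32GB with 1MB buckets, this yields 32768 buckets.
     */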

    size = mapcache->nr_buckets * sizeof (MapCacheEntry);
    size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
    trace_xen_map_cache_init(mapcache->nr_buckets, size);
    mapcache->entry = g_malloc0(size);
}

static void xen_remap_bucket(MapCacheEntry *entry,
                             void *vaddr,
                             hwaddr size,
                             hwaddr address_index,
                             bool dummy)
{
    uint8_t *vaddr_base;
    xen_pfn_t *pfns;
    int *err;
    unsigned int i;
    hwaddr nb_pfn = size >> XC_PAGE_SHIFT;

    trace_xen_remap_bucket(address_index);

    pfns = g_new0(xen_pfn_t, nb_pfn);
    err = g_new0(int, nb_pfn);

    if (entry->vaddr_base != NULL) {
        if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) {
            ram_block_notify_remove(entry->vaddr_base, entry->size,
                                    entry->size);
        }

        /*
         * If an entry is being replaced by another mapping and we're using
         * the MAP_FIXED flag for it - there is a possibility of a race for
         * the vaddr address with another thread doing an mmap call itself
         * (see man 2 mmap). To avoid that we skip explicit unmapping here
         * and allow the kernel to destroy the previous mappings by replacing
         * them in the later mmap call.
         *
         * Therefore, non-identical replacements are not allowed.
         */
        assert(!vaddr || (entry->vaddr_base == vaddr && entry->size == size));

        if (!vaddr && munmap(entry->vaddr_base, entry->size) != 0) {
            perror("unmap fails");
            exit(-1);
        }
    }
    g_free(entry->valid_mapping);
    entry->valid_mapping = NULL;
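    /*
     * Build the list of frames to map: nb_pfn consecutive guest frames
     * starting at the first frame of this bucket, i.e.
     * address_index << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT).
     */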
    for (i = 0; i < nb_pfn; i++) {
        pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) + i;
    }

    /*
     * If the caller has requested the mapping at a specific address use
     * MAP_FIXED to make sure it's honored.
     */
    if (!dummy) {
        vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr,
                                           PROT_READ | PROT_WRITE,
                                           vaddr ? MAP_FIXED : 0,
                                           nb_pfn, pfns, err);
        if (vaddr_base == NULL) {
            perror("xenforeignmemory_map2");
            exit(-1);
        }
    } else {
        /*
         * We create dummy mappings where we are unable to create a foreign
         * mapping immediately due to certain circumstances (i.e. on resume
         * now).
         */
        vaddr_base = mmap(vaddr, size, PROT_READ | PROT_WRITE,
                          MAP_ANON | MAP_SHARED | (vaddr ? MAP_FIXED : 0),
                          -1, 0);
        if (vaddr_base == MAP_FAILED) {
            perror("mmap");
            exit(-1);
        }
    }

    if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) {
        ram_block_notify_add(vaddr_base, size, size);
    }

    entry->vaddr_base = vaddr_base;
    entry->paddr_index = address_index;
    entry->size = size;
    entry->valid_mapping = g_new0(unsigned long,
                                  BITS_TO_LONGS(size >> XC_PAGE_SHIFT));

    if (dummy) {
        entry->flags |= XEN_MAPCACHE_ENTRY_DUMMY;
    } else {
        entry->flags &= ~(XEN_MAPCACHE_ENTRY_DUMMY);
    }

    bitmap_zero(entry->valid_mapping, nb_pfn);
    for (i = 0; i < nb_pfn; i++) {
        if (!err[i]) {
            bitmap_set(entry->valid_mapping, i, 1);
        }
    }

    g_free(pfns);
    g_free(err);
}

static uint8_t *xen_map_cache_unlocked(hwaddr phys_addr, hwaddr size,
                                       uint8_t lock, bool dma)
{
    MapCacheEntry *entry, *pentry = NULL,
                  *free_entry = NULL, *free_pentry = NULL;
    hwaddr address_index;
    hwaddr address_offset;
    hwaddr cache_size = size;
    hwaddr test_bit_size;
    bool translated G_GNUC_UNUSED = false;
    bool dummy = false;

tryagain:
    address_index = phys_addr >> MCACHE_BUCKET_SHIFT;
    address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);

    trace_xen_map_cache(phys_addr);

    /* test_bit_size is always a multiple of XC_PAGE_SIZE */
    if (size) {
        test_bit_size = size + (phys_addr & (XC_PAGE_SIZE - 1));

        if (test_bit_size % XC_PAGE_SIZE) {
            test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE);
        }
    } else {
        test_bit_size = XC_PAGE_SIZE;
    }

    if (mapcache->last_entry != NULL &&
        mapcache->last_entry->paddr_index == address_index &&
        !lock && !size &&
        test_bits(address_offset >> XC_PAGE_SHIFT,
                  test_bit_size >> XC_PAGE_SHIFT,
                  mapcache->last_entry->valid_mapping)) {
        trace_xen_map_cache_return(
            mapcache->last_entry->vaddr_base + address_offset
        );
        return mapcache->last_entry->vaddr_base + address_offset;
    }

    /* size is always a multiple of MCACHE_BUCKET_SIZE */
    if (size) {
        cache_size = size + address_offset;
        if (cache_size % MCACHE_BUCKET_SIZE) {
            cache_size += MCACHE_BUCKET_SIZE -
                (cache_size % MCACHE_BUCKET_SIZE);
        }
    } else {
        cache_size = MCACHE_BUCKET_SIZE;
    }

    entry = &mapcache->entry[address_index % mapcache->nr_buckets];

    while (entry && (lock || entry->lock) && entry->vaddr_base &&
            (entry->paddr_index != address_index ||
             entry->size != cache_size ||
             !test_bits(address_offset >> XC_PAGE_SHIFT,
                        test_bit_size >> XC_PAGE_SHIFT,
                        entry->valid_mapping))) {
        if (!free_entry && !entry->lock) {
            free_entry = entry;
            free_pentry = pentry;
        }
        pentry = entry;
        entry = entry->next;
    }
    if (!entry && free_entry) {
        entry = free_entry;
        pentry = free_pentry;
    }
    if (!entry) {
        entry = g_new0(MapCacheEntry, 1);
        pentry->next = entry;
        xen_remap_bucket(entry, NULL, cache_size, address_index, dummy);
    } else if (!entry->lock) {
        if (!entry->vaddr_base || entry->paddr_index != address_index ||
            entry->size != cache_size ||
            !test_bits(address_offset >> XC_PAGE_SHIFT,
                       test_bit_size >> XC_PAGE_SHIFT,
                       entry->valid_mapping)) {
            xen_remap_bucket(entry, NULL, cache_size, address_index, dummy);
        }
    }

    if (!test_bits(address_offset >> XC_PAGE_SHIFT,
                   test_bit_size >> XC_PAGE_SHIFT,
                   entry->valid_mapping)) {
        mapcache->last_entry = NULL;
#ifdef XEN_COMPAT_PHYSMAP
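        /*
         * Under the legacy physmap model, the address QEMU was handed may
         * be a RAM offset rather than the guest physical address backing
         * it; translate it through the registered callback and retry once.
         */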
        if (!translated && mapcache->phys_offset_to_gaddr) {
            phys_addr = mapcache->phys_offset_to_gaddr(phys_addr, size);
            translated = true;
            goto tryagain;
        }
#endif
        if (!dummy && runstate_check(RUN_STATE_INMIGRATE)) {
            dummy = true;
            goto tryagain;
        }
        trace_xen_map_cache_return(NULL);
        return NULL;
    }

    mapcache->last_entry = entry;
    if (lock) {
        MapCacheRev *reventry = g_new0(MapCacheRev, 1);
        entry->lock++;
        if (entry->lock == 0) {
            error_report("mapcache entry lock overflow: "HWADDR_FMT_plx" -> %p",
                         entry->paddr_index, entry->vaddr_base);
            abort();
        }
        reventry->dma = dma;
        reventry->vaddr_req = mapcache->last_entry->vaddr_base + address_offset;
        reventry->paddr_index = mapcache->last_entry->paddr_index;
        reventry->size = entry->size;
        QTAILQ_INSERT_HEAD(&mapcache->locked_entries, reventry, next);
    }

    trace_xen_map_cache_return(
        mapcache->last_entry->vaddr_base + address_offset
    );
    return mapcache->last_entry->vaddr_base + address_offset;
}

uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size,
                       uint8_t lock, bool dma)
{
    uint8_t *p;

    mapcache_lock();
    p = xen_map_cache_unlocked(phys_addr, size, lock, dma);
    mapcache_unlock();
    return p;
}

ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
{
    MapCacheEntry *entry = NULL;
    MapCacheRev *reventry;
    hwaddr paddr_index;
    hwaddr size;
    ram_addr_t raddr;
    int found = 0;

    mapcache_lock();
    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
        if (reventry->vaddr_req == ptr) {
            paddr_index = reventry->paddr_index;
            size = reventry->size;
            found = 1;
            break;
        }
    }
    if (!found) {
        trace_xen_ram_addr_from_mapcache_not_found(ptr);
        QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
            trace_xen_ram_addr_from_mapcache_found(reventry->paddr_index,
                                                   reventry->vaddr_req);
        }
        abort();
        return 0;
    }

    entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
    while (entry && (entry->paddr_index != paddr_index ||
                     entry->size != size)) {
        entry = entry->next;
    }
    if (!entry) {
        trace_xen_ram_addr_from_mapcache_not_in_cache(ptr);
        raddr = 0;
    } else {
        raddr = (reventry->paddr_index << MCACHE_BUCKET_SHIFT) +
            ((unsigned long) ptr - (unsigned long) entry->vaddr_base);
    }
    mapcache_unlock();
    return raddr;
}

static void xen_invalidate_map_cache_entry_unlocked(uint8_t *buffer)
{
    MapCacheEntry *entry = NULL, *pentry = NULL;
    MapCacheRev *reventry;
    hwaddr paddr_index;
    hwaddr size;
    int found = 0;

    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
        if (reventry->vaddr_req == buffer) {
            paddr_index = reventry->paddr_index;
            size = reventry->size;
            found = 1;
            break;
        }
    }
    if (!found) {
        trace_xen_invalidate_map_cache_entry_unlocked_not_found(buffer);
        QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
            trace_xen_invalidate_map_cache_entry_unlocked_found(
                reventry->paddr_index,
                reventry->vaddr_req
            );
        }
        return;
    }
    QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next);
    g_free(reventry);

    if (mapcache->last_entry != NULL &&
        mapcache->last_entry->paddr_index == paddr_index) {
        mapcache->last_entry = NULL;
    }

    entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
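    /*
     * Scan this bucket's collision chain for the entry backing the buffer;
     * pentry tracks the predecessor so the entry can be unlinked below once
     * its last lock is dropped.
     */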
    while (entry && (entry->paddr_index != paddr_index ||
                     entry->size != size)) {
        pentry = entry;
        entry = entry->next;
    }
    if (!entry) {
        trace_xen_invalidate_map_cache_entry_unlocked_miss(buffer);
        return;
    }
    entry->lock--;
    if (entry->lock > 0 || pentry == NULL) {
        return;
    }

    pentry->next = entry->next;
    ram_block_notify_remove(entry->vaddr_base, entry->size, entry->size);
    if (munmap(entry->vaddr_base, entry->size) != 0) {
        perror("unmap fails");
        exit(-1);
    }
    g_free(entry->valid_mapping);
    g_free(entry);
}

typedef struct XenMapCacheData {
    Coroutine *co;
    uint8_t *buffer;
} XenMapCacheData;

static void xen_invalidate_map_cache_entry_bh(void *opaque)
{
    XenMapCacheData *data = opaque;

    mapcache_lock();
    xen_invalidate_map_cache_entry_unlocked(data->buffer);
    mapcache_unlock();

    aio_co_wake(data->co);
}

void coroutine_mixed_fn xen_invalidate_map_cache_entry(uint8_t *buffer)
{
    if (qemu_in_coroutine()) {
        XenMapCacheData data = {
            .co = qemu_coroutine_self(),
            .buffer = buffer,
        };
        aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
                                xen_invalidate_map_cache_entry_bh, &data);
        qemu_coroutine_yield();
    } else {
        mapcache_lock();
        xen_invalidate_map_cache_entry_unlocked(buffer);
        mapcache_unlock();
    }
}

void xen_invalidate_map_cache(void)
{
    unsigned long i;
    MapCacheRev *reventry;

    /* Flush pending AIO before destroying the mapcache */
    bdrv_drain_all();

    mapcache_lock();

    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
        if (!reventry->dma) {
            continue;
        }
        trace_xen_invalidate_map_cache(reventry->paddr_index,
                                       reventry->vaddr_req);
    }

    for (i = 0; i < mapcache->nr_buckets; i++) {
        MapCacheEntry *entry = &mapcache->entry[i];

        if (entry->vaddr_base == NULL) {
            continue;
        }
        if (entry->lock > 0) {
            continue;
        }

        if (munmap(entry->vaddr_base, entry->size) != 0) {
            perror("unmap fails");
            exit(-1);
        }

        entry->paddr_index = 0;
        entry->vaddr_base = NULL;
        entry->size = 0;
        g_free(entry->valid_mapping);
        entry->valid_mapping = NULL;
    }

    mapcache->last_entry = NULL;

    mapcache_unlock();
}

static uint8_t *xen_replace_cache_entry_unlocked(hwaddr old_phys_addr,
                                                 hwaddr new_phys_addr,
                                                 hwaddr size)
{
    MapCacheEntry *entry;
    hwaddr address_index, address_offset;
    hwaddr test_bit_size, cache_size = size;

    address_index = old_phys_addr >> MCACHE_BUCKET_SHIFT;
    address_offset = old_phys_addr & (MCACHE_BUCKET_SIZE - 1);

    assert(size);
    /* test_bit_size is always a multiple of XC_PAGE_SIZE */
    test_bit_size = size + (old_phys_addr & (XC_PAGE_SIZE - 1));
    if (test_bit_size % XC_PAGE_SIZE) {
        test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE);
    }
    cache_size = size + address_offset;
    if (cache_size % MCACHE_BUCKET_SIZE) {
        cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE);
    }

    entry = &mapcache->entry[address_index % mapcache->nr_buckets];
    while (entry && !(entry->paddr_index == address_index &&
                      entry->size == cache_size)) {
        entry = entry->next;
    }
    if (!entry) {
        trace_xen_replace_cache_entry_unlocked(old_phys_addr);
        return NULL;
    }

    address_index = new_phys_addr >> MCACHE_BUCKET_SHIFT;
    address_offset = new_phys_addr & (MCACHE_BUCKET_SIZE - 1);
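    /*
     * Remap the entry in place: passing its current vaddr_base to
     * xen_remap_bucket() selects the MAP_FIXED path, so pointers previously
     * handed out for old_phys_addr stay valid and now reference the frames
     * of new_phys_addr.
     */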
    trace_xen_replace_cache_entry_dummy(old_phys_addr, new_phys_addr);

    xen_remap_bucket(entry, entry->vaddr_base,
                     cache_size, address_index, false);
    if (!test_bits(address_offset >> XC_PAGE_SHIFT,
                   test_bit_size >> XC_PAGE_SHIFT,
                   entry->valid_mapping)) {
        trace_xen_replace_cache_entry_unlocked_could_not_update_entry(
            old_phys_addr
        );
        return NULL;
    }

    return entry->vaddr_base + address_offset;
}

uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr,
                                 hwaddr new_phys_addr,
                                 hwaddr size)
{
    uint8_t *p;

    mapcache_lock();
    p = xen_replace_cache_entry_unlocked(old_phys_addr, new_phys_addr, size);
    mapcache_unlock();
    return p;
}
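/*
 * Illustrative usage sketch (not part of this file; callers normally reach
 * these helpers through the memory API, e.g. address_space_map()):
 *
 *     uint8_t *p = xen_map_cache(gpa, len, 1, true);  // locked DMA mapping
 *     if (p) {
 *         memcpy(p, data, len);                       // access guest RAM
 *         xen_invalidate_map_cache_entry(p);          // drop the lock
 *     }
 */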