/*
 * Common CPU TLB handling
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
#include "exec/cpu_ldst.h"
#include "exec/cputlb.h"
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "tcg/tcg.h"
#include "qemu/error-report.h"
#include "exec/log.h"
#include "exec/helper-proto.h"
#include "qemu/atomic.h"
#include "qemu/atomic128.h"
#include "translate-all.h"
#include "trace-root.h"
#include "trace/mem.h"
#ifdef CONFIG_PLUGIN
#include "qemu/plugin-memory.h"
#endif

/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
/* #define DEBUG_TLB */
/* #define DEBUG_TLB_LOG */

#ifdef DEBUG_TLB
# define DEBUG_TLB_GATE 1
# ifdef DEBUG_TLB_LOG
#  define DEBUG_TLB_LOG_GATE 1
# else
#  define DEBUG_TLB_LOG_GATE 0
# endif
#else
# define DEBUG_TLB_GATE 0
# define DEBUG_TLB_LOG_GATE 0
#endif

/*
 * Debug tracing: compiles to nothing unless one of the gates above is
 * enabled, so the fprintf/qemu_log_mask calls cost nothing in release
 * builds (the compiler removes the dead if-arms).
 */
#define tlb_debug(fmt, ...) do { \
    if (DEBUG_TLB_LOG_GATE) { \
        qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
                      ## __VA_ARGS__); \
    } else if (DEBUG_TLB_GATE) { \
        fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
    } \
} while (0)

/*
 * TLB state is owned by a single vCPU thread; in debug builds assert
 * we are on that thread (or the vCPU has not been created yet).
 */
#define assert_cpu_is_self(cpu) do { \
    if (DEBUG_TLB_GATE) { \
        g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \
    } \
} while (0)

/* run_on_cpu_data.target_ptr should always be big enough for a
 * target_ulong even on 32 bit builds */
QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));

/* We currently can't handle more than 16 bits in the MMUIDX bitmask.
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)

/* Number of entries in the fast TLB; fast->mask encodes (size - 1). */
static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
{
    return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
}

/* Size in bytes of the fast TLB's entry array. */
static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
{
    return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
}

/* Begin a new use-rate observation window at time @ns. */
static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
                             size_t max_entries)
{
    desc->window_begin_ns = ns;
    desc->window_max_entries = max_entries;
}

/**
 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
 * @desc: The CPUTLBDesc portion of the TLB
 * @fast: The CPUTLBDescFast portion of the same TLB
 *
 * Called with tlb_lock_held.
 *
 * We have two main constraints when resizing a TLB: (1) we only resize it
 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
 * the array or unnecessarily flushing it), which means we do not control how
 * frequently the resizing can occur; (2) we don't have access to the guest's
 * future scheduling decisions, and therefore have to decide the magnitude of
 * the resize based on past observations.
 *
 * In general, a memory-hungry process can benefit greatly from an appropriately
 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
 * we just have to make the TLB as large as possible; while an oversized TLB
 * results in minimal TLB miss rates, it also takes longer to be flushed
 * (flushes can be _very_ frequent), and the reduced locality can also hurt
 * performance.
 *
 * To achieve near-optimal performance for all kinds of workloads, we:
 *
 * 1. Aggressively increase the size of the TLB when the use rate of the
 * TLB being flushed is high, since it is likely that in the near future this
 * memory-hungry process will execute again, and its memory hungriness will
 * probably be similar.
 *
 * 2. Slowly reduce the size of the TLB as the use rate declines over a
 * reasonably large time window. The rationale is that if in such a time window
 * we have not observed a high TLB use rate, it is likely that we won't observe
 * it in the near future. In that case, once a time window expires we downsize
 * the TLB to match the maximum use rate observed in the window.
 *
 * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
 * since in that range performance is likely near-optimal. Recall that the TLB
 * is direct mapped, so we want the use rate to be low (or at least not too
 * high), since otherwise we are likely to have a significant amount of
 * conflict misses.
 */
static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
                                  int64_t now)
{
    size_t old_size = tlb_n_entries(fast);
    size_t rate;
    size_t new_size = old_size;
    int64_t window_len_ms = 100;
    int64_t window_len_ns = window_len_ms * 1000 * 1000;
    bool window_expired = now > desc->window_begin_ns + window_len_ns;

    if (desc->n_used_entries > desc->window_max_entries) {
        desc->window_max_entries = desc->n_used_entries;
    }
    rate = desc->window_max_entries * 100 / old_size;

    if (rate > 70) {
        new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
    } else if (rate < 30 && window_expired) {
        size_t ceil = pow2ceil(desc->window_max_entries);
        size_t expected_rate = desc->window_max_entries * 100 / ceil;

        /*
         * Avoid undersizing when the max number of entries seen is just below
         * a pow2. For instance, if max_entries == 1025, the expected use rate
         * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
         * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
         * later. Thus, make sure that the expected use rate remains below 70%.
         * (and since we double the size, that means the lowest rate we'd
         * expect to get is 35%, which is still in the 30-70% range where
         * we consider that the size is appropriate.)
         */
        if (expected_rate > 70) {
            ceil *= 2;
        }
        new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
    }

    if (new_size == old_size) {
        if (window_expired) {
            tlb_window_reset(desc, now, desc->n_used_entries);
        }
        return;
    }

    g_free(fast->table);
    g_free(desc->iotlb);

    tlb_window_reset(desc, now, 0);
    /* desc->n_used_entries is cleared by the caller */
    fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
    fast->table = g_try_new(CPUTLBEntry, new_size);
    desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);

    /*
     * If the allocations fail, try smaller sizes. We just freed some
     * memory, so going back to half of new_size has a good chance of working.
     * Increased memory pressure elsewhere in the system might cause the
     * allocations to fail though, so we progressively reduce the allocation
     * size, aborting if we cannot even allocate the smallest TLB we support.
     */
    while (fast->table == NULL || desc->iotlb == NULL) {
        if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
            /* NOTE(review): g_try_new() is not documented to set errno on
             * failure, so this strerror(errno) may be stale — confirm. */
            error_report("%s: %s", __func__, strerror(errno));
            abort();
        }
        new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
        fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;

        g_free(fast->table);
        g_free(desc->iotlb);
        fast->table = g_try_new(CPUTLBEntry, new_size);
        desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
    }
}

/* Wipe one mmu_idx's TLB: fast table, victim table, large-page tracking. */
static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
{
    desc->n_used_entries = 0;
    desc->large_page_addr = -1;
    desc->large_page_mask = -1;
    desc->vindex = 0;
    /* memset to -1 sets every addr_* field to an impossible address. */
    memset(fast->table, -1, sizeof_tlb(fast));
    memset(desc->vtable, -1, sizeof(desc->vtable));
}

/* Resize (if warranted) and flush one mmu_idx's TLB.  Called with the
 * TLB lock held. */
static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx,
                                        int64_t now)
{
    CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
    CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];

    tlb_mmu_resize_locked(desc, fast, now);
    tlb_mmu_flush_locked(desc, fast);
}

/* Allocate and initialize one mmu_idx's TLB at the default size. */
static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
{
    size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;

    tlb_window_reset(desc, now, 0);
    desc->n_used_entries = 0;
    fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
    fast->table = g_new(CPUTLBEntry, n_entries);
    desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
    tlb_mmu_flush_locked(desc, fast);
}

static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
{
    env_tlb(env)->d[mmu_idx].n_used_entries++;
}

static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
{
    env_tlb(env)->d[mmu_idx].n_used_entries--;
}

/* Initialize all of a vCPU's TLBs; called once at vCPU creation. */
void tlb_init(CPUState *cpu)
{
    CPUArchState *env = cpu->env_ptr;
    int64_t now = get_clock_realtime();
    int i;

    qemu_spin_init(&env_tlb(env)->c.lock);

    /* All tlbs are initialized flushed. */
    env_tlb(env)->c.dirty = 0;

    for (i = 0; i < NB_MMU_MODES; i++) {
        tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
    }
}

/* flush_all_helper: queue fn to run asynchronously on every cpu other
 * than src.  The caller arranges synchronisation where required, e.g.
 * by also queueing fn as "safe" work on src so that all queued work is
 * finished before execution starts again.
 */
static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
                             run_on_cpu_data d)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu != src) {
            async_run_on_cpu(cpu, fn, d);
        }
    }
}

/* Sum the full/partial/elided flush counters over all cpus. */
void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
{
    CPUState *cpu;
    size_t full = 0, part = 0, elide = 0;

    CPU_FOREACH(cpu) {
        CPUArchState *env = cpu->env_ptr;

        full += atomic_read(&env_tlb(env)->c.full_flush_count);
        part += atomic_read(&env_tlb(env)->c.part_flush_count);
        elide += atomic_read(&env_tlb(env)->c.elide_flush_count);
    }
    *pfull = full;
    *ppart = part;
    *pelide = elide;
}

/* Worker: flush the TLBs named by the mmu_idx bitmask in @data, but
 * only those that are actually dirty; clean ones are elided. */
static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
{
    CPUArchState *env = cpu->env_ptr;
    uint16_t asked = data.host_int;
    uint16_t all_dirty, work, to_clean;
    int64_t now = get_clock_realtime();

    assert_cpu_is_self(cpu);

    tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);

    qemu_spin_lock(&env_tlb(env)->c.lock);

    all_dirty = env_tlb(env)->c.dirty;
    to_clean = asked & all_dirty;
    all_dirty &= ~to_clean;
    env_tlb(env)->c.dirty = all_dirty;

    /* work &= work - 1 clears the lowest set bit each iteration. */
    for (work = to_clean; work != 0; work &= work - 1) {
        int mmu_idx = ctz32(work);
        tlb_flush_one_mmuidx_locked(env, mmu_idx, now);
    }

    qemu_spin_unlock(&env_tlb(env)->c.lock);

    cpu_tb_jmp_cache_clear(cpu);

    if (to_clean == ALL_MMUIDX_BITS) {
        atomic_set(&env_tlb(env)->c.full_flush_count,
                   env_tlb(env)->c.full_flush_count + 1);
    } else {
        atomic_set(&env_tlb(env)->c.part_flush_count,
                   env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
        if (to_clean != asked) {
            atomic_set(&env_tlb(env)->c.elide_flush_count,
                       env_tlb(env)->c.elide_flush_count +
                       ctpop16(asked & ~to_clean));
        }
    }
}

/* Flush @cpu's TLBs for the mmu_idx set in @idxmap, running the work on
 * the vCPU's own thread (directly, or queued via async work). */
void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
{
    tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);

    if (cpu->created && !qemu_cpu_is_self(cpu)) {
        async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
                         RUN_ON_CPU_HOST_INT(idxmap));
    } else {
        tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
    }
}

void tlb_flush(CPUState *cpu)
{
    tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
}

/* As tlb_flush_by_mmuidx, but on every cpu (src runs it synchronously). */
void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;

    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
}

void tlb_flush_all_cpus(CPUState *src_cpu)
{
    tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
}

/* As above, but src's own flush is queued as "safe" work so execution
 * resumes only after all cpus have completed their flush. */
void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;

    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
}

void tlb_flush_all_cpus_synced(CPUState *src_cpu)
{
    tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
}

/* True if @page matches the entry under any of read/write/exec. */
static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
                                        target_ulong page)
{
    return tlb_hit_page(tlb_entry->addr_read, page) ||
           tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
           tlb_hit_page(tlb_entry->addr_code, page);
}

/**
 * tlb_entry_is_empty - return true if the entry is not in use
 * @te: pointer to CPUTLBEntry
 */
static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
{
    return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
}

/* Called with tlb_c.lock held.  Returns true if the entry matched @page
 * and was invalidated. */
static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
                                          target_ulong page)
{
    if (tlb_hit_page_anyprot(tlb_entry, page)) {
        memset(tlb_entry, -1, sizeof(*tlb_entry));
        return true;
    }
    return false;
}

/* Called with tlb_c.lock held.  Flush @page from the victim TLB. */
static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
                                              target_ulong page)
{
    CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
    int k;

    assert_cpu_is_self(env_cpu(env));
    for (k = 0; k < CPU_VTLB_SIZE; k++) {
        if (tlb_flush_entry_locked(&d->vtable[k], page)) {
            tlb_n_used_entries_dec(env, mmu_idx);
        }
    }
}

/* Flush one page from one mmu_idx, falling back to a full flush of that
 * mmu_idx when the page lies inside a tracked large page.  Called with
 * the TLB lock held. */
static void tlb_flush_page_locked(CPUArchState *env, int midx,
                                  target_ulong page)
{
    target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
    target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;

    /* Check if we need to flush due to large pages. */
    if ((page & lp_mask) == lp_addr) {
        tlb_debug("forcing full flush midx %d ("
                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
                  midx, lp_addr, lp_mask);
        tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
    } else {
        if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
            tlb_n_used_entries_dec(env, midx);
        }
        tlb_flush_vtlb_page_locked(env, midx, page);
    }
}

/**
 * tlb_flush_page_by_mmuidx_async_0:
 * @cpu: cpu on which to flush
 * @addr: page of virtual address to flush
 * @idxmap: set of mmu_idx to flush
 *
 * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
 * at @addr from the tlbs indicated by @idxmap from @cpu.
 */
static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
                                             target_ulong addr,
                                             uint16_t idxmap)
{
    CPUArchState *env = cpu->env_ptr;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);

    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        if ((idxmap >> mmu_idx) & 1) {
            tlb_flush_page_locked(env, mmu_idx, addr);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);

    tb_flush_jmp_cache(cpu, addr);
}

/**
 * tlb_flush_page_by_mmuidx_async_1:
 * @cpu: cpu on which to flush
 * @data: encoded addr + idxmap
 *
 * Helper for tlb_flush_page_by_mmuidx and friends, called through
 * async_run_on_cpu. The idxmap parameter is encoded in the page
 * offset of the target_ptr field. This limits the set of mmu_idx
 * that can be passed via this method.
 */
static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
                                             run_on_cpu_data data)
{
    target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
    target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
    uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;

    tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
}

typedef struct {
    target_ulong addr;      /* page-aligned virtual address to flush */
    uint16_t idxmap;        /* set of mmu_idx to flush */
} TLBFlushPageByMMUIdxData;

/**
 * tlb_flush_page_by_mmuidx_async_2:
 * @cpu: cpu on which to flush
 * @data: allocated addr + idxmap
 *
 * Helper for tlb_flush_page_by_mmuidx and friends, called through
 * async_run_on_cpu. The addr+idxmap parameters are stored in a
 * TLBFlushPageByMMUIdxData structure that has been allocated
 * specifically for this helper. Free the structure when done.
 */
static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
                                             run_on_cpu_data data)
{
    TLBFlushPageByMMUIdxData *d = data.host_ptr;

    tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
    g_free(d);
}

/* Flush one page of @cpu's TLBs for the mmu_idx set in @idxmap. */
void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
{
    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);

    /* This should already be page aligned */
    addr &= TARGET_PAGE_MASK;

    if (qemu_cpu_is_self(cpu)) {
        tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
    } else if (idxmap < TARGET_PAGE_SIZE) {
        /*
         * Most targets have only a few mmu_idx. In the case where
         * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
         * allocating memory for this operation.
         */
        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    } else {
        TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);

        /* Otherwise allocate a structure, freed by the worker. */
        d->addr = addr;
        d->idxmap = idxmap;
        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
                         RUN_ON_CPU_HOST_PTR(d));
    }
}

void tlb_flush_page(CPUState *cpu, target_ulong addr)
{
    tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
}

/* As tlb_flush_page_by_mmuidx, but on every cpu; src flushes directly. */
void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
                                       uint16_t idxmap)
{
    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    /* This should already be page aligned */
    addr &= TARGET_PAGE_MASK;

    /*
     * Allocate memory to hold addr+idxmap only when needed.
     * See tlb_flush_page_by_mmuidx for details.
     */
    if (idxmap < TARGET_PAGE_SIZE) {
        flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    } else {
        CPUState *dst_cpu;

        /* Allocate a separate data block for each destination cpu. */
        CPU_FOREACH(dst_cpu) {
            if (dst_cpu != src_cpu) {
                TLBFlushPageByMMUIdxData *d
                    = g_new(TLBFlushPageByMMUIdxData, 1);

                d->addr = addr;
                d->idxmap = idxmap;
                async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
                                 RUN_ON_CPU_HOST_PTR(d));
            }
        }
    }

    tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
}

void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
{
    tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
}

/* As above, but src's flush is queued as "safe" work: execution resumes
 * only after every cpu has completed its flush. */
void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
                                              target_ulong addr,
                                              uint16_t idxmap)
{
    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    /* This should already be page aligned */
    addr &= TARGET_PAGE_MASK;

    /*
     * Allocate memory to hold addr+idxmap only when needed.
     * See tlb_flush_page_by_mmuidx for details.
     */
    if (idxmap < TARGET_PAGE_SIZE) {
        flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
                              RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    } else {
        CPUState *dst_cpu;
        TLBFlushPageByMMUIdxData *d;

        /* Allocate a separate data block for each destination cpu. */
        CPU_FOREACH(dst_cpu) {
            if (dst_cpu != src_cpu) {
                d = g_new(TLBFlushPageByMMUIdxData, 1);
                d->addr = addr;
                d->idxmap = idxmap;
                async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
                                 RUN_ON_CPU_HOST_PTR(d));
            }
        }

        d = g_new(TLBFlushPageByMMUIdxData, 1);
        d->addr = addr;
        d->idxmap = idxmap;
        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
                              RUN_ON_CPU_HOST_PTR(d));
    }
}

void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
{
    tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
}

/* update the TLBs so that writes to code in the virtual page 'addr'
   can be detected */
void tlb_protect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
                                             DIRTY_MEMORY_CODE);
}

/* update the TLB so that writes in physical page 'phys_addr' are no longer
   tested for self modifying code */
void tlb_unprotect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
}


/*
 * Dirty write flag handling
 *
 * When the TCG code writes to a location it looks up the address in
 * the TLB and uses that data to compute the final address. If any of
 * the lower bits of the address are set then the slow path is forced.
 * There are a number of reasons to do this but for normal RAM the
 * most usual is detecting writes to code regions which may invalidate
 * generated code.
 *
 * Other vCPUs might be reading their TLBs during guest execution, so we update
 * te->addr_write with atomic_set. We don't need to worry about this for
 * oversized guests as MTTCG is disabled for them.
 *
 * Called with tlb_c.lock held.
 */
static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
                                         uintptr_t start, uintptr_t length)
{
    uintptr_t addr = tlb_entry->addr_write;

    /* Only plain RAM entries (no special flags set) need the slow path. */
    if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
                 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
        addr &= TARGET_PAGE_MASK;
        addr += tlb_entry->addend;
        if ((addr - start) < length) {
#if TCG_OVERSIZED_GUEST
            tlb_entry->addr_write |= TLB_NOTDIRTY;
#else
            atomic_set(&tlb_entry->addr_write,
                       tlb_entry->addr_write | TLB_NOTDIRTY);
#endif
        }
    }
}

/*
 * Called with tlb_c.lock held.
 * Called only from the vCPU context, i.e. the TLB's owner thread.
 */
static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
{
    *d = *s;
}

/* This is a cross vCPU call (i.e. another vCPU resetting the flags of
 * the target vCPU).
 * We must take tlb_c.lock to avoid racing with another vCPU update. The only
 * thing actually updated is the target TLB entry ->addr_write flags.
 */
void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
{
    CPUArchState *env;

    int mmu_idx;

    env = cpu->env_ptr;
    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        unsigned int i;
        unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);

        /* Main (direct-mapped) TLB ... */
        for (i = 0; i < n; i++) {
            tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
                                         start1, length);
        }

        /* ... and the victim TLB. */
        for (i = 0; i < CPU_VTLB_SIZE; i++) {
            tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
                                         start1, length);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);
}

/* Called with tlb_c.lock held */
static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
                                         target_ulong vaddr)
{
    if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
        tlb_entry->addr_write = vaddr;
    }
}

/* update the TLB corresponding to virtual page vaddr
   so that it is no longer dirty */
void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
{
    CPUArchState *env = cpu->env_ptr;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    vaddr &= TARGET_PAGE_MASK;
    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
    }

    /* Also clear TLB_NOTDIRTY in any matching victim TLB entries. */
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        int k;
        for (k = 0; k < CPU_VTLB_SIZE; k++) {
            tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);
}

/* Our TLB does not support large pages, so remember the area covered by
   large pages and trigger a full TLB flush if these are invalidated. */
static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
                               target_ulong vaddr, target_ulong size)
{
    target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
    target_ulong lp_mask = ~(size - 1);

    if (lp_addr == (target_ulong)-1) {
        /* No previous large page. */
        lp_addr = vaddr;
    } else {
        /* Extend the existing region to include the new page.
           This is a compromise between unnecessary flushes and
           the cost of maintaining a full variable size TLB. */
        lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
        while (((lp_addr ^ vaddr) & lp_mask) != 0) {
            lp_mask <<= 1;
        }
    }
    env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
    env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
}

/* Add a new TLB entry. At most one entry for a given virtual address
 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
 * supplied size is only used by tlb_flush_page.
 *
 * Called from TCG-generated code, which is under an RCU read-side
 * critical section.
 */
void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
                             hwaddr paddr, MemTxAttrs attrs, int prot,
                             int mmu_idx, target_ulong size)
{
    CPUArchState *env = cpu->env_ptr;
    CPUTLB *tlb = env_tlb(env);
    CPUTLBDesc *desc = &tlb->d[mmu_idx];
    MemoryRegionSection *section;
    unsigned int index;
    target_ulong address;
    target_ulong write_address;
    uintptr_t addend;
    CPUTLBEntry *te, tn;
    hwaddr iotlb, xlat, sz, paddr_page;
    target_ulong vaddr_page;
    int asidx = cpu_asidx_from_attrs(cpu, attrs);
    int wp_flags;
    bool is_ram, is_romd;

    assert_cpu_is_self(cpu);

    if (size <= TARGET_PAGE_SIZE) {
        sz = TARGET_PAGE_SIZE;
    } else {
        /* Track the large-page region so invalidations force a full flush. */
        tlb_add_large_page(env, mmu_idx, vaddr, size);
        sz = size;
    }
    vaddr_page = vaddr & TARGET_PAGE_MASK;
    paddr_page = paddr & TARGET_PAGE_MASK;

    section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
                                                &xlat, &sz, attrs, &prot);
    assert(sz >= TARGET_PAGE_SIZE);

    tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
              " prot=%x idx=%d\n",
              vaddr, paddr, prot, mmu_idx);

    address = vaddr_page;
    if (size < TARGET_PAGE_SIZE) {
        /* Repeat the MMU check and TLB fill on every access. */
        address |= TLB_INVALID_MASK;
    }
    if (attrs.byte_swap) {
        address |= TLB_BSWAP;
    }

    is_ram = memory_region_is_ram(section->mr);
    is_romd = memory_region_is_romd(section->mr);

    if (is_ram || is_romd) {
        /* RAM and ROMD both have associated host memory. */
        addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
    } else {
        /* I/O does not; force the host address to NULL. */
        addend = 0;
    }

    write_address = address;
    if (is_ram) {
        iotlb = memory_region_get_ram_addr(section->mr) + xlat;
        /*
         * Computing is_clean is expensive; avoid all that unless
         * the page is actually writable.
         */
        if (prot & PAGE_WRITE) {
            if (section->readonly) {
                write_address |= TLB_DISCARD_WRITE;
            } else if (cpu_physical_memory_is_clean(iotlb)) {
                write_address |= TLB_NOTDIRTY;
            }
        }
    } else {
        /* I/O or ROMD */
        iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
        /*
         * Writes to romd devices must go through MMIO to enable write.
         * Reads to romd devices go through the ram_ptr found above,
         * but of course reads to I/O must go through MMIO.
         */
        write_address |= TLB_MMIO;
        if (!is_romd) {
            address = write_address;
        }
    }

    wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
                                              TARGET_PAGE_SIZE);

    index = tlb_index(env, mmu_idx, vaddr_page);
    te = tlb_entry(env, mmu_idx, vaddr_page);

    /*
     * Hold the TLB lock for the rest of the function. We could acquire/release
     * the lock several times in the function, but it is faster to amortize the
     * acquisition cost by acquiring it just once. Note that this leads to
     * a longer critical section, but this is not a concern since the TLB lock
     * is unlikely to be contended.
     */
    qemu_spin_lock(&tlb->c.lock);

    /* Note that the tlb is no longer clean. */
    tlb->c.dirty |= 1 << mmu_idx;

    /* Make sure there's no cached translation for the new page. */
    tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);

    /*
     * Only evict the old entry to the victim tlb if it's for a
     * different page; otherwise just overwrite the stale data.
     */
    if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
        unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
        CPUTLBEntry *tv = &desc->vtable[vidx];

        /* Evict the old entry into the victim tlb. */
        copy_tlb_helper_locked(tv, te);
        desc->viotlb[vidx] = desc->iotlb[index];
        tlb_n_used_entries_dec(env, mmu_idx);
    }

    /* refill the tlb */
    /*
     * At this point iotlb contains a physical section number in the lower
     * TARGET_PAGE_BITS, and either
     *  + the ram_addr_t of the page base of the target RAM (RAM)
     *  + the offset within section->mr of the page base (I/O, ROMD)
     * We subtract the vaddr_page (which is page aligned and thus won't
     * disturb the low bits) to give an offset which can be added to the
     * (non-page-aligned) vaddr of the eventual memory access to get
     * the MemoryRegion offset for the access. Note that the vaddr we
     * subtract here is that of the page base, and not the same as the
     * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
     */
    desc->iotlb[index].addr = iotlb - vaddr_page;
    desc->iotlb[index].attrs = attrs;

    /* Now calculate the new entry */
    tn.addend = addend - vaddr_page;
    if (prot & PAGE_READ) {
        tn.addr_read = address;
        if (wp_flags & BP_MEM_READ) {
            tn.addr_read |= TLB_WATCHPOINT;
        }
    } else {
        tn.addr_read = -1;
    }

    if (prot & PAGE_EXEC) {
        tn.addr_code = address;
    } else {
        tn.addr_code = -1;
    }

    tn.addr_write = -1;
    if (prot & PAGE_WRITE) {
        tn.addr_write = write_address;
        if (prot & PAGE_WRITE_INV) {
            tn.addr_write |= TLB_INVALID_MASK;
        }
        if (wp_flags & BP_MEM_WRITE) {
            tn.addr_write |= TLB_WATCHPOINT;
        }
    }

    copy_tlb_helper_locked(te, &tn);
    tlb_n_used_entries_inc(env, mmu_idx);
    qemu_spin_unlock(&tlb->c.lock);
}

/* Add a new TLB entry, but without specifying the memory
 * transaction attributes to be used.
 */
void tlb_set_page(CPUState *cpu, target_ulong vaddr,
                  hwaddr paddr, int prot,
                  int mmu_idx, target_ulong size)
{
    tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
                            prot, mmu_idx, size);
}

/*
 * Translate a host pointer into a ram_addr_t.  A pointer that does not
 * map back into guest RAM is a fatal error.
 */
static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
    ram_addr_t ram_addr;

    ram_addr = qemu_ram_addr_from_host(ptr);
    if (ram_addr == RAM_ADDR_INVALID) {
        error_report("Bad ram pointer %p", ptr);
        abort();
    }
    return ram_addr;
}

/*
 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
 * be discarded and looked up again (e.g. via tlb_entry()).
 */
static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
                     MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);
    bool ok;

    /*
     * This is not a probe, so only valid return is success; failure
     * should result in exception + longjmp to the cpu loop.
     */
    ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr);
    assert(ok);
}

/*
 * Issue a load to an I/O (non-RAM-backed) page through the memory API.
 * Any transaction failure is reported via cpu_transaction_failed().
 */
static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                         int mmu_idx, target_ulong addr, uintptr_t retaddr,
                         MMUAccessType access_type, MemOp op)
{
    CPUState *cpu = env_cpu(env);
    hwaddr mr_offset;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    uint64_t val;
    bool locked = false;
    MemTxResult r;

    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
    mr = section->mr;
    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
    cpu->mem_io_pc = retaddr;
    if (!cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }

    /* Devices that are not BQL-free must be accessed with the lock held. */
    if (mr->global_locking && !qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        locked = true;
    }
    r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
    if (r != MEMTX_OK) {
        hwaddr physaddr = mr_offset +
            section->offset_within_address_space -
            section->offset_within_region;

        cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
                               mmu_idx, iotlbentry->attrs, r, retaddr);
    }
    if (locked) {
        qemu_mutex_unlock_iothread();
    }

    return val;
}

/*
 * Issue a store to an I/O (non-RAM-backed) page through the memory API.
 * Any transaction failure is reported via cpu_transaction_failed().
 */
static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                      int mmu_idx, uint64_t val, target_ulong addr,
                      uintptr_t retaddr, MemOp op)
{
    CPUState *cpu = env_cpu(env);
    hwaddr mr_offset;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    bool locked = false;
    MemTxResult r;

    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
    mr = section->mr;
    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
    if (!cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }
    cpu->mem_io_pc = retaddr;

    /* Devices that are not BQL-free must be accessed with the lock held. */
    if (mr->global_locking && !qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        locked = true;
    }
    r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
    if (r != MEMTX_OK) {
        hwaddr physaddr = mr_offset +
            section->offset_within_address_space -
            section->offset_within_region;

        cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
                               MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
                               retaddr);
    }
    if (locked) {
        qemu_mutex_unlock_iothread();
    }
}

/* Read the TLB comparator located at byte offset OFS within ENTRY. */
static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
{
#if TCG_OVERSIZED_GUEST
    return *(target_ulong *)((uintptr_t)entry + ofs);
#else
    /* ofs might correspond to .addr_write, so use atomic_read */
    return atomic_read((target_ulong *)((uintptr_t)entry + ofs));
#endif
}

/* Return true if ADDR is present in the victim tlb, and has been copied
   back to the main tlb.  */
static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
                           size_t elt_ofs, target_ulong page)
{
    size_t vidx;

    assert_cpu_is_self(env_cpu(env));
    for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
        CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
        target_ulong cmp;

        /* elt_ofs might correspond to .addr_write, so use atomic_read */
#if TCG_OVERSIZED_GUEST
        cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
#else
        cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
#endif

        if (cmp == page) {
            /* Found entry in victim tlb, swap tlb and iotlb.  */
            CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];

            /* The main-table entries are only mutated under the tlb lock. */
            qemu_spin_lock(&env_tlb(env)->c.lock);
            copy_tlb_helper_locked(&tmptlb, tlb);
            copy_tlb_helper_locked(tlb, vtlb);
            copy_tlb_helper_locked(vtlb, &tmptlb);
            qemu_spin_unlock(&env_tlb(env)->c.lock);

            CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
            CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
            tmpio = *io; *io = *vio; *vio = tmpio;
            return true;
        }
    }
    return false;
}

/* Macro to call the above, with local variables from the use context.  */
#define VICTIM_TLB_HIT(TY, ADDR) \
  victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
                 (ADDR) & TARGET_PAGE_MASK)

/*
 * Return a ram_addr_t for the virtual address for execution.
 *
 * Return -1 if we can't translate and execute from an entire page
 * of RAM.  This will force us to execute by loading and translating
 * one insn at a time, without caching.
 *
 * NOTE: This function will trigger an exception if the page is
 * not executable.
 */
tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
                                        void **hostp)
{
    uintptr_t mmu_idx = cpu_mmu_index(env, true);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    void *p;

    if (unlikely(!tlb_hit(entry->addr_code, addr))) {
        if (!VICTIM_TLB_HIT(addr_code, addr)) {
            tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
            /* tlb_fill may resize the TLB; re-fetch index and entry. */
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);

            if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
                /*
                 * The MMU protection covers a smaller range than a target
                 * page, so we must redo the MMU check for every insn.
                 */
                return -1;
            }
        }
        assert(tlb_hit(entry->addr_code, addr));
    }

    if (unlikely(entry->addr_code & TLB_MMIO)) {
        /* The region is not backed by RAM.  */
        if (hostp) {
            *hostp = NULL;
        }
        return -1;
    }

    p = (void *)((uintptr_t)addr + entry->addend);
    if (hostp) {
        *hostp = p;
    }
    return qemu_ram_addr_from_host_nofail(p);
}

tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
{
    return get_page_addr_code_hostp(env, addr, NULL);
}

/*
 * Handle a store to a RAM page that is currently clean for code purposes:
 * invalidate any TBs covering the written range, then mark the page dirty
 * and, once no client still considers it clean, drop the notdirty slow
 * path for this vaddr via tlb_set_dirty().
 */
static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
                           CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
{
    ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;

    trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);

    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
        struct page_collection *pages
            = page_collection_lock(ram_addr, ram_addr + size);
        tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr);
        page_collection_unlock(pages);
    }

    /*
     * Set both VGA and migration bits for simplicity and to remove
     * the notdirty callback faster.
     */
    cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);

    /* We remove the notdirty callback only if the code has been flushed. */
    if (!cpu_physical_memory_is_clean(ram_addr)) {
        trace_memory_notdirty_set_dirty(mem_vaddr);
        tlb_set_dirty(cpu, mem_vaddr);
    }
}

/*
 * Probe for whether the specified guest access is permitted. If it is not
 * permitted then an exception will be taken in the same way as if this
 * were a real access (and we will not return).
 * If the size is 0 or the page requires I/O access, returns NULL; otherwise,
 * returns the address of the host page similar to tlb_vaddr_to_host().
 */
void *probe_access(CPUArchState *env, target_ulong addr, int size,
                   MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
{
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr;
    size_t elt_ofs;
    int wp_access;

    /* The probed range must not cross a page boundary. */
    g_assert(-(addr | TARGET_PAGE_MASK) >= size);

    switch (access_type) {
    case MMU_DATA_LOAD:
        elt_ofs = offsetof(CPUTLBEntry, addr_read);
        wp_access = BP_MEM_READ;
        break;
    case MMU_DATA_STORE:
        elt_ofs = offsetof(CPUTLBEntry, addr_write);
        wp_access = BP_MEM_WRITE;
        break;
    case MMU_INST_FETCH:
        elt_ofs = offsetof(CPUTLBEntry, addr_code);
        wp_access = BP_MEM_READ;
        break;
    default:
        g_assert_not_reached();
    }
    tlb_addr = tlb_read_ofs(entry, elt_ofs);

    if (unlikely(!tlb_hit(tlb_addr, addr))) {
        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, retaddr);
            /* TLB resize via tlb_fill may have moved the entry. */
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_read_ofs(entry, elt_ofs);
    }

    if (!size) {
        return NULL;
    }

    if (unlikely(tlb_addr & TLB_FLAGS_MASK)) {
        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Reject I/O access, or other required slow-path. */
        if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) {
            return NULL;
        }

        /* Handle watchpoints.  */
        if (tlb_addr & TLB_WATCHPOINT) {
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, wp_access, retaddr);
        }

        /* Handle clean RAM pages.  */
        if (tlb_addr & TLB_NOTDIRTY) {
            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
        }
    }

    return (void *)((uintptr_t)addr + entry->addend);
}

/*
 * Return the host address of a guest virtual address, or NULL if the page
 * is not resident or needs the I/O slow path.  Uses a non-faulting
 * tlb_fill probe, so this never raises a guest exception.
 */
void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
                        MMUAccessType access_type, int mmu_idx)
{
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr, page;
    size_t elt_ofs;

    switch (access_type) {
    case MMU_DATA_LOAD:
        elt_ofs = offsetof(CPUTLBEntry, addr_read);
        break;
    case MMU_DATA_STORE:
        elt_ofs = offsetof(CPUTLBEntry, addr_write);
        break;
    case MMU_INST_FETCH:
        elt_ofs = offsetof(CPUTLBEntry, addr_code);
        break;
    default:
        g_assert_not_reached();
    }

    page = addr & TARGET_PAGE_MASK;
    tlb_addr = tlb_read_ofs(entry, elt_ofs);

    if (!tlb_hit_page(tlb_addr, page)) {
        uintptr_t index = tlb_index(env, mmu_idx, addr);

        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) {
            CPUState *cs = env_cpu(env);
            CPUClass *cc = CPU_GET_CLASS(cs);

            /* Probe (probe=true): failure returns false instead of raising. */
            if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) {
                /* Non-faulting page table read failed.  */
                return NULL;
            }

            /* TLB resize via tlb_fill may have moved the entry.  */
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_read_ofs(entry, elt_ofs);
    }

    if (tlb_addr & ~TARGET_PAGE_MASK) {
        /* IO access */
        return NULL;
    }

    return (void *)((uintptr_t)addr + entry->addend);
}


#ifdef CONFIG_PLUGIN
/*
 * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
 * This should be a hot path as we will have just looked this path up
 * in the softmmu lookup code (or helper). We don't handle re-fills or
 * checking the victim table. This is purely informational.
 *
 * This should never fail as the memory access being instrumented
 * should have just filled the TLB.
 */

bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
                       bool is_store, struct qemu_plugin_hwaddr *data)
{
    CPUArchState *env = cpu->env_ptr;
    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;

    if (likely(tlb_hit(tlb_addr, addr))) {
        /* We must have an iotlb entry for MMIO */
        if (tlb_addr & TLB_MMIO) {
            CPUIOTLBEntry *iotlbentry;
            iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
            data->is_io = true;
            data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
            data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
        } else {
            data->is_io = false;
            data->v.ram.hostaddr = addr + tlbe->addend;
        }
        return true;
    }
    return false;
}

#endif

/* Probe for a read-modify-write atomic operation.  Do not allow unaligned
 * operations, or io operations to proceed.  Return the host address.  */
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    size_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(tlbe);
    MemOp mop = get_memop(oi);
    int a_bits = get_alignment_bits(mop);
    int s_bits = mop & MO_SIZE;
    void *hostaddr;

    /* Adjust the given return address.  */
    retaddr -= GETPC_ADJ;

    /* Enforce guest required alignment.  */
    if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
        /* ??? Maybe indicate atomic op to cpu_unaligned_access */
        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* Enforce qemu required alignment.  */
    if (unlikely(addr & ((1 << s_bits) - 1))) {
        /* We get here if guest alignment was not requested,
           or was not enforced by cpu_unaligned_access above.
           We might widen the access and emulate, but for now
           mark an exception and exit the cpu loop.  */
        goto stop_the_world;
    }

    /* Check TLB entry and enforce page permissions.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!VICTIM_TLB_HIT(addr_write, addr)) {
            tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            /* tlb_fill may resize the TLB; re-fetch index and entry. */
            index = tlb_index(env, mmu_idx, addr);
            tlbe = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
    }

    /* Notice an IO access or a needs-MMU-lookup access */
    if (unlikely(tlb_addr & TLB_MMIO)) {
        /* There's really nothing that can be done to
           support this apart from stop-the-world.  */
        goto stop_the_world;
    }

    /* Let the guest notice RMW on a write-only page.  */
    if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
        tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
                 mmu_idx, retaddr);
        /* Since we don't support reads and writes to different addresses,
           and we do have the proper page loaded for write, this shouldn't
           ever return.  But just in case, handle via stop-the-world.  */
        goto stop_the_world;
    }

    hostaddr = (void *)((uintptr_t)addr + tlbe->addend);

    if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
        notdirty_write(env_cpu(env), addr, 1 << s_bits,
                       &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
    }

    return hostaddr;

 stop_the_world:
    cpu_loop_exit_atomic(env_cpu(env), retaddr);
}

/*
 * Load Helpers
 *
 * We support two different access types. SOFTMMU_CODE_ACCESS is
 * specifically for reading instructions from system memory. It is
 * called by the translation loop and in some helpers where the code
 * is disassembled. It shouldn't be called directly by guest code.
 */

typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
                                TCGMemOpIdx oi, uintptr_t retaddr);

/*
 * Perform the actual host load for OP.  OP is expected to be a
 * compile-time constant so the switch folds to a single access;
 * qemu_build_not_reached() enforces that at build time.
 */
static inline uint64_t QEMU_ALWAYS_INLINE
load_memop(const void *haddr, MemOp op)
{
    switch (op) {
    case MO_UB:
        return ldub_p(haddr);
    case MO_BEUW:
        return lduw_be_p(haddr);
    case MO_LEUW:
        return lduw_le_p(haddr);
    case MO_BEUL:
        return (uint32_t)ldl_be_p(haddr);
    case MO_LEUL:
        return (uint32_t)ldl_le_p(haddr);
    case MO_BEQ:
        return ldq_be_p(haddr);
    case MO_LEQ:
        return ldq_le_p(haddr);
    default:
        qemu_build_not_reached();
    }
}

static inline uint64_t QEMU_ALWAYS_INLINE
load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
            uintptr_t retaddr, MemOp op, bool code_read,
            FullLoadHelper *full_load)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
    const size_t tlb_off = code_read ?
        offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
    const MMUAccessType access_type =
        code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    uint64_t res;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, access_type,
                             mmu_idx, retaddr);
    }

    /* If the TLB entry is for a different page, reload and try again.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size,
                     access_type, mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = code_read ? entry->addr_code : entry->addr_read;
        tlb_addr &= ~TLB_INVALID_MASK;
    }

    /* Handle anything that isn't just a straight memory access.  */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through full_load.  */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints.  */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out.  */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_READ, retaddr);
        }

        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access.  */
        if (likely(tlb_addr & TLB_MMIO)) {
            return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
                            access_type, op ^ (need_swap * MO_BSWAP));
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two load_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this.  ;-)
         */
        if (unlikely(need_swap)) {
            return load_memop(haddr, op ^ MO_BSWAP);
        }
        return load_memop(haddr, op);
    }

    /* Handle slow unaligned access (it spans two pages or IO).  */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
        target_ulong addr1, addr2;
        uint64_t r1, r2;
        unsigned shift;
    do_unaligned_access:
        addr1 = addr & ~((target_ulong)size - 1);
        addr2 = addr1 + size;
        r1 = full_load(env, addr1, oi, retaddr);
        r2 = full_load(env, addr2, oi, retaddr);
        shift = (addr & (size - 1)) * 8;

        if (memop_big_endian(op)) {
            /* Big-endian combine.  */
            res = (r1 << shift) | (r2 >> ((size * 8) - shift));
        } else {
            /* Little-endian combine.  */
            res = (r1 >> shift) | (r2 << ((size * 8) - shift));
        }
        return res & MAKE_64BIT_MASK(0, size * 8);
    }

    haddr = (void *)((uintptr_t)addr + entry->addend);
    return load_memop(haddr, op);
}

/*
 * For the benefit of TCG generated code, we want to avoid the
 * complication of ABI-specific return type promotion and always
 * return a value extended to the register size of the host. This is
 * tcg_target_long, except in the case of a 32-bit host and 64-bit
 * data, and for that we always have uint64_t.
 *
 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
 */

/*
 * The static full_*_mmu functions exist (rather than using the helper_*
 * entry points directly) because load_helper needs a FullLoadHelper to
 * recurse through for unaligned accesses.
 */
static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
}

tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
                                     TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_ldub_mmu(env, addr, oi, retaddr);
}

static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
                       full_le_lduw_mmu);
}

tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_le_lduw_mmu(env, addr, oi, retaddr);
}

static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
                       full_be_lduw_mmu);
}

tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_be_lduw_mmu(env, addr, oi, retaddr);
}

static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
                       full_le_ldul_mmu);
}

tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_le_ldul_mmu(env, addr, oi, retaddr);
}

static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
                       full_be_ldul_mmu);
}

tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_be_ldul_mmu(env, addr, oi, retaddr);
}

uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
                           TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
                       helper_le_ldq_mmu);
}

uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
                           TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
                       helper_be_ldq_mmu);
}

/*
 * Provide signed versions of the load routines as well.  We can of course
 * avoid this for 64-bit data, or for 32-bit data on 32-bit host.
 */


tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
                                     TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
}

/*
 * Load helpers for cpu_ldst.h.
 */

/*
 * Common code for the cpu_ld*_mmuidx_ra functions: emit the tracepoint
 * and plugin callback around the actual load.  MO_SIGN is stripped before
 * dispatch; sign extension is performed by the individual callers.
 */
static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
                                       int mmu_idx, uintptr_t retaddr,
                                       MemOp op, FullLoadHelper *full_load)
{
    uint16_t meminfo;
    TCGMemOpIdx oi;
    uint64_t ret;

    meminfo = trace_mem_get_info(op, mmu_idx, false);
    trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);

    op &= ~MO_SIGN;
    oi = make_memop_idx(op, mmu_idx);
    ret = full_load(env, addr, oi, retaddr);

    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);

    return ret;
}

uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                            int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu);
}

int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                       int mmu_idx, uintptr_t ra)
{
    return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB,
                                   full_ldub_mmu);
}

uint32_t cpu_lduw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                            int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUW,
                           MO_TE == MO_LE
                           ? full_le_lduw_mmu : full_be_lduw_mmu);
}

int cpu_ldsw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                       int mmu_idx, uintptr_t ra)
{
    return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_TESW,
                                    MO_TE == MO_LE
                                    ? full_le_lduw_mmu : full_be_lduw_mmu);
}

uint32_t cpu_ldl_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                           int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUL,
                           MO_TE == MO_LE
                           ? full_le_ldul_mmu : full_be_ldul_mmu);
}

uint64_t cpu_ldq_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                           int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEQ,
                           MO_TE == MO_LE
                           ? helper_le_ldq_mmu : helper_be_ldq_mmu);
}

uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr,
                          uintptr_t retaddr)
{
    return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_lduw_data_ra(CPUArchState *env, target_ulong ptr,
                          uintptr_t retaddr)
{
    return cpu_lduw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

int cpu_ldsw_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldsw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_ldl_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldl_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint64_t cpu_ldq_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldq_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldub_data_ra(env, ptr, 0);
}

int cpu_ldsb_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldsb_data_ra(env, ptr, 0);
}

uint32_t cpu_lduw_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_lduw_data_ra(env, ptr, 0);
}

int cpu_ldsw_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldsw_data_ra(env, ptr, 0);
}

uint32_t cpu_ldl_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldl_data_ra(env, ptr, 0);
}

uint64_t cpu_ldq_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldq_data_ra(env, ptr, 0);
}

/*
 * Store Helpers
 */

static inline void QEMU_ALWAYS_INLINE
store_memop(void *haddr, uint64_t val, MemOp op)
{
    /* OP is expected to be compile-time constant; see load_memop. */
    switch (op) {
    case MO_UB:
        stb_p(haddr, val);
        break;
    case MO_BEUW:
        stw_be_p(haddr, val);
        break;
    case MO_LEUW:
        stw_le_p(haddr, val);
        break;
    case MO_BEUL:
        stl_be_p(haddr, val);
        break;
    case MO_LEUL:
        stl_le_p(haddr, val);
        break;
    case MO_BEQ:
        stq_be_p(haddr, val);
        break;
    case MO_LEQ:
        stq_le_p(haddr, val);
        break;
    default:
        qemu_build_not_reached();
    }
}

static inline void QEMU_ALWAYS_INLINE
store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
             TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(entry);
    const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* If the TLB entry is for a different page, reload and try again.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
    }

    /* Handle anything that isn't just a straight memory access.  */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through byte stores.  */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints.  */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out.  */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
        }

        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access.  */
        if (tlb_addr & TLB_MMIO) {
            io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
                      op ^ (need_swap * MO_BSWAP));
            return;
        }

        /* Ignore writes to ROM.  */
        if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
            return;
        }

        /* Handle clean RAM pages.  */
        if (tlb_addr & TLB_NOTDIRTY) {
            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two store_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this.  ;-)
         */
        if (unlikely(need_swap)) {
            store_memop(haddr, val, op ^ MO_BSWAP);
        } else {
            store_memop(haddr, val, op);
        }
        return;
    }

    /* Handle slow unaligned access (it spans two pages or IO).  */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                     >= TARGET_PAGE_SIZE)) {
        int i;
        uintptr_t index2;
        CPUTLBEntry *entry2;
        target_ulong page2, tlb_addr2;
        size_t size2;

    do_unaligned_access:
        /*
         * Ensure the second page is in the TLB.  Note that the first page
         * is already guaranteed to be filled, and that the second page
         * cannot evict the first.
         */
        page2 = (addr + size) & TARGET_PAGE_MASK;
        size2 = (addr + size) & ~TARGET_PAGE_MASK;
        index2 = tlb_index(env, mmu_idx, page2);
        entry2 = tlb_entry(env, mmu_idx, page2);
        tlb_addr2 = tlb_addr_write(entry2);
        if (!tlb_hit_page(tlb_addr2, page2)) {
            if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
                tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
                         mmu_idx, retaddr);
                index2 = tlb_index(env, mmu_idx, page2);
                entry2 = tlb_entry(env, mmu_idx, page2);
            }
            tlb_addr2 = tlb_addr_write(entry2);
        }

        /*
         * Handle watchpoints.  Since this may trap, all checks
         * must happen before any store.
         */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            cpu_check_watchpoint(env_cpu(env), addr, size - size2,
                                 env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
                                 BP_MEM_WRITE, retaddr);
        }
        if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
            cpu_check_watchpoint(env_cpu(env), page2, size2,
                                 env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
                                 BP_MEM_WRITE, retaddr);
        }

        /*
         * XXX: not efficient, but simple.
         * This loop must go in the forward direction to avoid issues
         * with self-modifying code in Windows 64-bit.
         */
        for (i = 0; i < size; ++i) {
            uint8_t val8;
            if (memop_big_endian(op)) {
                /* Big-endian extract.  */
                val8 = val >> (((size - 1) * 8) - (i * 8));
            } else {
                /* Little-endian extract.  */
                val8 = val >> (i * 8);
            }
            helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
        }
        return;
    }

    haddr = (void *)((uintptr_t)addr + entry->addend);
    store_memop(haddr, val, op);
}

void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
                        TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_UB);
}

void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUW);
}

void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUW);
}

void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUL);
}

void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUL);
}

void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEQ);
}

void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEQ);
}

/*
 * Store Helpers for cpu_ldst.h
 */

/*
 * Common code for the cpu_st*_mmuidx_ra functions: emit the tracepoint
 * and plugin callback around the actual store.
 */
static inline void QEMU_ALWAYS_INLINE
cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
                 int mmu_idx, uintptr_t retaddr, MemOp op)
{
    TCGMemOpIdx oi;
    uint16_t meminfo;

    meminfo = trace_mem_get_info(op, mmu_idx, true);
    trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);

/* NOTE(review): continuation of cpu_store_helper() begun above.  */
    oi = make_memop_idx(op, mmu_idx);
    store_helper(env, addr, val, oi, retaddr, op);

    /* Notify loaded TCG plugins after the access has completed.  */
    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
}

/*
 * cpu_st{b,w,l,q}_mmuidx_ra: store with an explicit mmu index and
 * unwind return address.  The w/l/q variants use target-endian MemOps.
 */

void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
}

void cpu_stw_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUW);
}

void cpu_stl_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUL);
}

void cpu_stq_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEQ);
}

/*
 * cpu_st{b,w,l,q}_data_ra: as above, but using the CPU's current data
 * mmu index (cpu_mmu_index(env, false)).
 */

void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stw_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stw_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stl_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stl_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stq_data_ra(CPUArchState *env, target_ulong ptr,
                     uint64_t val, uintptr_t retaddr)
{
    cpu_stq_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

/* cpu_st{b,w,l,q}_data: as above, with retaddr == 0.  */

void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stb_data_ra(env, ptr, val, 0);
}

void cpu_stw_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stw_data_ra(env, ptr, val, 0);
}

void cpu_stl_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stl_data_ra(env, ptr, val, 0);
}

void cpu_stq_data(CPUArchState *env, target_ulong ptr, uint64_t val)
{
    cpu_stq_data_ra(env, ptr, val, 0);
}

/* First set of helpers allows passing in of OI and RETADDR.  This makes
   them callable from other helpers.  */

/*
 * The ATOMIC_* macros parameterize atomic_template.h, which is included
 * once per DATA_SIZE to stamp out the atomic helpers for each width.
 */
#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
#define ATOMIC_NAME(X) \
    HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
#define ATOMIC_MMU_DECLS
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
#define ATOMIC_MMU_CLEANUP
#define ATOMIC_MMU_IDX get_mmuidx(oi)

#include "atomic_common.inc.c"

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

/* 8- and 16-byte helpers only where the host can do them atomically.  */
#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif

/* Second set of helpers are directly callable from TCG as helpers.  */

/* Same templates, re-parameterized: retaddr comes from GETPC().  */
#undef EXTRA_ARGS
#undef ATOMIC_NAME
#undef ATOMIC_MMU_LOOKUP
#define EXTRA_ARGS , TCGMemOpIdx oi
#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC())

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif
#undef ATOMIC_MMU_IDX

/* Code access functions.
*/ 2240 2241 static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr, 2242 TCGMemOpIdx oi, uintptr_t retaddr) 2243 { 2244 return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code); 2245 } 2246 2247 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr) 2248 { 2249 TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true)); 2250 return full_ldub_code(env, addr, oi, 0); 2251 } 2252 2253 static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr, 2254 TCGMemOpIdx oi, uintptr_t retaddr) 2255 { 2256 return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code); 2257 } 2258 2259 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr) 2260 { 2261 TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true)); 2262 return full_lduw_code(env, addr, oi, 0); 2263 } 2264 2265 static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr, 2266 TCGMemOpIdx oi, uintptr_t retaddr) 2267 { 2268 return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code); 2269 } 2270 2271 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr) 2272 { 2273 TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true)); 2274 return full_ldl_code(env, addr, oi, 0); 2275 } 2276 2277 static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr, 2278 TCGMemOpIdx oi, uintptr_t retaddr) 2279 { 2280 return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code); 2281 } 2282 2283 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr) 2284 { 2285 TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true)); 2286 return full_ldq_code(env, addr, oi, 0); 2287 } 2288