/*
 * Common CPU TLB handling
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
#include "exec/cpu_ldst.h"
#include "exec/cputlb.h"
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "tcg/tcg.h"
#include "qemu/error-report.h"
#include "exec/log.h"
#include "exec/helper-proto.h"
#include "qemu/atomic.h"
#include "qemu/atomic128.h"
#include "translate-all.h"
#include "trace-root.h"
#include "trace/mem.h"
#ifdef CONFIG_PLUGIN
#include "qemu/plugin-memory.h"
#endif

/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
/* #define DEBUG_TLB */
/* #define DEBUG_TLB_LOG */

#ifdef DEBUG_TLB
# define DEBUG_TLB_GATE 1
# ifdef DEBUG_TLB_LOG
#  define DEBUG_TLB_LOG_GATE 1
# else
#  define DEBUG_TLB_LOG_GATE 0
# endif
#else
# define DEBUG_TLB_GATE 0
# define DEBUG_TLB_LOG_GATE 0
#endif

#define tlb_debug(fmt, ...) do { \
    if (DEBUG_TLB_LOG_GATE) { \
        qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
                      ## __VA_ARGS__); \
    } else if (DEBUG_TLB_GATE) { \
        fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
    } \
} while (0)

#define assert_cpu_is_self(cpu) do {                              \
        if (DEBUG_TLB_GATE) {                                     \
            g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
        }                                                         \
    } while (0)

/* run_on_cpu_data.target_ptr should always be big enough for a
 * target_ulong even on 32 bit builds */
QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));

/* We currently can't handle more than 16 bits in the MMUIDX bitmask.
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)

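/*
 * CPUTLBDescFast.mask holds (n_entries - 1) << CPU_TLB_ENTRY_BITS (see
 * tlb_mmu_resize_locked below), so the two helpers that follow recover
 * the entry count and the table size in bytes.  For example, a 256-entry
 * table has mask == 255 << CPU_TLB_ENTRY_BITS.
 */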
static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
{
    return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
}

static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
{
    return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
}

static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
                             size_t max_entries)
{
    desc->window_begin_ns = ns;
    desc->window_max_entries = max_entries;
}

/**
 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
 * @desc: The CPUTLBDesc portion of the TLB
 * @fast: The CPUTLBDescFast portion of the same TLB
 *
 * Called with tlb_lock_held.
 *
 * We have two main constraints when resizing a TLB: (1) we only resize it
 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
 * the array or unnecessarily flushing it), which means we do not control how
 * frequently the resizing can occur; (2) we don't have access to the guest's
 * future scheduling decisions, and therefore have to decide the magnitude of
 * the resize based on past observations.
 *
 * In general, a memory-hungry process can benefit greatly from an appropriately
 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
 * we just have to make the TLB as large as possible; while an oversized TLB
 * results in minimal TLB miss rates, it also takes longer to be flushed
 * (flushes can be _very_ frequent), and the reduced locality can also hurt
 * performance.
 *
 * To achieve near-optimal performance for all kinds of workloads, we:
 *
 * 1. Aggressively increase the size of the TLB when the use rate of the
 * TLB being flushed is high, since it is likely that in the near future this
 * memory-hungry process will execute again, and its memory hungriness will
 * probably be similar.
 *
 * 2. Slowly reduce the size of the TLB as the use rate declines over a
 * reasonably large time window. The rationale is that if in such a time window
 * we have not observed a high TLB use rate, it is likely that we won't observe
 * it in the near future. In that case, once a time window expires we downsize
 * the TLB to match the maximum use rate observed in the window.
 *
 * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
 * since in that range performance is likely near-optimal. Recall that the TLB
 * is direct mapped, so we want the use rate to be low (or at least not too
 * high), since otherwise we are likely to have a significant amount of
 * conflict misses.
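 *
 * For example, with a 1024-entry table and window_max_entries == 800 the
 * use rate is 78%, so the next flush doubles the table (up to the
 * CPU_TLB_DYN_MAX_BITS cap); with window_max_entries == 200 and an expired
 * window the table shrinks to 512 entries (pow2ceil(200) == 256 would give
 * a 78% expected rate, so it is doubled once before the resize).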
 */
static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
{
    size_t old_size = tlb_n_entries(fast);
    size_t rate;
    size_t new_size = old_size;
    int64_t now = get_clock_realtime();
    int64_t window_len_ms = 100;
    int64_t window_len_ns = window_len_ms * 1000 * 1000;
    bool window_expired = now > desc->window_begin_ns + window_len_ns;

    if (desc->n_used_entries > desc->window_max_entries) {
        desc->window_max_entries = desc->n_used_entries;
    }
    rate = desc->window_max_entries * 100 / old_size;

    if (rate > 70) {
        new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
    } else if (rate < 30 && window_expired) {
        size_t ceil = pow2ceil(desc->window_max_entries);
        size_t expected_rate = desc->window_max_entries * 100 / ceil;

        /*
         * Avoid undersizing when the max number of entries seen is just below
         * a pow2. For instance, if max_entries == 1025, the expected use rate
         * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
         * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
         * later. Thus, make sure that the expected use rate remains below 70%.
         * (and since we double the size, that means the lowest rate we'd
         * expect to get is 35%, which is still in the 30-70% range where
         * we consider that the size is appropriate.)
         */
        if (expected_rate > 70) {
            ceil *= 2;
        }
        new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
    }

    if (new_size == old_size) {
        if (window_expired) {
            tlb_window_reset(desc, now, desc->n_used_entries);
        }
        return;
    }

    g_free(fast->table);
    g_free(desc->iotlb);

    tlb_window_reset(desc, now, 0);
    /* desc->n_used_entries is cleared by the caller */
    fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
    fast->table = g_try_new(CPUTLBEntry, new_size);
    desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);

    /*
     * If the allocations fail, try smaller sizes. We just freed some
     * memory, so going back to half of new_size has a good chance of working.
     * Increased memory pressure elsewhere in the system might cause the
     * allocations to fail though, so we progressively reduce the allocation
     * size, aborting if we cannot even allocate the smallest TLB we support.
     */
    while (fast->table == NULL || desc->iotlb == NULL) {
        if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
            error_report("%s: %s", __func__, strerror(errno));
            abort();
        }
        new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
        fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;

        g_free(fast->table);
        g_free(desc->iotlb);
        fast->table = g_try_new(CPUTLBEntry, new_size);
        desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
    }
}

static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
{
    desc->n_used_entries = 0;
    desc->large_page_addr = -1;
    desc->large_page_mask = -1;
    desc->vindex = 0;
    memset(fast->table, -1, sizeof_tlb(fast));
    memset(desc->vtable, -1, sizeof(desc->vtable));
}

static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
{
    CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
    CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];

    tlb_mmu_resize_locked(desc, fast);
    tlb_mmu_flush_locked(desc, fast);
}

static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
{
    size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;

    tlb_window_reset(desc, now, 0);
    desc->n_used_entries = 0;
    fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
    fast->table = g_new(CPUTLBEntry, n_entries);
    desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
}

static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
{
    env_tlb(env)->d[mmu_idx].n_used_entries++;
}

static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
{
    env_tlb(env)->d[mmu_idx].n_used_entries--;
}

void tlb_init(CPUState *cpu)
{
    CPUArchState *env = cpu->env_ptr;
    int64_t now = get_clock_realtime();
    int i;

    qemu_spin_init(&env_tlb(env)->c.lock);

    /* Ensure that cpu_reset performs a full flush.  */
    env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;

    for (i = 0; i < NB_MMU_MODES; i++) {
        tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
    }
}

/* flush_all_helper: run fn across all cpus
 *
 * Queue fn as asynchronous work on every vCPU other than src.  Callers
 * that need a synchronisation point additionally queue the final call on
 * src as "safe" work (see async_safe_run_on_cpu), so that all of the
 * queued work is finished before execution starts again.
 */
static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
                             run_on_cpu_data d)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu != src) {
            async_run_on_cpu(cpu, fn, d);
        }
    }
}

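/*
 * Aggregate flush statistics across all vCPUs: "full" counts flushes of
 * all MMU indexes at once, "part" counts per-index flushes, and "elide"
 * counts requested flushes that were skipped because the index was
 * already clean (see tlb_flush_by_mmuidx_async_work below).
 */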
void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
{
    CPUState *cpu;
    size_t full = 0, part = 0, elide = 0;

    CPU_FOREACH(cpu) {
        CPUArchState *env = cpu->env_ptr;

        full += atomic_read(&env_tlb(env)->c.full_flush_count);
        part += atomic_read(&env_tlb(env)->c.part_flush_count);
        elide += atomic_read(&env_tlb(env)->c.elide_flush_count);
    }
    *pfull = full;
    *ppart = part;
    *pelide = elide;
}

static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
{
    CPUArchState *env = cpu->env_ptr;
    uint16_t asked = data.host_int;
    uint16_t all_dirty, work, to_clean;

    assert_cpu_is_self(cpu);

    tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);

    qemu_spin_lock(&env_tlb(env)->c.lock);

    all_dirty = env_tlb(env)->c.dirty;
    to_clean = asked & all_dirty;
    all_dirty &= ~to_clean;
    env_tlb(env)->c.dirty = all_dirty;

    for (work = to_clean; work != 0; work &= work - 1) {
        int mmu_idx = ctz32(work);
        tlb_flush_one_mmuidx_locked(env, mmu_idx);
    }

    qemu_spin_unlock(&env_tlb(env)->c.lock);

    cpu_tb_jmp_cache_clear(cpu);

    if (to_clean == ALL_MMUIDX_BITS) {
        atomic_set(&env_tlb(env)->c.full_flush_count,
                   env_tlb(env)->c.full_flush_count + 1);
    } else {
        atomic_set(&env_tlb(env)->c.part_flush_count,
                   env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
        if (to_clean != asked) {
            atomic_set(&env_tlb(env)->c.elide_flush_count,
                       env_tlb(env)->c.elide_flush_count +
                       ctpop16(asked & ~to_clean));
        }
    }
}

void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
{
    tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);

    if (cpu->created && !qemu_cpu_is_self(cpu)) {
        async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
                         RUN_ON_CPU_HOST_INT(idxmap));
    } else {
        tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
    }
}

void tlb_flush(CPUState *cpu)
{
    tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
}

void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;

    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
}

void tlb_flush_all_cpus(CPUState *src_cpu)
{
    tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
}

void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;

    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
}

void tlb_flush_all_cpus_synced(CPUState *src_cpu)
{
    tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
}

static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
                                        target_ulong page)
{
    return tlb_hit_page(tlb_entry->addr_read, page) ||
           tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
           tlb_hit_page(tlb_entry->addr_code, page);
}

/**
 * tlb_entry_is_empty - return true if the entry is not in use
 * @te: pointer to CPUTLBEntry
 */
static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
{
    return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
}

/* Called with tlb_c.lock held */
static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
                                          target_ulong page)
{
    if (tlb_hit_page_anyprot(tlb_entry, page)) {
        memset(tlb_entry, -1, sizeof(*tlb_entry));
        return true;
    }
    return false;
}

/* Called with tlb_c.lock held */
static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
                                              target_ulong page)
{
    CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
    int k;

    assert_cpu_is_self(env_cpu(env));
    for (k = 0; k < CPU_VTLB_SIZE; k++) {
        if (tlb_flush_entry_locked(&d->vtable[k], page)) {
            tlb_n_used_entries_dec(env, mmu_idx);
        }
    }
}

static void tlb_flush_page_locked(CPUArchState *env, int midx,
                                  target_ulong page)
{
    target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
    target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;

    /* Check if we need to flush due to large pages.  */
    if ((page & lp_mask) == lp_addr) {
        tlb_debug("forcing full flush midx %d ("
                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
                  midx, lp_addr, lp_mask);
        tlb_flush_one_mmuidx_locked(env, midx);
    } else {
        if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
            tlb_n_used_entries_dec(env, midx);
        }
        tlb_flush_vtlb_page_locked(env, midx, page);
    }
}

/**
 * tlb_flush_page_by_mmuidx_async_0:
 * @cpu: cpu on which to flush
 * @addr: page of virtual address to flush
 * @idxmap: set of mmu_idx to flush
 *
 * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
 * at @addr from the tlbs indicated by @idxmap from @cpu.
 */
static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
                                             target_ulong addr,
                                             uint16_t idxmap)
{
    CPUArchState *env = cpu->env_ptr;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);

    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        if ((idxmap >> mmu_idx) & 1) {
            tlb_flush_page_locked(env, mmu_idx, addr);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);

    tb_flush_jmp_cache(cpu, addr);
}

/**
 * tlb_flush_page_by_mmuidx_async_1:
 * @cpu: cpu on which to flush
 * @data: encoded addr + idxmap
 *
 * Helper for tlb_flush_page_by_mmuidx and friends, called through
 * async_run_on_cpu. The idxmap parameter is encoded in the page
 * offset of the target_ptr field. This limits the set of mmu_idx
 * that can be passed via this method.
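 * For example, with 4 KiB target pages only MMU indexes 0-11 fit in the
 * encoding; larger idxmaps take the allocating path through
 * tlb_flush_page_by_mmuidx_async_2.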
 */
static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
                                             run_on_cpu_data data)
{
    target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
    target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
    uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;

    tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
}

typedef struct {
    target_ulong addr;
    uint16_t idxmap;
} TLBFlushPageByMMUIdxData;

/**
 * tlb_flush_page_by_mmuidx_async_2:
 * @cpu: cpu on which to flush
 * @data: allocated addr + idxmap
 *
 * Helper for tlb_flush_page_by_mmuidx and friends, called through
 * async_run_on_cpu. The addr+idxmap parameters are stored in a
 * TLBFlushPageByMMUIdxData structure that has been allocated
 * specifically for this helper.  Free the structure when done.
 */
static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
                                             run_on_cpu_data data)
{
    TLBFlushPageByMMUIdxData *d = data.host_ptr;

    tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
    g_free(d);
}

void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
{
    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);

    /* This should already be page aligned */
    addr &= TARGET_PAGE_MASK;

    if (qemu_cpu_is_self(cpu)) {
        tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
    } else if (idxmap < TARGET_PAGE_SIZE) {
        /*
         * Most targets have only a few mmu_idx.  In the case where
         * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
         * allocating memory for this operation.
         */
        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    } else {
        TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);

        /* Otherwise allocate a structure, freed by the worker.  */
        d->addr = addr;
        d->idxmap = idxmap;
        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
                         RUN_ON_CPU_HOST_PTR(d));
    }
}

void tlb_flush_page(CPUState *cpu, target_ulong addr)
{
    tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
}

void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
                                       uint16_t idxmap)
{
    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    /* This should already be page aligned */
    addr &= TARGET_PAGE_MASK;

    /*
     * Allocate memory to hold addr+idxmap only when needed.
     * See tlb_flush_page_by_mmuidx for details.
     */
    if (idxmap < TARGET_PAGE_SIZE) {
        flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    } else {
        CPUState *dst_cpu;

        /* Allocate a separate data block for each destination cpu.  */
        CPU_FOREACH(dst_cpu) {
            if (dst_cpu != src_cpu) {
                TLBFlushPageByMMUIdxData *d
                    = g_new(TLBFlushPageByMMUIdxData, 1);

                d->addr = addr;
                d->idxmap = idxmap;
                async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
                                 RUN_ON_CPU_HOST_PTR(d));
            }
        }
    }

    tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
}

void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
{
    tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
}

void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
                                              target_ulong addr,
                                              uint16_t idxmap)
{
    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    /* This should already be page aligned */
    addr &= TARGET_PAGE_MASK;

    /*
     * Allocate memory to hold addr+idxmap only when needed.
     * See tlb_flush_page_by_mmuidx for details.
     */
    if (idxmap < TARGET_PAGE_SIZE) {
        flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
                              RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    } else {
        CPUState *dst_cpu;
        TLBFlushPageByMMUIdxData *d;

        /* Allocate a separate data block for each destination cpu.  */
        CPU_FOREACH(dst_cpu) {
            if (dst_cpu != src_cpu) {
                d = g_new(TLBFlushPageByMMUIdxData, 1);
                d->addr = addr;
                d->idxmap = idxmap;
                async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
                                 RUN_ON_CPU_HOST_PTR(d));
            }
        }

        d = g_new(TLBFlushPageByMMUIdxData, 1);
        d->addr = addr;
        d->idxmap = idxmap;
        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
                              RUN_ON_CPU_HOST_PTR(d));
    }
}

void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
{
    tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
}

/* update the TLBs so that writes to code in the virtual page 'addr'
   can be detected */
void tlb_protect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
                                             DIRTY_MEMORY_CODE);
}

/* update the TLB so that writes in physical page 'phys_addr' are no longer
   tested for self modifying code */
void tlb_unprotect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
}


/*
 * Dirty write flag handling
 *
 * When the TCG code writes to a location it looks up the address in
 * the TLB and uses that data to compute the final address. If any of
 * the lower bits of the address are set then the slow path is forced.
 * There are a number of reasons to do this but for normal RAM the
 * most usual is detecting writes to code regions which may invalidate
 * generated code.
 *
 * Other vCPUs might be reading their TLBs during guest execution, so we update
 * te->addr_write with atomic_set. We don't need to worry about this for
 * oversized guests as MTTCG is disabled for them.
 *
 * Called with tlb_c.lock held.
 */
static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
                                         uintptr_t start, uintptr_t length)
{
    uintptr_t addr = tlb_entry->addr_write;

    if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
                 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
        addr &= TARGET_PAGE_MASK;
        addr += tlb_entry->addend;
        if ((addr - start) < length) {
#if TCG_OVERSIZED_GUEST
            tlb_entry->addr_write |= TLB_NOTDIRTY;
#else
            atomic_set(&tlb_entry->addr_write,
                       tlb_entry->addr_write | TLB_NOTDIRTY);
#endif
        }
    }
}

/*
 * Called with tlb_c.lock held.
 * Called only from the vCPU context, i.e. the TLB's owner thread.
 */
static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
{
    *d = *s;
}

/* This is a cross vCPU call (i.e. another vCPU resetting the flags of
 * the target vCPU).
 * We must take tlb_c.lock to avoid racing with another vCPU update. The only
 * thing actually updated is the target TLB entry ->addr_write flags.
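 * (TLB_NOTDIRTY is or'ed in, which forces the next write to that page
 * through the slow path.)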
 */
void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
{
    CPUArchState *env;

    int mmu_idx;

    env = cpu->env_ptr;
    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        unsigned int i;
        unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);

        for (i = 0; i < n; i++) {
            tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
                                         start1, length);
        }

        for (i = 0; i < CPU_VTLB_SIZE; i++) {
            tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
                                         start1, length);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);
}

/* Called with tlb_c.lock held */
static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
                                         target_ulong vaddr)
{
    if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
        tlb_entry->addr_write = vaddr;
    }
}

/* update the TLB corresponding to virtual page vaddr
   so that it is no longer dirty */
void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
{
    CPUArchState *env = cpu->env_ptr;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    vaddr &= TARGET_PAGE_MASK;
    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
    }

    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        int k;
        for (k = 0; k < CPU_VTLB_SIZE; k++) {
            tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);
}

/* Our TLB does not support large pages, so remember the area covered by
   large pages and trigger a full TLB flush if these are invalidated.  */
static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
                               target_ulong vaddr, target_ulong size)
{
    target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
    target_ulong lp_mask = ~(size - 1);

    if (lp_addr == (target_ulong)-1) {
        /* No previous large page.  */
        lp_addr = vaddr;
    } else {
        /* Extend the existing region to include the new page.
           This is a compromise between unnecessary flushes and
           the cost of maintaining a full variable size TLB.  */
        lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
        while (((lp_addr ^ vaddr) & lp_mask) != 0) {
            lp_mask <<= 1;
        }
    }
    env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
    env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
}

/* Add a new TLB entry. At most one entry for a given virtual address
 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
 * supplied size is only used by tlb_flush_page.
 *
 * Called from TCG-generated code, which is under an RCU read-side
 * critical section.
 */
void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
                             hwaddr paddr, MemTxAttrs attrs, int prot,
                             int mmu_idx, target_ulong size)
{
    CPUArchState *env = cpu->env_ptr;
    CPUTLB *tlb = env_tlb(env);
    CPUTLBDesc *desc = &tlb->d[mmu_idx];
    MemoryRegionSection *section;
    unsigned int index;
    target_ulong address;
    target_ulong write_address;
    uintptr_t addend;
    CPUTLBEntry *te, tn;
    hwaddr iotlb, xlat, sz, paddr_page;
    target_ulong vaddr_page;
    int asidx = cpu_asidx_from_attrs(cpu, attrs);
    int wp_flags;
    bool is_ram, is_romd;

    assert_cpu_is_self(cpu);

    if (size <= TARGET_PAGE_SIZE) {
        sz = TARGET_PAGE_SIZE;
    } else {
        tlb_add_large_page(env, mmu_idx, vaddr, size);
        sz = size;
    }
    vaddr_page = vaddr & TARGET_PAGE_MASK;
    paddr_page = paddr & TARGET_PAGE_MASK;

    section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
                                                &xlat, &sz, attrs, &prot);
    assert(sz >= TARGET_PAGE_SIZE);

    tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
              " prot=%x idx=%d\n",
              vaddr, paddr, prot, mmu_idx);

    address = vaddr_page;
    if (size < TARGET_PAGE_SIZE) {
        /* Repeat the MMU check and TLB fill on every access.  */
        address |= TLB_INVALID_MASK;
    }
    if (attrs.byte_swap) {
        address |= TLB_BSWAP;
    }

    is_ram = memory_region_is_ram(section->mr);
    is_romd = memory_region_is_romd(section->mr);

    if (is_ram || is_romd) {
        /* RAM and ROMD both have associated host memory. */
        addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
    } else {
        /* I/O does not; force the host address to NULL. */
        addend = 0;
    }

    write_address = address;
    if (is_ram) {
        iotlb = memory_region_get_ram_addr(section->mr) + xlat;
        /*
         * Computing is_clean is expensive; avoid all that unless
         * the page is actually writable.
         */
        if (prot & PAGE_WRITE) {
            if (section->readonly) {
                write_address |= TLB_DISCARD_WRITE;
            } else if (cpu_physical_memory_is_clean(iotlb)) {
                write_address |= TLB_NOTDIRTY;
            }
        }
    } else {
        /* I/O or ROMD */
        iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
        /*
         * Writes to romd devices must go through MMIO to enable write.
         * Reads to romd devices go through the ram_ptr found above,
         * but of course reads to I/O must go through MMIO.
         */
        write_address |= TLB_MMIO;
        if (!is_romd) {
            address = write_address;
        }
    }

    wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
                                              TARGET_PAGE_SIZE);

    index = tlb_index(env, mmu_idx, vaddr_page);
    te = tlb_entry(env, mmu_idx, vaddr_page);

    /*
     * Hold the TLB lock for the rest of the function. We could acquire/release
     * the lock several times in the function, but it is faster to amortize the
     * acquisition cost by acquiring it just once. Note that this leads to
     * a longer critical section, but this is not a concern since the TLB lock
     * is unlikely to be contended.
     */
    qemu_spin_lock(&tlb->c.lock);

    /* Note that the tlb is no longer clean.  */
    tlb->c.dirty |= 1 << mmu_idx;

    /* Make sure there's no cached translation for the new page.  */
    tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);

    /*
     * Only evict the old entry to the victim tlb if it's for a
     * different page; otherwise just overwrite the stale data.
     */
    if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
        unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
        CPUTLBEntry *tv = &desc->vtable[vidx];

        /* Evict the old entry into the victim tlb.  */
        copy_tlb_helper_locked(tv, te);
        desc->viotlb[vidx] = desc->iotlb[index];
        tlb_n_used_entries_dec(env, mmu_idx);
    }

    /* refill the tlb */
    /*
     * At this point iotlb contains a physical section number in the lower
     * TARGET_PAGE_BITS, and either
     *  + the ram_addr_t of the page base of the target RAM (RAM)
     *  + the offset within section->mr of the page base (I/O, ROMD)
     * We subtract the vaddr_page (which is page aligned and thus won't
     * disturb the low bits) to give an offset which can be added to the
     * (non-page-aligned) vaddr of the eventual memory access to get
     * the MemoryRegion offset for the access. Note that the vaddr we
     * subtract here is that of the page base, and not the same as the
     * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
     */
    desc->iotlb[index].addr = iotlb - vaddr_page;
    desc->iotlb[index].attrs = attrs;

    /* Now calculate the new entry */
    tn.addend = addend - vaddr_page;
    if (prot & PAGE_READ) {
        tn.addr_read = address;
        if (wp_flags & BP_MEM_READ) {
            tn.addr_read |= TLB_WATCHPOINT;
        }
    } else {
        tn.addr_read = -1;
    }

    if (prot & PAGE_EXEC) {
        tn.addr_code = address;
    } else {
        tn.addr_code = -1;
    }

    tn.addr_write = -1;
    if (prot & PAGE_WRITE) {
        tn.addr_write = write_address;
        if (prot & PAGE_WRITE_INV) {
            tn.addr_write |= TLB_INVALID_MASK;
        }
        if (wp_flags & BP_MEM_WRITE) {
            tn.addr_write |= TLB_WATCHPOINT;
        }
    }

    copy_tlb_helper_locked(te, &tn);
    tlb_n_used_entries_inc(env, mmu_idx);
    qemu_spin_unlock(&tlb->c.lock);
}

/* Add a new TLB entry, but without specifying the memory
 * transaction attributes to be used.
 */
void tlb_set_page(CPUState *cpu, target_ulong vaddr,
                  hwaddr paddr, int prot,
                  int mmu_idx, target_ulong size)
{
    tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
                            prot, mmu_idx, size);
}

static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
    ram_addr_t ram_addr;

    ram_addr = qemu_ram_addr_from_host(ptr);
    if (ram_addr == RAM_ADDR_INVALID) {
        error_report("Bad ram pointer %p", ptr);
        abort();
    }
    return ram_addr;
}

/*
 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
 * be discarded and looked up again (e.g. via tlb_entry()).
 */
static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
                     MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);
    bool ok;

    /*
     * This is not a probe, so only valid return is success; failure
     * should result in exception + longjmp to the cpu loop.
     */
    ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr);
    assert(ok);
}

static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                         int mmu_idx, target_ulong addr, uintptr_t retaddr,
                         MMUAccessType access_type, MemOp op)
{
    CPUState *cpu = env_cpu(env);
    hwaddr mr_offset;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    uint64_t val;
    bool locked = false;
    MemTxResult r;

    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
    mr = section->mr;
    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
    cpu->mem_io_pc = retaddr;
    if (!cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }

    if (mr->global_locking && !qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        locked = true;
    }
    r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
    if (r != MEMTX_OK) {
        hwaddr physaddr = mr_offset +
            section->offset_within_address_space -
            section->offset_within_region;

        cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
                               mmu_idx, iotlbentry->attrs, r, retaddr);
    }
    if (locked) {
        qemu_mutex_unlock_iothread();
    }

    return val;
}

static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                      int mmu_idx, uint64_t val, target_ulong addr,
                      uintptr_t retaddr, MemOp op)
{
    CPUState *cpu = env_cpu(env);
    hwaddr mr_offset;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    bool locked = false;
    MemTxResult r;

    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
    mr = section->mr;
    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
    if (!cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }
    cpu->mem_io_pc = retaddr;

    if (mr->global_locking && !qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        locked = true;
    }
    r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
    if (r != MEMTX_OK) {
        hwaddr physaddr = mr_offset +
            section->offset_within_address_space -
            section->offset_within_region;

        cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
                               MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
                               retaddr);
    }
    if (locked) {
        qemu_mutex_unlock_iothread();
    }
}

static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
{
#if TCG_OVERSIZED_GUEST
    return *(target_ulong *)((uintptr_t)entry + ofs);
#else
    /* ofs might correspond to .addr_write, so use atomic_read */
    return atomic_read((target_ulong *)((uintptr_t)entry + ofs));
#endif
}

/*
 * Return true if ADDR is present in the victim tlb, and has been copied
 * back to the main tlb.
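 *
 * The victim tlb is a small, fully associative cache of entries that were
 * recently evicted from the direct-mapped main table (see the eviction in
 * tlb_set_page_with_attrs); on a hit the main and victim entries are swapped.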
 */
static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
                           size_t elt_ofs, target_ulong page)
{
    size_t vidx;

    assert_cpu_is_self(env_cpu(env));
    for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
        CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
        target_ulong cmp;

        /* elt_ofs might correspond to .addr_write, so use atomic_read */
#if TCG_OVERSIZED_GUEST
        cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
#else
        cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
#endif

        if (cmp == page) {
            /* Found entry in victim tlb, swap tlb and iotlb.  */
            CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];

            qemu_spin_lock(&env_tlb(env)->c.lock);
            copy_tlb_helper_locked(&tmptlb, tlb);
            copy_tlb_helper_locked(tlb, vtlb);
            copy_tlb_helper_locked(vtlb, &tmptlb);
            qemu_spin_unlock(&env_tlb(env)->c.lock);

            CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
            CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
            tmpio = *io; *io = *vio; *vio = tmpio;
            return true;
        }
    }
    return false;
}

/* Macro to call the above, with local variables from the use context.  */
#define VICTIM_TLB_HIT(TY, ADDR) \
  victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
                 (ADDR) & TARGET_PAGE_MASK)

/*
 * Return a ram_addr_t for the virtual address for execution.
 *
 * Return -1 if we can't translate and execute from an entire page
 * of RAM.  This will force us to execute by loading and translating
 * one insn at a time, without caching.
 *
 * NOTE: This function will trigger an exception if the page is
 * not executable.
 */
tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
                                        void **hostp)
{
    uintptr_t mmu_idx = cpu_mmu_index(env, true);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    void *p;

    if (unlikely(!tlb_hit(entry->addr_code, addr))) {
        if (!VICTIM_TLB_HIT(addr_code, addr)) {
            tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);

            if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
                /*
                 * The MMU protection covers a smaller range than a target
                 * page, so we must redo the MMU check for every insn.
                 */
                return -1;
            }
        }
        assert(tlb_hit(entry->addr_code, addr));
    }

    if (unlikely(entry->addr_code & TLB_MMIO)) {
        /* The region is not backed by RAM.  */
        if (hostp) {
            *hostp = NULL;
        }
        return -1;
    }

    p = (void *)((uintptr_t)addr + entry->addend);
    if (hostp) {
        *hostp = p;
    }
    return qemu_ram_addr_from_host_nofail(p);
}

tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
{
    return get_page_addr_code_hostp(env, addr, NULL);
}

static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
                           CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
{
    ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;

    trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);

    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
        struct page_collection *pages
            = page_collection_lock(ram_addr, ram_addr + size);
        tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr);
        page_collection_unlock(pages);
    }

    /*
     * Set both VGA and migration bits for simplicity and to remove
     * the notdirty callback faster.
     */
    cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);

    /* We remove the notdirty callback only if the code has been flushed. */
    if (!cpu_physical_memory_is_clean(ram_addr)) {
        trace_memory_notdirty_set_dirty(mem_vaddr);
        tlb_set_dirty(cpu, mem_vaddr);
    }
}

/*
 * Probe for whether the specified guest access is permitted. If it is not
 * permitted then an exception will be taken in the same way as if this
 * were a real access (and we will not return).
 * If the size is 0 or the page requires I/O access, returns NULL; otherwise,
 * returns the address of the host page similar to tlb_vaddr_to_host().
 */
void *probe_access(CPUArchState *env, target_ulong addr, int size,
                   MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
{
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr;
    size_t elt_ofs;
    int wp_access;

    g_assert(-(addr | TARGET_PAGE_MASK) >= size);

    switch (access_type) {
    case MMU_DATA_LOAD:
        elt_ofs = offsetof(CPUTLBEntry, addr_read);
        wp_access = BP_MEM_READ;
        break;
    case MMU_DATA_STORE:
        elt_ofs = offsetof(CPUTLBEntry, addr_write);
        wp_access = BP_MEM_WRITE;
        break;
    case MMU_INST_FETCH:
        elt_ofs = offsetof(CPUTLBEntry, addr_code);
        wp_access = BP_MEM_READ;
        break;
    default:
        g_assert_not_reached();
    }
    tlb_addr = tlb_read_ofs(entry, elt_ofs);

    if (unlikely(!tlb_hit(tlb_addr, addr))) {
        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, retaddr);
            /* TLB resize via tlb_fill may have moved the entry.  */
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_read_ofs(entry, elt_ofs);
    }

    if (!size) {
        return NULL;
    }

    if (unlikely(tlb_addr & TLB_FLAGS_MASK)) {
        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Reject I/O access, or other required slow-path.  */
        if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) {
            return NULL;
        }

        /* Handle watchpoints.  */
        if (tlb_addr & TLB_WATCHPOINT) {
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, wp_access, retaddr);
        }

        /* Handle clean RAM pages.  */
        if (tlb_addr & TLB_NOTDIRTY) {
            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
        }
    }

    return (void *)((uintptr_t)addr + entry->addend);
}

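/*
 * Like probe_access, but non-faulting: the page table is probed with
 * cc->tlb_fill(..., probe = true), and NULL is returned if that fails or
 * if the access would need one of the TLB slow paths (I/O, watchpoints,
 * notdirty, ...).
 */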
void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
                        MMUAccessType access_type, int mmu_idx)
{
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr, page;
    size_t elt_ofs;

    switch (access_type) {
    case MMU_DATA_LOAD:
        elt_ofs = offsetof(CPUTLBEntry, addr_read);
        break;
    case MMU_DATA_STORE:
        elt_ofs = offsetof(CPUTLBEntry, addr_write);
        break;
    case MMU_INST_FETCH:
        elt_ofs = offsetof(CPUTLBEntry, addr_code);
        break;
    default:
        g_assert_not_reached();
    }

    page = addr & TARGET_PAGE_MASK;
    tlb_addr = tlb_read_ofs(entry, elt_ofs);

    if (!tlb_hit_page(tlb_addr, page)) {
        uintptr_t index = tlb_index(env, mmu_idx, addr);

        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) {
            CPUState *cs = env_cpu(env);
            CPUClass *cc = CPU_GET_CLASS(cs);

            if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) {
                /* Non-faulting page table read failed.  */
                return NULL;
            }

            /* TLB resize via tlb_fill may have moved the entry.  */
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_read_ofs(entry, elt_ofs);
    }

    if (tlb_addr & ~TARGET_PAGE_MASK) {
        /* IO access */
        return NULL;
    }

    return (void *)((uintptr_t)addr + entry->addend);
}


#ifdef CONFIG_PLUGIN
/*
 * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
 * This should be a hot path as we will have just looked this path up
 * in the softmmu lookup code (or helper). We don't handle re-fills or
 * checking the victim table. This is purely informational.
 *
 * This should never fail as the memory access being instrumented
 * should have just filled the TLB.
 */

bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
                       bool is_store, struct qemu_plugin_hwaddr *data)
{
    CPUArchState *env = cpu->env_ptr;
    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;

    if (likely(tlb_hit(tlb_addr, addr))) {
        /* We must have an iotlb entry for MMIO */
        if (tlb_addr & TLB_MMIO) {
            CPUIOTLBEntry *iotlbentry;
            iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
            data->is_io = true;
            data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr,
                                                  iotlbentry->attrs);
            data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
        } else {
            data->is_io = false;
            data->v.ram.hostaddr = addr + tlbe->addend;
        }
        return true;
    }
    return false;
}

#endif

/* Probe for a read-modify-write atomic operation.  Do not allow unaligned
 * operations, or io operations to proceed.  Return the host address.  */
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    size_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(tlbe);
    MemOp mop = get_memop(oi);
    int a_bits = get_alignment_bits(mop);
    int s_bits = mop & MO_SIZE;
    void *hostaddr;

    /* Adjust the given return address.  */
    retaddr -= GETPC_ADJ;

    /* Enforce guest required alignment.  */
    if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
        /* ??? Maybe indicate atomic op to cpu_unaligned_access */
        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* Enforce qemu required alignment.  */
    if (unlikely(addr & ((1 << s_bits) - 1))) {
        /* We get here if guest alignment was not requested,
           or was not enforced by cpu_unaligned_access above.
           We might widen the access and emulate, but for now
           mark an exception and exit the cpu loop.  */
        goto stop_the_world;
    }

    /* Check TLB entry and enforce page permissions.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!VICTIM_TLB_HIT(addr_write, addr)) {
            tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            tlbe = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
    }

    /* Notice an IO access or a needs-MMU-lookup access */
    if (unlikely(tlb_addr & TLB_MMIO)) {
        /* There's really nothing that can be done to
           support this apart from stop-the-world.  */
        goto stop_the_world;
    }

    /* Let the guest notice RMW on a write-only page.  */
    if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
        tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
                 mmu_idx, retaddr);
        /* Since we don't support reads and writes to different addresses,
           and we do have the proper page loaded for write, this shouldn't
           ever return.  But just in case, handle via stop-the-world.  */
        goto stop_the_world;
    }

    hostaddr = (void *)((uintptr_t)addr + tlbe->addend);

    if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
        notdirty_write(env_cpu(env), addr, 1 << s_bits,
                       &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
    }

    return hostaddr;

 stop_the_world:
    cpu_loop_exit_atomic(env_cpu(env), retaddr);
}

/*
 * Load Helpers
 *
 * We support two different access types. SOFTMMU_CODE_ACCESS is
 * specifically for reading instructions from system memory. It is
 * called by the translation loop and in some helpers where the code
 * is disassembled. It shouldn't be called directly by guest code.
 */

typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
                                TCGMemOpIdx oi, uintptr_t retaddr);

static inline uint64_t QEMU_ALWAYS_INLINE
load_memop(const void *haddr, MemOp op)
{
    switch (op) {
    case MO_UB:
        return ldub_p(haddr);
    case MO_BEUW:
        return lduw_be_p(haddr);
    case MO_LEUW:
        return lduw_le_p(haddr);
    case MO_BEUL:
        return (uint32_t)ldl_be_p(haddr);
    case MO_LEUL:
        return (uint32_t)ldl_le_p(haddr);
    case MO_BEQ:
        return ldq_be_p(haddr);
    case MO_LEQ:
        return ldq_le_p(haddr);
    default:
        qemu_build_not_reached();
    }
}

static inline uint64_t QEMU_ALWAYS_INLINE
load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
            uintptr_t retaddr, MemOp op, bool code_read,
            FullLoadHelper *full_load)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
    const size_t tlb_off = code_read ?
        offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
    const MMUAccessType access_type =
        code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    uint64_t res;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, access_type,
                             mmu_idx, retaddr);
    }

    /* If the TLB entry is for a different page, reload and try again.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size,
                     access_type, mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = code_read ? entry->addr_code : entry->addr_read;
        tlb_addr &= ~TLB_INVALID_MASK;
    }

    /* Handle anything that isn't just a straight memory access.  */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through full_load.  */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints.  */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out.  */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_READ, retaddr);
        }

        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access.  */
        if (likely(tlb_addr & TLB_MMIO)) {
            return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
                            access_type, op ^ (need_swap * MO_BSWAP));
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two load_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this.  ;-)
         */
        if (unlikely(need_swap)) {
            return load_memop(haddr, op ^ MO_BSWAP);
        }
        return load_memop(haddr, op);
    }

    /* Handle slow unaligned access (it spans two pages or IO).  */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
        target_ulong addr1, addr2;
        uint64_t r1, r2;
        unsigned shift;
    do_unaligned_access:
        addr1 = addr & ~((target_ulong)size - 1);
        addr2 = addr1 + size;
        r1 = full_load(env, addr1, oi, retaddr);
        r2 = full_load(env, addr2, oi, retaddr);
        shift = (addr & (size - 1)) * 8;

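        /*
         * For example, a 4-byte little-endian load at addr % 4 == 2 reads
         * r1 at addr - 2 and r2 at addr + 2, then combines
         * (r1 >> 16) | (r2 << 16) and masks the result to 32 bits.
         */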
        if (memop_big_endian(op)) {
            /* Big-endian combine.  */
            res = (r1 << shift) | (r2 >> ((size * 8) - shift));
        } else {
            /* Little-endian combine.  */
            res = (r1 >> shift) | (r2 << ((size * 8) - shift));
        }
        return res & MAKE_64BIT_MASK(0, size * 8);
    }

    haddr = (void *)((uintptr_t)addr + entry->addend);
    return load_memop(haddr, op);
}

/*
 * For the benefit of TCG generated code, we want to avoid the
 * complication of ABI-specific return type promotion and always
 * return a value extended to the register size of the host. This is
 * tcg_target_long, except in the case of a 32-bit host and 64-bit
 * data, and for that we always have uint64_t.
 *
 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
 */

static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
}

tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
                                     TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_ldub_mmu(env, addr, oi, retaddr);
}

static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
                       full_le_lduw_mmu);
}

tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_le_lduw_mmu(env, addr, oi, retaddr);
}

static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
                       full_be_lduw_mmu);
}

tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_be_lduw_mmu(env, addr, oi, retaddr);
}

static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
                       full_le_ldul_mmu);
}

tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_le_ldul_mmu(env, addr, oi, retaddr);
}

static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
                       full_be_ldul_mmu);
}

tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_be_ldul_mmu(env, addr, oi, retaddr);
}

uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
                           TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
                       helper_le_ldq_mmu);
}

uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
                           TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
                       helper_be_ldq_mmu);
}

/*
 * Provide signed versions of the load routines as well.  We can of course
 * avoid this for 64-bit data, or for 32-bit data on 32-bit host.
 */


tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
                                     TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
}

/*
 * Load helpers for cpu_ldst.h.
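 *
 * cpu_load_helper() wraps the full_*_mmu helpers above with the memory
 * tracing and plugin callbacks expected by these entry points.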
 */

static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
                                       int mmu_idx, uintptr_t retaddr,
                                       MemOp op, FullLoadHelper *full_load)
{
    uint16_t meminfo;
    TCGMemOpIdx oi;
    uint64_t ret;

    meminfo = trace_mem_get_info(op, mmu_idx, false);
    trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);

    op &= ~MO_SIGN;
    oi = make_memop_idx(op, mmu_idx);
    ret = full_load(env, addr, oi, retaddr);

    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);

    return ret;
}

uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                            int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu);
}

int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                       int mmu_idx, uintptr_t ra)
{
    return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB,
                                   full_ldub_mmu);
}

uint32_t cpu_lduw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                            int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUW,
                           MO_TE == MO_LE
                           ? full_le_lduw_mmu : full_be_lduw_mmu);
}

int cpu_ldsw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                       int mmu_idx, uintptr_t ra)
{
    return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_TESW,
                                    MO_TE == MO_LE
                                    ? full_le_lduw_mmu : full_be_lduw_mmu);
}

uint32_t cpu_ldl_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                           int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUL,
                           MO_TE == MO_LE
                           ? full_le_ldul_mmu : full_be_ldul_mmu);
}

uint64_t cpu_ldq_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                           int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEQ,
                           MO_TE == MO_LE
                           ? helper_le_ldq_mmu : helper_be_ldq_mmu);
}

uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr,
                          uintptr_t retaddr)
{
    return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_lduw_data_ra(CPUArchState *env, target_ulong ptr,
                          uintptr_t retaddr)
{
    return cpu_lduw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

int cpu_ldsw_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldsw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_ldl_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldl_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint64_t cpu_ldq_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldq_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldub_data_ra(env, ptr, 0);
}

int cpu_ldsb_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldsb_data_ra(env, ptr, 0);
}

uint32_t cpu_lduw_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_lduw_data_ra(env, ptr, 0);
}

int cpu_ldsw_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldsw_data_ra(env, ptr, 0);
}

uint32_t cpu_ldl_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldl_data_ra(env, ptr, 0);
}

uint64_t cpu_ldq_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldq_data_ra(env, ptr, 0);
}

/*
 * Store Helpers
 */

static inline void QEMU_ALWAYS_INLINE
store_memop(void *haddr, uint64_t val, MemOp op)
{
    switch (op) {
    case MO_UB:
        stb_p(haddr, val);
        break;
    case MO_BEUW:
        stw_be_p(haddr, val);
        break;
    case MO_LEUW:
        stw_le_p(haddr, val);
        break;
    case MO_BEUL:
        stl_be_p(haddr, val);
        break;
    case MO_LEUL:
        stl_le_p(haddr, val);
        break;
    case MO_BEQ:
        stq_be_p(haddr, val);
        break;
    case MO_LEQ:
        stq_le_p(haddr, val);
        break;
    default:
        qemu_build_not_reached();
    }
}
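/*
 * Illustration (assumes the MemOp encoding from exec/memop.h): toggling the
 * MO_BSWAP bit turns a little-endian op into the equal-sized big-endian op,
 * which is what the "op ^ (need_swap * MO_BSWAP)" and "op ^ MO_BSWAP"
 * adjustments in store_helper() below rely on.
 */
QEMU_BUILD_BUG_ON((MO_LEUW ^ MO_BSWAP) != MO_BEUW);
QEMU_BUILD_BUG_ON((MO_LEUL ^ MO_BSWAP) != MO_BEUL);
QEMU_BUILD_BUG_ON((MO_LEQ ^ MO_BSWAP) != MO_BEQ);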
static inline void QEMU_ALWAYS_INLINE
store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
             TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(entry);
    const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* If the TLB entry is for a different page, reload and try again.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
    }

    /* Handle anything that isn't just a straight memory access.  */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through byte stores.  */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints.  */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out.  */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
        }

        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access.  */
        if (tlb_addr & TLB_MMIO) {
            io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
                      op ^ (need_swap * MO_BSWAP));
            return;
        }

        /* Ignore writes to ROM.  */
        if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
            return;
        }

        /* Handle clean RAM pages.  */
        if (tlb_addr & TLB_NOTDIRTY) {
            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two store_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this.  ;-)
         */
        if (unlikely(need_swap)) {
            store_memop(haddr, val, op ^ MO_BSWAP);
        } else {
            store_memop(haddr, val, op);
        }
        return;
    }

    /* Handle slow unaligned access (it spans two pages or IO).  */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
        int i;
        uintptr_t index2;
        CPUTLBEntry *entry2;
        target_ulong page2, tlb_addr2;
        size_t size2;

    do_unaligned_access:
        /*
         * Ensure the second page is in the TLB.  Note that the first page
         * is already guaranteed to be filled, and that the second page
         * cannot evict the first.
         */
        page2 = (addr + size) & TARGET_PAGE_MASK;
        size2 = (addr + size) & ~TARGET_PAGE_MASK;
        index2 = tlb_index(env, mmu_idx, page2);
        entry2 = tlb_entry(env, mmu_idx, page2);
        tlb_addr2 = tlb_addr_write(entry2);
        if (!tlb_hit_page(tlb_addr2, page2)) {
            if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
                tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
                         mmu_idx, retaddr);
                index2 = tlb_index(env, mmu_idx, page2);
                entry2 = tlb_entry(env, mmu_idx, page2);
            }
            tlb_addr2 = tlb_addr_write(entry2);
        }

        /*
         * Handle watchpoints.  Since this may trap, all checks
         * must happen before any store.
         */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            cpu_check_watchpoint(env_cpu(env), addr, size - size2,
                                 env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
                                 BP_MEM_WRITE, retaddr);
        }
        if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
            cpu_check_watchpoint(env_cpu(env), page2, size2,
                                 env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
                                 BP_MEM_WRITE, retaddr);
        }

        /*
         * XXX: not efficient, but simple.
         * This loop must go in the forward direction to avoid issues
         * with self-modifying code in Windows 64-bit.
         */
        for (i = 0; i < size; ++i) {
            uint8_t val8;
            if (memop_big_endian(op)) {
                /* Big-endian extract.  */
                val8 = val >> (((size - 1) * 8) - (i * 8));
            } else {
                /* Little-endian extract.  */
                val8 = val >> (i * 8);
            }
            helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
        }
        return;
    }

    haddr = (void *)((uintptr_t)addr + entry->addend);
    store_memop(haddr, val, op);
}

void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
                        TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_UB);
}

void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUW);
}

void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUW);
}

void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUL);
}

void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUL);
}

void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEQ);
}

void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEQ);
}
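/*
 * Worked example (hypothetical demo_ name, for illustration only): the
 * byte-at-a-time extraction used by the unaligned store path above, shown
 * for a 4-byte value written to a host buffer in big-endian order.
 */
static inline void demo_store_be32_bytewise(uint8_t *buf, uint32_t val)
{
    const unsigned size = 4;
    unsigned i;

    for (i = 0; i < size; i++) {
        /* Big-endian extract: most significant byte first.  */
        buf[i] = val >> (((size - 1) * 8) - (i * 8));
    }
}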
/*
 * Store Helpers for cpu_ldst.h
 */

static inline void QEMU_ALWAYS_INLINE
cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
                 int mmu_idx, uintptr_t retaddr, MemOp op)
{
    TCGMemOpIdx oi;
    uint16_t meminfo;

    meminfo = trace_mem_get_info(op, mmu_idx, true);
    trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);

    oi = make_memop_idx(op, mmu_idx);
    store_helper(env, addr, val, oi, retaddr, op);

    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
}

void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
}

void cpu_stw_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUW);
}

void cpu_stl_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUL);
}

void cpu_stq_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEQ);
}

void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stw_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stw_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stl_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stl_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stq_data_ra(CPUArchState *env, target_ulong ptr,
                     uint64_t val, uintptr_t retaddr)
{
    cpu_stq_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stb_data_ra(env, ptr, val, 0);
}

void cpu_stw_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stw_data_ra(env, ptr, val, 0);
}

void cpu_stl_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stl_data_ra(env, ptr, val, 0);
}

void cpu_stq_data(CPUArchState *env, target_ulong ptr, uint64_t val)
{
    cpu_stq_data_ra(env, ptr, val, 0);
}

/* First set of helpers allows passing in of OI and RETADDR.  This makes
   them callable from other helpers.  */

#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
#define ATOMIC_NAME(X) \
    HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
#define ATOMIC_MMU_DECLS
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
#define ATOMIC_MMU_CLEANUP
#define ATOMIC_MMU_IDX get_mmuidx(oi)

#include "atomic_common.inc.c"

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif

/* Second set of helpers are directly callable from TCG as helpers.  */

#undef EXTRA_ARGS
#undef ATOMIC_NAME
#undef ATOMIC_MMU_LOOKUP
#define EXTRA_ARGS , TCGMemOpIdx oi
#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC())

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif
#undef ATOMIC_MMU_IDX
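/*
 * Usage sketch (hedged): the DATA_SIZE == 4 inclusions above are expected
 * to generate, among others, a helper_atomic_cmpxchgl_le_mmu() that takes
 * the memop index and return address explicitly.  The demo_ wrapper and the
 * choice of MO_ALIGN are assumptions made purely for illustration.
 */
static inline uint32_t demo_cas_le32(CPUArchState *env, target_ulong addr,
                                     uint32_t cmpv, uint32_t newv,
                                     int mmu_idx, uintptr_t retaddr)
{
    /* Atomic accesses are expected to be naturally aligned.  */
    TCGMemOpIdx oi = make_memop_idx(MO_LEUL | MO_ALIGN, mmu_idx);

    return helper_atomic_cmpxchgl_le_mmu(env, addr, cmpv, newv, oi, retaddr);
}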
/* Code access functions.  */

static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
}

uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
    return full_ldub_code(env, addr, oi, 0);
}

static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
}

uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
    return full_lduw_code(env, addr, oi, 0);
}

static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
}

uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
    return full_ldl_code(env, addr, oi, 0);
}

static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
}

uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
    return full_ldq_code(env, addr, oi, 0);
}
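/*
 * Usage sketch (hypothetical demo_ name): a target translator would fetch
 * guest instruction words through the code-access loaders above, e.g. a
 * 4-byte, target-endian fetch at the current program counter.
 */
static inline uint32_t demo_fetch_insn_word(CPUArchState *env, target_ulong pc)
{
    return cpu_ldl_code(env, pc);
}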