/*
 *  Common CPU TLB handling
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
#include "exec/cpu_ldst.h"
#include "exec/cputlb.h"
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "tcg/tcg.h"
#include "qemu/error-report.h"
#include "exec/log.h"
#include "exec/helper-proto.h"
#include "qemu/atomic.h"
#include "qemu/atomic128.h"
#include "translate-all.h"
#include "trace-root.h"
#include "trace/mem.h"
#ifdef CONFIG_PLUGIN
#include "qemu/plugin-memory.h"
#endif

/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
/* #define DEBUG_TLB */
/* #define DEBUG_TLB_LOG */

#ifdef DEBUG_TLB
# define DEBUG_TLB_GATE 1
# ifdef DEBUG_TLB_LOG
#  define DEBUG_TLB_LOG_GATE 1
# else
#  define DEBUG_TLB_LOG_GATE 0
# endif
#else
# define DEBUG_TLB_GATE 0
# define DEBUG_TLB_LOG_GATE 0
#endif

#define tlb_debug(fmt, ...) do { \
    if (DEBUG_TLB_LOG_GATE) { \
        qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
                      ## __VA_ARGS__); \
    } else if (DEBUG_TLB_GATE) { \
        fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
    } \
} while (0)

#define assert_cpu_is_self(cpu) do {                              \
        if (DEBUG_TLB_GATE) {                                     \
            g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
        }                                                         \
    } while (0)

/* run_on_cpu_data.target_ptr should always be big enough for a
 * target_ulong even on 32 bit builds */
QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));

/* We currently can't handle more than 16 bits in the MMUIDX bitmask.
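 * (The mmu_idx bitmap is carried around as a uint16_t, e.g. as
 * run_on_cpu_data.host_int in the flush helpers below, which is where the
 * 16-bit limit comes from.)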
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)

static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
{
    return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
}

static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
                             size_t max_entries)
{
    desc->window_begin_ns = ns;
    desc->window_max_entries = max_entries;
}

static void tlb_dyn_init(CPUArchState *env)
{
    int i;

    for (i = 0; i < NB_MMU_MODES; i++) {
        CPUTLBDesc *desc = &env_tlb(env)->d[i];
        size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;

        tlb_window_reset(desc, get_clock_realtime(), 0);
        desc->n_used_entries = 0;
        env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
        env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries);
        env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries);
    }
}

/**
 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
 * @env: CPU that owns the TLB
 * @mmu_idx: MMU index of the TLB
 *
 * Called with tlb_lock_held.
 *
 * We have two main constraints when resizing a TLB: (1) we only resize it
 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
 * the array or unnecessarily flushing it), which means we do not control how
 * frequently the resizing can occur; (2) we don't have access to the guest's
 * future scheduling decisions, and therefore have to decide the magnitude of
 * the resize based on past observations.
 *
 * In general, a memory-hungry process can benefit greatly from an appropriately
 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
 * we just have to make the TLB as large as possible; while an oversized TLB
 * results in minimal TLB miss rates, it also takes longer to be flushed
 * (flushes can be _very_ frequent), and the reduced locality can also hurt
 * performance.
 *
 * To achieve near-optimal performance for all kinds of workloads, we:
 *
 * 1. Aggressively increase the size of the TLB when the use rate of the
 * TLB being flushed is high, since it is likely that in the near future this
 * memory-hungry process will execute again, and its memory hungriness will
 * probably be similar.
 *
 * 2. Slowly reduce the size of the TLB as the use rate declines over a
 * reasonably large time window. The rationale is that if in such a time window
 * we have not observed a high TLB use rate, it is likely that we won't observe
 * it in the near future. In that case, once a time window expires we downsize
 * the TLB to match the maximum use rate observed in the window.
 *
 * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
 * since in that range performance is likely near-optimal. Recall that the TLB
 * is direct mapped, so we want the use rate to be low (or at least not too
 * high), since otherwise we are likely to have a significant amount of
 * conflict misses.
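 *
 * As a worked example of the policy implemented below: with 1024 entries
 * and a peak of 768 used entries in the window, the use rate is 75%, so the
 * table is doubled on the next flush; with a peak of 150 entries (14%) and
 * an expired window, it is shrunk to pow2ceil(150) == 256, subject to the
 * CPU_TLB_DYN_MIN_BITS floor.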
 */
static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
{
    CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
    size_t old_size = tlb_n_entries(env, mmu_idx);
    size_t rate;
    size_t new_size = old_size;
    int64_t now = get_clock_realtime();
    int64_t window_len_ms = 100;
    int64_t window_len_ns = window_len_ms * 1000 * 1000;
    bool window_expired = now > desc->window_begin_ns + window_len_ns;

    if (desc->n_used_entries > desc->window_max_entries) {
        desc->window_max_entries = desc->n_used_entries;
    }
    rate = desc->window_max_entries * 100 / old_size;

    if (rate > 70) {
        new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
    } else if (rate < 30 && window_expired) {
        size_t ceil = pow2ceil(desc->window_max_entries);
        size_t expected_rate = desc->window_max_entries * 100 / ceil;

        /*
         * Avoid undersizing when the max number of entries seen is just below
         * a pow2. For instance, if max_entries == 1025, the expected use rate
         * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
         * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
         * later. Thus, make sure that the expected use rate remains below 70%.
         * (and since we double the size, that means the lowest rate we'd
         * expect to get is 35%, which is still in the 30-70% range where
         * we consider that the size is appropriate.)
         */
        if (expected_rate > 70) {
            ceil *= 2;
        }
        new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
    }

    if (new_size == old_size) {
        if (window_expired) {
            tlb_window_reset(desc, now, desc->n_used_entries);
        }
        return;
    }

    g_free(env_tlb(env)->f[mmu_idx].table);
    g_free(env_tlb(env)->d[mmu_idx].iotlb);

    tlb_window_reset(desc, now, 0);
    /* desc->n_used_entries is cleared by the caller */
    env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
    env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
    env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
    /*
     * If the allocations fail, try smaller sizes. We just freed some
     * memory, so going back to half of new_size has a good chance of working.
     * Increased memory pressure elsewhere in the system might cause the
     * allocations to fail though, so we progressively reduce the allocation
     * size, aborting if we cannot even allocate the smallest TLB we support.
     */
    while (env_tlb(env)->f[mmu_idx].table == NULL ||
           env_tlb(env)->d[mmu_idx].iotlb == NULL) {
        if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
            error_report("%s: %s", __func__, strerror(errno));
            abort();
        }
        new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
        env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;

        g_free(env_tlb(env)->f[mmu_idx].table);
        g_free(env_tlb(env)->d[mmu_idx].iotlb);
        env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
        env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
    }
}

static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx)
{
    tlb_mmu_resize_locked(env, mmu_idx);
    memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
    env_tlb(env)->d[mmu_idx].n_used_entries = 0;
}

static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
{
    env_tlb(env)->d[mmu_idx].n_used_entries++;
}

static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
{
    env_tlb(env)->d[mmu_idx].n_used_entries--;
}

void tlb_init(CPUState *cpu)
{
    CPUArchState *env = cpu->env_ptr;

    qemu_spin_init(&env_tlb(env)->c.lock);

    /* Ensure that cpu_reset performs a full flush. */
    env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;

    tlb_dyn_init(env);
}

/* flush_all_helper: run fn across all cpus
 *
 * If the wait flag is set then the src cpu's helper will be queued as
 * "safe" work and the loop exited creating a synchronisation point
 * where all queued work will be finished before execution starts
 * again.
 */
static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
                             run_on_cpu_data d)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu != src) {
            async_run_on_cpu(cpu, fn, d);
        }
    }
}

void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
{
    CPUState *cpu;
    size_t full = 0, part = 0, elide = 0;

    CPU_FOREACH(cpu) {
        CPUArchState *env = cpu->env_ptr;

        full += atomic_read(&env_tlb(env)->c.full_flush_count);
        part += atomic_read(&env_tlb(env)->c.part_flush_count);
        elide += atomic_read(&env_tlb(env)->c.elide_flush_count);
    }
    *pfull = full;
    *ppart = part;
    *pelide = elide;
}

static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
{
    tlb_table_flush_by_mmuidx(env, mmu_idx);
    env_tlb(env)->d[mmu_idx].large_page_addr = -1;
    env_tlb(env)->d[mmu_idx].large_page_mask = -1;
    env_tlb(env)->d[mmu_idx].vindex = 0;
    memset(env_tlb(env)->d[mmu_idx].vtable, -1,
           sizeof(env_tlb(env)->d[0].vtable));
}

static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
{
    CPUArchState *env = cpu->env_ptr;
    uint16_t asked = data.host_int;
    uint16_t all_dirty, work, to_clean;

    assert_cpu_is_self(cpu);

    tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);

    qemu_spin_lock(&env_tlb(env)->c.lock);

    all_dirty = env_tlb(env)->c.dirty;
    to_clean = asked & all_dirty;
    all_dirty &= ~to_clean;
    env_tlb(env)->c.dirty = all_dirty;

    for (work = to_clean; work != 0; work &= work - 1) {
        int mmu_idx = ctz32(work);
        tlb_flush_one_mmuidx_locked(env, mmu_idx);
    }

    qemu_spin_unlock(&env_tlb(env)->c.lock);

    cpu_tb_jmp_cache_clear(cpu);

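    /*
     * Update the flush statistics below: a flush of every mmu_idx counts as
     * one full flush, otherwise each flushed mmu_idx counts as a partial
     * flush and each mmu_idx that was already clean (and therefore skipped)
     * counts as an elided flush.
     */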
    if (to_clean == ALL_MMUIDX_BITS) {
        atomic_set(&env_tlb(env)->c.full_flush_count,
                   env_tlb(env)->c.full_flush_count + 1);
    } else {
        atomic_set(&env_tlb(env)->c.part_flush_count,
                   env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
        if (to_clean != asked) {
            atomic_set(&env_tlb(env)->c.elide_flush_count,
                       env_tlb(env)->c.elide_flush_count +
                       ctpop16(asked & ~to_clean));
        }
    }
}

void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
{
    tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);

    if (cpu->created && !qemu_cpu_is_self(cpu)) {
        async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
                         RUN_ON_CPU_HOST_INT(idxmap));
    } else {
        tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
    }
}

void tlb_flush(CPUState *cpu)
{
    tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
}

void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;

    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
}

void tlb_flush_all_cpus(CPUState *src_cpu)
{
    tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
}

void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;

    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
}

void tlb_flush_all_cpus_synced(CPUState *src_cpu)
{
    tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
}

static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
                                        target_ulong page)
{
    return tlb_hit_page(tlb_entry->addr_read, page) ||
           tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
           tlb_hit_page(tlb_entry->addr_code, page);
}

/**
 * tlb_entry_is_empty - return true if the entry is not in use
 * @te: pointer to CPUTLBEntry
 */
static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
{
    return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
}

/* Called with tlb_c.lock held */
static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
                                          target_ulong page)
{
    if (tlb_hit_page_anyprot(tlb_entry, page)) {
        memset(tlb_entry, -1, sizeof(*tlb_entry));
        return true;
    }
    return false;
}

/* Called with tlb_c.lock held */
static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
                                              target_ulong page)
{
    CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
    int k;

    assert_cpu_is_self(env_cpu(env));
    for (k = 0; k < CPU_VTLB_SIZE; k++) {
        if (tlb_flush_entry_locked(&d->vtable[k], page)) {
            tlb_n_used_entries_dec(env, mmu_idx);
        }
    }
}

static void tlb_flush_page_locked(CPUArchState *env, int midx,
                                  target_ulong page)
{
    target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
    target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;

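    /*
     * Only one large-page region is tracked per mmu_idx (large_page_addr /
     * large_page_mask, see tlb_add_large_page below), so a page that falls
     * inside that region cannot be flushed individually and the whole
     * mmu_idx is flushed instead.
     */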
    /* Check if we need to flush due to large pages.  */
    if ((page & lp_mask) == lp_addr) {
        tlb_debug("forcing full flush midx %d ("
                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
                  midx, lp_addr, lp_mask);
        tlb_flush_one_mmuidx_locked(env, midx);
    } else {
        if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
            tlb_n_used_entries_dec(env, midx);
        }
        tlb_flush_vtlb_page_locked(env, midx, page);
    }
}

/* As we are going to hijack the bottom bits of the page address for a
 * mmuidx bit mask we need to fail to build if we can't do that
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);

static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
                                                run_on_cpu_data data)
{
    CPUArchState *env = cpu->env_ptr;
    target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
    target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
    unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
              addr, mmu_idx_bitmap);

    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
            tlb_flush_page_locked(env, mmu_idx, addr);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);

    tb_flush_jmp_cache(cpu, addr);
}

void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
{
    target_ulong addr_and_mmu_idx;

    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);

    /* This should already be page aligned */
    addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
    addr_and_mmu_idx |= idxmap;

    if (!qemu_cpu_is_self(cpu)) {
        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work,
                         RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
    } else {
        tlb_flush_page_by_mmuidx_async_work(
            cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
    }
}

void tlb_flush_page(CPUState *cpu, target_ulong addr)
{
    tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
}

void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
                                       uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
    target_ulong addr_and_mmu_idx;

    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    /* This should already be page aligned */
    addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
    addr_and_mmu_idx |= idxmap;

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
    fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
}

void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
{
    tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
}

void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
                                              target_ulong addr,
                                              uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
    target_ulong addr_and_mmu_idx;

    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    /* This should already be page aligned */
    addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
    addr_and_mmu_idx |= idxmap;

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
    async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
}

void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
{
    tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
}

/* update the TLBs so that writes to code in the virtual page 'addr'
   can be detected */
void tlb_protect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
                                             DIRTY_MEMORY_CODE);
}

/* update the TLB so that writes in physical page 'phys_addr' are no longer
   tested for self modifying code */
void tlb_unprotect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
}


/*
 * Dirty write flag handling
 *
 * When the TCG code writes to a location it looks up the address in
 * the TLB and uses that data to compute the final address. If any of
 * the lower bits of the address are set then the slow path is forced.
 * There are a number of reasons to do this but for normal RAM the
 * most usual is detecting writes to code regions which may invalidate
 * generated code.
 *
 * Other vCPUs might be reading their TLBs during guest execution, so we update
 * te->addr_write with atomic_set. We don't need to worry about this for
 * oversized guests as MTTCG is disabled for them.
 *
 * Called with tlb_c.lock held.
 */
static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
                                         uintptr_t start, uintptr_t length)
{
    uintptr_t addr = tlb_entry->addr_write;

    if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
                 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
        addr &= TARGET_PAGE_MASK;
        addr += tlb_entry->addend;
        if ((addr - start) < length) {
#if TCG_OVERSIZED_GUEST
            tlb_entry->addr_write |= TLB_NOTDIRTY;
#else
            atomic_set(&tlb_entry->addr_write,
                       tlb_entry->addr_write | TLB_NOTDIRTY);
#endif
        }
    }
}

/*
 * Called with tlb_c.lock held.
 * Called only from the vCPU context, i.e. the TLB's owner thread.
 */
static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
{
    *d = *s;
}

/* This is a cross vCPU call (i.e. another vCPU resetting the flags of
 * the target vCPU).
 * We must take tlb_c.lock to avoid racing with another vCPU update. The only
 * thing actually updated is the target TLB entry ->addr_write flags.
 */
void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
{
    CPUArchState *env;

    int mmu_idx;

    env = cpu->env_ptr;
    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        unsigned int i;
        unsigned int n = tlb_n_entries(env, mmu_idx);

        for (i = 0; i < n; i++) {
            tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
                                         start1, length);
        }

        for (i = 0; i < CPU_VTLB_SIZE; i++) {
            tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
                                         start1, length);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);
}

/* Called with tlb_c.lock held */
static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
                                         target_ulong vaddr)
{
    if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
        tlb_entry->addr_write = vaddr;
    }
}

/* update the TLB corresponding to virtual page vaddr
   so that it is no longer dirty */
void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
{
    CPUArchState *env = cpu->env_ptr;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    vaddr &= TARGET_PAGE_MASK;
    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
    }

    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        int k;
        for (k = 0; k < CPU_VTLB_SIZE; k++) {
            tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);
}

/* Our TLB does not support large pages, so remember the area covered by
   large pages and trigger a full TLB flush if these are invalidated. */
static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
                               target_ulong vaddr, target_ulong size)
{
    target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
    target_ulong lp_mask = ~(size - 1);

    if (lp_addr == (target_ulong)-1) {
        /* No previous large page. */
        lp_addr = vaddr;
    } else {
        /* Extend the existing region to include the new page.
           This is a compromise between unnecessary flushes and
           the cost of maintaining a full variable size TLB. */
        lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
        while (((lp_addr ^ vaddr) & lp_mask) != 0) {
            lp_mask <<= 1;
        }
    }
    env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
    env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
}

/* Add a new TLB entry. At most one entry for a given virtual address
 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
 * supplied size is only used by tlb_flush_page.
 *
 * Called from TCG-generated code, which is under an RCU read-side
 * critical section.
 */
void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
                             hwaddr paddr, MemTxAttrs attrs, int prot,
                             int mmu_idx, target_ulong size)
{
    CPUArchState *env = cpu->env_ptr;
    CPUTLB *tlb = env_tlb(env);
    CPUTLBDesc *desc = &tlb->d[mmu_idx];
    MemoryRegionSection *section;
    unsigned int index;
    target_ulong address;
    target_ulong write_address;
    uintptr_t addend;
    CPUTLBEntry *te, tn;
    hwaddr iotlb, xlat, sz, paddr_page;
    target_ulong vaddr_page;
    int asidx = cpu_asidx_from_attrs(cpu, attrs);
    int wp_flags;
    bool is_ram, is_romd;

    assert_cpu_is_self(cpu);

    if (size <= TARGET_PAGE_SIZE) {
        sz = TARGET_PAGE_SIZE;
    } else {
        tlb_add_large_page(env, mmu_idx, vaddr, size);
        sz = size;
    }
    vaddr_page = vaddr & TARGET_PAGE_MASK;
    paddr_page = paddr & TARGET_PAGE_MASK;

    section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
                                                &xlat, &sz, attrs, &prot);
    assert(sz >= TARGET_PAGE_SIZE);

    tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
              " prot=%x idx=%d\n",
              vaddr, paddr, prot, mmu_idx);

    address = vaddr_page;
    if (size < TARGET_PAGE_SIZE) {
        /* Repeat the MMU check and TLB fill on every access. */
        address |= TLB_INVALID_MASK;
    }
    if (attrs.byte_swap) {
        address |= TLB_BSWAP;
    }

    is_ram = memory_region_is_ram(section->mr);
    is_romd = memory_region_is_romd(section->mr);

    if (is_ram || is_romd) {
        /* RAM and ROMD both have associated host memory. */
        addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
    } else {
        /* I/O does not; force the host address to NULL. */
        addend = 0;
    }

    write_address = address;
    if (is_ram) {
        iotlb = memory_region_get_ram_addr(section->mr) + xlat;
        /*
         * Computing is_clean is expensive; avoid all that unless
         * the page is actually writable.
         */
        if (prot & PAGE_WRITE) {
            if (section->readonly) {
                write_address |= TLB_DISCARD_WRITE;
            } else if (cpu_physical_memory_is_clean(iotlb)) {
                write_address |= TLB_NOTDIRTY;
            }
        }
    } else {
        /* I/O or ROMD */
        iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
        /*
         * Writes to romd devices must go through MMIO to enable write.
         * Reads to romd devices go through the ram_ptr found above,
         * but of course reads to I/O must go through MMIO.
         */
        write_address |= TLB_MMIO;
        if (!is_romd) {
            address = write_address;
        }
    }

    wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
                                              TARGET_PAGE_SIZE);

    index = tlb_index(env, mmu_idx, vaddr_page);
    te = tlb_entry(env, mmu_idx, vaddr_page);

    /*
     * Hold the TLB lock for the rest of the function. We could acquire/release
     * the lock several times in the function, but it is faster to amortize the
     * acquisition cost by acquiring it just once. Note that this leads to
     * a longer critical section, but this is not a concern since the TLB lock
     * is unlikely to be contended.
     */
    qemu_spin_lock(&tlb->c.lock);

    /* Note that the tlb is no longer clean. */
    tlb->c.dirty |= 1 << mmu_idx;

    /* Make sure there's no cached translation for the new page. */
    tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);

    /*
     * Only evict the old entry to the victim tlb if it's for a
     * different page; otherwise just overwrite the stale data.
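     * The victim tlb is a small CPU_VTLB_SIZE-entry ring (vindex wraps
     * modulo CPU_VTLB_SIZE), so eviction is just one entry copy plus the
     * matching iotlb copy.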
     */
    if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
        unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
        CPUTLBEntry *tv = &desc->vtable[vidx];

        /* Evict the old entry into the victim tlb. */
        copy_tlb_helper_locked(tv, te);
        desc->viotlb[vidx] = desc->iotlb[index];
        tlb_n_used_entries_dec(env, mmu_idx);
    }

    /* refill the tlb */
    /*
     * At this point iotlb contains a physical section number in the lower
     * TARGET_PAGE_BITS, and either
     *  + the ram_addr_t of the page base of the target RAM (RAM)
     *  + the offset within section->mr of the page base (I/O, ROMD)
     * We subtract the vaddr_page (which is page aligned and thus won't
     * disturb the low bits) to give an offset which can be added to the
     * (non-page-aligned) vaddr of the eventual memory access to get
     * the MemoryRegion offset for the access. Note that the vaddr we
     * subtract here is that of the page base, and not the same as the
     * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
     */
    desc->iotlb[index].addr = iotlb - vaddr_page;
    desc->iotlb[index].attrs = attrs;

    /* Now calculate the new entry */
    tn.addend = addend - vaddr_page;
    if (prot & PAGE_READ) {
        tn.addr_read = address;
        if (wp_flags & BP_MEM_READ) {
            tn.addr_read |= TLB_WATCHPOINT;
        }
    } else {
        tn.addr_read = -1;
    }

    if (prot & PAGE_EXEC) {
        tn.addr_code = address;
    } else {
        tn.addr_code = -1;
    }

    tn.addr_write = -1;
    if (prot & PAGE_WRITE) {
        tn.addr_write = write_address;
        if (prot & PAGE_WRITE_INV) {
            tn.addr_write |= TLB_INVALID_MASK;
        }
        if (wp_flags & BP_MEM_WRITE) {
            tn.addr_write |= TLB_WATCHPOINT;
        }
    }

    copy_tlb_helper_locked(te, &tn);
    tlb_n_used_entries_inc(env, mmu_idx);
    qemu_spin_unlock(&tlb->c.lock);
}

/* Add a new TLB entry, but without specifying the memory
 * transaction attributes to be used.
 */
void tlb_set_page(CPUState *cpu, target_ulong vaddr,
                  hwaddr paddr, int prot,
                  int mmu_idx, target_ulong size)
{
    tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
                            prot, mmu_idx, size);
}

static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
    ram_addr_t ram_addr;

    ram_addr = qemu_ram_addr_from_host(ptr);
    if (ram_addr == RAM_ADDR_INVALID) {
        error_report("Bad ram pointer %p", ptr);
        abort();
    }
    return ram_addr;
}

/*
 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
 * be discarded and looked up again (e.g. via tlb_entry()).
 */
static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
                     MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);
    bool ok;

    /*
     * This is not a probe, so only valid return is success; failure
     * should result in exception + longjmp to the cpu loop.
     */
    ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr);
    assert(ok);
}

static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                         int mmu_idx, target_ulong addr, uintptr_t retaddr,
                         MMUAccessType access_type, MemOp op)
{
    CPUState *cpu = env_cpu(env);
    hwaddr mr_offset;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    uint64_t val;
    bool locked = false;
    MemTxResult r;

    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
    mr = section->mr;
    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
    cpu->mem_io_pc = retaddr;
    if (!cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }

    if (mr->global_locking && !qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        locked = true;
    }
    r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
    if (r != MEMTX_OK) {
        hwaddr physaddr = mr_offset +
            section->offset_within_address_space -
            section->offset_within_region;

        cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
                               mmu_idx, iotlbentry->attrs, r, retaddr);
    }
    if (locked) {
        qemu_mutex_unlock_iothread();
    }

    return val;
}

static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                      int mmu_idx, uint64_t val, target_ulong addr,
                      uintptr_t retaddr, MemOp op)
{
    CPUState *cpu = env_cpu(env);
    hwaddr mr_offset;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    bool locked = false;
    MemTxResult r;

    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
    mr = section->mr;
    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
    if (!cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }
    cpu->mem_io_pc = retaddr;

    if (mr->global_locking && !qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        locked = true;
    }
    r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
    if (r != MEMTX_OK) {
        hwaddr physaddr = mr_offset +
            section->offset_within_address_space -
            section->offset_within_region;

        cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
                               MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
                               retaddr);
    }
    if (locked) {
        qemu_mutex_unlock_iothread();
    }
}

static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
{
#if TCG_OVERSIZED_GUEST
    return *(target_ulong *)((uintptr_t)entry + ofs);
#else
    /* ofs might correspond to .addr_write, so use atomic_read */
    return atomic_read((target_ulong *)((uintptr_t)entry + ofs));
#endif
}

/* Return true if ADDR is present in the victim tlb, and has been copied
   back to the main tlb. */
static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
                           size_t elt_ofs, target_ulong page)
{
    size_t vidx;

    assert_cpu_is_self(env_cpu(env));
    for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
        CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
        target_ulong cmp;

        /* elt_ofs might correspond to .addr_write, so use atomic_read */
#if TCG_OVERSIZED_GUEST
        cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
#else
        cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
#endif

        if (cmp == page) {
            /* Found entry in victim tlb, swap tlb and iotlb.  */
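            /*
             * The CPUTLBEntry copies below are done under the lock because
             * another vCPU may concurrently set TLB_NOTDIRTY in addr_write
             * (see tlb_reset_dirty); the CPUIOTLBEntry swap is only ever
             * performed by the owning vCPU, so it does not need the lock.
             */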
            CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];

            qemu_spin_lock(&env_tlb(env)->c.lock);
            copy_tlb_helper_locked(&tmptlb, tlb);
            copy_tlb_helper_locked(tlb, vtlb);
            copy_tlb_helper_locked(vtlb, &tmptlb);
            qemu_spin_unlock(&env_tlb(env)->c.lock);

            CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
            CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
            tmpio = *io; *io = *vio; *vio = tmpio;
            return true;
        }
    }
    return false;
}

/* Macro to call the above, with local variables from the use context. */
#define VICTIM_TLB_HIT(TY, ADDR) \
  victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
                 (ADDR) & TARGET_PAGE_MASK)

/*
 * Return a ram_addr_t for the virtual address for execution.
 *
 * Return -1 if we can't translate and execute from an entire page
 * of RAM. This will force us to execute by loading and translating
 * one insn at a time, without caching.
 *
 * NOTE: This function will trigger an exception if the page is
 * not executable.
 */
tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
                                        void **hostp)
{
    uintptr_t mmu_idx = cpu_mmu_index(env, true);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    void *p;

    if (unlikely(!tlb_hit(entry->addr_code, addr))) {
        if (!VICTIM_TLB_HIT(addr_code, addr)) {
            tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);

            if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
                /*
                 * The MMU protection covers a smaller range than a target
                 * page, so we must redo the MMU check for every insn.
                 */
                return -1;
            }
        }
        assert(tlb_hit(entry->addr_code, addr));
    }

    if (unlikely(entry->addr_code & TLB_MMIO)) {
        /* The region is not backed by RAM. */
        if (hostp) {
            *hostp = NULL;
        }
        return -1;
    }

    p = (void *)((uintptr_t)addr + entry->addend);
    if (hostp) {
        *hostp = p;
    }
    return qemu_ram_addr_from_host_nofail(p);
}

tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
{
    return get_page_addr_code_hostp(env, addr, NULL);
}

static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
                           CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
{
    ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;

    trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);

    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
        struct page_collection *pages
            = page_collection_lock(ram_addr, ram_addr + size);
        tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr);
        page_collection_unlock(pages);
    }

    /*
     * Set both VGA and migration bits for simplicity and to remove
     * the notdirty callback faster.
     */
    cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);

    /* We remove the notdirty callback only if the code has been flushed. */
    if (!cpu_physical_memory_is_clean(ram_addr)) {
        trace_memory_notdirty_set_dirty(mem_vaddr);
        tlb_set_dirty(cpu, mem_vaddr);
    }
}

/*
 * Probe for whether the specified guest access is permitted. If it is not
 * permitted then an exception will be taken in the same way as if this
 * were a real access (and we will not return).
 * If the size is 0 or the page requires I/O access, returns NULL; otherwise,
 * returns the address of the host page similar to tlb_vaddr_to_host().
 */
void *probe_access(CPUArchState *env, target_ulong addr, int size,
                   MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
{
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr;
    size_t elt_ofs;
    int wp_access;

    g_assert(-(addr | TARGET_PAGE_MASK) >= size);

    switch (access_type) {
    case MMU_DATA_LOAD:
        elt_ofs = offsetof(CPUTLBEntry, addr_read);
        wp_access = BP_MEM_READ;
        break;
    case MMU_DATA_STORE:
        elt_ofs = offsetof(CPUTLBEntry, addr_write);
        wp_access = BP_MEM_WRITE;
        break;
    case MMU_INST_FETCH:
        elt_ofs = offsetof(CPUTLBEntry, addr_code);
        wp_access = BP_MEM_READ;
        break;
    default:
        g_assert_not_reached();
    }
    tlb_addr = tlb_read_ofs(entry, elt_ofs);

    if (unlikely(!tlb_hit(tlb_addr, addr))) {
        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, retaddr);
            /* TLB resize via tlb_fill may have moved the entry. */
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_read_ofs(entry, elt_ofs);
    }

    if (!size) {
        return NULL;
    }

    if (unlikely(tlb_addr & TLB_FLAGS_MASK)) {
        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Reject I/O access, or other required slow-path. */
        if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) {
            return NULL;
        }

        /* Handle watchpoints. */
        if (tlb_addr & TLB_WATCHPOINT) {
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, wp_access, retaddr);
        }

        /* Handle clean RAM pages. */
        if (tlb_addr & TLB_NOTDIRTY) {
            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
        }
    }

    return (void *)((uintptr_t)addr + entry->addend);
}

void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
                        MMUAccessType access_type, int mmu_idx)
{
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr, page;
    size_t elt_ofs;

    switch (access_type) {
    case MMU_DATA_LOAD:
        elt_ofs = offsetof(CPUTLBEntry, addr_read);
        break;
    case MMU_DATA_STORE:
        elt_ofs = offsetof(CPUTLBEntry, addr_write);
        break;
    case MMU_INST_FETCH:
        elt_ofs = offsetof(CPUTLBEntry, addr_code);
        break;
    default:
        g_assert_not_reached();
    }

    page = addr & TARGET_PAGE_MASK;
    tlb_addr = tlb_read_ofs(entry, elt_ofs);

    if (!tlb_hit_page(tlb_addr, page)) {
        uintptr_t index = tlb_index(env, mmu_idx, addr);

        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) {
            CPUState *cs = env_cpu(env);
            CPUClass *cc = CPU_GET_CLASS(cs);

            if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) {
                /* Non-faulting page table read failed. */
                return NULL;
            }

            /* TLB resize via tlb_fill may have moved the entry.  */
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_read_ofs(entry, elt_ofs);
    }

    if (tlb_addr & ~TARGET_PAGE_MASK) {
        /* IO access */
        return NULL;
    }

    return (void *)((uintptr_t)addr + entry->addend);
}


#ifdef CONFIG_PLUGIN
/*
 * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
 * This should be a hot path as we will have just looked this path up
 * in the softmmu lookup code (or helper). We don't handle re-fills or
 * checking the victim table. This is purely informational.
 *
 * This should never fail as the memory access being instrumented
 * should have just filled the TLB.
 */

bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
                       bool is_store, struct qemu_plugin_hwaddr *data)
{
    CPUArchState *env = cpu->env_ptr;
    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;

    if (likely(tlb_hit(tlb_addr, addr))) {
        /* We must have an iotlb entry for MMIO */
        if (tlb_addr & TLB_MMIO) {
            CPUIOTLBEntry *iotlbentry;
            iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
            data->is_io = true;
            data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
            data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
        } else {
            data->is_io = false;
            data->v.ram.hostaddr = addr + tlbe->addend;
        }
        return true;
    }
    return false;
}

#endif

/* Probe for a read-modify-write atomic operation. Do not allow unaligned
 * operations, or io operations to proceed. Return the host address. */
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    size_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(tlbe);
    MemOp mop = get_memop(oi);
    int a_bits = get_alignment_bits(mop);
    int s_bits = mop & MO_SIZE;
    void *hostaddr;

    /* Adjust the given return address. */
    retaddr -= GETPC_ADJ;

    /* Enforce guest required alignment. */
    if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
        /* ??? Maybe indicate atomic op to cpu_unaligned_access */
        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* Enforce qemu required alignment. */
    if (unlikely(addr & ((1 << s_bits) - 1))) {
        /* We get here if guest alignment was not requested,
           or was not enforced by cpu_unaligned_access above.
           We might widen the access and emulate, but for now
           mark an exception and exit the cpu loop. */
        goto stop_the_world;
    }

    /* Check TLB entry and enforce page permissions.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!VICTIM_TLB_HIT(addr_write, addr)) {
            tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            tlbe = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
    }

    /* Notice an IO access or a needs-MMU-lookup access */
    if (unlikely(tlb_addr & TLB_MMIO)) {
        /* There's really nothing that can be done to
           support this apart from stop-the-world. */
        goto stop_the_world;
    }

    /* Let the guest notice RMW on a write-only page. */
    if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
        tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
                 mmu_idx, retaddr);
        /* Since we don't support reads and writes to different addresses,
           and we do have the proper page loaded for write, this shouldn't
           ever return. But just in case, handle via stop-the-world. */
        goto stop_the_world;
    }

    hostaddr = (void *)((uintptr_t)addr + tlbe->addend);

    if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
        notdirty_write(env_cpu(env), addr, 1 << s_bits,
                       &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
    }

    return hostaddr;

 stop_the_world:
    cpu_loop_exit_atomic(env_cpu(env), retaddr);
}

/*
 * Load Helpers
 *
 * We support two different access types. SOFTMMU_CODE_ACCESS is
 * specifically for reading instructions from system memory. It is
 * called by the translation loop and in some helpers where the code
 * is disassembled. It shouldn't be called directly by guest code.
 */

typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
                                TCGMemOpIdx oi, uintptr_t retaddr);

static inline uint64_t QEMU_ALWAYS_INLINE
load_memop(const void *haddr, MemOp op)
{
    switch (op) {
    case MO_UB:
        return ldub_p(haddr);
    case MO_BEUW:
        return lduw_be_p(haddr);
    case MO_LEUW:
        return lduw_le_p(haddr);
    case MO_BEUL:
        return (uint32_t)ldl_be_p(haddr);
    case MO_LEUL:
        return (uint32_t)ldl_le_p(haddr);
    case MO_BEQ:
        return ldq_be_p(haddr);
    case MO_LEQ:
        return ldq_le_p(haddr);
    default:
        qemu_build_not_reached();
    }
}

static inline uint64_t QEMU_ALWAYS_INLINE
load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
            uintptr_t retaddr, MemOp op, bool code_read,
            FullLoadHelper *full_load)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
    const size_t tlb_off = code_read ?
        offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
    const MMUAccessType access_type =
        code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    uint64_t res;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, access_type,
                             mmu_idx, retaddr);
    }

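    /*
     * The low bits of tlb_addr hold TLB_* flags; any flag set diverts the
     * access to the slow path further down, otherwise the access completes
     * with a direct host load (or via the cross-page path for accesses that
     * straddle a page boundary).
     */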
    /* If the TLB entry is for a different page, reload and try again.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size,
                     access_type, mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = code_read ? entry->addr_code : entry->addr_read;
        tlb_addr &= ~TLB_INVALID_MASK;
    }

    /* Handle anything that isn't just a straight memory access. */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through full_load. */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints. */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out. */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_READ, retaddr);
        }

        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access. */
        if (likely(tlb_addr & TLB_MMIO)) {
            return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
                            access_type, op ^ (need_swap * MO_BSWAP));
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two load_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this.  ;-)
         */
        if (unlikely(need_swap)) {
            return load_memop(haddr, op ^ MO_BSWAP);
        }
        return load_memop(haddr, op);
    }

    /* Handle slow unaligned access (it spans two pages or IO). */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
        target_ulong addr1, addr2;
        uint64_t r1, r2;
        unsigned shift;
    do_unaligned_access:
        addr1 = addr & ~((target_ulong)size - 1);
        addr2 = addr1 + size;
        r1 = full_load(env, addr1, oi, retaddr);
        r2 = full_load(env, addr2, oi, retaddr);
        shift = (addr & (size - 1)) * 8;

        if (memop_big_endian(op)) {
            /* Big-endian combine. */
            res = (r1 << shift) | (r2 >> ((size * 8) - shift));
        } else {
            /* Little-endian combine. */
            res = (r1 >> shift) | (r2 << ((size * 8) - shift));
        }
        return res & MAKE_64BIT_MASK(0, size * 8);
    }

    haddr = (void *)((uintptr_t)addr + entry->addend);
    return load_memop(haddr, op);
}

/*
 * For the benefit of TCG generated code, we want to avoid the
 * complication of ABI-specific return type promotion and always
 * return a value extended to the register size of the host. This is
 * tcg_target_long, except in the case of a 32-bit host and 64-bit
 * data, and for that we always have uint64_t.
 *
 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
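 *
 * For example, helper_le_ldul_mmu below returns the 32-bit value
 * zero-extended into a tcg_target_ulong, and the signed variants such as
 * helper_le_ldsl_mmu re-extend it with a simple (int32_t) cast.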
 */

static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
}

tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
                                     TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_ldub_mmu(env, addr, oi, retaddr);
}

static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
                       full_le_lduw_mmu);
}

tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_le_lduw_mmu(env, addr, oi, retaddr);
}

static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
                       full_be_lduw_mmu);
}

tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_be_lduw_mmu(env, addr, oi, retaddr);
}

static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
                       full_le_ldul_mmu);
}

tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_le_ldul_mmu(env, addr, oi, retaddr);
}

static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
                       full_be_ldul_mmu);
}

tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_be_ldul_mmu(env, addr, oi, retaddr);
}

uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
                           TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
                       helper_le_ldq_mmu);
}

uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
                           TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
                       helper_be_ldq_mmu);
}

/*
 * Provide signed versions of the load routines as well. We can of course
 * avoid this for 64-bit data, or for 32-bit data on 32-bit host.
 */


tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
                                     TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
}

/*
 * Load helpers for cpu_ldst.h.
 */

static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
                                       int mmu_idx, uintptr_t retaddr,
                                       MemOp op, FullLoadHelper *full_load)
{
    uint16_t meminfo;
    TCGMemOpIdx oi;
    uint64_t ret;

    meminfo = trace_mem_get_info(op, mmu_idx, false);
    trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);

    op &= ~MO_SIGN;
    oi = make_memop_idx(op, mmu_idx);
    ret = full_load(env, addr, oi, retaddr);

    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);

    return ret;
}

uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                            int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu);
}

int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                       int mmu_idx, uintptr_t ra)
{
    return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB,
                                   full_ldub_mmu);
}

uint32_t cpu_lduw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                            int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUW,
                           MO_TE == MO_LE
                           ? full_le_lduw_mmu : full_be_lduw_mmu);
}

int cpu_ldsw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                       int mmu_idx, uintptr_t ra)
{
    return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_TESW,
                                    MO_TE == MO_LE
                                    ? full_le_lduw_mmu : full_be_lduw_mmu);
}

uint32_t cpu_ldl_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                           int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUL,
                           MO_TE == MO_LE
                           ? full_le_ldul_mmu : full_be_ldul_mmu);
}

uint64_t cpu_ldq_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                           int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEQ,
                           MO_TE == MO_LE
                           ? helper_le_ldq_mmu : helper_be_ldq_mmu);
}

uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr,
                          uintptr_t retaddr)
{
    return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_lduw_data_ra(CPUArchState *env, target_ulong ptr,
                          uintptr_t retaddr)
{
    return cpu_lduw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

int cpu_ldsw_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldsw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_ldl_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldl_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint64_t cpu_ldq_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldq_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldub_data_ra(env, ptr, 0);
}

int cpu_ldsb_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldsb_data_ra(env, ptr, 0);
}

uint32_t cpu_lduw_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_lduw_data_ra(env, ptr, 0);
}

int cpu_ldsw_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldsw_data_ra(env, ptr, 0);
}

uint32_t cpu_ldl_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldl_data_ra(env, ptr, 0);
}

uint64_t cpu_ldq_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldq_data_ra(env, ptr, 0);
}

/*
 * Store Helpers
 */

static inline void QEMU_ALWAYS_INLINE
store_memop(void *haddr, uint64_t val, MemOp op)
{
    switch (op) {
    case MO_UB:
        stb_p(haddr, val);
        break;
    case MO_BEUW:
        stw_be_p(haddr, val);
        break;
    case MO_LEUW:
        stw_le_p(haddr, val);
        break;
    case MO_BEUL:
        stl_be_p(haddr, val);
        break;
    case MO_LEUL:
        stl_le_p(haddr, val);
        break;
    case MO_BEQ:
        stq_be_p(haddr, val);
        break;
    case MO_LEQ:
        stq_le_p(haddr, val);
        break;
    default:
        qemu_build_not_reached();
    }
}

static inline void QEMU_ALWAYS_INLINE
store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
             TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(entry);
    const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }


static inline void QEMU_ALWAYS_INLINE
store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
             TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(entry);
    const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* If the TLB entry is for a different page, reload and try again.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
    }

    /* Handle anything that isn't just a straight memory access.  */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through byte stores.  */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints.  */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out.  */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
        }

        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access.  */
        if (tlb_addr & TLB_MMIO) {
            io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
                      op ^ (need_swap * MO_BSWAP));
            return;
        }

        /* Ignore writes to ROM.  */
        if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
            return;
        }

        /* Handle clean RAM pages.  */
        if (tlb_addr & TLB_NOTDIRTY) {
            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two store_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this.  ;-)
         */
        if (unlikely(need_swap)) {
            store_memop(haddr, val, op ^ MO_BSWAP);
        } else {
            store_memop(haddr, val, op);
        }
        return;
    }
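
    /*
     * Added commentary: TLB_WATCHPOINT, TLB_BSWAP, TLB_MMIO,
     * TLB_DISCARD_WRITE and TLB_NOTDIRTY all live in the sub-page bits of
     * tlb_addr, which are zero for a plain RAM mapping.  That is why the
     * single test
     *
     *     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { ... }
     *
     * above is enough to divert every "special" page to the slow path,
     * while an ordinary aligned RAM store falls through to the direct
     * store_memop() at the end of this function.
     */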

    /* Handle slow unaligned access (it spans two pages or IO).  */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
        int i;
        uintptr_t index2;
        CPUTLBEntry *entry2;
        target_ulong page2, tlb_addr2;
        size_t size2;

    do_unaligned_access:
        /*
         * Ensure the second page is in the TLB.  Note that the first page
         * is already guaranteed to be filled, and that the second page
         * cannot evict the first.
         */
        page2 = (addr + size) & TARGET_PAGE_MASK;
        size2 = (addr + size) & ~TARGET_PAGE_MASK;
        index2 = tlb_index(env, mmu_idx, page2);
        entry2 = tlb_entry(env, mmu_idx, page2);
        tlb_addr2 = tlb_addr_write(entry2);
        if (!tlb_hit_page(tlb_addr2, page2)) {
            if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
                tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
                         mmu_idx, retaddr);
                index2 = tlb_index(env, mmu_idx, page2);
                entry2 = tlb_entry(env, mmu_idx, page2);
            }
            tlb_addr2 = tlb_addr_write(entry2);
        }

        /*
         * Handle watchpoints.  Since this may trap, all checks
         * must happen before any store.
         */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            cpu_check_watchpoint(env_cpu(env), addr, size - size2,
                                 env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
                                 BP_MEM_WRITE, retaddr);
        }
        if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
            cpu_check_watchpoint(env_cpu(env), page2, size2,
                                 env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
                                 BP_MEM_WRITE, retaddr);
        }

        /*
         * XXX: not efficient, but simple.
         * This loop must go in the forward direction to avoid issues
         * with self-modifying code in Windows 64-bit.
         */
        for (i = 0; i < size; ++i) {
            uint8_t val8;
            if (memop_big_endian(op)) {
                /* Big-endian extract.  */
                val8 = val >> (((size - 1) * 8) - (i * 8));
            } else {
                /* Little-endian extract.  */
                val8 = val >> (i * 8);
            }
            helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
        }
        return;
    }

    haddr = (void *)((uintptr_t)addr + entry->addend);
    store_memop(haddr, val, op);
}

void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
                        TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_UB);
}

void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUW);
}

void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUW);
}

void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUL);
}

void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUL);
}

void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEQ);
}

void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEQ);
}
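
/*
 * Added commentary: the helper_*_st*_mmu entry points above are the
 * out-of-line slow path that TCG backends branch to when the inline TLB
 * lookup in generated code misses or hits a special page.  Conceptually,
 * a 32-bit little-endian guest store ends up on the slow path as
 * something like
 *
 *     helper_le_stl_mmu(env, addr, val,
 *                       make_memop_idx(MO_LEUL, mmu_idx), retaddr);
 *
 * (illustrative call only; the real call is emitted by the backend).
 */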

/*
 * Store Helpers for cpu_ldst.h
 */

static inline void QEMU_ALWAYS_INLINE
cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
                 int mmu_idx, uintptr_t retaddr, MemOp op)
{
    TCGMemOpIdx oi;
    uint16_t meminfo;

    meminfo = trace_mem_get_info(op, mmu_idx, true);
    trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);

    oi = make_memop_idx(op, mmu_idx);
    store_helper(env, addr, val, oi, retaddr, op);

    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
}

void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
}

void cpu_stw_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUW);
}

void cpu_stl_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUL);
}

void cpu_stq_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEQ);
}

void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stw_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stw_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stl_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stl_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stq_data_ra(CPUArchState *env, target_ulong ptr,
                     uint64_t val, uintptr_t retaddr)
{
    cpu_stq_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stb_data_ra(env, ptr, val, 0);
}

void cpu_stw_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stw_data_ra(env, ptr, val, 0);
}

void cpu_stl_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stl_data_ra(env, ptr, val, 0);
}

void cpu_stq_data(CPUArchState *env, target_ulong ptr, uint64_t val)
{
    cpu_stq_data_ra(env, ptr, val, 0);
}

/* First set of helpers allows passing in of OI and RETADDR.  This makes
   them callable from other helpers.  */

#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
#define ATOMIC_NAME(X) \
    HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
#define ATOMIC_MMU_DECLS
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
#define ATOMIC_MMU_CLEANUP
#define ATOMIC_MMU_IDX get_mmuidx(oi)

#include "atomic_common.inc.c"

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif

/* Second set of helpers are directly callable from TCG as helpers.  */

#undef EXTRA_ARGS
#undef ATOMIC_NAME
#undef ATOMIC_MMU_LOOKUP
#define EXTRA_ARGS , TCGMemOpIdx oi
#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC())

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

#undef ATOMIC_MMU_IDX
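
/*
 * Added commentary: each inclusion of "atomic_template.h" above expands
 * into a family of atomic helpers for the given DATA_SIZE, using the
 * ATOMIC_NAME/EXTRA_ARGS/ATOMIC_MMU_LOOKUP macros in force at that point.
 * The first set of expansions produces the *_mmu variants that take an
 * explicit TCGMemOpIdx and return address, so they can be reused from
 * other helpers; the second set produces the versions called directly
 * from TCG-generated code, which obtain the return address via GETPC().
 */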

/* Code access functions.  */

static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
}

uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
    return full_ldub_code(env, addr, oi, 0);
}

static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
}

uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
    return full_lduw_code(env, addr, oi, 0);
}

static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
}

uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
    return full_ldl_code(env, addr, oi, 0);
}

static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
}

uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
    return full_ldq_code(env, addr, oi, 0);
}
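
/*
 * Usage sketch (added commentary, illustrative only): target translators
 * fetch guest instructions through these code-access wrappers; the "true"
 * argument to cpu_mmu_index() selects the instruction-fetch MMU index.
 * A decoder loop might do something like
 *
 *     uint32_t insn = cpu_ldl_code(env, pc);
 *
 * where pc is whatever guest program counter the translator is currently
 * decoding (the variable name is just an example).
 */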