1 /* 2 * Common CPU TLB handling 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qemu/main-loop.h" 22 #include "cpu.h" 23 #include "exec/exec-all.h" 24 #include "exec/memory.h" 25 #include "exec/address-spaces.h" 26 #include "exec/cpu_ldst.h" 27 #include "exec/cputlb.h" 28 #include "exec/memory-internal.h" 29 #include "exec/ram_addr.h" 30 #include "tcg/tcg.h" 31 #include "qemu/error-report.h" 32 #include "exec/log.h" 33 #include "exec/helper-proto.h" 34 #include "qemu/atomic.h" 35 #include "qemu/atomic128.h" 36 #include "translate-all.h" 37 #include "trace-root.h" 38 #include "trace/mem.h" 39 #ifdef CONFIG_PLUGIN 40 #include "qemu/plugin-memory.h" 41 #endif 42 43 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ 44 /* #define DEBUG_TLB */ 45 /* #define DEBUG_TLB_LOG */ 46 47 #ifdef DEBUG_TLB 48 # define DEBUG_TLB_GATE 1 49 # ifdef DEBUG_TLB_LOG 50 # define DEBUG_TLB_LOG_GATE 1 51 # else 52 # define DEBUG_TLB_LOG_GATE 0 53 # endif 54 #else 55 # define DEBUG_TLB_GATE 0 56 # define DEBUG_TLB_LOG_GATE 0 57 #endif 58 59 #define tlb_debug(fmt, ...) do { \ 60 if (DEBUG_TLB_LOG_GATE) { \ 61 qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \ 62 ## __VA_ARGS__); \ 63 } else if (DEBUG_TLB_GATE) { \ 64 fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \ 65 } \ 66 } while (0) 67 68 #define assert_cpu_is_self(cpu) do { \ 69 if (DEBUG_TLB_GATE) { \ 70 g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \ 71 } \ 72 } while (0) 73 74 /* run_on_cpu_data.target_ptr should always be big enough for a 75 * target_ulong even on 32 bit builds */ 76 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data)); 77 78 /* We currently can't handle more than 16 bits in the MMUIDX bitmask. 
79 */ 80 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); 81 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1) 82 83 static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx) 84 { 85 return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS); 86 } 87 88 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, 89 size_t max_entries) 90 { 91 desc->window_begin_ns = ns; 92 desc->window_max_entries = max_entries; 93 } 94 95 static void tlb_dyn_init(CPUArchState *env) 96 { 97 int i; 98 99 for (i = 0; i < NB_MMU_MODES; i++) { 100 CPUTLBDesc *desc = &env_tlb(env)->d[i]; 101 size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS; 102 103 tlb_window_reset(desc, get_clock_realtime(), 0); 104 desc->n_used_entries = 0; 105 env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS; 106 env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries); 107 env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries); 108 } 109 } 110 111 /** 112 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary 113 * @env: CPU that owns the TLB 114 * @mmu_idx: MMU index of the TLB 115 * 116 * Called with tlb_lock_held. 117 * 118 * We have two main constraints when resizing a TLB: (1) we only resize it 119 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing 120 * the array or unnecessarily flushing it), which means we do not control how 121 * frequently the resizing can occur; (2) we don't have access to the guest's 122 * future scheduling decisions, and therefore have to decide the magnitude of 123 * the resize based on past observations. 124 * 125 * In general, a memory-hungry process can benefit greatly from an appropriately 126 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that 127 * we just have to make the TLB as large as possible; while an oversized TLB 128 * results in minimal TLB miss rates, it also takes longer to be flushed 129 * (flushes can be _very_ frequent), and the reduced locality can also hurt 130 * performance. 131 * 132 * To achieve near-optimal performance for all kinds of workloads, we: 133 * 134 * 1. Aggressively increase the size of the TLB when the use rate of the 135 * TLB being flushed is high, since it is likely that in the near future this 136 * memory-hungry process will execute again, and its memory hungriness will 137 * probably be similar. 138 * 139 * 2. Slowly reduce the size of the TLB as the use rate declines over a 140 * reasonably large time window. The rationale is that if in such a time window 141 * we have not observed a high TLB use rate, it is likely that we won't observe 142 * it in the near future. In that case, once a time window expires we downsize 143 * the TLB to match the maximum use rate observed in the window. 144 * 145 * 3. Try to keep the maximum use rate in a time window in the 30-70% range, 146 * since in that range performance is likely near-optimal. Recall that the TLB 147 * is direct mapped, so we want the use rate to be low (or at least not too 148 * high), since otherwise we are likely to have a significant amount of 149 * conflict misses. 
150 */ 151 static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx) 152 { 153 CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx]; 154 size_t old_size = tlb_n_entries(env, mmu_idx); 155 size_t rate; 156 size_t new_size = old_size; 157 int64_t now = get_clock_realtime(); 158 int64_t window_len_ms = 100; 159 int64_t window_len_ns = window_len_ms * 1000 * 1000; 160 bool window_expired = now > desc->window_begin_ns + window_len_ns; 161 162 if (desc->n_used_entries > desc->window_max_entries) { 163 desc->window_max_entries = desc->n_used_entries; 164 } 165 rate = desc->window_max_entries * 100 / old_size; 166 167 if (rate > 70) { 168 new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS); 169 } else if (rate < 30 && window_expired) { 170 size_t ceil = pow2ceil(desc->window_max_entries); 171 size_t expected_rate = desc->window_max_entries * 100 / ceil; 172 173 /* 174 * Avoid undersizing when the max number of entries seen is just below 175 * a pow2. For instance, if max_entries == 1025, the expected use rate 176 * would be 1025/2048==50%. However, if max_entries == 1023, we'd get 177 * 1023/1024==99.9% use rate, so we'd likely end up doubling the size 178 * later. Thus, make sure that the expected use rate remains below 70%. 179 * (and since we double the size, that means the lowest rate we'd 180 * expect to get is 35%, which is still in the 30-70% range where 181 * we consider that the size is appropriate.) 182 */ 183 if (expected_rate > 70) { 184 ceil *= 2; 185 } 186 new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS); 187 } 188 189 if (new_size == old_size) { 190 if (window_expired) { 191 tlb_window_reset(desc, now, desc->n_used_entries); 192 } 193 return; 194 } 195 196 g_free(env_tlb(env)->f[mmu_idx].table); 197 g_free(env_tlb(env)->d[mmu_idx].iotlb); 198 199 tlb_window_reset(desc, now, 0); 200 /* desc->n_used_entries is cleared by the caller */ 201 env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; 202 env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size); 203 env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size); 204 /* 205 * If the allocations fail, try smaller sizes. We just freed some 206 * memory, so going back to half of new_size has a good chance of working. 207 * Increased memory pressure elsewhere in the system might cause the 208 * allocations to fail though, so we progressively reduce the allocation 209 * size, aborting if we cannot even allocate the smallest TLB we support. 
210 */ 211 while (env_tlb(env)->f[mmu_idx].table == NULL || 212 env_tlb(env)->d[mmu_idx].iotlb == NULL) { 213 if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) { 214 error_report("%s: %s", __func__, strerror(errno)); 215 abort(); 216 } 217 new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS); 218 env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; 219 220 g_free(env_tlb(env)->f[mmu_idx].table); 221 g_free(env_tlb(env)->d[mmu_idx].iotlb); 222 env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size); 223 env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size); 224 } 225 } 226 227 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx) 228 { 229 tlb_mmu_resize_locked(env, mmu_idx); 230 env_tlb(env)->d[mmu_idx].n_used_entries = 0; 231 env_tlb(env)->d[mmu_idx].large_page_addr = -1; 232 env_tlb(env)->d[mmu_idx].large_page_mask = -1; 233 env_tlb(env)->d[mmu_idx].vindex = 0; 234 memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx)); 235 memset(env_tlb(env)->d[mmu_idx].vtable, -1, 236 sizeof(env_tlb(env)->d[0].vtable)); 237 } 238 239 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx) 240 { 241 env_tlb(env)->d[mmu_idx].n_used_entries++; 242 } 243 244 static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx) 245 { 246 env_tlb(env)->d[mmu_idx].n_used_entries--; 247 } 248 249 void tlb_init(CPUState *cpu) 250 { 251 CPUArchState *env = cpu->env_ptr; 252 253 qemu_spin_init(&env_tlb(env)->c.lock); 254 255 /* Ensure that cpu_reset performs a full flush. */ 256 env_tlb(env)->c.dirty = ALL_MMUIDX_BITS; 257 258 tlb_dyn_init(env); 259 } 260 261 /* flush_all_helper: run fn across all cpus 262 * 263 * If the wait flag is set then the src cpu's helper will be queued as 264 * "safe" work and the loop exited creating a synchronisation point 265 * where all queued work will be finished before execution starts 266 * again. 
267 */ 268 static void flush_all_helper(CPUState *src, run_on_cpu_func fn, 269 run_on_cpu_data d) 270 { 271 CPUState *cpu; 272 273 CPU_FOREACH(cpu) { 274 if (cpu != src) { 275 async_run_on_cpu(cpu, fn, d); 276 } 277 } 278 } 279 280 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide) 281 { 282 CPUState *cpu; 283 size_t full = 0, part = 0, elide = 0; 284 285 CPU_FOREACH(cpu) { 286 CPUArchState *env = cpu->env_ptr; 287 288 full += atomic_read(&env_tlb(env)->c.full_flush_count); 289 part += atomic_read(&env_tlb(env)->c.part_flush_count); 290 elide += atomic_read(&env_tlb(env)->c.elide_flush_count); 291 } 292 *pfull = full; 293 *ppart = part; 294 *pelide = elide; 295 } 296 297 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) 298 { 299 CPUArchState *env = cpu->env_ptr; 300 uint16_t asked = data.host_int; 301 uint16_t all_dirty, work, to_clean; 302 303 assert_cpu_is_self(cpu); 304 305 tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked); 306 307 qemu_spin_lock(&env_tlb(env)->c.lock); 308 309 all_dirty = env_tlb(env)->c.dirty; 310 to_clean = asked & all_dirty; 311 all_dirty &= ~to_clean; 312 env_tlb(env)->c.dirty = all_dirty; 313 314 for (work = to_clean; work != 0; work &= work - 1) { 315 int mmu_idx = ctz32(work); 316 tlb_flush_one_mmuidx_locked(env, mmu_idx); 317 } 318 319 qemu_spin_unlock(&env_tlb(env)->c.lock); 320 321 cpu_tb_jmp_cache_clear(cpu); 322 323 if (to_clean == ALL_MMUIDX_BITS) { 324 atomic_set(&env_tlb(env)->c.full_flush_count, 325 env_tlb(env)->c.full_flush_count + 1); 326 } else { 327 atomic_set(&env_tlb(env)->c.part_flush_count, 328 env_tlb(env)->c.part_flush_count + ctpop16(to_clean)); 329 if (to_clean != asked) { 330 atomic_set(&env_tlb(env)->c.elide_flush_count, 331 env_tlb(env)->c.elide_flush_count + 332 ctpop16(asked & ~to_clean)); 333 } 334 } 335 } 336 337 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap) 338 { 339 tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap); 340 341 if (cpu->created && !qemu_cpu_is_self(cpu)) { 342 async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work, 343 RUN_ON_CPU_HOST_INT(idxmap)); 344 } else { 345 tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap)); 346 } 347 } 348 349 void tlb_flush(CPUState *cpu) 350 { 351 tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS); 352 } 353 354 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap) 355 { 356 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work; 357 358 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap); 359 360 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); 361 fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap)); 362 } 363 364 void tlb_flush_all_cpus(CPUState *src_cpu) 365 { 366 tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS); 367 } 368 369 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap) 370 { 371 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work; 372 373 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap); 374 375 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); 376 async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); 377 } 378 379 void tlb_flush_all_cpus_synced(CPUState *src_cpu) 380 { 381 tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS); 382 } 383 384 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, 385 target_ulong page) 386 { 387 return tlb_hit_page(tlb_entry->addr_read, page) || 388 tlb_hit_page(tlb_addr_write(tlb_entry), page) || 389 tlb_hit_page(tlb_entry->addr_code, page); 390 } 391 392 /** 393 * tlb_entry_is_empty - return true if the entry is 
not in use 394 * @te: pointer to CPUTLBEntry 395 */ 396 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te) 397 { 398 return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1; 399 } 400 401 /* Called with tlb_c.lock held */ 402 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, 403 target_ulong page) 404 { 405 if (tlb_hit_page_anyprot(tlb_entry, page)) { 406 memset(tlb_entry, -1, sizeof(*tlb_entry)); 407 return true; 408 } 409 return false; 410 } 411 412 /* Called with tlb_c.lock held */ 413 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, 414 target_ulong page) 415 { 416 CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx]; 417 int k; 418 419 assert_cpu_is_self(env_cpu(env)); 420 for (k = 0; k < CPU_VTLB_SIZE; k++) { 421 if (tlb_flush_entry_locked(&d->vtable[k], page)) { 422 tlb_n_used_entries_dec(env, mmu_idx); 423 } 424 } 425 } 426 427 static void tlb_flush_page_locked(CPUArchState *env, int midx, 428 target_ulong page) 429 { 430 target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr; 431 target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask; 432 433 /* Check if we need to flush due to large pages. */ 434 if ((page & lp_mask) == lp_addr) { 435 tlb_debug("forcing full flush midx %d (" 436 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", 437 midx, lp_addr, lp_mask); 438 tlb_flush_one_mmuidx_locked(env, midx); 439 } else { 440 if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) { 441 tlb_n_used_entries_dec(env, midx); 442 } 443 tlb_flush_vtlb_page_locked(env, midx, page); 444 } 445 } 446 447 /** 448 * tlb_flush_page_by_mmuidx_async_0: 449 * @cpu: cpu on which to flush 450 * @addr: page of virtual address to flush 451 * @idxmap: set of mmu_idx to flush 452 * 453 * Helper for tlb_flush_page_by_mmuidx and friends, flush one page 454 * at @addr from the tlbs indicated by @idxmap from @cpu. 455 */ 456 static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu, 457 target_ulong addr, 458 uint16_t idxmap) 459 { 460 CPUArchState *env = cpu->env_ptr; 461 int mmu_idx; 462 463 assert_cpu_is_self(cpu); 464 465 tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap); 466 467 qemu_spin_lock(&env_tlb(env)->c.lock); 468 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 469 if ((idxmap >> mmu_idx) & 1) { 470 tlb_flush_page_locked(env, mmu_idx, addr); 471 } 472 } 473 qemu_spin_unlock(&env_tlb(env)->c.lock); 474 475 tb_flush_jmp_cache(cpu, addr); 476 } 477 478 /** 479 * tlb_flush_page_by_mmuidx_async_1: 480 * @cpu: cpu on which to flush 481 * @data: encoded addr + idxmap 482 * 483 * Helper for tlb_flush_page_by_mmuidx and friends, called through 484 * async_run_on_cpu. The idxmap parameter is encoded in the page 485 * offset of the target_ptr field. This limits the set of mmu_idx 486 * that can be passed via this method. 487 */ 488 static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu, 489 run_on_cpu_data data) 490 { 491 target_ulong addr_and_idxmap = (target_ulong) data.target_ptr; 492 target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK; 493 uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK; 494 495 tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap); 496 } 497 498 typedef struct { 499 target_ulong addr; 500 uint16_t idxmap; 501 } TLBFlushPageByMMUIdxData; 502 503 /** 504 * tlb_flush_page_by_mmuidx_async_2: 505 * @cpu: cpu on which to flush 506 * @data: allocated addr + idxmap 507 * 508 * Helper for tlb_flush_page_by_mmuidx and friends, called through 509 * async_run_on_cpu. 
The addr+idxmap parameters are stored in a 510 * TLBFlushPageByMMUIdxData structure that has been allocated 511 * specifically for this helper. Free the structure when done. 512 */ 513 static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu, 514 run_on_cpu_data data) 515 { 516 TLBFlushPageByMMUIdxData *d = data.host_ptr; 517 518 tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap); 519 g_free(d); 520 } 521 522 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap) 523 { 524 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap); 525 526 /* This should already be page aligned */ 527 addr &= TARGET_PAGE_MASK; 528 529 if (qemu_cpu_is_self(cpu)) { 530 tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap); 531 } else if (idxmap < TARGET_PAGE_SIZE) { 532 /* 533 * Most targets have only a few mmu_idx. In the case where 534 * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid 535 * allocating memory for this operation. 536 */ 537 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1, 538 RUN_ON_CPU_TARGET_PTR(addr | idxmap)); 539 } else { 540 TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1); 541 542 /* Otherwise allocate a structure, freed by the worker. */ 543 d->addr = addr; 544 d->idxmap = idxmap; 545 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2, 546 RUN_ON_CPU_HOST_PTR(d)); 547 } 548 } 549 550 void tlb_flush_page(CPUState *cpu, target_ulong addr) 551 { 552 tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS); 553 } 554 555 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr, 556 uint16_t idxmap) 557 { 558 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap); 559 560 /* This should already be page aligned */ 561 addr &= TARGET_PAGE_MASK; 562 563 /* 564 * Allocate memory to hold addr+idxmap only when needed. 565 * See tlb_flush_page_by_mmuidx for details. 566 */ 567 if (idxmap < TARGET_PAGE_SIZE) { 568 flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1, 569 RUN_ON_CPU_TARGET_PTR(addr | idxmap)); 570 } else { 571 CPUState *dst_cpu; 572 573 /* Allocate a separate data block for each destination cpu. */ 574 CPU_FOREACH(dst_cpu) { 575 if (dst_cpu != src_cpu) { 576 TLBFlushPageByMMUIdxData *d 577 = g_new(TLBFlushPageByMMUIdxData, 1); 578 579 d->addr = addr; 580 d->idxmap = idxmap; 581 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2, 582 RUN_ON_CPU_HOST_PTR(d)); 583 } 584 } 585 } 586 587 tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap); 588 } 589 590 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr) 591 { 592 tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS); 593 } 594 595 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu, 596 target_ulong addr, 597 uint16_t idxmap) 598 { 599 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap); 600 601 /* This should already be page aligned */ 602 addr &= TARGET_PAGE_MASK; 603 604 /* 605 * Allocate memory to hold addr+idxmap only when needed. 606 * See tlb_flush_page_by_mmuidx for details. 607 */ 608 if (idxmap < TARGET_PAGE_SIZE) { 609 flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1, 610 RUN_ON_CPU_TARGET_PTR(addr | idxmap)); 611 async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1, 612 RUN_ON_CPU_TARGET_PTR(addr | idxmap)); 613 } else { 614 CPUState *dst_cpu; 615 TLBFlushPageByMMUIdxData *d; 616 617 /* Allocate a separate data block for each destination cpu. 
*/ 618 CPU_FOREACH(dst_cpu) { 619 if (dst_cpu != src_cpu) { 620 d = g_new(TLBFlushPageByMMUIdxData, 1); 621 d->addr = addr; 622 d->idxmap = idxmap; 623 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2, 624 RUN_ON_CPU_HOST_PTR(d)); 625 } 626 } 627 628 d = g_new(TLBFlushPageByMMUIdxData, 1); 629 d->addr = addr; 630 d->idxmap = idxmap; 631 async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2, 632 RUN_ON_CPU_HOST_PTR(d)); 633 } 634 } 635 636 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr) 637 { 638 tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS); 639 } 640 641 /* update the TLBs so that writes to code in the virtual page 'addr' 642 can be detected */ 643 void tlb_protect_code(ram_addr_t ram_addr) 644 { 645 cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE, 646 DIRTY_MEMORY_CODE); 647 } 648 649 /* update the TLB so that writes in physical page 'phys_addr' are no longer 650 tested for self modifying code */ 651 void tlb_unprotect_code(ram_addr_t ram_addr) 652 { 653 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE); 654 } 655 656 657 /* 658 * Dirty write flag handling 659 * 660 * When the TCG code writes to a location it looks up the address in 661 * the TLB and uses that data to compute the final address. If any of 662 * the lower bits of the address are set then the slow path is forced. 663 * There are a number of reasons to do this but for normal RAM the 664 * most usual is detecting writes to code regions which may invalidate 665 * generated code. 666 * 667 * Other vCPUs might be reading their TLBs during guest execution, so we update 668 * te->addr_write with atomic_set. We don't need to worry about this for 669 * oversized guests as MTTCG is disabled for them. 670 * 671 * Called with tlb_c.lock held. 672 */ 673 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry, 674 uintptr_t start, uintptr_t length) 675 { 676 uintptr_t addr = tlb_entry->addr_write; 677 678 if ((addr & (TLB_INVALID_MASK | TLB_MMIO | 679 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) { 680 addr &= TARGET_PAGE_MASK; 681 addr += tlb_entry->addend; 682 if ((addr - start) < length) { 683 #if TCG_OVERSIZED_GUEST 684 tlb_entry->addr_write |= TLB_NOTDIRTY; 685 #else 686 atomic_set(&tlb_entry->addr_write, 687 tlb_entry->addr_write | TLB_NOTDIRTY); 688 #endif 689 } 690 } 691 } 692 693 /* 694 * Called with tlb_c.lock held. 695 * Called only from the vCPU context, i.e. the TLB's owner thread. 696 */ 697 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s) 698 { 699 *d = *s; 700 } 701 702 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of 703 * the target vCPU). 704 * We must take tlb_c.lock to avoid racing with another vCPU update. The only 705 * thing actually updated is the target TLB entry ->addr_write flags. 
706 */ 707 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) 708 { 709 CPUArchState *env; 710 711 int mmu_idx; 712 713 env = cpu->env_ptr; 714 qemu_spin_lock(&env_tlb(env)->c.lock); 715 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 716 unsigned int i; 717 unsigned int n = tlb_n_entries(env, mmu_idx); 718 719 for (i = 0; i < n; i++) { 720 tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i], 721 start1, length); 722 } 723 724 for (i = 0; i < CPU_VTLB_SIZE; i++) { 725 tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i], 726 start1, length); 727 } 728 } 729 qemu_spin_unlock(&env_tlb(env)->c.lock); 730 } 731 732 /* Called with tlb_c.lock held */ 733 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry, 734 target_ulong vaddr) 735 { 736 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) { 737 tlb_entry->addr_write = vaddr; 738 } 739 } 740 741 /* update the TLB corresponding to virtual page vaddr 742 so that it is no longer dirty */ 743 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) 744 { 745 CPUArchState *env = cpu->env_ptr; 746 int mmu_idx; 747 748 assert_cpu_is_self(cpu); 749 750 vaddr &= TARGET_PAGE_MASK; 751 qemu_spin_lock(&env_tlb(env)->c.lock); 752 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 753 tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr); 754 } 755 756 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 757 int k; 758 for (k = 0; k < CPU_VTLB_SIZE; k++) { 759 tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr); 760 } 761 } 762 qemu_spin_unlock(&env_tlb(env)->c.lock); 763 } 764 765 /* Our TLB does not support large pages, so remember the area covered by 766 large pages and trigger a full TLB flush if these are invalidated. */ 767 static void tlb_add_large_page(CPUArchState *env, int mmu_idx, 768 target_ulong vaddr, target_ulong size) 769 { 770 target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr; 771 target_ulong lp_mask = ~(size - 1); 772 773 if (lp_addr == (target_ulong)-1) { 774 /* No previous large page. */ 775 lp_addr = vaddr; 776 } else { 777 /* Extend the existing region to include the new page. 778 This is a compromise between unnecessary flushes and 779 the cost of maintaining a full variable size TLB. */ 780 lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask; 781 while (((lp_addr ^ vaddr) & lp_mask) != 0) { 782 lp_mask <<= 1; 783 } 784 } 785 env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask; 786 env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask; 787 } 788 789 /* Add a new TLB entry. At most one entry for a given virtual address 790 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the 791 * supplied size is only used by tlb_flush_page. 792 * 793 * Called from TCG-generated code, which is under an RCU read-side 794 * critical section. 
795 */ 796 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, 797 hwaddr paddr, MemTxAttrs attrs, int prot, 798 int mmu_idx, target_ulong size) 799 { 800 CPUArchState *env = cpu->env_ptr; 801 CPUTLB *tlb = env_tlb(env); 802 CPUTLBDesc *desc = &tlb->d[mmu_idx]; 803 MemoryRegionSection *section; 804 unsigned int index; 805 target_ulong address; 806 target_ulong write_address; 807 uintptr_t addend; 808 CPUTLBEntry *te, tn; 809 hwaddr iotlb, xlat, sz, paddr_page; 810 target_ulong vaddr_page; 811 int asidx = cpu_asidx_from_attrs(cpu, attrs); 812 int wp_flags; 813 bool is_ram, is_romd; 814 815 assert_cpu_is_self(cpu); 816 817 if (size <= TARGET_PAGE_SIZE) { 818 sz = TARGET_PAGE_SIZE; 819 } else { 820 tlb_add_large_page(env, mmu_idx, vaddr, size); 821 sz = size; 822 } 823 vaddr_page = vaddr & TARGET_PAGE_MASK; 824 paddr_page = paddr & TARGET_PAGE_MASK; 825 826 section = address_space_translate_for_iotlb(cpu, asidx, paddr_page, 827 &xlat, &sz, attrs, &prot); 828 assert(sz >= TARGET_PAGE_SIZE); 829 830 tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx 831 " prot=%x idx=%d\n", 832 vaddr, paddr, prot, mmu_idx); 833 834 address = vaddr_page; 835 if (size < TARGET_PAGE_SIZE) { 836 /* Repeat the MMU check and TLB fill on every access. */ 837 address |= TLB_INVALID_MASK; 838 } 839 if (attrs.byte_swap) { 840 address |= TLB_BSWAP; 841 } 842 843 is_ram = memory_region_is_ram(section->mr); 844 is_romd = memory_region_is_romd(section->mr); 845 846 if (is_ram || is_romd) { 847 /* RAM and ROMD both have associated host memory. */ 848 addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; 849 } else { 850 /* I/O does not; force the host address to NULL. */ 851 addend = 0; 852 } 853 854 write_address = address; 855 if (is_ram) { 856 iotlb = memory_region_get_ram_addr(section->mr) + xlat; 857 /* 858 * Computing is_clean is expensive; avoid all that unless 859 * the page is actually writable. 860 */ 861 if (prot & PAGE_WRITE) { 862 if (section->readonly) { 863 write_address |= TLB_DISCARD_WRITE; 864 } else if (cpu_physical_memory_is_clean(iotlb)) { 865 write_address |= TLB_NOTDIRTY; 866 } 867 } 868 } else { 869 /* I/O or ROMD */ 870 iotlb = memory_region_section_get_iotlb(cpu, section) + xlat; 871 /* 872 * Writes to romd devices must go through MMIO to enable write. 873 * Reads to romd devices go through the ram_ptr found above, 874 * but of course reads to I/O must go through MMIO. 875 */ 876 write_address |= TLB_MMIO; 877 if (!is_romd) { 878 address = write_address; 879 } 880 } 881 882 wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page, 883 TARGET_PAGE_SIZE); 884 885 index = tlb_index(env, mmu_idx, vaddr_page); 886 te = tlb_entry(env, mmu_idx, vaddr_page); 887 888 /* 889 * Hold the TLB lock for the rest of the function. We could acquire/release 890 * the lock several times in the function, but it is faster to amortize the 891 * acquisition cost by acquiring it just once. Note that this leads to 892 * a longer critical section, but this is not a concern since the TLB lock 893 * is unlikely to be contended. 894 */ 895 qemu_spin_lock(&tlb->c.lock); 896 897 /* Note that the tlb is no longer clean. */ 898 tlb->c.dirty |= 1 << mmu_idx; 899 900 /* Make sure there's no cached translation for the new page. */ 901 tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page); 902 903 /* 904 * Only evict the old entry to the victim tlb if it's for a 905 * different page; otherwise just overwrite the stale data. 
906 */ 907 if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) { 908 unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE; 909 CPUTLBEntry *tv = &desc->vtable[vidx]; 910 911 /* Evict the old entry into the victim tlb. */ 912 copy_tlb_helper_locked(tv, te); 913 desc->viotlb[vidx] = desc->iotlb[index]; 914 tlb_n_used_entries_dec(env, mmu_idx); 915 } 916 917 /* refill the tlb */ 918 /* 919 * At this point iotlb contains a physical section number in the lower 920 * TARGET_PAGE_BITS, and either 921 * + the ram_addr_t of the page base of the target RAM (RAM) 922 * + the offset within section->mr of the page base (I/O, ROMD) 923 * We subtract the vaddr_page (which is page aligned and thus won't 924 * disturb the low bits) to give an offset which can be added to the 925 * (non-page-aligned) vaddr of the eventual memory access to get 926 * the MemoryRegion offset for the access. Note that the vaddr we 927 * subtract here is that of the page base, and not the same as the 928 * vaddr we add back in io_readx()/io_writex()/get_page_addr_code(). 929 */ 930 desc->iotlb[index].addr = iotlb - vaddr_page; 931 desc->iotlb[index].attrs = attrs; 932 933 /* Now calculate the new entry */ 934 tn.addend = addend - vaddr_page; 935 if (prot & PAGE_READ) { 936 tn.addr_read = address; 937 if (wp_flags & BP_MEM_READ) { 938 tn.addr_read |= TLB_WATCHPOINT; 939 } 940 } else { 941 tn.addr_read = -1; 942 } 943 944 if (prot & PAGE_EXEC) { 945 tn.addr_code = address; 946 } else { 947 tn.addr_code = -1; 948 } 949 950 tn.addr_write = -1; 951 if (prot & PAGE_WRITE) { 952 tn.addr_write = write_address; 953 if (prot & PAGE_WRITE_INV) { 954 tn.addr_write |= TLB_INVALID_MASK; 955 } 956 if (wp_flags & BP_MEM_WRITE) { 957 tn.addr_write |= TLB_WATCHPOINT; 958 } 959 } 960 961 copy_tlb_helper_locked(te, &tn); 962 tlb_n_used_entries_inc(env, mmu_idx); 963 qemu_spin_unlock(&tlb->c.lock); 964 } 965 966 /* Add a new TLB entry, but without specifying the memory 967 * transaction attributes to be used. 968 */ 969 void tlb_set_page(CPUState *cpu, target_ulong vaddr, 970 hwaddr paddr, int prot, 971 int mmu_idx, target_ulong size) 972 { 973 tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED, 974 prot, mmu_idx, size); 975 } 976 977 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr) 978 { 979 ram_addr_t ram_addr; 980 981 ram_addr = qemu_ram_addr_from_host(ptr); 982 if (ram_addr == RAM_ADDR_INVALID) { 983 error_report("Bad ram pointer %p", ptr); 984 abort(); 985 } 986 return ram_addr; 987 } 988 989 /* 990 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the 991 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must 992 * be discarded and looked up again (e.g. via tlb_entry()). 993 */ 994 static void tlb_fill(CPUState *cpu, target_ulong addr, int size, 995 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) 996 { 997 CPUClass *cc = CPU_GET_CLASS(cpu); 998 bool ok; 999 1000 /* 1001 * This is not a probe, so only valid return is success; failure 1002 * should result in exception + longjmp to the cpu loop. 
1003 */ 1004 ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr); 1005 assert(ok); 1006 } 1007 1008 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, 1009 int mmu_idx, target_ulong addr, uintptr_t retaddr, 1010 MMUAccessType access_type, MemOp op) 1011 { 1012 CPUState *cpu = env_cpu(env); 1013 hwaddr mr_offset; 1014 MemoryRegionSection *section; 1015 MemoryRegion *mr; 1016 uint64_t val; 1017 bool locked = false; 1018 MemTxResult r; 1019 1020 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); 1021 mr = section->mr; 1022 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; 1023 cpu->mem_io_pc = retaddr; 1024 if (!cpu->can_do_io) { 1025 cpu_io_recompile(cpu, retaddr); 1026 } 1027 1028 if (mr->global_locking && !qemu_mutex_iothread_locked()) { 1029 qemu_mutex_lock_iothread(); 1030 locked = true; 1031 } 1032 r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs); 1033 if (r != MEMTX_OK) { 1034 hwaddr physaddr = mr_offset + 1035 section->offset_within_address_space - 1036 section->offset_within_region; 1037 1038 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type, 1039 mmu_idx, iotlbentry->attrs, r, retaddr); 1040 } 1041 if (locked) { 1042 qemu_mutex_unlock_iothread(); 1043 } 1044 1045 return val; 1046 } 1047 1048 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, 1049 int mmu_idx, uint64_t val, target_ulong addr, 1050 uintptr_t retaddr, MemOp op) 1051 { 1052 CPUState *cpu = env_cpu(env); 1053 hwaddr mr_offset; 1054 MemoryRegionSection *section; 1055 MemoryRegion *mr; 1056 bool locked = false; 1057 MemTxResult r; 1058 1059 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); 1060 mr = section->mr; 1061 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; 1062 if (!cpu->can_do_io) { 1063 cpu_io_recompile(cpu, retaddr); 1064 } 1065 cpu->mem_io_pc = retaddr; 1066 1067 if (mr->global_locking && !qemu_mutex_iothread_locked()) { 1068 qemu_mutex_lock_iothread(); 1069 locked = true; 1070 } 1071 r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs); 1072 if (r != MEMTX_OK) { 1073 hwaddr physaddr = mr_offset + 1074 section->offset_within_address_space - 1075 section->offset_within_region; 1076 1077 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), 1078 MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r, 1079 retaddr); 1080 } 1081 if (locked) { 1082 qemu_mutex_unlock_iothread(); 1083 } 1084 } 1085 1086 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs) 1087 { 1088 #if TCG_OVERSIZED_GUEST 1089 return *(target_ulong *)((uintptr_t)entry + ofs); 1090 #else 1091 /* ofs might correspond to .addr_write, so use atomic_read */ 1092 return atomic_read((target_ulong *)((uintptr_t)entry + ofs)); 1093 #endif 1094 } 1095 1096 /* Return true if ADDR is present in the victim tlb, and has been copied 1097 back to the main tlb. 
*/ 1098 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, 1099 size_t elt_ofs, target_ulong page) 1100 { 1101 size_t vidx; 1102 1103 assert_cpu_is_self(env_cpu(env)); 1104 for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { 1105 CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx]; 1106 target_ulong cmp; 1107 1108 /* elt_ofs might correspond to .addr_write, so use atomic_read */ 1109 #if TCG_OVERSIZED_GUEST 1110 cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); 1111 #else 1112 cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs)); 1113 #endif 1114 1115 if (cmp == page) { 1116 /* Found entry in victim tlb, swap tlb and iotlb. */ 1117 CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index]; 1118 1119 qemu_spin_lock(&env_tlb(env)->c.lock); 1120 copy_tlb_helper_locked(&tmptlb, tlb); 1121 copy_tlb_helper_locked(tlb, vtlb); 1122 copy_tlb_helper_locked(vtlb, &tmptlb); 1123 qemu_spin_unlock(&env_tlb(env)->c.lock); 1124 1125 CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1126 CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx]; 1127 tmpio = *io; *io = *vio; *vio = tmpio; 1128 return true; 1129 } 1130 } 1131 return false; 1132 } 1133 1134 /* Macro to call the above, with local variables from the use context. */ 1135 #define VICTIM_TLB_HIT(TY, ADDR) \ 1136 victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \ 1137 (ADDR) & TARGET_PAGE_MASK) 1138 1139 /* 1140 * Return a ram_addr_t for the virtual address for execution. 1141 * 1142 * Return -1 if we can't translate and execute from an entire page 1143 * of RAM. This will force us to execute by loading and translating 1144 * one insn at a time, without caching. 1145 * 1146 * NOTE: This function will trigger an exception if the page is 1147 * not executable. 1148 */ 1149 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, 1150 void **hostp) 1151 { 1152 uintptr_t mmu_idx = cpu_mmu_index(env, true); 1153 uintptr_t index = tlb_index(env, mmu_idx, addr); 1154 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1155 void *p; 1156 1157 if (unlikely(!tlb_hit(entry->addr_code, addr))) { 1158 if (!VICTIM_TLB_HIT(addr_code, addr)) { 1159 tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0); 1160 index = tlb_index(env, mmu_idx, addr); 1161 entry = tlb_entry(env, mmu_idx, addr); 1162 1163 if (unlikely(entry->addr_code & TLB_INVALID_MASK)) { 1164 /* 1165 * The MMU protection covers a smaller range than a target 1166 * page, so we must redo the MMU check for every insn. 1167 */ 1168 return -1; 1169 } 1170 } 1171 assert(tlb_hit(entry->addr_code, addr)); 1172 } 1173 1174 if (unlikely(entry->addr_code & TLB_MMIO)) { 1175 /* The region is not backed by RAM. 
*/ 1176 if (hostp) { 1177 *hostp = NULL; 1178 } 1179 return -1; 1180 } 1181 1182 p = (void *)((uintptr_t)addr + entry->addend); 1183 if (hostp) { 1184 *hostp = p; 1185 } 1186 return qemu_ram_addr_from_host_nofail(p); 1187 } 1188 1189 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) 1190 { 1191 return get_page_addr_code_hostp(env, addr, NULL); 1192 } 1193 1194 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, 1195 CPUIOTLBEntry *iotlbentry, uintptr_t retaddr) 1196 { 1197 ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr; 1198 1199 trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size); 1200 1201 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { 1202 struct page_collection *pages 1203 = page_collection_lock(ram_addr, ram_addr + size); 1204 tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr); 1205 page_collection_unlock(pages); 1206 } 1207 1208 /* 1209 * Set both VGA and migration bits for simplicity and to remove 1210 * the notdirty callback faster. 1211 */ 1212 cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE); 1213 1214 /* We remove the notdirty callback only if the code has been flushed. */ 1215 if (!cpu_physical_memory_is_clean(ram_addr)) { 1216 trace_memory_notdirty_set_dirty(mem_vaddr); 1217 tlb_set_dirty(cpu, mem_vaddr); 1218 } 1219 } 1220 1221 /* 1222 * Probe for whether the specified guest access is permitted. If it is not 1223 * permitted then an exception will be taken in the same way as if this 1224 * were a real access (and we will not return). 1225 * If the size is 0 or the page requires I/O access, returns NULL; otherwise, 1226 * returns the address of the host page similar to tlb_vaddr_to_host(). 1227 */ 1228 void *probe_access(CPUArchState *env, target_ulong addr, int size, 1229 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) 1230 { 1231 uintptr_t index = tlb_index(env, mmu_idx, addr); 1232 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1233 target_ulong tlb_addr; 1234 size_t elt_ofs; 1235 int wp_access; 1236 1237 g_assert(-(addr | TARGET_PAGE_MASK) >= size); 1238 1239 switch (access_type) { 1240 case MMU_DATA_LOAD: 1241 elt_ofs = offsetof(CPUTLBEntry, addr_read); 1242 wp_access = BP_MEM_READ; 1243 break; 1244 case MMU_DATA_STORE: 1245 elt_ofs = offsetof(CPUTLBEntry, addr_write); 1246 wp_access = BP_MEM_WRITE; 1247 break; 1248 case MMU_INST_FETCH: 1249 elt_ofs = offsetof(CPUTLBEntry, addr_code); 1250 wp_access = BP_MEM_READ; 1251 break; 1252 default: 1253 g_assert_not_reached(); 1254 } 1255 tlb_addr = tlb_read_ofs(entry, elt_ofs); 1256 1257 if (unlikely(!tlb_hit(tlb_addr, addr))) { 1258 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, 1259 addr & TARGET_PAGE_MASK)) { 1260 tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, retaddr); 1261 /* TLB resize via tlb_fill may have moved the entry. */ 1262 index = tlb_index(env, mmu_idx, addr); 1263 entry = tlb_entry(env, mmu_idx, addr); 1264 } 1265 tlb_addr = tlb_read_ofs(entry, elt_ofs); 1266 } 1267 1268 if (!size) { 1269 return NULL; 1270 } 1271 1272 if (unlikely(tlb_addr & TLB_FLAGS_MASK)) { 1273 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1274 1275 /* Reject I/O access, or other required slow-path. */ 1276 if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) { 1277 return NULL; 1278 } 1279 1280 /* Handle watchpoints. 
*/ 1281 if (tlb_addr & TLB_WATCHPOINT) { 1282 cpu_check_watchpoint(env_cpu(env), addr, size, 1283 iotlbentry->attrs, wp_access, retaddr); 1284 } 1285 1286 /* Handle clean RAM pages. */ 1287 if (tlb_addr & TLB_NOTDIRTY) { 1288 notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr); 1289 } 1290 } 1291 1292 return (void *)((uintptr_t)addr + entry->addend); 1293 } 1294 1295 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, 1296 MMUAccessType access_type, int mmu_idx) 1297 { 1298 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1299 target_ulong tlb_addr, page; 1300 size_t elt_ofs; 1301 1302 switch (access_type) { 1303 case MMU_DATA_LOAD: 1304 elt_ofs = offsetof(CPUTLBEntry, addr_read); 1305 break; 1306 case MMU_DATA_STORE: 1307 elt_ofs = offsetof(CPUTLBEntry, addr_write); 1308 break; 1309 case MMU_INST_FETCH: 1310 elt_ofs = offsetof(CPUTLBEntry, addr_code); 1311 break; 1312 default: 1313 g_assert_not_reached(); 1314 } 1315 1316 page = addr & TARGET_PAGE_MASK; 1317 tlb_addr = tlb_read_ofs(entry, elt_ofs); 1318 1319 if (!tlb_hit_page(tlb_addr, page)) { 1320 uintptr_t index = tlb_index(env, mmu_idx, addr); 1321 1322 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) { 1323 CPUState *cs = env_cpu(env); 1324 CPUClass *cc = CPU_GET_CLASS(cs); 1325 1326 if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) { 1327 /* Non-faulting page table read failed. */ 1328 return NULL; 1329 } 1330 1331 /* TLB resize via tlb_fill may have moved the entry. */ 1332 entry = tlb_entry(env, mmu_idx, addr); 1333 } 1334 tlb_addr = tlb_read_ofs(entry, elt_ofs); 1335 } 1336 1337 if (tlb_addr & ~TARGET_PAGE_MASK) { 1338 /* IO access */ 1339 return NULL; 1340 } 1341 1342 return (void *)((uintptr_t)addr + entry->addend); 1343 } 1344 1345 1346 #ifdef CONFIG_PLUGIN 1347 /* 1348 * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure. 1349 * This should be a hot path as we will have just looked this path up 1350 * in the softmmu lookup code (or helper). We don't handle re-fills or 1351 * checking the victim table. This is purely informational. 1352 * 1353 * This should never fail as the memory access being instrumented 1354 * should have just filled the TLB. 1355 */ 1356 1357 bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx, 1358 bool is_store, struct qemu_plugin_hwaddr *data) 1359 { 1360 CPUArchState *env = cpu->env_ptr; 1361 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); 1362 uintptr_t index = tlb_index(env, mmu_idx, addr); 1363 target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read; 1364 1365 if (likely(tlb_hit(tlb_addr, addr))) { 1366 /* We must have an iotlb entry for MMIO */ 1367 if (tlb_addr & TLB_MMIO) { 1368 CPUIOTLBEntry *iotlbentry; 1369 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1370 data->is_io = true; 1371 data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); 1372 data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; 1373 } else { 1374 data->is_io = false; 1375 data->v.ram.hostaddr = addr + tlbe->addend; 1376 } 1377 return true; 1378 } 1379 return false; 1380 } 1381 1382 #endif 1383 1384 /* Probe for a read-modify-write atomic operation. Do not allow unaligned 1385 * operations, or io operations to proceed. Return the host address. 
*/ 1386 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, 1387 TCGMemOpIdx oi, uintptr_t retaddr) 1388 { 1389 size_t mmu_idx = get_mmuidx(oi); 1390 uintptr_t index = tlb_index(env, mmu_idx, addr); 1391 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); 1392 target_ulong tlb_addr = tlb_addr_write(tlbe); 1393 MemOp mop = get_memop(oi); 1394 int a_bits = get_alignment_bits(mop); 1395 int s_bits = mop & MO_SIZE; 1396 void *hostaddr; 1397 1398 /* Adjust the given return address. */ 1399 retaddr -= GETPC_ADJ; 1400 1401 /* Enforce guest required alignment. */ 1402 if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) { 1403 /* ??? Maybe indicate atomic op to cpu_unaligned_access */ 1404 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE, 1405 mmu_idx, retaddr); 1406 } 1407 1408 /* Enforce qemu required alignment. */ 1409 if (unlikely(addr & ((1 << s_bits) - 1))) { 1410 /* We get here if guest alignment was not requested, 1411 or was not enforced by cpu_unaligned_access above. 1412 We might widen the access and emulate, but for now 1413 mark an exception and exit the cpu loop. */ 1414 goto stop_the_world; 1415 } 1416 1417 /* Check TLB entry and enforce page permissions. */ 1418 if (!tlb_hit(tlb_addr, addr)) { 1419 if (!VICTIM_TLB_HIT(addr_write, addr)) { 1420 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE, 1421 mmu_idx, retaddr); 1422 index = tlb_index(env, mmu_idx, addr); 1423 tlbe = tlb_entry(env, mmu_idx, addr); 1424 } 1425 tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; 1426 } 1427 1428 /* Notice an IO access or a needs-MMU-lookup access */ 1429 if (unlikely(tlb_addr & TLB_MMIO)) { 1430 /* There's really nothing that can be done to 1431 support this apart from stop-the-world. */ 1432 goto stop_the_world; 1433 } 1434 1435 /* Let the guest notice RMW on a write-only page. */ 1436 if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) { 1437 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD, 1438 mmu_idx, retaddr); 1439 /* Since we don't support reads and writes to different addresses, 1440 and we do have the proper page loaded for write, this shouldn't 1441 ever return. But just in case, handle via stop-the-world. */ 1442 goto stop_the_world; 1443 } 1444 1445 hostaddr = (void *)((uintptr_t)addr + tlbe->addend); 1446 1447 if (unlikely(tlb_addr & TLB_NOTDIRTY)) { 1448 notdirty_write(env_cpu(env), addr, 1 << s_bits, 1449 &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr); 1450 } 1451 1452 return hostaddr; 1453 1454 stop_the_world: 1455 cpu_loop_exit_atomic(env_cpu(env), retaddr); 1456 } 1457 1458 /* 1459 * Load Helpers 1460 * 1461 * We support two different access types. SOFTMMU_CODE_ACCESS is 1462 * specifically for reading instructions from system memory. It is 1463 * called by the translation loop and in some helpers where the code 1464 * is disassembled. It shouldn't be called directly by guest code. 
1465 */ 1466 1467 typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr, 1468 TCGMemOpIdx oi, uintptr_t retaddr); 1469 1470 static inline uint64_t QEMU_ALWAYS_INLINE 1471 load_memop(const void *haddr, MemOp op) 1472 { 1473 switch (op) { 1474 case MO_UB: 1475 return ldub_p(haddr); 1476 case MO_BEUW: 1477 return lduw_be_p(haddr); 1478 case MO_LEUW: 1479 return lduw_le_p(haddr); 1480 case MO_BEUL: 1481 return (uint32_t)ldl_be_p(haddr); 1482 case MO_LEUL: 1483 return (uint32_t)ldl_le_p(haddr); 1484 case MO_BEQ: 1485 return ldq_be_p(haddr); 1486 case MO_LEQ: 1487 return ldq_le_p(haddr); 1488 default: 1489 qemu_build_not_reached(); 1490 } 1491 } 1492 1493 static inline uint64_t QEMU_ALWAYS_INLINE 1494 load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, 1495 uintptr_t retaddr, MemOp op, bool code_read, 1496 FullLoadHelper *full_load) 1497 { 1498 uintptr_t mmu_idx = get_mmuidx(oi); 1499 uintptr_t index = tlb_index(env, mmu_idx, addr); 1500 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1501 target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read; 1502 const size_t tlb_off = code_read ? 1503 offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read); 1504 const MMUAccessType access_type = 1505 code_read ? MMU_INST_FETCH : MMU_DATA_LOAD; 1506 unsigned a_bits = get_alignment_bits(get_memop(oi)); 1507 void *haddr; 1508 uint64_t res; 1509 size_t size = memop_size(op); 1510 1511 /* Handle CPU specific unaligned behaviour */ 1512 if (addr & ((1 << a_bits) - 1)) { 1513 cpu_unaligned_access(env_cpu(env), addr, access_type, 1514 mmu_idx, retaddr); 1515 } 1516 1517 /* If the TLB entry is for a different page, reload and try again. */ 1518 if (!tlb_hit(tlb_addr, addr)) { 1519 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, 1520 addr & TARGET_PAGE_MASK)) { 1521 tlb_fill(env_cpu(env), addr, size, 1522 access_type, mmu_idx, retaddr); 1523 index = tlb_index(env, mmu_idx, addr); 1524 entry = tlb_entry(env, mmu_idx, addr); 1525 } 1526 tlb_addr = code_read ? entry->addr_code : entry->addr_read; 1527 tlb_addr &= ~TLB_INVALID_MASK; 1528 } 1529 1530 /* Handle anything that isn't just a straight memory access. */ 1531 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { 1532 CPUIOTLBEntry *iotlbentry; 1533 bool need_swap; 1534 1535 /* For anything that is unaligned, recurse through full_load. */ 1536 if ((addr & (size - 1)) != 0) { 1537 goto do_unaligned_access; 1538 } 1539 1540 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1541 1542 /* Handle watchpoints. */ 1543 if (unlikely(tlb_addr & TLB_WATCHPOINT)) { 1544 /* On watchpoint hit, this will longjmp out. */ 1545 cpu_check_watchpoint(env_cpu(env), addr, size, 1546 iotlbentry->attrs, BP_MEM_READ, retaddr); 1547 } 1548 1549 need_swap = size > 1 && (tlb_addr & TLB_BSWAP); 1550 1551 /* Handle I/O access. */ 1552 if (likely(tlb_addr & TLB_MMIO)) { 1553 return io_readx(env, iotlbentry, mmu_idx, addr, retaddr, 1554 access_type, op ^ (need_swap * MO_BSWAP)); 1555 } 1556 1557 haddr = (void *)((uintptr_t)addr + entry->addend); 1558 1559 /* 1560 * Keep these two load_memop separate to ensure that the compiler 1561 * is able to fold the entire function to a single instruction. 1562 * There is a build-time assert inside to remind you of this. ;-) 1563 */ 1564 if (unlikely(need_swap)) { 1565 return load_memop(haddr, op ^ MO_BSWAP); 1566 } 1567 return load_memop(haddr, op); 1568 } 1569 1570 /* Handle slow unaligned access (it spans two pages or IO). 
*/ 1571 if (size > 1 1572 && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1 1573 >= TARGET_PAGE_SIZE)) { 1574 target_ulong addr1, addr2; 1575 uint64_t r1, r2; 1576 unsigned shift; 1577 do_unaligned_access: 1578 addr1 = addr & ~((target_ulong)size - 1); 1579 addr2 = addr1 + size; 1580 r1 = full_load(env, addr1, oi, retaddr); 1581 r2 = full_load(env, addr2, oi, retaddr); 1582 shift = (addr & (size - 1)) * 8; 1583 1584 if (memop_big_endian(op)) { 1585 /* Big-endian combine. */ 1586 res = (r1 << shift) | (r2 >> ((size * 8) - shift)); 1587 } else { 1588 /* Little-endian combine. */ 1589 res = (r1 >> shift) | (r2 << ((size * 8) - shift)); 1590 } 1591 return res & MAKE_64BIT_MASK(0, size * 8); 1592 } 1593 1594 haddr = (void *)((uintptr_t)addr + entry->addend); 1595 return load_memop(haddr, op); 1596 } 1597 1598 /* 1599 * For the benefit of TCG generated code, we want to avoid the 1600 * complication of ABI-specific return type promotion and always 1601 * return a value extended to the register size of the host. This is 1602 * tcg_target_long, except in the case of a 32-bit host and 64-bit 1603 * data, and for that we always have uint64_t. 1604 * 1605 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS. 1606 */ 1607 1608 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr, 1609 TCGMemOpIdx oi, uintptr_t retaddr) 1610 { 1611 return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu); 1612 } 1613 1614 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, 1615 TCGMemOpIdx oi, uintptr_t retaddr) 1616 { 1617 return full_ldub_mmu(env, addr, oi, retaddr); 1618 } 1619 1620 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr, 1621 TCGMemOpIdx oi, uintptr_t retaddr) 1622 { 1623 return load_helper(env, addr, oi, retaddr, MO_LEUW, false, 1624 full_le_lduw_mmu); 1625 } 1626 1627 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr, 1628 TCGMemOpIdx oi, uintptr_t retaddr) 1629 { 1630 return full_le_lduw_mmu(env, addr, oi, retaddr); 1631 } 1632 1633 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr, 1634 TCGMemOpIdx oi, uintptr_t retaddr) 1635 { 1636 return load_helper(env, addr, oi, retaddr, MO_BEUW, false, 1637 full_be_lduw_mmu); 1638 } 1639 1640 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr, 1641 TCGMemOpIdx oi, uintptr_t retaddr) 1642 { 1643 return full_be_lduw_mmu(env, addr, oi, retaddr); 1644 } 1645 1646 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr, 1647 TCGMemOpIdx oi, uintptr_t retaddr) 1648 { 1649 return load_helper(env, addr, oi, retaddr, MO_LEUL, false, 1650 full_le_ldul_mmu); 1651 } 1652 1653 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr, 1654 TCGMemOpIdx oi, uintptr_t retaddr) 1655 { 1656 return full_le_ldul_mmu(env, addr, oi, retaddr); 1657 } 1658 1659 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr, 1660 TCGMemOpIdx oi, uintptr_t retaddr) 1661 { 1662 return load_helper(env, addr, oi, retaddr, MO_BEUL, false, 1663 full_be_ldul_mmu); 1664 } 1665 1666 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr, 1667 TCGMemOpIdx oi, uintptr_t retaddr) 1668 { 1669 return full_be_ldul_mmu(env, addr, oi, retaddr); 1670 } 1671 1672 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, 1673 TCGMemOpIdx oi, uintptr_t retaddr) 1674 { 1675 return load_helper(env, addr, oi, retaddr, MO_LEQ, false, 1676 helper_le_ldq_mmu); 1677 } 1678 1679 uint64_t 
helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, 1680 TCGMemOpIdx oi, uintptr_t retaddr) 1681 { 1682 return load_helper(env, addr, oi, retaddr, MO_BEQ, false, 1683 helper_be_ldq_mmu); 1684 } 1685 1686 /* 1687 * Provide signed versions of the load routines as well. We can of course 1688 * avoid this for 64-bit data, or for 32-bit data on 32-bit host. 1689 */ 1690 1691 1692 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, 1693 TCGMemOpIdx oi, uintptr_t retaddr) 1694 { 1695 return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr); 1696 } 1697 1698 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr, 1699 TCGMemOpIdx oi, uintptr_t retaddr) 1700 { 1701 return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr); 1702 } 1703 1704 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr, 1705 TCGMemOpIdx oi, uintptr_t retaddr) 1706 { 1707 return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr); 1708 } 1709 1710 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr, 1711 TCGMemOpIdx oi, uintptr_t retaddr) 1712 { 1713 return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr); 1714 } 1715 1716 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, 1717 TCGMemOpIdx oi, uintptr_t retaddr) 1718 { 1719 return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr); 1720 } 1721 1722 /* 1723 * Load helpers for cpu_ldst.h. 1724 */ 1725 1726 static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr, 1727 int mmu_idx, uintptr_t retaddr, 1728 MemOp op, FullLoadHelper *full_load) 1729 { 1730 uint16_t meminfo; 1731 TCGMemOpIdx oi; 1732 uint64_t ret; 1733 1734 meminfo = trace_mem_get_info(op, mmu_idx, false); 1735 trace_guest_mem_before_exec(env_cpu(env), addr, meminfo); 1736 1737 op &= ~MO_SIGN; 1738 oi = make_memop_idx(op, mmu_idx); 1739 ret = full_load(env, addr, oi, retaddr); 1740 1741 qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo); 1742 1743 return ret; 1744 } 1745 1746 uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr, 1747 int mmu_idx, uintptr_t ra) 1748 { 1749 return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu); 1750 } 1751 1752 int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, 1753 int mmu_idx, uintptr_t ra) 1754 { 1755 return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB, 1756 full_ldub_mmu); 1757 } 1758 1759 uint32_t cpu_lduw_mmuidx_ra(CPUArchState *env, abi_ptr addr, 1760 int mmu_idx, uintptr_t ra) 1761 { 1762 return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUW, 1763 MO_TE == MO_LE 1764 ? full_le_lduw_mmu : full_be_lduw_mmu); 1765 } 1766 1767 int cpu_ldsw_mmuidx_ra(CPUArchState *env, abi_ptr addr, 1768 int mmu_idx, uintptr_t ra) 1769 { 1770 return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_TESW, 1771 MO_TE == MO_LE 1772 ? full_le_lduw_mmu : full_be_lduw_mmu); 1773 } 1774 1775 uint32_t cpu_ldl_mmuidx_ra(CPUArchState *env, abi_ptr addr, 1776 int mmu_idx, uintptr_t ra) 1777 { 1778 return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUL, 1779 MO_TE == MO_LE 1780 ? full_le_ldul_mmu : full_be_ldul_mmu); 1781 } 1782 1783 uint64_t cpu_ldq_mmuidx_ra(CPUArchState *env, abi_ptr addr, 1784 int mmu_idx, uintptr_t ra) 1785 { 1786 return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEQ, 1787 MO_TE == MO_LE 1788 ? 

uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr,
                          uintptr_t retaddr)
{
    return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_lduw_data_ra(CPUArchState *env, target_ulong ptr,
                          uintptr_t retaddr)
{
    return cpu_lduw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

int cpu_ldsw_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldsw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_ldl_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldl_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint64_t cpu_ldq_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldq_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldub_data_ra(env, ptr, 0);
}

int cpu_ldsb_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldsb_data_ra(env, ptr, 0);
}

uint32_t cpu_lduw_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_lduw_data_ra(env, ptr, 0);
}

int cpu_ldsw_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldsw_data_ra(env, ptr, 0);
}

uint32_t cpu_ldl_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldl_data_ra(env, ptr, 0);
}

uint64_t cpu_ldq_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldq_data_ra(env, ptr, 0);
}

/*
 * Store Helpers
 */

static inline void QEMU_ALWAYS_INLINE
store_memop(void *haddr, uint64_t val, MemOp op)
{
    switch (op) {
    case MO_UB:
        stb_p(haddr, val);
        break;
    case MO_BEUW:
        stw_be_p(haddr, val);
        break;
    case MO_LEUW:
        stw_le_p(haddr, val);
        break;
    case MO_BEUL:
        stl_be_p(haddr, val);
        break;
    case MO_LEUL:
        stl_le_p(haddr, val);
        break;
    case MO_BEQ:
        stq_be_p(haddr, val);
        break;
    case MO_LEQ:
        stq_le_p(haddr, val);
        break;
    default:
        qemu_build_not_reached();
    }
}
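
/*
 * Editorial sketch (illustrative only, not used elsewhere in this file):
 * the default case above is qemu_build_not_reached(), so store_memop()
 * must be called with a MemOp the compiler can resolve at build time.
 * Every caller below passes a literal constant, which lets the switch
 * fold down to a single st*_p() call, as in this made-up example:
 */
static inline void example_store_le32(void *haddr, uint32_t val)
{
    /* MO_LEUL is a compile-time constant, so this folds to stl_le_p(). */
    store_memop(haddr, val, MO_LEUL);
}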

static inline void QEMU_ALWAYS_INLINE
store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
             TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(entry);
    const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* If the TLB entry is for a different page, reload and try again.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
    }

    /* Handle anything that isn't just a straight memory access.  */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through byte stores.  */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints.  */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out.  */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
        }

        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access.  */
        if (tlb_addr & TLB_MMIO) {
            io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
                      op ^ (need_swap * MO_BSWAP));
            return;
        }

        /* Ignore writes to ROM.  */
        if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
            return;
        }

        /* Handle clean RAM pages.  */
        if (tlb_addr & TLB_NOTDIRTY) {
            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two store_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this.  ;-)
         */
        if (unlikely(need_swap)) {
            store_memop(haddr, val, op ^ MO_BSWAP);
        } else {
            store_memop(haddr, val, op);
        }
        return;
    }

    /* Handle slow unaligned access (it spans two pages or IO).  */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
        int i;
        uintptr_t index2;
        CPUTLBEntry *entry2;
        target_ulong page2, tlb_addr2;
        size_t size2;

    do_unaligned_access:
        /*
         * Ensure the second page is in the TLB.  Note that the first page
         * is already guaranteed to be filled, and that the second page
         * cannot evict the first.
         */
        page2 = (addr + size) & TARGET_PAGE_MASK;
        size2 = (addr + size) & ~TARGET_PAGE_MASK;
        index2 = tlb_index(env, mmu_idx, page2);
        entry2 = tlb_entry(env, mmu_idx, page2);
        tlb_addr2 = tlb_addr_write(entry2);
        if (!tlb_hit_page(tlb_addr2, page2)) {
            if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
                tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
                         mmu_idx, retaddr);
                index2 = tlb_index(env, mmu_idx, page2);
                entry2 = tlb_entry(env, mmu_idx, page2);
            }
            tlb_addr2 = tlb_addr_write(entry2);
        }

        /*
         * Handle watchpoints.  Since this may trap, all checks
         * must happen before any store.
         */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            cpu_check_watchpoint(env_cpu(env), addr, size - size2,
                                 env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
                                 BP_MEM_WRITE, retaddr);
        }
        if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
            cpu_check_watchpoint(env_cpu(env), page2, size2,
                                 env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
                                 BP_MEM_WRITE, retaddr);
        }

        /*
         * XXX: not efficient, but simple.
         * This loop must go in the forward direction to avoid issues
         * with self-modifying code in Windows 64-bit.
         */
        for (i = 0; i < size; ++i) {
            uint8_t val8;
            if (memop_big_endian(op)) {
                /* Big-endian extract.  */
                val8 = val >> (((size - 1) * 8) - (i * 8));
            } else {
                /* Little-endian extract.  */
                val8 = val >> (i * 8);
            }
            helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
        }
        return;
    }

    haddr = (void *)((uintptr_t)addr + entry->addend);
    store_memop(haddr, val, op);
}
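
/*
 * Editorial note (added for exposition): TLB_BSWAP marks pages whose
 * accesses must be byte-swapped relative to the endianness encoded in the
 * MemOp, so store_helper() corrects for it by XOR-ing MO_BSWAP into the
 * operation (the load path uses the same trick).  MO_LE and MO_BE differ
 * only in the MO_BSWAP bit, so the XOR simply flips the endianness of the
 * access; the build-time check below spells that out for the 32-bit case.
 * Byte accesses never need swapping, which is why need_swap also tests
 * size > 1.
 */
QEMU_BUILD_BUG_ON((MO_LEUL ^ MO_BSWAP) != MO_BEUL);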

void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
                        TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_UB);
}

void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUW);
}

void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUW);
}

void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUL);
}

void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUL);
}

void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEQ);
}

void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEQ);
}

/*
 * Store Helpers for cpu_ldst.h
 */

static inline void QEMU_ALWAYS_INLINE
cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
                 int mmu_idx, uintptr_t retaddr, MemOp op)
{
    TCGMemOpIdx oi;
    uint16_t meminfo;

    meminfo = trace_mem_get_info(op, mmu_idx, true);
    trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);

    oi = make_memop_idx(op, mmu_idx);
    store_helper(env, addr, val, oi, retaddr, op);

    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
}

void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
}

void cpu_stw_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUW);
}

void cpu_stl_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUL);
}

void cpu_stq_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEQ);
}
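
/*
 * Editorial sketch (illustrative only, not used elsewhere in this file):
 * the cpu_st*_data_ra() wrappers that follow are built on the _mmuidx_ra
 * variants by supplying the current data MMU index, exactly as in this
 * made-up example.  The plain cpu_st*_data() variants further down pass a
 * retaddr of 0, the convention for calls that do not originate in
 * translated code and therefore need no unwinding.
 */
static inline void example_store_guest_u32(CPUArchState *env,
                                           target_ulong addr, uint32_t val,
                                           uintptr_t retaddr)
{
    /* Same pattern as cpu_stl_data_ra() below. */
    cpu_stl_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), retaddr);
}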

void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stw_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stw_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stl_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stl_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stq_data_ra(CPUArchState *env, target_ulong ptr,
                     uint64_t val, uintptr_t retaddr)
{
    cpu_stq_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stb_data_ra(env, ptr, val, 0);
}

void cpu_stw_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stw_data_ra(env, ptr, val, 0);
}

void cpu_stl_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stl_data_ra(env, ptr, val, 0);
}

void cpu_stq_data(CPUArchState *env, target_ulong ptr, uint64_t val)
{
    cpu_stq_data_ra(env, ptr, val, 0);
}

/* First set of helpers allows passing in of OI and RETADDR.  This makes
   them callable from other helpers.  */

#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
#define ATOMIC_NAME(X) \
    HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
#define ATOMIC_MMU_DECLS
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
#define ATOMIC_MMU_CLEANUP
#define ATOMIC_MMU_IDX get_mmuidx(oi)

#include "atomic_common.inc.c"

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif

/* Second set of helpers are directly callable from TCG as helpers.  */

#undef EXTRA_ARGS
#undef ATOMIC_NAME
#undef ATOMIC_MMU_LOOKUP
#define EXTRA_ARGS , TCGMemOpIdx oi
#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC())

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif
#undef ATOMIC_MMU_IDX
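
/*
 * Editorial note (approximate sketch; see atomic_template.h for the
 * authoritative definitions): each DATA_SIZE inclusion above expands
 * ATOMIC_NAME() into a family of atomic helpers.  For the 4-byte,
 * little-endian case the two passes are expected to produce names roughly
 * of the shape
 *
 *     first pass:  helper_atomic_<op>l_le_mmu(env, addr, ...,
 *                                             TCGMemOpIdx oi,
 *                                             uintptr_t retaddr)
 *     second pass: helper_atomic_<op>l_le(env, addr, ..., TCGMemOpIdx oi)
 *
 * i.e. the first set takes an explicit return address so that it can be
 * called from other helpers, while the second set is called directly from
 * TCG-generated code and recovers the return address itself via GETPC().
 */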

/* Code access functions.  */

static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
}

uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
    return full_ldub_code(env, addr, oi, 0);
}

static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
}

uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
    return full_lduw_code(env, addr, oi, 0);
}

static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
}

uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
    return full_ldl_code(env, addr, oi, 0);
}

static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
}

uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
    return full_ldq_code(env, addr, oi, 0);
}
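
/*
 * Editorial sketch (hypothetical, not part of this file): a target's
 * instruction decoder typically fetches opcodes through these code-access
 * wrappers, e.g. reading a 32-bit instruction word at the current PC.
 * The function name example_fetch_insn is made up for illustration.
 */
static inline uint32_t example_fetch_insn(CPUArchState *env, target_ulong pc)
{
    /* Uses the code (ifetch) MMU index and target-endian byte order. */
    return cpu_ldl_code(env, pc);
}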