/*
 * Common CPU TLB handling
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
#include "exec/cpu_ldst.h"
#include "exec/cputlb.h"
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "tcg/tcg.h"
#include "qemu/error-report.h"
#include "exec/log.h"
#include "exec/helper-proto.h"
#include "qemu/atomic.h"
#include "qemu/atomic128.h"
#include "translate-all.h"
#include "trace-root.h"
#include "trace/mem.h"
#ifdef CONFIG_PLUGIN
#include "qemu/plugin-memory.h"
#endif

/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
/* #define DEBUG_TLB */
/* #define DEBUG_TLB_LOG */

#ifdef DEBUG_TLB
# define DEBUG_TLB_GATE 1
# ifdef DEBUG_TLB_LOG
#  define DEBUG_TLB_LOG_GATE 1
# else
#  define DEBUG_TLB_LOG_GATE 0
# endif
#else
# define DEBUG_TLB_GATE 0
# define DEBUG_TLB_LOG_GATE 0
#endif

#define tlb_debug(fmt, ...) do { \
    if (DEBUG_TLB_LOG_GATE) { \
        qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
                      ## __VA_ARGS__); \
    } else if (DEBUG_TLB_GATE) { \
        fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
    } \
} while (0)

#define assert_cpu_is_self(cpu) do {                              \
        if (DEBUG_TLB_GATE) {                                     \
            g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
        }                                                         \
    } while (0)

/* run_on_cpu_data.target_ptr should always be big enough for a
 * target_ulong even on 32 bit builds */
QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));

/* We currently can't handle more than 16 bits in the MMUIDX bitmask.
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)

static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
{
    return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
}

static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
{
    return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
}

static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
                             size_t max_entries)
{
    desc->window_begin_ns = ns;
    desc->window_max_entries = max_entries;
}

static void tlb_dyn_init(CPUArchState *env)
{
    int i;

    for (i = 0; i < NB_MMU_MODES; i++) {
        CPUTLBDesc *desc = &env_tlb(env)->d[i];
        size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;

        tlb_window_reset(desc, get_clock_realtime(), 0);
        desc->n_used_entries = 0;
        env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
        env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries);
        env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries);
    }
}

/**
 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
 * @env: CPU that owns the TLB
 * @mmu_idx: MMU index of the TLB
 *
 * Called with tlb_lock held.
 *
 * We have two main constraints when resizing a TLB: (1) we only resize it
 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
 * the array or unnecessarily flushing it), which means we do not control how
 * frequently the resizing can occur; (2) we don't have access to the guest's
 * future scheduling decisions, and therefore have to decide the magnitude of
 * the resize based on past observations.
 *
 * In general, a memory-hungry process can benefit greatly from an appropriately
 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
 * we just have to make the TLB as large as possible; while an oversized TLB
 * results in minimal TLB miss rates, it also takes longer to be flushed
 * (flushes can be _very_ frequent), and the reduced locality can also hurt
 * performance.
 *
 * To achieve near-optimal performance for all kinds of workloads, we:
 *
 * 1. Aggressively increase the size of the TLB when the use rate of the
 * TLB being flushed is high, since it is likely that in the near future this
 * memory-hungry process will execute again, and its memory hungriness will
 * probably be similar.
 *
 * 2. Slowly reduce the size of the TLB as the use rate declines over a
 * reasonably large time window. The rationale is that if in such a time window
 * we have not observed a high TLB use rate, it is likely that we won't observe
 * it in the near future. In that case, once a time window expires we downsize
 * the TLB to match the maximum use rate observed in the window.
 *
 * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
 * since in that range performance is likely near-optimal. Recall that the TLB
 * is direct mapped, so we want the use rate to be low (or at least not too
 * high), since otherwise we are likely to have a significant amount of
 * conflict misses.
 */
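
/*
 * Illustrative numbers for the heuristics above (not from a real trace):
 * with old_size == 1024 and window_max_entries == 800, the use rate is
 * 800 * 100 / 1024 == 78% > 70%, so the TLB is doubled to 2048 entries.
 * Conversely, if an expired window only saw window_max_entries == 200
 * (a 19% use rate), we aim for pow2ceil(200) == 256; the expected use
 * rate there would be 200 * 100 / 256 == 78%, which is above 70%, so the
 * target is doubled again to 512, i.e. a ~39% expected use rate, back
 * inside the 30-70% band.
 */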
static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
{
    CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
    size_t old_size = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
    size_t rate;
    size_t new_size = old_size;
    int64_t now = get_clock_realtime();
    int64_t window_len_ms = 100;
    int64_t window_len_ns = window_len_ms * 1000 * 1000;
    bool window_expired = now > desc->window_begin_ns + window_len_ns;

    if (desc->n_used_entries > desc->window_max_entries) {
        desc->window_max_entries = desc->n_used_entries;
    }
    rate = desc->window_max_entries * 100 / old_size;

    if (rate > 70) {
        new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
    } else if (rate < 30 && window_expired) {
        size_t ceil = pow2ceil(desc->window_max_entries);
        size_t expected_rate = desc->window_max_entries * 100 / ceil;

        /*
         * Avoid undersizing when the max number of entries seen is just below
         * a pow2. For instance, if max_entries == 1025, the expected use rate
         * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
         * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
         * later. Thus, make sure that the expected use rate remains below 70%.
         * (and since we double the size, that means the lowest rate we'd
         * expect to get is 35%, which is still in the 30-70% range where
         * we consider that the size is appropriate.)
         */
        if (expected_rate > 70) {
            ceil *= 2;
        }
        new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
    }

    if (new_size == old_size) {
        if (window_expired) {
            tlb_window_reset(desc, now, desc->n_used_entries);
        }
        return;
    }

    g_free(env_tlb(env)->f[mmu_idx].table);
    g_free(env_tlb(env)->d[mmu_idx].iotlb);

    tlb_window_reset(desc, now, 0);
    /* desc->n_used_entries is cleared by the caller */
    env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
    env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
    env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
    /*
     * If the allocations fail, try smaller sizes. We just freed some
     * memory, so going back to half of new_size has a good chance of working.
     * Increased memory pressure elsewhere in the system might cause the
     * allocations to fail though, so we progressively reduce the allocation
     * size, aborting if we cannot even allocate the smallest TLB we support.
     */
    while (env_tlb(env)->f[mmu_idx].table == NULL ||
           env_tlb(env)->d[mmu_idx].iotlb == NULL) {
        if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
            error_report("%s: %s", __func__, strerror(errno));
            abort();
        }
        new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
        env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;

        g_free(env_tlb(env)->f[mmu_idx].table);
        g_free(env_tlb(env)->d[mmu_idx].iotlb);
        env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
        env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
    }
}

static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
{
    tlb_mmu_resize_locked(env, mmu_idx);
    env_tlb(env)->d[mmu_idx].n_used_entries = 0;
    env_tlb(env)->d[mmu_idx].large_page_addr = -1;
    env_tlb(env)->d[mmu_idx].large_page_mask = -1;
    env_tlb(env)->d[mmu_idx].vindex = 0;
    memset(env_tlb(env)->f[mmu_idx].table, -1,
           sizeof_tlb(&env_tlb(env)->f[mmu_idx]));
    memset(env_tlb(env)->d[mmu_idx].vtable, -1,
           sizeof(env_tlb(env)->d[0].vtable));
}

static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
{
    env_tlb(env)->d[mmu_idx].n_used_entries++;
}

static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
{
    env_tlb(env)->d[mmu_idx].n_used_entries--;
}

void tlb_init(CPUState *cpu)
{
    CPUArchState *env = cpu->env_ptr;

    qemu_spin_init(&env_tlb(env)->c.lock);

    /* Ensure that cpu_reset performs a full flush.  */
    env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;

    tlb_dyn_init(env);
}

/* flush_all_helper: run fn across all cpus
 *
 * This queues fn on every cpu other than src; the caller then runs fn
 * on src itself, either directly or (for the _synced variants) queued
 * as "safe" work, the latter creating a synchronisation point where all
 * queued work will be finished before execution starts again.
 */
static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
                             run_on_cpu_data d)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu != src) {
            async_run_on_cpu(cpu, fn, d);
        }
    }
}

void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
{
    CPUState *cpu;
    size_t full = 0, part = 0, elide = 0;

    CPU_FOREACH(cpu) {
        CPUArchState *env = cpu->env_ptr;

        full += atomic_read(&env_tlb(env)->c.full_flush_count);
        part += atomic_read(&env_tlb(env)->c.part_flush_count);
        elide += atomic_read(&env_tlb(env)->c.elide_flush_count);
    }
    *pfull = full;
    *ppart = part;
    *pelide = elide;
}

static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
{
    CPUArchState *env = cpu->env_ptr;
    uint16_t asked = data.host_int;
    uint16_t all_dirty, work, to_clean;

    assert_cpu_is_self(cpu);

    tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);

    qemu_spin_lock(&env_tlb(env)->c.lock);

    all_dirty = env_tlb(env)->c.dirty;
    to_clean = asked & all_dirty;
    all_dirty &= ~to_clean;
    env_tlb(env)->c.dirty = all_dirty;

    for (work = to_clean; work != 0; work &= work - 1) {
        int mmu_idx = ctz32(work);
        tlb_flush_one_mmuidx_locked(env, mmu_idx);
    }

    qemu_spin_unlock(&env_tlb(env)->c.lock);

    cpu_tb_jmp_cache_clear(cpu);

    if (to_clean == ALL_MMUIDX_BITS) {
        atomic_set(&env_tlb(env)->c.full_flush_count,
                   env_tlb(env)->c.full_flush_count + 1);
    } else {
        atomic_set(&env_tlb(env)->c.part_flush_count,
                   env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
        if (to_clean != asked) {
            atomic_set(&env_tlb(env)->c.elide_flush_count,
                       env_tlb(env)->c.elide_flush_count +
                       ctpop16(asked & ~to_clean));
        }
    }
}

void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
{
    tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);

    if (cpu->created && !qemu_cpu_is_self(cpu)) {
        async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
                         RUN_ON_CPU_HOST_INT(idxmap));
    } else {
        tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
    }
}

void tlb_flush(CPUState *cpu)
{
    tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
}

void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;

    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
}

void tlb_flush_all_cpus(CPUState *src_cpu)
{
    tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
}

void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;

    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
}

void tlb_flush_all_cpus_synced(CPUState *src_cpu)
{
    tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
}
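
/*
 * Illustrative use of the idxmap parameter (hypothetical target code):
 * each bit selects one MMU index, so flushing the TLBs for mmu_idx 0
 * and mmu_idx 2 on the current vcpu would be
 *
 *     tlb_flush_by_mmuidx(cpu, (1 << 0) | (1 << 2));
 *
 * and ALL_MMUIDX_BITS selects every index, as tlb_flush() does above.
 */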
static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
                                        target_ulong page)
{
    return tlb_hit_page(tlb_entry->addr_read, page) ||
           tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
           tlb_hit_page(tlb_entry->addr_code, page);
}

/**
 * tlb_entry_is_empty - return true if the entry is not in use
 * @te: pointer to CPUTLBEntry
 */
static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
{
    return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
}

/* Called with tlb_c.lock held */
static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
                                          target_ulong page)
{
    if (tlb_hit_page_anyprot(tlb_entry, page)) {
        memset(tlb_entry, -1, sizeof(*tlb_entry));
        return true;
    }
    return false;
}

/* Called with tlb_c.lock held */
static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
                                              target_ulong page)
{
    CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
    int k;

    assert_cpu_is_self(env_cpu(env));
    for (k = 0; k < CPU_VTLB_SIZE; k++) {
        if (tlb_flush_entry_locked(&d->vtable[k], page)) {
            tlb_n_used_entries_dec(env, mmu_idx);
        }
    }
}

static void tlb_flush_page_locked(CPUArchState *env, int midx,
                                  target_ulong page)
{
    target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
    target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;

    /* Check if we need to flush due to large pages.  */
    if ((page & lp_mask) == lp_addr) {
        tlb_debug("forcing full flush midx %d ("
                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
                  midx, lp_addr, lp_mask);
        tlb_flush_one_mmuidx_locked(env, midx);
    } else {
        if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
            tlb_n_used_entries_dec(env, midx);
        }
        tlb_flush_vtlb_page_locked(env, midx, page);
    }
}

/**
 * tlb_flush_page_by_mmuidx_async_0:
 * @cpu: cpu on which to flush
 * @addr: page of virtual address to flush
 * @idxmap: set of mmu_idx to flush
 *
 * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
 * at @addr from the tlbs indicated by @idxmap from @cpu.
 */
static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
                                             target_ulong addr,
                                             uint16_t idxmap)
{
    CPUArchState *env = cpu->env_ptr;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);

    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        if ((idxmap >> mmu_idx) & 1) {
            tlb_flush_page_locked(env, mmu_idx, addr);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);

    tb_flush_jmp_cache(cpu, addr);
}

/**
 * tlb_flush_page_by_mmuidx_async_1:
 * @cpu: cpu on which to flush
 * @data: encoded addr + idxmap
 *
 * Helper for tlb_flush_page_by_mmuidx and friends, called through
 * async_run_on_cpu.  The idxmap parameter is encoded in the page
 * offset of the target_ptr field.  This limits the set of mmu_idx
 * that can be passed via this method.
 */
static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
                                             run_on_cpu_data data)
{
    target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
    target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
    uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;

    tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
}
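
/*
 * Worked example of the encoding above (illustrative; assumes a target
 * with 4 KiB pages, i.e. TARGET_PAGE_BITS == 12): a page-aligned addr of
 * 0xabcd1000 and an idxmap of 0x6 are packed into the single value
 * 0xabcd1006, and unpacked again by masking with TARGET_PAGE_MASK and
 * ~TARGET_PAGE_MASK respectively.  This is why callers fall back to an
 * allocated TLBFlushPageByMMUIdxData when idxmap >= TARGET_PAGE_SIZE.
 */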
typedef struct {
    target_ulong addr;
    uint16_t idxmap;
} TLBFlushPageByMMUIdxData;

/**
 * tlb_flush_page_by_mmuidx_async_2:
 * @cpu: cpu on which to flush
 * @data: allocated addr + idxmap
 *
 * Helper for tlb_flush_page_by_mmuidx and friends, called through
 * async_run_on_cpu.  The addr+idxmap parameters are stored in a
 * TLBFlushPageByMMUIdxData structure that has been allocated
 * specifically for this helper.  Free the structure when done.
 */
static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
                                             run_on_cpu_data data)
{
    TLBFlushPageByMMUIdxData *d = data.host_ptr;

    tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
    g_free(d);
}

void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
{
    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);

    /* This should already be page aligned */
    addr &= TARGET_PAGE_MASK;

    if (qemu_cpu_is_self(cpu)) {
        tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
    } else if (idxmap < TARGET_PAGE_SIZE) {
        /*
         * Most targets have only a few mmu_idx.  In the case where
         * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
         * allocating memory for this operation.
         */
        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    } else {
        TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);

        /* Otherwise allocate a structure, freed by the worker.  */
        d->addr = addr;
        d->idxmap = idxmap;
        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
                         RUN_ON_CPU_HOST_PTR(d));
    }
}

void tlb_flush_page(CPUState *cpu, target_ulong addr)
{
    tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
}

void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
                                       uint16_t idxmap)
{
    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    /* This should already be page aligned */
    addr &= TARGET_PAGE_MASK;

    /*
     * Allocate memory to hold addr+idxmap only when needed.
     * See tlb_flush_page_by_mmuidx for details.
     */
    if (idxmap < TARGET_PAGE_SIZE) {
        flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    } else {
        CPUState *dst_cpu;

        /* Allocate a separate data block for each destination cpu.  */
        CPU_FOREACH(dst_cpu) {
            if (dst_cpu != src_cpu) {
                TLBFlushPageByMMUIdxData *d
                    = g_new(TLBFlushPageByMMUIdxData, 1);

                d->addr = addr;
                d->idxmap = idxmap;
                async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
                                 RUN_ON_CPU_HOST_PTR(d));
            }
        }
    }

    tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
}

void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
{
    tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
}

void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
                                              target_ulong addr,
                                              uint16_t idxmap)
{
    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    /* This should already be page aligned */
    addr &= TARGET_PAGE_MASK;

    /*
     * Allocate memory to hold addr+idxmap only when needed.
     * See tlb_flush_page_by_mmuidx for details.
     */
    if (idxmap < TARGET_PAGE_SIZE) {
        flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
                              RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    } else {
        CPUState *dst_cpu;
        TLBFlushPageByMMUIdxData *d;

        /* Allocate a separate data block for each destination cpu.  */
        CPU_FOREACH(dst_cpu) {
            if (dst_cpu != src_cpu) {
                d = g_new(TLBFlushPageByMMUIdxData, 1);
                d->addr = addr;
                d->idxmap = idxmap;
                async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
                                 RUN_ON_CPU_HOST_PTR(d));
            }
        }

        d = g_new(TLBFlushPageByMMUIdxData, 1);
        d->addr = addr;
        d->idxmap = idxmap;
        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
                              RUN_ON_CPU_HOST_PTR(d));
    }
}

void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
{
    tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
}

/* update the TLBs so that writes to code in the virtual page 'addr'
   can be detected */
void tlb_protect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
                                             DIRTY_MEMORY_CODE);
}

/* update the TLB so that writes in physical page 'phys_addr' are no longer
   tested for self modifying code */
void tlb_unprotect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
}


/*
 * Dirty write flag handling
 *
 * When the TCG code writes to a location it looks up the address in
 * the TLB and uses that data to compute the final address. If any of
 * the lower bits of the address are set then the slow path is forced.
 * There are a number of reasons to do this but for normal RAM the
 * most usual is detecting writes to code regions which may invalidate
 * generated code.
 *
 * Other vCPUs might be reading their TLBs during guest execution, so we update
 * te->addr_write with atomic_set.  We don't need to worry about this for
 * oversized guests as MTTCG is disabled for them.
 *
 * Called with tlb_c.lock held.
 */
static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
                                         uintptr_t start, uintptr_t length)
{
    uintptr_t addr = tlb_entry->addr_write;

    if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
                 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
        addr &= TARGET_PAGE_MASK;
        addr += tlb_entry->addend;
        if ((addr - start) < length) {
#if TCG_OVERSIZED_GUEST
            tlb_entry->addr_write |= TLB_NOTDIRTY;
#else
            atomic_set(&tlb_entry->addr_write,
                       tlb_entry->addr_write | TLB_NOTDIRTY);
#endif
        }
    }
}

/*
 * Called with tlb_c.lock held.
 * Called only from the vCPU context, i.e. the TLB's owner thread.
 */
static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
{
    *d = *s;
}

/* This is a cross vCPU call (i.e. another vCPU resetting the flags of
 * the target vCPU).
 * We must take tlb_c.lock to avoid racing with another vCPU update. The only
 * thing actually updated is the target TLB entry ->addr_write flags.
 */
void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
{
    CPUArchState *env;

    int mmu_idx;

    env = cpu->env_ptr;
    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        unsigned int i;
        unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);

        for (i = 0; i < n; i++) {
            tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
                                         start1, length);
        }

        for (i = 0; i < CPU_VTLB_SIZE; i++) {
            tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
                                         start1, length);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);
}

/* Called with tlb_c.lock held */
static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
                                         target_ulong vaddr)
{
    if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
        tlb_entry->addr_write = vaddr;
    }
}

/* update the TLB corresponding to virtual page vaddr
   so that it is no longer dirty */
void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
{
    CPUArchState *env = cpu->env_ptr;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    vaddr &= TARGET_PAGE_MASK;
    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
    }

    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        int k;
        for (k = 0; k < CPU_VTLB_SIZE; k++) {
            tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);
}

/* Our TLB does not support large pages, so remember the area covered by
   large pages and trigger a full TLB flush if these are invalidated.  */
static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
                               target_ulong vaddr, target_ulong size)
{
    target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
    target_ulong lp_mask = ~(size - 1);

    if (lp_addr == (target_ulong)-1) {
        /* No previous large page.  */
        lp_addr = vaddr;
    } else {
        /* Extend the existing region to include the new page.
           This is a compromise between unnecessary flushes and
           the cost of maintaining a full variable size TLB.  */
        lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
        while (((lp_addr ^ vaddr) & lp_mask) != 0) {
            lp_mask <<= 1;
        }
    }
    env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
    env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
}
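
/*
 * Worked example for tlb_add_large_page() above (illustrative numbers,
 * 32-bit addresses for brevity): if the recorded region is the 2 MiB
 * page at 0x00200000 (mask 0xffe00000) and a new 2 MiB page at
 * 0x00600000 is added, the loop widens the mask to 0xff800000, so the
 * recorded region becomes the 8 MiB block at 0x00000000 covering both
 * pages.  Invalidating any page inside that block then forces a full
 * flush of this mmu_idx (see tlb_flush_page_locked).
 */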
/* Add a new TLB entry. At most one entry for a given virtual address
 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
 * supplied size is only used by tlb_flush_page.
 *
 * Called from TCG-generated code, which is under an RCU read-side
 * critical section.
 */
void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
                             hwaddr paddr, MemTxAttrs attrs, int prot,
                             int mmu_idx, target_ulong size)
{
    CPUArchState *env = cpu->env_ptr;
    CPUTLB *tlb = env_tlb(env);
    CPUTLBDesc *desc = &tlb->d[mmu_idx];
    MemoryRegionSection *section;
    unsigned int index;
    target_ulong address;
    target_ulong write_address;
    uintptr_t addend;
    CPUTLBEntry *te, tn;
    hwaddr iotlb, xlat, sz, paddr_page;
    target_ulong vaddr_page;
    int asidx = cpu_asidx_from_attrs(cpu, attrs);
    int wp_flags;
    bool is_ram, is_romd;

    assert_cpu_is_self(cpu);

    if (size <= TARGET_PAGE_SIZE) {
        sz = TARGET_PAGE_SIZE;
    } else {
        tlb_add_large_page(env, mmu_idx, vaddr, size);
        sz = size;
    }
    vaddr_page = vaddr & TARGET_PAGE_MASK;
    paddr_page = paddr & TARGET_PAGE_MASK;

    section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
                                                &xlat, &sz, attrs, &prot);
    assert(sz >= TARGET_PAGE_SIZE);

    tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
              " prot=%x idx=%d\n",
              vaddr, paddr, prot, mmu_idx);

    address = vaddr_page;
    if (size < TARGET_PAGE_SIZE) {
        /* Repeat the MMU check and TLB fill on every access.  */
        address |= TLB_INVALID_MASK;
    }
    if (attrs.byte_swap) {
        address |= TLB_BSWAP;
    }

    is_ram = memory_region_is_ram(section->mr);
    is_romd = memory_region_is_romd(section->mr);

    if (is_ram || is_romd) {
        /* RAM and ROMD both have associated host memory. */
        addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
    } else {
        /* I/O does not; force the host address to NULL. */
        addend = 0;
    }

    write_address = address;
    if (is_ram) {
        iotlb = memory_region_get_ram_addr(section->mr) + xlat;
        /*
         * Computing is_clean is expensive; avoid all that unless
         * the page is actually writable.
         */
        if (prot & PAGE_WRITE) {
            if (section->readonly) {
                write_address |= TLB_DISCARD_WRITE;
            } else if (cpu_physical_memory_is_clean(iotlb)) {
                write_address |= TLB_NOTDIRTY;
            }
        }
    } else {
        /* I/O or ROMD */
        iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
        /*
         * Writes to romd devices must go through MMIO to enable write.
         * Reads to romd devices go through the ram_ptr found above,
         * but of course reads to I/O must go through MMIO.
         */
        write_address |= TLB_MMIO;
        if (!is_romd) {
            address = write_address;
        }
    }

    wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
                                              TARGET_PAGE_SIZE);

    index = tlb_index(env, mmu_idx, vaddr_page);
    te = tlb_entry(env, mmu_idx, vaddr_page);

    /*
     * Hold the TLB lock for the rest of the function. We could acquire/release
     * the lock several times in the function, but it is faster to amortize the
     * acquisition cost by acquiring it just once. Note that this leads to
     * a longer critical section, but this is not a concern since the TLB lock
     * is unlikely to be contended.
     */
    qemu_spin_lock(&tlb->c.lock);

    /* Note that the tlb is no longer clean.  */
    tlb->c.dirty |= 1 << mmu_idx;

    /* Make sure there's no cached translation for the new page.  */
    tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);

    /*
     * Only evict the old entry to the victim tlb if it's for a
     * different page; otherwise just overwrite the stale data.
     */
    if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
        unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
        CPUTLBEntry *tv = &desc->vtable[vidx];

        /* Evict the old entry into the victim tlb.  */
        copy_tlb_helper_locked(tv, te);
        desc->viotlb[vidx] = desc->iotlb[index];
        tlb_n_used_entries_dec(env, mmu_idx);
    }

    /* refill the tlb */
    /*
     * At this point iotlb contains a physical section number in the lower
     * TARGET_PAGE_BITS, and either
     *  + the ram_addr_t of the page base of the target RAM (RAM)
     *  + the offset within section->mr of the page base (I/O, ROMD)
     * We subtract the vaddr_page (which is page aligned and thus won't
     * disturb the low bits) to give an offset which can be added to the
     * (non-page-aligned) vaddr of the eventual memory access to get
     * the MemoryRegion offset for the access. Note that the vaddr we
     * subtract here is that of the page base, and not the same as the
     * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
     */
    desc->iotlb[index].addr = iotlb - vaddr_page;
    desc->iotlb[index].attrs = attrs;

    /* Now calculate the new entry */
    tn.addend = addend - vaddr_page;
    if (prot & PAGE_READ) {
        tn.addr_read = address;
        if (wp_flags & BP_MEM_READ) {
            tn.addr_read |= TLB_WATCHPOINT;
        }
    } else {
        tn.addr_read = -1;
    }

    if (prot & PAGE_EXEC) {
        tn.addr_code = address;
    } else {
        tn.addr_code = -1;
    }

    tn.addr_write = -1;
    if (prot & PAGE_WRITE) {
        tn.addr_write = write_address;
        if (prot & PAGE_WRITE_INV) {
            tn.addr_write |= TLB_INVALID_MASK;
        }
        if (wp_flags & BP_MEM_WRITE) {
            tn.addr_write |= TLB_WATCHPOINT;
        }
    }

    copy_tlb_helper_locked(te, &tn);
    tlb_n_used_entries_inc(env, mmu_idx);
    qemu_spin_unlock(&tlb->c.lock);
}

/* Add a new TLB entry, but without specifying the memory
 * transaction attributes to be used.
 */
void tlb_set_page(CPUState *cpu, target_ulong vaddr,
                  hwaddr paddr, int prot,
                  int mmu_idx, target_ulong size)
{
    tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
                            prot, mmu_idx, size);
}

static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
    ram_addr_t ram_addr;

    ram_addr = qemu_ram_addr_from_host(ptr);
    if (ram_addr == RAM_ADDR_INVALID) {
        error_report("Bad ram pointer %p", ptr);
        abort();
    }
    return ram_addr;
}

/*
 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
 * be discarded and looked up again (e.g. via tlb_entry()).
 */
static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
                     MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);
    bool ok;

    /*
     * This is not a probe, so only valid return is success; failure
     * should result in exception + longjmp to the cpu loop.
     */
    ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr);
    assert(ok);
}

static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                         int mmu_idx, target_ulong addr, uintptr_t retaddr,
                         MMUAccessType access_type, MemOp op)
{
    CPUState *cpu = env_cpu(env);
    hwaddr mr_offset;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    uint64_t val;
    bool locked = false;
    MemTxResult r;

    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
    mr = section->mr;
    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
    cpu->mem_io_pc = retaddr;
    if (!cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }

    if (mr->global_locking && !qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        locked = true;
    }
    r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
    if (r != MEMTX_OK) {
        hwaddr physaddr = mr_offset +
            section->offset_within_address_space -
            section->offset_within_region;

        cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
                               mmu_idx, iotlbentry->attrs, r, retaddr);
    }
    if (locked) {
        qemu_mutex_unlock_iothread();
    }

    return val;
}

static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                      int mmu_idx, uint64_t val, target_ulong addr,
                      uintptr_t retaddr, MemOp op)
{
    CPUState *cpu = env_cpu(env);
    hwaddr mr_offset;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    bool locked = false;
    MemTxResult r;

    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
    mr = section->mr;
    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
    if (!cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }
    cpu->mem_io_pc = retaddr;

    if (mr->global_locking && !qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        locked = true;
    }
    r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
    if (r != MEMTX_OK) {
        hwaddr physaddr = mr_offset +
            section->offset_within_address_space -
            section->offset_within_region;

        cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
                               MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
                               retaddr);
    }
    if (locked) {
        qemu_mutex_unlock_iothread();
    }
}

static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
{
#if TCG_OVERSIZED_GUEST
    return *(target_ulong *)((uintptr_t)entry + ofs);
#else
    /* ofs might correspond to .addr_write, so use atomic_read */
    return atomic_read((target_ulong *)((uintptr_t)entry + ofs));
#endif
}

/* Return true if ADDR is present in the victim tlb, and has been copied
   back to the main tlb.  */
static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
                           size_t elt_ofs, target_ulong page)
{
    size_t vidx;

    assert_cpu_is_self(env_cpu(env));
    for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
        CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
        target_ulong cmp;

        /* elt_ofs might correspond to .addr_write, so use atomic_read */
#if TCG_OVERSIZED_GUEST
        cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
#else
        cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
#endif

        if (cmp == page) {
            /* Found entry in victim tlb, swap tlb and iotlb.  */
            CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];

            qemu_spin_lock(&env_tlb(env)->c.lock);
            copy_tlb_helper_locked(&tmptlb, tlb);
            copy_tlb_helper_locked(tlb, vtlb);
            copy_tlb_helper_locked(vtlb, &tmptlb);
            qemu_spin_unlock(&env_tlb(env)->c.lock);

            CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
            CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
            tmpio = *io; *io = *vio; *vio = tmpio;
            return true;
        }
    }
    return false;
}

/* Macro to call the above, with local variables from the use context.  */
#define VICTIM_TLB_HIT(TY, ADDR) \
  victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
                 (ADDR) & TARGET_PAGE_MASK)

/*
 * Return a ram_addr_t for the virtual address for execution.
 *
 * Return -1 if we can't translate and execute from an entire page
 * of RAM.  This will force us to execute by loading and translating
 * one insn at a time, without caching.
 *
 * NOTE: This function will trigger an exception if the page is
 * not executable.
 */
tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
                                        void **hostp)
{
    uintptr_t mmu_idx = cpu_mmu_index(env, true);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    void *p;

    if (unlikely(!tlb_hit(entry->addr_code, addr))) {
        if (!VICTIM_TLB_HIT(addr_code, addr)) {
            tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);

            if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
                /*
                 * The MMU protection covers a smaller range than a target
                 * page, so we must redo the MMU check for every insn.
                 */
                return -1;
            }
        }
        assert(tlb_hit(entry->addr_code, addr));
    }

    if (unlikely(entry->addr_code & TLB_MMIO)) {
        /* The region is not backed by RAM.  */
        if (hostp) {
            *hostp = NULL;
        }
        return -1;
    }

    p = (void *)((uintptr_t)addr + entry->addend);
    if (hostp) {
        *hostp = p;
    }
    return qemu_ram_addr_from_host_nofail(p);
}

tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
{
    return get_page_addr_code_hostp(env, addr, NULL);
}

static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
                           CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
{
    ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;

    trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);

    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
        struct page_collection *pages
            = page_collection_lock(ram_addr, ram_addr + size);
        tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr);
        page_collection_unlock(pages);
    }

    /*
     * Set both VGA and migration bits for simplicity and to remove
     * the notdirty callback faster.
     */
    cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);

    /* We remove the notdirty callback only if the code has been flushed. */
    if (!cpu_physical_memory_is_clean(ram_addr)) {
        trace_memory_notdirty_set_dirty(mem_vaddr);
        tlb_set_dirty(cpu, mem_vaddr);
    }
}

/*
 * Probe for whether the specified guest access is permitted. If it is not
 * permitted then an exception will be taken in the same way as if this
 * were a real access (and we will not return).
 * If the size is 0 or the page requires I/O access, returns NULL; otherwise,
 * returns the address of the host page similar to tlb_vaddr_to_host().
 */
void *probe_access(CPUArchState *env, target_ulong addr, int size,
                   MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
{
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr;
    size_t elt_ofs;
    int wp_access;

    g_assert(-(addr | TARGET_PAGE_MASK) >= size);

    switch (access_type) {
    case MMU_DATA_LOAD:
        elt_ofs = offsetof(CPUTLBEntry, addr_read);
        wp_access = BP_MEM_READ;
        break;
    case MMU_DATA_STORE:
        elt_ofs = offsetof(CPUTLBEntry, addr_write);
        wp_access = BP_MEM_WRITE;
        break;
    case MMU_INST_FETCH:
        elt_ofs = offsetof(CPUTLBEntry, addr_code);
        wp_access = BP_MEM_READ;
        break;
    default:
        g_assert_not_reached();
    }
    tlb_addr = tlb_read_ofs(entry, elt_ofs);

    if (unlikely(!tlb_hit(tlb_addr, addr))) {
        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, retaddr);
            /* TLB resize via tlb_fill may have moved the entry. */
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_read_ofs(entry, elt_ofs);
    }

    if (!size) {
        return NULL;
    }

    if (unlikely(tlb_addr & TLB_FLAGS_MASK)) {
        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Reject I/O access, or other required slow-path.  */
        if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) {
            return NULL;
        }

        /* Handle watchpoints.  */
        if (tlb_addr & TLB_WATCHPOINT) {
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, wp_access, retaddr);
        }

        /* Handle clean RAM pages.  */
        if (tlb_addr & TLB_NOTDIRTY) {
            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
        }
    }

    return (void *)((uintptr_t)addr + entry->addend);
}

void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
                        MMUAccessType access_type, int mmu_idx)
{
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr, page;
    size_t elt_ofs;

    switch (access_type) {
    case MMU_DATA_LOAD:
        elt_ofs = offsetof(CPUTLBEntry, addr_read);
        break;
    case MMU_DATA_STORE:
        elt_ofs = offsetof(CPUTLBEntry, addr_write);
        break;
    case MMU_INST_FETCH:
        elt_ofs = offsetof(CPUTLBEntry, addr_code);
        break;
    default:
        g_assert_not_reached();
    }

    page = addr & TARGET_PAGE_MASK;
    tlb_addr = tlb_read_ofs(entry, elt_ofs);

    if (!tlb_hit_page(tlb_addr, page)) {
        uintptr_t index = tlb_index(env, mmu_idx, addr);

        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) {
            CPUState *cs = env_cpu(env);
            CPUClass *cc = CPU_GET_CLASS(cs);

            if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) {
                /* Non-faulting page table read failed.  */
                return NULL;
            }

            /* TLB resize via tlb_fill may have moved the entry.  */
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_read_ofs(entry, elt_ofs);
    }

    if (tlb_addr & ~TARGET_PAGE_MASK) {
        /* IO access */
        return NULL;
    }

    return (void *)((uintptr_t)addr + entry->addend);
}


#ifdef CONFIG_PLUGIN
/*
 * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
 * This should be a hot path as we will have just looked this path up
 * in the softmmu lookup code (or helper). We don't handle re-fills or
 * checking the victim table. This is purely informational.
 *
 * This should never fail as the memory access being instrumented
 * should have just filled the TLB.
 */

bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
                       bool is_store, struct qemu_plugin_hwaddr *data)
{
    CPUArchState *env = cpu->env_ptr;
    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;

    if (likely(tlb_hit(tlb_addr, addr))) {
        /* We must have an iotlb entry for MMIO */
        if (tlb_addr & TLB_MMIO) {
            CPUIOTLBEntry *iotlbentry;
            iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
            data->is_io = true;
            data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
            data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
        } else {
            data->is_io = false;
            data->v.ram.hostaddr = addr + tlbe->addend;
        }
        return true;
    }
    return false;
}

#endif

/* Probe for a read-modify-write atomic operation.  Do not allow unaligned
 * operations, or io operations to proceed.  Return the host address.  */
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    size_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(tlbe);
    MemOp mop = get_memop(oi);
    int a_bits = get_alignment_bits(mop);
    int s_bits = mop & MO_SIZE;
    void *hostaddr;

    /* Adjust the given return address.  */
    retaddr -= GETPC_ADJ;

    /* Enforce guest required alignment.  */
    if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
        /* ??? Maybe indicate atomic op to cpu_unaligned_access */
        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* Enforce qemu required alignment.  */
    if (unlikely(addr & ((1 << s_bits) - 1))) {
        /* We get here if guest alignment was not requested,
           or was not enforced by cpu_unaligned_access above.
           We might widen the access and emulate, but for now
           mark an exception and exit the cpu loop.  */
        goto stop_the_world;
    }

    /* Check TLB entry and enforce page permissions.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!VICTIM_TLB_HIT(addr_write, addr)) {
            tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            tlbe = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
    }

    /* Notice an IO access or a needs-MMU-lookup access */
    if (unlikely(tlb_addr & TLB_MMIO)) {
        /* There's really nothing that can be done to
           support this apart from stop-the-world.  */
        goto stop_the_world;
    }

    /* Let the guest notice RMW on a write-only page.  */
    if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
        tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
                 mmu_idx, retaddr);
        /* Since we don't support reads and writes to different addresses,
           and we do have the proper page loaded for write, this shouldn't
           ever return.  But just in case, handle via stop-the-world.  */
        goto stop_the_world;
    }

    hostaddr = (void *)((uintptr_t)addr + tlbe->addend);

    if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
        notdirty_write(env_cpu(env), addr, 1 << s_bits,
                       &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
    }

    return hostaddr;

 stop_the_world:
    cpu_loop_exit_atomic(env_cpu(env), retaddr);
}

/*
 * Load Helpers
 *
 * We support two different access types. SOFTMMU_CODE_ACCESS is
 * specifically for reading instructions from system memory. It is
 * called by the translation loop and in some helpers where the code
 * is disassembled. It shouldn't be called directly by guest code.
 */

typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
                                TCGMemOpIdx oi, uintptr_t retaddr);

static inline uint64_t QEMU_ALWAYS_INLINE
load_memop(const void *haddr, MemOp op)
{
    switch (op) {
    case MO_UB:
        return ldub_p(haddr);
    case MO_BEUW:
        return lduw_be_p(haddr);
    case MO_LEUW:
        return lduw_le_p(haddr);
    case MO_BEUL:
        return (uint32_t)ldl_be_p(haddr);
    case MO_LEUL:
        return (uint32_t)ldl_le_p(haddr);
    case MO_BEQ:
        return ldq_be_p(haddr);
    case MO_LEQ:
        return ldq_le_p(haddr);
    default:
        qemu_build_not_reached();
    }
}

static inline uint64_t QEMU_ALWAYS_INLINE
load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
            uintptr_t retaddr, MemOp op, bool code_read,
            FullLoadHelper *full_load)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
    const size_t tlb_off = code_read ?
        offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
    const MMUAccessType access_type =
        code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    uint64_t res;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, access_type,
                             mmu_idx, retaddr);
    }

    /* If the TLB entry is for a different page, reload and try again.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size,
                     access_type, mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = code_read ? entry->addr_code : entry->addr_read;
        tlb_addr &= ~TLB_INVALID_MASK;
    }

    /* Handle anything that isn't just a straight memory access.  */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through full_load.  */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints.  */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out.  */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_READ, retaddr);
        }

        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access.  */
        if (likely(tlb_addr & TLB_MMIO)) {
            return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
                            access_type, op ^ (need_swap * MO_BSWAP));
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two load_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this.  ;-)
         */
        if (unlikely(need_swap)) {
            return load_memop(haddr, op ^ MO_BSWAP);
        }
        return load_memop(haddr, op);
    }

    /* Handle slow unaligned access (it spans two pages or IO).  */
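    /*
     * Example of the combine below (illustrative): a 4-byte little-endian
     * load with (addr & 3) == 2 sets shift = 16; the low two bytes of the
     * result come from the high bytes of r1 (r1 >> 16) and the high two
     * bytes from the low bytes of r2 (r2 << 16), before masking the
     * result down to 32 bits.
     */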
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
        target_ulong addr1, addr2;
        uint64_t r1, r2;
        unsigned shift;
    do_unaligned_access:
        addr1 = addr & ~((target_ulong)size - 1);
        addr2 = addr1 + size;
        r1 = full_load(env, addr1, oi, retaddr);
        r2 = full_load(env, addr2, oi, retaddr);
        shift = (addr & (size - 1)) * 8;

        if (memop_big_endian(op)) {
            /* Big-endian combine.  */
            res = (r1 << shift) | (r2 >> ((size * 8) - shift));
        } else {
            /* Little-endian combine.  */
            res = (r1 >> shift) | (r2 << ((size * 8) - shift));
        }
        return res & MAKE_64BIT_MASK(0, size * 8);
    }

    haddr = (void *)((uintptr_t)addr + entry->addend);
    return load_memop(haddr, op);
}

/*
 * For the benefit of TCG generated code, we want to avoid the
 * complication of ABI-specific return type promotion and always
 * return a value extended to the register size of the host. This is
 * tcg_target_long, except in the case of a 32-bit host and 64-bit
 * data, and for that we always have uint64_t.
 *
 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
 */

static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
}

tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
                                     TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_ldub_mmu(env, addr, oi, retaddr);
}

static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
                       full_le_lduw_mmu);
}

tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_le_lduw_mmu(env, addr, oi, retaddr);
}

static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
                       full_be_lduw_mmu);
}

tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_be_lduw_mmu(env, addr, oi, retaddr);
}

static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
                       full_le_ldul_mmu);
}

tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_le_ldul_mmu(env, addr, oi, retaddr);
}

static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
                       full_be_ldul_mmu);
}

tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_be_ldul_mmu(env, addr, oi, retaddr);
}

uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
                           TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
                       helper_le_ldq_mmu);
}
uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
                           TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
                       helper_be_ldq_mmu);
}

/*
 * Provide signed versions of the load routines as well.  We can of course
 * avoid this for 64-bit data, or for 32-bit data on 32-bit host.
 */


tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
                                     TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
}

/*
 * Load helpers for cpu_ldst.h.
 */

static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
                                       int mmu_idx, uintptr_t retaddr,
                                       MemOp op, FullLoadHelper *full_load)
{
    uint16_t meminfo;
    TCGMemOpIdx oi;
    uint64_t ret;

    meminfo = trace_mem_get_info(op, mmu_idx, false);
    trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);

    op &= ~MO_SIGN;
    oi = make_memop_idx(op, mmu_idx);
    ret = full_load(env, addr, oi, retaddr);

    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);

    return ret;
}

uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                            int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu);
}

int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                       int mmu_idx, uintptr_t ra)
{
    return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB,
                                   full_ldub_mmu);
}

uint32_t cpu_lduw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                            int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUW,
                           MO_TE == MO_LE
                           ? full_le_lduw_mmu : full_be_lduw_mmu);
}

int cpu_ldsw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                       int mmu_idx, uintptr_t ra)
{
    return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_TESW,
                                    MO_TE == MO_LE
                                    ? full_le_lduw_mmu : full_be_lduw_mmu);
}

uint32_t cpu_ldl_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                           int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUL,
                           MO_TE == MO_LE
                           ? full_le_ldul_mmu : full_be_ldul_mmu);
}
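
/*
 * Illustrative use from a (hypothetical) target helper: these wrappers
 * take an explicit mmu_idx and the host return address for unwinding,
 * e.g. loading a 32-bit descriptor word through a specific MMU index:
 *
 *     uint32_t word = cpu_ldl_mmuidx_ra(env, desc_addr, mmu_idx, GETPC());
 *
 * The cpu_ld*_data_ra() and cpu_ld*_data() variants below derive the
 * mmu_idx from cpu_mmu_index() and, for the latter, pass a zero retaddr.
 */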

/*
 * Load helpers for cpu_ldst.h.
 */

static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
                                       int mmu_idx, uintptr_t retaddr,
                                       MemOp op, FullLoadHelper *full_load)
{
    uint16_t meminfo;
    TCGMemOpIdx oi;
    uint64_t ret;

    meminfo = trace_mem_get_info(op, mmu_idx, false);
    trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);

    op &= ~MO_SIGN;
    oi = make_memop_idx(op, mmu_idx);
    ret = full_load(env, addr, oi, retaddr);

    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);

    return ret;
}

uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                            int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu);
}

int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                       int mmu_idx, uintptr_t ra)
{
    return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB,
                                   full_ldub_mmu);
}

uint32_t cpu_lduw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                            int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUW,
                           MO_TE == MO_LE
                           ? full_le_lduw_mmu : full_be_lduw_mmu);
}

int cpu_ldsw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                       int mmu_idx, uintptr_t ra)
{
    return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_TESW,
                                    MO_TE == MO_LE
                                    ? full_le_lduw_mmu : full_be_lduw_mmu);
}

uint32_t cpu_ldl_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                           int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUL,
                           MO_TE == MO_LE
                           ? full_le_ldul_mmu : full_be_ldul_mmu);
}

uint64_t cpu_ldq_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                           int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEQ,
                           MO_TE == MO_LE
                           ? helper_le_ldq_mmu : helper_be_ldq_mmu);
}

uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr,
                          uintptr_t retaddr)
{
    return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_lduw_data_ra(CPUArchState *env, target_ulong ptr,
                          uintptr_t retaddr)
{
    return cpu_lduw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

int cpu_ldsw_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldsw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_ldl_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldl_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint64_t cpu_ldq_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldq_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldub_data_ra(env, ptr, 0);
}

int cpu_ldsb_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldsb_data_ra(env, ptr, 0);
}

uint32_t cpu_lduw_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_lduw_data_ra(env, ptr, 0);
}

int cpu_ldsw_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldsw_data_ra(env, ptr, 0);
}

uint32_t cpu_ldl_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldl_data_ra(env, ptr, 0);
}

uint64_t cpu_ldq_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldq_data_ra(env, ptr, 0);
}
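
/*
 * Typical use of these wrappers from a target helper, sketched with a
 * hypothetical helper name for illustration: fetch a guest word with any
 * resulting fault attributed to the calling TB via GETPC():
 *
 *     uint32_t helper_load_desc(CPUArchState *env, target_ulong ptr)
 *     {
 *         return cpu_ldl_data_ra(env, ptr, GETPC());
 *     }
 *
 * The *_data() variants above pass 0 for the return address, i.e. no
 * unwinding of a translation block is requested.
 */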

/*
 * Store Helpers
 */

static inline void QEMU_ALWAYS_INLINE
store_memop(void *haddr, uint64_t val, MemOp op)
{
    switch (op) {
    case MO_UB:
        stb_p(haddr, val);
        break;
    case MO_BEUW:
        stw_be_p(haddr, val);
        break;
    case MO_LEUW:
        stw_le_p(haddr, val);
        break;
    case MO_BEUL:
        stl_be_p(haddr, val);
        break;
    case MO_LEUL:
        stl_le_p(haddr, val);
        break;
    case MO_BEQ:
        stq_be_p(haddr, val);
        break;
    case MO_LEQ:
        stq_le_p(haddr, val);
        break;
    default:
        qemu_build_not_reached();
    }
}

static inline void QEMU_ALWAYS_INLINE
store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
             TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(entry);
    const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* If the TLB entry is for a different page, reload and try again. */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
    }

    /* Handle anything that isn't just a straight memory access. */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through byte stores. */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints. */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out. */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
        }

        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access. */
        if (tlb_addr & TLB_MMIO) {
            io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
                      op ^ (need_swap * MO_BSWAP));
            return;
        }

        /* Ignore writes to ROM. */
        if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
            return;
        }

        /* Handle clean RAM pages. */
        if (tlb_addr & TLB_NOTDIRTY) {
            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two store_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this. ;-)
         */
        if (unlikely(need_swap)) {
            store_memop(haddr, val, op ^ MO_BSWAP);
        } else {
            store_memop(haddr, val, op);
        }
        return;
    }

    /* Handle slow unaligned access (it spans two pages or IO). */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
        int i;
        uintptr_t index2;
        CPUTLBEntry *entry2;
        target_ulong page2, tlb_addr2;
        size_t size2;

    do_unaligned_access:
        /*
         * Ensure the second page is in the TLB. Note that the first page
         * is already guaranteed to be filled, and that the second page
         * cannot evict the first.
         */
        page2 = (addr + size) & TARGET_PAGE_MASK;
        size2 = (addr + size) & ~TARGET_PAGE_MASK;
        index2 = tlb_index(env, mmu_idx, page2);
        entry2 = tlb_entry(env, mmu_idx, page2);
        tlb_addr2 = tlb_addr_write(entry2);
        if (!tlb_hit_page(tlb_addr2, page2)) {
            if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
                tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
                         mmu_idx, retaddr);
                index2 = tlb_index(env, mmu_idx, page2);
                entry2 = tlb_entry(env, mmu_idx, page2);
            }
            tlb_addr2 = tlb_addr_write(entry2);
        }

        /*
         * Handle watchpoints. Since this may trap, all checks
         * must happen before any store.
         */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            cpu_check_watchpoint(env_cpu(env), addr, size - size2,
                                 env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
                                 BP_MEM_WRITE, retaddr);
        }
        if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
            cpu_check_watchpoint(env_cpu(env), page2, size2,
                                 env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
                                 BP_MEM_WRITE, retaddr);
        }

        /*
         * XXX: not efficient, but simple.
         * This loop must go in the forward direction to avoid issues
         * with self-modifying code in Windows 64-bit.
         */
        for (i = 0; i < size; ++i) {
            uint8_t val8;
            if (memop_big_endian(op)) {
                /* Big-endian extract. */
                val8 = val >> (((size - 1) * 8) - (i * 8));
            } else {
                /* Little-endian extract. */
                val8 = val >> (i * 8);
            }
            helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
        }
        return;
    }

    haddr = (void *)((uintptr_t)addr + entry->addend);
    store_memop(haddr, val, op);
}
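
/*
 * The byte loop in store_helper() above decomposes the store in guest
 * memory order.  For example, a 2-byte big-endian store of val = 0x1234
 * issues helper_ret_stb_mmu(env, addr, 0x12, oi, retaddr) followed by
 * helper_ret_stb_mmu(env, addr + 1, 0x34, oi, retaddr): with size = 2,
 * i = 0 extracts val >> 8 and i = 1 extracts val >> 0.  The little-endian
 * extract walks the bytes in the opposite significance order, so the same
 * two calls store 0x34 at addr and 0x12 at addr + 1.
 */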

void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
                        TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_UB);
}

void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUW);
}

void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUW);
}

void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUL);
}

void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUL);
}

void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEQ);
}

void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEQ);
}

/*
 * Store Helpers for cpu_ldst.h
 */

static inline void QEMU_ALWAYS_INLINE
cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
                 int mmu_idx, uintptr_t retaddr, MemOp op)
{
    TCGMemOpIdx oi;
    uint16_t meminfo;

    meminfo = trace_mem_get_info(op, mmu_idx, true);
    trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);

    oi = make_memop_idx(op, mmu_idx);
    store_helper(env, addr, val, oi, retaddr, op);

    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
}

void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
}

void cpu_stw_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUW);
}

void cpu_stl_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUL);
}

void cpu_stq_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEQ);
}

void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stw_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stw_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stl_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stl_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stq_data_ra(CPUArchState *env, target_ulong ptr,
                     uint64_t val, uintptr_t retaddr)
{
    cpu_stq_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stb_data_ra(env, ptr, val, 0);
}

void cpu_stw_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stw_data_ra(env, ptr, val, 0);
}

void cpu_stl_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stl_data_ra(env, ptr, val, 0);
}

void cpu_stq_data(CPUArchState *env, target_ulong ptr, uint64_t val)
{
    cpu_stq_data_ra(env, ptr, val, 0);
}

/* First set of helpers allows passing in of OI and RETADDR.  This makes
   them callable from other helpers.  */

#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
#define ATOMIC_NAME(X) \
    HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
#define ATOMIC_MMU_DECLS
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
#define ATOMIC_MMU_CLEANUP
#define ATOMIC_MMU_IDX get_mmuidx(oi)

#include "atomic_common.inc.c"

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif

/* Second set of helpers are directly callable from TCG as helpers.  */

#undef EXTRA_ARGS
#undef ATOMIC_NAME
#undef ATOMIC_MMU_LOOKUP
#define EXTRA_ARGS , TCGMemOpIdx oi
#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC())

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif
#undef ATOMIC_MMU_IDX
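
/*
 * As an example of what the two template inclusions produce: for
 * DATA_SIZE 4 the first set generates helpers along the lines of
 *
 *     uint32_t helper_atomic_cmpxchgl_le_mmu(CPUArchState *env,
 *                                            target_ulong addr,
 *                                            uint32_t cmpv, uint32_t newv,
 *                                            TCGMemOpIdx oi,
 *                                            uintptr_t retaddr);
 *
 * while the second set drops the explicit retaddr argument and obtains
 * the host return address via GETPC() in ATOMIC_MMU_LOOKUP instead.
 */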

/* Code access functions.  */

static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
}

uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
    return full_ldub_code(env, addr, oi, 0);
}

static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
}

uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
    return full_lduw_code(env, addr, oi, 0);
}

static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
}

uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
    return full_ldl_code(env, addr, oi, 0);
}

static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
}

uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
    return full_ldq_code(env, addr, oi, 0);
}
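
/*
 * These are the entry points the per-target translators typically use to
 * fetch guest instruction bytes, schematically (dc->pc is an illustrative
 * name for the translator's current program counter):
 *
 *     uint32_t insn = cpu_ldl_code(env, dc->pc);
 *
 * The MMU index comes from cpu_mmu_index(env, true), and the zero return
 * address indicates there is no host return address to unwind from.
 */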