/*
 *  Common CPU TLB handling
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
#include "exec/cpu_ldst.h"
#include "exec/cputlb.h"
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "tcg/tcg.h"
#include "qemu/error-report.h"
#include "exec/log.h"
#include "exec/helper-proto.h"
#include "qemu/atomic.h"
#include "qemu/atomic128.h"
#include "translate-all.h"
#include "trace-root.h"
#include "trace/mem.h"
#ifdef CONFIG_PLUGIN
#include "qemu/plugin-memory.h"
#endif

/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
/* #define DEBUG_TLB */
/* #define DEBUG_TLB_LOG */

#ifdef DEBUG_TLB
# define DEBUG_TLB_GATE 1
# ifdef DEBUG_TLB_LOG
#  define DEBUG_TLB_LOG_GATE 1
# else
#  define DEBUG_TLB_LOG_GATE 0
# endif
#else
# define DEBUG_TLB_GATE 0
# define DEBUG_TLB_LOG_GATE 0
#endif

#define tlb_debug(fmt, ...) do { \
    if (DEBUG_TLB_LOG_GATE) { \
        qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
                      ## __VA_ARGS__); \
    } else if (DEBUG_TLB_GATE) { \
        fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
    } \
} while (0)

#define assert_cpu_is_self(cpu) do {                              \
        if (DEBUG_TLB_GATE) {                                     \
            g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
        }                                                         \
    } while (0)

/* run_on_cpu_data.target_ptr should always be big enough for a
 * target_ulong even on 32 bit builds */
QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));

/* We currently can't handle more than 16 bits in the MMUIDX bitmask.
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)

static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
{
    return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
}

static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
{
    return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
}

static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
                             size_t max_entries)
{
    desc->window_begin_ns = ns;
    desc->window_max_entries = max_entries;
}

static void tlb_dyn_init(CPUArchState *env)
{
    int i;

    for (i = 0; i < NB_MMU_MODES; i++) {
        CPUTLBDesc *desc = &env_tlb(env)->d[i];
        size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;

        tlb_window_reset(desc, get_clock_realtime(), 0);
        desc->n_used_entries = 0;
        env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
        env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries);
        env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries);
    }
}

/**
 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
 * @env: CPU that owns the TLB
 * @mmu_idx: MMU index of the TLB
 *
 * Called with tlb_lock held.
 *
 * We have two main constraints when resizing a TLB: (1) we only resize it
 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
 * the array or unnecessarily flushing it), which means we do not control how
 * frequently the resizing can occur; (2) we don't have access to the guest's
 * future scheduling decisions, and therefore have to decide the magnitude of
 * the resize based on past observations.
 *
 * In general, a memory-hungry process can benefit greatly from an appropriately
 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
 * we just have to make the TLB as large as possible; while an oversized TLB
 * results in minimal TLB miss rates, it also takes longer to be flushed
 * (flushes can be _very_ frequent), and the reduced locality can also hurt
 * performance.
 *
 * To achieve near-optimal performance for all kinds of workloads, we:
 *
 * 1. Aggressively increase the size of the TLB when the use rate of the
 * TLB being flushed is high, since it is likely that in the near future this
 * memory-hungry process will execute again, and its memory hungriness will
 * probably be similar.
 *
 * 2. Slowly reduce the size of the TLB as the use rate declines over a
 * reasonably large time window. The rationale is that if in such a time window
 * we have not observed a high TLB use rate, it is likely that we won't observe
 * it in the near future. In that case, once a time window expires we downsize
 * the TLB to match the maximum use rate observed in the window.
 *
 * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
 * since in that range performance is likely near-optimal. Recall that the TLB
 * is direct mapped, so we want the use rate to be low (or at least not too
 * high), since otherwise we are likely to have a significant amount of
 * conflict misses.
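 *
 * As a worked example of rule 1 (derived directly from the code below): with
 * a 1024-entry table whose peak use in the current window was 800 entries,
 * the use rate is 800 * 100 / 1024 = 78%, so on the next flush the table is
 * doubled to 2048 entries (subject to CPU_TLB_DYN_MAX_BITS).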
 */
static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
{
    CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
    size_t old_size = tlb_n_entries(env, mmu_idx);
    size_t rate;
    size_t new_size = old_size;
    int64_t now = get_clock_realtime();
    int64_t window_len_ms = 100;
    int64_t window_len_ns = window_len_ms * 1000 * 1000;
    bool window_expired = now > desc->window_begin_ns + window_len_ns;

    if (desc->n_used_entries > desc->window_max_entries) {
        desc->window_max_entries = desc->n_used_entries;
    }
    rate = desc->window_max_entries * 100 / old_size;

    if (rate > 70) {
        new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
    } else if (rate < 30 && window_expired) {
        size_t ceil = pow2ceil(desc->window_max_entries);
        size_t expected_rate = desc->window_max_entries * 100 / ceil;

        /*
         * Avoid undersizing when the max number of entries seen is just below
         * a pow2. For instance, if max_entries == 1025, the expected use rate
         * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
         * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
         * later. Thus, make sure that the expected use rate remains below 70%.
         * (and since we double the size, that means the lowest rate we'd
         * expect to get is 35%, which is still in the 30-70% range where
         * we consider that the size is appropriate.)
         */
        if (expected_rate > 70) {
            ceil *= 2;
        }
        new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
    }

    if (new_size == old_size) {
        if (window_expired) {
            tlb_window_reset(desc, now, desc->n_used_entries);
        }
        return;
    }

    g_free(env_tlb(env)->f[mmu_idx].table);
    g_free(env_tlb(env)->d[mmu_idx].iotlb);

    tlb_window_reset(desc, now, 0);
    /* desc->n_used_entries is cleared by the caller */
    env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
    env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
    env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
    /*
     * If the allocations fail, try smaller sizes. We just freed some
     * memory, so going back to half of new_size has a good chance of working.
     * Increased memory pressure elsewhere in the system might cause the
     * allocations to fail though, so we progressively reduce the allocation
     * size, aborting if we cannot even allocate the smallest TLB we support.
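     * Both arrays are freed and reallocated together on each retry, since
     * either allocation may be the one that failed.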
     */
    while (env_tlb(env)->f[mmu_idx].table == NULL ||
           env_tlb(env)->d[mmu_idx].iotlb == NULL) {
        if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
            error_report("%s: %s", __func__, strerror(errno));
            abort();
        }
        new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
        env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;

        g_free(env_tlb(env)->f[mmu_idx].table);
        g_free(env_tlb(env)->d[mmu_idx].iotlb);
        env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
        env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
    }
}

static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
{
    tlb_mmu_resize_locked(env, mmu_idx);
    env_tlb(env)->d[mmu_idx].n_used_entries = 0;
    env_tlb(env)->d[mmu_idx].large_page_addr = -1;
    env_tlb(env)->d[mmu_idx].large_page_mask = -1;
    env_tlb(env)->d[mmu_idx].vindex = 0;
    memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
    memset(env_tlb(env)->d[mmu_idx].vtable, -1,
           sizeof(env_tlb(env)->d[0].vtable));
}

static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
{
    env_tlb(env)->d[mmu_idx].n_used_entries++;
}

static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
{
    env_tlb(env)->d[mmu_idx].n_used_entries--;
}

void tlb_init(CPUState *cpu)
{
    CPUArchState *env = cpu->env_ptr;

    qemu_spin_init(&env_tlb(env)->c.lock);

    /* Ensure that cpu_reset performs a full flush.  */
    env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;

    tlb_dyn_init(env);
}

/* flush_all_helper: run fn across all cpus
 *
 * Queue fn as async work on every cpu other than src.  Callers that need a
 * synchronisation point also queue fn on src as "safe" work, so that all
 * queued work is finished before execution starts again.
 */
static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
                             run_on_cpu_data d)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu != src) {
            async_run_on_cpu(cpu, fn, d);
        }
    }
}

void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
{
    CPUState *cpu;
    size_t full = 0, part = 0, elide = 0;

    CPU_FOREACH(cpu) {
        CPUArchState *env = cpu->env_ptr;

        full += atomic_read(&env_tlb(env)->c.full_flush_count);
        part += atomic_read(&env_tlb(env)->c.part_flush_count);
        elide += atomic_read(&env_tlb(env)->c.elide_flush_count);
    }
    *pfull = full;
    *ppart = part;
    *pelide = elide;
}

static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
{
    CPUArchState *env = cpu->env_ptr;
    uint16_t asked = data.host_int;
    uint16_t all_dirty, work, to_clean;

    assert_cpu_is_self(cpu);

    tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);

    qemu_spin_lock(&env_tlb(env)->c.lock);

    all_dirty = env_tlb(env)->c.dirty;
    to_clean = asked & all_dirty;
    all_dirty &= ~to_clean;
    env_tlb(env)->c.dirty = all_dirty;

    for (work = to_clean; work != 0; work &= work - 1) {
        int mmu_idx = ctz32(work);
        tlb_flush_one_mmuidx_locked(env, mmu_idx);
    }

    qemu_spin_unlock(&env_tlb(env)->c.lock);

    cpu_tb_jmp_cache_clear(cpu);

    if (to_clean == ALL_MMUIDX_BITS) {
        atomic_set(&env_tlb(env)->c.full_flush_count,
                   env_tlb(env)->c.full_flush_count + 1);
    } else {
        atomic_set(&env_tlb(env)->c.part_flush_count,
                   env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
        if (to_clean != asked) {
            atomic_set(&env_tlb(env)->c.elide_flush_count,
                       env_tlb(env)->c.elide_flush_count +
                       ctpop16(asked & ~to_clean));
        }
    }
}

void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
{
    tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);

    if (cpu->created && !qemu_cpu_is_self(cpu)) {
        async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
                         RUN_ON_CPU_HOST_INT(idxmap));
    } else {
        tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
    }
}

void tlb_flush(CPUState *cpu)
{
    tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
}

void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;

    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
}

void tlb_flush_all_cpus(CPUState *src_cpu)
{
    tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
}

void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;

    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
}

void tlb_flush_all_cpus_synced(CPUState *src_cpu)
{
    tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
}

static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
                                        target_ulong page)
{
    return tlb_hit_page(tlb_entry->addr_read, page) ||
           tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
           tlb_hit_page(tlb_entry->addr_code, page);
}

/**
 * tlb_entry_is_empty - return true if the entry is not in use
 * @te: pointer to CPUTLBEntry
 */
static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
{
    return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
}

/* Called with tlb_c.lock held */
static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
                                          target_ulong page)
{
    if (tlb_hit_page_anyprot(tlb_entry, page)) {
        memset(tlb_entry, -1, sizeof(*tlb_entry));
        return true;
    }
    return false;
}

/* Called with tlb_c.lock held */
static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
                                              target_ulong page)
{
    CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
    int k;

    assert_cpu_is_self(env_cpu(env));
    for (k = 0; k < CPU_VTLB_SIZE; k++) {
        if (tlb_flush_entry_locked(&d->vtable[k], page)) {
            tlb_n_used_entries_dec(env, mmu_idx);
        }
    }
}

static void tlb_flush_page_locked(CPUArchState *env, int midx,
                                  target_ulong page)
{
    target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
    target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;

    /* Check if we need to flush due to large pages.  */
    if ((page & lp_mask) == lp_addr) {
        tlb_debug("forcing full flush midx %d ("
                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
                  midx, lp_addr, lp_mask);
        tlb_flush_one_mmuidx_locked(env, midx);
    } else {
        if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
            tlb_n_used_entries_dec(env, midx);
        }
        tlb_flush_vtlb_page_locked(env, midx, page);
    }
}

/**
 * tlb_flush_page_by_mmuidx_async_0:
 * @cpu: cpu on which to flush
 * @addr: page of virtual address to flush
 * @idxmap: set of mmu_idx to flush
 *
 * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
 * at @addr from the tlbs indicated by @idxmap from @cpu.
 */
static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
                                             target_ulong addr,
                                             uint16_t idxmap)
{
    CPUArchState *env = cpu->env_ptr;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);

    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        if ((idxmap >> mmu_idx) & 1) {
            tlb_flush_page_locked(env, mmu_idx, addr);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);

    tb_flush_jmp_cache(cpu, addr);
}

/**
 * tlb_flush_page_by_mmuidx_async_1:
 * @cpu: cpu on which to flush
 * @data: encoded addr + idxmap
 *
 * Helper for tlb_flush_page_by_mmuidx and friends, called through
 * async_run_on_cpu.  The idxmap parameter is encoded in the page
 * offset of the target_ptr field.  This limits the set of mmu_idx
 * that can be passed via this method.
 */
static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
                                             run_on_cpu_data data)
{
    target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
    target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
    uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;

    tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
}

typedef struct {
    target_ulong addr;
    uint16_t idxmap;
} TLBFlushPageByMMUIdxData;

/**
 * tlb_flush_page_by_mmuidx_async_2:
 * @cpu: cpu on which to flush
 * @data: allocated addr + idxmap
 *
 * Helper for tlb_flush_page_by_mmuidx and friends, called through
 * async_run_on_cpu.  The addr+idxmap parameters are stored in a
 * TLBFlushPageByMMUIdxData structure that has been allocated
 * specifically for this helper.  Free the structure when done.
 */
static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
                                             run_on_cpu_data data)
{
    TLBFlushPageByMMUIdxData *d = data.host_ptr;

    tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
    g_free(d);
}

void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
{
    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);

    /* This should already be page aligned */
    addr &= TARGET_PAGE_MASK;

    if (qemu_cpu_is_self(cpu)) {
        tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
    } else if (idxmap < TARGET_PAGE_SIZE) {
        /*
         * Most targets have only a few mmu_idx.  In the case where
         * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
         * allocating memory for this operation.
         */
        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    } else {
        TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);

        /* Otherwise allocate a structure, freed by the worker.  */
        d->addr = addr;
        d->idxmap = idxmap;
        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
                         RUN_ON_CPU_HOST_PTR(d));
    }
}

void tlb_flush_page(CPUState *cpu, target_ulong addr)
{
    tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
}

void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
                                       uint16_t idxmap)
{
    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    /* This should already be page aligned */
    addr &= TARGET_PAGE_MASK;

    /*
     * Allocate memory to hold addr+idxmap only when needed.
     * See tlb_flush_page_by_mmuidx for details.
     */
    if (idxmap < TARGET_PAGE_SIZE) {
        flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    } else {
        CPUState *dst_cpu;

        /* Allocate a separate data block for each destination cpu.  */
        CPU_FOREACH(dst_cpu) {
            if (dst_cpu != src_cpu) {
                TLBFlushPageByMMUIdxData *d
                    = g_new(TLBFlushPageByMMUIdxData, 1);

                d->addr = addr;
                d->idxmap = idxmap;
                async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
                                 RUN_ON_CPU_HOST_PTR(d));
            }
        }
    }

    tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
}

void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
{
    tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
}

void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
                                              target_ulong addr,
                                              uint16_t idxmap)
{
    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    /* This should already be page aligned */
    addr &= TARGET_PAGE_MASK;

    /*
     * Allocate memory to hold addr+idxmap only when needed.
     * See tlb_flush_page_by_mmuidx for details.
     */
    if (idxmap < TARGET_PAGE_SIZE) {
        flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
                              RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    } else {
        CPUState *dst_cpu;
        TLBFlushPageByMMUIdxData *d;

        /* Allocate a separate data block for each destination cpu.  */
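        /*
         * Each block is freed by tlb_flush_page_by_mmuidx_async_2 on the
         * cpu that consumes it, so the blocks cannot be shared.
         */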
        CPU_FOREACH(dst_cpu) {
            if (dst_cpu != src_cpu) {
                d = g_new(TLBFlushPageByMMUIdxData, 1);
                d->addr = addr;
                d->idxmap = idxmap;
                async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
                                 RUN_ON_CPU_HOST_PTR(d));
            }
        }

        d = g_new(TLBFlushPageByMMUIdxData, 1);
        d->addr = addr;
        d->idxmap = idxmap;
        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
                              RUN_ON_CPU_HOST_PTR(d));
    }
}

void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
{
    tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
}

/* update the TLBs so that writes to code in the virtual page 'addr'
   can be detected */
void tlb_protect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
                                             DIRTY_MEMORY_CODE);
}

/* update the TLB so that writes in physical page 'phys_addr' are no longer
   tested for self modifying code */
void tlb_unprotect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
}


/*
 * Dirty write flag handling
 *
 * When the TCG code writes to a location it looks up the address in
 * the TLB and uses that data to compute the final address. If any of
 * the lower bits of the address are set then the slow path is forced.
 * There are a number of reasons to do this but for normal RAM the
 * most usual is detecting writes to code regions which may invalidate
 * generated code.
 *
 * Other vCPUs might be reading their TLBs during guest execution, so we update
 * te->addr_write with atomic_set. We don't need to worry about this for
 * oversized guests as MTTCG is disabled for them.
 *
 * Called with tlb_c.lock held.
 */
static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
                                         uintptr_t start, uintptr_t length)
{
    uintptr_t addr = tlb_entry->addr_write;

    if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
                 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
        addr &= TARGET_PAGE_MASK;
        addr += tlb_entry->addend;
        if ((addr - start) < length) {
#if TCG_OVERSIZED_GUEST
            tlb_entry->addr_write |= TLB_NOTDIRTY;
#else
            atomic_set(&tlb_entry->addr_write,
                       tlb_entry->addr_write | TLB_NOTDIRTY);
#endif
        }
    }
}

/*
 * Called with tlb_c.lock held.
 * Called only from the vCPU context, i.e. the TLB's owner thread.
 */
static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
{
    *d = *s;
}

/* This is a cross vCPU call (i.e. another vCPU resetting the flags of
 * the target vCPU).
 * We must take tlb_c.lock to avoid racing with another vCPU update. The only
 * thing actually updated is the target TLB entry ->addr_write flags.
 */
void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
{
    CPUArchState *env;

    int mmu_idx;

    env = cpu->env_ptr;
    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        unsigned int i;
        unsigned int n = tlb_n_entries(env, mmu_idx);

        for (i = 0; i < n; i++) {
            tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
                                         start1, length);
        }

        for (i = 0; i < CPU_VTLB_SIZE; i++) {
            tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
                                         start1, length);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);
}

/* Called with tlb_c.lock held */
static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
                                         target_ulong vaddr)
{
    if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
        tlb_entry->addr_write = vaddr;
    }
}

/* update the TLB corresponding to virtual page vaddr
   so that it is no longer dirty */
void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
{
    CPUArchState *env = cpu->env_ptr;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    vaddr &= TARGET_PAGE_MASK;
    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
    }

    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        int k;
        for (k = 0; k < CPU_VTLB_SIZE; k++) {
            tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);
}

/* Our TLB does not support large pages, so remember the area covered by
   large pages and trigger a full TLB flush if these are invalidated.  */
static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
                               target_ulong vaddr, target_ulong size)
{
    target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
    target_ulong lp_mask = ~(size - 1);

    if (lp_addr == (target_ulong)-1) {
        /* No previous large page.  */
        lp_addr = vaddr;
    } else {
        /* Extend the existing region to include the new page.
           This is a compromise between unnecessary flushes and
           the cost of maintaining a full variable size TLB.  */
        lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
        while (((lp_addr ^ vaddr) & lp_mask) != 0) {
            lp_mask <<= 1;
        }
    }
    env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
    env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
}

/* Add a new TLB entry. At most one entry for a given virtual address
 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
 * supplied size is only used by tlb_flush_page.
 *
 * Called from TCG-generated code, which is under an RCU read-side
 * critical section.
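 *
 * As an illustrative sketch (not a fixed interface), a target's tlb_fill
 * hook typically finishes a successful page-table walk with something like
 *
 *     tlb_set_page_with_attrs(cs, vaddr & TARGET_PAGE_MASK,
 *                             paddr & TARGET_PAGE_MASK, attrs,
 *                             prot, mmu_idx, TARGET_PAGE_SIZE);
 *
 * and then returns true.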
 */
void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
                             hwaddr paddr, MemTxAttrs attrs, int prot,
                             int mmu_idx, target_ulong size)
{
    CPUArchState *env = cpu->env_ptr;
    CPUTLB *tlb = env_tlb(env);
    CPUTLBDesc *desc = &tlb->d[mmu_idx];
    MemoryRegionSection *section;
    unsigned int index;
    target_ulong address;
    target_ulong write_address;
    uintptr_t addend;
    CPUTLBEntry *te, tn;
    hwaddr iotlb, xlat, sz, paddr_page;
    target_ulong vaddr_page;
    int asidx = cpu_asidx_from_attrs(cpu, attrs);
    int wp_flags;
    bool is_ram, is_romd;

    assert_cpu_is_self(cpu);

    if (size <= TARGET_PAGE_SIZE) {
        sz = TARGET_PAGE_SIZE;
    } else {
        tlb_add_large_page(env, mmu_idx, vaddr, size);
        sz = size;
    }
    vaddr_page = vaddr & TARGET_PAGE_MASK;
    paddr_page = paddr & TARGET_PAGE_MASK;

    section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
                                                &xlat, &sz, attrs, &prot);
    assert(sz >= TARGET_PAGE_SIZE);

    tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
              " prot=%x idx=%d\n",
              vaddr, paddr, prot, mmu_idx);

    address = vaddr_page;
    if (size < TARGET_PAGE_SIZE) {
        /* Repeat the MMU check and TLB fill on every access.  */
        address |= TLB_INVALID_MASK;
    }
    if (attrs.byte_swap) {
        address |= TLB_BSWAP;
    }

    is_ram = memory_region_is_ram(section->mr);
    is_romd = memory_region_is_romd(section->mr);

    if (is_ram || is_romd) {
        /* RAM and ROMD both have associated host memory. */
        addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
    } else {
        /* I/O does not; force the host address to NULL. */
        addend = 0;
    }

    write_address = address;
    if (is_ram) {
        iotlb = memory_region_get_ram_addr(section->mr) + xlat;
        /*
         * Computing is_clean is expensive; avoid all that unless
         * the page is actually writable.
         */
        if (prot & PAGE_WRITE) {
            if (section->readonly) {
                write_address |= TLB_DISCARD_WRITE;
            } else if (cpu_physical_memory_is_clean(iotlb)) {
                write_address |= TLB_NOTDIRTY;
            }
        }
    } else {
        /* I/O or ROMD */
        iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
        /*
         * Writes to romd devices must go through MMIO to enable write.
         * Reads to romd devices go through the ram_ptr found above,
         * but of course reads to I/O must go through MMIO.
         */
        write_address |= TLB_MMIO;
        if (!is_romd) {
            address = write_address;
        }
    }

    wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
                                              TARGET_PAGE_SIZE);

    index = tlb_index(env, mmu_idx, vaddr_page);
    te = tlb_entry(env, mmu_idx, vaddr_page);

    /*
     * Hold the TLB lock for the rest of the function. We could acquire/release
     * the lock several times in the function, but it is faster to amortize the
     * acquisition cost by acquiring it just once. Note that this leads to
     * a longer critical section, but this is not a concern since the TLB lock
     * is unlikely to be contended.
     */
    qemu_spin_lock(&tlb->c.lock);

    /* Note that the tlb is no longer clean.  */
    tlb->c.dirty |= 1 << mmu_idx;

    /* Make sure there's no cached translation for the new page.  */
    tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);

    /*
     * Only evict the old entry to the victim tlb if it's for a
     * different page; otherwise just overwrite the stale data.
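     * Evicting an empty entry would also wrongly decrement n_used_entries,
     * since empty entries are never counted as used.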
     */
    if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
        unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
        CPUTLBEntry *tv = &desc->vtable[vidx];

        /* Evict the old entry into the victim tlb.  */
        copy_tlb_helper_locked(tv, te);
        desc->viotlb[vidx] = desc->iotlb[index];
        tlb_n_used_entries_dec(env, mmu_idx);
    }

    /* refill the tlb */
    /*
     * At this point iotlb contains a physical section number in the lower
     * TARGET_PAGE_BITS, and either
     *  + the ram_addr_t of the page base of the target RAM (RAM)
     *  + the offset within section->mr of the page base (I/O, ROMD)
     * We subtract the vaddr_page (which is page aligned and thus won't
     * disturb the low bits) to give an offset which can be added to the
     * (non-page-aligned) vaddr of the eventual memory access to get
     * the MemoryRegion offset for the access. Note that the vaddr we
     * subtract here is that of the page base, and not the same as the
     * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
     */
    desc->iotlb[index].addr = iotlb - vaddr_page;
    desc->iotlb[index].attrs = attrs;

    /* Now calculate the new entry */
    tn.addend = addend - vaddr_page;
    if (prot & PAGE_READ) {
        tn.addr_read = address;
        if (wp_flags & BP_MEM_READ) {
            tn.addr_read |= TLB_WATCHPOINT;
        }
    } else {
        tn.addr_read = -1;
    }

    if (prot & PAGE_EXEC) {
        tn.addr_code = address;
    } else {
        tn.addr_code = -1;
    }

    tn.addr_write = -1;
    if (prot & PAGE_WRITE) {
        tn.addr_write = write_address;
        if (prot & PAGE_WRITE_INV) {
            tn.addr_write |= TLB_INVALID_MASK;
        }
        if (wp_flags & BP_MEM_WRITE) {
            tn.addr_write |= TLB_WATCHPOINT;
        }
    }

    copy_tlb_helper_locked(te, &tn);
    tlb_n_used_entries_inc(env, mmu_idx);
    qemu_spin_unlock(&tlb->c.lock);
}

/* Add a new TLB entry, but without specifying the memory
 * transaction attributes to be used.
 */
void tlb_set_page(CPUState *cpu, target_ulong vaddr,
                  hwaddr paddr, int prot,
                  int mmu_idx, target_ulong size)
{
    tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
                            prot, mmu_idx, size);
}

static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
    ram_addr_t ram_addr;

    ram_addr = qemu_ram_addr_from_host(ptr);
    if (ram_addr == RAM_ADDR_INVALID) {
        error_report("Bad ram pointer %p", ptr);
        abort();
    }
    return ram_addr;
}

/*
 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
 * be discarded and looked up again (e.g. via tlb_entry()).
 */
static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
                     MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);
    bool ok;

    /*
     * This is not a probe, so only valid return is success; failure
     * should result in exception + longjmp to the cpu loop.
     */
    ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr);
    assert(ok);
}

static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                         int mmu_idx, target_ulong addr, uintptr_t retaddr,
                         MMUAccessType access_type, MemOp op)
{
    CPUState *cpu = env_cpu(env);
    hwaddr mr_offset;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    uint64_t val;
    bool locked = false;
    MemTxResult r;

    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
    mr = section->mr;
    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
    cpu->mem_io_pc = retaddr;
    if (!cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }

    if (mr->global_locking && !qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        locked = true;
    }
    r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
    if (r != MEMTX_OK) {
        hwaddr physaddr = mr_offset +
            section->offset_within_address_space -
            section->offset_within_region;

        cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
                               mmu_idx, iotlbentry->attrs, r, retaddr);
    }
    if (locked) {
        qemu_mutex_unlock_iothread();
    }

    return val;
}

static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                      int mmu_idx, uint64_t val, target_ulong addr,
                      uintptr_t retaddr, MemOp op)
{
    CPUState *cpu = env_cpu(env);
    hwaddr mr_offset;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    bool locked = false;
    MemTxResult r;

    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
    mr = section->mr;
    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
    if (!cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }
    cpu->mem_io_pc = retaddr;

    if (mr->global_locking && !qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        locked = true;
    }
    r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
    if (r != MEMTX_OK) {
        hwaddr physaddr = mr_offset +
            section->offset_within_address_space -
            section->offset_within_region;

        cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
                               MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
                               retaddr);
    }
    if (locked) {
        qemu_mutex_unlock_iothread();
    }
}

static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
{
#if TCG_OVERSIZED_GUEST
    return *(target_ulong *)((uintptr_t)entry + ofs);
#else
    /* ofs might correspond to .addr_write, so use atomic_read */
    return atomic_read((target_ulong *)((uintptr_t)entry + ofs));
#endif
}

/* Return true if ADDR is present in the victim tlb, and has been copied
   back to the main tlb.  */
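/*
 * @mmu_idx and @index identify the main TLB slot that a matching victim
 * entry is swapped into; @elt_ofs selects which of addr_read, addr_write
 * or addr_code is compared against @page.
 */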
static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
                           size_t elt_ofs, target_ulong page)
{
    size_t vidx;

    assert_cpu_is_self(env_cpu(env));
    for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
        CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
        target_ulong cmp;

        /* elt_ofs might correspond to .addr_write, so use atomic_read */
#if TCG_OVERSIZED_GUEST
        cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
#else
        cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
#endif

        if (cmp == page) {
            /* Found entry in victim tlb, swap tlb and iotlb.  */
            CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];

            qemu_spin_lock(&env_tlb(env)->c.lock);
            copy_tlb_helper_locked(&tmptlb, tlb);
            copy_tlb_helper_locked(tlb, vtlb);
            copy_tlb_helper_locked(vtlb, &tmptlb);
            qemu_spin_unlock(&env_tlb(env)->c.lock);

            CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
            CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
            tmpio = *io; *io = *vio; *vio = tmpio;
            return true;
        }
    }
    return false;
}

/* Macro to call the above, with local variables from the use context.  */
#define VICTIM_TLB_HIT(TY, ADDR) \
    victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
                   (ADDR) & TARGET_PAGE_MASK)

/*
 * Return a ram_addr_t for the virtual address for execution.
 *
 * Return -1 if we can't translate and execute from an entire page
 * of RAM.  This will force us to execute by loading and translating
 * one insn at a time, without caching.
 *
 * NOTE: This function will trigger an exception if the page is
 * not executable.
 */
tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
                                        void **hostp)
{
    uintptr_t mmu_idx = cpu_mmu_index(env, true);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    void *p;

    if (unlikely(!tlb_hit(entry->addr_code, addr))) {
        if (!VICTIM_TLB_HIT(addr_code, addr)) {
            tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);

            if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
                /*
                 * The MMU protection covers a smaller range than a target
                 * page, so we must redo the MMU check for every insn.
                 */
                return -1;
            }
        }
        assert(tlb_hit(entry->addr_code, addr));
    }

    if (unlikely(entry->addr_code & TLB_MMIO)) {
        /* The region is not backed by RAM.  */
        if (hostp) {
            *hostp = NULL;
        }
        return -1;
    }

    p = (void *)((uintptr_t)addr + entry->addend);
    if (hostp) {
        *hostp = p;
    }
    return qemu_ram_addr_from_host_nofail(p);
}

tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
{
    return get_page_addr_code_hostp(env, addr, NULL);
}

static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
                           CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
{
    ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;

    trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);

    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
        struct page_collection *pages
            = page_collection_lock(ram_addr, ram_addr + size);
        tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr);
        page_collection_unlock(pages);
    }

    /*
     * Set both VGA and migration bits for simplicity and to remove
     * the notdirty callback faster.
     */
    cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);

    /* We remove the notdirty callback only if the code has been flushed. */
    if (!cpu_physical_memory_is_clean(ram_addr)) {
        trace_memory_notdirty_set_dirty(mem_vaddr);
        tlb_set_dirty(cpu, mem_vaddr);
    }
}

/*
 * Probe for whether the specified guest access is permitted. If it is not
 * permitted then an exception will be taken in the same way as if this
 * were a real access (and we will not return).
 * If the size is 0 or the page requires I/O access, returns NULL; otherwise,
 * returns the address of the host page similar to tlb_vaddr_to_host().
 */
void *probe_access(CPUArchState *env, target_ulong addr, int size,
                   MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
{
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr;
    size_t elt_ofs;
    int wp_access;

    g_assert(-(addr | TARGET_PAGE_MASK) >= size);

    switch (access_type) {
    case MMU_DATA_LOAD:
        elt_ofs = offsetof(CPUTLBEntry, addr_read);
        wp_access = BP_MEM_READ;
        break;
    case MMU_DATA_STORE:
        elt_ofs = offsetof(CPUTLBEntry, addr_write);
        wp_access = BP_MEM_WRITE;
        break;
    case MMU_INST_FETCH:
        elt_ofs = offsetof(CPUTLBEntry, addr_code);
        wp_access = BP_MEM_READ;
        break;
    default:
        g_assert_not_reached();
    }
    tlb_addr = tlb_read_ofs(entry, elt_ofs);

    if (unlikely(!tlb_hit(tlb_addr, addr))) {
        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, retaddr);
            /* TLB resize via tlb_fill may have moved the entry.  */
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_read_ofs(entry, elt_ofs);
    }

    if (!size) {
        return NULL;
    }

    if (unlikely(tlb_addr & TLB_FLAGS_MASK)) {
        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Reject I/O access, or other required slow-path.  */
        if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) {
            return NULL;
        }

        /* Handle watchpoints.  */
        if (tlb_addr & TLB_WATCHPOINT) {
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, wp_access, retaddr);
        }

        /* Handle clean RAM pages.  */
        if (tlb_addr & TLB_NOTDIRTY) {
            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
        }
    }

    return (void *)((uintptr_t)addr + entry->addend);
}

void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
                        MMUAccessType access_type, int mmu_idx)
{
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr, page;
    size_t elt_ofs;

    switch (access_type) {
    case MMU_DATA_LOAD:
        elt_ofs = offsetof(CPUTLBEntry, addr_read);
        break;
    case MMU_DATA_STORE:
        elt_ofs = offsetof(CPUTLBEntry, addr_write);
        break;
    case MMU_INST_FETCH:
        elt_ofs = offsetof(CPUTLBEntry, addr_code);
        break;
    default:
        g_assert_not_reached();
    }

    page = addr & TARGET_PAGE_MASK;
    tlb_addr = tlb_read_ofs(entry, elt_ofs);

    if (!tlb_hit_page(tlb_addr, page)) {
        uintptr_t index = tlb_index(env, mmu_idx, addr);

        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) {
            CPUState *cs = env_cpu(env);
            CPUClass *cc = CPU_GET_CLASS(cs);

            if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) {
                /* Non-faulting page table read failed.  */
                return NULL;
            }

            /* TLB resize via tlb_fill may have moved the entry.  */
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_read_ofs(entry, elt_ofs);
    }

    if (tlb_addr & ~TARGET_PAGE_MASK) {
        /* IO access */
        return NULL;
    }

    return (void *)((uintptr_t)addr + entry->addend);
}


#ifdef CONFIG_PLUGIN
/*
 * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
 * This should be a hot path as we will have just looked this path up
 * in the softmmu lookup code (or helper). We don't handle re-fills or
 * checking the victim table. This is purely informational.
 *
 * This should never fail as the memory access being instrumented
 * should have just filled the TLB.
 */

bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
                       bool is_store, struct qemu_plugin_hwaddr *data)
{
    CPUArchState *env = cpu->env_ptr;
    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;

    if (likely(tlb_hit(tlb_addr, addr))) {
        /* We must have an iotlb entry for MMIO */
        if (tlb_addr & TLB_MMIO) {
            CPUIOTLBEntry *iotlbentry;
            iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
            data->is_io = true;
            data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr,
                                                  iotlbentry->attrs);
            data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
        } else {
            data->is_io = false;
            data->v.ram.hostaddr = addr + tlbe->addend;
        }
        return true;
    }
    return false;
}

#endif

/* Probe for a read-modify-write atomic operation.  Do not allow unaligned
 * operations, or io operations to proceed.  Return the host address.  */
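/*
 * (The callers are the atomic helpers generated from atomic_template.h,
 * which this file includes further down, via its ATOMIC_MMU_LOOKUP macro.)
 */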
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    size_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(tlbe);
    MemOp mop = get_memop(oi);
    int a_bits = get_alignment_bits(mop);
    int s_bits = mop & MO_SIZE;
    void *hostaddr;

    /* Adjust the given return address.  */
    retaddr -= GETPC_ADJ;

    /* Enforce guest required alignment.  */
    if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
        /* ??? Maybe indicate atomic op to cpu_unaligned_access */
        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* Enforce qemu required alignment.  */
    if (unlikely(addr & ((1 << s_bits) - 1))) {
        /* We get here if guest alignment was not requested,
           or was not enforced by cpu_unaligned_access above.
           We might widen the access and emulate, but for now
           mark an exception and exit the cpu loop.  */
        goto stop_the_world;
    }

    /* Check TLB entry and enforce page permissions.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!VICTIM_TLB_HIT(addr_write, addr)) {
            tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            tlbe = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
    }

    /* Notice an IO access or a needs-MMU-lookup access */
    if (unlikely(tlb_addr & TLB_MMIO)) {
        /* There's really nothing that can be done to
           support this apart from stop-the-world.  */
        goto stop_the_world;
    }

    /* Let the guest notice RMW on a write-only page.  */
    if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
        tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
                 mmu_idx, retaddr);
        /* Since we don't support reads and writes to different addresses,
           and we do have the proper page loaded for write, this shouldn't
           ever return.  But just in case, handle via stop-the-world.  */
        goto stop_the_world;
    }

    hostaddr = (void *)((uintptr_t)addr + tlbe->addend);

    if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
        notdirty_write(env_cpu(env), addr, 1 << s_bits,
                       &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
    }

    return hostaddr;

 stop_the_world:
    cpu_loop_exit_atomic(env_cpu(env), retaddr);
}

/*
 * Load Helpers
 *
 * We support two different access types. SOFTMMU_CODE_ACCESS is
 * specifically for reading instructions from system memory. It is
 * called by the translation loop and in some helpers where the code
 * is disassembled. It shouldn't be called directly by guest code.
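 *
 * The other access type is the normal data read performed on behalf of
 * TCG-generated guest loads and the cpu_ld*_mmuidx_ra helpers below;
 * load_helper() distinguishes the two via its code_read argument.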
 */

typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
                                TCGMemOpIdx oi, uintptr_t retaddr);

static inline uint64_t QEMU_ALWAYS_INLINE
load_memop(const void *haddr, MemOp op)
{
    switch (op) {
    case MO_UB:
        return ldub_p(haddr);
    case MO_BEUW:
        return lduw_be_p(haddr);
    case MO_LEUW:
        return lduw_le_p(haddr);
    case MO_BEUL:
        return (uint32_t)ldl_be_p(haddr);
    case MO_LEUL:
        return (uint32_t)ldl_le_p(haddr);
    case MO_BEQ:
        return ldq_be_p(haddr);
    case MO_LEQ:
        return ldq_le_p(haddr);
    default:
        qemu_build_not_reached();
    }
}

static inline uint64_t QEMU_ALWAYS_INLINE
load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
            uintptr_t retaddr, MemOp op, bool code_read,
            FullLoadHelper *full_load)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
    const size_t tlb_off = code_read ?
        offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
    const MMUAccessType access_type =
        code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    uint64_t res;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, access_type,
                             mmu_idx, retaddr);
    }

    /* If the TLB entry is for a different page, reload and try again.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size,
                     access_type, mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = code_read ? entry->addr_code : entry->addr_read;
        tlb_addr &= ~TLB_INVALID_MASK;
    }

    /* Handle anything that isn't just a straight memory access.  */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through full_load.  */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints.  */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out.  */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_READ, retaddr);
        }

        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access.  */
        if (likely(tlb_addr & TLB_MMIO)) {
            return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
                            access_type, op ^ (need_swap * MO_BSWAP));
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two load_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this.  ;-)
         */
        if (unlikely(need_swap)) {
            return load_memop(haddr, op ^ MO_BSWAP);
        }
        return load_memop(haddr, op);
    }

    /* Handle slow unaligned access (it spans two pages or IO).  */
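    /*
     * For example (little-endian 4-byte load starting two bytes before a
     * page boundary): addr1 = addr - 2, addr2 = addr + 2, shift = 16, and
     * the result is (r1 >> 16) | (r2 << 16), truncated to 32 bits.
     */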
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
        target_ulong addr1, addr2;
        uint64_t r1, r2;
        unsigned shift;
    do_unaligned_access:
        addr1 = addr & ~((target_ulong)size - 1);
        addr2 = addr1 + size;
        r1 = full_load(env, addr1, oi, retaddr);
        r2 = full_load(env, addr2, oi, retaddr);
        shift = (addr & (size - 1)) * 8;

        if (memop_big_endian(op)) {
            /* Big-endian combine.  */
            res = (r1 << shift) | (r2 >> ((size * 8) - shift));
        } else {
            /* Little-endian combine.  */
            res = (r1 >> shift) | (r2 << ((size * 8) - shift));
        }
        return res & MAKE_64BIT_MASK(0, size * 8);
    }

    haddr = (void *)((uintptr_t)addr + entry->addend);
    return load_memop(haddr, op);
}

/*
 * For the benefit of TCG generated code, we want to avoid the
 * complication of ABI-specific return type promotion and always
 * return a value extended to the register size of the host. This is
 * tcg_target_long, except in the case of a 32-bit host and 64-bit
 * data, and for that we always have uint64_t.
 *
 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
 */

static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
}

tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
                                     TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_ldub_mmu(env, addr, oi, retaddr);
}

static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
                       full_le_lduw_mmu);
}

tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_le_lduw_mmu(env, addr, oi, retaddr);
}

static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
                       full_be_lduw_mmu);
}

tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_be_lduw_mmu(env, addr, oi, retaddr);
}

static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
                       full_le_ldul_mmu);
}

tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_le_ldul_mmu(env, addr, oi, retaddr);
}

static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
                       full_be_ldul_mmu);
}

tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_be_ldul_mmu(env, addr, oi, retaddr);
}

uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
                           TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
                       helper_le_ldq_mmu);
}

uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
                           TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
                       helper_be_ldq_mmu);
}

/*
 * Provide signed versions of the load routines as well.  We can of course
 * avoid this for 64-bit data, or for 32-bit data on 32-bit host.
 */


tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
                                     TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
}

/*
 * Load helpers for cpu_ldst.h.
 */

static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
                                       int mmu_idx, uintptr_t retaddr,
                                       MemOp op, FullLoadHelper *full_load)
{
    uint16_t meminfo;
    TCGMemOpIdx oi;
    uint64_t ret;

    meminfo = trace_mem_get_info(op, mmu_idx, false);
    trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);

    op &= ~MO_SIGN;
    oi = make_memop_idx(op, mmu_idx);
    ret = full_load(env, addr, oi, retaddr);

    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);

    return ret;
}

uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                            int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu);
}

int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                       int mmu_idx, uintptr_t ra)
{
    return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB,
                                   full_ldub_mmu);
}

uint32_t cpu_lduw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                            int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUW,
                           MO_TE == MO_LE
                           ? full_le_lduw_mmu : full_be_lduw_mmu);
}

int cpu_ldsw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                       int mmu_idx, uintptr_t ra)
{
    return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_TESW,
                                    MO_TE == MO_LE
                                    ? full_le_lduw_mmu : full_be_lduw_mmu);
}

uint32_t cpu_ldl_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                           int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUL,
                           MO_TE == MO_LE
                           ? full_le_ldul_mmu : full_be_ldul_mmu);
}

uint64_t cpu_ldq_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                           int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEQ,
                           MO_TE == MO_LE
                           ? helper_le_ldq_mmu : helper_be_ldq_mmu);

uint64_t cpu_ldq_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                           int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEQ,
                           MO_TE == MO_LE
                           ? helper_le_ldq_mmu : helper_be_ldq_mmu);
}

uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr,
                          uintptr_t retaddr)
{
    return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_lduw_data_ra(CPUArchState *env, target_ulong ptr,
                          uintptr_t retaddr)
{
    return cpu_lduw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

int cpu_ldsw_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldsw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_ldl_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldl_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint64_t cpu_ldq_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldq_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldub_data_ra(env, ptr, 0);
}

int cpu_ldsb_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldsb_data_ra(env, ptr, 0);
}

uint32_t cpu_lduw_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_lduw_data_ra(env, ptr, 0);
}

int cpu_ldsw_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldsw_data_ra(env, ptr, 0);
}

uint32_t cpu_ldl_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldl_data_ra(env, ptr, 0);
}

uint64_t cpu_ldq_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldq_data_ra(env, ptr, 0);
}

/*
 * Store Helpers
 */

static inline void QEMU_ALWAYS_INLINE
store_memop(void *haddr, uint64_t val, MemOp op)
{
    switch (op) {
    case MO_UB:
        stb_p(haddr, val);
        break;
    case MO_BEUW:
        stw_be_p(haddr, val);
        break;
    case MO_LEUW:
        stw_le_p(haddr, val);
        break;
    case MO_BEUL:
        stl_be_p(haddr, val);
        break;
    case MO_LEUL:
        stl_le_p(haddr, val);
        break;
    case MO_BEQ:
        stq_be_p(haddr, val);
        break;
    case MO_LEQ:
        stq_le_p(haddr, val);
        break;
    default:
        qemu_build_not_reached();
    }
}
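
/*
 * store_memop() above simply dispatches on the MemOp: for example, a call
 * with MO_LEUL folds to stl_le_p(haddr, val) and MO_BEQ to stq_be_p(haddr,
 * val).  Because the default case is qemu_build_not_reached(), callers are
 * expected to pass an op the compiler can constant-fold (illustrative
 * summary).
 */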

static inline void QEMU_ALWAYS_INLINE
store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
             TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(entry);
    const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* If the TLB entry is for a different page, reload and try again. */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
    }

    /* Handle anything that isn't just a straight memory access. */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through byte stores. */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints. */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out. */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
        }

        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access. */
        if (tlb_addr & TLB_MMIO) {
            io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
                      op ^ (need_swap * MO_BSWAP));
            return;
        }

        /* Ignore writes to ROM. */
        if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
            return;
        }

        /* Handle clean RAM pages. */
        if (tlb_addr & TLB_NOTDIRTY) {
            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two store_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this. ;-)
         */
        if (unlikely(need_swap)) {
            store_memop(haddr, val, op ^ MO_BSWAP);
        } else {
            store_memop(haddr, val, op);
        }
        return;
    }

    /* Handle slow unaligned access (it spans two pages or IO). */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
        int i;
        uintptr_t index2;
        CPUTLBEntry *entry2;
        target_ulong page2, tlb_addr2;
        size_t size2;

    do_unaligned_access:
        /*
         * Ensure the second page is in the TLB. Note that the first page
         * is already guaranteed to be filled, and that the second page
         * cannot evict the first.
         */
        page2 = (addr + size) & TARGET_PAGE_MASK;
        size2 = (addr + size) & ~TARGET_PAGE_MASK;
        index2 = tlb_index(env, mmu_idx, page2);
        entry2 = tlb_entry(env, mmu_idx, page2);
        tlb_addr2 = tlb_addr_write(entry2);
        if (!tlb_hit_page(tlb_addr2, page2)) {
            if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
                tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
                         mmu_idx, retaddr);
                index2 = tlb_index(env, mmu_idx, page2);
                entry2 = tlb_entry(env, mmu_idx, page2);
            }
            tlb_addr2 = tlb_addr_write(entry2);
        }
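
        /*
         * Illustrative example: for a 4-byte store whose last two bytes
         * fall on the following page, size2 == 2, so size - size2 == 2
         * bytes are checked and written on the first page and 2 bytes
         * on page2.
         */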

        /*
         * Handle watchpoints. Since this may trap, all checks
         * must happen before any store.
         */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            cpu_check_watchpoint(env_cpu(env), addr, size - size2,
                                 env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
                                 BP_MEM_WRITE, retaddr);
        }
        if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
            cpu_check_watchpoint(env_cpu(env), page2, size2,
                                 env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
                                 BP_MEM_WRITE, retaddr);
        }

        /*
         * XXX: not efficient, but simple.
         * This loop must go in the forward direction to avoid issues
         * with self-modifying code in Windows 64-bit.
         */
        for (i = 0; i < size; ++i) {
            uint8_t val8;
            if (memop_big_endian(op)) {
                /* Big-endian extract. */
                val8 = val >> (((size - 1) * 8) - (i * 8));
            } else {
                /* Little-endian extract. */
                val8 = val >> (i * 8);
            }
            helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
        }
        return;
    }

    haddr = (void *)((uintptr_t)addr + entry->addend);
    store_memop(haddr, val, op);
}

void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
                        TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_UB);
}

void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUW);
}

void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUW);
}

void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUL);
}

void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUL);
}

void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEQ);
}

void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEQ);
}

/*
 * Store Helpers for cpu_ldst.h
 */

static inline void QEMU_ALWAYS_INLINE
cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
                 int mmu_idx, uintptr_t retaddr, MemOp op)
{
    TCGMemOpIdx oi;
    uint16_t meminfo;

    meminfo = trace_mem_get_info(op, mmu_idx, true);
    trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);

    oi = make_memop_idx(op, mmu_idx);
    store_helper(env, addr, val, oi, retaddr, op);

    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
}
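
/*
 * As with the loads above, a target helper would typically call the
 * cpu_st*_mmuidx_ra() routines below with the current data mmu index and
 * GETPC(), e.g. (illustrative only):
 *
 *     cpu_stl_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), GETPC());
 */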

void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
}

void cpu_stw_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUW);
}

void cpu_stl_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUL);
}

void cpu_stq_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEQ);
}

void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stw_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stw_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stl_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stl_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stq_data_ra(CPUArchState *env, target_ulong ptr,
                     uint64_t val, uintptr_t retaddr)
{
    cpu_stq_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stb_data_ra(env, ptr, val, 0);
}

void cpu_stw_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stw_data_ra(env, ptr, val, 0);
}

void cpu_stl_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stl_data_ra(env, ptr, val, 0);
}

void cpu_stq_data(CPUArchState *env, target_ulong ptr, uint64_t val)
{
    cpu_stq_data_ra(env, ptr, val, 0);
}

/* First set of helpers allows passing in of OI and RETADDR.  This makes
   them callable from other helpers. */

#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
#define ATOMIC_NAME(X) \
    HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
#define ATOMIC_MMU_DECLS
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
#define ATOMIC_MMU_CLEANUP
#define ATOMIC_MMU_IDX get_mmuidx(oi)

#include "atomic_common.inc.c"

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif

/* Second set of helpers are directly callable from TCG as helpers. */

#undef EXTRA_ARGS
#undef ATOMIC_NAME
#undef ATOMIC_MMU_LOOKUP
#define EXTRA_ARGS , TCGMemOpIdx oi
#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC())

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif
#undef ATOMIC_MMU_IDX
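
/*
 * For reference (illustrative expansion only): with DATA_SIZE 4 the first
 * set of includes above generates helpers along the lines of
 *
 *     helper_atomic_cmpxchgl_le_mmu(env, addr, cmpv, newv, oi, retaddr)
 *
 * where SUFFIX and END come from atomic_template.h; the second set drops
 * the explicit retaddr (using GETPC() internally) and the _mmu suffix so
 * that TCG can call the helpers directly.
 */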

/* Code access functions. */

static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
}

uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
    return full_ldub_code(env, addr, oi, 0);
}

static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
}

uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
    return full_lduw_code(env, addr, oi, 0);
}

static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
}

uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
    return full_ldl_code(env, addr, oi, 0);
}

static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
}

uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
    return full_ldq_code(env, addr, oi, 0);
}
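
/*
 * The cpu_ld*_code() functions above fetch through the code mmu index
 * (cpu_mmu_index(env, true)); a target's instruction decoder would use
 * them, for example (illustrative only):
 *
 *     uint32_t insn = cpu_ldl_code(env, pc);
 */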