/*
 * Common CPU TLB handling
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "hw/core/tcg-cpu-ops.h"
#include "exec/exec-all.h"
#include "exec/memory.h"
#include "exec/cpu_ldst.h"
#include "exec/cputlb.h"
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "tcg/tcg.h"
#include "qemu/error-report.h"
#include "exec/log.h"
#include "exec/helper-proto.h"
#include "qemu/atomic.h"
#include "qemu/atomic128.h"
#include "exec/translate-all.h"
#include "trace/trace-root.h"
#include "trace/mem.h"
#include "tb-hash.h"
#include "internal.h"
#ifdef CONFIG_PLUGIN
#include "qemu/plugin-memory.h"
#endif

/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
/* #define DEBUG_TLB */
/* #define DEBUG_TLB_LOG */

#ifdef DEBUG_TLB
# define DEBUG_TLB_GATE 1
# ifdef DEBUG_TLB_LOG
#  define DEBUG_TLB_LOG_GATE 1
# else
#  define DEBUG_TLB_LOG_GATE 0
# endif
#else
# define DEBUG_TLB_GATE 0
# define DEBUG_TLB_LOG_GATE 0
#endif

#define tlb_debug(fmt, ...) do { \
    if (DEBUG_TLB_LOG_GATE) { \
        qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
                      ## __VA_ARGS__); \
    } else if (DEBUG_TLB_GATE) { \
        fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
    } \
} while (0)

#define assert_cpu_is_self(cpu) do { \
        if (DEBUG_TLB_GATE) { \
            g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \
        } \
    } while (0)

/* run_on_cpu_data.target_ptr should always be big enough for a
 * target_ulong even on 32 bit builds */
QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));

/* We currently can't handle more than 16 bits in the MMUIDX bitmask. */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)

static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
{
    return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
}

static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
{
    return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
}

static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
                             size_t max_entries)
{
    desc->window_begin_ns = ns;
    desc->window_max_entries = max_entries;
}

static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
{
    unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr);

    for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
        qatomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
    }
}

static void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
{
    /* Discard jump cache entries for any tb which might potentially
       overlap the flushed page. */
    tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
    tb_jmp_cache_clear_page(cpu, addr);
}
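/*
 * Illustrative example (numbers assumed, for 4 KiB target pages): a TB
 * that starts at 0x0ffa and runs past 0x1000 is indexed by its start
 * address, i.e. under page 0x0000 in the jump cache, yet contains code
 * from page 0x1000.  Clearing both the flushed page and the page before
 * it ensures such a straddling TB can no longer be reached through the
 * cache.
 */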
/**
 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
 * @desc: The CPUTLBDesc portion of the TLB
 * @fast: The CPUTLBDescFast portion of the same TLB
 *
 * Called with tlb_lock held.
 *
 * We have two main constraints when resizing a TLB: (1) we only resize it
 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
 * the array or unnecessarily flushing it), which means we do not control how
 * frequently the resizing can occur; (2) we don't have access to the guest's
 * future scheduling decisions, and therefore have to decide the magnitude of
 * the resize based on past observations.
 *
 * In general, a memory-hungry process can benefit greatly from an appropriately
 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
 * we just have to make the TLB as large as possible; while an oversized TLB
 * results in minimal TLB miss rates, it also takes longer to be flushed
 * (flushes can be _very_ frequent), and the reduced locality can also hurt
 * performance.
 *
 * To achieve near-optimal performance for all kinds of workloads, we:
 *
 * 1. Aggressively increase the size of the TLB when the use rate of the
 * TLB being flushed is high, since it is likely that in the near future this
 * memory-hungry process will execute again, and its memory hungriness will
 * probably be similar.
 *
 * 2. Slowly reduce the size of the TLB as the use rate declines over a
 * reasonably large time window. The rationale is that if in such a time window
 * we have not observed a high TLB use rate, it is likely that we won't observe
 * it in the near future. In that case, once a time window expires we downsize
 * the TLB to match the maximum use rate observed in the window.
 *
 * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
 * since in that range performance is likely near-optimal. Recall that the TLB
 * is direct mapped, so we want the use rate to be low (or at least not too
 * high), since otherwise we are likely to have a significant amount of
 * conflict misses.
 */
static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
                                  int64_t now)
{
    size_t old_size = tlb_n_entries(fast);
    size_t rate;
    size_t new_size = old_size;
    int64_t window_len_ms = 100;
    int64_t window_len_ns = window_len_ms * 1000 * 1000;
    bool window_expired = now > desc->window_begin_ns + window_len_ns;

    if (desc->n_used_entries > desc->window_max_entries) {
        desc->window_max_entries = desc->n_used_entries;
    }
    rate = desc->window_max_entries * 100 / old_size;

    if (rate > 70) {
        new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
    } else if (rate < 30 && window_expired) {
        size_t ceil = pow2ceil(desc->window_max_entries);
        size_t expected_rate = desc->window_max_entries * 100 / ceil;

        /*
         * Avoid undersizing when the max number of entries seen is just below
         * a pow2. For instance, if max_entries == 1025, the expected use rate
         * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
         * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
         * later. Thus, make sure that the expected use rate remains below 70%.
         * (and since we double the size, that means the lowest rate we'd
         * expect to get is 35%, which is still in the 30-70% range where
         * we consider that the size is appropriate.)
         */
        if (expected_rate > 70) {
            ceil *= 2;
        }
        new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
    }

    if (new_size == old_size) {
        if (window_expired) {
            tlb_window_reset(desc, now, desc->n_used_entries);
        }
        return;
    }

    g_free(fast->table);
    g_free(desc->iotlb);

    tlb_window_reset(desc, now, 0);
    /* desc->n_used_entries is cleared by the caller */
    fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
    fast->table = g_try_new(CPUTLBEntry, new_size);
    desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);

    /*
     * If the allocations fail, try smaller sizes. We just freed some
     * memory, so going back to half of new_size has a good chance of working.
     * Increased memory pressure elsewhere in the system might cause the
     * allocations to fail though, so we progressively reduce the allocation
     * size, aborting if we cannot even allocate the smallest TLB we support.
     */
    while (fast->table == NULL || desc->iotlb == NULL) {
        if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
            error_report("%s: %s", __func__, strerror(errno));
            abort();
        }
        new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
        fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;

        g_free(fast->table);
        g_free(desc->iotlb);
        fast->table = g_try_new(CPUTLBEntry, new_size);
        desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
    }
}
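/*
 * Worked example of the heuristic above (numbers are illustrative):
 * with old_size == 1024 and window_max_entries == 800 the use rate is
 * 78%, so the table doubles to 2048 at the next flush.  If instead the
 * window expires having seen at most 250 used entries, the rate is 24%;
 * pow2ceil(250) == 256 would give an expected rate of 97%, so the target
 * is doubled to 512, which puts the expected use rate back inside the
 * 30-70% band.
 */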
static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
{
    desc->n_used_entries = 0;
    desc->large_page_addr = -1;
    desc->large_page_mask = -1;
    desc->vindex = 0;
    memset(fast->table, -1, sizeof_tlb(fast));
    memset(desc->vtable, -1, sizeof(desc->vtable));
}

static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx,
                                        int64_t now)
{
    CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
    CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];

    tlb_mmu_resize_locked(desc, fast, now);
    tlb_mmu_flush_locked(desc, fast);
}

static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
{
    size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;

    tlb_window_reset(desc, now, 0);
    desc->n_used_entries = 0;
    fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
    fast->table = g_new(CPUTLBEntry, n_entries);
    desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
    tlb_mmu_flush_locked(desc, fast);
}

static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
{
    env_tlb(env)->d[mmu_idx].n_used_entries++;
}

static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
{
    env_tlb(env)->d[mmu_idx].n_used_entries--;
}

void tlb_init(CPUState *cpu)
{
    CPUArchState *env = cpu->env_ptr;
    int64_t now = get_clock_realtime();
    int i;

    qemu_spin_init(&env_tlb(env)->c.lock);

    /* All tlbs are initialized flushed. */
    env_tlb(env)->c.dirty = 0;

    for (i = 0; i < NB_MMU_MODES; i++) {
        tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
    }
}
void tlb_destroy(CPUState *cpu)
{
    CPUArchState *env = cpu->env_ptr;
    int i;

    qemu_spin_destroy(&env_tlb(env)->c.lock);
    for (i = 0; i < NB_MMU_MODES; i++) {
        CPUTLBDesc *desc = &env_tlb(env)->d[i];
        CPUTLBDescFast *fast = &env_tlb(env)->f[i];

        g_free(fast->table);
        g_free(desc->iotlb);
    }
}

/* flush_all_helper: run fn across all cpus
 *
 * The helper is queued as asynchronous work on every cpu other than
 * @src.  The caller then runs (or queues) fn on @src itself; the
 * "synced" variants queue it there as "safe" work, which creates a
 * synchronisation point where all queued work is finished before
 * execution starts again.
 */
static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
                             run_on_cpu_data d)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu != src) {
            async_run_on_cpu(cpu, fn, d);
        }
    }
}

void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
{
    CPUState *cpu;
    size_t full = 0, part = 0, elide = 0;

    CPU_FOREACH(cpu) {
        CPUArchState *env = cpu->env_ptr;

        full += qatomic_read(&env_tlb(env)->c.full_flush_count);
        part += qatomic_read(&env_tlb(env)->c.part_flush_count);
        elide += qatomic_read(&env_tlb(env)->c.elide_flush_count);
    }
    *pfull = full;
    *ppart = part;
    *pelide = elide;
}

static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
{
    CPUArchState *env = cpu->env_ptr;
    uint16_t asked = data.host_int;
    uint16_t all_dirty, work, to_clean;
    int64_t now = get_clock_realtime();

    assert_cpu_is_self(cpu);

    tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);

    qemu_spin_lock(&env_tlb(env)->c.lock);

    all_dirty = env_tlb(env)->c.dirty;
    to_clean = asked & all_dirty;
    all_dirty &= ~to_clean;
    env_tlb(env)->c.dirty = all_dirty;

    for (work = to_clean; work != 0; work &= work - 1) {
        int mmu_idx = ctz32(work);
        tlb_flush_one_mmuidx_locked(env, mmu_idx, now);
    }

    qemu_spin_unlock(&env_tlb(env)->c.lock);

    cpu_tb_jmp_cache_clear(cpu);

    if (to_clean == ALL_MMUIDX_BITS) {
        qatomic_set(&env_tlb(env)->c.full_flush_count,
                    env_tlb(env)->c.full_flush_count + 1);
    } else {
        qatomic_set(&env_tlb(env)->c.part_flush_count,
                    env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
        if (to_clean != asked) {
            qatomic_set(&env_tlb(env)->c.elide_flush_count,
                        env_tlb(env)->c.elide_flush_count +
                        ctpop16(asked & ~to_clean));
        }
    }
}
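/*
 * Illustrative example of the bookkeeping above: with asked == 0b0111
 * and c.dirty == 0b0101, only mmu_idx 0 and 2 are actually flushed
 * (to_clean == 0b0101), part_flush_count grows by 2, and the request
 * for the already-clean mmu_idx 1 is accounted in elide_flush_count.
 */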
void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
{
    tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);

    if (cpu->created && !qemu_cpu_is_self(cpu)) {
        async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
                         RUN_ON_CPU_HOST_INT(idxmap));
    } else {
        tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
    }
}

void tlb_flush(CPUState *cpu)
{
    tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
}

void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;

    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
}

void tlb_flush_all_cpus(CPUState *src_cpu)
{
    tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
}

void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;

    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
}

void tlb_flush_all_cpus_synced(CPUState *src_cpu)
{
    tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
}

static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry,
                                      target_ulong page, target_ulong mask)
{
    page &= mask;
    mask &= TARGET_PAGE_MASK | TLB_INVALID_MASK;

    return (page == (tlb_entry->addr_read & mask) ||
            page == (tlb_addr_write(tlb_entry) & mask) ||
            page == (tlb_entry->addr_code & mask));
}

static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
                                        target_ulong page)
{
    return tlb_hit_page_mask_anyprot(tlb_entry, page, -1);
}

/**
 * tlb_entry_is_empty - return true if the entry is not in use
 * @te: pointer to CPUTLBEntry
 */
static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
{
    return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
}

/* Called with tlb_c.lock held */
static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry,
                                        target_ulong page,
                                        target_ulong mask)
{
    if (tlb_hit_page_mask_anyprot(tlb_entry, page, mask)) {
        memset(tlb_entry, -1, sizeof(*tlb_entry));
        return true;
    }
    return false;
}

static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
                                          target_ulong page)
{
    return tlb_flush_entry_mask_locked(tlb_entry, page, -1);
}

/* Called with tlb_c.lock held */
static void tlb_flush_vtlb_page_mask_locked(CPUArchState *env, int mmu_idx,
                                            target_ulong page,
                                            target_ulong mask)
{
    CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
    int k;

    assert_cpu_is_self(env_cpu(env));
    for (k = 0; k < CPU_VTLB_SIZE; k++) {
        if (tlb_flush_entry_mask_locked(&d->vtable[k], page, mask)) {
            tlb_n_used_entries_dec(env, mmu_idx);
        }
    }
}

static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
                                              target_ulong page)
{
    tlb_flush_vtlb_page_mask_locked(env, mmu_idx, page, -1);
}

static void tlb_flush_page_locked(CPUArchState *env, int midx,
                                  target_ulong page)
{
    target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
    target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;

    /* Check if we need to flush due to large pages. */
    if ((page & lp_mask) == lp_addr) {
        tlb_debug("forcing full flush midx %d ("
                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
                  midx, lp_addr, lp_mask);
        tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
    } else {
        if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
            tlb_n_used_entries_dec(env, midx);
        }
        tlb_flush_vtlb_page_locked(env, midx, page);
    }
}

/**
 * tlb_flush_page_by_mmuidx_async_0:
 * @cpu: cpu on which to flush
 * @addr: page of virtual address to flush
 * @idxmap: set of mmu_idx to flush
 *
 * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
 * at @addr from the tlbs indicated by @idxmap from @cpu.
 */
static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
                                             target_ulong addr,
                                             uint16_t idxmap)
{
    CPUArchState *env = cpu->env_ptr;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);

    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        if ((idxmap >> mmu_idx) & 1) {
            tlb_flush_page_locked(env, mmu_idx, addr);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);

    tb_flush_jmp_cache(cpu, addr);
}

/**
 * tlb_flush_page_by_mmuidx_async_1:
 * @cpu: cpu on which to flush
 * @data: encoded addr + idxmap
 *
 * Helper for tlb_flush_page_by_mmuidx and friends, called through
 * async_run_on_cpu.  The idxmap parameter is encoded in the page
 * offset of the target_ptr field.  This limits the set of mmu_idx
 * that can be passed via this method.
 */
static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
                                             run_on_cpu_data data)
{
    target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
    target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
    uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;

    tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
}
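/*
 * Illustrative encoding (assuming 4 KiB target pages): flushing page
 * 0x7fff2000 for mmu indexes 0 and 2 (idxmap == 0x5) passes 0x7fff2005
 * as target_ptr; the decoder above recovers the page with
 * TARGET_PAGE_MASK and the idxmap from the low 12 bits.  This is why
 * callers fall back to the _async_2 variant below once idxmap no longer
 * fits in the page offset.
 */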
typedef struct {
    target_ulong addr;
    uint16_t idxmap;
} TLBFlushPageByMMUIdxData;

/**
 * tlb_flush_page_by_mmuidx_async_2:
 * @cpu: cpu on which to flush
 * @data: allocated addr + idxmap
 *
 * Helper for tlb_flush_page_by_mmuidx and friends, called through
 * async_run_on_cpu.  The addr+idxmap parameters are stored in a
 * TLBFlushPageByMMUIdxData structure that has been allocated
 * specifically for this helper.  Free the structure when done.
 */
static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
                                             run_on_cpu_data data)
{
    TLBFlushPageByMMUIdxData *d = data.host_ptr;

    tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
    g_free(d);
}

void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
{
    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);

    /* This should already be page aligned */
    addr &= TARGET_PAGE_MASK;

    if (qemu_cpu_is_self(cpu)) {
        tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
    } else if (idxmap < TARGET_PAGE_SIZE) {
        /*
         * Most targets have only a few mmu_idx.  In the case where
         * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
         * allocating memory for this operation.
         */
        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    } else {
        TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);

        /* Otherwise allocate a structure, freed by the worker. */
        d->addr = addr;
        d->idxmap = idxmap;
        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
                         RUN_ON_CPU_HOST_PTR(d));
    }
}

void tlb_flush_page(CPUState *cpu, target_ulong addr)
{
    tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
}

void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
                                       uint16_t idxmap)
{
    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    /* This should already be page aligned */
    addr &= TARGET_PAGE_MASK;

    /*
     * Allocate memory to hold addr+idxmap only when needed.
     * See tlb_flush_page_by_mmuidx for details.
     */
    if (idxmap < TARGET_PAGE_SIZE) {
        flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    } else {
        CPUState *dst_cpu;

        /* Allocate a separate data block for each destination cpu. */
        CPU_FOREACH(dst_cpu) {
            if (dst_cpu != src_cpu) {
                TLBFlushPageByMMUIdxData *d
                    = g_new(TLBFlushPageByMMUIdxData, 1);

                d->addr = addr;
                d->idxmap = idxmap;
                async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
                                 RUN_ON_CPU_HOST_PTR(d));
            }
        }
    }

    tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
}

void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
{
    tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
}
void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
                                              target_ulong addr,
                                              uint16_t idxmap)
{
    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    /* This should already be page aligned */
    addr &= TARGET_PAGE_MASK;

    /*
     * Allocate memory to hold addr+idxmap only when needed.
     * See tlb_flush_page_by_mmuidx for details.
     */
    if (idxmap < TARGET_PAGE_SIZE) {
        flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
                              RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    } else {
        CPUState *dst_cpu;
        TLBFlushPageByMMUIdxData *d;

        /* Allocate a separate data block for each destination cpu. */
        CPU_FOREACH(dst_cpu) {
            if (dst_cpu != src_cpu) {
                d = g_new(TLBFlushPageByMMUIdxData, 1);
                d->addr = addr;
                d->idxmap = idxmap;
                async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
                                 RUN_ON_CPU_HOST_PTR(d));
            }
        }

        d = g_new(TLBFlushPageByMMUIdxData, 1);
        d->addr = addr;
        d->idxmap = idxmap;
        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
                              RUN_ON_CPU_HOST_PTR(d));
    }
}

void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
{
    tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
}

static void tlb_flush_range_locked(CPUArchState *env, int midx,
                                   target_ulong addr, target_ulong len,
                                   unsigned bits)
{
    CPUTLBDesc *d = &env_tlb(env)->d[midx];
    CPUTLBDescFast *f = &env_tlb(env)->f[midx];
    target_ulong mask = MAKE_64BIT_MASK(0, bits);

    /*
     * If @bits is smaller than the tlb size, there may be multiple entries
     * within the TLB; otherwise all addresses that match under @mask hit
     * the same TLB entry.
     * TODO: Perhaps allow bits to be a few bits less than the size.
     * For now, just flush the entire TLB.
     *
     * If @len is larger than the tlb size, then it will take longer to
     * test all of the entries in the TLB than it will to flush it all.
     */
    if (mask < f->mask || len > f->mask) {
        tlb_debug("forcing full flush midx %d ("
                  TARGET_FMT_lx "/" TARGET_FMT_lx "+" TARGET_FMT_lx ")\n",
                  midx, addr, mask, len);
        tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
        return;
    }

    /*
     * Check if we need to flush due to large pages.
     * Because large_page_mask contains all 1's from the msb,
     * we only need to test the end of the range.
     */
    if (((addr + len - 1) & d->large_page_mask) == d->large_page_addr) {
        tlb_debug("forcing full flush midx %d ("
                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
                  midx, d->large_page_addr, d->large_page_mask);
        tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
        return;
    }

    for (target_ulong i = 0; i < len; i += TARGET_PAGE_SIZE) {
        target_ulong page = addr + i;
        CPUTLBEntry *entry = tlb_entry(env, midx, page);

        if (tlb_flush_entry_mask_locked(entry, page, mask)) {
            tlb_n_used_entries_dec(env, midx);
        }
        tlb_flush_vtlb_page_mask_locked(env, midx, page, mask);
    }
}
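/*
 * Illustrative example of the end-of-range test above (numbers assumed):
 * with a 2 MiB large page recorded at 0x00a00000, large_page_mask is
 * ~(target_ulong)0x1fffff.  A range flush of len 0x4000 starting at
 * 0x009fe000 ends at 0x00a01fff, which matches large_page_addr under the
 * mask, so the whole mmu_idx is flushed even though the start address
 * itself lies below the large page.
 */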
typedef struct {
    target_ulong addr;
    target_ulong len;
    uint16_t idxmap;
    uint16_t bits;
} TLBFlushRangeData;

static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
                                              TLBFlushRangeData d)
{
    CPUArchState *env = cpu->env_ptr;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    tlb_debug("range:" TARGET_FMT_lx "/%u+" TARGET_FMT_lx " mmu_map:0x%x\n",
              d.addr, d.bits, d.len, d.idxmap);

    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        if ((d.idxmap >> mmu_idx) & 1) {
            tlb_flush_range_locked(env, mmu_idx, d.addr, d.len, d.bits);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);

    for (target_ulong i = 0; i < d.len; i += TARGET_PAGE_SIZE) {
        tb_flush_jmp_cache(cpu, d.addr + i);
    }
}

static void tlb_flush_range_by_mmuidx_async_1(CPUState *cpu,
                                              run_on_cpu_data data)
{
    TLBFlushRangeData *d = data.host_ptr;
    tlb_flush_range_by_mmuidx_async_0(cpu, *d);
    g_free(d);
}

void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr,
                               target_ulong len, uint16_t idxmap,
                               unsigned bits)
{
    TLBFlushRangeData d;

    /*
     * If all bits are significant, and len is small,
     * this devolves to tlb_flush_page.
     */
    if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) {
        tlb_flush_page_by_mmuidx(cpu, addr, idxmap);
        return;
    }
    /* If no page bits are significant, this devolves to tlb_flush. */
    if (bits < TARGET_PAGE_BITS) {
        tlb_flush_by_mmuidx(cpu, idxmap);
        return;
    }

    /* This should already be page aligned */
    d.addr = addr & TARGET_PAGE_MASK;
    d.len = len;
    d.idxmap = idxmap;
    d.bits = bits;

    if (qemu_cpu_is_self(cpu)) {
        tlb_flush_range_by_mmuidx_async_0(cpu, d);
    } else {
        /* Otherwise allocate a structure, freed by the worker. */
        TLBFlushRangeData *p = g_memdup(&d, sizeof(d));
        async_run_on_cpu(cpu, tlb_flush_range_by_mmuidx_async_1,
                         RUN_ON_CPU_HOST_PTR(p));
    }
}

void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr,
                                   uint16_t idxmap, unsigned bits)
{
    tlb_flush_range_by_mmuidx(cpu, addr, TARGET_PAGE_SIZE, idxmap, bits);
}
void tlb_flush_range_by_mmuidx_all_cpus(CPUState *src_cpu,
                                        target_ulong addr, target_ulong len,
                                        uint16_t idxmap, unsigned bits)
{
    TLBFlushRangeData d;
    CPUState *dst_cpu;

    /*
     * If all bits are significant, and len is small,
     * this devolves to tlb_flush_page.
     */
    if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) {
        tlb_flush_page_by_mmuidx_all_cpus(src_cpu, addr, idxmap);
        return;
    }
    /* If no page bits are significant, this devolves to tlb_flush. */
    if (bits < TARGET_PAGE_BITS) {
        tlb_flush_by_mmuidx_all_cpus(src_cpu, idxmap);
        return;
    }

    /* This should already be page aligned */
    d.addr = addr & TARGET_PAGE_MASK;
    d.len = len;
    d.idxmap = idxmap;
    d.bits = bits;

    /* Allocate a separate data block for each destination cpu. */
    CPU_FOREACH(dst_cpu) {
        if (dst_cpu != src_cpu) {
            TLBFlushRangeData *p = g_memdup(&d, sizeof(d));
            async_run_on_cpu(dst_cpu,
                             tlb_flush_range_by_mmuidx_async_1,
                             RUN_ON_CPU_HOST_PTR(p));
        }
    }

    tlb_flush_range_by_mmuidx_async_0(src_cpu, d);
}

void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu,
                                            target_ulong addr,
                                            uint16_t idxmap, unsigned bits)
{
    tlb_flush_range_by_mmuidx_all_cpus(src_cpu, addr, TARGET_PAGE_SIZE,
                                       idxmap, bits);
}

void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
                                               target_ulong addr,
                                               target_ulong len,
                                               uint16_t idxmap,
                                               unsigned bits)
{
    TLBFlushRangeData d, *p;
    CPUState *dst_cpu;

    /*
     * If all bits are significant, and len is small,
     * this devolves to tlb_flush_page.
     */
    if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) {
        tlb_flush_page_by_mmuidx_all_cpus_synced(src_cpu, addr, idxmap);
        return;
    }
    /* If no page bits are significant, this devolves to tlb_flush. */
    if (bits < TARGET_PAGE_BITS) {
        tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, idxmap);
        return;
    }

    /* This should already be page aligned */
    d.addr = addr & TARGET_PAGE_MASK;
    d.len = len;
    d.idxmap = idxmap;
    d.bits = bits;

    /* Allocate a separate data block for each destination cpu. */
    CPU_FOREACH(dst_cpu) {
        if (dst_cpu != src_cpu) {
            p = g_memdup(&d, sizeof(d));
            async_run_on_cpu(dst_cpu, tlb_flush_range_by_mmuidx_async_1,
                             RUN_ON_CPU_HOST_PTR(p));
        }
    }

    p = g_memdup(&d, sizeof(d));
    async_safe_run_on_cpu(src_cpu, tlb_flush_range_by_mmuidx_async_1,
                          RUN_ON_CPU_HOST_PTR(p));
}

void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
                                                   target_ulong addr,
                                                   uint16_t idxmap,
                                                   unsigned bits)
{
    tlb_flush_range_by_mmuidx_all_cpus_synced(src_cpu, addr, TARGET_PAGE_SIZE,
                                              idxmap, bits);
}

/* update the TLBs so that writes to code in the virtual page 'addr'
   can be detected */
void tlb_protect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
                                             DIRTY_MEMORY_CODE);
}

/* update the TLB so that writes in physical page 'phys_addr' are no longer
   tested for self modifying code */
void tlb_unprotect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
}


/*
 * Dirty write flag handling
 *
 * When the TCG code writes to a location it looks up the address in
 * the TLB and uses that data to compute the final address. If any of
 * the lower bits of the address are set then the slow path is forced.
 * There are a number of reasons to do this but for normal RAM the
 * most usual is detecting writes to code regions which may invalidate
 * generated code.
 *
 * Other vCPUs might be reading their TLBs during guest execution, so we update
 * te->addr_write with qatomic_set. We don't need to worry about this for
 * oversized guests as MTTCG is disabled for them.
 *
 * Called with tlb_c.lock held.
 */
static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
                                         uintptr_t start, uintptr_t length)
{
    uintptr_t addr = tlb_entry->addr_write;

    if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
                 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
        addr &= TARGET_PAGE_MASK;
        addr += tlb_entry->addend;
        if ((addr - start) < length) {
#if TCG_OVERSIZED_GUEST
            tlb_entry->addr_write |= TLB_NOTDIRTY;
#else
            qatomic_set(&tlb_entry->addr_write,
                        tlb_entry->addr_write | TLB_NOTDIRTY);
#endif
        }
    }
}
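/*
 * The net effect, illustrated: once dirty tracking (e.g. for migration
 * or the VGA framebuffer) marks a RAM page clean, every TLB entry that
 * maps it gains TLB_NOTDIRTY here; the next guest store then takes the
 * slow path, where notdirty_write() re-dirties the page and
 * tlb_set_dirty() drops the flag again so subsequent stores go fast.
 */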
/*
 * Called with tlb_c.lock held.
 * Called only from the vCPU context, i.e. the TLB's owner thread.
 */
static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
{
    *d = *s;
}

/* This is a cross vCPU call (i.e. another vCPU resetting the flags of
 * the target vCPU).
 * We must take tlb_c.lock to avoid racing with another vCPU update. The only
 * thing actually updated is the target TLB entry ->addr_write flags.
 */
void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
{
    CPUArchState *env;

    int mmu_idx;

    env = cpu->env_ptr;
    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        unsigned int i;
        unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);

        for (i = 0; i < n; i++) {
            tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
                                         start1, length);
        }

        for (i = 0; i < CPU_VTLB_SIZE; i++) {
            tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
                                         start1, length);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);
}

/* Called with tlb_c.lock held */
static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
                                         target_ulong vaddr)
{
    if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
        tlb_entry->addr_write = vaddr;
    }
}

/* update the TLB corresponding to virtual page vaddr
   so that it is no longer dirty */
void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
{
    CPUArchState *env = cpu->env_ptr;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    vaddr &= TARGET_PAGE_MASK;
    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
    }

    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        int k;
        for (k = 0; k < CPU_VTLB_SIZE; k++) {
            tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);
}

/* Our TLB does not support large pages, so remember the area covered by
   large pages and trigger a full TLB flush if these are invalidated. */
static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
                               target_ulong vaddr, target_ulong size)
{
    target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
    target_ulong lp_mask = ~(size - 1);

    if (lp_addr == (target_ulong)-1) {
        /* No previous large page. */
        lp_addr = vaddr;
    } else {
        /* Extend the existing region to include the new page.
           This is a compromise between unnecessary flushes and
           the cost of maintaining a full variable size TLB. */
        lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
        while (((lp_addr ^ vaddr) & lp_mask) != 0) {
            lp_mask <<= 1;
        }
    }
    env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
    env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
}
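/*
 * Illustrative extension (numbers assumed): if a 2 MiB page at
 * 0x00a00000 is already recorded and another 2 MiB page at 0x00c00000
 * is added, the loop above widens lp_mask until the two addresses agree
 * under it, i.e. to ~(target_ulong)0x7fffff, so the recorded region
 * becomes 0x00800000-0x00ffffff and any page flush inside it forces a
 * full flush of that mmu_idx.
 */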
/* Add a new TLB entry. At most one entry for a given virtual address
 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
 * supplied size is only used by tlb_flush_page.
 *
 * Called from TCG-generated code, which is under an RCU read-side
 * critical section.
 */
void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
                             hwaddr paddr, MemTxAttrs attrs, int prot,
                             int mmu_idx, target_ulong size)
{
    CPUArchState *env = cpu->env_ptr;
    CPUTLB *tlb = env_tlb(env);
    CPUTLBDesc *desc = &tlb->d[mmu_idx];
    MemoryRegionSection *section;
    unsigned int index;
    target_ulong address;
    target_ulong write_address;
    uintptr_t addend;
    CPUTLBEntry *te, tn;
    hwaddr iotlb, xlat, sz, paddr_page;
    target_ulong vaddr_page;
    int asidx = cpu_asidx_from_attrs(cpu, attrs);
    int wp_flags;
    bool is_ram, is_romd;

    assert_cpu_is_self(cpu);

    if (size <= TARGET_PAGE_SIZE) {
        sz = TARGET_PAGE_SIZE;
    } else {
        tlb_add_large_page(env, mmu_idx, vaddr, size);
        sz = size;
    }
    vaddr_page = vaddr & TARGET_PAGE_MASK;
    paddr_page = paddr & TARGET_PAGE_MASK;

    section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
                                                &xlat, &sz, attrs, &prot);
    assert(sz >= TARGET_PAGE_SIZE);

    tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
              " prot=%x idx=%d\n",
              vaddr, paddr, prot, mmu_idx);

    address = vaddr_page;
    if (size < TARGET_PAGE_SIZE) {
        /* Repeat the MMU check and TLB fill on every access. */
        address |= TLB_INVALID_MASK;
    }
    if (attrs.byte_swap) {
        address |= TLB_BSWAP;
    }

    is_ram = memory_region_is_ram(section->mr);
    is_romd = memory_region_is_romd(section->mr);

    if (is_ram || is_romd) {
        /* RAM and ROMD both have associated host memory. */
        addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
    } else {
        /* I/O does not; force the host address to NULL. */
        addend = 0;
    }

    write_address = address;
    if (is_ram) {
        iotlb = memory_region_get_ram_addr(section->mr) + xlat;
        /*
         * Computing is_clean is expensive; avoid all that unless
         * the page is actually writable.
         */
        if (prot & PAGE_WRITE) {
            if (section->readonly) {
                write_address |= TLB_DISCARD_WRITE;
            } else if (cpu_physical_memory_is_clean(iotlb)) {
                write_address |= TLB_NOTDIRTY;
            }
        }
    } else {
        /* I/O or ROMD */
        iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
        /*
         * Writes to romd devices must go through MMIO to enable write.
         * Reads to romd devices go through the ram_ptr found above,
         * but of course reads to I/O must go through MMIO.
         */
        write_address |= TLB_MMIO;
        if (!is_romd) {
            address = write_address;
        }
    }

    wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
                                              TARGET_PAGE_SIZE);

    index = tlb_index(env, mmu_idx, vaddr_page);
    te = tlb_entry(env, mmu_idx, vaddr_page);

    /*
     * Hold the TLB lock for the rest of the function. We could acquire/release
     * the lock several times in the function, but it is faster to amortize the
     * acquisition cost by acquiring it just once. Note that this leads to
     * a longer critical section, but this is not a concern since the TLB lock
     * is unlikely to be contended.
     */
    qemu_spin_lock(&tlb->c.lock);

    /* Note that the tlb is no longer clean. */
    tlb->c.dirty |= 1 << mmu_idx;

    /* Make sure there's no cached translation for the new page. */
    tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);

    /*
     * Only evict the old entry to the victim tlb if it's for a
     * different page; otherwise just overwrite the stale data.
     */
    if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
        unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
        CPUTLBEntry *tv = &desc->vtable[vidx];

        /* Evict the old entry into the victim tlb. */
        copy_tlb_helper_locked(tv, te);
        desc->viotlb[vidx] = desc->iotlb[index];
        tlb_n_used_entries_dec(env, mmu_idx);
    }

    /* refill the tlb */
    /*
     * At this point iotlb contains a physical section number in the lower
     * TARGET_PAGE_BITS, and either
     *  + the ram_addr_t of the page base of the target RAM (RAM)
     *  + the offset within section->mr of the page base (I/O, ROMD)
     * We subtract the vaddr_page (which is page aligned and thus won't
     * disturb the low bits) to give an offset which can be added to the
     * (non-page-aligned) vaddr of the eventual memory access to get
     * the MemoryRegion offset for the access. Note that the vaddr we
     * subtract here is that of the page base, and not the same as the
     * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
     */
    desc->iotlb[index].addr = iotlb - vaddr_page;
    desc->iotlb[index].attrs = attrs;

    /* Now calculate the new entry */
    tn.addend = addend - vaddr_page;
    if (prot & PAGE_READ) {
        tn.addr_read = address;
        if (wp_flags & BP_MEM_READ) {
            tn.addr_read |= TLB_WATCHPOINT;
        }
    } else {
        tn.addr_read = -1;
    }

    if (prot & PAGE_EXEC) {
        tn.addr_code = address;
    } else {
        tn.addr_code = -1;
    }

    tn.addr_write = -1;
    if (prot & PAGE_WRITE) {
        tn.addr_write = write_address;
        if (prot & PAGE_WRITE_INV) {
            tn.addr_write |= TLB_INVALID_MASK;
        }
        if (wp_flags & BP_MEM_WRITE) {
            tn.addr_write |= TLB_WATCHPOINT;
        }
    }

    copy_tlb_helper_locked(te, &tn);
    tlb_n_used_entries_inc(env, mmu_idx);
    qemu_spin_unlock(&tlb->c.lock);
}
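/*
 * Concrete illustration of the iotlb arithmetic above (numbers assumed):
 * if the page base of an I/O mapping translates to offset 0x10000 within
 * its MemoryRegion, the value stored is 0x10000 - vaddr_page; io_readx()
 * and io_writex() later mask off the section number kept in the low
 * TARGET_PAGE_BITS and add back the full access address, so an access at
 * vaddr_page + 0x123 yields mr_offset 0x10123.
 */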
/* Add a new TLB entry, but without specifying the memory
 * transaction attributes to be used.
 */
void tlb_set_page(CPUState *cpu, target_ulong vaddr,
                  hwaddr paddr, int prot,
                  int mmu_idx, target_ulong size)
{
    tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
                            prot, mmu_idx, size);
}

static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
    ram_addr_t ram_addr;

    ram_addr = qemu_ram_addr_from_host(ptr);
    if (ram_addr == RAM_ADDR_INVALID) {
        error_report("Bad ram pointer %p", ptr);
        abort();
    }
    return ram_addr;
}

/*
 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
 * be discarded and looked up again (e.g. via tlb_entry()).
 */
static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
                     MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);
    bool ok;

    /*
     * This is not a probe, so only valid return is success; failure
     * should result in exception + longjmp to the cpu loop.
     */
    ok = cc->tcg_ops->tlb_fill(cpu, addr, size,
                               access_type, mmu_idx, false, retaddr);
    assert(ok);
}

static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
                                        MMUAccessType access_type,
                                        int mmu_idx, uintptr_t retaddr)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);

    cc->tcg_ops->do_unaligned_access(cpu, addr, access_type, mmu_idx, retaddr);
}

static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
                                          vaddr addr, unsigned size,
                                          MMUAccessType access_type,
                                          int mmu_idx, MemTxAttrs attrs,
                                          MemTxResult response,
                                          uintptr_t retaddr)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);

    if (!cpu->ignore_memory_transaction_failures &&
        cc->tcg_ops->do_transaction_failed) {
        cc->tcg_ops->do_transaction_failed(cpu, physaddr, addr, size,
                                           access_type, mmu_idx, attrs,
                                           response, retaddr);
    }
}

static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                         int mmu_idx, target_ulong addr, uintptr_t retaddr,
                         MMUAccessType access_type, MemOp op)
{
    CPUState *cpu = env_cpu(env);
    hwaddr mr_offset;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    uint64_t val;
    bool locked = false;
    MemTxResult r;

    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
    mr = section->mr;
    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
    cpu->mem_io_pc = retaddr;
    if (!cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }

    if (!qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        locked = true;
    }
    r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
    if (r != MEMTX_OK) {
        hwaddr physaddr = mr_offset +
            section->offset_within_address_space -
            section->offset_within_region;

        cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
                               mmu_idx, iotlbentry->attrs, r, retaddr);
    }
    if (locked) {
        qemu_mutex_unlock_iothread();
    }

    return val;
}
/*
 * Save a potentially trashed IOTLB entry for later lookup by plugin.
 * This is read by tlb_plugin_lookup if the iotlb entry doesn't match
 * because of the side effect of io_writex changing memory layout.
 */
static void save_iotlb_data(CPUState *cs, hwaddr addr,
                            MemoryRegionSection *section, hwaddr mr_offset)
{
#ifdef CONFIG_PLUGIN
    SavedIOTLB *saved = &cs->saved_iotlb;
    saved->addr = addr;
    saved->section = section;
    saved->mr_offset = mr_offset;
#endif
}

static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                      int mmu_idx, uint64_t val, target_ulong addr,
                      uintptr_t retaddr, MemOp op)
{
    CPUState *cpu = env_cpu(env);
    hwaddr mr_offset;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    bool locked = false;
    MemTxResult r;

    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
    mr = section->mr;
    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
    if (!cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }
    cpu->mem_io_pc = retaddr;

    /*
     * The memory_region_dispatch may trigger a flush/resize
     * so for plugins we save the iotlb_data just in case.
     */
    save_iotlb_data(cpu, iotlbentry->addr, section, mr_offset);

    if (!qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        locked = true;
    }
    r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
    if (r != MEMTX_OK) {
        hwaddr physaddr = mr_offset +
            section->offset_within_address_space -
            section->offset_within_region;

        cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
                               MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
                               retaddr);
    }
    if (locked) {
        qemu_mutex_unlock_iothread();
    }
}

static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
{
#if TCG_OVERSIZED_GUEST
    return *(target_ulong *)((uintptr_t)entry + ofs);
#else
    /* ofs might correspond to .addr_write, so use qatomic_read */
    return qatomic_read((target_ulong *)((uintptr_t)entry + ofs));
#endif
}

/* Return true if ADDR is present in the victim tlb, and has been copied
   back to the main tlb. */
static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
                           size_t elt_ofs, target_ulong page)
{
    size_t vidx;

    assert_cpu_is_self(env_cpu(env));
    for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
        CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
        target_ulong cmp;

        /* elt_ofs might correspond to .addr_write, so use qatomic_read */
#if TCG_OVERSIZED_GUEST
        cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
#else
        cmp = qatomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
#endif

        if (cmp == page) {
            /* Found entry in victim tlb, swap tlb and iotlb. */
            CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];

            qemu_spin_lock(&env_tlb(env)->c.lock);
            copy_tlb_helper_locked(&tmptlb, tlb);
            copy_tlb_helper_locked(tlb, vtlb);
            copy_tlb_helper_locked(vtlb, &tmptlb);
            qemu_spin_unlock(&env_tlb(env)->c.lock);

            CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
            CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
            tmpio = *io; *io = *vio; *vio = tmpio;
            return true;
        }
    }
    return false;
}
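/*
 * Illustrative use (see the helpers further below): an access that
 * misses the direct-mapped fast table first probes the victim TLB via
 * victim_tlb_hit()/VICTIM_TLB_HIT(); only if that also misses do we
 * call tlb_fill(), so a page that merely lost a conflict for its
 * fast-table slot is recovered without a guest page-table walk.
 */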
/* Macro to call the above, with local variables from the use context. */
#define VICTIM_TLB_HIT(TY, ADDR) \
  victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
                 (ADDR) & TARGET_PAGE_MASK)

/*
 * Return a ram_addr_t for the virtual address for execution.
 *
 * Return -1 if we can't translate and execute from an entire page
 * of RAM.  This will force us to execute by loading and translating
 * one insn at a time, without caching.
 *
 * NOTE: This function will trigger an exception if the page is
 * not executable.
 */
tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
                                        void **hostp)
{
    uintptr_t mmu_idx = cpu_mmu_index(env, true);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    void *p;

    if (unlikely(!tlb_hit(entry->addr_code, addr))) {
        if (!VICTIM_TLB_HIT(addr_code, addr)) {
            tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);

            if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
                /*
                 * The MMU protection covers a smaller range than a target
                 * page, so we must redo the MMU check for every insn.
                 */
                return -1;
            }
        }
        assert(tlb_hit(entry->addr_code, addr));
    }

    if (unlikely(entry->addr_code & TLB_MMIO)) {
        /* The region is not backed by RAM. */
        if (hostp) {
            *hostp = NULL;
        }
        return -1;
    }

    p = (void *)((uintptr_t)addr + entry->addend);
    if (hostp) {
        *hostp = p;
    }
    return qemu_ram_addr_from_host_nofail(p);
}

tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
{
    return get_page_addr_code_hostp(env, addr, NULL);
}
static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
                           CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
{
    ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;

    trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);

    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
        struct page_collection *pages
            = page_collection_lock(ram_addr, ram_addr + size);
        tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr);
        page_collection_unlock(pages);
    }

    /*
     * Set both VGA and migration bits for simplicity and to remove
     * the notdirty callback faster.
     */
    cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);

    /* We remove the notdirty callback only if the code has been flushed. */
    if (!cpu_physical_memory_is_clean(ram_addr)) {
        trace_memory_notdirty_set_dirty(mem_vaddr);
        tlb_set_dirty(cpu, mem_vaddr);
    }
}

static int probe_access_internal(CPUArchState *env, target_ulong addr,
                                 int fault_size, MMUAccessType access_type,
                                 int mmu_idx, bool nonfault,
                                 void **phost, uintptr_t retaddr)
{
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr, page_addr;
    size_t elt_ofs;
    int flags;

    switch (access_type) {
    case MMU_DATA_LOAD:
        elt_ofs = offsetof(CPUTLBEntry, addr_read);
        break;
    case MMU_DATA_STORE:
        elt_ofs = offsetof(CPUTLBEntry, addr_write);
        break;
    case MMU_INST_FETCH:
        elt_ofs = offsetof(CPUTLBEntry, addr_code);
        break;
    default:
        g_assert_not_reached();
    }
    tlb_addr = tlb_read_ofs(entry, elt_ofs);

    page_addr = addr & TARGET_PAGE_MASK;
    if (!tlb_hit_page(tlb_addr, page_addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
            CPUState *cs = env_cpu(env);
            CPUClass *cc = CPU_GET_CLASS(cs);

            if (!cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type,
                                       mmu_idx, nonfault, retaddr)) {
                /* Non-faulting page table read failed. */
                *phost = NULL;
                return TLB_INVALID_MASK;
            }

            /* TLB resize via tlb_fill may have moved the entry. */
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_read_ofs(entry, elt_ofs);
    }
    flags = tlb_addr & TLB_FLAGS_MASK;

    /* Fold all "mmio-like" bits into TLB_MMIO.  This is not RAM. */
    if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
        *phost = NULL;
        return TLB_MMIO;
    }

    /* Everything else is RAM. */
    *phost = (void *)((uintptr_t)addr + entry->addend);
    return flags;
}

int probe_access_flags(CPUArchState *env, target_ulong addr,
                       MMUAccessType access_type, int mmu_idx,
                       bool nonfault, void **phost, uintptr_t retaddr)
{
    int flags;

    flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
                                  nonfault, phost, retaddr);

    /* Handle clean RAM pages. */
    if (unlikely(flags & TLB_NOTDIRTY)) {
        uintptr_t index = tlb_index(env, mmu_idx, addr);
        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
        flags &= ~TLB_NOTDIRTY;
    }

    return flags;
}
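/*
 * Sketch of a hypothetical caller (not taken from any particular
 * target), probing without faulting and acting on the result:
 *
 *     void *host;
 *     int flags = probe_access_flags(env, addr, MMU_DATA_STORE, mmu_idx,
 *                                    true, &host, GETPC());
 *     if (flags & TLB_INVALID_MASK) {
 *         ...                // page table walk failed, host == NULL
 *     } else if (flags & TLB_MMIO) {
 *         ...                // not RAM, must go through the slow path
 *     } else {
 *         ...                // host points directly at guest RAM
 *     }
 */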
void *probe_access(CPUArchState *env, target_ulong addr, int size,
                   MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
{
    void *host;
    int flags;

    g_assert(-(addr | TARGET_PAGE_MASK) >= size);

    flags = probe_access_internal(env, addr, size, access_type, mmu_idx,
                                  false, &host, retaddr);

    /* Per the interface, size == 0 merely faults the access. */
    if (size == 0) {
        return NULL;
    }

    if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
        uintptr_t index = tlb_index(env, mmu_idx, addr);
        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints. */
        if (flags & TLB_WATCHPOINT) {
            int wp_access = (access_type == MMU_DATA_STORE
                             ? BP_MEM_WRITE : BP_MEM_READ);
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, wp_access, retaddr);
        }

        /* Handle clean RAM pages. */
        if (flags & TLB_NOTDIRTY) {
            notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
        }
    }

    return host;
}

void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
                        MMUAccessType access_type, int mmu_idx)
{
    void *host;
    int flags;

    flags = probe_access_internal(env, addr, 0, access_type,
                                  mmu_idx, true, &host, 0);

    /* No combination of flags are expected by the caller. */
    return flags ? NULL : host;
}

#ifdef CONFIG_PLUGIN
/*
 * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
 * This should be a hot path as we will have just looked this path up
 * in the softmmu lookup code (or helper). We don't handle re-fills or
 * checking the victim table. This is purely informational.
 *
 * This almost never fails as the memory access being instrumented
 * should have just filled the TLB. The one corner case is io_writex
 * which can cause TLB flushes and potential resizing of the TLBs
 * losing the information we need. In those cases we need to recover
 * data from a copy of the iotlbentry. As long as this always occurs
 * from the same thread (which a mem callback will be) this is safe.
 */

bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
                       bool is_store, struct qemu_plugin_hwaddr *data)
{
    CPUArchState *env = cpu->env_ptr;
    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;

    if (likely(tlb_hit(tlb_addr, addr))) {
        /* We must have an iotlb entry for MMIO */
        if (tlb_addr & TLB_MMIO) {
            CPUIOTLBEntry *iotlbentry;
            iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
            data->is_io = true;
            data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
            data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
        } else {
            data->is_io = false;
            data->v.ram.hostaddr = addr + tlbe->addend;
        }
        return true;
    } else {
        SavedIOTLB *saved = &cpu->saved_iotlb;
        data->is_io = true;
        data->v.io.section = saved->section;
        data->v.io.offset = saved->mr_offset;
        return true;
    }
}

#endif

/* Probe for a read-modify-write atomic operation.  Do not allow unaligned
 * operations, or io operations to proceed.  Return the host address. */
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    size_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(tlbe);
    MemOp mop = get_memop(oi);
    int a_bits = get_alignment_bits(mop);
    int s_bits = mop & MO_SIZE;
    void *hostaddr;

    /* Adjust the given return address. */
    retaddr -= GETPC_ADJ;

    /* Enforce guest required alignment. */
    if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
        /* ??? Maybe indicate atomic op to cpu_unaligned_access */
        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* Enforce qemu required alignment. */
    if (unlikely(addr & ((1 << s_bits) - 1))) {
        /* We get here if guest alignment was not requested,
           or was not enforced by cpu_unaligned_access above.
           We might widen the access and emulate, but for now
           mark an exception and exit the cpu loop. */
        goto stop_the_world;
    }

    /* Check TLB entry and enforce page permissions. */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!VICTIM_TLB_HIT(addr_write, addr)) {
            tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            tlbe = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
    }

    /* Notice an IO access or a needs-MMU-lookup access */
    if (unlikely(tlb_addr & TLB_MMIO)) {
        /* There's really nothing that can be done to
           support this apart from stop-the-world. */
        goto stop_the_world;
    }

    /* Let the guest notice RMW on a write-only page. */
    if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
        tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
                 mmu_idx, retaddr);
        /* Since we don't support reads and writes to different addresses,
           and we do have the proper page loaded for write, this shouldn't
           ever return.  But just in case, handle via stop-the-world. */
        goto stop_the_world;
    }

    hostaddr = (void *)((uintptr_t)addr + tlbe->addend);

    if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
        notdirty_write(env_cpu(env), addr, 1 << s_bits,
                       &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
    }

    return hostaddr;

 stop_the_world:
    cpu_loop_exit_atomic(env_cpu(env), retaddr);
}
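/*
 * Illustrative numbers for the two alignment checks in atomic_mmu_lookup
 * above: for a 4-byte atomic (s_bits == 2) at addr 0x1002 with no guest
 * alignment requirement (a_bits == 0), the first check is skipped but
 * addr & ((1 << s_bits) - 1) == 2, so we take the stop_the_world path
 * rather than emulate a misaligned atomic.  With MO_ALIGN the same
 * access would instead raise the guest's unaligned-access exception via
 * cpu_unaligned_access() first.
 */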

#ifdef CONFIG_PLUGIN
/*
 * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
 * This should be a hot path as we will have just looked this path up
 * in the softmmu lookup code (or helper). We don't handle re-fills or
 * checking the victim table. This is purely informational.
 *
 * This almost never fails as the memory access being instrumented
 * should have just filled the TLB. The one corner case is io_writex
 * which can cause TLB flushes and potential resizing of the TLBs
 * losing the information we need. In those cases we need to recover
 * data from a copy of the iotlbentry. As long as this always occurs
 * from the same thread (which a mem callback will be) this is safe.
 */

bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
                       bool is_store, struct qemu_plugin_hwaddr *data)
{
    CPUArchState *env = cpu->env_ptr;
    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;

    if (likely(tlb_hit(tlb_addr, addr))) {
        /* We must have an iotlb entry for MMIO */
        if (tlb_addr & TLB_MMIO) {
            CPUIOTLBEntry *iotlbentry;
            iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
            data->is_io = true;
            data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr,
                                                  iotlbentry->attrs);
            data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
        } else {
            data->is_io = false;
            data->v.ram.hostaddr = addr + tlbe->addend;
        }
        return true;
    } else {
        SavedIOTLB *saved = &cpu->saved_iotlb;
        data->is_io = true;
        data->v.io.section = saved->section;
        data->v.io.offset = saved->mr_offset;
        return true;
    }
}

#endif

/* Probe for a read-modify-write atomic operation.  Do not allow unaligned
 * operations, or io operations to proceed.  Return the host address. */
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    size_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(tlbe);
    MemOp mop = get_memop(oi);
    int a_bits = get_alignment_bits(mop);
    int s_bits = mop & MO_SIZE;
    void *hostaddr;

    /* Adjust the given return address. */
    retaddr -= GETPC_ADJ;

    /* Enforce guest required alignment. */
    if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
        /* ??? Maybe indicate atomic op to cpu_unaligned_access */
        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* Enforce qemu required alignment. */
    if (unlikely(addr & ((1 << s_bits) - 1))) {
        /* We get here if guest alignment was not requested,
           or was not enforced by cpu_unaligned_access above.
           We might widen the access and emulate, but for now
           mark an exception and exit the cpu loop. */
        goto stop_the_world;
    }

    /* Check TLB entry and enforce page permissions. */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!VICTIM_TLB_HIT(addr_write, addr)) {
            tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            tlbe = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
    }

    /* Notice an IO access or a needs-MMU-lookup access */
    if (unlikely(tlb_addr & TLB_MMIO)) {
        /* There's really nothing that can be done to
           support this apart from stop-the-world. */
        goto stop_the_world;
    }

    /* Let the guest notice RMW on a write-only page. */
    if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
        tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
                 mmu_idx, retaddr);
        /* Since we don't support reads and writes to different addresses,
           and we do have the proper page loaded for write, this shouldn't
           ever return.  But just in case, handle via stop-the-world. */
        goto stop_the_world;
    }

    hostaddr = (void *)((uintptr_t)addr + tlbe->addend);

    if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
        notdirty_write(env_cpu(env), addr, 1 << s_bits,
                       &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
    }

    return hostaddr;

 stop_the_world:
    cpu_loop_exit_atomic(env_cpu(env), retaddr);
}
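
/*
 * atomic_mmu_lookup() is reached from the helpers generated by
 * atomic_template.h via the ATOMIC_MMU_LOOKUP macro defined near the
 * end of this file.  Roughly, as an illustrative expansion only:
 *
 *     // first set of helpers, callable from other helpers:
 *     haddr = atomic_mmu_lookup(env, addr, oi, retaddr);
 *     // second set, called directly from TCG-generated code:
 *     haddr = atomic_mmu_lookup(env, addr, oi, GETPC());
 */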

/*
 * Load Helpers
 *
 * We support two different access types. SOFTMMU_CODE_ACCESS is
 * specifically for reading instructions from system memory. It is
 * called by the translation loop and in some helpers where the code
 * is disassembled. It shouldn't be called directly by guest code.
 */

typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
                                TCGMemOpIdx oi, uintptr_t retaddr);

static inline uint64_t QEMU_ALWAYS_INLINE
load_memop(const void *haddr, MemOp op)
{
    switch (op) {
    case MO_UB:
        return ldub_p(haddr);
    case MO_BEUW:
        return lduw_be_p(haddr);
    case MO_LEUW:
        return lduw_le_p(haddr);
    case MO_BEUL:
        return (uint32_t)ldl_be_p(haddr);
    case MO_LEUL:
        return (uint32_t)ldl_le_p(haddr);
    case MO_BEQ:
        return ldq_be_p(haddr);
    case MO_LEQ:
        return ldq_le_p(haddr);
    default:
        qemu_build_not_reached();
    }
}

static inline uint64_t QEMU_ALWAYS_INLINE
load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
            uintptr_t retaddr, MemOp op, bool code_read,
            FullLoadHelper *full_load)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
    const size_t tlb_off = code_read ?
        offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
    const MMUAccessType access_type =
        code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    uint64_t res;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, access_type,
                             mmu_idx, retaddr);
    }

    /* If the TLB entry is for a different page, reload and try again. */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size,
                     access_type, mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = code_read ? entry->addr_code : entry->addr_read;
        tlb_addr &= ~TLB_INVALID_MASK;
    }

    /* Handle anything that isn't just a straight memory access. */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through full_load. */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints. */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out. */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_READ, retaddr);
        }

        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access. */
        if (likely(tlb_addr & TLB_MMIO)) {
            return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
                            access_type, op ^ (need_swap * MO_BSWAP));
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two load_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this.  ;-)
         */
        if (unlikely(need_swap)) {
            return load_memop(haddr, op ^ MO_BSWAP);
        }
        return load_memop(haddr, op);
    }

    /* Handle slow unaligned access (it spans two pages or IO). */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
        target_ulong addr1, addr2;
        uint64_t r1, r2;
        unsigned shift;
    do_unaligned_access:
        addr1 = addr & ~((target_ulong)size - 1);
        addr2 = addr1 + size;
        r1 = full_load(env, addr1, oi, retaddr);
        r2 = full_load(env, addr2, oi, retaddr);
        shift = (addr & (size - 1)) * 8;

        if (memop_big_endian(op)) {
            /* Big-endian combine. */
            res = (r1 << shift) | (r2 >> ((size * 8) - shift));
        } else {
            /* Little-endian combine. */
            res = (r1 >> shift) | (r2 << ((size * 8) - shift));
        }
        return res & MAKE_64BIT_MASK(0, size * 8);
    }

    haddr = (void *)((uintptr_t)addr + entry->addend);
    return load_memop(haddr, op);
}

/*
 * For the benefit of TCG generated code, we want to avoid the
 * complication of ABI-specific return type promotion and always
 * return a value extended to the register size of the host. This is
 * tcg_target_long, except in the case of a 32-bit host and 64-bit
 * data, and for that we always have uint64_t.
 *
 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
 */

static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
}

tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
                                     TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_ldub_mmu(env, addr, oi, retaddr);
}

static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
                       full_le_lduw_mmu);
}

tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_le_lduw_mmu(env, addr, oi, retaddr);
}

static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
                       full_be_lduw_mmu);
}

tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_be_lduw_mmu(env, addr, oi, retaddr);
}

static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
                       full_le_ldul_mmu);
}

tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_le_ldul_mmu(env, addr, oi, retaddr);
}

static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
                       full_be_ldul_mmu);
}

tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_be_ldul_mmu(env, addr, oi, retaddr);
}

uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
                           TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
                       helper_le_ldq_mmu);
}

uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
                           TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
                       helper_be_ldq_mmu);
}

/*
 * Provide signed versions of the load routines as well.  We can of course
 * avoid this for 64-bit data, or for 32-bit data on 32-bit host.
 */

tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
                                     TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
}

/*
 * Load helpers for cpu_ldst.h.
 */

static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
                                       int mmu_idx, uintptr_t retaddr,
                                       MemOp op, FullLoadHelper *full_load)
{
    uint16_t meminfo;
    TCGMemOpIdx oi;
    uint64_t ret;

    meminfo = trace_mem_get_info(op, mmu_idx, false);
    trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);

    op &= ~MO_SIGN;
    oi = make_memop_idx(op, mmu_idx);
    ret = full_load(env, addr, oi, retaddr);

    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);

    return ret;
}

uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                            int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu);
}

int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                       int mmu_idx, uintptr_t ra)
{
    return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB,
                                   full_ldub_mmu);
}

uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                               int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUW, full_be_lduw_mmu);
}

int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                          int mmu_idx, uintptr_t ra)
{
    return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_BESW,
                                    full_be_lduw_mmu);
}

uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                              int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUL, full_be_ldul_mmu);
}

uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                              int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEQ, helper_be_ldq_mmu);
}

uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                               int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUW, full_le_lduw_mmu);
}

int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                          int mmu_idx, uintptr_t ra)
{
    return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_LESW,
                                    full_le_lduw_mmu);
}

uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                              int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUL, full_le_ldul_mmu);
}

uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                              int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEQ, helper_le_ldq_mmu);
}

uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr,
                          uintptr_t retaddr)
{
    return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_lduw_be_data_ra(CPUArchState *env, target_ulong ptr,
                             uintptr_t retaddr)
{
    return cpu_lduw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

int cpu_ldsw_be_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldsw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_ldl_be_data_ra(CPUArchState *env, target_ulong ptr,
                            uintptr_t retaddr)
{
    return cpu_ldl_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint64_t cpu_ldq_be_data_ra(CPUArchState *env, target_ulong ptr,
                            uintptr_t retaddr)
{
    return cpu_ldq_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_lduw_le_data_ra(CPUArchState *env, target_ulong ptr,
                             uintptr_t retaddr)
{
    return cpu_lduw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

int cpu_ldsw_le_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldsw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_ldl_le_data_ra(CPUArchState *env, target_ulong ptr,
                            uintptr_t retaddr)
{
    return cpu_ldl_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint64_t cpu_ldq_le_data_ra(CPUArchState *env, target_ulong ptr,
                            uintptr_t retaddr)
{
    return cpu_ldq_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldub_data_ra(env, ptr, 0);
}

int cpu_ldsb_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldsb_data_ra(env, ptr, 0);
}

uint32_t cpu_lduw_be_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_lduw_be_data_ra(env, ptr, 0);
}

int cpu_ldsw_be_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldsw_be_data_ra(env, ptr, 0);
}

uint32_t cpu_ldl_be_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldl_be_data_ra(env, ptr, 0);
}

uint64_t cpu_ldq_be_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldq_be_data_ra(env, ptr, 0);
}

uint32_t cpu_lduw_le_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_lduw_le_data_ra(env, ptr, 0);
}

int cpu_ldsw_le_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldsw_le_data_ra(env, ptr, 0);
}

uint32_t cpu_ldl_le_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldl_le_data_ra(env, ptr, 0);
}

uint64_t cpu_ldq_le_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldq_le_data_ra(env, ptr, 0);
}
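
/*
 * The cpu_ld*_data() variants above pass a retaddr of 0, which by
 * convention means the access does not originate from within a
 * translation block, so no unwinding of guest state is attempted on a
 * fault.  A helper that *is* called out of a TB would use the
 * _mmuidx_ra forms instead, along these lines (illustrative only):
 *
 *     uint32_t w = cpu_ldl_le_mmuidx_ra(env, addr, mmu_idx, GETPC());
 */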

/*
 * Store Helpers
 */

static inline void QEMU_ALWAYS_INLINE
store_memop(void *haddr, uint64_t val, MemOp op)
{
    switch (op) {
    case MO_UB:
        stb_p(haddr, val);
        break;
    case MO_BEUW:
        stw_be_p(haddr, val);
        break;
    case MO_LEUW:
        stw_le_p(haddr, val);
        break;
    case MO_BEUL:
        stl_be_p(haddr, val);
        break;
    case MO_LEUL:
        stl_le_p(haddr, val);
        break;
    case MO_BEQ:
        stq_be_p(haddr, val);
        break;
    case MO_LEQ:
        stq_le_p(haddr, val);
        break;
    default:
        qemu_build_not_reached();
    }
}

static void __attribute__((noinline))
store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
                       uintptr_t retaddr, size_t size, uintptr_t mmu_idx,
                       bool big_endian)
{
    const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
    uintptr_t index, index2;
    CPUTLBEntry *entry, *entry2;
    target_ulong page2, tlb_addr, tlb_addr2;
    TCGMemOpIdx oi;
    size_t size2;
    int i;

    /*
     * Ensure the second page is in the TLB.  Note that the first page
     * is already guaranteed to be filled, and that the second page
     * cannot evict the first.
     */
    page2 = (addr + size) & TARGET_PAGE_MASK;
    size2 = (addr + size) & ~TARGET_PAGE_MASK;
    index2 = tlb_index(env, mmu_idx, page2);
    entry2 = tlb_entry(env, mmu_idx, page2);

    tlb_addr2 = tlb_addr_write(entry2);
    if (!tlb_hit_page(tlb_addr2, page2)) {
        if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
            tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            index2 = tlb_index(env, mmu_idx, page2);
            entry2 = tlb_entry(env, mmu_idx, page2);
        }
        tlb_addr2 = tlb_addr_write(entry2);
    }

    index = tlb_index(env, mmu_idx, addr);
    entry = tlb_entry(env, mmu_idx, addr);
    tlb_addr = tlb_addr_write(entry);

    /*
     * Handle watchpoints.  Since this may trap, all checks
     * must happen before any store.
     */
    if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
        cpu_check_watchpoint(env_cpu(env), addr, size - size2,
                             env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
                             BP_MEM_WRITE, retaddr);
    }
    if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
        cpu_check_watchpoint(env_cpu(env), page2, size2,
                             env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
                             BP_MEM_WRITE, retaddr);
    }

    /*
     * XXX: not efficient, but simple.
     * This loop must go in the forward direction to avoid issues
     * with self-modifying code in Windows 64-bit.
     */
    oi = make_memop_idx(MO_UB, mmu_idx);
    if (big_endian) {
        for (i = 0; i < size; ++i) {
            /* Big-endian extract. */
            uint8_t val8 = val >> (((size - 1) * 8) - (i * 8));
            helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
        }
    } else {
        for (i = 0; i < size; ++i) {
            /* Little-endian extract. */
            uint8_t val8 = val >> (i * 8);
            helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
        }
    }
}

static inline void QEMU_ALWAYS_INLINE
store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
             TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(entry);
    const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* If the TLB entry is for a different page, reload and try again. */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
    }

    /* Handle anything that isn't just a straight memory access. */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through byte stores. */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints. */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out. */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
        }

        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access. */
        if (tlb_addr & TLB_MMIO) {
            io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
                      op ^ (need_swap * MO_BSWAP));
            return;
        }

        /* Ignore writes to ROM. */
        if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
            return;
        }

        /* Handle clean RAM pages. */
        if (tlb_addr & TLB_NOTDIRTY) {
            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two store_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this.  ;-)
         */
        if (unlikely(need_swap)) {
            store_memop(haddr, val, op ^ MO_BSWAP);
        } else {
            store_memop(haddr, val, op);
        }
        return;
    }

    /* Handle slow unaligned access (it spans two pages or IO). */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
    do_unaligned_access:
        store_helper_unaligned(env, addr, val, retaddr, size,
                               mmu_idx, memop_big_endian(op));
        return;
    }

    haddr = (void *)((uintptr_t)addr + entry->addend);
    store_memop(haddr, val, op);
}

void __attribute__((noinline))
helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
                   TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_UB);
}

void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUW);
}

void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUW);
}

void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUL);
}

void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUL);
}

void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEQ);
}

void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEQ);
}

/*
 * Store Helpers for cpu_ldst.h
 */

static inline void QEMU_ALWAYS_INLINE
cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
                 int mmu_idx, uintptr_t retaddr, MemOp op)
{
    TCGMemOpIdx oi;
    uint16_t meminfo;

    meminfo = trace_mem_get_info(op, mmu_idx, true);
    trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);

    oi = make_memop_idx(op, mmu_idx);
    store_helper(env, addr, val, oi, retaddr, op);

    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
}
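
/*
 * cpu_store_helper() mirrors cpu_load_helper(): the trace point fires
 * before the access and the plugin callback runs after it.  A target
 * helper might use the wrappers below roughly as follows (illustrative
 * fragment only; 'val' is an assumed local):
 *
 *     cpu_stl_le_mmuidx_ra(env, addr, val, mmu_idx, GETPC());
 */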

void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
}

void cpu_stw_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                          int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUW);
}

void cpu_stl_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                          int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUL);
}

void cpu_stq_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
                          int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEQ);
}

void cpu_stw_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                          int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUW);
}

void cpu_stl_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                          int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUL);
}

void cpu_stq_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
                          int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEQ);
}

void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stw_be_data_ra(CPUArchState *env, target_ulong ptr,
                        uint32_t val, uintptr_t retaddr)
{
    cpu_stw_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stl_be_data_ra(CPUArchState *env, target_ulong ptr,
                        uint32_t val, uintptr_t retaddr)
{
    cpu_stl_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stq_be_data_ra(CPUArchState *env, target_ulong ptr,
                        uint64_t val, uintptr_t retaddr)
{
    cpu_stq_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stw_le_data_ra(CPUArchState *env, target_ulong ptr,
                        uint32_t val, uintptr_t retaddr)
{
    cpu_stw_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stl_le_data_ra(CPUArchState *env, target_ulong ptr,
                        uint32_t val, uintptr_t retaddr)
{
    cpu_stl_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stq_le_data_ra(CPUArchState *env, target_ulong ptr,
                        uint64_t val, uintptr_t retaddr)
{
    cpu_stq_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stb_data_ra(env, ptr, val, 0);
}

void cpu_stw_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stw_be_data_ra(env, ptr, val, 0);
}

void cpu_stl_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stl_be_data_ra(env, ptr, val, 0);
}

void cpu_stq_be_data(CPUArchState *env, target_ulong ptr, uint64_t val)
{
    cpu_stq_be_data_ra(env, ptr, val, 0);
}

void cpu_stw_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stw_le_data_ra(env, ptr, val, 0);
}

void cpu_stl_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stl_le_data_ra(env, ptr, val, 0);
}

void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val)
{
    cpu_stq_le_data_ra(env, ptr, val, 0);
}

/* First set of helpers allows passing in of OI and RETADDR.  This makes
   them callable from other helpers. */

#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
#define ATOMIC_NAME(X) \
    HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
#define ATOMIC_MMU_DECLS
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
#define ATOMIC_MMU_CLEANUP
#define ATOMIC_MMU_IDX get_mmuidx(oi)

#include "atomic_common.c.inc"

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif

/* Second set of helpers is directly callable from TCG as helpers. */

#undef EXTRA_ARGS
#undef ATOMIC_NAME
#undef ATOMIC_MMU_LOOKUP
#define EXTRA_ARGS , TCGMemOpIdx oi
#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC())

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif
#undef ATOMIC_MMU_IDX

/* Code access functions. */

static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
}

uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
    return full_ldub_code(env, addr, oi, 0);
}

static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
}

uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
    return full_lduw_code(env, addr, oi, 0);
}

static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
}

uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
    return full_ldl_code(env, addr, oi, 0);
}

static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
}

uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
    return full_ldq_code(env, addr, oi, 0);
}
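
/*
 * The cpu_ld*_code() accessors above build their TCGMemOpIdx with
 * cpu_mmu_index(env, true) and pass a retaddr of 0; per the comment on
 * load_helper(), they serve the translation loop and helpers that
 * disassemble guest code.  A decoder might fetch an opcode word roughly
 * like this (illustrative only; 'pc' is an assumed guest address):
 *
 *     uint32_t insn = cpu_ldl_code(env, pc);
 */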