1 /* 2 * Common CPU TLB handling 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qemu/main-loop.h" 22 #include "cpu.h" 23 #include "exec/exec-all.h" 24 #include "exec/memory.h" 25 #include "exec/address-spaces.h" 26 #include "exec/cpu_ldst.h" 27 #include "exec/cputlb.h" 28 #include "exec/tb-hash.h" 29 #include "exec/memory-internal.h" 30 #include "exec/ram_addr.h" 31 #include "tcg/tcg.h" 32 #include "qemu/error-report.h" 33 #include "exec/log.h" 34 #include "exec/helper-proto.h" 35 #include "qemu/atomic.h" 36 #include "qemu/atomic128.h" 37 #include "exec/translate-all.h" 38 #include "trace/trace-root.h" 39 #include "trace/mem.h" 40 #include "internal.h" 41 #ifdef CONFIG_PLUGIN 42 #include "qemu/plugin-memory.h" 43 #endif 44 45 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ 46 /* #define DEBUG_TLB */ 47 /* #define DEBUG_TLB_LOG */ 48 49 #ifdef DEBUG_TLB 50 # define DEBUG_TLB_GATE 1 51 # ifdef DEBUG_TLB_LOG 52 # define DEBUG_TLB_LOG_GATE 1 53 # else 54 # define DEBUG_TLB_LOG_GATE 0 55 # endif 56 #else 57 # define DEBUG_TLB_GATE 0 58 # define DEBUG_TLB_LOG_GATE 0 59 #endif 60 61 #define tlb_debug(fmt, ...) do { \ 62 if (DEBUG_TLB_LOG_GATE) { \ 63 qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \ 64 ## __VA_ARGS__); \ 65 } else if (DEBUG_TLB_GATE) { \ 66 fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \ 67 } \ 68 } while (0) 69 70 #define assert_cpu_is_self(cpu) do { \ 71 if (DEBUG_TLB_GATE) { \ 72 g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \ 73 } \ 74 } while (0) 75 76 /* run_on_cpu_data.target_ptr should always be big enough for a 77 * target_ulong even on 32 bit builds */ 78 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data)); 79 80 /* We currently can't handle more than 16 bits in the MMUIDX bitmask. 81 */ 82 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); 83 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1) 84 85 static inline size_t tlb_n_entries(CPUTLBDescFast *fast) 86 { 87 return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1; 88 } 89 90 static inline size_t sizeof_tlb(CPUTLBDescFast *fast) 91 { 92 return fast->mask + (1 << CPU_TLB_ENTRY_BITS); 93 } 94 95 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, 96 size_t max_entries) 97 { 98 desc->window_begin_ns = ns; 99 desc->window_max_entries = max_entries; 100 } 101 102 static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr) 103 { 104 unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr); 105 106 for (i = 0; i < TB_JMP_PAGE_SIZE; i++) { 107 qatomic_set(&cpu->tb_jmp_cache[i0 + i], NULL); 108 } 109 } 110 111 static void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr) 112 { 113 /* Discard jump cache entries for any tb which might potentially 114 overlap the flushed page. 
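       (A translation block can span two guest pages, so a TB that begins on
       the preceding page may still contain code from the flushed page;
       clearing the previous page's bucket catches those entries too.)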
     */
    tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
    tb_jmp_cache_clear_page(cpu, addr);
}

/**
 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
 * @desc: The CPUTLBDesc portion of the TLB
 * @fast: The CPUTLBDescFast portion of the same TLB
 *
 * Called with tlb_lock held.
 *
 * We have two main constraints when resizing a TLB: (1) we only resize it
 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
 * the array or unnecessarily flushing it), which means we do not control how
 * frequently the resizing can occur; (2) we don't have access to the guest's
 * future scheduling decisions, and therefore have to decide the magnitude of
 * the resize based on past observations.
 *
 * In general, a memory-hungry process can benefit greatly from an appropriately
 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
 * we just have to make the TLB as large as possible; while an oversized TLB
 * results in minimal TLB miss rates, it also takes longer to be flushed
 * (flushes can be _very_ frequent), and the reduced locality can also hurt
 * performance.
 *
 * To achieve near-optimal performance for all kinds of workloads, we:
 *
 * 1. Aggressively increase the size of the TLB when the use rate of the
 * TLB being flushed is high, since it is likely that in the near future this
 * memory-hungry process will execute again, and its memory hungriness will
 * probably be similar.
 *
 * 2. Slowly reduce the size of the TLB as the use rate declines over a
 * reasonably large time window. The rationale is that if in such a time window
 * we have not observed a high TLB use rate, it is likely that we won't observe
 * it in the near future. In that case, once a time window expires we downsize
 * the TLB to match the maximum use rate observed in the window.
 *
 * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
 * since in that range performance is likely near-optimal. Recall that the TLB
 * is direct mapped, so we want the use rate to be low (or at least not too
 * high), since otherwise we are likely to have a significant amount of
 * conflict misses.
 */
static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
                                  int64_t now)
{
    size_t old_size = tlb_n_entries(fast);
    size_t rate;
    size_t new_size = old_size;
    int64_t window_len_ms = 100;
    int64_t window_len_ns = window_len_ms * 1000 * 1000;
    bool window_expired = now > desc->window_begin_ns + window_len_ns;

    if (desc->n_used_entries > desc->window_max_entries) {
        desc->window_max_entries = desc->n_used_entries;
    }
    rate = desc->window_max_entries * 100 / old_size;

    if (rate > 70) {
        new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
    } else if (rate < 30 && window_expired) {
        size_t ceil = pow2ceil(desc->window_max_entries);
        size_t expected_rate = desc->window_max_entries * 100 / ceil;

        /*
         * Avoid undersizing when the max number of entries seen is just below
         * a pow2. For instance, if max_entries == 1025, the expected use rate
         * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
         * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
         * later. Thus, make sure that the expected use rate remains below 70%.
186 * (and since we double the size, that means the lowest rate we'd 187 * expect to get is 35%, which is still in the 30-70% range where 188 * we consider that the size is appropriate.) 189 */ 190 if (expected_rate > 70) { 191 ceil *= 2; 192 } 193 new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS); 194 } 195 196 if (new_size == old_size) { 197 if (window_expired) { 198 tlb_window_reset(desc, now, desc->n_used_entries); 199 } 200 return; 201 } 202 203 g_free(fast->table); 204 g_free(desc->iotlb); 205 206 tlb_window_reset(desc, now, 0); 207 /* desc->n_used_entries is cleared by the caller */ 208 fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; 209 fast->table = g_try_new(CPUTLBEntry, new_size); 210 desc->iotlb = g_try_new(CPUIOTLBEntry, new_size); 211 212 /* 213 * If the allocations fail, try smaller sizes. We just freed some 214 * memory, so going back to half of new_size has a good chance of working. 215 * Increased memory pressure elsewhere in the system might cause the 216 * allocations to fail though, so we progressively reduce the allocation 217 * size, aborting if we cannot even allocate the smallest TLB we support. 218 */ 219 while (fast->table == NULL || desc->iotlb == NULL) { 220 if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) { 221 error_report("%s: %s", __func__, strerror(errno)); 222 abort(); 223 } 224 new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS); 225 fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; 226 227 g_free(fast->table); 228 g_free(desc->iotlb); 229 fast->table = g_try_new(CPUTLBEntry, new_size); 230 desc->iotlb = g_try_new(CPUIOTLBEntry, new_size); 231 } 232 } 233 234 static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast) 235 { 236 desc->n_used_entries = 0; 237 desc->large_page_addr = -1; 238 desc->large_page_mask = -1; 239 desc->vindex = 0; 240 memset(fast->table, -1, sizeof_tlb(fast)); 241 memset(desc->vtable, -1, sizeof(desc->vtable)); 242 } 243 244 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx, 245 int64_t now) 246 { 247 CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx]; 248 CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx]; 249 250 tlb_mmu_resize_locked(desc, fast, now); 251 tlb_mmu_flush_locked(desc, fast); 252 } 253 254 static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now) 255 { 256 size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS; 257 258 tlb_window_reset(desc, now, 0); 259 desc->n_used_entries = 0; 260 fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS; 261 fast->table = g_new(CPUTLBEntry, n_entries); 262 desc->iotlb = g_new(CPUIOTLBEntry, n_entries); 263 tlb_mmu_flush_locked(desc, fast); 264 } 265 266 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx) 267 { 268 env_tlb(env)->d[mmu_idx].n_used_entries++; 269 } 270 271 static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx) 272 { 273 env_tlb(env)->d[mmu_idx].n_used_entries--; 274 } 275 276 void tlb_init(CPUState *cpu) 277 { 278 CPUArchState *env = cpu->env_ptr; 279 int64_t now = get_clock_realtime(); 280 int i; 281 282 qemu_spin_init(&env_tlb(env)->c.lock); 283 284 /* All tlbs are initialized flushed. 
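       c.dirty is a bitmap with one bit per mmu_idx; a set bit means that
       TLB may hold valid entries, while clear bits let
       tlb_flush_by_mmuidx_async_work() elide the corresponding flush.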
*/ 285 env_tlb(env)->c.dirty = 0; 286 287 for (i = 0; i < NB_MMU_MODES; i++) { 288 tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now); 289 } 290 } 291 292 void tlb_destroy(CPUState *cpu) 293 { 294 CPUArchState *env = cpu->env_ptr; 295 int i; 296 297 qemu_spin_destroy(&env_tlb(env)->c.lock); 298 for (i = 0; i < NB_MMU_MODES; i++) { 299 CPUTLBDesc *desc = &env_tlb(env)->d[i]; 300 CPUTLBDescFast *fast = &env_tlb(env)->f[i]; 301 302 g_free(fast->table); 303 g_free(desc->iotlb); 304 } 305 } 306 307 /* flush_all_helper: run fn across all cpus 308 * 309 * If the wait flag is set then the src cpu's helper will be queued as 310 * "safe" work and the loop exited creating a synchronisation point 311 * where all queued work will be finished before execution starts 312 * again. 313 */ 314 static void flush_all_helper(CPUState *src, run_on_cpu_func fn, 315 run_on_cpu_data d) 316 { 317 CPUState *cpu; 318 319 CPU_FOREACH(cpu) { 320 if (cpu != src) { 321 async_run_on_cpu(cpu, fn, d); 322 } 323 } 324 } 325 326 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide) 327 { 328 CPUState *cpu; 329 size_t full = 0, part = 0, elide = 0; 330 331 CPU_FOREACH(cpu) { 332 CPUArchState *env = cpu->env_ptr; 333 334 full += qatomic_read(&env_tlb(env)->c.full_flush_count); 335 part += qatomic_read(&env_tlb(env)->c.part_flush_count); 336 elide += qatomic_read(&env_tlb(env)->c.elide_flush_count); 337 } 338 *pfull = full; 339 *ppart = part; 340 *pelide = elide; 341 } 342 343 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) 344 { 345 CPUArchState *env = cpu->env_ptr; 346 uint16_t asked = data.host_int; 347 uint16_t all_dirty, work, to_clean; 348 int64_t now = get_clock_realtime(); 349 350 assert_cpu_is_self(cpu); 351 352 tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked); 353 354 qemu_spin_lock(&env_tlb(env)->c.lock); 355 356 all_dirty = env_tlb(env)->c.dirty; 357 to_clean = asked & all_dirty; 358 all_dirty &= ~to_clean; 359 env_tlb(env)->c.dirty = all_dirty; 360 361 for (work = to_clean; work != 0; work &= work - 1) { 362 int mmu_idx = ctz32(work); 363 tlb_flush_one_mmuidx_locked(env, mmu_idx, now); 364 } 365 366 qemu_spin_unlock(&env_tlb(env)->c.lock); 367 368 cpu_tb_jmp_cache_clear(cpu); 369 370 if (to_clean == ALL_MMUIDX_BITS) { 371 qatomic_set(&env_tlb(env)->c.full_flush_count, 372 env_tlb(env)->c.full_flush_count + 1); 373 } else { 374 qatomic_set(&env_tlb(env)->c.part_flush_count, 375 env_tlb(env)->c.part_flush_count + ctpop16(to_clean)); 376 if (to_clean != asked) { 377 qatomic_set(&env_tlb(env)->c.elide_flush_count, 378 env_tlb(env)->c.elide_flush_count + 379 ctpop16(asked & ~to_clean)); 380 } 381 } 382 } 383 384 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap) 385 { 386 tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap); 387 388 if (cpu->created && !qemu_cpu_is_self(cpu)) { 389 async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work, 390 RUN_ON_CPU_HOST_INT(idxmap)); 391 } else { 392 tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap)); 393 } 394 } 395 396 void tlb_flush(CPUState *cpu) 397 { 398 tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS); 399 } 400 401 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap) 402 { 403 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work; 404 405 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap); 406 407 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); 408 fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap)); 409 } 410 411 void tlb_flush_all_cpus(CPUState *src_cpu) 412 { 413 tlb_flush_by_mmuidx_all_cpus(src_cpu, 
ALL_MMUIDX_BITS); 414 } 415 416 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap) 417 { 418 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work; 419 420 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap); 421 422 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); 423 async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); 424 } 425 426 void tlb_flush_all_cpus_synced(CPUState *src_cpu) 427 { 428 tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS); 429 } 430 431 static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry, 432 target_ulong page, target_ulong mask) 433 { 434 page &= mask; 435 mask &= TARGET_PAGE_MASK | TLB_INVALID_MASK; 436 437 return (page == (tlb_entry->addr_read & mask) || 438 page == (tlb_addr_write(tlb_entry) & mask) || 439 page == (tlb_entry->addr_code & mask)); 440 } 441 442 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, 443 target_ulong page) 444 { 445 return tlb_hit_page_mask_anyprot(tlb_entry, page, -1); 446 } 447 448 /** 449 * tlb_entry_is_empty - return true if the entry is not in use 450 * @te: pointer to CPUTLBEntry 451 */ 452 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te) 453 { 454 return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1; 455 } 456 457 /* Called with tlb_c.lock held */ 458 static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry, 459 target_ulong page, 460 target_ulong mask) 461 { 462 if (tlb_hit_page_mask_anyprot(tlb_entry, page, mask)) { 463 memset(tlb_entry, -1, sizeof(*tlb_entry)); 464 return true; 465 } 466 return false; 467 } 468 469 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, 470 target_ulong page) 471 { 472 return tlb_flush_entry_mask_locked(tlb_entry, page, -1); 473 } 474 475 /* Called with tlb_c.lock held */ 476 static void tlb_flush_vtlb_page_mask_locked(CPUArchState *env, int mmu_idx, 477 target_ulong page, 478 target_ulong mask) 479 { 480 CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx]; 481 int k; 482 483 assert_cpu_is_self(env_cpu(env)); 484 for (k = 0; k < CPU_VTLB_SIZE; k++) { 485 if (tlb_flush_entry_mask_locked(&d->vtable[k], page, mask)) { 486 tlb_n_used_entries_dec(env, mmu_idx); 487 } 488 } 489 } 490 491 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, 492 target_ulong page) 493 { 494 tlb_flush_vtlb_page_mask_locked(env, mmu_idx, page, -1); 495 } 496 497 static void tlb_flush_page_locked(CPUArchState *env, int midx, 498 target_ulong page) 499 { 500 target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr; 501 target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask; 502 503 /* Check if we need to flush due to large pages. */ 504 if ((page & lp_mask) == lp_addr) { 505 tlb_debug("forcing full flush midx %d (" 506 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", 507 midx, lp_addr, lp_mask); 508 tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); 509 } else { 510 if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) { 511 tlb_n_used_entries_dec(env, midx); 512 } 513 tlb_flush_vtlb_page_locked(env, midx, page); 514 } 515 } 516 517 /** 518 * tlb_flush_page_by_mmuidx_async_0: 519 * @cpu: cpu on which to flush 520 * @addr: page of virtual address to flush 521 * @idxmap: set of mmu_idx to flush 522 * 523 * Helper for tlb_flush_page_by_mmuidx and friends, flush one page 524 * at @addr from the tlbs indicated by @idxmap from @cpu. 
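 *
 * @idxmap is a bitmask over mmu_idx values: for example, idxmap == 0x5
 * flushes the page from mmu_idx 0 and mmu_idx 2 only.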
525 */ 526 static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu, 527 target_ulong addr, 528 uint16_t idxmap) 529 { 530 CPUArchState *env = cpu->env_ptr; 531 int mmu_idx; 532 533 assert_cpu_is_self(cpu); 534 535 tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap); 536 537 qemu_spin_lock(&env_tlb(env)->c.lock); 538 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 539 if ((idxmap >> mmu_idx) & 1) { 540 tlb_flush_page_locked(env, mmu_idx, addr); 541 } 542 } 543 qemu_spin_unlock(&env_tlb(env)->c.lock); 544 545 tb_flush_jmp_cache(cpu, addr); 546 } 547 548 /** 549 * tlb_flush_page_by_mmuidx_async_1: 550 * @cpu: cpu on which to flush 551 * @data: encoded addr + idxmap 552 * 553 * Helper for tlb_flush_page_by_mmuidx and friends, called through 554 * async_run_on_cpu. The idxmap parameter is encoded in the page 555 * offset of the target_ptr field. This limits the set of mmu_idx 556 * that can be passed via this method. 557 */ 558 static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu, 559 run_on_cpu_data data) 560 { 561 target_ulong addr_and_idxmap = (target_ulong) data.target_ptr; 562 target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK; 563 uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK; 564 565 tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap); 566 } 567 568 typedef struct { 569 target_ulong addr; 570 uint16_t idxmap; 571 } TLBFlushPageByMMUIdxData; 572 573 /** 574 * tlb_flush_page_by_mmuidx_async_2: 575 * @cpu: cpu on which to flush 576 * @data: allocated addr + idxmap 577 * 578 * Helper for tlb_flush_page_by_mmuidx and friends, called through 579 * async_run_on_cpu. The addr+idxmap parameters are stored in a 580 * TLBFlushPageByMMUIdxData structure that has been allocated 581 * specifically for this helper. Free the structure when done. 582 */ 583 static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu, 584 run_on_cpu_data data) 585 { 586 TLBFlushPageByMMUIdxData *d = data.host_ptr; 587 588 tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap); 589 g_free(d); 590 } 591 592 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap) 593 { 594 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap); 595 596 /* This should already be page aligned */ 597 addr &= TARGET_PAGE_MASK; 598 599 if (qemu_cpu_is_self(cpu)) { 600 tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap); 601 } else if (idxmap < TARGET_PAGE_SIZE) { 602 /* 603 * Most targets have only a few mmu_idx. In the case where 604 * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid 605 * allocating memory for this operation. 606 */ 607 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1, 608 RUN_ON_CPU_TARGET_PTR(addr | idxmap)); 609 } else { 610 TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1); 611 612 /* Otherwise allocate a structure, freed by the worker. */ 613 d->addr = addr; 614 d->idxmap = idxmap; 615 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2, 616 RUN_ON_CPU_HOST_PTR(d)); 617 } 618 } 619 620 void tlb_flush_page(CPUState *cpu, target_ulong addr) 621 { 622 tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS); 623 } 624 625 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr, 626 uint16_t idxmap) 627 { 628 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap); 629 630 /* This should already be page aligned */ 631 addr &= TARGET_PAGE_MASK; 632 633 /* 634 * Allocate memory to hold addr+idxmap only when needed. 635 * See tlb_flush_page_by_mmuidx for details. 
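 *
 * (With 4 KiB target pages, TARGET_PAGE_BITS == 12, so any idxmap below
 * 4096 fits in the page-offset bits of the page-aligned @addr and no
 * allocation is needed.)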
636 */ 637 if (idxmap < TARGET_PAGE_SIZE) { 638 flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1, 639 RUN_ON_CPU_TARGET_PTR(addr | idxmap)); 640 } else { 641 CPUState *dst_cpu; 642 643 /* Allocate a separate data block for each destination cpu. */ 644 CPU_FOREACH(dst_cpu) { 645 if (dst_cpu != src_cpu) { 646 TLBFlushPageByMMUIdxData *d 647 = g_new(TLBFlushPageByMMUIdxData, 1); 648 649 d->addr = addr; 650 d->idxmap = idxmap; 651 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2, 652 RUN_ON_CPU_HOST_PTR(d)); 653 } 654 } 655 } 656 657 tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap); 658 } 659 660 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr) 661 { 662 tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS); 663 } 664 665 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu, 666 target_ulong addr, 667 uint16_t idxmap) 668 { 669 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap); 670 671 /* This should already be page aligned */ 672 addr &= TARGET_PAGE_MASK; 673 674 /* 675 * Allocate memory to hold addr+idxmap only when needed. 676 * See tlb_flush_page_by_mmuidx for details. 677 */ 678 if (idxmap < TARGET_PAGE_SIZE) { 679 flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1, 680 RUN_ON_CPU_TARGET_PTR(addr | idxmap)); 681 async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1, 682 RUN_ON_CPU_TARGET_PTR(addr | idxmap)); 683 } else { 684 CPUState *dst_cpu; 685 TLBFlushPageByMMUIdxData *d; 686 687 /* Allocate a separate data block for each destination cpu. */ 688 CPU_FOREACH(dst_cpu) { 689 if (dst_cpu != src_cpu) { 690 d = g_new(TLBFlushPageByMMUIdxData, 1); 691 d->addr = addr; 692 d->idxmap = idxmap; 693 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2, 694 RUN_ON_CPU_HOST_PTR(d)); 695 } 696 } 697 698 d = g_new(TLBFlushPageByMMUIdxData, 1); 699 d->addr = addr; 700 d->idxmap = idxmap; 701 async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2, 702 RUN_ON_CPU_HOST_PTR(d)); 703 } 704 } 705 706 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr) 707 { 708 tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS); 709 } 710 711 static void tlb_flush_page_bits_locked(CPUArchState *env, int midx, 712 target_ulong page, unsigned bits) 713 { 714 CPUTLBDesc *d = &env_tlb(env)->d[midx]; 715 CPUTLBDescFast *f = &env_tlb(env)->f[midx]; 716 target_ulong mask = MAKE_64BIT_MASK(0, bits); 717 718 /* 719 * If @bits is smaller than the tlb size, there may be multiple entries 720 * within the TLB; otherwise all addresses that match under @mask hit 721 * the same TLB entry. 722 * 723 * TODO: Perhaps allow bits to be a few bits less than the size. 724 * For now, just flush the entire TLB. 725 */ 726 if (mask < f->mask) { 727 tlb_debug("forcing full flush midx %d (" 728 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", 729 midx, page, mask); 730 tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); 731 return; 732 } 733 734 /* Check if we need to flush due to large pages. 
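       large_page_addr/large_page_mask track the union of all large pages
       installed since the last flush; if the page falls inside that region
       we cannot tell which entry maps it, so the whole mmu_idx is flushed.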
     */
    if ((page & d->large_page_mask) == d->large_page_addr) {
        tlb_debug("forcing full flush midx %d ("
                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
                  midx, d->large_page_addr, d->large_page_mask);
        tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
        return;
    }

    if (tlb_flush_entry_mask_locked(tlb_entry(env, midx, page), page, mask)) {
        tlb_n_used_entries_dec(env, midx);
    }
    tlb_flush_vtlb_page_mask_locked(env, midx, page, mask);
}

typedef struct {
    target_ulong addr;
    uint16_t idxmap;
    uint16_t bits;
} TLBFlushPageBitsByMMUIdxData;

static void
tlb_flush_page_bits_by_mmuidx_async_0(CPUState *cpu,
                                      TLBFlushPageBitsByMMUIdxData d)
{
    CPUArchState *env = cpu->env_ptr;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    tlb_debug("page addr:" TARGET_FMT_lx "/%u mmu_map:0x%x\n",
              d.addr, d.bits, d.idxmap);

    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        if ((d.idxmap >> mmu_idx) & 1) {
            tlb_flush_page_bits_locked(env, mmu_idx, d.addr, d.bits);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);

    tb_flush_jmp_cache(cpu, d.addr);
}

static bool encode_pbm_to_runon(run_on_cpu_data *out,
                                TLBFlushPageBitsByMMUIdxData d)
{
    /* We need 6 bits to hold @bits up to 63. */
    if (d.idxmap <= MAKE_64BIT_MASK(0, TARGET_PAGE_BITS - 6)) {
        *out = RUN_ON_CPU_TARGET_PTR(d.addr | (d.idxmap << 6) | d.bits);
        return true;
    }
    return false;
}

static TLBFlushPageBitsByMMUIdxData
decode_runon_to_pbm(run_on_cpu_data data)
{
    target_ulong addr_map_bits = (target_ulong) data.target_ptr;
    return (TLBFlushPageBitsByMMUIdxData){
        .addr = addr_map_bits & TARGET_PAGE_MASK,
        .idxmap = (addr_map_bits & ~TARGET_PAGE_MASK) >> 6,
        .bits = addr_map_bits & 0x3f
    };
}

static void tlb_flush_page_bits_by_mmuidx_async_1(CPUState *cpu,
                                                  run_on_cpu_data runon)
{
    tlb_flush_page_bits_by_mmuidx_async_0(cpu, decode_runon_to_pbm(runon));
}

static void tlb_flush_page_bits_by_mmuidx_async_2(CPUState *cpu,
                                                  run_on_cpu_data data)
{
    TLBFlushPageBitsByMMUIdxData *d = data.host_ptr;
    tlb_flush_page_bits_by_mmuidx_async_0(cpu, *d);
    g_free(d);
}

void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr,
                                   uint16_t idxmap, unsigned bits)
{
    TLBFlushPageBitsByMMUIdxData d;
    run_on_cpu_data runon;

    /* If all bits are significant, this devolves to tlb_flush_page. */
    if (bits >= TARGET_LONG_BITS) {
        tlb_flush_page_by_mmuidx(cpu, addr, idxmap);
        return;
    }
    /* If no page bits are significant, this devolves to tlb_flush. */
    if (bits < TARGET_PAGE_BITS) {
        tlb_flush_by_mmuidx(cpu, idxmap);
        return;
    }

    /* This should already be page aligned */
    d.addr = addr & TARGET_PAGE_MASK;
    d.idxmap = idxmap;
    d.bits = bits;

    if (qemu_cpu_is_self(cpu)) {
        tlb_flush_page_bits_by_mmuidx_async_0(cpu, d);
    } else if (encode_pbm_to_runon(&runon, d)) {
        async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon);
    } else {
        TLBFlushPageBitsByMMUIdxData *p
            = g_new(TLBFlushPageBitsByMMUIdxData, 1);

        /* Otherwise allocate a structure, freed by the worker.
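           (The packed run_on_cpu_data encoding used by encode_pbm_to_runon()
           cannot represent this idxmap without colliding with the 6 bits
           reserved for @bits, hence the heap-allocated copy.)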
*/ 845 *p = d; 846 async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_2, 847 RUN_ON_CPU_HOST_PTR(p)); 848 } 849 } 850 851 void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu, 852 target_ulong addr, 853 uint16_t idxmap, 854 unsigned bits) 855 { 856 TLBFlushPageBitsByMMUIdxData d; 857 run_on_cpu_data runon; 858 859 /* If all bits are significant, this devolves to tlb_flush_page. */ 860 if (bits >= TARGET_LONG_BITS) { 861 tlb_flush_page_by_mmuidx_all_cpus(src_cpu, addr, idxmap); 862 return; 863 } 864 /* If no page bits are significant, this devolves to tlb_flush. */ 865 if (bits < TARGET_PAGE_BITS) { 866 tlb_flush_by_mmuidx_all_cpus(src_cpu, idxmap); 867 return; 868 } 869 870 /* This should already be page aligned */ 871 d.addr = addr & TARGET_PAGE_MASK; 872 d.idxmap = idxmap; 873 d.bits = bits; 874 875 if (encode_pbm_to_runon(&runon, d)) { 876 flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon); 877 } else { 878 CPUState *dst_cpu; 879 TLBFlushPageBitsByMMUIdxData *p; 880 881 /* Allocate a separate data block for each destination cpu. */ 882 CPU_FOREACH(dst_cpu) { 883 if (dst_cpu != src_cpu) { 884 p = g_new(TLBFlushPageBitsByMMUIdxData, 1); 885 *p = d; 886 async_run_on_cpu(dst_cpu, 887 tlb_flush_page_bits_by_mmuidx_async_2, 888 RUN_ON_CPU_HOST_PTR(p)); 889 } 890 } 891 } 892 893 tlb_flush_page_bits_by_mmuidx_async_0(src_cpu, d); 894 } 895 896 void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu, 897 target_ulong addr, 898 uint16_t idxmap, 899 unsigned bits) 900 { 901 TLBFlushPageBitsByMMUIdxData d; 902 run_on_cpu_data runon; 903 904 /* If all bits are significant, this devolves to tlb_flush_page. */ 905 if (bits >= TARGET_LONG_BITS) { 906 tlb_flush_page_by_mmuidx_all_cpus_synced(src_cpu, addr, idxmap); 907 return; 908 } 909 /* If no page bits are significant, this devolves to tlb_flush. */ 910 if (bits < TARGET_PAGE_BITS) { 911 tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, idxmap); 912 return; 913 } 914 915 /* This should already be page aligned */ 916 d.addr = addr & TARGET_PAGE_MASK; 917 d.idxmap = idxmap; 918 d.bits = bits; 919 920 if (encode_pbm_to_runon(&runon, d)) { 921 flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon); 922 async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, 923 runon); 924 } else { 925 CPUState *dst_cpu; 926 TLBFlushPageBitsByMMUIdxData *p; 927 928 /* Allocate a separate data block for each destination cpu. 
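           Each worker frees its own block in
           tlb_flush_page_bits_by_mmuidx_async_2(), so a single shared
           allocation would be freed more than once.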
*/ 929 CPU_FOREACH(dst_cpu) { 930 if (dst_cpu != src_cpu) { 931 p = g_new(TLBFlushPageBitsByMMUIdxData, 1); 932 *p = d; 933 async_run_on_cpu(dst_cpu, tlb_flush_page_bits_by_mmuidx_async_2, 934 RUN_ON_CPU_HOST_PTR(p)); 935 } 936 } 937 938 p = g_new(TLBFlushPageBitsByMMUIdxData, 1); 939 *p = d; 940 async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_2, 941 RUN_ON_CPU_HOST_PTR(p)); 942 } 943 } 944 945 /* update the TLBs so that writes to code in the virtual page 'addr' 946 can be detected */ 947 void tlb_protect_code(ram_addr_t ram_addr) 948 { 949 cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE, 950 DIRTY_MEMORY_CODE); 951 } 952 953 /* update the TLB so that writes in physical page 'phys_addr' are no longer 954 tested for self modifying code */ 955 void tlb_unprotect_code(ram_addr_t ram_addr) 956 { 957 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE); 958 } 959 960 961 /* 962 * Dirty write flag handling 963 * 964 * When the TCG code writes to a location it looks up the address in 965 * the TLB and uses that data to compute the final address. If any of 966 * the lower bits of the address are set then the slow path is forced. 967 * There are a number of reasons to do this but for normal RAM the 968 * most usual is detecting writes to code regions which may invalidate 969 * generated code. 970 * 971 * Other vCPUs might be reading their TLBs during guest execution, so we update 972 * te->addr_write with qatomic_set. We don't need to worry about this for 973 * oversized guests as MTTCG is disabled for them. 974 * 975 * Called with tlb_c.lock held. 976 */ 977 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry, 978 uintptr_t start, uintptr_t length) 979 { 980 uintptr_t addr = tlb_entry->addr_write; 981 982 if ((addr & (TLB_INVALID_MASK | TLB_MMIO | 983 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) { 984 addr &= TARGET_PAGE_MASK; 985 addr += tlb_entry->addend; 986 if ((addr - start) < length) { 987 #if TCG_OVERSIZED_GUEST 988 tlb_entry->addr_write |= TLB_NOTDIRTY; 989 #else 990 qatomic_set(&tlb_entry->addr_write, 991 tlb_entry->addr_write | TLB_NOTDIRTY); 992 #endif 993 } 994 } 995 } 996 997 /* 998 * Called with tlb_c.lock held. 999 * Called only from the vCPU context, i.e. the TLB's owner thread. 1000 */ 1001 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s) 1002 { 1003 *d = *s; 1004 } 1005 1006 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of 1007 * the target vCPU). 1008 * We must take tlb_c.lock to avoid racing with another vCPU update. The only 1009 * thing actually updated is the target TLB entry ->addr_write flags. 
1010 */ 1011 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) 1012 { 1013 CPUArchState *env; 1014 1015 int mmu_idx; 1016 1017 env = cpu->env_ptr; 1018 qemu_spin_lock(&env_tlb(env)->c.lock); 1019 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 1020 unsigned int i; 1021 unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]); 1022 1023 for (i = 0; i < n; i++) { 1024 tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i], 1025 start1, length); 1026 } 1027 1028 for (i = 0; i < CPU_VTLB_SIZE; i++) { 1029 tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i], 1030 start1, length); 1031 } 1032 } 1033 qemu_spin_unlock(&env_tlb(env)->c.lock); 1034 } 1035 1036 /* Called with tlb_c.lock held */ 1037 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry, 1038 target_ulong vaddr) 1039 { 1040 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) { 1041 tlb_entry->addr_write = vaddr; 1042 } 1043 } 1044 1045 /* update the TLB corresponding to virtual page vaddr 1046 so that it is no longer dirty */ 1047 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) 1048 { 1049 CPUArchState *env = cpu->env_ptr; 1050 int mmu_idx; 1051 1052 assert_cpu_is_self(cpu); 1053 1054 vaddr &= TARGET_PAGE_MASK; 1055 qemu_spin_lock(&env_tlb(env)->c.lock); 1056 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 1057 tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr); 1058 } 1059 1060 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 1061 int k; 1062 for (k = 0; k < CPU_VTLB_SIZE; k++) { 1063 tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr); 1064 } 1065 } 1066 qemu_spin_unlock(&env_tlb(env)->c.lock); 1067 } 1068 1069 /* Our TLB does not support large pages, so remember the area covered by 1070 large pages and trigger a full TLB flush if these are invalidated. */ 1071 static void tlb_add_large_page(CPUArchState *env, int mmu_idx, 1072 target_ulong vaddr, target_ulong size) 1073 { 1074 target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr; 1075 target_ulong lp_mask = ~(size - 1); 1076 1077 if (lp_addr == (target_ulong)-1) { 1078 /* No previous large page. */ 1079 lp_addr = vaddr; 1080 } else { 1081 /* Extend the existing region to include the new page. 1082 This is a compromise between unnecessary flushes and 1083 the cost of maintaining a full variable size TLB. */ 1084 lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask; 1085 while (((lp_addr ^ vaddr) & lp_mask) != 0) { 1086 lp_mask <<= 1; 1087 } 1088 } 1089 env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask; 1090 env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask; 1091 } 1092 1093 /* Add a new TLB entry. At most one entry for a given virtual address 1094 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the 1095 * supplied size is only used by tlb_flush_page. 1096 * 1097 * Called from TCG-generated code, which is under an RCU read-side 1098 * critical section. 
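 *
 * Illustrative call shape (argument values are hypothetical) from a
 * target's tlb_fill hook, installing one read/write page:
 *
 *     tlb_set_page_with_attrs(cs, vaddr & TARGET_PAGE_MASK, paddr,
 *                             MEMTXATTRS_UNSPECIFIED,
 *                             PAGE_READ | PAGE_WRITE, mmu_idx,
 *                             TARGET_PAGE_SIZE);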
1099 */ 1100 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, 1101 hwaddr paddr, MemTxAttrs attrs, int prot, 1102 int mmu_idx, target_ulong size) 1103 { 1104 CPUArchState *env = cpu->env_ptr; 1105 CPUTLB *tlb = env_tlb(env); 1106 CPUTLBDesc *desc = &tlb->d[mmu_idx]; 1107 MemoryRegionSection *section; 1108 unsigned int index; 1109 target_ulong address; 1110 target_ulong write_address; 1111 uintptr_t addend; 1112 CPUTLBEntry *te, tn; 1113 hwaddr iotlb, xlat, sz, paddr_page; 1114 target_ulong vaddr_page; 1115 int asidx = cpu_asidx_from_attrs(cpu, attrs); 1116 int wp_flags; 1117 bool is_ram, is_romd; 1118 1119 assert_cpu_is_self(cpu); 1120 1121 if (size <= TARGET_PAGE_SIZE) { 1122 sz = TARGET_PAGE_SIZE; 1123 } else { 1124 tlb_add_large_page(env, mmu_idx, vaddr, size); 1125 sz = size; 1126 } 1127 vaddr_page = vaddr & TARGET_PAGE_MASK; 1128 paddr_page = paddr & TARGET_PAGE_MASK; 1129 1130 section = address_space_translate_for_iotlb(cpu, asidx, paddr_page, 1131 &xlat, &sz, attrs, &prot); 1132 assert(sz >= TARGET_PAGE_SIZE); 1133 1134 tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx 1135 " prot=%x idx=%d\n", 1136 vaddr, paddr, prot, mmu_idx); 1137 1138 address = vaddr_page; 1139 if (size < TARGET_PAGE_SIZE) { 1140 /* Repeat the MMU check and TLB fill on every access. */ 1141 address |= TLB_INVALID_MASK; 1142 } 1143 if (attrs.byte_swap) { 1144 address |= TLB_BSWAP; 1145 } 1146 1147 is_ram = memory_region_is_ram(section->mr); 1148 is_romd = memory_region_is_romd(section->mr); 1149 1150 if (is_ram || is_romd) { 1151 /* RAM and ROMD both have associated host memory. */ 1152 addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; 1153 } else { 1154 /* I/O does not; force the host address to NULL. */ 1155 addend = 0; 1156 } 1157 1158 write_address = address; 1159 if (is_ram) { 1160 iotlb = memory_region_get_ram_addr(section->mr) + xlat; 1161 /* 1162 * Computing is_clean is expensive; avoid all that unless 1163 * the page is actually writable. 1164 */ 1165 if (prot & PAGE_WRITE) { 1166 if (section->readonly) { 1167 write_address |= TLB_DISCARD_WRITE; 1168 } else if (cpu_physical_memory_is_clean(iotlb)) { 1169 write_address |= TLB_NOTDIRTY; 1170 } 1171 } 1172 } else { 1173 /* I/O or ROMD */ 1174 iotlb = memory_region_section_get_iotlb(cpu, section) + xlat; 1175 /* 1176 * Writes to romd devices must go through MMIO to enable write. 1177 * Reads to romd devices go through the ram_ptr found above, 1178 * but of course reads to I/O must go through MMIO. 1179 */ 1180 write_address |= TLB_MMIO; 1181 if (!is_romd) { 1182 address = write_address; 1183 } 1184 } 1185 1186 wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page, 1187 TARGET_PAGE_SIZE); 1188 1189 index = tlb_index(env, mmu_idx, vaddr_page); 1190 te = tlb_entry(env, mmu_idx, vaddr_page); 1191 1192 /* 1193 * Hold the TLB lock for the rest of the function. We could acquire/release 1194 * the lock several times in the function, but it is faster to amortize the 1195 * acquisition cost by acquiring it just once. Note that this leads to 1196 * a longer critical section, but this is not a concern since the TLB lock 1197 * is unlikely to be contended. 1198 */ 1199 qemu_spin_lock(&tlb->c.lock); 1200 1201 /* Note that the tlb is no longer clean. */ 1202 tlb->c.dirty |= 1 << mmu_idx; 1203 1204 /* Make sure there's no cached translation for the new page. 
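       Otherwise a stale victim-TLB entry for vaddr_page could later be
       copied back into the main table by victim_tlb_hit() and shadow the
       entry installed below.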
*/ 1205 tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page); 1206 1207 /* 1208 * Only evict the old entry to the victim tlb if it's for a 1209 * different page; otherwise just overwrite the stale data. 1210 */ 1211 if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) { 1212 unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE; 1213 CPUTLBEntry *tv = &desc->vtable[vidx]; 1214 1215 /* Evict the old entry into the victim tlb. */ 1216 copy_tlb_helper_locked(tv, te); 1217 desc->viotlb[vidx] = desc->iotlb[index]; 1218 tlb_n_used_entries_dec(env, mmu_idx); 1219 } 1220 1221 /* refill the tlb */ 1222 /* 1223 * At this point iotlb contains a physical section number in the lower 1224 * TARGET_PAGE_BITS, and either 1225 * + the ram_addr_t of the page base of the target RAM (RAM) 1226 * + the offset within section->mr of the page base (I/O, ROMD) 1227 * We subtract the vaddr_page (which is page aligned and thus won't 1228 * disturb the low bits) to give an offset which can be added to the 1229 * (non-page-aligned) vaddr of the eventual memory access to get 1230 * the MemoryRegion offset for the access. Note that the vaddr we 1231 * subtract here is that of the page base, and not the same as the 1232 * vaddr we add back in io_readx()/io_writex()/get_page_addr_code(). 1233 */ 1234 desc->iotlb[index].addr = iotlb - vaddr_page; 1235 desc->iotlb[index].attrs = attrs; 1236 1237 /* Now calculate the new entry */ 1238 tn.addend = addend - vaddr_page; 1239 if (prot & PAGE_READ) { 1240 tn.addr_read = address; 1241 if (wp_flags & BP_MEM_READ) { 1242 tn.addr_read |= TLB_WATCHPOINT; 1243 } 1244 } else { 1245 tn.addr_read = -1; 1246 } 1247 1248 if (prot & PAGE_EXEC) { 1249 tn.addr_code = address; 1250 } else { 1251 tn.addr_code = -1; 1252 } 1253 1254 tn.addr_write = -1; 1255 if (prot & PAGE_WRITE) { 1256 tn.addr_write = write_address; 1257 if (prot & PAGE_WRITE_INV) { 1258 tn.addr_write |= TLB_INVALID_MASK; 1259 } 1260 if (wp_flags & BP_MEM_WRITE) { 1261 tn.addr_write |= TLB_WATCHPOINT; 1262 } 1263 } 1264 1265 copy_tlb_helper_locked(te, &tn); 1266 tlb_n_used_entries_inc(env, mmu_idx); 1267 qemu_spin_unlock(&tlb->c.lock); 1268 } 1269 1270 /* Add a new TLB entry, but without specifying the memory 1271 * transaction attributes to be used. 1272 */ 1273 void tlb_set_page(CPUState *cpu, target_ulong vaddr, 1274 hwaddr paddr, int prot, 1275 int mmu_idx, target_ulong size) 1276 { 1277 tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED, 1278 prot, mmu_idx, size); 1279 } 1280 1281 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr) 1282 { 1283 ram_addr_t ram_addr; 1284 1285 ram_addr = qemu_ram_addr_from_host(ptr); 1286 if (ram_addr == RAM_ADDR_INVALID) { 1287 error_report("Bad ram pointer %p", ptr); 1288 abort(); 1289 } 1290 return ram_addr; 1291 } 1292 1293 /* 1294 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the 1295 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must 1296 * be discarded and looked up again (e.g. via tlb_entry()). 1297 */ 1298 static void tlb_fill(CPUState *cpu, target_ulong addr, int size, 1299 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) 1300 { 1301 CPUClass *cc = CPU_GET_CLASS(cpu); 1302 bool ok; 1303 1304 /* 1305 * This is not a probe, so only valid return is success; failure 1306 * should result in exception + longjmp to the cpu loop. 
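 * With probe/nonfault == false the tlb_fill hook either installs a TLB
 * entry and returns true, or raises the guest exception itself and does
 * not return here, so the assertion below cannot fire for a correct
 * target implementation.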
1307 */ 1308 ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr); 1309 assert(ok); 1310 } 1311 1312 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, 1313 int mmu_idx, target_ulong addr, uintptr_t retaddr, 1314 MMUAccessType access_type, MemOp op) 1315 { 1316 CPUState *cpu = env_cpu(env); 1317 hwaddr mr_offset; 1318 MemoryRegionSection *section; 1319 MemoryRegion *mr; 1320 uint64_t val; 1321 bool locked = false; 1322 MemTxResult r; 1323 1324 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); 1325 mr = section->mr; 1326 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; 1327 cpu->mem_io_pc = retaddr; 1328 if (!cpu->can_do_io) { 1329 cpu_io_recompile(cpu, retaddr); 1330 } 1331 1332 if (!qemu_mutex_iothread_locked()) { 1333 qemu_mutex_lock_iothread(); 1334 locked = true; 1335 } 1336 r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs); 1337 if (r != MEMTX_OK) { 1338 hwaddr physaddr = mr_offset + 1339 section->offset_within_address_space - 1340 section->offset_within_region; 1341 1342 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type, 1343 mmu_idx, iotlbentry->attrs, r, retaddr); 1344 } 1345 if (locked) { 1346 qemu_mutex_unlock_iothread(); 1347 } 1348 1349 return val; 1350 } 1351 1352 /* 1353 * Save a potentially trashed IOTLB entry for later lookup by plugin. 1354 * This is read by tlb_plugin_lookup if the iotlb entry doesn't match 1355 * because of the side effect of io_writex changing memory layout. 1356 */ 1357 static void save_iotlb_data(CPUState *cs, hwaddr addr, 1358 MemoryRegionSection *section, hwaddr mr_offset) 1359 { 1360 #ifdef CONFIG_PLUGIN 1361 SavedIOTLB *saved = &cs->saved_iotlb; 1362 saved->addr = addr; 1363 saved->section = section; 1364 saved->mr_offset = mr_offset; 1365 #endif 1366 } 1367 1368 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, 1369 int mmu_idx, uint64_t val, target_ulong addr, 1370 uintptr_t retaddr, MemOp op) 1371 { 1372 CPUState *cpu = env_cpu(env); 1373 hwaddr mr_offset; 1374 MemoryRegionSection *section; 1375 MemoryRegion *mr; 1376 bool locked = false; 1377 MemTxResult r; 1378 1379 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); 1380 mr = section->mr; 1381 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; 1382 if (!cpu->can_do_io) { 1383 cpu_io_recompile(cpu, retaddr); 1384 } 1385 cpu->mem_io_pc = retaddr; 1386 1387 /* 1388 * The memory_region_dispatch may trigger a flush/resize 1389 * so for plugins we save the iotlb_data just in case. 
1390 */ 1391 save_iotlb_data(cpu, iotlbentry->addr, section, mr_offset); 1392 1393 if (!qemu_mutex_iothread_locked()) { 1394 qemu_mutex_lock_iothread(); 1395 locked = true; 1396 } 1397 r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs); 1398 if (r != MEMTX_OK) { 1399 hwaddr physaddr = mr_offset + 1400 section->offset_within_address_space - 1401 section->offset_within_region; 1402 1403 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), 1404 MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r, 1405 retaddr); 1406 } 1407 if (locked) { 1408 qemu_mutex_unlock_iothread(); 1409 } 1410 } 1411 1412 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs) 1413 { 1414 #if TCG_OVERSIZED_GUEST 1415 return *(target_ulong *)((uintptr_t)entry + ofs); 1416 #else 1417 /* ofs might correspond to .addr_write, so use qatomic_read */ 1418 return qatomic_read((target_ulong *)((uintptr_t)entry + ofs)); 1419 #endif 1420 } 1421 1422 /* Return true if ADDR is present in the victim tlb, and has been copied 1423 back to the main tlb. */ 1424 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, 1425 size_t elt_ofs, target_ulong page) 1426 { 1427 size_t vidx; 1428 1429 assert_cpu_is_self(env_cpu(env)); 1430 for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { 1431 CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx]; 1432 target_ulong cmp; 1433 1434 /* elt_ofs might correspond to .addr_write, so use qatomic_read */ 1435 #if TCG_OVERSIZED_GUEST 1436 cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); 1437 #else 1438 cmp = qatomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs)); 1439 #endif 1440 1441 if (cmp == page) { 1442 /* Found entry in victim tlb, swap tlb and iotlb. */ 1443 CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index]; 1444 1445 qemu_spin_lock(&env_tlb(env)->c.lock); 1446 copy_tlb_helper_locked(&tmptlb, tlb); 1447 copy_tlb_helper_locked(tlb, vtlb); 1448 copy_tlb_helper_locked(vtlb, &tmptlb); 1449 qemu_spin_unlock(&env_tlb(env)->c.lock); 1450 1451 CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1452 CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx]; 1453 tmpio = *io; *io = *vio; *vio = tmpio; 1454 return true; 1455 } 1456 } 1457 return false; 1458 } 1459 1460 /* Macro to call the above, with local variables from the use context. */ 1461 #define VICTIM_TLB_HIT(TY, ADDR) \ 1462 victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \ 1463 (ADDR) & TARGET_PAGE_MASK) 1464 1465 /* 1466 * Return a ram_addr_t for the virtual address for execution. 1467 * 1468 * Return -1 if we can't translate and execute from an entire page 1469 * of RAM. This will force us to execute by loading and translating 1470 * one insn at a time, without caching. 1471 * 1472 * NOTE: This function will trigger an exception if the page is 1473 * not executable. 
1474 */ 1475 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, 1476 void **hostp) 1477 { 1478 uintptr_t mmu_idx = cpu_mmu_index(env, true); 1479 uintptr_t index = tlb_index(env, mmu_idx, addr); 1480 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1481 void *p; 1482 1483 if (unlikely(!tlb_hit(entry->addr_code, addr))) { 1484 if (!VICTIM_TLB_HIT(addr_code, addr)) { 1485 tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0); 1486 index = tlb_index(env, mmu_idx, addr); 1487 entry = tlb_entry(env, mmu_idx, addr); 1488 1489 if (unlikely(entry->addr_code & TLB_INVALID_MASK)) { 1490 /* 1491 * The MMU protection covers a smaller range than a target 1492 * page, so we must redo the MMU check for every insn. 1493 */ 1494 return -1; 1495 } 1496 } 1497 assert(tlb_hit(entry->addr_code, addr)); 1498 } 1499 1500 if (unlikely(entry->addr_code & TLB_MMIO)) { 1501 /* The region is not backed by RAM. */ 1502 if (hostp) { 1503 *hostp = NULL; 1504 } 1505 return -1; 1506 } 1507 1508 p = (void *)((uintptr_t)addr + entry->addend); 1509 if (hostp) { 1510 *hostp = p; 1511 } 1512 return qemu_ram_addr_from_host_nofail(p); 1513 } 1514 1515 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) 1516 { 1517 return get_page_addr_code_hostp(env, addr, NULL); 1518 } 1519 1520 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, 1521 CPUIOTLBEntry *iotlbentry, uintptr_t retaddr) 1522 { 1523 ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr; 1524 1525 trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size); 1526 1527 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { 1528 struct page_collection *pages 1529 = page_collection_lock(ram_addr, ram_addr + size); 1530 tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr); 1531 page_collection_unlock(pages); 1532 } 1533 1534 /* 1535 * Set both VGA and migration bits for simplicity and to remove 1536 * the notdirty callback faster. 1537 */ 1538 cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE); 1539 1540 /* We remove the notdirty callback only if the code has been flushed. */ 1541 if (!cpu_physical_memory_is_clean(ram_addr)) { 1542 trace_memory_notdirty_set_dirty(mem_vaddr); 1543 tlb_set_dirty(cpu, mem_vaddr); 1544 } 1545 } 1546 1547 static int probe_access_internal(CPUArchState *env, target_ulong addr, 1548 int fault_size, MMUAccessType access_type, 1549 int mmu_idx, bool nonfault, 1550 void **phost, uintptr_t retaddr) 1551 { 1552 uintptr_t index = tlb_index(env, mmu_idx, addr); 1553 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1554 target_ulong tlb_addr, page_addr; 1555 size_t elt_ofs; 1556 int flags; 1557 1558 switch (access_type) { 1559 case MMU_DATA_LOAD: 1560 elt_ofs = offsetof(CPUTLBEntry, addr_read); 1561 break; 1562 case MMU_DATA_STORE: 1563 elt_ofs = offsetof(CPUTLBEntry, addr_write); 1564 break; 1565 case MMU_INST_FETCH: 1566 elt_ofs = offsetof(CPUTLBEntry, addr_code); 1567 break; 1568 default: 1569 g_assert_not_reached(); 1570 } 1571 tlb_addr = tlb_read_ofs(entry, elt_ofs); 1572 1573 page_addr = addr & TARGET_PAGE_MASK; 1574 if (!tlb_hit_page(tlb_addr, page_addr)) { 1575 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) { 1576 CPUState *cs = env_cpu(env); 1577 CPUClass *cc = CPU_GET_CLASS(cs); 1578 1579 if (!cc->tlb_fill(cs, addr, fault_size, access_type, 1580 mmu_idx, nonfault, retaddr)) { 1581 /* Non-faulting page table read failed. 
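                   The failure is reported to the caller via TLB_INVALID_MASK
                   in the returned flags rather than by raising a guest
                   exception.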
*/ 1582 *phost = NULL; 1583 return TLB_INVALID_MASK; 1584 } 1585 1586 /* TLB resize via tlb_fill may have moved the entry. */ 1587 entry = tlb_entry(env, mmu_idx, addr); 1588 } 1589 tlb_addr = tlb_read_ofs(entry, elt_ofs); 1590 } 1591 flags = tlb_addr & TLB_FLAGS_MASK; 1592 1593 /* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */ 1594 if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) { 1595 *phost = NULL; 1596 return TLB_MMIO; 1597 } 1598 1599 /* Everything else is RAM. */ 1600 *phost = (void *)((uintptr_t)addr + entry->addend); 1601 return flags; 1602 } 1603 1604 int probe_access_flags(CPUArchState *env, target_ulong addr, 1605 MMUAccessType access_type, int mmu_idx, 1606 bool nonfault, void **phost, uintptr_t retaddr) 1607 { 1608 int flags; 1609 1610 flags = probe_access_internal(env, addr, 0, access_type, mmu_idx, 1611 nonfault, phost, retaddr); 1612 1613 /* Handle clean RAM pages. */ 1614 if (unlikely(flags & TLB_NOTDIRTY)) { 1615 uintptr_t index = tlb_index(env, mmu_idx, addr); 1616 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1617 1618 notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr); 1619 flags &= ~TLB_NOTDIRTY; 1620 } 1621 1622 return flags; 1623 } 1624 1625 void *probe_access(CPUArchState *env, target_ulong addr, int size, 1626 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) 1627 { 1628 void *host; 1629 int flags; 1630 1631 g_assert(-(addr | TARGET_PAGE_MASK) >= size); 1632 1633 flags = probe_access_internal(env, addr, size, access_type, mmu_idx, 1634 false, &host, retaddr); 1635 1636 /* Per the interface, size == 0 merely faults the access. */ 1637 if (size == 0) { 1638 return NULL; 1639 } 1640 1641 if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) { 1642 uintptr_t index = tlb_index(env, mmu_idx, addr); 1643 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1644 1645 /* Handle watchpoints. */ 1646 if (flags & TLB_WATCHPOINT) { 1647 int wp_access = (access_type == MMU_DATA_STORE 1648 ? BP_MEM_WRITE : BP_MEM_READ); 1649 cpu_check_watchpoint(env_cpu(env), addr, size, 1650 iotlbentry->attrs, wp_access, retaddr); 1651 } 1652 1653 /* Handle clean RAM pages. */ 1654 if (flags & TLB_NOTDIRTY) { 1655 notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr); 1656 } 1657 } 1658 1659 return host; 1660 } 1661 1662 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, 1663 MMUAccessType access_type, int mmu_idx) 1664 { 1665 void *host; 1666 int flags; 1667 1668 flags = probe_access_internal(env, addr, 0, access_type, 1669 mmu_idx, true, &host, 0); 1670 1671 /* No combination of flags are expected by the caller. */ 1672 return flags ? NULL : host; 1673 } 1674 1675 #ifdef CONFIG_PLUGIN 1676 /* 1677 * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure. 1678 * This should be a hot path as we will have just looked this path up 1679 * in the softmmu lookup code (or helper). We don't handle re-fills or 1680 * checking the victim table. This is purely informational. 1681 * 1682 * This almost never fails as the memory access being instrumented 1683 * should have just filled the TLB. The one corner case is io_writex 1684 * which can cause TLB flushes and potential resizing of the TLBs 1685 * losing the information we need. In those cases we need to recover 1686 * data from a copy of the iotlbentry. As long as this always occurs 1687 * from the same thread (which a mem callback will be) this is safe. 
1688 */ 1689 1690 bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx, 1691 bool is_store, struct qemu_plugin_hwaddr *data) 1692 { 1693 CPUArchState *env = cpu->env_ptr; 1694 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); 1695 uintptr_t index = tlb_index(env, mmu_idx, addr); 1696 target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read; 1697 1698 if (likely(tlb_hit(tlb_addr, addr))) { 1699 /* We must have an iotlb entry for MMIO */ 1700 if (tlb_addr & TLB_MMIO) { 1701 CPUIOTLBEntry *iotlbentry; 1702 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1703 data->is_io = true; 1704 data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); 1705 data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; 1706 } else { 1707 data->is_io = false; 1708 data->v.ram.hostaddr = addr + tlbe->addend; 1709 } 1710 return true; 1711 } else { 1712 SavedIOTLB *saved = &cpu->saved_iotlb; 1713 data->is_io = true; 1714 data->v.io.section = saved->section; 1715 data->v.io.offset = saved->mr_offset; 1716 return true; 1717 } 1718 } 1719 1720 #endif 1721 1722 /* Probe for a read-modify-write atomic operation. Do not allow unaligned 1723 * operations, or io operations to proceed. Return the host address. */ 1724 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, 1725 TCGMemOpIdx oi, uintptr_t retaddr) 1726 { 1727 size_t mmu_idx = get_mmuidx(oi); 1728 uintptr_t index = tlb_index(env, mmu_idx, addr); 1729 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); 1730 target_ulong tlb_addr = tlb_addr_write(tlbe); 1731 MemOp mop = get_memop(oi); 1732 int a_bits = get_alignment_bits(mop); 1733 int s_bits = mop & MO_SIZE; 1734 void *hostaddr; 1735 1736 /* Adjust the given return address. */ 1737 retaddr -= GETPC_ADJ; 1738 1739 /* Enforce guest required alignment. */ 1740 if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) { 1741 /* ??? Maybe indicate atomic op to cpu_unaligned_access */ 1742 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE, 1743 mmu_idx, retaddr); 1744 } 1745 1746 /* Enforce qemu required alignment. */ 1747 if (unlikely(addr & ((1 << s_bits) - 1))) { 1748 /* We get here if guest alignment was not requested, 1749 or was not enforced by cpu_unaligned_access above. 1750 We might widen the access and emulate, but for now 1751 mark an exception and exit the cpu loop. */ 1752 goto stop_the_world; 1753 } 1754 1755 /* Check TLB entry and enforce page permissions. */ 1756 if (!tlb_hit(tlb_addr, addr)) { 1757 if (!VICTIM_TLB_HIT(addr_write, addr)) { 1758 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE, 1759 mmu_idx, retaddr); 1760 index = tlb_index(env, mmu_idx, addr); 1761 tlbe = tlb_entry(env, mmu_idx, addr); 1762 } 1763 tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; 1764 } 1765 1766 /* Notice an IO access or a needs-MMU-lookup access */ 1767 if (unlikely(tlb_addr & TLB_MMIO)) { 1768 /* There's really nothing that can be done to 1769 support this apart from stop-the-world. */ 1770 goto stop_the_world; 1771 } 1772 1773 /* Let the guest notice RMW on a write-only page. */ 1774 if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) { 1775 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD, 1776 mmu_idx, retaddr); 1777 /* Since we don't support reads and writes to different addresses, 1778 and we do have the proper page loaded for write, this shouldn't 1779 ever return. But just in case, handle via stop-the-world. 
        goto stop_the_world;
    }

    hostaddr = (void *)((uintptr_t)addr + tlbe->addend);

    if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
        notdirty_write(env_cpu(env), addr, 1 << s_bits,
                       &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
    }

    return hostaddr;

 stop_the_world:
    cpu_loop_exit_atomic(env_cpu(env), retaddr);
}

/*
 * Load Helpers
 *
 * We support two different access types. SOFTMMU_CODE_ACCESS is
 * specifically for reading instructions from system memory. It is
 * called by the translation loop and in some helpers where the code
 * is disassembled. It shouldn't be called directly by guest code.
 */

typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
                                TCGMemOpIdx oi, uintptr_t retaddr);

static inline uint64_t QEMU_ALWAYS_INLINE
load_memop(const void *haddr, MemOp op)
{
    switch (op) {
    case MO_UB:
        return ldub_p(haddr);
    case MO_BEUW:
        return lduw_be_p(haddr);
    case MO_LEUW:
        return lduw_le_p(haddr);
    case MO_BEUL:
        return (uint32_t)ldl_be_p(haddr);
    case MO_LEUL:
        return (uint32_t)ldl_le_p(haddr);
    case MO_BEQ:
        return ldq_be_p(haddr);
    case MO_LEQ:
        return ldq_le_p(haddr);
    default:
        qemu_build_not_reached();
    }
}

static inline uint64_t QEMU_ALWAYS_INLINE
load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
            uintptr_t retaddr, MemOp op, bool code_read,
            FullLoadHelper *full_load)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
    const size_t tlb_off = code_read ?
        offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
    const MMUAccessType access_type =
        code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    uint64_t res;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, access_type,
                             mmu_idx, retaddr);
    }

    /* If the TLB entry is for a different page, reload and try again.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size,
                     access_type, mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = code_read ? entry->addr_code : entry->addr_read;
        tlb_addr &= ~TLB_INVALID_MASK;
    }

    /* Handle anything that isn't just a straight memory access.  */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through full_load.  */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints.  */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out.  */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_READ, retaddr);
        }

        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
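
        /*
         * Descriptive note: when TLB_BSWAP is set the access must be
         * byte-swapped relative to the requested MemOp.  Folding it in
         * as an XOR with MO_BSWAP (need_swap is a bool, so
         * need_swap * MO_BSWAP is either 0 or MO_BSWAP) lets the same
         * io_readx()/load_memop() paths below handle both cases.
         */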

        /* Handle I/O access.  */
        if (likely(tlb_addr & TLB_MMIO)) {
            return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
                            access_type, op ^ (need_swap * MO_BSWAP));
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two load_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this.  ;-)
         */
        if (unlikely(need_swap)) {
            return load_memop(haddr, op ^ MO_BSWAP);
        }
        return load_memop(haddr, op);
    }

    /* Handle slow unaligned access (it spans two pages or IO).  */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
        target_ulong addr1, addr2;
        uint64_t r1, r2;
        unsigned shift;
    do_unaligned_access:
        addr1 = addr & ~((target_ulong)size - 1);
        addr2 = addr1 + size;
        r1 = full_load(env, addr1, oi, retaddr);
        r2 = full_load(env, addr2, oi, retaddr);
        shift = (addr & (size - 1)) * 8;

        if (memop_big_endian(op)) {
            /* Big-endian combine.  */
            res = (r1 << shift) | (r2 >> ((size * 8) - shift));
        } else {
            /* Little-endian combine.  */
            res = (r1 >> shift) | (r2 << ((size * 8) - shift));
        }
        return res & MAKE_64BIT_MASK(0, size * 8);
    }

    haddr = (void *)((uintptr_t)addr + entry->addend);
    return load_memop(haddr, op);
}

/*
 * For the benefit of TCG generated code, we want to avoid the
 * complication of ABI-specific return type promotion and always
 * return a value extended to the register size of the host. This is
 * tcg_target_long, except in the case of a 32-bit host and 64-bit
 * data, and for that we always have uint64_t.
 *
 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
 */
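
/*
 * For example, helper_le_ldul_mmu below is declared to return
 * tcg_target_ulong rather than uint32_t: the 32-bit result comes back
 * zero-extended to the full host register width, so TCG-generated code
 * can use the returned register directly without depending on
 * ABI-specific promotion rules.
 */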
1944 */ 1945 1946 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr, 1947 TCGMemOpIdx oi, uintptr_t retaddr) 1948 { 1949 return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu); 1950 } 1951 1952 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, 1953 TCGMemOpIdx oi, uintptr_t retaddr) 1954 { 1955 return full_ldub_mmu(env, addr, oi, retaddr); 1956 } 1957 1958 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr, 1959 TCGMemOpIdx oi, uintptr_t retaddr) 1960 { 1961 return load_helper(env, addr, oi, retaddr, MO_LEUW, false, 1962 full_le_lduw_mmu); 1963 } 1964 1965 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr, 1966 TCGMemOpIdx oi, uintptr_t retaddr) 1967 { 1968 return full_le_lduw_mmu(env, addr, oi, retaddr); 1969 } 1970 1971 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr, 1972 TCGMemOpIdx oi, uintptr_t retaddr) 1973 { 1974 return load_helper(env, addr, oi, retaddr, MO_BEUW, false, 1975 full_be_lduw_mmu); 1976 } 1977 1978 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr, 1979 TCGMemOpIdx oi, uintptr_t retaddr) 1980 { 1981 return full_be_lduw_mmu(env, addr, oi, retaddr); 1982 } 1983 1984 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr, 1985 TCGMemOpIdx oi, uintptr_t retaddr) 1986 { 1987 return load_helper(env, addr, oi, retaddr, MO_LEUL, false, 1988 full_le_ldul_mmu); 1989 } 1990 1991 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr, 1992 TCGMemOpIdx oi, uintptr_t retaddr) 1993 { 1994 return full_le_ldul_mmu(env, addr, oi, retaddr); 1995 } 1996 1997 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr, 1998 TCGMemOpIdx oi, uintptr_t retaddr) 1999 { 2000 return load_helper(env, addr, oi, retaddr, MO_BEUL, false, 2001 full_be_ldul_mmu); 2002 } 2003 2004 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr, 2005 TCGMemOpIdx oi, uintptr_t retaddr) 2006 { 2007 return full_be_ldul_mmu(env, addr, oi, retaddr); 2008 } 2009 2010 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, 2011 TCGMemOpIdx oi, uintptr_t retaddr) 2012 { 2013 return load_helper(env, addr, oi, retaddr, MO_LEQ, false, 2014 helper_le_ldq_mmu); 2015 } 2016 2017 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, 2018 TCGMemOpIdx oi, uintptr_t retaddr) 2019 { 2020 return load_helper(env, addr, oi, retaddr, MO_BEQ, false, 2021 helper_be_ldq_mmu); 2022 } 2023 2024 /* 2025 * Provide signed versions of the load routines as well. We can of course 2026 * avoid this for 64-bit data, or for 32-bit data on 32-bit host. 
2027 */ 2028 2029 2030 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, 2031 TCGMemOpIdx oi, uintptr_t retaddr) 2032 { 2033 return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr); 2034 } 2035 2036 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr, 2037 TCGMemOpIdx oi, uintptr_t retaddr) 2038 { 2039 return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr); 2040 } 2041 2042 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr, 2043 TCGMemOpIdx oi, uintptr_t retaddr) 2044 { 2045 return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr); 2046 } 2047 2048 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr, 2049 TCGMemOpIdx oi, uintptr_t retaddr) 2050 { 2051 return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr); 2052 } 2053 2054 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, 2055 TCGMemOpIdx oi, uintptr_t retaddr) 2056 { 2057 return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr); 2058 } 2059 2060 /* 2061 * Load helpers for cpu_ldst.h. 2062 */ 2063 2064 static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr, 2065 int mmu_idx, uintptr_t retaddr, 2066 MemOp op, FullLoadHelper *full_load) 2067 { 2068 uint16_t meminfo; 2069 TCGMemOpIdx oi; 2070 uint64_t ret; 2071 2072 meminfo = trace_mem_get_info(op, mmu_idx, false); 2073 trace_guest_mem_before_exec(env_cpu(env), addr, meminfo); 2074 2075 op &= ~MO_SIGN; 2076 oi = make_memop_idx(op, mmu_idx); 2077 ret = full_load(env, addr, oi, retaddr); 2078 2079 qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo); 2080 2081 return ret; 2082 } 2083 2084 uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2085 int mmu_idx, uintptr_t ra) 2086 { 2087 return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu); 2088 } 2089 2090 int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2091 int mmu_idx, uintptr_t ra) 2092 { 2093 return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB, 2094 full_ldub_mmu); 2095 } 2096 2097 uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2098 int mmu_idx, uintptr_t ra) 2099 { 2100 return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUW, full_be_lduw_mmu); 2101 } 2102 2103 int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2104 int mmu_idx, uintptr_t ra) 2105 { 2106 return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_BESW, 2107 full_be_lduw_mmu); 2108 } 2109 2110 uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2111 int mmu_idx, uintptr_t ra) 2112 { 2113 return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUL, full_be_ldul_mmu); 2114 } 2115 2116 uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2117 int mmu_idx, uintptr_t ra) 2118 { 2119 return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEQ, helper_be_ldq_mmu); 2120 } 2121 2122 uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2123 int mmu_idx, uintptr_t ra) 2124 { 2125 return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUW, full_le_lduw_mmu); 2126 } 2127 2128 int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2129 int mmu_idx, uintptr_t ra) 2130 { 2131 return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_LESW, 2132 full_le_lduw_mmu); 2133 } 2134 2135 uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2136 int mmu_idx, uintptr_t ra) 2137 { 2138 return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUL, full_le_ldul_mmu); 2139 } 2140 2141 uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2142 int mmu_idx, uintptr_t 

uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                              int mmu_idx, uintptr_t ra)
{
    return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEQ, helper_le_ldq_mmu);
}

uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr,
                          uintptr_t retaddr)
{
    return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_lduw_be_data_ra(CPUArchState *env, target_ulong ptr,
                             uintptr_t retaddr)
{
    return cpu_lduw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

int cpu_ldsw_be_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldsw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_ldl_be_data_ra(CPUArchState *env, target_ulong ptr,
                            uintptr_t retaddr)
{
    return cpu_ldl_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint64_t cpu_ldq_be_data_ra(CPUArchState *env, target_ulong ptr,
                            uintptr_t retaddr)
{
    return cpu_ldq_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_lduw_le_data_ra(CPUArchState *env, target_ulong ptr,
                             uintptr_t retaddr)
{
    return cpu_lduw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

int cpu_ldsw_le_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
{
    return cpu_ldsw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_ldl_le_data_ra(CPUArchState *env, target_ulong ptr,
                            uintptr_t retaddr)
{
    return cpu_ldl_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint64_t cpu_ldq_le_data_ra(CPUArchState *env, target_ulong ptr,
                            uintptr_t retaddr)
{
    return cpu_ldq_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
}

uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldub_data_ra(env, ptr, 0);
}

int cpu_ldsb_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldsb_data_ra(env, ptr, 0);
}

uint32_t cpu_lduw_be_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_lduw_be_data_ra(env, ptr, 0);
}

int cpu_ldsw_be_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldsw_be_data_ra(env, ptr, 0);
}

uint32_t cpu_ldl_be_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldl_be_data_ra(env, ptr, 0);
}

uint64_t cpu_ldq_be_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldq_be_data_ra(env, ptr, 0);
}

uint32_t cpu_lduw_le_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_lduw_le_data_ra(env, ptr, 0);
}

int cpu_ldsw_le_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldsw_le_data_ra(env, ptr, 0);
}

uint32_t cpu_ldl_le_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldl_le_data_ra(env, ptr, 0);
}

uint64_t cpu_ldq_le_data(CPUArchState *env, target_ulong ptr)
{
    return cpu_ldq_le_data_ra(env, ptr, 0);
}
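
/*
 * Illustrative sketch only: a target helper that walks a guest
 * in-memory structure would normally use the *_ra variants with the
 * helper's own return address, so that a TLB fault unwinds to the
 * correct guest instruction, e.g.
 *
 *     uintptr_t ra = GETPC();
 *     uint32_t lo = cpu_ldl_le_data_ra(env, desc_addr, ra);
 *     uint32_t hi = cpu_ldl_le_data_ra(env, desc_addr + 4, ra);
 *
 * where desc_addr is a hypothetical guest address, not an API name.
 * The plain cpu_ld*_data forms above pass retaddr == 0 and are meant
 * for calls made outside of generated code.
 */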

/*
 * Store Helpers
 */

static inline void QEMU_ALWAYS_INLINE
store_memop(void *haddr, uint64_t val, MemOp op)
{
    switch (op) {
    case MO_UB:
        stb_p(haddr, val);
        break;
    case MO_BEUW:
        stw_be_p(haddr, val);
        break;
    case MO_LEUW:
        stw_le_p(haddr, val);
        break;
    case MO_BEUL:
        stl_be_p(haddr, val);
        break;
    case MO_LEUL:
        stl_le_p(haddr, val);
        break;
    case MO_BEQ:
        stq_be_p(haddr, val);
        break;
    case MO_LEQ:
        stq_le_p(haddr, val);
        break;
    default:
        qemu_build_not_reached();
    }
}

static void __attribute__((noinline))
store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
                       uintptr_t retaddr, size_t size, uintptr_t mmu_idx,
                       bool big_endian)
{
    const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
    uintptr_t index, index2;
    CPUTLBEntry *entry, *entry2;
    target_ulong page2, tlb_addr, tlb_addr2;
    TCGMemOpIdx oi;
    size_t size2;
    int i;

    /*
     * Ensure the second page is in the TLB.  Note that the first page
     * is already guaranteed to be filled, and that the second page
     * cannot evict the first.
     */
    page2 = (addr + size) & TARGET_PAGE_MASK;
    size2 = (addr + size) & ~TARGET_PAGE_MASK;
    index2 = tlb_index(env, mmu_idx, page2);
    entry2 = tlb_entry(env, mmu_idx, page2);

    tlb_addr2 = tlb_addr_write(entry2);
    if (!tlb_hit_page(tlb_addr2, page2)) {
        if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
            tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            index2 = tlb_index(env, mmu_idx, page2);
            entry2 = tlb_entry(env, mmu_idx, page2);
        }
        tlb_addr2 = tlb_addr_write(entry2);
    }

    index = tlb_index(env, mmu_idx, addr);
    entry = tlb_entry(env, mmu_idx, addr);
    tlb_addr = tlb_addr_write(entry);

    /*
     * Handle watchpoints.  Since this may trap, all checks
     * must happen before any store.
     */
    if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
        cpu_check_watchpoint(env_cpu(env), addr, size - size2,
                             env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
                             BP_MEM_WRITE, retaddr);
    }
    if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
        cpu_check_watchpoint(env_cpu(env), page2, size2,
                             env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
                             BP_MEM_WRITE, retaddr);
    }

    /*
     * XXX: not efficient, but simple.
     * This loop must go in the forward direction to avoid issues
     * with self-modifying code in Windows 64-bit.
     */
    oi = make_memop_idx(MO_UB, mmu_idx);
    if (big_endian) {
        for (i = 0; i < size; ++i) {
            /* Big-endian extract.  */
            uint8_t val8 = val >> (((size - 1) * 8) - (i * 8));
            helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
        }
    } else {
        for (i = 0; i < size; ++i) {
            /* Little-endian extract.  */
            uint8_t val8 = val >> (i * 8);
            helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
        }
    }
}

static inline void QEMU_ALWAYS_INLINE
store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
             TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(entry);
    const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* If the TLB entry is for a different page, reload and try again.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
    }

    /* Handle anything that isn't just a straight memory access.  */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through byte stores.  */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints.  */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out.  */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
        }

        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access.  */
        if (tlb_addr & TLB_MMIO) {
            io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
                      op ^ (need_swap * MO_BSWAP));
            return;
        }

        /* Ignore writes to ROM.  */
        if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
            return;
        }

        /* Handle clean RAM pages.  */
        if (tlb_addr & TLB_NOTDIRTY) {
            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two store_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this.  ;-)
         */
        if (unlikely(need_swap)) {
            store_memop(haddr, val, op ^ MO_BSWAP);
        } else {
            store_memop(haddr, val, op);
        }
        return;
    }
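
    /*
     * Worked example for the page-crossing test below (assuming 4KiB
     * target pages): a 4-byte store at addr 0xffd gives
     * (addr & ~TARGET_PAGE_MASK) + size - 1 == 0xffd + 3 == 0x1000,
     * which is >= TARGET_PAGE_SIZE, so the store spills into the next
     * page and must take the byte-by-byte slow path; at 0xffc the sum
     * is only 0xfff and the direct store_memop() fast path is used.
     */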

    /* Handle slow unaligned access (it spans two pages or IO).  */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
    do_unaligned_access:
        store_helper_unaligned(env, addr, val, retaddr, size,
                               mmu_idx, memop_big_endian(op));
        return;
    }

    haddr = (void *)((uintptr_t)addr + entry->addend);
    store_memop(haddr, val, op);
}

void __attribute__((noinline))
helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
                   TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_UB);
}

void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUW);
}

void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUW);
}

void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUL);
}

void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUL);
}

void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEQ);
}

void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEQ);
}

/*
 * Store Helpers for cpu_ldst.h
 */

static inline void QEMU_ALWAYS_INLINE
cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
                 int mmu_idx, uintptr_t retaddr, MemOp op)
{
    TCGMemOpIdx oi;
    uint16_t meminfo;

    meminfo = trace_mem_get_info(op, mmu_idx, true);
    trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);

    oi = make_memop_idx(op, mmu_idx);
    store_helper(env, addr, val, oi, retaddr, op);

    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
}

void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
}

void cpu_stw_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                          int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUW);
}

void cpu_stl_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                          int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUL);
}

void cpu_stq_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
                          int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEQ);
}

void cpu_stw_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                          int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUW);
}

void cpu_stl_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                          int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUL);
}

void cpu_stq_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
                          int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEQ);
}

void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stw_be_data_ra(CPUArchState *env, target_ulong ptr,
                        uint32_t val, uintptr_t retaddr)
{
    cpu_stw_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stl_be_data_ra(CPUArchState *env, target_ulong ptr,
                        uint32_t val, uintptr_t retaddr)
{
    cpu_stl_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stq_be_data_ra(CPUArchState *env, target_ulong ptr,
                        uint64_t val, uintptr_t retaddr)
{
    cpu_stq_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stw_le_data_ra(CPUArchState *env, target_ulong ptr,
                        uint32_t val, uintptr_t retaddr)
{
    cpu_stw_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stl_le_data_ra(CPUArchState *env, target_ulong ptr,
                        uint32_t val, uintptr_t retaddr)
{
    cpu_stl_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stq_le_data_ra(CPUArchState *env, target_ulong ptr,
                        uint64_t val, uintptr_t retaddr)
{
    cpu_stq_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stb_data_ra(env, ptr, val, 0);
}

void cpu_stw_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stw_be_data_ra(env, ptr, val, 0);
}

void cpu_stl_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stl_be_data_ra(env, ptr, val, 0);
}

void cpu_stq_be_data(CPUArchState *env, target_ulong ptr, uint64_t val)
{
    cpu_stq_be_data_ra(env, ptr, val, 0);
}

void cpu_stw_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stw_le_data_ra(env, ptr, val, 0);
}

void cpu_stl_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stl_le_data_ra(env, ptr, val, 0);
}

void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val)
{
    cpu_stq_le_data_ra(env, ptr, val, 0);
}

/* First set of helpers allows passing in of OI and RETADDR.  This makes
   them callable from other helpers.  */

#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
#define ATOMIC_NAME(X) \
    HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
#define ATOMIC_MMU_DECLS
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
#define ATOMIC_MMU_CLEANUP
#define ATOMIC_MMU_IDX get_mmuidx(oi)

#include "atomic_common.c.inc"

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif
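
/*
 * Note on the template expansion above: each inclusion of
 * atomic_template.h instantiates the atomic helpers (compare-and-swap
 * and, for the smaller sizes, the exchange/fetch-op variants) for the
 * current DATA_SIZE, using ATOMIC_MMU_LOOKUP, i.e. atomic_mmu_lookup()
 * above, to obtain a writable host address before the operation runs.
 * The helper names are built by ATOMIC_NAME from the size SUFFIX and
 * endianness END defined inside the template; for example, DATA_SIZE
 * == 4 is expected to yield names along the lines of
 * helper_atomic_cmpxchgl_le_mmu (illustrative, not defined here).
 */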

/* Second set of helpers are directly callable from TCG as helpers.  */

#undef EXTRA_ARGS
#undef ATOMIC_NAME
#undef ATOMIC_MMU_LOOKUP
#define EXTRA_ARGS , TCGMemOpIdx oi
#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC())

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif
#undef ATOMIC_MMU_IDX

/* Code access functions.  */

static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
}

uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
    return full_ldub_code(env, addr, oi, 0);
}

static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
}

uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
    return full_lduw_code(env, addr, oi, 0);
}

static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
}

uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
    return full_ldl_code(env, addr, oi, 0);
}

static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
}

uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
    return full_ldq_code(env, addr, oi, 0);
}
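
/*
 * Note: the cpu_ld*_code functions above fetch through the softmmu path
 * with code_read == true, so load_helper() consults addr_code, reports
 * faults as MMU_INST_FETCH, and uses the mmu index returned by
 * cpu_mmu_index(env, true) rather than the data index used by the
 * cpu_ld*_data helpers earlier in this file.
 */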