1 /* 2 * Common CPU TLB handling 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qemu/main-loop.h" 22 #include "hw/core/tcg-cpu-ops.h" 23 #include "exec/exec-all.h" 24 #include "exec/memory.h" 25 #include "exec/cpu_ldst.h" 26 #include "exec/cputlb.h" 27 #include "exec/tb-hash.h" 28 #include "exec/memory-internal.h" 29 #include "exec/ram_addr.h" 30 #include "tcg/tcg.h" 31 #include "qemu/error-report.h" 32 #include "exec/log.h" 33 #include "exec/helper-proto.h" 34 #include "qemu/atomic.h" 35 #include "qemu/atomic128.h" 36 #include "exec/translate-all.h" 37 #include "trace/trace-root.h" 38 #include "trace/mem.h" 39 #include "internal.h" 40 #ifdef CONFIG_PLUGIN 41 #include "qemu/plugin-memory.h" 42 #endif 43 44 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ 45 /* #define DEBUG_TLB */ 46 /* #define DEBUG_TLB_LOG */ 47 48 #ifdef DEBUG_TLB 49 # define DEBUG_TLB_GATE 1 50 # ifdef DEBUG_TLB_LOG 51 # define DEBUG_TLB_LOG_GATE 1 52 # else 53 # define DEBUG_TLB_LOG_GATE 0 54 # endif 55 #else 56 # define DEBUG_TLB_GATE 0 57 # define DEBUG_TLB_LOG_GATE 0 58 #endif 59 60 #define tlb_debug(fmt, ...) do { \ 61 if (DEBUG_TLB_LOG_GATE) { \ 62 qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \ 63 ## __VA_ARGS__); \ 64 } else if (DEBUG_TLB_GATE) { \ 65 fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \ 66 } \ 67 } while (0) 68 69 #define assert_cpu_is_self(cpu) do { \ 70 if (DEBUG_TLB_GATE) { \ 71 g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \ 72 } \ 73 } while (0) 74 75 /* run_on_cpu_data.target_ptr should always be big enough for a 76 * target_ulong even on 32 bit builds */ 77 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data)); 78 79 /* We currently can't handle more than 16 bits in the MMUIDX bitmask. 80 */ 81 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); 82 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1) 83 84 static inline size_t tlb_n_entries(CPUTLBDescFast *fast) 85 { 86 return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1; 87 } 88 89 static inline size_t sizeof_tlb(CPUTLBDescFast *fast) 90 { 91 return fast->mask + (1 << CPU_TLB_ENTRY_BITS); 92 } 93 94 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, 95 size_t max_entries) 96 { 97 desc->window_begin_ns = ns; 98 desc->window_max_entries = max_entries; 99 } 100 101 static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr) 102 { 103 unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr); 104 105 for (i = 0; i < TB_JMP_PAGE_SIZE; i++) { 106 qatomic_set(&cpu->tb_jmp_cache[i0 + i], NULL); 107 } 108 } 109 110 static void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr) 111 { 112 /* Discard jump cache entries for any tb which might potentially 113 overlap the flushed page. 
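       (A TB can cross at most one page boundary, so entries hashed from the
       preceding page may also refer to TBs that extend into the flushed page.)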
     */
    tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
    tb_jmp_cache_clear_page(cpu, addr);
}

/**
 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
 * @desc: The CPUTLBDesc portion of the TLB
 * @fast: The CPUTLBDescFast portion of the same TLB
 *
 * Called with tlb_lock held.
 *
 * We have two main constraints when resizing a TLB: (1) we only resize it
 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
 * the array or unnecessarily flushing it), which means we do not control how
 * frequently the resizing can occur; (2) we don't have access to the guest's
 * future scheduling decisions, and therefore have to decide the magnitude of
 * the resize based on past observations.
 *
 * In general, a memory-hungry process can benefit greatly from an appropriately
 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
 * we just have to make the TLB as large as possible; while an oversized TLB
 * results in minimal TLB miss rates, it also takes longer to be flushed
 * (flushes can be _very_ frequent), and the reduced locality can also hurt
 * performance.
 *
 * To achieve near-optimal performance for all kinds of workloads, we:
 *
 * 1. Aggressively increase the size of the TLB when the use rate of the
 * TLB being flushed is high, since it is likely that in the near future this
 * memory-hungry process will execute again, and its memory hungriness will
 * probably be similar.
 *
 * 2. Slowly reduce the size of the TLB as the use rate declines over a
 * reasonably large time window. The rationale is that if in such a time window
 * we have not observed a high TLB use rate, it is likely that we won't observe
 * it in the near future. In that case, once a time window expires we downsize
 * the TLB to match the maximum use rate observed in the window.
 *
 * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
 * since in that range performance is likely near-optimal. Recall that the TLB
 * is direct mapped, so we want the use rate to be low (or at least not too
 * high), since otherwise we are likely to have a significant amount of
 * conflict misses.
 */
static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
                                  int64_t now)
{
    size_t old_size = tlb_n_entries(fast);
    size_t rate;
    size_t new_size = old_size;
    int64_t window_len_ms = 100;
    int64_t window_len_ns = window_len_ms * 1000 * 1000;
    bool window_expired = now > desc->window_begin_ns + window_len_ns;

    if (desc->n_used_entries > desc->window_max_entries) {
        desc->window_max_entries = desc->n_used_entries;
    }
    rate = desc->window_max_entries * 100 / old_size;

    if (rate > 70) {
        new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
    } else if (rate < 30 && window_expired) {
        size_t ceil = pow2ceil(desc->window_max_entries);
        size_t expected_rate = desc->window_max_entries * 100 / ceil;

        /*
         * Avoid undersizing when the max number of entries seen is just below
         * a pow2. For instance, if max_entries == 1025, the expected use rate
         * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
         * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
         * later. Thus, make sure that the expected use rate remains below 70%.
185 * (and since we double the size, that means the lowest rate we'd 186 * expect to get is 35%, which is still in the 30-70% range where 187 * we consider that the size is appropriate.) 188 */ 189 if (expected_rate > 70) { 190 ceil *= 2; 191 } 192 new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS); 193 } 194 195 if (new_size == old_size) { 196 if (window_expired) { 197 tlb_window_reset(desc, now, desc->n_used_entries); 198 } 199 return; 200 } 201 202 g_free(fast->table); 203 g_free(desc->iotlb); 204 205 tlb_window_reset(desc, now, 0); 206 /* desc->n_used_entries is cleared by the caller */ 207 fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; 208 fast->table = g_try_new(CPUTLBEntry, new_size); 209 desc->iotlb = g_try_new(CPUIOTLBEntry, new_size); 210 211 /* 212 * If the allocations fail, try smaller sizes. We just freed some 213 * memory, so going back to half of new_size has a good chance of working. 214 * Increased memory pressure elsewhere in the system might cause the 215 * allocations to fail though, so we progressively reduce the allocation 216 * size, aborting if we cannot even allocate the smallest TLB we support. 217 */ 218 while (fast->table == NULL || desc->iotlb == NULL) { 219 if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) { 220 error_report("%s: %s", __func__, strerror(errno)); 221 abort(); 222 } 223 new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS); 224 fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; 225 226 g_free(fast->table); 227 g_free(desc->iotlb); 228 fast->table = g_try_new(CPUTLBEntry, new_size); 229 desc->iotlb = g_try_new(CPUIOTLBEntry, new_size); 230 } 231 } 232 233 static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast) 234 { 235 desc->n_used_entries = 0; 236 desc->large_page_addr = -1; 237 desc->large_page_mask = -1; 238 desc->vindex = 0; 239 memset(fast->table, -1, sizeof_tlb(fast)); 240 memset(desc->vtable, -1, sizeof(desc->vtable)); 241 } 242 243 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx, 244 int64_t now) 245 { 246 CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx]; 247 CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx]; 248 249 tlb_mmu_resize_locked(desc, fast, now); 250 tlb_mmu_flush_locked(desc, fast); 251 } 252 253 static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now) 254 { 255 size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS; 256 257 tlb_window_reset(desc, now, 0); 258 desc->n_used_entries = 0; 259 fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS; 260 fast->table = g_new(CPUTLBEntry, n_entries); 261 desc->iotlb = g_new(CPUIOTLBEntry, n_entries); 262 tlb_mmu_flush_locked(desc, fast); 263 } 264 265 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx) 266 { 267 env_tlb(env)->d[mmu_idx].n_used_entries++; 268 } 269 270 static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx) 271 { 272 env_tlb(env)->d[mmu_idx].n_used_entries--; 273 } 274 275 void tlb_init(CPUState *cpu) 276 { 277 CPUArchState *env = cpu->env_ptr; 278 int64_t now = get_clock_realtime(); 279 int i; 280 281 qemu_spin_init(&env_tlb(env)->c.lock); 282 283 /* All tlbs are initialized flushed. 
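       (c.dirty below is a bitmask of MMU indexes that have gained entries
       since their last flush; tlb_flush_by_mmuidx_async_work uses it to
       elide flushes of still-clean indexes.)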
     */
    env_tlb(env)->c.dirty = 0;

    for (i = 0; i < NB_MMU_MODES; i++) {
        tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
    }
}

void tlb_destroy(CPUState *cpu)
{
    CPUArchState *env = cpu->env_ptr;
    int i;

    qemu_spin_destroy(&env_tlb(env)->c.lock);
    for (i = 0; i < NB_MMU_MODES; i++) {
        CPUTLBDesc *desc = &env_tlb(env)->d[i];
        CPUTLBDescFast *fast = &env_tlb(env)->f[i];

        g_free(fast->table);
        g_free(desc->iotlb);
    }
}

/* flush_all_helper: run fn asynchronously on all cpus other than src
 *
 * The source cpu itself is skipped; the caller either runs fn on it
 * directly, or queues it as "safe" work (see the *_synced variants)
 * to create a synchronisation point where all queued work is finished
 * before execution starts again.
 */
static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
                             run_on_cpu_data d)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu != src) {
            async_run_on_cpu(cpu, fn, d);
        }
    }
}

void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
{
    CPUState *cpu;
    size_t full = 0, part = 0, elide = 0;

    CPU_FOREACH(cpu) {
        CPUArchState *env = cpu->env_ptr;

        full += qatomic_read(&env_tlb(env)->c.full_flush_count);
        part += qatomic_read(&env_tlb(env)->c.part_flush_count);
        elide += qatomic_read(&env_tlb(env)->c.elide_flush_count);
    }
    *pfull = full;
    *ppart = part;
    *pelide = elide;
}

static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
{
    CPUArchState *env = cpu->env_ptr;
    uint16_t asked = data.host_int;
    uint16_t all_dirty, work, to_clean;
    int64_t now = get_clock_realtime();

    assert_cpu_is_self(cpu);

    tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);

    qemu_spin_lock(&env_tlb(env)->c.lock);

    all_dirty = env_tlb(env)->c.dirty;
    to_clean = asked & all_dirty;
    all_dirty &= ~to_clean;
    env_tlb(env)->c.dirty = all_dirty;

    for (work = to_clean; work != 0; work &= work - 1) {
        int mmu_idx = ctz32(work);
        tlb_flush_one_mmuidx_locked(env, mmu_idx, now);
    }

    qemu_spin_unlock(&env_tlb(env)->c.lock);

    cpu_tb_jmp_cache_clear(cpu);

    if (to_clean == ALL_MMUIDX_BITS) {
        qatomic_set(&env_tlb(env)->c.full_flush_count,
                    env_tlb(env)->c.full_flush_count + 1);
    } else {
        qatomic_set(&env_tlb(env)->c.part_flush_count,
                    env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
        if (to_clean != asked) {
            qatomic_set(&env_tlb(env)->c.elide_flush_count,
                        env_tlb(env)->c.elide_flush_count +
                        ctpop16(asked & ~to_clean));
        }
    }
}

void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
{
    tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);

    if (cpu->created && !qemu_cpu_is_self(cpu)) {
        async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
                         RUN_ON_CPU_HOST_INT(idxmap));
    } else {
        tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
    }
}

void tlb_flush(CPUState *cpu)
{
    tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
}

void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;

    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
}

void tlb_flush_all_cpus(CPUState *src_cpu)
{
    tlb_flush_by_mmuidx_all_cpus(src_cpu,
ALL_MMUIDX_BITS); 413 } 414 415 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap) 416 { 417 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work; 418 419 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap); 420 421 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); 422 async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); 423 } 424 425 void tlb_flush_all_cpus_synced(CPUState *src_cpu) 426 { 427 tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS); 428 } 429 430 static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry, 431 target_ulong page, target_ulong mask) 432 { 433 page &= mask; 434 mask &= TARGET_PAGE_MASK | TLB_INVALID_MASK; 435 436 return (page == (tlb_entry->addr_read & mask) || 437 page == (tlb_addr_write(tlb_entry) & mask) || 438 page == (tlb_entry->addr_code & mask)); 439 } 440 441 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, 442 target_ulong page) 443 { 444 return tlb_hit_page_mask_anyprot(tlb_entry, page, -1); 445 } 446 447 /** 448 * tlb_entry_is_empty - return true if the entry is not in use 449 * @te: pointer to CPUTLBEntry 450 */ 451 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te) 452 { 453 return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1; 454 } 455 456 /* Called with tlb_c.lock held */ 457 static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry, 458 target_ulong page, 459 target_ulong mask) 460 { 461 if (tlb_hit_page_mask_anyprot(tlb_entry, page, mask)) { 462 memset(tlb_entry, -1, sizeof(*tlb_entry)); 463 return true; 464 } 465 return false; 466 } 467 468 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, 469 target_ulong page) 470 { 471 return tlb_flush_entry_mask_locked(tlb_entry, page, -1); 472 } 473 474 /* Called with tlb_c.lock held */ 475 static void tlb_flush_vtlb_page_mask_locked(CPUArchState *env, int mmu_idx, 476 target_ulong page, 477 target_ulong mask) 478 { 479 CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx]; 480 int k; 481 482 assert_cpu_is_self(env_cpu(env)); 483 for (k = 0; k < CPU_VTLB_SIZE; k++) { 484 if (tlb_flush_entry_mask_locked(&d->vtable[k], page, mask)) { 485 tlb_n_used_entries_dec(env, mmu_idx); 486 } 487 } 488 } 489 490 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, 491 target_ulong page) 492 { 493 tlb_flush_vtlb_page_mask_locked(env, mmu_idx, page, -1); 494 } 495 496 static void tlb_flush_page_locked(CPUArchState *env, int midx, 497 target_ulong page) 498 { 499 target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr; 500 target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask; 501 502 /* Check if we need to flush due to large pages. */ 503 if ((page & lp_mask) == lp_addr) { 504 tlb_debug("forcing full flush midx %d (" 505 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", 506 midx, lp_addr, lp_mask); 507 tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); 508 } else { 509 if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) { 510 tlb_n_used_entries_dec(env, midx); 511 } 512 tlb_flush_vtlb_page_locked(env, midx, page); 513 } 514 } 515 516 /** 517 * tlb_flush_page_by_mmuidx_async_0: 518 * @cpu: cpu on which to flush 519 * @addr: page of virtual address to flush 520 * @idxmap: set of mmu_idx to flush 521 * 522 * Helper for tlb_flush_page_by_mmuidx and friends, flush one page 523 * at @addr from the tlbs indicated by @idxmap from @cpu. 
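 * This must run on @cpu's own thread; remote callers schedule it via
 * async_run_on_cpu using the _async_1/_async_2 wrappers below.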
524 */ 525 static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu, 526 target_ulong addr, 527 uint16_t idxmap) 528 { 529 CPUArchState *env = cpu->env_ptr; 530 int mmu_idx; 531 532 assert_cpu_is_self(cpu); 533 534 tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap); 535 536 qemu_spin_lock(&env_tlb(env)->c.lock); 537 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 538 if ((idxmap >> mmu_idx) & 1) { 539 tlb_flush_page_locked(env, mmu_idx, addr); 540 } 541 } 542 qemu_spin_unlock(&env_tlb(env)->c.lock); 543 544 tb_flush_jmp_cache(cpu, addr); 545 } 546 547 /** 548 * tlb_flush_page_by_mmuidx_async_1: 549 * @cpu: cpu on which to flush 550 * @data: encoded addr + idxmap 551 * 552 * Helper for tlb_flush_page_by_mmuidx and friends, called through 553 * async_run_on_cpu. The idxmap parameter is encoded in the page 554 * offset of the target_ptr field. This limits the set of mmu_idx 555 * that can be passed via this method. 556 */ 557 static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu, 558 run_on_cpu_data data) 559 { 560 target_ulong addr_and_idxmap = (target_ulong) data.target_ptr; 561 target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK; 562 uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK; 563 564 tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap); 565 } 566 567 typedef struct { 568 target_ulong addr; 569 uint16_t idxmap; 570 } TLBFlushPageByMMUIdxData; 571 572 /** 573 * tlb_flush_page_by_mmuidx_async_2: 574 * @cpu: cpu on which to flush 575 * @data: allocated addr + idxmap 576 * 577 * Helper for tlb_flush_page_by_mmuidx and friends, called through 578 * async_run_on_cpu. The addr+idxmap parameters are stored in a 579 * TLBFlushPageByMMUIdxData structure that has been allocated 580 * specifically for this helper. Free the structure when done. 581 */ 582 static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu, 583 run_on_cpu_data data) 584 { 585 TLBFlushPageByMMUIdxData *d = data.host_ptr; 586 587 tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap); 588 g_free(d); 589 } 590 591 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap) 592 { 593 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap); 594 595 /* This should already be page aligned */ 596 addr &= TARGET_PAGE_MASK; 597 598 if (qemu_cpu_is_self(cpu)) { 599 tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap); 600 } else if (idxmap < TARGET_PAGE_SIZE) { 601 /* 602 * Most targets have only a few mmu_idx. In the case where 603 * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid 604 * allocating memory for this operation. 605 */ 606 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1, 607 RUN_ON_CPU_TARGET_PTR(addr | idxmap)); 608 } else { 609 TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1); 610 611 /* Otherwise allocate a structure, freed by the worker. */ 612 d->addr = addr; 613 d->idxmap = idxmap; 614 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2, 615 RUN_ON_CPU_HOST_PTR(d)); 616 } 617 } 618 619 void tlb_flush_page(CPUState *cpu, target_ulong addr) 620 { 621 tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS); 622 } 623 624 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr, 625 uint16_t idxmap) 626 { 627 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap); 628 629 /* This should already be page aligned */ 630 addr &= TARGET_PAGE_MASK; 631 632 /* 633 * Allocate memory to hold addr+idxmap only when needed. 634 * See tlb_flush_page_by_mmuidx for details. 
635 */ 636 if (idxmap < TARGET_PAGE_SIZE) { 637 flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1, 638 RUN_ON_CPU_TARGET_PTR(addr | idxmap)); 639 } else { 640 CPUState *dst_cpu; 641 642 /* Allocate a separate data block for each destination cpu. */ 643 CPU_FOREACH(dst_cpu) { 644 if (dst_cpu != src_cpu) { 645 TLBFlushPageByMMUIdxData *d 646 = g_new(TLBFlushPageByMMUIdxData, 1); 647 648 d->addr = addr; 649 d->idxmap = idxmap; 650 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2, 651 RUN_ON_CPU_HOST_PTR(d)); 652 } 653 } 654 } 655 656 tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap); 657 } 658 659 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr) 660 { 661 tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS); 662 } 663 664 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu, 665 target_ulong addr, 666 uint16_t idxmap) 667 { 668 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap); 669 670 /* This should already be page aligned */ 671 addr &= TARGET_PAGE_MASK; 672 673 /* 674 * Allocate memory to hold addr+idxmap only when needed. 675 * See tlb_flush_page_by_mmuidx for details. 676 */ 677 if (idxmap < TARGET_PAGE_SIZE) { 678 flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1, 679 RUN_ON_CPU_TARGET_PTR(addr | idxmap)); 680 async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1, 681 RUN_ON_CPU_TARGET_PTR(addr | idxmap)); 682 } else { 683 CPUState *dst_cpu; 684 TLBFlushPageByMMUIdxData *d; 685 686 /* Allocate a separate data block for each destination cpu. */ 687 CPU_FOREACH(dst_cpu) { 688 if (dst_cpu != src_cpu) { 689 d = g_new(TLBFlushPageByMMUIdxData, 1); 690 d->addr = addr; 691 d->idxmap = idxmap; 692 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2, 693 RUN_ON_CPU_HOST_PTR(d)); 694 } 695 } 696 697 d = g_new(TLBFlushPageByMMUIdxData, 1); 698 d->addr = addr; 699 d->idxmap = idxmap; 700 async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2, 701 RUN_ON_CPU_HOST_PTR(d)); 702 } 703 } 704 705 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr) 706 { 707 tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS); 708 } 709 710 static void tlb_flush_range_locked(CPUArchState *env, int midx, 711 target_ulong addr, target_ulong len, 712 unsigned bits) 713 { 714 CPUTLBDesc *d = &env_tlb(env)->d[midx]; 715 CPUTLBDescFast *f = &env_tlb(env)->f[midx]; 716 target_ulong mask = MAKE_64BIT_MASK(0, bits); 717 718 /* 719 * If @bits is smaller than the tlb size, there may be multiple entries 720 * within the TLB; otherwise all addresses that match under @mask hit 721 * the same TLB entry. 722 * TODO: Perhaps allow bits to be a few bits less than the size. 723 * For now, just flush the entire TLB. 724 * 725 * If @len is larger than the tlb size, then it will take longer to 726 * test all of the entries in the TLB than it will to flush it all. 727 */ 728 if (mask < f->mask || len > f->mask) { 729 tlb_debug("forcing full flush midx %d (" 730 TARGET_FMT_lx "/" TARGET_FMT_lx "+" TARGET_FMT_lx ")\n", 731 midx, addr, mask, len); 732 tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); 733 return; 734 } 735 736 /* 737 * Check if we need to flush due to large pages. 738 * Because large_page_mask contains all 1's from the msb, 739 * we only need to test the end of the range. 
740 */ 741 if (((addr + len - 1) & d->large_page_mask) == d->large_page_addr) { 742 tlb_debug("forcing full flush midx %d (" 743 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", 744 midx, d->large_page_addr, d->large_page_mask); 745 tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); 746 return; 747 } 748 749 for (target_ulong i = 0; i < len; i += TARGET_PAGE_SIZE) { 750 target_ulong page = addr + i; 751 CPUTLBEntry *entry = tlb_entry(env, midx, page); 752 753 if (tlb_flush_entry_mask_locked(entry, page, mask)) { 754 tlb_n_used_entries_dec(env, midx); 755 } 756 tlb_flush_vtlb_page_mask_locked(env, midx, page, mask); 757 } 758 } 759 760 typedef struct { 761 target_ulong addr; 762 target_ulong len; 763 uint16_t idxmap; 764 uint16_t bits; 765 } TLBFlushRangeData; 766 767 static void 768 tlb_flush_page_bits_by_mmuidx_async_0(CPUState *cpu, 769 TLBFlushRangeData d) 770 { 771 CPUArchState *env = cpu->env_ptr; 772 int mmu_idx; 773 774 assert_cpu_is_self(cpu); 775 776 tlb_debug("range:" TARGET_FMT_lx "/%u+" TARGET_FMT_lx " mmu_map:0x%x\n", 777 d.addr, d.bits, d.len, d.idxmap); 778 779 qemu_spin_lock(&env_tlb(env)->c.lock); 780 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 781 if ((d.idxmap >> mmu_idx) & 1) { 782 tlb_flush_range_locked(env, mmu_idx, d.addr, d.len, d.bits); 783 } 784 } 785 qemu_spin_unlock(&env_tlb(env)->c.lock); 786 787 for (target_ulong i = 0; i < d.len; i += TARGET_PAGE_SIZE) { 788 tb_flush_jmp_cache(cpu, d.addr + i); 789 } 790 } 791 792 static void tlb_flush_page_bits_by_mmuidx_async_2(CPUState *cpu, 793 run_on_cpu_data data) 794 { 795 TLBFlushRangeData *d = data.host_ptr; 796 tlb_flush_page_bits_by_mmuidx_async_0(cpu, *d); 797 g_free(d); 798 } 799 800 void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr, 801 target_ulong len, uint16_t idxmap, 802 unsigned bits) 803 { 804 TLBFlushRangeData d; 805 806 /* 807 * If all bits are significant, and len is small, 808 * this devolves to tlb_flush_page. 809 */ 810 if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) { 811 tlb_flush_page_by_mmuidx(cpu, addr, idxmap); 812 return; 813 } 814 /* If no page bits are significant, this devolves to tlb_flush. */ 815 if (bits < TARGET_PAGE_BITS) { 816 tlb_flush_by_mmuidx(cpu, idxmap); 817 return; 818 } 819 820 /* This should already be page aligned */ 821 d.addr = addr & TARGET_PAGE_MASK; 822 d.len = len; 823 d.idxmap = idxmap; 824 d.bits = bits; 825 826 if (qemu_cpu_is_self(cpu)) { 827 tlb_flush_page_bits_by_mmuidx_async_0(cpu, d); 828 } else { 829 /* Otherwise allocate a structure, freed by the worker. */ 830 TLBFlushRangeData *p = g_memdup(&d, sizeof(d)); 831 async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_2, 832 RUN_ON_CPU_HOST_PTR(p)); 833 } 834 } 835 836 void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr, 837 uint16_t idxmap, unsigned bits) 838 { 839 tlb_flush_range_by_mmuidx(cpu, addr, TARGET_PAGE_SIZE, idxmap, bits); 840 } 841 842 void tlb_flush_range_by_mmuidx_all_cpus(CPUState *src_cpu, 843 target_ulong addr, target_ulong len, 844 uint16_t idxmap, unsigned bits) 845 { 846 TLBFlushRangeData d; 847 CPUState *dst_cpu; 848 849 /* 850 * If all bits are significant, and len is small, 851 * this devolves to tlb_flush_page. 852 */ 853 if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) { 854 tlb_flush_page_by_mmuidx_all_cpus(src_cpu, addr, idxmap); 855 return; 856 } 857 /* If no page bits are significant, this devolves to tlb_flush. 
*/ 858 if (bits < TARGET_PAGE_BITS) { 859 tlb_flush_by_mmuidx_all_cpus(src_cpu, idxmap); 860 return; 861 } 862 863 /* This should already be page aligned */ 864 d.addr = addr & TARGET_PAGE_MASK; 865 d.len = len; 866 d.idxmap = idxmap; 867 d.bits = bits; 868 869 /* Allocate a separate data block for each destination cpu. */ 870 CPU_FOREACH(dst_cpu) { 871 if (dst_cpu != src_cpu) { 872 TLBFlushRangeData *p = g_memdup(&d, sizeof(d)); 873 async_run_on_cpu(dst_cpu, 874 tlb_flush_page_bits_by_mmuidx_async_2, 875 RUN_ON_CPU_HOST_PTR(p)); 876 } 877 } 878 879 tlb_flush_page_bits_by_mmuidx_async_0(src_cpu, d); 880 } 881 882 void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu, 883 target_ulong addr, 884 uint16_t idxmap, unsigned bits) 885 { 886 tlb_flush_range_by_mmuidx_all_cpus(src_cpu, addr, TARGET_PAGE_SIZE, 887 idxmap, bits); 888 } 889 890 void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *src_cpu, 891 target_ulong addr, 892 target_ulong len, 893 uint16_t idxmap, 894 unsigned bits) 895 { 896 TLBFlushRangeData d, *p; 897 CPUState *dst_cpu; 898 899 /* 900 * If all bits are significant, and len is small, 901 * this devolves to tlb_flush_page. 902 */ 903 if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) { 904 tlb_flush_page_by_mmuidx_all_cpus_synced(src_cpu, addr, idxmap); 905 return; 906 } 907 /* If no page bits are significant, this devolves to tlb_flush. */ 908 if (bits < TARGET_PAGE_BITS) { 909 tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, idxmap); 910 return; 911 } 912 913 /* This should already be page aligned */ 914 d.addr = addr & TARGET_PAGE_MASK; 915 d.len = len; 916 d.idxmap = idxmap; 917 d.bits = bits; 918 919 /* Allocate a separate data block for each destination cpu. */ 920 CPU_FOREACH(dst_cpu) { 921 if (dst_cpu != src_cpu) { 922 p = g_memdup(&d, sizeof(d)); 923 async_run_on_cpu(dst_cpu, tlb_flush_page_bits_by_mmuidx_async_2, 924 RUN_ON_CPU_HOST_PTR(p)); 925 } 926 } 927 928 p = g_memdup(&d, sizeof(d)); 929 async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_2, 930 RUN_ON_CPU_HOST_PTR(p)); 931 } 932 933 void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu, 934 target_ulong addr, 935 uint16_t idxmap, 936 unsigned bits) 937 { 938 tlb_flush_range_by_mmuidx_all_cpus_synced(src_cpu, addr, TARGET_PAGE_SIZE, 939 idxmap, bits); 940 } 941 942 /* update the TLBs so that writes to code in the virtual page 'addr' 943 can be detected */ 944 void tlb_protect_code(ram_addr_t ram_addr) 945 { 946 cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE, 947 DIRTY_MEMORY_CODE); 948 } 949 950 /* update the TLB so that writes in physical page 'phys_addr' are no longer 951 tested for self modifying code */ 952 void tlb_unprotect_code(ram_addr_t ram_addr) 953 { 954 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE); 955 } 956 957 958 /* 959 * Dirty write flag handling 960 * 961 * When the TCG code writes to a location it looks up the address in 962 * the TLB and uses that data to compute the final address. If any of 963 * the lower bits of the address are set then the slow path is forced. 964 * There are a number of reasons to do this but for normal RAM the 965 * most usual is detecting writes to code regions which may invalidate 966 * generated code. 967 * 968 * Other vCPUs might be reading their TLBs during guest execution, so we update 969 * te->addr_write with qatomic_set. We don't need to worry about this for 970 * oversized guests as MTTCG is disabled for them. 971 * 972 * Called with tlb_c.lock held. 
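 * An entry whose page has just been made clean gets TLB_NOTDIRTY set here,
 * which forces subsequent guest stores through the slow path (notdirty_write)
 * so that the dirty bits can be set again.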
973 */ 974 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry, 975 uintptr_t start, uintptr_t length) 976 { 977 uintptr_t addr = tlb_entry->addr_write; 978 979 if ((addr & (TLB_INVALID_MASK | TLB_MMIO | 980 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) { 981 addr &= TARGET_PAGE_MASK; 982 addr += tlb_entry->addend; 983 if ((addr - start) < length) { 984 #if TCG_OVERSIZED_GUEST 985 tlb_entry->addr_write |= TLB_NOTDIRTY; 986 #else 987 qatomic_set(&tlb_entry->addr_write, 988 tlb_entry->addr_write | TLB_NOTDIRTY); 989 #endif 990 } 991 } 992 } 993 994 /* 995 * Called with tlb_c.lock held. 996 * Called only from the vCPU context, i.e. the TLB's owner thread. 997 */ 998 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s) 999 { 1000 *d = *s; 1001 } 1002 1003 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of 1004 * the target vCPU). 1005 * We must take tlb_c.lock to avoid racing with another vCPU update. The only 1006 * thing actually updated is the target TLB entry ->addr_write flags. 1007 */ 1008 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) 1009 { 1010 CPUArchState *env; 1011 1012 int mmu_idx; 1013 1014 env = cpu->env_ptr; 1015 qemu_spin_lock(&env_tlb(env)->c.lock); 1016 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 1017 unsigned int i; 1018 unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]); 1019 1020 for (i = 0; i < n; i++) { 1021 tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i], 1022 start1, length); 1023 } 1024 1025 for (i = 0; i < CPU_VTLB_SIZE; i++) { 1026 tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i], 1027 start1, length); 1028 } 1029 } 1030 qemu_spin_unlock(&env_tlb(env)->c.lock); 1031 } 1032 1033 /* Called with tlb_c.lock held */ 1034 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry, 1035 target_ulong vaddr) 1036 { 1037 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) { 1038 tlb_entry->addr_write = vaddr; 1039 } 1040 } 1041 1042 /* update the TLB corresponding to virtual page vaddr 1043 so that it is no longer dirty */ 1044 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) 1045 { 1046 CPUArchState *env = cpu->env_ptr; 1047 int mmu_idx; 1048 1049 assert_cpu_is_self(cpu); 1050 1051 vaddr &= TARGET_PAGE_MASK; 1052 qemu_spin_lock(&env_tlb(env)->c.lock); 1053 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 1054 tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr); 1055 } 1056 1057 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 1058 int k; 1059 for (k = 0; k < CPU_VTLB_SIZE; k++) { 1060 tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr); 1061 } 1062 } 1063 qemu_spin_unlock(&env_tlb(env)->c.lock); 1064 } 1065 1066 /* Our TLB does not support large pages, so remember the area covered by 1067 large pages and trigger a full TLB flush if these are invalidated. */ 1068 static void tlb_add_large_page(CPUArchState *env, int mmu_idx, 1069 target_ulong vaddr, target_ulong size) 1070 { 1071 target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr; 1072 target_ulong lp_mask = ~(size - 1); 1073 1074 if (lp_addr == (target_ulong)-1) { 1075 /* No previous large page. */ 1076 lp_addr = vaddr; 1077 } else { 1078 /* Extend the existing region to include the new page. 1079 This is a compromise between unnecessary flushes and 1080 the cost of maintaining a full variable size TLB. 
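           For instance, if a 2MB page at 0x200000 is already tracked and
           another 2MB page is added at 0x600000, the region widens to the
           8MB block that covers both.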
*/ 1081 lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask; 1082 while (((lp_addr ^ vaddr) & lp_mask) != 0) { 1083 lp_mask <<= 1; 1084 } 1085 } 1086 env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask; 1087 env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask; 1088 } 1089 1090 /* Add a new TLB entry. At most one entry for a given virtual address 1091 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the 1092 * supplied size is only used by tlb_flush_page. 1093 * 1094 * Called from TCG-generated code, which is under an RCU read-side 1095 * critical section. 1096 */ 1097 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, 1098 hwaddr paddr, MemTxAttrs attrs, int prot, 1099 int mmu_idx, target_ulong size) 1100 { 1101 CPUArchState *env = cpu->env_ptr; 1102 CPUTLB *tlb = env_tlb(env); 1103 CPUTLBDesc *desc = &tlb->d[mmu_idx]; 1104 MemoryRegionSection *section; 1105 unsigned int index; 1106 target_ulong address; 1107 target_ulong write_address; 1108 uintptr_t addend; 1109 CPUTLBEntry *te, tn; 1110 hwaddr iotlb, xlat, sz, paddr_page; 1111 target_ulong vaddr_page; 1112 int asidx = cpu_asidx_from_attrs(cpu, attrs); 1113 int wp_flags; 1114 bool is_ram, is_romd; 1115 1116 assert_cpu_is_self(cpu); 1117 1118 if (size <= TARGET_PAGE_SIZE) { 1119 sz = TARGET_PAGE_SIZE; 1120 } else { 1121 tlb_add_large_page(env, mmu_idx, vaddr, size); 1122 sz = size; 1123 } 1124 vaddr_page = vaddr & TARGET_PAGE_MASK; 1125 paddr_page = paddr & TARGET_PAGE_MASK; 1126 1127 section = address_space_translate_for_iotlb(cpu, asidx, paddr_page, 1128 &xlat, &sz, attrs, &prot); 1129 assert(sz >= TARGET_PAGE_SIZE); 1130 1131 tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx 1132 " prot=%x idx=%d\n", 1133 vaddr, paddr, prot, mmu_idx); 1134 1135 address = vaddr_page; 1136 if (size < TARGET_PAGE_SIZE) { 1137 /* Repeat the MMU check and TLB fill on every access. */ 1138 address |= TLB_INVALID_MASK; 1139 } 1140 if (attrs.byte_swap) { 1141 address |= TLB_BSWAP; 1142 } 1143 1144 is_ram = memory_region_is_ram(section->mr); 1145 is_romd = memory_region_is_romd(section->mr); 1146 1147 if (is_ram || is_romd) { 1148 /* RAM and ROMD both have associated host memory. */ 1149 addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; 1150 } else { 1151 /* I/O does not; force the host address to NULL. */ 1152 addend = 0; 1153 } 1154 1155 write_address = address; 1156 if (is_ram) { 1157 iotlb = memory_region_get_ram_addr(section->mr) + xlat; 1158 /* 1159 * Computing is_clean is expensive; avoid all that unless 1160 * the page is actually writable. 1161 */ 1162 if (prot & PAGE_WRITE) { 1163 if (section->readonly) { 1164 write_address |= TLB_DISCARD_WRITE; 1165 } else if (cpu_physical_memory_is_clean(iotlb)) { 1166 write_address |= TLB_NOTDIRTY; 1167 } 1168 } 1169 } else { 1170 /* I/O or ROMD */ 1171 iotlb = memory_region_section_get_iotlb(cpu, section) + xlat; 1172 /* 1173 * Writes to romd devices must go through MMIO to enable write. 1174 * Reads to romd devices go through the ram_ptr found above, 1175 * but of course reads to I/O must go through MMIO. 1176 */ 1177 write_address |= TLB_MMIO; 1178 if (!is_romd) { 1179 address = write_address; 1180 } 1181 } 1182 1183 wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page, 1184 TARGET_PAGE_SIZE); 1185 1186 index = tlb_index(env, mmu_idx, vaddr_page); 1187 te = tlb_entry(env, mmu_idx, vaddr_page); 1188 1189 /* 1190 * Hold the TLB lock for the rest of the function. 
We could acquire/release 1191 * the lock several times in the function, but it is faster to amortize the 1192 * acquisition cost by acquiring it just once. Note that this leads to 1193 * a longer critical section, but this is not a concern since the TLB lock 1194 * is unlikely to be contended. 1195 */ 1196 qemu_spin_lock(&tlb->c.lock); 1197 1198 /* Note that the tlb is no longer clean. */ 1199 tlb->c.dirty |= 1 << mmu_idx; 1200 1201 /* Make sure there's no cached translation for the new page. */ 1202 tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page); 1203 1204 /* 1205 * Only evict the old entry to the victim tlb if it's for a 1206 * different page; otherwise just overwrite the stale data. 1207 */ 1208 if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) { 1209 unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE; 1210 CPUTLBEntry *tv = &desc->vtable[vidx]; 1211 1212 /* Evict the old entry into the victim tlb. */ 1213 copy_tlb_helper_locked(tv, te); 1214 desc->viotlb[vidx] = desc->iotlb[index]; 1215 tlb_n_used_entries_dec(env, mmu_idx); 1216 } 1217 1218 /* refill the tlb */ 1219 /* 1220 * At this point iotlb contains a physical section number in the lower 1221 * TARGET_PAGE_BITS, and either 1222 * + the ram_addr_t of the page base of the target RAM (RAM) 1223 * + the offset within section->mr of the page base (I/O, ROMD) 1224 * We subtract the vaddr_page (which is page aligned and thus won't 1225 * disturb the low bits) to give an offset which can be added to the 1226 * (non-page-aligned) vaddr of the eventual memory access to get 1227 * the MemoryRegion offset for the access. Note that the vaddr we 1228 * subtract here is that of the page base, and not the same as the 1229 * vaddr we add back in io_readx()/io_writex()/get_page_addr_code(). 1230 */ 1231 desc->iotlb[index].addr = iotlb - vaddr_page; 1232 desc->iotlb[index].attrs = attrs; 1233 1234 /* Now calculate the new entry */ 1235 tn.addend = addend - vaddr_page; 1236 if (prot & PAGE_READ) { 1237 tn.addr_read = address; 1238 if (wp_flags & BP_MEM_READ) { 1239 tn.addr_read |= TLB_WATCHPOINT; 1240 } 1241 } else { 1242 tn.addr_read = -1; 1243 } 1244 1245 if (prot & PAGE_EXEC) { 1246 tn.addr_code = address; 1247 } else { 1248 tn.addr_code = -1; 1249 } 1250 1251 tn.addr_write = -1; 1252 if (prot & PAGE_WRITE) { 1253 tn.addr_write = write_address; 1254 if (prot & PAGE_WRITE_INV) { 1255 tn.addr_write |= TLB_INVALID_MASK; 1256 } 1257 if (wp_flags & BP_MEM_WRITE) { 1258 tn.addr_write |= TLB_WATCHPOINT; 1259 } 1260 } 1261 1262 copy_tlb_helper_locked(te, &tn); 1263 tlb_n_used_entries_inc(env, mmu_idx); 1264 qemu_spin_unlock(&tlb->c.lock); 1265 } 1266 1267 /* Add a new TLB entry, but without specifying the memory 1268 * transaction attributes to be used. 1269 */ 1270 void tlb_set_page(CPUState *cpu, target_ulong vaddr, 1271 hwaddr paddr, int prot, 1272 int mmu_idx, target_ulong size) 1273 { 1274 tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED, 1275 prot, mmu_idx, size); 1276 } 1277 1278 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr) 1279 { 1280 ram_addr_t ram_addr; 1281 1282 ram_addr = qemu_ram_addr_from_host(ptr); 1283 if (ram_addr == RAM_ADDR_INVALID) { 1284 error_report("Bad ram pointer %p", ptr); 1285 abort(); 1286 } 1287 return ram_addr; 1288 } 1289 1290 /* 1291 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the 1292 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must 1293 * be discarded and looked up again (e.g. via tlb_entry()). 
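 * The callers below therefore re-fetch the entry (via tlb_entry()) after
 * calling tlb_fill().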
1294 */ 1295 static void tlb_fill(CPUState *cpu, target_ulong addr, int size, 1296 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) 1297 { 1298 CPUClass *cc = CPU_GET_CLASS(cpu); 1299 bool ok; 1300 1301 /* 1302 * This is not a probe, so only valid return is success; failure 1303 * should result in exception + longjmp to the cpu loop. 1304 */ 1305 ok = cc->tcg_ops->tlb_fill(cpu, addr, size, 1306 access_type, mmu_idx, false, retaddr); 1307 assert(ok); 1308 } 1309 1310 static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr, 1311 MMUAccessType access_type, 1312 int mmu_idx, uintptr_t retaddr) 1313 { 1314 CPUClass *cc = CPU_GET_CLASS(cpu); 1315 1316 cc->tcg_ops->do_unaligned_access(cpu, addr, access_type, mmu_idx, retaddr); 1317 } 1318 1319 static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr, 1320 vaddr addr, unsigned size, 1321 MMUAccessType access_type, 1322 int mmu_idx, MemTxAttrs attrs, 1323 MemTxResult response, 1324 uintptr_t retaddr) 1325 { 1326 CPUClass *cc = CPU_GET_CLASS(cpu); 1327 1328 if (!cpu->ignore_memory_transaction_failures && 1329 cc->tcg_ops->do_transaction_failed) { 1330 cc->tcg_ops->do_transaction_failed(cpu, physaddr, addr, size, 1331 access_type, mmu_idx, attrs, 1332 response, retaddr); 1333 } 1334 } 1335 1336 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, 1337 int mmu_idx, target_ulong addr, uintptr_t retaddr, 1338 MMUAccessType access_type, MemOp op) 1339 { 1340 CPUState *cpu = env_cpu(env); 1341 hwaddr mr_offset; 1342 MemoryRegionSection *section; 1343 MemoryRegion *mr; 1344 uint64_t val; 1345 bool locked = false; 1346 MemTxResult r; 1347 1348 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); 1349 mr = section->mr; 1350 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; 1351 cpu->mem_io_pc = retaddr; 1352 if (!cpu->can_do_io) { 1353 cpu_io_recompile(cpu, retaddr); 1354 } 1355 1356 if (!qemu_mutex_iothread_locked()) { 1357 qemu_mutex_lock_iothread(); 1358 locked = true; 1359 } 1360 r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs); 1361 if (r != MEMTX_OK) { 1362 hwaddr physaddr = mr_offset + 1363 section->offset_within_address_space - 1364 section->offset_within_region; 1365 1366 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type, 1367 mmu_idx, iotlbentry->attrs, r, retaddr); 1368 } 1369 if (locked) { 1370 qemu_mutex_unlock_iothread(); 1371 } 1372 1373 return val; 1374 } 1375 1376 /* 1377 * Save a potentially trashed IOTLB entry for later lookup by plugin. 1378 * This is read by tlb_plugin_lookup if the iotlb entry doesn't match 1379 * because of the side effect of io_writex changing memory layout. 
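 * Without CONFIG_PLUGIN this helper compiles to a no-op.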
1380 */ 1381 static void save_iotlb_data(CPUState *cs, hwaddr addr, 1382 MemoryRegionSection *section, hwaddr mr_offset) 1383 { 1384 #ifdef CONFIG_PLUGIN 1385 SavedIOTLB *saved = &cs->saved_iotlb; 1386 saved->addr = addr; 1387 saved->section = section; 1388 saved->mr_offset = mr_offset; 1389 #endif 1390 } 1391 1392 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, 1393 int mmu_idx, uint64_t val, target_ulong addr, 1394 uintptr_t retaddr, MemOp op) 1395 { 1396 CPUState *cpu = env_cpu(env); 1397 hwaddr mr_offset; 1398 MemoryRegionSection *section; 1399 MemoryRegion *mr; 1400 bool locked = false; 1401 MemTxResult r; 1402 1403 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); 1404 mr = section->mr; 1405 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; 1406 if (!cpu->can_do_io) { 1407 cpu_io_recompile(cpu, retaddr); 1408 } 1409 cpu->mem_io_pc = retaddr; 1410 1411 /* 1412 * The memory_region_dispatch may trigger a flush/resize 1413 * so for plugins we save the iotlb_data just in case. 1414 */ 1415 save_iotlb_data(cpu, iotlbentry->addr, section, mr_offset); 1416 1417 if (!qemu_mutex_iothread_locked()) { 1418 qemu_mutex_lock_iothread(); 1419 locked = true; 1420 } 1421 r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs); 1422 if (r != MEMTX_OK) { 1423 hwaddr physaddr = mr_offset + 1424 section->offset_within_address_space - 1425 section->offset_within_region; 1426 1427 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), 1428 MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r, 1429 retaddr); 1430 } 1431 if (locked) { 1432 qemu_mutex_unlock_iothread(); 1433 } 1434 } 1435 1436 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs) 1437 { 1438 #if TCG_OVERSIZED_GUEST 1439 return *(target_ulong *)((uintptr_t)entry + ofs); 1440 #else 1441 /* ofs might correspond to .addr_write, so use qatomic_read */ 1442 return qatomic_read((target_ulong *)((uintptr_t)entry + ofs)); 1443 #endif 1444 } 1445 1446 /* Return true if ADDR is present in the victim tlb, and has been copied 1447 back to the main tlb. */ 1448 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, 1449 size_t elt_ofs, target_ulong page) 1450 { 1451 size_t vidx; 1452 1453 assert_cpu_is_self(env_cpu(env)); 1454 for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { 1455 CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx]; 1456 target_ulong cmp; 1457 1458 /* elt_ofs might correspond to .addr_write, so use qatomic_read */ 1459 #if TCG_OVERSIZED_GUEST 1460 cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); 1461 #else 1462 cmp = qatomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs)); 1463 #endif 1464 1465 if (cmp == page) { 1466 /* Found entry in victim tlb, swap tlb and iotlb. */ 1467 CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index]; 1468 1469 qemu_spin_lock(&env_tlb(env)->c.lock); 1470 copy_tlb_helper_locked(&tmptlb, tlb); 1471 copy_tlb_helper_locked(tlb, vtlb); 1472 copy_tlb_helper_locked(vtlb, &tmptlb); 1473 qemu_spin_unlock(&env_tlb(env)->c.lock); 1474 1475 CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1476 CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx]; 1477 tmpio = *io; *io = *vio; *vio = tmpio; 1478 return true; 1479 } 1480 } 1481 return false; 1482 } 1483 1484 /* Macro to call the above, with local variables from the use context. 
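   It expects env, mmu_idx and index to be in scope at the point of use.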
*/ 1485 #define VICTIM_TLB_HIT(TY, ADDR) \ 1486 victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \ 1487 (ADDR) & TARGET_PAGE_MASK) 1488 1489 /* 1490 * Return a ram_addr_t for the virtual address for execution. 1491 * 1492 * Return -1 if we can't translate and execute from an entire page 1493 * of RAM. This will force us to execute by loading and translating 1494 * one insn at a time, without caching. 1495 * 1496 * NOTE: This function will trigger an exception if the page is 1497 * not executable. 1498 */ 1499 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, 1500 void **hostp) 1501 { 1502 uintptr_t mmu_idx = cpu_mmu_index(env, true); 1503 uintptr_t index = tlb_index(env, mmu_idx, addr); 1504 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1505 void *p; 1506 1507 if (unlikely(!tlb_hit(entry->addr_code, addr))) { 1508 if (!VICTIM_TLB_HIT(addr_code, addr)) { 1509 tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0); 1510 index = tlb_index(env, mmu_idx, addr); 1511 entry = tlb_entry(env, mmu_idx, addr); 1512 1513 if (unlikely(entry->addr_code & TLB_INVALID_MASK)) { 1514 /* 1515 * The MMU protection covers a smaller range than a target 1516 * page, so we must redo the MMU check for every insn. 1517 */ 1518 return -1; 1519 } 1520 } 1521 assert(tlb_hit(entry->addr_code, addr)); 1522 } 1523 1524 if (unlikely(entry->addr_code & TLB_MMIO)) { 1525 /* The region is not backed by RAM. */ 1526 if (hostp) { 1527 *hostp = NULL; 1528 } 1529 return -1; 1530 } 1531 1532 p = (void *)((uintptr_t)addr + entry->addend); 1533 if (hostp) { 1534 *hostp = p; 1535 } 1536 return qemu_ram_addr_from_host_nofail(p); 1537 } 1538 1539 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) 1540 { 1541 return get_page_addr_code_hostp(env, addr, NULL); 1542 } 1543 1544 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, 1545 CPUIOTLBEntry *iotlbentry, uintptr_t retaddr) 1546 { 1547 ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr; 1548 1549 trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size); 1550 1551 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { 1552 struct page_collection *pages 1553 = page_collection_lock(ram_addr, ram_addr + size); 1554 tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr); 1555 page_collection_unlock(pages); 1556 } 1557 1558 /* 1559 * Set both VGA and migration bits for simplicity and to remove 1560 * the notdirty callback faster. 1561 */ 1562 cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE); 1563 1564 /* We remove the notdirty callback only if the code has been flushed. 
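       That is, once cpu_physical_memory_is_clean() stops reporting the page
       as clean, tlb_set_dirty() below drops TLB_NOTDIRTY so that later
       writes can take the fast path again.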
*/ 1565 if (!cpu_physical_memory_is_clean(ram_addr)) { 1566 trace_memory_notdirty_set_dirty(mem_vaddr); 1567 tlb_set_dirty(cpu, mem_vaddr); 1568 } 1569 } 1570 1571 static int probe_access_internal(CPUArchState *env, target_ulong addr, 1572 int fault_size, MMUAccessType access_type, 1573 int mmu_idx, bool nonfault, 1574 void **phost, uintptr_t retaddr) 1575 { 1576 uintptr_t index = tlb_index(env, mmu_idx, addr); 1577 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1578 target_ulong tlb_addr, page_addr; 1579 size_t elt_ofs; 1580 int flags; 1581 1582 switch (access_type) { 1583 case MMU_DATA_LOAD: 1584 elt_ofs = offsetof(CPUTLBEntry, addr_read); 1585 break; 1586 case MMU_DATA_STORE: 1587 elt_ofs = offsetof(CPUTLBEntry, addr_write); 1588 break; 1589 case MMU_INST_FETCH: 1590 elt_ofs = offsetof(CPUTLBEntry, addr_code); 1591 break; 1592 default: 1593 g_assert_not_reached(); 1594 } 1595 tlb_addr = tlb_read_ofs(entry, elt_ofs); 1596 1597 page_addr = addr & TARGET_PAGE_MASK; 1598 if (!tlb_hit_page(tlb_addr, page_addr)) { 1599 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) { 1600 CPUState *cs = env_cpu(env); 1601 CPUClass *cc = CPU_GET_CLASS(cs); 1602 1603 if (!cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type, 1604 mmu_idx, nonfault, retaddr)) { 1605 /* Non-faulting page table read failed. */ 1606 *phost = NULL; 1607 return TLB_INVALID_MASK; 1608 } 1609 1610 /* TLB resize via tlb_fill may have moved the entry. */ 1611 entry = tlb_entry(env, mmu_idx, addr); 1612 } 1613 tlb_addr = tlb_read_ofs(entry, elt_ofs); 1614 } 1615 flags = tlb_addr & TLB_FLAGS_MASK; 1616 1617 /* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */ 1618 if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) { 1619 *phost = NULL; 1620 return TLB_MMIO; 1621 } 1622 1623 /* Everything else is RAM. */ 1624 *phost = (void *)((uintptr_t)addr + entry->addend); 1625 return flags; 1626 } 1627 1628 int probe_access_flags(CPUArchState *env, target_ulong addr, 1629 MMUAccessType access_type, int mmu_idx, 1630 bool nonfault, void **phost, uintptr_t retaddr) 1631 { 1632 int flags; 1633 1634 flags = probe_access_internal(env, addr, 0, access_type, mmu_idx, 1635 nonfault, phost, retaddr); 1636 1637 /* Handle clean RAM pages. */ 1638 if (unlikely(flags & TLB_NOTDIRTY)) { 1639 uintptr_t index = tlb_index(env, mmu_idx, addr); 1640 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1641 1642 notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr); 1643 flags &= ~TLB_NOTDIRTY; 1644 } 1645 1646 return flags; 1647 } 1648 1649 void *probe_access(CPUArchState *env, target_ulong addr, int size, 1650 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) 1651 { 1652 void *host; 1653 int flags; 1654 1655 g_assert(-(addr | TARGET_PAGE_MASK) >= size); 1656 1657 flags = probe_access_internal(env, addr, size, access_type, mmu_idx, 1658 false, &host, retaddr); 1659 1660 /* Per the interface, size == 0 merely faults the access. */ 1661 if (size == 0) { 1662 return NULL; 1663 } 1664 1665 if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) { 1666 uintptr_t index = tlb_index(env, mmu_idx, addr); 1667 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1668 1669 /* Handle watchpoints. */ 1670 if (flags & TLB_WATCHPOINT) { 1671 int wp_access = (access_type == MMU_DATA_STORE 1672 ? BP_MEM_WRITE : BP_MEM_READ); 1673 cpu_check_watchpoint(env_cpu(env), addr, size, 1674 iotlbentry->attrs, wp_access, retaddr); 1675 } 1676 1677 /* Handle clean RAM pages. 
*/ 1678 if (flags & TLB_NOTDIRTY) { 1679 notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr); 1680 } 1681 } 1682 1683 return host; 1684 } 1685 1686 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, 1687 MMUAccessType access_type, int mmu_idx) 1688 { 1689 void *host; 1690 int flags; 1691 1692 flags = probe_access_internal(env, addr, 0, access_type, 1693 mmu_idx, true, &host, 0); 1694 1695 /* No combination of flags are expected by the caller. */ 1696 return flags ? NULL : host; 1697 } 1698 1699 #ifdef CONFIG_PLUGIN 1700 /* 1701 * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure. 1702 * This should be a hot path as we will have just looked this path up 1703 * in the softmmu lookup code (or helper). We don't handle re-fills or 1704 * checking the victim table. This is purely informational. 1705 * 1706 * This almost never fails as the memory access being instrumented 1707 * should have just filled the TLB. The one corner case is io_writex 1708 * which can cause TLB flushes and potential resizing of the TLBs 1709 * losing the information we need. In those cases we need to recover 1710 * data from a copy of the iotlbentry. As long as this always occurs 1711 * from the same thread (which a mem callback will be) this is safe. 1712 */ 1713 1714 bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx, 1715 bool is_store, struct qemu_plugin_hwaddr *data) 1716 { 1717 CPUArchState *env = cpu->env_ptr; 1718 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); 1719 uintptr_t index = tlb_index(env, mmu_idx, addr); 1720 target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read; 1721 1722 if (likely(tlb_hit(tlb_addr, addr))) { 1723 /* We must have an iotlb entry for MMIO */ 1724 if (tlb_addr & TLB_MMIO) { 1725 CPUIOTLBEntry *iotlbentry; 1726 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1727 data->is_io = true; 1728 data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); 1729 data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; 1730 } else { 1731 data->is_io = false; 1732 data->v.ram.hostaddr = addr + tlbe->addend; 1733 } 1734 return true; 1735 } else { 1736 SavedIOTLB *saved = &cpu->saved_iotlb; 1737 data->is_io = true; 1738 data->v.io.section = saved->section; 1739 data->v.io.offset = saved->mr_offset; 1740 return true; 1741 } 1742 } 1743 1744 #endif 1745 1746 /* Probe for a read-modify-write atomic operation. Do not allow unaligned 1747 * operations, or io operations to proceed. Return the host address. */ 1748 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, 1749 TCGMemOpIdx oi, uintptr_t retaddr) 1750 { 1751 size_t mmu_idx = get_mmuidx(oi); 1752 uintptr_t index = tlb_index(env, mmu_idx, addr); 1753 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); 1754 target_ulong tlb_addr = tlb_addr_write(tlbe); 1755 MemOp mop = get_memop(oi); 1756 int a_bits = get_alignment_bits(mop); 1757 int s_bits = mop & MO_SIZE; 1758 void *hostaddr; 1759 1760 /* Adjust the given return address. */ 1761 retaddr -= GETPC_ADJ; 1762 1763 /* Enforce guest required alignment. */ 1764 if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) { 1765 /* ??? Maybe indicate atomic op to cpu_unaligned_access */ 1766 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE, 1767 mmu_idx, retaddr); 1768 } 1769 1770 /* Enforce qemu required alignment. */ 1771 if (unlikely(addr & ((1 << s_bits) - 1))) { 1772 /* We get here if guest alignment was not requested, 1773 or was not enforced by cpu_unaligned_access above. 
1774 We might widen the access and emulate, but for now 1775 mark an exception and exit the cpu loop. */ 1776 goto stop_the_world; 1777 } 1778 1779 /* Check TLB entry and enforce page permissions. */ 1780 if (!tlb_hit(tlb_addr, addr)) { 1781 if (!VICTIM_TLB_HIT(addr_write, addr)) { 1782 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE, 1783 mmu_idx, retaddr); 1784 index = tlb_index(env, mmu_idx, addr); 1785 tlbe = tlb_entry(env, mmu_idx, addr); 1786 } 1787 tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; 1788 } 1789 1790 /* Notice an IO access or a needs-MMU-lookup access */ 1791 if (unlikely(tlb_addr & TLB_MMIO)) { 1792 /* There's really nothing that can be done to 1793 support this apart from stop-the-world. */ 1794 goto stop_the_world; 1795 } 1796 1797 /* Let the guest notice RMW on a write-only page. */ 1798 if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) { 1799 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD, 1800 mmu_idx, retaddr); 1801 /* Since we don't support reads and writes to different addresses, 1802 and we do have the proper page loaded for write, this shouldn't 1803 ever return. But just in case, handle via stop-the-world. */ 1804 goto stop_the_world; 1805 } 1806 1807 hostaddr = (void *)((uintptr_t)addr + tlbe->addend); 1808 1809 if (unlikely(tlb_addr & TLB_NOTDIRTY)) { 1810 notdirty_write(env_cpu(env), addr, 1 << s_bits, 1811 &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr); 1812 } 1813 1814 return hostaddr; 1815 1816 stop_the_world: 1817 cpu_loop_exit_atomic(env_cpu(env), retaddr); 1818 } 1819 1820 /* 1821 * Load Helpers 1822 * 1823 * We support two different access types. SOFTMMU_CODE_ACCESS is 1824 * specifically for reading instructions from system memory. It is 1825 * called by the translation loop and in some helpers where the code 1826 * is disassembled. It shouldn't be called directly by guest code. 1827 */ 1828 1829 typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr, 1830 TCGMemOpIdx oi, uintptr_t retaddr); 1831 1832 static inline uint64_t QEMU_ALWAYS_INLINE 1833 load_memop(const void *haddr, MemOp op) 1834 { 1835 switch (op) { 1836 case MO_UB: 1837 return ldub_p(haddr); 1838 case MO_BEUW: 1839 return lduw_be_p(haddr); 1840 case MO_LEUW: 1841 return lduw_le_p(haddr); 1842 case MO_BEUL: 1843 return (uint32_t)ldl_be_p(haddr); 1844 case MO_LEUL: 1845 return (uint32_t)ldl_le_p(haddr); 1846 case MO_BEQ: 1847 return ldq_be_p(haddr); 1848 case MO_LEQ: 1849 return ldq_le_p(haddr); 1850 default: 1851 qemu_build_not_reached(); 1852 } 1853 } 1854 1855 static inline uint64_t QEMU_ALWAYS_INLINE 1856 load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, 1857 uintptr_t retaddr, MemOp op, bool code_read, 1858 FullLoadHelper *full_load) 1859 { 1860 uintptr_t mmu_idx = get_mmuidx(oi); 1861 uintptr_t index = tlb_index(env, mmu_idx, addr); 1862 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1863 target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read; 1864 const size_t tlb_off = code_read ? 1865 offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read); 1866 const MMUAccessType access_type = 1867 code_read ? 
MMU_INST_FETCH : MMU_DATA_LOAD; 1868 unsigned a_bits = get_alignment_bits(get_memop(oi)); 1869 void *haddr; 1870 uint64_t res; 1871 size_t size = memop_size(op); 1872 1873 /* Handle CPU specific unaligned behaviour */ 1874 if (addr & ((1 << a_bits) - 1)) { 1875 cpu_unaligned_access(env_cpu(env), addr, access_type, 1876 mmu_idx, retaddr); 1877 } 1878 1879 /* If the TLB entry is for a different page, reload and try again. */ 1880 if (!tlb_hit(tlb_addr, addr)) { 1881 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, 1882 addr & TARGET_PAGE_MASK)) { 1883 tlb_fill(env_cpu(env), addr, size, 1884 access_type, mmu_idx, retaddr); 1885 index = tlb_index(env, mmu_idx, addr); 1886 entry = tlb_entry(env, mmu_idx, addr); 1887 } 1888 tlb_addr = code_read ? entry->addr_code : entry->addr_read; 1889 tlb_addr &= ~TLB_INVALID_MASK; 1890 } 1891 1892 /* Handle anything that isn't just a straight memory access. */ 1893 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { 1894 CPUIOTLBEntry *iotlbentry; 1895 bool need_swap; 1896 1897 /* For anything that is unaligned, recurse through full_load. */ 1898 if ((addr & (size - 1)) != 0) { 1899 goto do_unaligned_access; 1900 } 1901 1902 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1903 1904 /* Handle watchpoints. */ 1905 if (unlikely(tlb_addr & TLB_WATCHPOINT)) { 1906 /* On watchpoint hit, this will longjmp out. */ 1907 cpu_check_watchpoint(env_cpu(env), addr, size, 1908 iotlbentry->attrs, BP_MEM_READ, retaddr); 1909 } 1910 1911 need_swap = size > 1 && (tlb_addr & TLB_BSWAP); 1912 1913 /* Handle I/O access. */ 1914 if (likely(tlb_addr & TLB_MMIO)) { 1915 return io_readx(env, iotlbentry, mmu_idx, addr, retaddr, 1916 access_type, op ^ (need_swap * MO_BSWAP)); 1917 } 1918 1919 haddr = (void *)((uintptr_t)addr + entry->addend); 1920 1921 /* 1922 * Keep these two load_memop separate to ensure that the compiler 1923 * is able to fold the entire function to a single instruction. 1924 * There is a build-time assert inside to remind you of this. ;-) 1925 */ 1926 if (unlikely(need_swap)) { 1927 return load_memop(haddr, op ^ MO_BSWAP); 1928 } 1929 return load_memop(haddr, op); 1930 } 1931 1932 /* Handle slow unaligned access (it spans two pages or IO). */ 1933 if (size > 1 1934 && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1 1935 >= TARGET_PAGE_SIZE)) { 1936 target_ulong addr1, addr2; 1937 uint64_t r1, r2; 1938 unsigned shift; 1939 do_unaligned_access: 1940 addr1 = addr & ~((target_ulong)size - 1); 1941 addr2 = addr1 + size; 1942 r1 = full_load(env, addr1, oi, retaddr); 1943 r2 = full_load(env, addr2, oi, retaddr); 1944 shift = (addr & (size - 1)) * 8; 1945 1946 if (memop_big_endian(op)) { 1947 /* Big-endian combine. */ 1948 res = (r1 << shift) | (r2 >> ((size * 8) - shift)); 1949 } else { 1950 /* Little-endian combine. */ 1951 res = (r1 >> shift) | (r2 << ((size * 8) - shift)); 1952 } 1953 return res & MAKE_64BIT_MASK(0, size * 8); 1954 } 1955 1956 haddr = (void *)((uintptr_t)addr + entry->addend); 1957 return load_memop(haddr, op); 1958 } 1959 1960 /* 1961 * For the benefit of TCG generated code, we want to avoid the 1962 * complication of ABI-specific return type promotion and always 1963 * return a value extended to the register size of the host. This is 1964 * tcg_target_long, except in the case of a 32-bit host and 64-bit 1965 * data, and for that we always have uint64_t. 1966 * 1967 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS. 
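 * For example, helper_le_lduw_mmu below returns its 16-bit result
 * zero-extended to tcg_target_ulong, so that generated code can use the
 * value directly as a host register-sized quantity.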
1968 */ 1969 1970 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr, 1971 TCGMemOpIdx oi, uintptr_t retaddr) 1972 { 1973 return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu); 1974 } 1975 1976 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, 1977 TCGMemOpIdx oi, uintptr_t retaddr) 1978 { 1979 return full_ldub_mmu(env, addr, oi, retaddr); 1980 } 1981 1982 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr, 1983 TCGMemOpIdx oi, uintptr_t retaddr) 1984 { 1985 return load_helper(env, addr, oi, retaddr, MO_LEUW, false, 1986 full_le_lduw_mmu); 1987 } 1988 1989 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr, 1990 TCGMemOpIdx oi, uintptr_t retaddr) 1991 { 1992 return full_le_lduw_mmu(env, addr, oi, retaddr); 1993 } 1994 1995 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr, 1996 TCGMemOpIdx oi, uintptr_t retaddr) 1997 { 1998 return load_helper(env, addr, oi, retaddr, MO_BEUW, false, 1999 full_be_lduw_mmu); 2000 } 2001 2002 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr, 2003 TCGMemOpIdx oi, uintptr_t retaddr) 2004 { 2005 return full_be_lduw_mmu(env, addr, oi, retaddr); 2006 } 2007 2008 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr, 2009 TCGMemOpIdx oi, uintptr_t retaddr) 2010 { 2011 return load_helper(env, addr, oi, retaddr, MO_LEUL, false, 2012 full_le_ldul_mmu); 2013 } 2014 2015 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr, 2016 TCGMemOpIdx oi, uintptr_t retaddr) 2017 { 2018 return full_le_ldul_mmu(env, addr, oi, retaddr); 2019 } 2020 2021 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr, 2022 TCGMemOpIdx oi, uintptr_t retaddr) 2023 { 2024 return load_helper(env, addr, oi, retaddr, MO_BEUL, false, 2025 full_be_ldul_mmu); 2026 } 2027 2028 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr, 2029 TCGMemOpIdx oi, uintptr_t retaddr) 2030 { 2031 return full_be_ldul_mmu(env, addr, oi, retaddr); 2032 } 2033 2034 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, 2035 TCGMemOpIdx oi, uintptr_t retaddr) 2036 { 2037 return load_helper(env, addr, oi, retaddr, MO_LEQ, false, 2038 helper_le_ldq_mmu); 2039 } 2040 2041 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, 2042 TCGMemOpIdx oi, uintptr_t retaddr) 2043 { 2044 return load_helper(env, addr, oi, retaddr, MO_BEQ, false, 2045 helper_be_ldq_mmu); 2046 } 2047 2048 /* 2049 * Provide signed versions of the load routines as well. We can of course 2050 * avoid this for 64-bit data, or for 32-bit data on 32-bit host. 
2051 */ 2052 2053 2054 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, 2055 TCGMemOpIdx oi, uintptr_t retaddr) 2056 { 2057 return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr); 2058 } 2059 2060 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr, 2061 TCGMemOpIdx oi, uintptr_t retaddr) 2062 { 2063 return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr); 2064 } 2065 2066 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr, 2067 TCGMemOpIdx oi, uintptr_t retaddr) 2068 { 2069 return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr); 2070 } 2071 2072 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr, 2073 TCGMemOpIdx oi, uintptr_t retaddr) 2074 { 2075 return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr); 2076 } 2077 2078 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, 2079 TCGMemOpIdx oi, uintptr_t retaddr) 2080 { 2081 return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr); 2082 } 2083 2084 /* 2085 * Load helpers for cpu_ldst.h. 2086 */ 2087 2088 static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr, 2089 int mmu_idx, uintptr_t retaddr, 2090 MemOp op, FullLoadHelper *full_load) 2091 { 2092 uint16_t meminfo; 2093 TCGMemOpIdx oi; 2094 uint64_t ret; 2095 2096 meminfo = trace_mem_get_info(op, mmu_idx, false); 2097 trace_guest_mem_before_exec(env_cpu(env), addr, meminfo); 2098 2099 op &= ~MO_SIGN; 2100 oi = make_memop_idx(op, mmu_idx); 2101 ret = full_load(env, addr, oi, retaddr); 2102 2103 qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo); 2104 2105 return ret; 2106 } 2107 2108 uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2109 int mmu_idx, uintptr_t ra) 2110 { 2111 return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu); 2112 } 2113 2114 int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2115 int mmu_idx, uintptr_t ra) 2116 { 2117 return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB, 2118 full_ldub_mmu); 2119 } 2120 2121 uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2122 int mmu_idx, uintptr_t ra) 2123 { 2124 return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUW, full_be_lduw_mmu); 2125 } 2126 2127 int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2128 int mmu_idx, uintptr_t ra) 2129 { 2130 return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_BESW, 2131 full_be_lduw_mmu); 2132 } 2133 2134 uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2135 int mmu_idx, uintptr_t ra) 2136 { 2137 return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUL, full_be_ldul_mmu); 2138 } 2139 2140 uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2141 int mmu_idx, uintptr_t ra) 2142 { 2143 return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEQ, helper_be_ldq_mmu); 2144 } 2145 2146 uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2147 int mmu_idx, uintptr_t ra) 2148 { 2149 return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUW, full_le_lduw_mmu); 2150 } 2151 2152 int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2153 int mmu_idx, uintptr_t ra) 2154 { 2155 return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_LESW, 2156 full_le_lduw_mmu); 2157 } 2158 2159 uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2160 int mmu_idx, uintptr_t ra) 2161 { 2162 return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUL, full_le_ldul_mmu); 2163 } 2164 2165 uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2166 int mmu_idx, uintptr_t 
ra) 2167 { 2168 return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEQ, helper_le_ldq_mmu); 2169 } 2170 2171 uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr, 2172 uintptr_t retaddr) 2173 { 2174 return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 2175 } 2176 2177 int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) 2178 { 2179 return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 2180 } 2181 2182 uint32_t cpu_lduw_be_data_ra(CPUArchState *env, target_ulong ptr, 2183 uintptr_t retaddr) 2184 { 2185 return cpu_lduw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 2186 } 2187 2188 int cpu_ldsw_be_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) 2189 { 2190 return cpu_ldsw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 2191 } 2192 2193 uint32_t cpu_ldl_be_data_ra(CPUArchState *env, target_ulong ptr, 2194 uintptr_t retaddr) 2195 { 2196 return cpu_ldl_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 2197 } 2198 2199 uint64_t cpu_ldq_be_data_ra(CPUArchState *env, target_ulong ptr, 2200 uintptr_t retaddr) 2201 { 2202 return cpu_ldq_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 2203 } 2204 2205 uint32_t cpu_lduw_le_data_ra(CPUArchState *env, target_ulong ptr, 2206 uintptr_t retaddr) 2207 { 2208 return cpu_lduw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 2209 } 2210 2211 int cpu_ldsw_le_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) 2212 { 2213 return cpu_ldsw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 2214 } 2215 2216 uint32_t cpu_ldl_le_data_ra(CPUArchState *env, target_ulong ptr, 2217 uintptr_t retaddr) 2218 { 2219 return cpu_ldl_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 2220 } 2221 2222 uint64_t cpu_ldq_le_data_ra(CPUArchState *env, target_ulong ptr, 2223 uintptr_t retaddr) 2224 { 2225 return cpu_ldq_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 2226 } 2227 2228 uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr) 2229 { 2230 return cpu_ldub_data_ra(env, ptr, 0); 2231 } 2232 2233 int cpu_ldsb_data(CPUArchState *env, target_ulong ptr) 2234 { 2235 return cpu_ldsb_data_ra(env, ptr, 0); 2236 } 2237 2238 uint32_t cpu_lduw_be_data(CPUArchState *env, target_ulong ptr) 2239 { 2240 return cpu_lduw_be_data_ra(env, ptr, 0); 2241 } 2242 2243 int cpu_ldsw_be_data(CPUArchState *env, target_ulong ptr) 2244 { 2245 return cpu_ldsw_be_data_ra(env, ptr, 0); 2246 } 2247 2248 uint32_t cpu_ldl_be_data(CPUArchState *env, target_ulong ptr) 2249 { 2250 return cpu_ldl_be_data_ra(env, ptr, 0); 2251 } 2252 2253 uint64_t cpu_ldq_be_data(CPUArchState *env, target_ulong ptr) 2254 { 2255 return cpu_ldq_be_data_ra(env, ptr, 0); 2256 } 2257 2258 uint32_t cpu_lduw_le_data(CPUArchState *env, target_ulong ptr) 2259 { 2260 return cpu_lduw_le_data_ra(env, ptr, 0); 2261 } 2262 2263 int cpu_ldsw_le_data(CPUArchState *env, target_ulong ptr) 2264 { 2265 return cpu_ldsw_le_data_ra(env, ptr, 0); 2266 } 2267 2268 uint32_t cpu_ldl_le_data(CPUArchState *env, target_ulong ptr) 2269 { 2270 return cpu_ldl_le_data_ra(env, ptr, 0); 2271 } 2272 2273 uint64_t cpu_ldq_le_data(CPUArchState *env, target_ulong ptr) 2274 { 2275 return cpu_ldq_le_data_ra(env, ptr, 0); 2276 } 2277 2278 /* 2279 * Store Helpers 2280 */ 2281 2282 static inline void QEMU_ALWAYS_INLINE 2283 store_memop(void *haddr, uint64_t val, MemOp op) 2284 { 2285 switch (op) { 2286 case MO_UB: 2287 stb_p(haddr, val); 2288 break; 2289 case MO_BEUW: 2290 
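        /*
         * Note: stw_be_p and the other st*_p primitives used in this
         * switch are memcpy-based, so haddr does not need any particular
         * host alignment.
         */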
stw_be_p(haddr, val); 2291 break; 2292 case MO_LEUW: 2293 stw_le_p(haddr, val); 2294 break; 2295 case MO_BEUL: 2296 stl_be_p(haddr, val); 2297 break; 2298 case MO_LEUL: 2299 stl_le_p(haddr, val); 2300 break; 2301 case MO_BEQ: 2302 stq_be_p(haddr, val); 2303 break; 2304 case MO_LEQ: 2305 stq_le_p(haddr, val); 2306 break; 2307 default: 2308 qemu_build_not_reached(); 2309 } 2310 } 2311 2312 static void __attribute__((noinline)) 2313 store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val, 2314 uintptr_t retaddr, size_t size, uintptr_t mmu_idx, 2315 bool big_endian) 2316 { 2317 const size_t tlb_off = offsetof(CPUTLBEntry, addr_write); 2318 uintptr_t index, index2; 2319 CPUTLBEntry *entry, *entry2; 2320 target_ulong page2, tlb_addr, tlb_addr2; 2321 TCGMemOpIdx oi; 2322 size_t size2; 2323 int i; 2324 2325 /* 2326 * Ensure the second page is in the TLB. Note that the first page 2327 * is already guaranteed to be filled, and that the second page 2328 * cannot evict the first. 2329 */ 2330 page2 = (addr + size) & TARGET_PAGE_MASK; 2331 size2 = (addr + size) & ~TARGET_PAGE_MASK; 2332 index2 = tlb_index(env, mmu_idx, page2); 2333 entry2 = tlb_entry(env, mmu_idx, page2); 2334 2335 tlb_addr2 = tlb_addr_write(entry2); 2336 if (!tlb_hit_page(tlb_addr2, page2)) { 2337 if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) { 2338 tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE, 2339 mmu_idx, retaddr); 2340 index2 = tlb_index(env, mmu_idx, page2); 2341 entry2 = tlb_entry(env, mmu_idx, page2); 2342 } 2343 tlb_addr2 = tlb_addr_write(entry2); 2344 } 2345 2346 index = tlb_index(env, mmu_idx, addr); 2347 entry = tlb_entry(env, mmu_idx, addr); 2348 tlb_addr = tlb_addr_write(entry); 2349 2350 /* 2351 * Handle watchpoints. Since this may trap, all checks 2352 * must happen before any store. 2353 */ 2354 if (unlikely(tlb_addr & TLB_WATCHPOINT)) { 2355 cpu_check_watchpoint(env_cpu(env), addr, size - size2, 2356 env_tlb(env)->d[mmu_idx].iotlb[index].attrs, 2357 BP_MEM_WRITE, retaddr); 2358 } 2359 if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) { 2360 cpu_check_watchpoint(env_cpu(env), page2, size2, 2361 env_tlb(env)->d[mmu_idx].iotlb[index2].attrs, 2362 BP_MEM_WRITE, retaddr); 2363 } 2364 2365 /* 2366 * XXX: not efficient, but simple. 2367 * This loop must go in the forward direction to avoid issues 2368 * with self-modifying code in Windows 64-bit. 2369 */ 2370 oi = make_memop_idx(MO_UB, mmu_idx); 2371 if (big_endian) { 2372 for (i = 0; i < size; ++i) { 2373 /* Big-endian extract. */ 2374 uint8_t val8 = val >> (((size - 1) * 8) - (i * 8)); 2375 helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr); 2376 } 2377 } else { 2378 for (i = 0; i < size; ++i) { 2379 /* Little-endian extract. 
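               Byte i of the value is (val >> (i * 8)) & 0xff, so e.g.
               0x11223344 is stored as 0x44, 0x33, 0x22, 0x11 at
               successive guest addresses.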
*/ 2380 uint8_t val8 = val >> (i * 8); 2381 helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr); 2382 } 2383 } 2384 } 2385 2386 static inline void QEMU_ALWAYS_INLINE 2387 store_helper(CPUArchState *env, target_ulong addr, uint64_t val, 2388 TCGMemOpIdx oi, uintptr_t retaddr, MemOp op) 2389 { 2390 uintptr_t mmu_idx = get_mmuidx(oi); 2391 uintptr_t index = tlb_index(env, mmu_idx, addr); 2392 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 2393 target_ulong tlb_addr = tlb_addr_write(entry); 2394 const size_t tlb_off = offsetof(CPUTLBEntry, addr_write); 2395 unsigned a_bits = get_alignment_bits(get_memop(oi)); 2396 void *haddr; 2397 size_t size = memop_size(op); 2398 2399 /* Handle CPU specific unaligned behaviour */ 2400 if (addr & ((1 << a_bits) - 1)) { 2401 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE, 2402 mmu_idx, retaddr); 2403 } 2404 2405 /* If the TLB entry is for a different page, reload and try again. */ 2406 if (!tlb_hit(tlb_addr, addr)) { 2407 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, 2408 addr & TARGET_PAGE_MASK)) { 2409 tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE, 2410 mmu_idx, retaddr); 2411 index = tlb_index(env, mmu_idx, addr); 2412 entry = tlb_entry(env, mmu_idx, addr); 2413 } 2414 tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; 2415 } 2416 2417 /* Handle anything that isn't just a straight memory access. */ 2418 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { 2419 CPUIOTLBEntry *iotlbentry; 2420 bool need_swap; 2421 2422 /* For anything that is unaligned, recurse through byte stores. */ 2423 if ((addr & (size - 1)) != 0) { 2424 goto do_unaligned_access; 2425 } 2426 2427 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 2428 2429 /* Handle watchpoints. */ 2430 if (unlikely(tlb_addr & TLB_WATCHPOINT)) { 2431 /* On watchpoint hit, this will longjmp out. */ 2432 cpu_check_watchpoint(env_cpu(env), addr, size, 2433 iotlbentry->attrs, BP_MEM_WRITE, retaddr); 2434 } 2435 2436 need_swap = size > 1 && (tlb_addr & TLB_BSWAP); 2437 2438 /* Handle I/O access. */ 2439 if (tlb_addr & TLB_MMIO) { 2440 io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, 2441 op ^ (need_swap * MO_BSWAP)); 2442 return; 2443 } 2444 2445 /* Ignore writes to ROM. */ 2446 if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) { 2447 return; 2448 } 2449 2450 /* Handle clean RAM pages. */ 2451 if (tlb_addr & TLB_NOTDIRTY) { 2452 notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr); 2453 } 2454 2455 haddr = (void *)((uintptr_t)addr + entry->addend); 2456 2457 /* 2458 * Keep these two store_memop separate to ensure that the compiler 2459 * is able to fold the entire function to a single instruction. 2460 * There is a build-time assert inside to remind you of this. ;-) 2461 */ 2462 if (unlikely(need_swap)) { 2463 store_memop(haddr, val, op ^ MO_BSWAP); 2464 } else { 2465 store_memop(haddr, val, op); 2466 } 2467 return; 2468 } 2469 2470 /* Handle slow unaligned access (it spans two pages or IO). 
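       For example, with 4 KiB target pages a 4-byte store at page offset
       0xffd gives (addr & ~TARGET_PAGE_MASK) + size - 1 == 0x1000, which is
       >= TARGET_PAGE_SIZE, so the store spills into the next page.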
*/ 2471 if (size > 1 2472 && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1 2473 >= TARGET_PAGE_SIZE)) { 2474 do_unaligned_access: 2475 store_helper_unaligned(env, addr, val, retaddr, size, 2476 mmu_idx, memop_big_endian(op)); 2477 return; 2478 } 2479 2480 haddr = (void *)((uintptr_t)addr + entry->addend); 2481 store_memop(haddr, val, op); 2482 } 2483 2484 void __attribute__((noinline)) 2485 helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, 2486 TCGMemOpIdx oi, uintptr_t retaddr) 2487 { 2488 store_helper(env, addr, val, oi, retaddr, MO_UB); 2489 } 2490 2491 void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, 2492 TCGMemOpIdx oi, uintptr_t retaddr) 2493 { 2494 store_helper(env, addr, val, oi, retaddr, MO_LEUW); 2495 } 2496 2497 void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, 2498 TCGMemOpIdx oi, uintptr_t retaddr) 2499 { 2500 store_helper(env, addr, val, oi, retaddr, MO_BEUW); 2501 } 2502 2503 void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, 2504 TCGMemOpIdx oi, uintptr_t retaddr) 2505 { 2506 store_helper(env, addr, val, oi, retaddr, MO_LEUL); 2507 } 2508 2509 void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, 2510 TCGMemOpIdx oi, uintptr_t retaddr) 2511 { 2512 store_helper(env, addr, val, oi, retaddr, MO_BEUL); 2513 } 2514 2515 void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, 2516 TCGMemOpIdx oi, uintptr_t retaddr) 2517 { 2518 store_helper(env, addr, val, oi, retaddr, MO_LEQ); 2519 } 2520 2521 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, 2522 TCGMemOpIdx oi, uintptr_t retaddr) 2523 { 2524 store_helper(env, addr, val, oi, retaddr, MO_BEQ); 2525 } 2526 2527 /* 2528 * Store Helpers for cpu_ldst.h 2529 */ 2530 2531 static inline void QEMU_ALWAYS_INLINE 2532 cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val, 2533 int mmu_idx, uintptr_t retaddr, MemOp op) 2534 { 2535 TCGMemOpIdx oi; 2536 uint16_t meminfo; 2537 2538 meminfo = trace_mem_get_info(op, mmu_idx, true); 2539 trace_guest_mem_before_exec(env_cpu(env), addr, meminfo); 2540 2541 oi = make_memop_idx(op, mmu_idx); 2542 store_helper(env, addr, val, oi, retaddr, op); 2543 2544 qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo); 2545 } 2546 2547 void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, 2548 int mmu_idx, uintptr_t retaddr) 2549 { 2550 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB); 2551 } 2552 2553 void cpu_stw_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, 2554 int mmu_idx, uintptr_t retaddr) 2555 { 2556 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUW); 2557 } 2558 2559 void cpu_stl_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, 2560 int mmu_idx, uintptr_t retaddr) 2561 { 2562 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUL); 2563 } 2564 2565 void cpu_stq_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val, 2566 int mmu_idx, uintptr_t retaddr) 2567 { 2568 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEQ); 2569 } 2570 2571 void cpu_stw_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, 2572 int mmu_idx, uintptr_t retaddr) 2573 { 2574 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUW); 2575 } 2576 2577 void cpu_stl_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, 2578 int mmu_idx, uintptr_t retaddr) 2579 { 2580 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUL); 2581 } 2582 2583 void 
cpu_stq_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
                     int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEQ);
}

void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stw_be_data_ra(CPUArchState *env, target_ulong ptr,
                        uint32_t val, uintptr_t retaddr)
{
    cpu_stw_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stl_be_data_ra(CPUArchState *env, target_ulong ptr,
                        uint32_t val, uintptr_t retaddr)
{
    cpu_stl_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stq_be_data_ra(CPUArchState *env, target_ulong ptr,
                        uint64_t val, uintptr_t retaddr)
{
    cpu_stq_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stw_le_data_ra(CPUArchState *env, target_ulong ptr,
                        uint32_t val, uintptr_t retaddr)
{
    cpu_stw_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stl_le_data_ra(CPUArchState *env, target_ulong ptr,
                        uint32_t val, uintptr_t retaddr)
{
    cpu_stl_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stq_le_data_ra(CPUArchState *env, target_ulong ptr,
                        uint64_t val, uintptr_t retaddr)
{
    cpu_stq_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stb_data_ra(env, ptr, val, 0);
}

void cpu_stw_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stw_be_data_ra(env, ptr, val, 0);
}

void cpu_stl_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stl_be_data_ra(env, ptr, val, 0);
}

void cpu_stq_be_data(CPUArchState *env, target_ulong ptr, uint64_t val)
{
    cpu_stq_be_data_ra(env, ptr, val, 0);
}

void cpu_stw_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stw_le_data_ra(env, ptr, val, 0);
}

void cpu_stl_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stl_le_data_ra(env, ptr, val, 0);
}

void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val)
{
    cpu_stq_le_data_ra(env, ptr, val, 0);
}

/* The first set of helpers allows passing in OI and RETADDR.  This makes
   them callable from other helpers. */

#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
#define ATOMIC_NAME(X) \
    HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
#define ATOMIC_MMU_DECLS
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
#define ATOMIC_MMU_CLEANUP
#define ATOMIC_MMU_IDX get_mmuidx(oi)

#include "atomic_common.c.inc"

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif

/*
 * The second set of helpers is directly callable from TCG as helpers.
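 * Rather than taking RETADDR as an extra argument, they obtain the
 * return address themselves via GETPC() in the ATOMIC_MMU_LOOKUP
 * definition that follows.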
 */

#undef EXTRA_ARGS
#undef ATOMIC_NAME
#undef ATOMIC_MMU_LOOKUP
#define EXTRA_ARGS , TCGMemOpIdx oi
#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC())

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif
#undef ATOMIC_MMU_IDX

/* Code access functions. */

static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
}

uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
    return full_ldub_code(env, addr, oi, 0);
}

static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
}

uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
    return full_lduw_code(env, addr, oi, 0);
}

static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
}

uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
    return full_ldl_code(env, addr, oi, 0);
}

static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
}

uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
{
    TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
    return full_ldq_code(env, addr, oi, 0);
}
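
/*
 * Illustrative sketch only (not compiled): a typical use of the
 * cpu_ldst.h wrappers defined above from a target helper.  The helper
 * name, the "swap two words" operation and the CPUFooState type are
 * hypothetical; the point is that GETPC() is captured once in the
 * outermost helper and passed down as the return address so that a
 * TLB fill can unwind to the guest instruction.
 *
 *   uint64_t HELPER(foo_swapw)(CPUFooState *env, target_ulong addr)
 *   {
 *       uintptr_t ra = GETPC();
 *       uint32_t lo = cpu_ldl_le_data_ra(env, addr, ra);
 *       uint32_t hi = cpu_ldl_le_data_ra(env, addr + 4, ra);
 *
 *       cpu_stl_le_data_ra(env, addr, hi, ra);
 *       cpu_stl_le_data_ra(env, addr + 4, lo, ra);
 *       return ((uint64_t)hi << 32) | lo;
 *   }
 */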