1 /* 2 * Common CPU TLB handling 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qemu/main-loop.h" 22 #include "cpu.h" 23 #include "exec/exec-all.h" 24 #include "exec/memory.h" 25 #include "exec/address-spaces.h" 26 #include "exec/cpu_ldst.h" 27 #include "exec/cputlb.h" 28 #include "exec/memory-internal.h" 29 #include "exec/ram_addr.h" 30 #include "tcg/tcg.h" 31 #include "qemu/error-report.h" 32 #include "exec/log.h" 33 #include "exec/helper-proto.h" 34 #include "qemu/atomic.h" 35 #include "qemu/atomic128.h" 36 #include "translate-all.h" 37 #include "trace-root.h" 38 #include "trace/mem.h" 39 #ifdef CONFIG_PLUGIN 40 #include "qemu/plugin-memory.h" 41 #endif 42 43 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ 44 /* #define DEBUG_TLB */ 45 /* #define DEBUG_TLB_LOG */ 46 47 #ifdef DEBUG_TLB 48 # define DEBUG_TLB_GATE 1 49 # ifdef DEBUG_TLB_LOG 50 # define DEBUG_TLB_LOG_GATE 1 51 # else 52 # define DEBUG_TLB_LOG_GATE 0 53 # endif 54 #else 55 # define DEBUG_TLB_GATE 0 56 # define DEBUG_TLB_LOG_GATE 0 57 #endif 58 59 #define tlb_debug(fmt, ...) do { \ 60 if (DEBUG_TLB_LOG_GATE) { \ 61 qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \ 62 ## __VA_ARGS__); \ 63 } else if (DEBUG_TLB_GATE) { \ 64 fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \ 65 } \ 66 } while (0) 67 68 #define assert_cpu_is_self(cpu) do { \ 69 if (DEBUG_TLB_GATE) { \ 70 g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \ 71 } \ 72 } while (0) 73 74 /* run_on_cpu_data.target_ptr should always be big enough for a 75 * target_ulong even on 32 bit builds */ 76 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data)); 77 78 /* We currently can't handle more than 16 bits in the MMUIDX bitmask. 
79 */ 80 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); 81 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1) 82 83 static inline size_t tlb_n_entries(CPUTLBDescFast *fast) 84 { 85 return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1; 86 } 87 88 static inline size_t sizeof_tlb(CPUTLBDescFast *fast) 89 { 90 return fast->mask + (1 << CPU_TLB_ENTRY_BITS); 91 } 92 93 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, 94 size_t max_entries) 95 { 96 desc->window_begin_ns = ns; 97 desc->window_max_entries = max_entries; 98 } 99 100 static void tlb_dyn_init(CPUArchState *env) 101 { 102 int i; 103 104 for (i = 0; i < NB_MMU_MODES; i++) { 105 CPUTLBDesc *desc = &env_tlb(env)->d[i]; 106 size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS; 107 108 tlb_window_reset(desc, get_clock_realtime(), 0); 109 desc->n_used_entries = 0; 110 env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS; 111 env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries); 112 env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries); 113 } 114 } 115 116 /** 117 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary 118 * @desc: The CPUTLBDesc portion of the TLB 119 * @fast: The CPUTLBDescFast portion of the same TLB 120 * 121 * Called with tlb_lock_held. 122 * 123 * We have two main constraints when resizing a TLB: (1) we only resize it 124 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing 125 * the array or unnecessarily flushing it), which means we do not control how 126 * frequently the resizing can occur; (2) we don't have access to the guest's 127 * future scheduling decisions, and therefore have to decide the magnitude of 128 * the resize based on past observations. 129 * 130 * In general, a memory-hungry process can benefit greatly from an appropriately 131 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that 132 * we just have to make the TLB as large as possible; while an oversized TLB 133 * results in minimal TLB miss rates, it also takes longer to be flushed 134 * (flushes can be _very_ frequent), and the reduced locality can also hurt 135 * performance. 136 * 137 * To achieve near-optimal performance for all kinds of workloads, we: 138 * 139 * 1. Aggressively increase the size of the TLB when the use rate of the 140 * TLB being flushed is high, since it is likely that in the near future this 141 * memory-hungry process will execute again, and its memory hungriness will 142 * probably be similar. 143 * 144 * 2. Slowly reduce the size of the TLB as the use rate declines over a 145 * reasonably large time window. The rationale is that if in such a time window 146 * we have not observed a high TLB use rate, it is likely that we won't observe 147 * it in the near future. In that case, once a time window expires we downsize 148 * the TLB to match the maximum use rate observed in the window. 149 * 150 * 3. Try to keep the maximum use rate in a time window in the 30-70% range, 151 * since in that range performance is likely near-optimal. Recall that the TLB 152 * is direct mapped, so we want the use rate to be low (or at least not too 153 * high), since otherwise we are likely to have a significant amount of 154 * conflict misses. 
155 */ 156 static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast) 157 { 158 size_t old_size = tlb_n_entries(fast); 159 size_t rate; 160 size_t new_size = old_size; 161 int64_t now = get_clock_realtime(); 162 int64_t window_len_ms = 100; 163 int64_t window_len_ns = window_len_ms * 1000 * 1000; 164 bool window_expired = now > desc->window_begin_ns + window_len_ns; 165 166 if (desc->n_used_entries > desc->window_max_entries) { 167 desc->window_max_entries = desc->n_used_entries; 168 } 169 rate = desc->window_max_entries * 100 / old_size; 170 171 if (rate > 70) { 172 new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS); 173 } else if (rate < 30 && window_expired) { 174 size_t ceil = pow2ceil(desc->window_max_entries); 175 size_t expected_rate = desc->window_max_entries * 100 / ceil; 176 177 /* 178 * Avoid undersizing when the max number of entries seen is just below 179 * a pow2. For instance, if max_entries == 1025, the expected use rate 180 * would be 1025/2048==50%. However, if max_entries == 1023, we'd get 181 * 1023/1024==99.9% use rate, so we'd likely end up doubling the size 182 * later. Thus, make sure that the expected use rate remains below 70%. 183 * (and since we double the size, that means the lowest rate we'd 184 * expect to get is 35%, which is still in the 30-70% range where 185 * we consider that the size is appropriate.) 186 */ 187 if (expected_rate > 70) { 188 ceil *= 2; 189 } 190 new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS); 191 } 192 193 if (new_size == old_size) { 194 if (window_expired) { 195 tlb_window_reset(desc, now, desc->n_used_entries); 196 } 197 return; 198 } 199 200 g_free(fast->table); 201 g_free(desc->iotlb); 202 203 tlb_window_reset(desc, now, 0); 204 /* desc->n_used_entries is cleared by the caller */ 205 fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; 206 fast->table = g_try_new(CPUTLBEntry, new_size); 207 desc->iotlb = g_try_new(CPUIOTLBEntry, new_size); 208 209 /* 210 * If the allocations fail, try smaller sizes. We just freed some 211 * memory, so going back to half of new_size has a good chance of working. 212 * Increased memory pressure elsewhere in the system might cause the 213 * allocations to fail though, so we progressively reduce the allocation 214 * size, aborting if we cannot even allocate the smallest TLB we support. 
215 */ 216 while (fast->table == NULL || desc->iotlb == NULL) { 217 if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) { 218 error_report("%s: %s", __func__, strerror(errno)); 219 abort(); 220 } 221 new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS); 222 fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; 223 224 g_free(fast->table); 225 g_free(desc->iotlb); 226 fast->table = g_try_new(CPUTLBEntry, new_size); 227 desc->iotlb = g_try_new(CPUIOTLBEntry, new_size); 228 } 229 } 230 231 static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast) 232 { 233 desc->n_used_entries = 0; 234 desc->large_page_addr = -1; 235 desc->large_page_mask = -1; 236 desc->vindex = 0; 237 memset(fast->table, -1, sizeof_tlb(fast)); 238 memset(desc->vtable, -1, sizeof(desc->vtable)); 239 } 240 241 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx) 242 { 243 CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx]; 244 CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx]; 245 246 tlb_mmu_resize_locked(desc, fast); 247 tlb_mmu_flush_locked(desc, fast); 248 } 249 250 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx) 251 { 252 env_tlb(env)->d[mmu_idx].n_used_entries++; 253 } 254 255 static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx) 256 { 257 env_tlb(env)->d[mmu_idx].n_used_entries--; 258 } 259 260 void tlb_init(CPUState *cpu) 261 { 262 CPUArchState *env = cpu->env_ptr; 263 264 qemu_spin_init(&env_tlb(env)->c.lock); 265 266 /* Ensure that cpu_reset performs a full flush. */ 267 env_tlb(env)->c.dirty = ALL_MMUIDX_BITS; 268 269 tlb_dyn_init(env); 270 } 271 272 /* flush_all_helper: run fn across all cpus 273 * 274 * If the wait flag is set then the src cpu's helper will be queued as 275 * "safe" work and the loop exited creating a synchronisation point 276 * where all queued work will be finished before execution starts 277 * again. 
278 */ 279 static void flush_all_helper(CPUState *src, run_on_cpu_func fn, 280 run_on_cpu_data d) 281 { 282 CPUState *cpu; 283 284 CPU_FOREACH(cpu) { 285 if (cpu != src) { 286 async_run_on_cpu(cpu, fn, d); 287 } 288 } 289 } 290 291 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide) 292 { 293 CPUState *cpu; 294 size_t full = 0, part = 0, elide = 0; 295 296 CPU_FOREACH(cpu) { 297 CPUArchState *env = cpu->env_ptr; 298 299 full += atomic_read(&env_tlb(env)->c.full_flush_count); 300 part += atomic_read(&env_tlb(env)->c.part_flush_count); 301 elide += atomic_read(&env_tlb(env)->c.elide_flush_count); 302 } 303 *pfull = full; 304 *ppart = part; 305 *pelide = elide; 306 } 307 308 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) 309 { 310 CPUArchState *env = cpu->env_ptr; 311 uint16_t asked = data.host_int; 312 uint16_t all_dirty, work, to_clean; 313 314 assert_cpu_is_self(cpu); 315 316 tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked); 317 318 qemu_spin_lock(&env_tlb(env)->c.lock); 319 320 all_dirty = env_tlb(env)->c.dirty; 321 to_clean = asked & all_dirty; 322 all_dirty &= ~to_clean; 323 env_tlb(env)->c.dirty = all_dirty; 324 325 for (work = to_clean; work != 0; work &= work - 1) { 326 int mmu_idx = ctz32(work); 327 tlb_flush_one_mmuidx_locked(env, mmu_idx); 328 } 329 330 qemu_spin_unlock(&env_tlb(env)->c.lock); 331 332 cpu_tb_jmp_cache_clear(cpu); 333 334 if (to_clean == ALL_MMUIDX_BITS) { 335 atomic_set(&env_tlb(env)->c.full_flush_count, 336 env_tlb(env)->c.full_flush_count + 1); 337 } else { 338 atomic_set(&env_tlb(env)->c.part_flush_count, 339 env_tlb(env)->c.part_flush_count + ctpop16(to_clean)); 340 if (to_clean != asked) { 341 atomic_set(&env_tlb(env)->c.elide_flush_count, 342 env_tlb(env)->c.elide_flush_count + 343 ctpop16(asked & ~to_clean)); 344 } 345 } 346 } 347 348 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap) 349 { 350 tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap); 351 352 if (cpu->created && !qemu_cpu_is_self(cpu)) { 353 async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work, 354 RUN_ON_CPU_HOST_INT(idxmap)); 355 } else { 356 tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap)); 357 } 358 } 359 360 void tlb_flush(CPUState *cpu) 361 { 362 tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS); 363 } 364 365 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap) 366 { 367 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work; 368 369 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap); 370 371 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); 372 fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap)); 373 } 374 375 void tlb_flush_all_cpus(CPUState *src_cpu) 376 { 377 tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS); 378 } 379 380 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap) 381 { 382 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work; 383 384 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap); 385 386 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); 387 async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); 388 } 389 390 void tlb_flush_all_cpus_synced(CPUState *src_cpu) 391 { 392 tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS); 393 } 394 395 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, 396 target_ulong page) 397 { 398 return tlb_hit_page(tlb_entry->addr_read, page) || 399 tlb_hit_page(tlb_addr_write(tlb_entry), page) || 400 tlb_hit_page(tlb_entry->addr_code, page); 401 } 402 403 /** 404 * tlb_entry_is_empty - return true if the entry is 
not in use 405 * @te: pointer to CPUTLBEntry 406 */ 407 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te) 408 { 409 return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1; 410 } 411 412 /* Called with tlb_c.lock held */ 413 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, 414 target_ulong page) 415 { 416 if (tlb_hit_page_anyprot(tlb_entry, page)) { 417 memset(tlb_entry, -1, sizeof(*tlb_entry)); 418 return true; 419 } 420 return false; 421 } 422 423 /* Called with tlb_c.lock held */ 424 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, 425 target_ulong page) 426 { 427 CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx]; 428 int k; 429 430 assert_cpu_is_self(env_cpu(env)); 431 for (k = 0; k < CPU_VTLB_SIZE; k++) { 432 if (tlb_flush_entry_locked(&d->vtable[k], page)) { 433 tlb_n_used_entries_dec(env, mmu_idx); 434 } 435 } 436 } 437 438 static void tlb_flush_page_locked(CPUArchState *env, int midx, 439 target_ulong page) 440 { 441 target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr; 442 target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask; 443 444 /* Check if we need to flush due to large pages. */ 445 if ((page & lp_mask) == lp_addr) { 446 tlb_debug("forcing full flush midx %d (" 447 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", 448 midx, lp_addr, lp_mask); 449 tlb_flush_one_mmuidx_locked(env, midx); 450 } else { 451 if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) { 452 tlb_n_used_entries_dec(env, midx); 453 } 454 tlb_flush_vtlb_page_locked(env, midx, page); 455 } 456 } 457 458 /** 459 * tlb_flush_page_by_mmuidx_async_0: 460 * @cpu: cpu on which to flush 461 * @addr: page of virtual address to flush 462 * @idxmap: set of mmu_idx to flush 463 * 464 * Helper for tlb_flush_page_by_mmuidx and friends, flush one page 465 * at @addr from the tlbs indicated by @idxmap from @cpu. 466 */ 467 static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu, 468 target_ulong addr, 469 uint16_t idxmap) 470 { 471 CPUArchState *env = cpu->env_ptr; 472 int mmu_idx; 473 474 assert_cpu_is_self(cpu); 475 476 tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap); 477 478 qemu_spin_lock(&env_tlb(env)->c.lock); 479 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 480 if ((idxmap >> mmu_idx) & 1) { 481 tlb_flush_page_locked(env, mmu_idx, addr); 482 } 483 } 484 qemu_spin_unlock(&env_tlb(env)->c.lock); 485 486 tb_flush_jmp_cache(cpu, addr); 487 } 488 489 /** 490 * tlb_flush_page_by_mmuidx_async_1: 491 * @cpu: cpu on which to flush 492 * @data: encoded addr + idxmap 493 * 494 * Helper for tlb_flush_page_by_mmuidx and friends, called through 495 * async_run_on_cpu. The idxmap parameter is encoded in the page 496 * offset of the target_ptr field. This limits the set of mmu_idx 497 * that can be passed via this method. 498 */ 499 static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu, 500 run_on_cpu_data data) 501 { 502 target_ulong addr_and_idxmap = (target_ulong) data.target_ptr; 503 target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK; 504 uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK; 505 506 tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap); 507 } 508 509 typedef struct { 510 target_ulong addr; 511 uint16_t idxmap; 512 } TLBFlushPageByMMUIdxData; 513 514 /** 515 * tlb_flush_page_by_mmuidx_async_2: 516 * @cpu: cpu on which to flush 517 * @data: allocated addr + idxmap 518 * 519 * Helper for tlb_flush_page_by_mmuidx and friends, called through 520 * async_run_on_cpu. 
The addr+idxmap parameters are stored in a 521 * TLBFlushPageByMMUIdxData structure that has been allocated 522 * specifically for this helper. Free the structure when done. 523 */ 524 static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu, 525 run_on_cpu_data data) 526 { 527 TLBFlushPageByMMUIdxData *d = data.host_ptr; 528 529 tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap); 530 g_free(d); 531 } 532 533 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap) 534 { 535 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap); 536 537 /* This should already be page aligned */ 538 addr &= TARGET_PAGE_MASK; 539 540 if (qemu_cpu_is_self(cpu)) { 541 tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap); 542 } else if (idxmap < TARGET_PAGE_SIZE) { 543 /* 544 * Most targets have only a few mmu_idx. In the case where 545 * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid 546 * allocating memory for this operation. 547 */ 548 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1, 549 RUN_ON_CPU_TARGET_PTR(addr | idxmap)); 550 } else { 551 TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1); 552 553 /* Otherwise allocate a structure, freed by the worker. */ 554 d->addr = addr; 555 d->idxmap = idxmap; 556 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2, 557 RUN_ON_CPU_HOST_PTR(d)); 558 } 559 } 560 561 void tlb_flush_page(CPUState *cpu, target_ulong addr) 562 { 563 tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS); 564 } 565 566 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr, 567 uint16_t idxmap) 568 { 569 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap); 570 571 /* This should already be page aligned */ 572 addr &= TARGET_PAGE_MASK; 573 574 /* 575 * Allocate memory to hold addr+idxmap only when needed. 576 * See tlb_flush_page_by_mmuidx for details. 577 */ 578 if (idxmap < TARGET_PAGE_SIZE) { 579 flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1, 580 RUN_ON_CPU_TARGET_PTR(addr | idxmap)); 581 } else { 582 CPUState *dst_cpu; 583 584 /* Allocate a separate data block for each destination cpu. */ 585 CPU_FOREACH(dst_cpu) { 586 if (dst_cpu != src_cpu) { 587 TLBFlushPageByMMUIdxData *d 588 = g_new(TLBFlushPageByMMUIdxData, 1); 589 590 d->addr = addr; 591 d->idxmap = idxmap; 592 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2, 593 RUN_ON_CPU_HOST_PTR(d)); 594 } 595 } 596 } 597 598 tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap); 599 } 600 601 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr) 602 { 603 tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS); 604 } 605 606 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu, 607 target_ulong addr, 608 uint16_t idxmap) 609 { 610 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap); 611 612 /* This should already be page aligned */ 613 addr &= TARGET_PAGE_MASK; 614 615 /* 616 * Allocate memory to hold addr+idxmap only when needed. 617 * See tlb_flush_page_by_mmuidx for details. 618 */ 619 if (idxmap < TARGET_PAGE_SIZE) { 620 flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1, 621 RUN_ON_CPU_TARGET_PTR(addr | idxmap)); 622 async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1, 623 RUN_ON_CPU_TARGET_PTR(addr | idxmap)); 624 } else { 625 CPUState *dst_cpu; 626 TLBFlushPageByMMUIdxData *d; 627 628 /* Allocate a separate data block for each destination cpu. 
*/ 629 CPU_FOREACH(dst_cpu) { 630 if (dst_cpu != src_cpu) { 631 d = g_new(TLBFlushPageByMMUIdxData, 1); 632 d->addr = addr; 633 d->idxmap = idxmap; 634 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2, 635 RUN_ON_CPU_HOST_PTR(d)); 636 } 637 } 638 639 d = g_new(TLBFlushPageByMMUIdxData, 1); 640 d->addr = addr; 641 d->idxmap = idxmap; 642 async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2, 643 RUN_ON_CPU_HOST_PTR(d)); 644 } 645 } 646 647 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr) 648 { 649 tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS); 650 } 651 652 /* update the TLBs so that writes to code in the virtual page 'addr' 653 can be detected */ 654 void tlb_protect_code(ram_addr_t ram_addr) 655 { 656 cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE, 657 DIRTY_MEMORY_CODE); 658 } 659 660 /* update the TLB so that writes in physical page 'phys_addr' are no longer 661 tested for self modifying code */ 662 void tlb_unprotect_code(ram_addr_t ram_addr) 663 { 664 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE); 665 } 666 667 668 /* 669 * Dirty write flag handling 670 * 671 * When the TCG code writes to a location it looks up the address in 672 * the TLB and uses that data to compute the final address. If any of 673 * the lower bits of the address are set then the slow path is forced. 674 * There are a number of reasons to do this but for normal RAM the 675 * most usual is detecting writes to code regions which may invalidate 676 * generated code. 677 * 678 * Other vCPUs might be reading their TLBs during guest execution, so we update 679 * te->addr_write with atomic_set. We don't need to worry about this for 680 * oversized guests as MTTCG is disabled for them. 681 * 682 * Called with tlb_c.lock held. 683 */ 684 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry, 685 uintptr_t start, uintptr_t length) 686 { 687 uintptr_t addr = tlb_entry->addr_write; 688 689 if ((addr & (TLB_INVALID_MASK | TLB_MMIO | 690 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) { 691 addr &= TARGET_PAGE_MASK; 692 addr += tlb_entry->addend; 693 if ((addr - start) < length) { 694 #if TCG_OVERSIZED_GUEST 695 tlb_entry->addr_write |= TLB_NOTDIRTY; 696 #else 697 atomic_set(&tlb_entry->addr_write, 698 tlb_entry->addr_write | TLB_NOTDIRTY); 699 #endif 700 } 701 } 702 } 703 704 /* 705 * Called with tlb_c.lock held. 706 * Called only from the vCPU context, i.e. the TLB's owner thread. 707 */ 708 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s) 709 { 710 *d = *s; 711 } 712 713 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of 714 * the target vCPU). 715 * We must take tlb_c.lock to avoid racing with another vCPU update. The only 716 * thing actually updated is the target TLB entry ->addr_write flags. 
717 */ 718 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) 719 { 720 CPUArchState *env; 721 722 int mmu_idx; 723 724 env = cpu->env_ptr; 725 qemu_spin_lock(&env_tlb(env)->c.lock); 726 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 727 unsigned int i; 728 unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]); 729 730 for (i = 0; i < n; i++) { 731 tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i], 732 start1, length); 733 } 734 735 for (i = 0; i < CPU_VTLB_SIZE; i++) { 736 tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i], 737 start1, length); 738 } 739 } 740 qemu_spin_unlock(&env_tlb(env)->c.lock); 741 } 742 743 /* Called with tlb_c.lock held */ 744 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry, 745 target_ulong vaddr) 746 { 747 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) { 748 tlb_entry->addr_write = vaddr; 749 } 750 } 751 752 /* update the TLB corresponding to virtual page vaddr 753 so that it is no longer dirty */ 754 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) 755 { 756 CPUArchState *env = cpu->env_ptr; 757 int mmu_idx; 758 759 assert_cpu_is_self(cpu); 760 761 vaddr &= TARGET_PAGE_MASK; 762 qemu_spin_lock(&env_tlb(env)->c.lock); 763 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 764 tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr); 765 } 766 767 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 768 int k; 769 for (k = 0; k < CPU_VTLB_SIZE; k++) { 770 tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr); 771 } 772 } 773 qemu_spin_unlock(&env_tlb(env)->c.lock); 774 } 775 776 /* Our TLB does not support large pages, so remember the area covered by 777 large pages and trigger a full TLB flush if these are invalidated. */ 778 static void tlb_add_large_page(CPUArchState *env, int mmu_idx, 779 target_ulong vaddr, target_ulong size) 780 { 781 target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr; 782 target_ulong lp_mask = ~(size - 1); 783 784 if (lp_addr == (target_ulong)-1) { 785 /* No previous large page. */ 786 lp_addr = vaddr; 787 } else { 788 /* Extend the existing region to include the new page. 789 This is a compromise between unnecessary flushes and 790 the cost of maintaining a full variable size TLB. */ 791 lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask; 792 while (((lp_addr ^ vaddr) & lp_mask) != 0) { 793 lp_mask <<= 1; 794 } 795 } 796 env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask; 797 env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask; 798 } 799 800 /* Add a new TLB entry. At most one entry for a given virtual address 801 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the 802 * supplied size is only used by tlb_flush_page. 803 * 804 * Called from TCG-generated code, which is under an RCU read-side 805 * critical section. 
806 */ 807 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, 808 hwaddr paddr, MemTxAttrs attrs, int prot, 809 int mmu_idx, target_ulong size) 810 { 811 CPUArchState *env = cpu->env_ptr; 812 CPUTLB *tlb = env_tlb(env); 813 CPUTLBDesc *desc = &tlb->d[mmu_idx]; 814 MemoryRegionSection *section; 815 unsigned int index; 816 target_ulong address; 817 target_ulong write_address; 818 uintptr_t addend; 819 CPUTLBEntry *te, tn; 820 hwaddr iotlb, xlat, sz, paddr_page; 821 target_ulong vaddr_page; 822 int asidx = cpu_asidx_from_attrs(cpu, attrs); 823 int wp_flags; 824 bool is_ram, is_romd; 825 826 assert_cpu_is_self(cpu); 827 828 if (size <= TARGET_PAGE_SIZE) { 829 sz = TARGET_PAGE_SIZE; 830 } else { 831 tlb_add_large_page(env, mmu_idx, vaddr, size); 832 sz = size; 833 } 834 vaddr_page = vaddr & TARGET_PAGE_MASK; 835 paddr_page = paddr & TARGET_PAGE_MASK; 836 837 section = address_space_translate_for_iotlb(cpu, asidx, paddr_page, 838 &xlat, &sz, attrs, &prot); 839 assert(sz >= TARGET_PAGE_SIZE); 840 841 tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx 842 " prot=%x idx=%d\n", 843 vaddr, paddr, prot, mmu_idx); 844 845 address = vaddr_page; 846 if (size < TARGET_PAGE_SIZE) { 847 /* Repeat the MMU check and TLB fill on every access. */ 848 address |= TLB_INVALID_MASK; 849 } 850 if (attrs.byte_swap) { 851 address |= TLB_BSWAP; 852 } 853 854 is_ram = memory_region_is_ram(section->mr); 855 is_romd = memory_region_is_romd(section->mr); 856 857 if (is_ram || is_romd) { 858 /* RAM and ROMD both have associated host memory. */ 859 addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; 860 } else { 861 /* I/O does not; force the host address to NULL. */ 862 addend = 0; 863 } 864 865 write_address = address; 866 if (is_ram) { 867 iotlb = memory_region_get_ram_addr(section->mr) + xlat; 868 /* 869 * Computing is_clean is expensive; avoid all that unless 870 * the page is actually writable. 871 */ 872 if (prot & PAGE_WRITE) { 873 if (section->readonly) { 874 write_address |= TLB_DISCARD_WRITE; 875 } else if (cpu_physical_memory_is_clean(iotlb)) { 876 write_address |= TLB_NOTDIRTY; 877 } 878 } 879 } else { 880 /* I/O or ROMD */ 881 iotlb = memory_region_section_get_iotlb(cpu, section) + xlat; 882 /* 883 * Writes to romd devices must go through MMIO to enable write. 884 * Reads to romd devices go through the ram_ptr found above, 885 * but of course reads to I/O must go through MMIO. 886 */ 887 write_address |= TLB_MMIO; 888 if (!is_romd) { 889 address = write_address; 890 } 891 } 892 893 wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page, 894 TARGET_PAGE_SIZE); 895 896 index = tlb_index(env, mmu_idx, vaddr_page); 897 te = tlb_entry(env, mmu_idx, vaddr_page); 898 899 /* 900 * Hold the TLB lock for the rest of the function. We could acquire/release 901 * the lock several times in the function, but it is faster to amortize the 902 * acquisition cost by acquiring it just once. Note that this leads to 903 * a longer critical section, but this is not a concern since the TLB lock 904 * is unlikely to be contended. 905 */ 906 qemu_spin_lock(&tlb->c.lock); 907 908 /* Note that the tlb is no longer clean. */ 909 tlb->c.dirty |= 1 << mmu_idx; 910 911 /* Make sure there's no cached translation for the new page. */ 912 tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page); 913 914 /* 915 * Only evict the old entry to the victim tlb if it's for a 916 * different page; otherwise just overwrite the stale data. 
917 */ 918 if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) { 919 unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE; 920 CPUTLBEntry *tv = &desc->vtable[vidx]; 921 922 /* Evict the old entry into the victim tlb. */ 923 copy_tlb_helper_locked(tv, te); 924 desc->viotlb[vidx] = desc->iotlb[index]; 925 tlb_n_used_entries_dec(env, mmu_idx); 926 } 927 928 /* refill the tlb */ 929 /* 930 * At this point iotlb contains a physical section number in the lower 931 * TARGET_PAGE_BITS, and either 932 * + the ram_addr_t of the page base of the target RAM (RAM) 933 * + the offset within section->mr of the page base (I/O, ROMD) 934 * We subtract the vaddr_page (which is page aligned and thus won't 935 * disturb the low bits) to give an offset which can be added to the 936 * (non-page-aligned) vaddr of the eventual memory access to get 937 * the MemoryRegion offset for the access. Note that the vaddr we 938 * subtract here is that of the page base, and not the same as the 939 * vaddr we add back in io_readx()/io_writex()/get_page_addr_code(). 940 */ 941 desc->iotlb[index].addr = iotlb - vaddr_page; 942 desc->iotlb[index].attrs = attrs; 943 944 /* Now calculate the new entry */ 945 tn.addend = addend - vaddr_page; 946 if (prot & PAGE_READ) { 947 tn.addr_read = address; 948 if (wp_flags & BP_MEM_READ) { 949 tn.addr_read |= TLB_WATCHPOINT; 950 } 951 } else { 952 tn.addr_read = -1; 953 } 954 955 if (prot & PAGE_EXEC) { 956 tn.addr_code = address; 957 } else { 958 tn.addr_code = -1; 959 } 960 961 tn.addr_write = -1; 962 if (prot & PAGE_WRITE) { 963 tn.addr_write = write_address; 964 if (prot & PAGE_WRITE_INV) { 965 tn.addr_write |= TLB_INVALID_MASK; 966 } 967 if (wp_flags & BP_MEM_WRITE) { 968 tn.addr_write |= TLB_WATCHPOINT; 969 } 970 } 971 972 copy_tlb_helper_locked(te, &tn); 973 tlb_n_used_entries_inc(env, mmu_idx); 974 qemu_spin_unlock(&tlb->c.lock); 975 } 976 977 /* Add a new TLB entry, but without specifying the memory 978 * transaction attributes to be used. 979 */ 980 void tlb_set_page(CPUState *cpu, target_ulong vaddr, 981 hwaddr paddr, int prot, 982 int mmu_idx, target_ulong size) 983 { 984 tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED, 985 prot, mmu_idx, size); 986 } 987 988 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr) 989 { 990 ram_addr_t ram_addr; 991 992 ram_addr = qemu_ram_addr_from_host(ptr); 993 if (ram_addr == RAM_ADDR_INVALID) { 994 error_report("Bad ram pointer %p", ptr); 995 abort(); 996 } 997 return ram_addr; 998 } 999 1000 /* 1001 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the 1002 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must 1003 * be discarded and looked up again (e.g. via tlb_entry()). 1004 */ 1005 static void tlb_fill(CPUState *cpu, target_ulong addr, int size, 1006 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) 1007 { 1008 CPUClass *cc = CPU_GET_CLASS(cpu); 1009 bool ok; 1010 1011 /* 1012 * This is not a probe, so only valid return is success; failure 1013 * should result in exception + longjmp to the cpu loop. 
1014 */ 1015 ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr); 1016 assert(ok); 1017 } 1018 1019 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, 1020 int mmu_idx, target_ulong addr, uintptr_t retaddr, 1021 MMUAccessType access_type, MemOp op) 1022 { 1023 CPUState *cpu = env_cpu(env); 1024 hwaddr mr_offset; 1025 MemoryRegionSection *section; 1026 MemoryRegion *mr; 1027 uint64_t val; 1028 bool locked = false; 1029 MemTxResult r; 1030 1031 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); 1032 mr = section->mr; 1033 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; 1034 cpu->mem_io_pc = retaddr; 1035 if (!cpu->can_do_io) { 1036 cpu_io_recompile(cpu, retaddr); 1037 } 1038 1039 if (mr->global_locking && !qemu_mutex_iothread_locked()) { 1040 qemu_mutex_lock_iothread(); 1041 locked = true; 1042 } 1043 r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs); 1044 if (r != MEMTX_OK) { 1045 hwaddr physaddr = mr_offset + 1046 section->offset_within_address_space - 1047 section->offset_within_region; 1048 1049 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type, 1050 mmu_idx, iotlbentry->attrs, r, retaddr); 1051 } 1052 if (locked) { 1053 qemu_mutex_unlock_iothread(); 1054 } 1055 1056 return val; 1057 } 1058 1059 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, 1060 int mmu_idx, uint64_t val, target_ulong addr, 1061 uintptr_t retaddr, MemOp op) 1062 { 1063 CPUState *cpu = env_cpu(env); 1064 hwaddr mr_offset; 1065 MemoryRegionSection *section; 1066 MemoryRegion *mr; 1067 bool locked = false; 1068 MemTxResult r; 1069 1070 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); 1071 mr = section->mr; 1072 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; 1073 if (!cpu->can_do_io) { 1074 cpu_io_recompile(cpu, retaddr); 1075 } 1076 cpu->mem_io_pc = retaddr; 1077 1078 if (mr->global_locking && !qemu_mutex_iothread_locked()) { 1079 qemu_mutex_lock_iothread(); 1080 locked = true; 1081 } 1082 r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs); 1083 if (r != MEMTX_OK) { 1084 hwaddr physaddr = mr_offset + 1085 section->offset_within_address_space - 1086 section->offset_within_region; 1087 1088 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), 1089 MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r, 1090 retaddr); 1091 } 1092 if (locked) { 1093 qemu_mutex_unlock_iothread(); 1094 } 1095 } 1096 1097 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs) 1098 { 1099 #if TCG_OVERSIZED_GUEST 1100 return *(target_ulong *)((uintptr_t)entry + ofs); 1101 #else 1102 /* ofs might correspond to .addr_write, so use atomic_read */ 1103 return atomic_read((target_ulong *)((uintptr_t)entry + ofs)); 1104 #endif 1105 } 1106 1107 /* Return true if ADDR is present in the victim tlb, and has been copied 1108 back to the main tlb. 
*/ 1109 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, 1110 size_t elt_ofs, target_ulong page) 1111 { 1112 size_t vidx; 1113 1114 assert_cpu_is_self(env_cpu(env)); 1115 for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { 1116 CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx]; 1117 target_ulong cmp; 1118 1119 /* elt_ofs might correspond to .addr_write, so use atomic_read */ 1120 #if TCG_OVERSIZED_GUEST 1121 cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); 1122 #else 1123 cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs)); 1124 #endif 1125 1126 if (cmp == page) { 1127 /* Found entry in victim tlb, swap tlb and iotlb. */ 1128 CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index]; 1129 1130 qemu_spin_lock(&env_tlb(env)->c.lock); 1131 copy_tlb_helper_locked(&tmptlb, tlb); 1132 copy_tlb_helper_locked(tlb, vtlb); 1133 copy_tlb_helper_locked(vtlb, &tmptlb); 1134 qemu_spin_unlock(&env_tlb(env)->c.lock); 1135 1136 CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1137 CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx]; 1138 tmpio = *io; *io = *vio; *vio = tmpio; 1139 return true; 1140 } 1141 } 1142 return false; 1143 } 1144 1145 /* Macro to call the above, with local variables from the use context. */ 1146 #define VICTIM_TLB_HIT(TY, ADDR) \ 1147 victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \ 1148 (ADDR) & TARGET_PAGE_MASK) 1149 1150 /* 1151 * Return a ram_addr_t for the virtual address for execution. 1152 * 1153 * Return -1 if we can't translate and execute from an entire page 1154 * of RAM. This will force us to execute by loading and translating 1155 * one insn at a time, without caching. 1156 * 1157 * NOTE: This function will trigger an exception if the page is 1158 * not executable. 1159 */ 1160 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, 1161 void **hostp) 1162 { 1163 uintptr_t mmu_idx = cpu_mmu_index(env, true); 1164 uintptr_t index = tlb_index(env, mmu_idx, addr); 1165 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1166 void *p; 1167 1168 if (unlikely(!tlb_hit(entry->addr_code, addr))) { 1169 if (!VICTIM_TLB_HIT(addr_code, addr)) { 1170 tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0); 1171 index = tlb_index(env, mmu_idx, addr); 1172 entry = tlb_entry(env, mmu_idx, addr); 1173 1174 if (unlikely(entry->addr_code & TLB_INVALID_MASK)) { 1175 /* 1176 * The MMU protection covers a smaller range than a target 1177 * page, so we must redo the MMU check for every insn. 1178 */ 1179 return -1; 1180 } 1181 } 1182 assert(tlb_hit(entry->addr_code, addr)); 1183 } 1184 1185 if (unlikely(entry->addr_code & TLB_MMIO)) { 1186 /* The region is not backed by RAM. 
*/ 1187 if (hostp) { 1188 *hostp = NULL; 1189 } 1190 return -1; 1191 } 1192 1193 p = (void *)((uintptr_t)addr + entry->addend); 1194 if (hostp) { 1195 *hostp = p; 1196 } 1197 return qemu_ram_addr_from_host_nofail(p); 1198 } 1199 1200 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) 1201 { 1202 return get_page_addr_code_hostp(env, addr, NULL); 1203 } 1204 1205 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, 1206 CPUIOTLBEntry *iotlbentry, uintptr_t retaddr) 1207 { 1208 ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr; 1209 1210 trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size); 1211 1212 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { 1213 struct page_collection *pages 1214 = page_collection_lock(ram_addr, ram_addr + size); 1215 tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr); 1216 page_collection_unlock(pages); 1217 } 1218 1219 /* 1220 * Set both VGA and migration bits for simplicity and to remove 1221 * the notdirty callback faster. 1222 */ 1223 cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE); 1224 1225 /* We remove the notdirty callback only if the code has been flushed. */ 1226 if (!cpu_physical_memory_is_clean(ram_addr)) { 1227 trace_memory_notdirty_set_dirty(mem_vaddr); 1228 tlb_set_dirty(cpu, mem_vaddr); 1229 } 1230 } 1231 1232 /* 1233 * Probe for whether the specified guest access is permitted. If it is not 1234 * permitted then an exception will be taken in the same way as if this 1235 * were a real access (and we will not return). 1236 * If the size is 0 or the page requires I/O access, returns NULL; otherwise, 1237 * returns the address of the host page similar to tlb_vaddr_to_host(). 1238 */ 1239 void *probe_access(CPUArchState *env, target_ulong addr, int size, 1240 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) 1241 { 1242 uintptr_t index = tlb_index(env, mmu_idx, addr); 1243 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1244 target_ulong tlb_addr; 1245 size_t elt_ofs; 1246 int wp_access; 1247 1248 g_assert(-(addr | TARGET_PAGE_MASK) >= size); 1249 1250 switch (access_type) { 1251 case MMU_DATA_LOAD: 1252 elt_ofs = offsetof(CPUTLBEntry, addr_read); 1253 wp_access = BP_MEM_READ; 1254 break; 1255 case MMU_DATA_STORE: 1256 elt_ofs = offsetof(CPUTLBEntry, addr_write); 1257 wp_access = BP_MEM_WRITE; 1258 break; 1259 case MMU_INST_FETCH: 1260 elt_ofs = offsetof(CPUTLBEntry, addr_code); 1261 wp_access = BP_MEM_READ; 1262 break; 1263 default: 1264 g_assert_not_reached(); 1265 } 1266 tlb_addr = tlb_read_ofs(entry, elt_ofs); 1267 1268 if (unlikely(!tlb_hit(tlb_addr, addr))) { 1269 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, 1270 addr & TARGET_PAGE_MASK)) { 1271 tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, retaddr); 1272 /* TLB resize via tlb_fill may have moved the entry. */ 1273 index = tlb_index(env, mmu_idx, addr); 1274 entry = tlb_entry(env, mmu_idx, addr); 1275 } 1276 tlb_addr = tlb_read_ofs(entry, elt_ofs); 1277 } 1278 1279 if (!size) { 1280 return NULL; 1281 } 1282 1283 if (unlikely(tlb_addr & TLB_FLAGS_MASK)) { 1284 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1285 1286 /* Reject I/O access, or other required slow-path. */ 1287 if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) { 1288 return NULL; 1289 } 1290 1291 /* Handle watchpoints. 
*/ 1292 if (tlb_addr & TLB_WATCHPOINT) { 1293 cpu_check_watchpoint(env_cpu(env), addr, size, 1294 iotlbentry->attrs, wp_access, retaddr); 1295 } 1296 1297 /* Handle clean RAM pages. */ 1298 if (tlb_addr & TLB_NOTDIRTY) { 1299 notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr); 1300 } 1301 } 1302 1303 return (void *)((uintptr_t)addr + entry->addend); 1304 } 1305 1306 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, 1307 MMUAccessType access_type, int mmu_idx) 1308 { 1309 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1310 target_ulong tlb_addr, page; 1311 size_t elt_ofs; 1312 1313 switch (access_type) { 1314 case MMU_DATA_LOAD: 1315 elt_ofs = offsetof(CPUTLBEntry, addr_read); 1316 break; 1317 case MMU_DATA_STORE: 1318 elt_ofs = offsetof(CPUTLBEntry, addr_write); 1319 break; 1320 case MMU_INST_FETCH: 1321 elt_ofs = offsetof(CPUTLBEntry, addr_code); 1322 break; 1323 default: 1324 g_assert_not_reached(); 1325 } 1326 1327 page = addr & TARGET_PAGE_MASK; 1328 tlb_addr = tlb_read_ofs(entry, elt_ofs); 1329 1330 if (!tlb_hit_page(tlb_addr, page)) { 1331 uintptr_t index = tlb_index(env, mmu_idx, addr); 1332 1333 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) { 1334 CPUState *cs = env_cpu(env); 1335 CPUClass *cc = CPU_GET_CLASS(cs); 1336 1337 if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) { 1338 /* Non-faulting page table read failed. */ 1339 return NULL; 1340 } 1341 1342 /* TLB resize via tlb_fill may have moved the entry. */ 1343 entry = tlb_entry(env, mmu_idx, addr); 1344 } 1345 tlb_addr = tlb_read_ofs(entry, elt_ofs); 1346 } 1347 1348 if (tlb_addr & ~TARGET_PAGE_MASK) { 1349 /* IO access */ 1350 return NULL; 1351 } 1352 1353 return (void *)((uintptr_t)addr + entry->addend); 1354 } 1355 1356 1357 #ifdef CONFIG_PLUGIN 1358 /* 1359 * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure. 1360 * This should be a hot path as we will have just looked this path up 1361 * in the softmmu lookup code (or helper). We don't handle re-fills or 1362 * checking the victim table. This is purely informational. 1363 * 1364 * This should never fail as the memory access being instrumented 1365 * should have just filled the TLB. 1366 */ 1367 1368 bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx, 1369 bool is_store, struct qemu_plugin_hwaddr *data) 1370 { 1371 CPUArchState *env = cpu->env_ptr; 1372 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); 1373 uintptr_t index = tlb_index(env, mmu_idx, addr); 1374 target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read; 1375 1376 if (likely(tlb_hit(tlb_addr, addr))) { 1377 /* We must have an iotlb entry for MMIO */ 1378 if (tlb_addr & TLB_MMIO) { 1379 CPUIOTLBEntry *iotlbentry; 1380 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1381 data->is_io = true; 1382 data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); 1383 data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; 1384 } else { 1385 data->is_io = false; 1386 data->v.ram.hostaddr = addr + tlbe->addend; 1387 } 1388 return true; 1389 } 1390 return false; 1391 } 1392 1393 #endif 1394 1395 /* Probe for a read-modify-write atomic operation. Do not allow unaligned 1396 * operations, or io operations to proceed. Return the host address. 
*/ 1397 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, 1398 TCGMemOpIdx oi, uintptr_t retaddr) 1399 { 1400 size_t mmu_idx = get_mmuidx(oi); 1401 uintptr_t index = tlb_index(env, mmu_idx, addr); 1402 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); 1403 target_ulong tlb_addr = tlb_addr_write(tlbe); 1404 MemOp mop = get_memop(oi); 1405 int a_bits = get_alignment_bits(mop); 1406 int s_bits = mop & MO_SIZE; 1407 void *hostaddr; 1408 1409 /* Adjust the given return address. */ 1410 retaddr -= GETPC_ADJ; 1411 1412 /* Enforce guest required alignment. */ 1413 if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) { 1414 /* ??? Maybe indicate atomic op to cpu_unaligned_access */ 1415 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE, 1416 mmu_idx, retaddr); 1417 } 1418 1419 /* Enforce qemu required alignment. */ 1420 if (unlikely(addr & ((1 << s_bits) - 1))) { 1421 /* We get here if guest alignment was not requested, 1422 or was not enforced by cpu_unaligned_access above. 1423 We might widen the access and emulate, but for now 1424 mark an exception and exit the cpu loop. */ 1425 goto stop_the_world; 1426 } 1427 1428 /* Check TLB entry and enforce page permissions. */ 1429 if (!tlb_hit(tlb_addr, addr)) { 1430 if (!VICTIM_TLB_HIT(addr_write, addr)) { 1431 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE, 1432 mmu_idx, retaddr); 1433 index = tlb_index(env, mmu_idx, addr); 1434 tlbe = tlb_entry(env, mmu_idx, addr); 1435 } 1436 tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; 1437 } 1438 1439 /* Notice an IO access or a needs-MMU-lookup access */ 1440 if (unlikely(tlb_addr & TLB_MMIO)) { 1441 /* There's really nothing that can be done to 1442 support this apart from stop-the-world. */ 1443 goto stop_the_world; 1444 } 1445 1446 /* Let the guest notice RMW on a write-only page. */ 1447 if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) { 1448 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD, 1449 mmu_idx, retaddr); 1450 /* Since we don't support reads and writes to different addresses, 1451 and we do have the proper page loaded for write, this shouldn't 1452 ever return. But just in case, handle via stop-the-world. */ 1453 goto stop_the_world; 1454 } 1455 1456 hostaddr = (void *)((uintptr_t)addr + tlbe->addend); 1457 1458 if (unlikely(tlb_addr & TLB_NOTDIRTY)) { 1459 notdirty_write(env_cpu(env), addr, 1 << s_bits, 1460 &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr); 1461 } 1462 1463 return hostaddr; 1464 1465 stop_the_world: 1466 cpu_loop_exit_atomic(env_cpu(env), retaddr); 1467 } 1468 1469 /* 1470 * Load Helpers 1471 * 1472 * We support two different access types. SOFTMMU_CODE_ACCESS is 1473 * specifically for reading instructions from system memory. It is 1474 * called by the translation loop and in some helpers where the code 1475 * is disassembled. It shouldn't be called directly by guest code. 
1476 */ 1477 1478 typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr, 1479 TCGMemOpIdx oi, uintptr_t retaddr); 1480 1481 static inline uint64_t QEMU_ALWAYS_INLINE 1482 load_memop(const void *haddr, MemOp op) 1483 { 1484 switch (op) { 1485 case MO_UB: 1486 return ldub_p(haddr); 1487 case MO_BEUW: 1488 return lduw_be_p(haddr); 1489 case MO_LEUW: 1490 return lduw_le_p(haddr); 1491 case MO_BEUL: 1492 return (uint32_t)ldl_be_p(haddr); 1493 case MO_LEUL: 1494 return (uint32_t)ldl_le_p(haddr); 1495 case MO_BEQ: 1496 return ldq_be_p(haddr); 1497 case MO_LEQ: 1498 return ldq_le_p(haddr); 1499 default: 1500 qemu_build_not_reached(); 1501 } 1502 } 1503 1504 static inline uint64_t QEMU_ALWAYS_INLINE 1505 load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, 1506 uintptr_t retaddr, MemOp op, bool code_read, 1507 FullLoadHelper *full_load) 1508 { 1509 uintptr_t mmu_idx = get_mmuidx(oi); 1510 uintptr_t index = tlb_index(env, mmu_idx, addr); 1511 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1512 target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read; 1513 const size_t tlb_off = code_read ? 1514 offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read); 1515 const MMUAccessType access_type = 1516 code_read ? MMU_INST_FETCH : MMU_DATA_LOAD; 1517 unsigned a_bits = get_alignment_bits(get_memop(oi)); 1518 void *haddr; 1519 uint64_t res; 1520 size_t size = memop_size(op); 1521 1522 /* Handle CPU specific unaligned behaviour */ 1523 if (addr & ((1 << a_bits) - 1)) { 1524 cpu_unaligned_access(env_cpu(env), addr, access_type, 1525 mmu_idx, retaddr); 1526 } 1527 1528 /* If the TLB entry is for a different page, reload and try again. */ 1529 if (!tlb_hit(tlb_addr, addr)) { 1530 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, 1531 addr & TARGET_PAGE_MASK)) { 1532 tlb_fill(env_cpu(env), addr, size, 1533 access_type, mmu_idx, retaddr); 1534 index = tlb_index(env, mmu_idx, addr); 1535 entry = tlb_entry(env, mmu_idx, addr); 1536 } 1537 tlb_addr = code_read ? entry->addr_code : entry->addr_read; 1538 tlb_addr &= ~TLB_INVALID_MASK; 1539 } 1540 1541 /* Handle anything that isn't just a straight memory access. */ 1542 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { 1543 CPUIOTLBEntry *iotlbentry; 1544 bool need_swap; 1545 1546 /* For anything that is unaligned, recurse through full_load. */ 1547 if ((addr & (size - 1)) != 0) { 1548 goto do_unaligned_access; 1549 } 1550 1551 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1552 1553 /* Handle watchpoints. */ 1554 if (unlikely(tlb_addr & TLB_WATCHPOINT)) { 1555 /* On watchpoint hit, this will longjmp out. */ 1556 cpu_check_watchpoint(env_cpu(env), addr, size, 1557 iotlbentry->attrs, BP_MEM_READ, retaddr); 1558 } 1559 1560 need_swap = size > 1 && (tlb_addr & TLB_BSWAP); 1561 1562 /* Handle I/O access. */ 1563 if (likely(tlb_addr & TLB_MMIO)) { 1564 return io_readx(env, iotlbentry, mmu_idx, addr, retaddr, 1565 access_type, op ^ (need_swap * MO_BSWAP)); 1566 } 1567 1568 haddr = (void *)((uintptr_t)addr + entry->addend); 1569 1570 /* 1571 * Keep these two load_memop separate to ensure that the compiler 1572 * is able to fold the entire function to a single instruction. 1573 * There is a build-time assert inside to remind you of this. ;-) 1574 */ 1575 if (unlikely(need_swap)) { 1576 return load_memop(haddr, op ^ MO_BSWAP); 1577 } 1578 return load_memop(haddr, op); 1579 } 1580 1581 /* Handle slow unaligned access (it spans two pages or IO). 
*/ 1582 if (size > 1 1583 && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1 1584 >= TARGET_PAGE_SIZE)) { 1585 target_ulong addr1, addr2; 1586 uint64_t r1, r2; 1587 unsigned shift; 1588 do_unaligned_access: 1589 addr1 = addr & ~((target_ulong)size - 1); 1590 addr2 = addr1 + size; 1591 r1 = full_load(env, addr1, oi, retaddr); 1592 r2 = full_load(env, addr2, oi, retaddr); 1593 shift = (addr & (size - 1)) * 8; 1594 1595 if (memop_big_endian(op)) { 1596 /* Big-endian combine. */ 1597 res = (r1 << shift) | (r2 >> ((size * 8) - shift)); 1598 } else { 1599 /* Little-endian combine. */ 1600 res = (r1 >> shift) | (r2 << ((size * 8) - shift)); 1601 } 1602 return res & MAKE_64BIT_MASK(0, size * 8); 1603 } 1604 1605 haddr = (void *)((uintptr_t)addr + entry->addend); 1606 return load_memop(haddr, op); 1607 } 1608 1609 /* 1610 * For the benefit of TCG generated code, we want to avoid the 1611 * complication of ABI-specific return type promotion and always 1612 * return a value extended to the register size of the host. This is 1613 * tcg_target_long, except in the case of a 32-bit host and 64-bit 1614 * data, and for that we always have uint64_t. 1615 * 1616 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS. 1617 */ 1618 1619 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr, 1620 TCGMemOpIdx oi, uintptr_t retaddr) 1621 { 1622 return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu); 1623 } 1624 1625 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, 1626 TCGMemOpIdx oi, uintptr_t retaddr) 1627 { 1628 return full_ldub_mmu(env, addr, oi, retaddr); 1629 } 1630 1631 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr, 1632 TCGMemOpIdx oi, uintptr_t retaddr) 1633 { 1634 return load_helper(env, addr, oi, retaddr, MO_LEUW, false, 1635 full_le_lduw_mmu); 1636 } 1637 1638 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr, 1639 TCGMemOpIdx oi, uintptr_t retaddr) 1640 { 1641 return full_le_lduw_mmu(env, addr, oi, retaddr); 1642 } 1643 1644 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr, 1645 TCGMemOpIdx oi, uintptr_t retaddr) 1646 { 1647 return load_helper(env, addr, oi, retaddr, MO_BEUW, false, 1648 full_be_lduw_mmu); 1649 } 1650 1651 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr, 1652 TCGMemOpIdx oi, uintptr_t retaddr) 1653 { 1654 return full_be_lduw_mmu(env, addr, oi, retaddr); 1655 } 1656 1657 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr, 1658 TCGMemOpIdx oi, uintptr_t retaddr) 1659 { 1660 return load_helper(env, addr, oi, retaddr, MO_LEUL, false, 1661 full_le_ldul_mmu); 1662 } 1663 1664 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr, 1665 TCGMemOpIdx oi, uintptr_t retaddr) 1666 { 1667 return full_le_ldul_mmu(env, addr, oi, retaddr); 1668 } 1669 1670 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr, 1671 TCGMemOpIdx oi, uintptr_t retaddr) 1672 { 1673 return load_helper(env, addr, oi, retaddr, MO_BEUL, false, 1674 full_be_ldul_mmu); 1675 } 1676 1677 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr, 1678 TCGMemOpIdx oi, uintptr_t retaddr) 1679 { 1680 return full_be_ldul_mmu(env, addr, oi, retaddr); 1681 } 1682 1683 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, 1684 TCGMemOpIdx oi, uintptr_t retaddr) 1685 { 1686 return load_helper(env, addr, oi, retaddr, MO_LEQ, false, 1687 helper_le_ldq_mmu); 1688 } 1689 1690 uint64_t 
helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, 1691 TCGMemOpIdx oi, uintptr_t retaddr) 1692 { 1693 return load_helper(env, addr, oi, retaddr, MO_BEQ, false, 1694 helper_be_ldq_mmu); 1695 } 1696 1697 /* 1698 * Provide signed versions of the load routines as well. We can of course 1699 * avoid this for 64-bit data, or for 32-bit data on 32-bit host. 1700 */ 1701 1702 1703 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, 1704 TCGMemOpIdx oi, uintptr_t retaddr) 1705 { 1706 return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr); 1707 } 1708 1709 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr, 1710 TCGMemOpIdx oi, uintptr_t retaddr) 1711 { 1712 return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr); 1713 } 1714 1715 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr, 1716 TCGMemOpIdx oi, uintptr_t retaddr) 1717 { 1718 return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr); 1719 } 1720 1721 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr, 1722 TCGMemOpIdx oi, uintptr_t retaddr) 1723 { 1724 return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr); 1725 } 1726 1727 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, 1728 TCGMemOpIdx oi, uintptr_t retaddr) 1729 { 1730 return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr); 1731 } 1732 1733 /* 1734 * Load helpers for cpu_ldst.h. 1735 */ 1736 1737 static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr, 1738 int mmu_idx, uintptr_t retaddr, 1739 MemOp op, FullLoadHelper *full_load) 1740 { 1741 uint16_t meminfo; 1742 TCGMemOpIdx oi; 1743 uint64_t ret; 1744 1745 meminfo = trace_mem_get_info(op, mmu_idx, false); 1746 trace_guest_mem_before_exec(env_cpu(env), addr, meminfo); 1747 1748 op &= ~MO_SIGN; 1749 oi = make_memop_idx(op, mmu_idx); 1750 ret = full_load(env, addr, oi, retaddr); 1751 1752 qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo); 1753 1754 return ret; 1755 } 1756 1757 uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr, 1758 int mmu_idx, uintptr_t ra) 1759 { 1760 return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu); 1761 } 1762 1763 int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, 1764 int mmu_idx, uintptr_t ra) 1765 { 1766 return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB, 1767 full_ldub_mmu); 1768 } 1769 1770 uint32_t cpu_lduw_mmuidx_ra(CPUArchState *env, abi_ptr addr, 1771 int mmu_idx, uintptr_t ra) 1772 { 1773 return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUW, 1774 MO_TE == MO_LE 1775 ? full_le_lduw_mmu : full_be_lduw_mmu); 1776 } 1777 1778 int cpu_ldsw_mmuidx_ra(CPUArchState *env, abi_ptr addr, 1779 int mmu_idx, uintptr_t ra) 1780 { 1781 return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_TESW, 1782 MO_TE == MO_LE 1783 ? full_le_lduw_mmu : full_be_lduw_mmu); 1784 } 1785 1786 uint32_t cpu_ldl_mmuidx_ra(CPUArchState *env, abi_ptr addr, 1787 int mmu_idx, uintptr_t ra) 1788 { 1789 return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUL, 1790 MO_TE == MO_LE 1791 ? full_le_ldul_mmu : full_be_ldul_mmu); 1792 } 1793 1794 uint64_t cpu_ldq_mmuidx_ra(CPUArchState *env, abi_ptr addr, 1795 int mmu_idx, uintptr_t ra) 1796 { 1797 return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEQ, 1798 MO_TE == MO_LE 1799 ? 
helper_le_ldq_mmu : helper_be_ldq_mmu); 1800 } 1801 1802 uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr, 1803 uintptr_t retaddr) 1804 { 1805 return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 1806 } 1807 1808 int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) 1809 { 1810 return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 1811 } 1812 1813 uint32_t cpu_lduw_data_ra(CPUArchState *env, target_ulong ptr, 1814 uintptr_t retaddr) 1815 { 1816 return cpu_lduw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 1817 } 1818 1819 int cpu_ldsw_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) 1820 { 1821 return cpu_ldsw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 1822 } 1823 1824 uint32_t cpu_ldl_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) 1825 { 1826 return cpu_ldl_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 1827 } 1828 1829 uint64_t cpu_ldq_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) 1830 { 1831 return cpu_ldq_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 1832 } 1833 1834 uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr) 1835 { 1836 return cpu_ldub_data_ra(env, ptr, 0); 1837 } 1838 1839 int cpu_ldsb_data(CPUArchState *env, target_ulong ptr) 1840 { 1841 return cpu_ldsb_data_ra(env, ptr, 0); 1842 } 1843 1844 uint32_t cpu_lduw_data(CPUArchState *env, target_ulong ptr) 1845 { 1846 return cpu_lduw_data_ra(env, ptr, 0); 1847 } 1848 1849 int cpu_ldsw_data(CPUArchState *env, target_ulong ptr) 1850 { 1851 return cpu_ldsw_data_ra(env, ptr, 0); 1852 } 1853 1854 uint32_t cpu_ldl_data(CPUArchState *env, target_ulong ptr) 1855 { 1856 return cpu_ldl_data_ra(env, ptr, 0); 1857 } 1858 1859 uint64_t cpu_ldq_data(CPUArchState *env, target_ulong ptr) 1860 { 1861 return cpu_ldq_data_ra(env, ptr, 0); 1862 } 1863 1864 /* 1865 * Store Helpers 1866 */ 1867 1868 static inline void QEMU_ALWAYS_INLINE 1869 store_memop(void *haddr, uint64_t val, MemOp op) 1870 { 1871 switch (op) { 1872 case MO_UB: 1873 stb_p(haddr, val); 1874 break; 1875 case MO_BEUW: 1876 stw_be_p(haddr, val); 1877 break; 1878 case MO_LEUW: 1879 stw_le_p(haddr, val); 1880 break; 1881 case MO_BEUL: 1882 stl_be_p(haddr, val); 1883 break; 1884 case MO_LEUL: 1885 stl_le_p(haddr, val); 1886 break; 1887 case MO_BEQ: 1888 stq_be_p(haddr, val); 1889 break; 1890 case MO_LEQ: 1891 stq_le_p(haddr, val); 1892 break; 1893 default: 1894 qemu_build_not_reached(); 1895 } 1896 } 1897 1898 static inline void QEMU_ALWAYS_INLINE 1899 store_helper(CPUArchState *env, target_ulong addr, uint64_t val, 1900 TCGMemOpIdx oi, uintptr_t retaddr, MemOp op) 1901 { 1902 uintptr_t mmu_idx = get_mmuidx(oi); 1903 uintptr_t index = tlb_index(env, mmu_idx, addr); 1904 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1905 target_ulong tlb_addr = tlb_addr_write(entry); 1906 const size_t tlb_off = offsetof(CPUTLBEntry, addr_write); 1907 unsigned a_bits = get_alignment_bits(get_memop(oi)); 1908 void *haddr; 1909 size_t size = memop_size(op); 1910 1911 /* Handle CPU specific unaligned behaviour */ 1912 if (addr & ((1 << a_bits) - 1)) { 1913 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE, 1914 mmu_idx, retaddr); 1915 } 1916 1917 /* If the TLB entry is for a different page, reload and try again. 
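 * We consult the victim TLB first; only on a complete miss do we call
 * tlb_fill(), which either installs a fresh entry or raises the guest
 * fault, using retaddr to restore the guest state before the exception
 * is delivered.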
*/ 1918 if (!tlb_hit(tlb_addr, addr)) { 1919 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, 1920 addr & TARGET_PAGE_MASK)) { 1921 tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE, 1922 mmu_idx, retaddr); 1923 index = tlb_index(env, mmu_idx, addr); 1924 entry = tlb_entry(env, mmu_idx, addr); 1925 } 1926 tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; 1927 } 1928 1929 /* Handle anything that isn't just a straight memory access. */ 1930 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { 1931 CPUIOTLBEntry *iotlbentry; 1932 bool need_swap; 1933 1934 /* For anything that is unaligned, recurse through byte stores. */ 1935 if ((addr & (size - 1)) != 0) { 1936 goto do_unaligned_access; 1937 } 1938 1939 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1940 1941 /* Handle watchpoints. */ 1942 if (unlikely(tlb_addr & TLB_WATCHPOINT)) { 1943 /* On watchpoint hit, this will longjmp out. */ 1944 cpu_check_watchpoint(env_cpu(env), addr, size, 1945 iotlbentry->attrs, BP_MEM_WRITE, retaddr); 1946 } 1947 1948 need_swap = size > 1 && (tlb_addr & TLB_BSWAP); 1949 1950 /* Handle I/O access. */ 1951 if (tlb_addr & TLB_MMIO) { 1952 io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, 1953 op ^ (need_swap * MO_BSWAP)); 1954 return; 1955 } 1956 1957 /* Ignore writes to ROM. */ 1958 if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) { 1959 return; 1960 } 1961 1962 /* Handle clean RAM pages. */ 1963 if (tlb_addr & TLB_NOTDIRTY) { 1964 notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr); 1965 } 1966 1967 haddr = (void *)((uintptr_t)addr + entry->addend); 1968 1969 /* 1970 * Keep these two store_memop separate to ensure that the compiler 1971 * is able to fold the entire function to a single instruction. 1972 * There is a build-time assert inside to remind you of this. ;-) 1973 */ 1974 if (unlikely(need_swap)) { 1975 store_memop(haddr, val, op ^ MO_BSWAP); 1976 } else { 1977 store_memop(haddr, val, op); 1978 } 1979 return; 1980 } 1981 1982 /* Handle slow unaligned access (it spans two pages or IO). */ 1983 if (size > 1 1984 && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1 1985 >= TARGET_PAGE_SIZE)) { 1986 int i; 1987 uintptr_t index2; 1988 CPUTLBEntry *entry2; 1989 target_ulong page2, tlb_addr2; 1990 size_t size2; 1991 1992 do_unaligned_access: 1993 /* 1994 * Ensure the second page is in the TLB. Note that the first page 1995 * is already guaranteed to be filled, and that the second page 1996 * cannot evict the first. 1997 */ 1998 page2 = (addr + size) & TARGET_PAGE_MASK; 1999 size2 = (addr + size) & ~TARGET_PAGE_MASK; 2000 index2 = tlb_index(env, mmu_idx, page2); 2001 entry2 = tlb_entry(env, mmu_idx, page2); 2002 tlb_addr2 = tlb_addr_write(entry2); 2003 if (!tlb_hit_page(tlb_addr2, page2)) { 2004 if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) { 2005 tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE, 2006 mmu_idx, retaddr); 2007 index2 = tlb_index(env, mmu_idx, page2); 2008 entry2 = tlb_entry(env, mmu_idx, page2); 2009 } 2010 tlb_addr2 = tlb_addr_write(entry2); 2011 } 2012 2013 /* 2014 * Handle watchpoints. Since this may trap, all checks 2015 * must happen before any store. 
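 * The checks for the first and the second page both happen here, before
 * the byte-by-byte store loop below writes anything.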
2016 */ 2017 if (unlikely(tlb_addr & TLB_WATCHPOINT)) { 2018 cpu_check_watchpoint(env_cpu(env), addr, size - size2, 2019 env_tlb(env)->d[mmu_idx].iotlb[index].attrs, 2020 BP_MEM_WRITE, retaddr); 2021 } 2022 if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) { 2023 cpu_check_watchpoint(env_cpu(env), page2, size2, 2024 env_tlb(env)->d[mmu_idx].iotlb[index2].attrs, 2025 BP_MEM_WRITE, retaddr); 2026 } 2027 2028 /* 2029 * XXX: not efficient, but simple. 2030 * This loop must go in the forward direction to avoid issues 2031 * with self-modifying code in Windows 64-bit. 2032 */ 2033 for (i = 0; i < size; ++i) { 2034 uint8_t val8; 2035 if (memop_big_endian(op)) { 2036 /* Big-endian extract. */ 2037 val8 = val >> (((size - 1) * 8) - (i * 8)); 2038 } else { 2039 /* Little-endian extract. */ 2040 val8 = val >> (i * 8); 2041 } 2042 helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr); 2043 } 2044 return; 2045 } 2046 2047 haddr = (void *)((uintptr_t)addr + entry->addend); 2048 store_memop(haddr, val, op); 2049 } 2050 2051 void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, 2052 TCGMemOpIdx oi, uintptr_t retaddr) 2053 { 2054 store_helper(env, addr, val, oi, retaddr, MO_UB); 2055 } 2056 2057 void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, 2058 TCGMemOpIdx oi, uintptr_t retaddr) 2059 { 2060 store_helper(env, addr, val, oi, retaddr, MO_LEUW); 2061 } 2062 2063 void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, 2064 TCGMemOpIdx oi, uintptr_t retaddr) 2065 { 2066 store_helper(env, addr, val, oi, retaddr, MO_BEUW); 2067 } 2068 2069 void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, 2070 TCGMemOpIdx oi, uintptr_t retaddr) 2071 { 2072 store_helper(env, addr, val, oi, retaddr, MO_LEUL); 2073 } 2074 2075 void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, 2076 TCGMemOpIdx oi, uintptr_t retaddr) 2077 { 2078 store_helper(env, addr, val, oi, retaddr, MO_BEUL); 2079 } 2080 2081 void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, 2082 TCGMemOpIdx oi, uintptr_t retaddr) 2083 { 2084 store_helper(env, addr, val, oi, retaddr, MO_LEQ); 2085 } 2086 2087 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, 2088 TCGMemOpIdx oi, uintptr_t retaddr) 2089 { 2090 store_helper(env, addr, val, oi, retaddr, MO_BEQ); 2091 } 2092 2093 /* 2094 * Store Helpers for cpu_ldst.h 2095 */ 2096 2097 static inline void QEMU_ALWAYS_INLINE 2098 cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val, 2099 int mmu_idx, uintptr_t retaddr, MemOp op) 2100 { 2101 TCGMemOpIdx oi; 2102 uint16_t meminfo; 2103 2104 meminfo = trace_mem_get_info(op, mmu_idx, true); 2105 trace_guest_mem_before_exec(env_cpu(env), addr, meminfo); 2106 2107 oi = make_memop_idx(op, mmu_idx); 2108 store_helper(env, addr, val, oi, retaddr, op); 2109 2110 qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo); 2111 } 2112 2113 void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, 2114 int mmu_idx, uintptr_t retaddr) 2115 { 2116 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB); 2117 } 2118 2119 void cpu_stw_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, 2120 int mmu_idx, uintptr_t retaddr) 2121 { 2122 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUW); 2123 } 2124 2125 void cpu_stl_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, 2126 int mmu_idx, uintptr_t retaddr) 2127 { 2128 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUL); 2129 } 2130 2131 
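/*
 * Illustrative sketch, not part of the original file: a target helper
 * that must store with an explicit MMU index (for example, pushing to a
 * kernel-mode stack regardless of the current privilege level) would
 * call these wrappers directly.  The helper name and MMU_KERNEL_IDX
 * below are hypothetical:
 *
 *     void helper_push_kernel(CPUArchState *env, target_ulong sp,
 *                             uint32_t val)
 *     {
 *         cpu_stl_mmuidx_ra(env, sp, val, MMU_KERNEL_IDX, GETPC());
 *     }
 */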
void cpu_stq_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val, 2132 int mmu_idx, uintptr_t retaddr) 2133 { 2134 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEQ); 2135 } 2136 2137 void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr, 2138 uint32_t val, uintptr_t retaddr) 2139 { 2140 cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); 2141 } 2142 2143 void cpu_stw_data_ra(CPUArchState *env, target_ulong ptr, 2144 uint32_t val, uintptr_t retaddr) 2145 { 2146 cpu_stw_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); 2147 } 2148 2149 void cpu_stl_data_ra(CPUArchState *env, target_ulong ptr, 2150 uint32_t val, uintptr_t retaddr) 2151 { 2152 cpu_stl_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); 2153 } 2154 2155 void cpu_stq_data_ra(CPUArchState *env, target_ulong ptr, 2156 uint64_t val, uintptr_t retaddr) 2157 { 2158 cpu_stq_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); 2159 } 2160 2161 void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val) 2162 { 2163 cpu_stb_data_ra(env, ptr, val, 0); 2164 } 2165 2166 void cpu_stw_data(CPUArchState *env, target_ulong ptr, uint32_t val) 2167 { 2168 cpu_stw_data_ra(env, ptr, val, 0); 2169 } 2170 2171 void cpu_stl_data(CPUArchState *env, target_ulong ptr, uint32_t val) 2172 { 2173 cpu_stl_data_ra(env, ptr, val, 0); 2174 } 2175 2176 void cpu_stq_data(CPUArchState *env, target_ulong ptr, uint64_t val) 2177 { 2178 cpu_stq_data_ra(env, ptr, val, 0); 2179 } 2180 2181 /* First set of helpers allows passing in of OI and RETADDR. This makes 2182 them callable from other helpers. */ 2183 2184 #define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr 2185 #define ATOMIC_NAME(X) \ 2186 HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu)) 2187 #define ATOMIC_MMU_DECLS 2188 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr) 2189 #define ATOMIC_MMU_CLEANUP 2190 #define ATOMIC_MMU_IDX get_mmuidx(oi) 2191 2192 #include "atomic_common.inc.c" 2193 2194 #define DATA_SIZE 1 2195 #include "atomic_template.h" 2196 2197 #define DATA_SIZE 2 2198 #include "atomic_template.h" 2199 2200 #define DATA_SIZE 4 2201 #include "atomic_template.h" 2202 2203 #ifdef CONFIG_ATOMIC64 2204 #define DATA_SIZE 8 2205 #include "atomic_template.h" 2206 #endif 2207 2208 #if HAVE_CMPXCHG128 || HAVE_ATOMIC128 2209 #define DATA_SIZE 16 2210 #include "atomic_template.h" 2211 #endif 2212 2213 /* Second set of helpers are directly callable from TCG as helpers. */ 2214 2215 #undef EXTRA_ARGS 2216 #undef ATOMIC_NAME 2217 #undef ATOMIC_MMU_LOOKUP 2218 #define EXTRA_ARGS , TCGMemOpIdx oi 2219 #define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END)) 2220 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC()) 2221 2222 #define DATA_SIZE 1 2223 #include "atomic_template.h" 2224 2225 #define DATA_SIZE 2 2226 #include "atomic_template.h" 2227 2228 #define DATA_SIZE 4 2229 #include "atomic_template.h" 2230 2231 #ifdef CONFIG_ATOMIC64 2232 #define DATA_SIZE 8 2233 #include "atomic_template.h" 2234 #endif 2235 #undef ATOMIC_MMU_IDX 2236 2237 /* Code access functions. 
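 * These go through the instruction-fetch MMU index returned by
 * cpu_mmu_index(env, true) and pass a zero retaddr, as they are used at
 * translation time rather than from generated code.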
*/ 2238 2239 static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr, 2240 TCGMemOpIdx oi, uintptr_t retaddr) 2241 { 2242 return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code); 2243 } 2244 2245 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr) 2246 { 2247 TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true)); 2248 return full_ldub_code(env, addr, oi, 0); 2249 } 2250 2251 static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr, 2252 TCGMemOpIdx oi, uintptr_t retaddr) 2253 { 2254 return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code); 2255 } 2256 2257 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr) 2258 { 2259 TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true)); 2260 return full_lduw_code(env, addr, oi, 0); 2261 } 2262 2263 static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr, 2264 TCGMemOpIdx oi, uintptr_t retaddr) 2265 { 2266 return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code); 2267 } 2268 2269 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr) 2270 { 2271 TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true)); 2272 return full_ldl_code(env, addr, oi, 0); 2273 } 2274 2275 static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr, 2276 TCGMemOpIdx oi, uintptr_t retaddr) 2277 { 2278 return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code); 2279 } 2280 2281 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr) 2282 { 2283 TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true)); 2284 return full_ldq_code(env, addr, oi, 0); 2285 } 2286
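/*
 * Illustrative sketch, not part of the original file: a target front end
 * would typically fetch a 32-bit instruction word during translation with
 *
 *     uint32_t insn = cpu_ldl_code(env, pc);
 *
 * where "pc" is the guest address of the instruction being translated.
 * The fetch goes through the same load_helper() path as data accesses
 * (with code_read == true), so TLB misses and MMU faults during
 * instruction fetch are handled by the same machinery.
 */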