/*
 * Common CPU TLB handling
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
#include "exec/cpu_ldst.h"
#include "exec/cputlb.h"
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "tcg/tcg.h"
#include "qemu/error-report.h"
#include "exec/log.h"
#include "exec/helper-proto.h"
#include "qemu/atomic.h"
#include "qemu/atomic128.h"
#include "translate-all.h"
#include "trace-root.h"
#include "trace/mem.h"
#ifdef CONFIG_PLUGIN
#include "qemu/plugin-memory.h"
#endif

/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
/* #define DEBUG_TLB */
/* #define DEBUG_TLB_LOG */

#ifdef DEBUG_TLB
# define DEBUG_TLB_GATE 1
# ifdef DEBUG_TLB_LOG
#  define DEBUG_TLB_LOG_GATE 1
# else
#  define DEBUG_TLB_LOG_GATE 0
# endif
#else
# define DEBUG_TLB_GATE 0
# define DEBUG_TLB_LOG_GATE 0
#endif

/*
 * Debug trace helper: routed to the CPU_LOG_MMU log when DEBUG_TLB_LOG
 * is enabled, to stderr when only DEBUG_TLB is enabled, and compiled
 * away entirely otherwise (the gates are constant 0/1 so the compiler
 * folds the dead branches).
 */
#define tlb_debug(fmt, ...) do { \
    if (DEBUG_TLB_LOG_GATE) { \
        qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
                      ## __VA_ARGS__); \
    } else if (DEBUG_TLB_GATE) { \
        fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
    } \
} while (0)

/*
 * Debug-only check that the current thread owns @cpu; TLB state may only
 * be mutated by its owning vCPU thread (or before the vCPU is created).
 */
#define assert_cpu_is_self(cpu) do { \
    if (DEBUG_TLB_GATE) { \
        g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \
    } \
} while (0)

/* run_on_cpu_data.target_ptr should always be big enough for a
 * target_ulong even on 32 bit builds */
QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));

/* We currently can't handle more than 16 bits in the MMUIDX bitmask.
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)

/* Number of entries in one direct-mapped TLB (mask is size-1 scaled by
 * the entry size, so invert that transformation). */
static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
{
    return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
}

/* Size in bytes of one TLB's entry array, derived from the fast mask. */
static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
{
    return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
}

/* Start a new use-rate observation window at time @ns with an initial
 * maximum of @max_entries used entries. */
static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
                             size_t max_entries)
{
    desc->window_begin_ns = ns;
    desc->window_max_entries = max_entries;
}

/**
 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
 * @desc: The CPUTLBDesc portion of the TLB
 * @fast: The CPUTLBDescFast portion of the same TLB
 *
 * Called with tlb_lock_held.
 *
 * We have two main constraints when resizing a TLB: (1) we only resize it
 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
 * the array or unnecessarily flushing it), which means we do not control how
 * frequently the resizing can occur; (2) we don't have access to the guest's
 * future scheduling decisions, and therefore have to decide the magnitude of
 * the resize based on past observations.
 *
 * In general, a memory-hungry process can benefit greatly from an appropriately
 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
 * we just have to make the TLB as large as possible; while an oversized TLB
 * results in minimal TLB miss rates, it also takes longer to be flushed
 * (flushes can be _very_ frequent), and the reduced locality can also hurt
 * performance.
 *
 * To achieve near-optimal performance for all kinds of workloads, we:
 *
 * 1. Aggressively increase the size of the TLB when the use rate of the
 * TLB being flushed is high, since it is likely that in the near future this
 * memory-hungry process will execute again, and its memory hungriness will
 * probably be similar.
 *
 * 2. Slowly reduce the size of the TLB as the use rate declines over a
 * reasonably large time window. The rationale is that if in such a time window
 * we have not observed a high TLB use rate, it is likely that we won't observe
 * it in the near future. In that case, once a time window expires we downsize
 * the TLB to match the maximum use rate observed in the window.
 *
 * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
 * since in that range performance is likely near-optimal. Recall that the TLB
 * is direct mapped, so we want the use rate to be low (or at least not too
 * high), since otherwise we are likely to have a significant amount of
 * conflict misses.
 */
static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
                                  int64_t now)
{
    size_t old_size = tlb_n_entries(fast);
    size_t rate;
    size_t new_size = old_size;
    int64_t window_len_ms = 100;
    int64_t window_len_ns = window_len_ms * 1000 * 1000;
    bool window_expired = now > desc->window_begin_ns + window_len_ns;

    if (desc->n_used_entries > desc->window_max_entries) {
        desc->window_max_entries = desc->n_used_entries;
    }
    rate = desc->window_max_entries * 100 / old_size;

    if (rate > 70) {
        /* High use rate: double the size, capped at the dynamic maximum. */
        new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
    } else if (rate < 30 && window_expired) {
        size_t ceil = pow2ceil(desc->window_max_entries);
        size_t expected_rate = desc->window_max_entries * 100 / ceil;

        /*
         * Avoid undersizing when the max number of entries seen is just below
         * a pow2. For instance, if max_entries == 1025, the expected use rate
         * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
         * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
         * later. Thus, make sure that the expected use rate remains below 70%.
         * (and since we double the size, that means the lowest rate we'd
         * expect to get is 35%, which is still in the 30-70% range where
         * we consider that the size is appropriate.)
         */
        if (expected_rate > 70) {
            ceil *= 2;
        }
        new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
    }

    if (new_size == old_size) {
        if (window_expired) {
            tlb_window_reset(desc, now, desc->n_used_entries);
        }
        return;
    }

    g_free(fast->table);
    g_free(desc->iotlb);

    tlb_window_reset(desc, now, 0);
    /* desc->n_used_entries is cleared by the caller */
    fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
    fast->table = g_try_new(CPUTLBEntry, new_size);
    desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);

    /*
     * If the allocations fail, try smaller sizes. We just freed some
     * memory, so going back to half of new_size has a good chance of working.
     * Increased memory pressure elsewhere in the system might cause the
     * allocations to fail though, so we progressively reduce the allocation
     * size, aborting if we cannot even allocate the smallest TLB we support.
     */
    while (fast->table == NULL || desc->iotlb == NULL) {
        if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
            error_report("%s: %s", __func__, strerror(errno));
            abort();
        }
        new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
        fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;

        g_free(fast->table);
        g_free(desc->iotlb);
        fast->table = g_try_new(CPUTLBEntry, new_size);
        desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
    }
}

/* Invalidate every entry of one TLB (main table and victim table) by
 * filling with -1; also forget any tracked large-page region.
 * Called with tlb_c.lock held. */
static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
{
    desc->n_used_entries = 0;
    desc->large_page_addr = -1;
    desc->large_page_mask = -1;
    desc->vindex = 0;
    memset(fast->table, -1, sizeof_tlb(fast));
    memset(desc->vtable, -1, sizeof(desc->vtable));
}

/* Resize (if warranted) then flush the TLB for one mmu_idx.
 * Called with tlb_c.lock held. */
static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx,
                                        int64_t now)
{
    CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
    CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];

    tlb_mmu_resize_locked(desc, fast, now);
    tlb_mmu_flush_locked(desc, fast);
}

/* One-time initialization of a single TLB at the default dynamic size. */
static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
{
    size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;

    tlb_window_reset(desc, now, 0);
    desc->n_used_entries = 0;
    fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
    fast->table = g_new(CPUTLBEntry, n_entries);
    desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
    tlb_mmu_flush_locked(desc, fast);
}

/* Bookkeeping for the per-mmu_idx used-entry count (feeds resizing). */
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
{
    env_tlb(env)->d[mmu_idx].n_used_entries++;
}

static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
{
    env_tlb(env)->d[mmu_idx].n_used_entries--;
}

/* Initialize all of @cpu's TLBs; called once at vCPU creation. */
void tlb_init(CPUState *cpu)
{
    CPUArchState *env = cpu->env_ptr;
    int64_t now = get_clock_realtime();
    int i;

    qemu_spin_init(&env_tlb(env)->c.lock);

    /* All tlbs are initialized flushed. */
    env_tlb(env)->c.dirty = 0;

    for (i = 0; i < NB_MMU_MODES; i++) {
        tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
    }
}

/* flush_all_helper: run fn across all cpus
 *
 * If the wait flag is set then the src cpu's helper will be queued as
 * "safe" work and the loop exited creating a synchronisation point
 * where all queued work will be finished before execution starts
 * again.
 */
static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
                             run_on_cpu_data d)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu != src) {
            async_run_on_cpu(cpu, fn, d);
        }
    }
}

/* Sum the flush statistics (full/partial/elided) across all vCPUs. */
void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
{
    CPUState *cpu;
    size_t full = 0, part = 0, elide = 0;

    CPU_FOREACH(cpu) {
        CPUArchState *env = cpu->env_ptr;

        full += atomic_read(&env_tlb(env)->c.full_flush_count);
        part += atomic_read(&env_tlb(env)->c.part_flush_count);
        elide += atomic_read(&env_tlb(env)->c.elide_flush_count);
    }
    *pfull = full;
    *ppart = part;
    *pelide = elide;
}

/* Worker that flushes the TLBs named in data.host_int (a bitmask of
 * mmu_idx), eliding any that are already clean. Runs on the owning vCPU. */
static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
{
    CPUArchState *env = cpu->env_ptr;
    uint16_t asked = data.host_int;
    uint16_t all_dirty, work, to_clean;
    int64_t now = get_clock_realtime();

    assert_cpu_is_self(cpu);

    tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);

    qemu_spin_lock(&env_tlb(env)->c.lock);

    /* Only flush TLBs that are actually dirty; mark them clean again. */
    all_dirty = env_tlb(env)->c.dirty;
    to_clean = asked & all_dirty;
    all_dirty &= ~to_clean;
    env_tlb(env)->c.dirty = all_dirty;

    /* Iterate over the set bits of to_clean, lowest first. */
    for (work = to_clean; work != 0; work &= work - 1) {
        int mmu_idx = ctz32(work);
        tlb_flush_one_mmuidx_locked(env, mmu_idx, now);
    }

    qemu_spin_unlock(&env_tlb(env)->c.lock);

    cpu_tb_jmp_cache_clear(cpu);

    if (to_clean == ALL_MMUIDX_BITS) {
        atomic_set(&env_tlb(env)->c.full_flush_count,
                   env_tlb(env)->c.full_flush_count + 1);
    } else {
        atomic_set(&env_tlb(env)->c.part_flush_count,
                   env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
        if (to_clean != asked) {
            atomic_set(&env_tlb(env)->c.elide_flush_count,
                       env_tlb(env)->c.elide_flush_count +
                       ctpop16(asked & ~to_clean));
        }
    }
}

/* Flush the TLBs in @idxmap on @cpu, cross-queueing the work if the
 * caller is not the owning vCPU thread. */
void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
{
    tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);

    if (cpu->created && !qemu_cpu_is_self(cpu)) {
        async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
                         RUN_ON_CPU_HOST_INT(idxmap));
    } else {
        tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
    }
}

void tlb_flush(CPUState *cpu)
{
    tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
}

/* As tlb_flush_by_mmuidx, but on every vCPU (src runs synchronously). */
void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;

    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
}

void tlb_flush_all_cpus(CPUState *src_cpu)
{
    tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
}

/* As above, but the source's own flush is queued as "safe" work so all
 * vCPUs synchronise before execution resumes. */
void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;

    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
}

void tlb_flush_all_cpus_synced(CPUState *src_cpu)
{
    tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
}

/* True if @page matches the entry under any of read/write/exec. */
static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
                                        target_ulong page)
{
    return tlb_hit_page(tlb_entry->addr_read, page) ||
           tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
           tlb_hit_page(tlb_entry->addr_code, page);
}

/**
 * tlb_entry_is_empty - return true if the entry is not in use
 * @te: pointer to CPUTLBEntry
 */
static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
{
    return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
}

/* Invalidate @tlb_entry if it covers @page; returns whether it did.
 * Called with tlb_c.lock held. */
static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
                                          target_ulong page)
{
    if (tlb_hit_page_anyprot(tlb_entry, page)) {
        memset(tlb_entry, -1, sizeof(*tlb_entry));
        return true;
    }
    return false;
}

/* Flush @page from the victim TLB of @mmu_idx.
 * Called with tlb_c.lock held. */
static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
                                              target_ulong page)
{
    CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
    int k;

    assert_cpu_is_self(env_cpu(env));
    for (k = 0; k < CPU_VTLB_SIZE; k++) {
        if (tlb_flush_entry_locked(&d->vtable[k], page)) {
            tlb_n_used_entries_dec(env, mmu_idx);
        }
    }
}

/* Flush one page from one mmu_idx; falls back to a full flush of that
 * TLB when the page lies inside a tracked large-page region.
 * Called with tlb_c.lock held. */
static void tlb_flush_page_locked(CPUArchState *env, int midx,
                                  target_ulong page)
{
    target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
    target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;

    /* Check if we need to flush due to large pages.  */
    if ((page & lp_mask) == lp_addr) {
        tlb_debug("forcing full flush midx %d ("
                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
                  midx, lp_addr, lp_mask);
        tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
    } else {
        if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
            tlb_n_used_entries_dec(env, midx);
        }
        tlb_flush_vtlb_page_locked(env, midx, page);
    }
}

/**
 * tlb_flush_page_by_mmuidx_async_0:
 * @cpu: cpu on which to flush
 * @addr: page of virtual address to flush
 * @idxmap: set of mmu_idx to flush
 *
 * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
 * at @addr from the tlbs indicated by @idxmap from @cpu.
 */
static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
                                             target_ulong addr,
                                             uint16_t idxmap)
{
    CPUArchState *env = cpu->env_ptr;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);

    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        if ((idxmap >> mmu_idx) & 1) {
            tlb_flush_page_locked(env, mmu_idx, addr);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);

    tb_flush_jmp_cache(cpu, addr);
}

/**
 * tlb_flush_page_by_mmuidx_async_1:
 * @cpu: cpu on which to flush
 * @data: encoded addr + idxmap
 *
 * Helper for tlb_flush_page_by_mmuidx and friends, called through
 * async_run_on_cpu. The idxmap parameter is encoded in the page
 * offset of the target_ptr field. This limits the set of mmu_idx
 * that can be passed via this method.
 */
static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
                                             run_on_cpu_data data)
{
    target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
    target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
    uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;

    tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
}

/* Heap-allocated argument block for the _async_2 worker below. */
typedef struct {
    target_ulong addr;
    uint16_t idxmap;
} TLBFlushPageByMMUIdxData;

/**
 * tlb_flush_page_by_mmuidx_async_2:
 * @cpu: cpu on which to flush
 * @data: allocated addr + idxmap
 *
 * Helper for tlb_flush_page_by_mmuidx and friends, called through
 * async_run_on_cpu. The addr+idxmap parameters are stored in a
 * TLBFlushPageByMMUIdxData structure that has been allocated
 * specifically for this helper. Free the structure when done.
 */
static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
                                             run_on_cpu_data data)
{
    TLBFlushPageByMMUIdxData *d = data.host_ptr;

    tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
    g_free(d);
}

/* Flush one page from the TLBs named by @idxmap on @cpu, choosing the
 * cheapest way to hand the arguments to the owning vCPU thread. */
void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
{
    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);

    /* This should already be page aligned */
    addr &= TARGET_PAGE_MASK;

    if (qemu_cpu_is_self(cpu)) {
        tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
    } else if (idxmap < TARGET_PAGE_SIZE) {
        /*
         * Most targets have only a few mmu_idx. In the case where
         * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
         * allocating memory for this operation.
         */
        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    } else {
        TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);

        /* Otherwise allocate a structure, freed by the worker.  */
        d->addr = addr;
        d->idxmap = idxmap;
        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
                         RUN_ON_CPU_HOST_PTR(d));
    }
}

void tlb_flush_page(CPUState *cpu, target_ulong addr)
{
    tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
}

/* As tlb_flush_page_by_mmuidx, but on every vCPU (src synchronously). */
void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
                                       uint16_t idxmap)
{
    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    /* This should already be page aligned */
    addr &= TARGET_PAGE_MASK;

    /*
     * Allocate memory to hold addr+idxmap only when needed.
     * See tlb_flush_page_by_mmuidx for details.
     */
    if (idxmap < TARGET_PAGE_SIZE) {
        flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    } else {
        CPUState *dst_cpu;

        /* Allocate a separate data block for each destination cpu.  */
        CPU_FOREACH(dst_cpu) {
            if (dst_cpu != src_cpu) {
                TLBFlushPageByMMUIdxData *d
                    = g_new(TLBFlushPageByMMUIdxData, 1);

                d->addr = addr;
                d->idxmap = idxmap;
                async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
                                 RUN_ON_CPU_HOST_PTR(d));
            }
        }
    }

    tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
}

void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
{
    tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
}

/* As above, but the source's flush is queued as "safe" work so all
 * vCPUs synchronise on completion. */
void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
                                              target_ulong addr,
                                              uint16_t idxmap)
{
    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    /* This should already be page aligned */
    addr &= TARGET_PAGE_MASK;

    /*
     * Allocate memory to hold addr+idxmap only when needed.
     * See tlb_flush_page_by_mmuidx for details.
     */
    if (idxmap < TARGET_PAGE_SIZE) {
        flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
                              RUN_ON_CPU_TARGET_PTR(addr | idxmap));
    } else {
        CPUState *dst_cpu;
        TLBFlushPageByMMUIdxData *d;

        /* Allocate a separate data block for each destination cpu.  */
        CPU_FOREACH(dst_cpu) {
            if (dst_cpu != src_cpu) {
                d = g_new(TLBFlushPageByMMUIdxData, 1);
                d->addr = addr;
                d->idxmap = idxmap;
                async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
                                 RUN_ON_CPU_HOST_PTR(d));
            }
        }

        d = g_new(TLBFlushPageByMMUIdxData, 1);
        d->addr = addr;
        d->idxmap = idxmap;
        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
                              RUN_ON_CPU_HOST_PTR(d));
    }
}

void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
{
    tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
}

/* update the TLBs so that writes to code in the virtual page 'addr'
   can be detected */
void tlb_protect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
                                             DIRTY_MEMORY_CODE);
}

/* update the TLB so that writes in physical page 'phys_addr' are no longer
   tested for self modifying code */
void tlb_unprotect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
}


/*
 * Dirty write flag handling
 *
 * When the TCG code writes to a location it looks up the address in
 * the TLB and uses that data to compute the final address. If any of
 * the lower bits of the address are set then the slow path is forced.
 * There are a number of reasons to do this but for normal RAM the
 * most usual is detecting writes to code regions which may invalidate
 * generated code.
 *
 * Other vCPUs might be reading their TLBs during guest execution, so we update
 * te->addr_write with atomic_set. We don't need to worry about this for
 * oversized guests as MTTCG is disabled for them.
 *
 * Called with tlb_c.lock held.
 */
static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
                                         uintptr_t start, uintptr_t length)
{
    uintptr_t addr = tlb_entry->addr_write;

    if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
                 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
        addr &= TARGET_PAGE_MASK;
        addr += tlb_entry->addend;
        if ((addr - start) < length) {
#if TCG_OVERSIZED_GUEST
            tlb_entry->addr_write |= TLB_NOTDIRTY;
#else
            atomic_set(&tlb_entry->addr_write,
                       tlb_entry->addr_write | TLB_NOTDIRTY);
#endif
        }
    }
}

/*
 * Called with tlb_c.lock held.
 * Called only from the vCPU context, i.e. the TLB's owner thread.
 */
static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
{
    *d = *s;
}

/* This is a cross vCPU call (i.e. another vCPU resetting the flags of
 * the target vCPU).
 * We must take tlb_c.lock to avoid racing with another vCPU update. The only
 * thing actually updated is the target TLB entry ->addr_write flags.
 */
void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
{
    CPUArchState *env;

    int mmu_idx;

    env = cpu->env_ptr;
    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        unsigned int i;
        unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);

        for (i = 0; i < n; i++) {
            tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
                                         start1, length);
        }

        for (i = 0; i < CPU_VTLB_SIZE; i++) {
            tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
                                         start1, length);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);
}

/* Clear TLB_NOTDIRTY from one entry if it maps @vaddr for writing.
 * Called with tlb_c.lock held */
static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
                                         target_ulong vaddr)
{
    if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
        tlb_entry->addr_write = vaddr;
    }
}

/* update the TLB corresponding to virtual page vaddr
   so that it is no longer dirty */
void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
{
    CPUArchState *env = cpu->env_ptr;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    vaddr &= TARGET_PAGE_MASK;
    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
    }

    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        int k;
        for (k = 0; k < CPU_VTLB_SIZE; k++) {
            tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);
}

/* Our TLB does not support large pages, so remember the area covered by
   large pages and trigger a full TLB flush if these are invalidated.  */
static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
                               target_ulong vaddr, target_ulong size)
{
    target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
    target_ulong lp_mask = ~(size - 1);

    if (lp_addr == (target_ulong)-1) {
        /* No previous large page.  */
        lp_addr = vaddr;
    } else {
        /* Extend the existing region to include the new page.
           This is a compromise between unnecessary flushes and
           the cost of maintaining a full variable size TLB.  */
        lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
        while (((lp_addr ^ vaddr) & lp_mask) != 0) {
            lp_mask <<= 1;
        }
    }
    env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
    env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
}

/* Add a new TLB entry. At most one entry for a given virtual address
 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
 * supplied size is only used by tlb_flush_page.
 *
 * Called from TCG-generated code, which is under an RCU read-side
 * critical section.
 */
void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
                             hwaddr paddr, MemTxAttrs attrs, int prot,
                             int mmu_idx, target_ulong size)
{
    CPUArchState *env = cpu->env_ptr;
    CPUTLB *tlb = env_tlb(env);
    CPUTLBDesc *desc = &tlb->d[mmu_idx];
    MemoryRegionSection *section;
    unsigned int index;
    target_ulong address;
    target_ulong write_address;
    uintptr_t addend;
    CPUTLBEntry *te, tn;
    hwaddr iotlb, xlat, sz, paddr_page;
    target_ulong vaddr_page;
    int asidx = cpu_asidx_from_attrs(cpu, attrs);
    int wp_flags;
    bool is_ram, is_romd;

    assert_cpu_is_self(cpu);

    if (size <= TARGET_PAGE_SIZE) {
        sz = TARGET_PAGE_SIZE;
    } else {
        /* Record the large-page region so later flushes do the right thing. */
        tlb_add_large_page(env, mmu_idx, vaddr, size);
        sz = size;
    }
    vaddr_page = vaddr & TARGET_PAGE_MASK;
    paddr_page = paddr & TARGET_PAGE_MASK;

    section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
                                                &xlat, &sz, attrs, &prot);
    assert(sz >= TARGET_PAGE_SIZE);

    tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
              " prot=%x idx=%d\n",
              vaddr, paddr, prot, mmu_idx);

    address = vaddr_page;
    if (size < TARGET_PAGE_SIZE) {
        /* Repeat the MMU check and TLB fill on every access.  */
        address |= TLB_INVALID_MASK;
    }
    if (attrs.byte_swap) {
        address |= TLB_BSWAP;
    }

    is_ram = memory_region_is_ram(section->mr);
    is_romd = memory_region_is_romd(section->mr);

    if (is_ram || is_romd) {
        /* RAM and ROMD both have associated host memory. */
        addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
    } else {
        /* I/O does not; force the host address to NULL. */
        addend = 0;
    }

    write_address = address;
    if (is_ram) {
        iotlb = memory_region_get_ram_addr(section->mr) + xlat;
        /*
         * Computing is_clean is expensive; avoid all that unless
         * the page is actually writable.
         */
        if (prot & PAGE_WRITE) {
            if (section->readonly) {
                write_address |= TLB_DISCARD_WRITE;
            } else if (cpu_physical_memory_is_clean(iotlb)) {
                write_address |= TLB_NOTDIRTY;
            }
        }
    } else {
        /* I/O or ROMD */
        iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
        /*
         * Writes to romd devices must go through MMIO to enable write.
         * Reads to romd devices go through the ram_ptr found above,
         * but of course reads to I/O must go through MMIO.
         */
        write_address |= TLB_MMIO;
        if (!is_romd) {
            address = write_address;
        }
    }

    wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
                                              TARGET_PAGE_SIZE);

    index = tlb_index(env, mmu_idx, vaddr_page);
    te = tlb_entry(env, mmu_idx, vaddr_page);

    /*
     * Hold the TLB lock for the rest of the function. We could acquire/release
     * the lock several times in the function, but it is faster to amortize the
     * acquisition cost by acquiring it just once. Note that this leads to
     * a longer critical section, but this is not a concern since the TLB lock
     * is unlikely to be contended.
     */
    qemu_spin_lock(&tlb->c.lock);

    /* Note that the tlb is no longer clean.  */
    tlb->c.dirty |= 1 << mmu_idx;

    /* Make sure there's no cached translation for the new page.  */
    tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);

    /*
     * Only evict the old entry to the victim tlb if it's for a
     * different page; otherwise just overwrite the stale data.
     */
    if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
        unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
        CPUTLBEntry *tv = &desc->vtable[vidx];

        /* Evict the old entry into the victim tlb.  */
        copy_tlb_helper_locked(tv, te);
        desc->viotlb[vidx] = desc->iotlb[index];
        tlb_n_used_entries_dec(env, mmu_idx);
    }

    /* refill the tlb */
    /*
     * At this point iotlb contains a physical section number in the lower
     * TARGET_PAGE_BITS, and either
     *  + the ram_addr_t of the page base of the target RAM (RAM)
     *  + the offset within section->mr of the page base (I/O, ROMD)
     * We subtract the vaddr_page (which is page aligned and thus won't
     * disturb the low bits) to give an offset which can be added to the
     * (non-page-aligned) vaddr of the eventual memory access to get
     * the MemoryRegion offset for the access. Note that the vaddr we
     * subtract here is that of the page base, and not the same as the
     * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
     */
    desc->iotlb[index].addr = iotlb - vaddr_page;
    desc->iotlb[index].attrs = attrs;

    /* Now calculate the new entry */
    tn.addend = addend - vaddr_page;
    if (prot & PAGE_READ) {
        tn.addr_read = address;
        if (wp_flags & BP_MEM_READ) {
            tn.addr_read |= TLB_WATCHPOINT;
        }
    } else {
        tn.addr_read = -1;
    }

    if (prot & PAGE_EXEC) {
        tn.addr_code = address;
    } else {
        tn.addr_code = -1;
    }

    tn.addr_write = -1;
    if (prot & PAGE_WRITE) {
        tn.addr_write = write_address;
        if (prot & PAGE_WRITE_INV) {
            tn.addr_write |= TLB_INVALID_MASK;
        }
        if (wp_flags & BP_MEM_WRITE) {
            tn.addr_write |= TLB_WATCHPOINT;
        }
    }

    copy_tlb_helper_locked(te, &tn);
    tlb_n_used_entries_inc(env, mmu_idx);
    qemu_spin_unlock(&tlb->c.lock);
}

/* Add a new TLB entry, but without specifying the memory
 * transaction attributes to be used.
 */
void tlb_set_page(CPUState *cpu, target_ulong vaddr,
                  hwaddr paddr, int prot,
                  int mmu_idx, target_ulong size)
{
    tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
                            prot, mmu_idx, size);
}

/* Translate a host pointer back to a ram_addr_t, aborting if the pointer
 * does not belong to any RAM block. */
static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
    ram_addr_t ram_addr;

    ram_addr = qemu_ram_addr_from_host(ptr);
    if (ram_addr == RAM_ADDR_INVALID) {
        error_report("Bad ram pointer %p", ptr);
        abort();
    }
    return ram_addr;
}

/*
 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
 * be discarded and looked up again (e.g. via tlb_entry()).
 */
static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
                     MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);
    bool ok;

    /*
     * This is not a probe, so only valid return is success; failure
     * should result in exception + longjmp to the cpu loop.
     */
    ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr);
    assert(ok);
}

/* Perform a load through the MMIO slow path: dispatch the read to the
 * memory region named by @iotlbentry, taking the iothread lock if the
 * region requires it, and report any transaction failure to the CPU. */
static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                         int mmu_idx, target_ulong addr, uintptr_t retaddr,
                         MMUAccessType access_type, MemOp op)
{
    CPUState *cpu = env_cpu(env);
    hwaddr mr_offset;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    uint64_t val;
    bool locked = false;
    MemTxResult r;

    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
    mr = section->mr;
    /* iotlbentry->addr holds section + page-base offset; add back the
     * access vaddr to get the offset within the MemoryRegion. */
    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
    cpu->mem_io_pc = retaddr;
    if (!cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }

    if (mr->global_locking && !qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        locked = true;
    }
    r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
    if (r != MEMTX_OK) {
        hwaddr physaddr = mr_offset +
            section->offset_within_address_space -
            section->offset_within_region;

        cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
                               mmu_idx, iotlbentry->attrs, r, retaddr);
    }
    if (locked) {
        qemu_mutex_unlock_iothread();
    }

    return val;
}

/* Store counterpart of io_readx(): dispatch @val to the MMIO region.
 * NOTE(review): definition continues beyond the visible chunk. */
static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                      int mmu_idx, uint64_t val, target_ulong addr,
                      uintptr_t retaddr, MemOp op)
{
    CPUState *cpu = env_cpu(env);
    hwaddr mr_offset;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    bool locked = false;
    MemTxResult r;

    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
    mr = section->mr;
    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
    if (!cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }
    cpu->mem_io_pc = retaddr;

    if (mr->global_locking && !qemu_mutex_iothread_locked()) {
qemu_mutex_lock_iothread(); 1082 locked = true; 1083 } 1084 r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs); 1085 if (r != MEMTX_OK) { 1086 hwaddr physaddr = mr_offset + 1087 section->offset_within_address_space - 1088 section->offset_within_region; 1089 1090 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), 1091 MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r, 1092 retaddr); 1093 } 1094 if (locked) { 1095 qemu_mutex_unlock_iothread(); 1096 } 1097 } 1098 1099 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs) 1100 { 1101 #if TCG_OVERSIZED_GUEST 1102 return *(target_ulong *)((uintptr_t)entry + ofs); 1103 #else 1104 /* ofs might correspond to .addr_write, so use atomic_read */ 1105 return atomic_read((target_ulong *)((uintptr_t)entry + ofs)); 1106 #endif 1107 } 1108 1109 /* Return true if ADDR is present in the victim tlb, and has been copied 1110 back to the main tlb. */ 1111 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, 1112 size_t elt_ofs, target_ulong page) 1113 { 1114 size_t vidx; 1115 1116 assert_cpu_is_self(env_cpu(env)); 1117 for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { 1118 CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx]; 1119 target_ulong cmp; 1120 1121 /* elt_ofs might correspond to .addr_write, so use atomic_read */ 1122 #if TCG_OVERSIZED_GUEST 1123 cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); 1124 #else 1125 cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs)); 1126 #endif 1127 1128 if (cmp == page) { 1129 /* Found entry in victim tlb, swap tlb and iotlb. 
*/ 1130 CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index]; 1131 1132 qemu_spin_lock(&env_tlb(env)->c.lock); 1133 copy_tlb_helper_locked(&tmptlb, tlb); 1134 copy_tlb_helper_locked(tlb, vtlb); 1135 copy_tlb_helper_locked(vtlb, &tmptlb); 1136 qemu_spin_unlock(&env_tlb(env)->c.lock); 1137 1138 CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1139 CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx]; 1140 tmpio = *io; *io = *vio; *vio = tmpio; 1141 return true; 1142 } 1143 } 1144 return false; 1145 } 1146 1147 /* Macro to call the above, with local variables from the use context. */ 1148 #define VICTIM_TLB_HIT(TY, ADDR) \ 1149 victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \ 1150 (ADDR) & TARGET_PAGE_MASK) 1151 1152 /* 1153 * Return a ram_addr_t for the virtual address for execution. 1154 * 1155 * Return -1 if we can't translate and execute from an entire page 1156 * of RAM. This will force us to execute by loading and translating 1157 * one insn at a time, without caching. 1158 * 1159 * NOTE: This function will trigger an exception if the page is 1160 * not executable. 1161 */ 1162 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, 1163 void **hostp) 1164 { 1165 uintptr_t mmu_idx = cpu_mmu_index(env, true); 1166 uintptr_t index = tlb_index(env, mmu_idx, addr); 1167 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1168 void *p; 1169 1170 if (unlikely(!tlb_hit(entry->addr_code, addr))) { 1171 if (!VICTIM_TLB_HIT(addr_code, addr)) { 1172 tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0); 1173 index = tlb_index(env, mmu_idx, addr); 1174 entry = tlb_entry(env, mmu_idx, addr); 1175 1176 if (unlikely(entry->addr_code & TLB_INVALID_MASK)) { 1177 /* 1178 * The MMU protection covers a smaller range than a target 1179 * page, so we must redo the MMU check for every insn. 
1180 */ 1181 return -1; 1182 } 1183 } 1184 assert(tlb_hit(entry->addr_code, addr)); 1185 } 1186 1187 if (unlikely(entry->addr_code & TLB_MMIO)) { 1188 /* The region is not backed by RAM. */ 1189 if (hostp) { 1190 *hostp = NULL; 1191 } 1192 return -1; 1193 } 1194 1195 p = (void *)((uintptr_t)addr + entry->addend); 1196 if (hostp) { 1197 *hostp = p; 1198 } 1199 return qemu_ram_addr_from_host_nofail(p); 1200 } 1201 1202 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) 1203 { 1204 return get_page_addr_code_hostp(env, addr, NULL); 1205 } 1206 1207 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, 1208 CPUIOTLBEntry *iotlbentry, uintptr_t retaddr) 1209 { 1210 ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr; 1211 1212 trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size); 1213 1214 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { 1215 struct page_collection *pages 1216 = page_collection_lock(ram_addr, ram_addr + size); 1217 tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr); 1218 page_collection_unlock(pages); 1219 } 1220 1221 /* 1222 * Set both VGA and migration bits for simplicity and to remove 1223 * the notdirty callback faster. 1224 */ 1225 cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE); 1226 1227 /* We remove the notdirty callback only if the code has been flushed. 
*/ 1228 if (!cpu_physical_memory_is_clean(ram_addr)) { 1229 trace_memory_notdirty_set_dirty(mem_vaddr); 1230 tlb_set_dirty(cpu, mem_vaddr); 1231 } 1232 } 1233 1234 static int probe_access_internal(CPUArchState *env, target_ulong addr, 1235 int fault_size, MMUAccessType access_type, 1236 int mmu_idx, bool nonfault, 1237 void **phost, uintptr_t retaddr) 1238 { 1239 uintptr_t index = tlb_index(env, mmu_idx, addr); 1240 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1241 target_ulong tlb_addr, page_addr; 1242 size_t elt_ofs; 1243 int flags; 1244 1245 switch (access_type) { 1246 case MMU_DATA_LOAD: 1247 elt_ofs = offsetof(CPUTLBEntry, addr_read); 1248 break; 1249 case MMU_DATA_STORE: 1250 elt_ofs = offsetof(CPUTLBEntry, addr_write); 1251 break; 1252 case MMU_INST_FETCH: 1253 elt_ofs = offsetof(CPUTLBEntry, addr_code); 1254 break; 1255 default: 1256 g_assert_not_reached(); 1257 } 1258 tlb_addr = tlb_read_ofs(entry, elt_ofs); 1259 1260 page_addr = addr & TARGET_PAGE_MASK; 1261 if (!tlb_hit_page(tlb_addr, page_addr)) { 1262 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) { 1263 CPUState *cs = env_cpu(env); 1264 CPUClass *cc = CPU_GET_CLASS(cs); 1265 1266 if (!cc->tlb_fill(cs, addr, fault_size, access_type, 1267 mmu_idx, nonfault, retaddr)) { 1268 /* Non-faulting page table read failed. */ 1269 *phost = NULL; 1270 return TLB_INVALID_MASK; 1271 } 1272 1273 /* TLB resize via tlb_fill may have moved the entry. */ 1274 entry = tlb_entry(env, mmu_idx, addr); 1275 } 1276 tlb_addr = tlb_read_ofs(entry, elt_ofs); 1277 } 1278 flags = tlb_addr & TLB_FLAGS_MASK; 1279 1280 /* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */ 1281 if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) { 1282 *phost = NULL; 1283 return TLB_MMIO; 1284 } 1285 1286 /* Everything else is RAM. 
*/ 1287 *phost = (void *)((uintptr_t)addr + entry->addend); 1288 return flags; 1289 } 1290 1291 int probe_access_flags(CPUArchState *env, target_ulong addr, 1292 MMUAccessType access_type, int mmu_idx, 1293 bool nonfault, void **phost, uintptr_t retaddr) 1294 { 1295 int flags; 1296 1297 flags = probe_access_internal(env, addr, 0, access_type, mmu_idx, 1298 nonfault, phost, retaddr); 1299 1300 /* Handle clean RAM pages. */ 1301 if (unlikely(flags & TLB_NOTDIRTY)) { 1302 uintptr_t index = tlb_index(env, mmu_idx, addr); 1303 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1304 1305 notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr); 1306 flags &= ~TLB_NOTDIRTY; 1307 } 1308 1309 return flags; 1310 } 1311 1312 void *probe_access(CPUArchState *env, target_ulong addr, int size, 1313 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) 1314 { 1315 void *host; 1316 int flags; 1317 1318 g_assert(-(addr | TARGET_PAGE_MASK) >= size); 1319 1320 flags = probe_access_internal(env, addr, size, access_type, mmu_idx, 1321 false, &host, retaddr); 1322 1323 /* Per the interface, size == 0 merely faults the access. */ 1324 if (size == 0) { 1325 return NULL; 1326 } 1327 1328 if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) { 1329 uintptr_t index = tlb_index(env, mmu_idx, addr); 1330 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1331 1332 /* Handle watchpoints. */ 1333 if (flags & TLB_WATCHPOINT) { 1334 int wp_access = (access_type == MMU_DATA_STORE 1335 ? BP_MEM_WRITE : BP_MEM_READ); 1336 cpu_check_watchpoint(env_cpu(env), addr, size, 1337 iotlbentry->attrs, wp_access, retaddr); 1338 } 1339 1340 /* Handle clean RAM pages. 
*/ 1341 if (flags & TLB_NOTDIRTY) { 1342 notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr); 1343 } 1344 } 1345 1346 return host; 1347 } 1348 1349 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, 1350 MMUAccessType access_type, int mmu_idx) 1351 { 1352 void *host; 1353 int flags; 1354 1355 flags = probe_access_internal(env, addr, 0, access_type, 1356 mmu_idx, true, &host, 0); 1357 1358 /* No combination of flags are expected by the caller. */ 1359 return flags ? NULL : host; 1360 } 1361 1362 #ifdef CONFIG_PLUGIN 1363 /* 1364 * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure. 1365 * This should be a hot path as we will have just looked this path up 1366 * in the softmmu lookup code (or helper). We don't handle re-fills or 1367 * checking the victim table. This is purely informational. 1368 * 1369 * This should never fail as the memory access being instrumented 1370 * should have just filled the TLB. 1371 */ 1372 1373 bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx, 1374 bool is_store, struct qemu_plugin_hwaddr *data) 1375 { 1376 CPUArchState *env = cpu->env_ptr; 1377 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); 1378 uintptr_t index = tlb_index(env, mmu_idx, addr); 1379 target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read; 1380 1381 if (likely(tlb_hit(tlb_addr, addr))) { 1382 /* We must have an iotlb entry for MMIO */ 1383 if (tlb_addr & TLB_MMIO) { 1384 CPUIOTLBEntry *iotlbentry; 1385 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1386 data->is_io = true; 1387 data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); 1388 data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; 1389 } else { 1390 data->is_io = false; 1391 data->v.ram.hostaddr = addr + tlbe->addend; 1392 } 1393 return true; 1394 } 1395 return false; 1396 } 1397 1398 #endif 1399 1400 /* Probe for a read-modify-write atomic operation. 
Do not allow unaligned 1401 * operations, or io operations to proceed. Return the host address. */ 1402 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, 1403 TCGMemOpIdx oi, uintptr_t retaddr) 1404 { 1405 size_t mmu_idx = get_mmuidx(oi); 1406 uintptr_t index = tlb_index(env, mmu_idx, addr); 1407 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); 1408 target_ulong tlb_addr = tlb_addr_write(tlbe); 1409 MemOp mop = get_memop(oi); 1410 int a_bits = get_alignment_bits(mop); 1411 int s_bits = mop & MO_SIZE; 1412 void *hostaddr; 1413 1414 /* Adjust the given return address. */ 1415 retaddr -= GETPC_ADJ; 1416 1417 /* Enforce guest required alignment. */ 1418 if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) { 1419 /* ??? Maybe indicate atomic op to cpu_unaligned_access */ 1420 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE, 1421 mmu_idx, retaddr); 1422 } 1423 1424 /* Enforce qemu required alignment. */ 1425 if (unlikely(addr & ((1 << s_bits) - 1))) { 1426 /* We get here if guest alignment was not requested, 1427 or was not enforced by cpu_unaligned_access above. 1428 We might widen the access and emulate, but for now 1429 mark an exception and exit the cpu loop. */ 1430 goto stop_the_world; 1431 } 1432 1433 /* Check TLB entry and enforce page permissions. */ 1434 if (!tlb_hit(tlb_addr, addr)) { 1435 if (!VICTIM_TLB_HIT(addr_write, addr)) { 1436 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE, 1437 mmu_idx, retaddr); 1438 index = tlb_index(env, mmu_idx, addr); 1439 tlbe = tlb_entry(env, mmu_idx, addr); 1440 } 1441 tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; 1442 } 1443 1444 /* Notice an IO access or a needs-MMU-lookup access */ 1445 if (unlikely(tlb_addr & TLB_MMIO)) { 1446 /* There's really nothing that can be done to 1447 support this apart from stop-the-world. */ 1448 goto stop_the_world; 1449 } 1450 1451 /* Let the guest notice RMW on a write-only page. 
*/ 1452 if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) { 1453 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD, 1454 mmu_idx, retaddr); 1455 /* Since we don't support reads and writes to different addresses, 1456 and we do have the proper page loaded for write, this shouldn't 1457 ever return. But just in case, handle via stop-the-world. */ 1458 goto stop_the_world; 1459 } 1460 1461 hostaddr = (void *)((uintptr_t)addr + tlbe->addend); 1462 1463 if (unlikely(tlb_addr & TLB_NOTDIRTY)) { 1464 notdirty_write(env_cpu(env), addr, 1 << s_bits, 1465 &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr); 1466 } 1467 1468 return hostaddr; 1469 1470 stop_the_world: 1471 cpu_loop_exit_atomic(env_cpu(env), retaddr); 1472 } 1473 1474 /* 1475 * Load Helpers 1476 * 1477 * We support two different access types. SOFTMMU_CODE_ACCESS is 1478 * specifically for reading instructions from system memory. It is 1479 * called by the translation loop and in some helpers where the code 1480 * is disassembled. It shouldn't be called directly by guest code. 
1481 */ 1482 1483 typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr, 1484 TCGMemOpIdx oi, uintptr_t retaddr); 1485 1486 static inline uint64_t QEMU_ALWAYS_INLINE 1487 load_memop(const void *haddr, MemOp op) 1488 { 1489 switch (op) { 1490 case MO_UB: 1491 return ldub_p(haddr); 1492 case MO_BEUW: 1493 return lduw_be_p(haddr); 1494 case MO_LEUW: 1495 return lduw_le_p(haddr); 1496 case MO_BEUL: 1497 return (uint32_t)ldl_be_p(haddr); 1498 case MO_LEUL: 1499 return (uint32_t)ldl_le_p(haddr); 1500 case MO_BEQ: 1501 return ldq_be_p(haddr); 1502 case MO_LEQ: 1503 return ldq_le_p(haddr); 1504 default: 1505 qemu_build_not_reached(); 1506 } 1507 } 1508 1509 static inline uint64_t QEMU_ALWAYS_INLINE 1510 load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, 1511 uintptr_t retaddr, MemOp op, bool code_read, 1512 FullLoadHelper *full_load) 1513 { 1514 uintptr_t mmu_idx = get_mmuidx(oi); 1515 uintptr_t index = tlb_index(env, mmu_idx, addr); 1516 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1517 target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read; 1518 const size_t tlb_off = code_read ? 1519 offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read); 1520 const MMUAccessType access_type = 1521 code_read ? MMU_INST_FETCH : MMU_DATA_LOAD; 1522 unsigned a_bits = get_alignment_bits(get_memop(oi)); 1523 void *haddr; 1524 uint64_t res; 1525 size_t size = memop_size(op); 1526 1527 /* Handle CPU specific unaligned behaviour */ 1528 if (addr & ((1 << a_bits) - 1)) { 1529 cpu_unaligned_access(env_cpu(env), addr, access_type, 1530 mmu_idx, retaddr); 1531 } 1532 1533 /* If the TLB entry is for a different page, reload and try again. 
*/ 1534 if (!tlb_hit(tlb_addr, addr)) { 1535 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, 1536 addr & TARGET_PAGE_MASK)) { 1537 tlb_fill(env_cpu(env), addr, size, 1538 access_type, mmu_idx, retaddr); 1539 index = tlb_index(env, mmu_idx, addr); 1540 entry = tlb_entry(env, mmu_idx, addr); 1541 } 1542 tlb_addr = code_read ? entry->addr_code : entry->addr_read; 1543 tlb_addr &= ~TLB_INVALID_MASK; 1544 } 1545 1546 /* Handle anything that isn't just a straight memory access. */ 1547 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { 1548 CPUIOTLBEntry *iotlbentry; 1549 bool need_swap; 1550 1551 /* For anything that is unaligned, recurse through full_load. */ 1552 if ((addr & (size - 1)) != 0) { 1553 goto do_unaligned_access; 1554 } 1555 1556 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1557 1558 /* Handle watchpoints. */ 1559 if (unlikely(tlb_addr & TLB_WATCHPOINT)) { 1560 /* On watchpoint hit, this will longjmp out. */ 1561 cpu_check_watchpoint(env_cpu(env), addr, size, 1562 iotlbentry->attrs, BP_MEM_READ, retaddr); 1563 } 1564 1565 need_swap = size > 1 && (tlb_addr & TLB_BSWAP); 1566 1567 /* Handle I/O access. */ 1568 if (likely(tlb_addr & TLB_MMIO)) { 1569 return io_readx(env, iotlbentry, mmu_idx, addr, retaddr, 1570 access_type, op ^ (need_swap * MO_BSWAP)); 1571 } 1572 1573 haddr = (void *)((uintptr_t)addr + entry->addend); 1574 1575 /* 1576 * Keep these two load_memop separate to ensure that the compiler 1577 * is able to fold the entire function to a single instruction. 1578 * There is a build-time assert inside to remind you of this. ;-) 1579 */ 1580 if (unlikely(need_swap)) { 1581 return load_memop(haddr, op ^ MO_BSWAP); 1582 } 1583 return load_memop(haddr, op); 1584 } 1585 1586 /* Handle slow unaligned access (it spans two pages or IO). 
*/ 1587 if (size > 1 1588 && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1 1589 >= TARGET_PAGE_SIZE)) { 1590 target_ulong addr1, addr2; 1591 uint64_t r1, r2; 1592 unsigned shift; 1593 do_unaligned_access: 1594 addr1 = addr & ~((target_ulong)size - 1); 1595 addr2 = addr1 + size; 1596 r1 = full_load(env, addr1, oi, retaddr); 1597 r2 = full_load(env, addr2, oi, retaddr); 1598 shift = (addr & (size - 1)) * 8; 1599 1600 if (memop_big_endian(op)) { 1601 /* Big-endian combine. */ 1602 res = (r1 << shift) | (r2 >> ((size * 8) - shift)); 1603 } else { 1604 /* Little-endian combine. */ 1605 res = (r1 >> shift) | (r2 << ((size * 8) - shift)); 1606 } 1607 return res & MAKE_64BIT_MASK(0, size * 8); 1608 } 1609 1610 haddr = (void *)((uintptr_t)addr + entry->addend); 1611 return load_memop(haddr, op); 1612 } 1613 1614 /* 1615 * For the benefit of TCG generated code, we want to avoid the 1616 * complication of ABI-specific return type promotion and always 1617 * return a value extended to the register size of the host. This is 1618 * tcg_target_long, except in the case of a 32-bit host and 64-bit 1619 * data, and for that we always have uint64_t. 1620 * 1621 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS. 
1622 */ 1623 1624 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr, 1625 TCGMemOpIdx oi, uintptr_t retaddr) 1626 { 1627 return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu); 1628 } 1629 1630 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, 1631 TCGMemOpIdx oi, uintptr_t retaddr) 1632 { 1633 return full_ldub_mmu(env, addr, oi, retaddr); 1634 } 1635 1636 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr, 1637 TCGMemOpIdx oi, uintptr_t retaddr) 1638 { 1639 return load_helper(env, addr, oi, retaddr, MO_LEUW, false, 1640 full_le_lduw_mmu); 1641 } 1642 1643 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr, 1644 TCGMemOpIdx oi, uintptr_t retaddr) 1645 { 1646 return full_le_lduw_mmu(env, addr, oi, retaddr); 1647 } 1648 1649 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr, 1650 TCGMemOpIdx oi, uintptr_t retaddr) 1651 { 1652 return load_helper(env, addr, oi, retaddr, MO_BEUW, false, 1653 full_be_lduw_mmu); 1654 } 1655 1656 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr, 1657 TCGMemOpIdx oi, uintptr_t retaddr) 1658 { 1659 return full_be_lduw_mmu(env, addr, oi, retaddr); 1660 } 1661 1662 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr, 1663 TCGMemOpIdx oi, uintptr_t retaddr) 1664 { 1665 return load_helper(env, addr, oi, retaddr, MO_LEUL, false, 1666 full_le_ldul_mmu); 1667 } 1668 1669 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr, 1670 TCGMemOpIdx oi, uintptr_t retaddr) 1671 { 1672 return full_le_ldul_mmu(env, addr, oi, retaddr); 1673 } 1674 1675 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr, 1676 TCGMemOpIdx oi, uintptr_t retaddr) 1677 { 1678 return load_helper(env, addr, oi, retaddr, MO_BEUL, false, 1679 full_be_ldul_mmu); 1680 } 1681 1682 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr, 1683 TCGMemOpIdx oi, uintptr_t retaddr) 
1684 { 1685 return full_be_ldul_mmu(env, addr, oi, retaddr); 1686 } 1687 1688 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, 1689 TCGMemOpIdx oi, uintptr_t retaddr) 1690 { 1691 return load_helper(env, addr, oi, retaddr, MO_LEQ, false, 1692 helper_le_ldq_mmu); 1693 } 1694 1695 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, 1696 TCGMemOpIdx oi, uintptr_t retaddr) 1697 { 1698 return load_helper(env, addr, oi, retaddr, MO_BEQ, false, 1699 helper_be_ldq_mmu); 1700 } 1701 1702 /* 1703 * Provide signed versions of the load routines as well. We can of course 1704 * avoid this for 64-bit data, or for 32-bit data on 32-bit host. 1705 */ 1706 1707 1708 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, 1709 TCGMemOpIdx oi, uintptr_t retaddr) 1710 { 1711 return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr); 1712 } 1713 1714 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr, 1715 TCGMemOpIdx oi, uintptr_t retaddr) 1716 { 1717 return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr); 1718 } 1719 1720 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr, 1721 TCGMemOpIdx oi, uintptr_t retaddr) 1722 { 1723 return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr); 1724 } 1725 1726 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr, 1727 TCGMemOpIdx oi, uintptr_t retaddr) 1728 { 1729 return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr); 1730 } 1731 1732 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, 1733 TCGMemOpIdx oi, uintptr_t retaddr) 1734 { 1735 return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr); 1736 } 1737 1738 /* 1739 * Load helpers for cpu_ldst.h. 
1740 */ 1741 1742 static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr, 1743 int mmu_idx, uintptr_t retaddr, 1744 MemOp op, FullLoadHelper *full_load) 1745 { 1746 uint16_t meminfo; 1747 TCGMemOpIdx oi; 1748 uint64_t ret; 1749 1750 meminfo = trace_mem_get_info(op, mmu_idx, false); 1751 trace_guest_mem_before_exec(env_cpu(env), addr, meminfo); 1752 1753 op &= ~MO_SIGN; 1754 oi = make_memop_idx(op, mmu_idx); 1755 ret = full_load(env, addr, oi, retaddr); 1756 1757 qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo); 1758 1759 return ret; 1760 } 1761 1762 uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr, 1763 int mmu_idx, uintptr_t ra) 1764 { 1765 return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu); 1766 } 1767 1768 int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, 1769 int mmu_idx, uintptr_t ra) 1770 { 1771 return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB, 1772 full_ldub_mmu); 1773 } 1774 1775 uint32_t cpu_lduw_mmuidx_ra(CPUArchState *env, abi_ptr addr, 1776 int mmu_idx, uintptr_t ra) 1777 { 1778 return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUW, 1779 MO_TE == MO_LE 1780 ? full_le_lduw_mmu : full_be_lduw_mmu); 1781 } 1782 1783 int cpu_ldsw_mmuidx_ra(CPUArchState *env, abi_ptr addr, 1784 int mmu_idx, uintptr_t ra) 1785 { 1786 return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_TESW, 1787 MO_TE == MO_LE 1788 ? full_le_lduw_mmu : full_be_lduw_mmu); 1789 } 1790 1791 uint32_t cpu_ldl_mmuidx_ra(CPUArchState *env, abi_ptr addr, 1792 int mmu_idx, uintptr_t ra) 1793 { 1794 return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUL, 1795 MO_TE == MO_LE 1796 ? full_le_ldul_mmu : full_be_ldul_mmu); 1797 } 1798 1799 uint64_t cpu_ldq_mmuidx_ra(CPUArchState *env, abi_ptr addr, 1800 int mmu_idx, uintptr_t ra) 1801 { 1802 return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEQ, 1803 MO_TE == MO_LE 1804 ? 
helper_le_ldq_mmu : helper_be_ldq_mmu); 1805 } 1806 1807 uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr, 1808 uintptr_t retaddr) 1809 { 1810 return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 1811 } 1812 1813 int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) 1814 { 1815 return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 1816 } 1817 1818 uint32_t cpu_lduw_data_ra(CPUArchState *env, target_ulong ptr, 1819 uintptr_t retaddr) 1820 { 1821 return cpu_lduw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 1822 } 1823 1824 int cpu_ldsw_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) 1825 { 1826 return cpu_ldsw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 1827 } 1828 1829 uint32_t cpu_ldl_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) 1830 { 1831 return cpu_ldl_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 1832 } 1833 1834 uint64_t cpu_ldq_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) 1835 { 1836 return cpu_ldq_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 1837 } 1838 1839 uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr) 1840 { 1841 return cpu_ldub_data_ra(env, ptr, 0); 1842 } 1843 1844 int cpu_ldsb_data(CPUArchState *env, target_ulong ptr) 1845 { 1846 return cpu_ldsb_data_ra(env, ptr, 0); 1847 } 1848 1849 uint32_t cpu_lduw_data(CPUArchState *env, target_ulong ptr) 1850 { 1851 return cpu_lduw_data_ra(env, ptr, 0); 1852 } 1853 1854 int cpu_ldsw_data(CPUArchState *env, target_ulong ptr) 1855 { 1856 return cpu_ldsw_data_ra(env, ptr, 0); 1857 } 1858 1859 uint32_t cpu_ldl_data(CPUArchState *env, target_ulong ptr) 1860 { 1861 return cpu_ldl_data_ra(env, ptr, 0); 1862 } 1863 1864 uint64_t cpu_ldq_data(CPUArchState *env, target_ulong ptr) 1865 { 1866 return cpu_ldq_data_ra(env, ptr, 0); 1867 } 1868 1869 /* 1870 * Store Helpers 1871 */ 1872 1873 static inline void QEMU_ALWAYS_INLINE 1874 
store_memop(void *haddr, uint64_t val, MemOp op) 1875 { 1876 switch (op) { 1877 case MO_UB: 1878 stb_p(haddr, val); 1879 break; 1880 case MO_BEUW: 1881 stw_be_p(haddr, val); 1882 break; 1883 case MO_LEUW: 1884 stw_le_p(haddr, val); 1885 break; 1886 case MO_BEUL: 1887 stl_be_p(haddr, val); 1888 break; 1889 case MO_LEUL: 1890 stl_le_p(haddr, val); 1891 break; 1892 case MO_BEQ: 1893 stq_be_p(haddr, val); 1894 break; 1895 case MO_LEQ: 1896 stq_le_p(haddr, val); 1897 break; 1898 default: 1899 qemu_build_not_reached(); 1900 } 1901 } 1902 1903 static inline void QEMU_ALWAYS_INLINE 1904 store_helper(CPUArchState *env, target_ulong addr, uint64_t val, 1905 TCGMemOpIdx oi, uintptr_t retaddr, MemOp op) 1906 { 1907 uintptr_t mmu_idx = get_mmuidx(oi); 1908 uintptr_t index = tlb_index(env, mmu_idx, addr); 1909 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1910 target_ulong tlb_addr = tlb_addr_write(entry); 1911 const size_t tlb_off = offsetof(CPUTLBEntry, addr_write); 1912 unsigned a_bits = get_alignment_bits(get_memop(oi)); 1913 void *haddr; 1914 size_t size = memop_size(op); 1915 1916 /* Handle CPU specific unaligned behaviour */ 1917 if (addr & ((1 << a_bits) - 1)) { 1918 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE, 1919 mmu_idx, retaddr); 1920 } 1921 1922 /* If the TLB entry is for a different page, reload and try again. */ 1923 if (!tlb_hit(tlb_addr, addr)) { 1924 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, 1925 addr & TARGET_PAGE_MASK)) { 1926 tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE, 1927 mmu_idx, retaddr); 1928 index = tlb_index(env, mmu_idx, addr); 1929 entry = tlb_entry(env, mmu_idx, addr); 1930 } 1931 tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; 1932 } 1933 1934 /* Handle anything that isn't just a straight memory access. */ 1935 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { 1936 CPUIOTLBEntry *iotlbentry; 1937 bool need_swap; 1938 1939 /* For anything that is unaligned, recurse through byte stores. 
*/
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints. */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out. */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
        }

        /*
         * A byte swap is needed only for multi-byte accesses whose TLB
         * entry requests the opposite endianness (TLB_BSWAP).
         */
        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access. */
        if (tlb_addr & TLB_MMIO) {
            io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
                      op ^ (need_swap * MO_BSWAP));
            return;
        }

        /* Ignore writes to ROM. */
        if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
            return;
        }

        /* Handle clean RAM pages. */
        if (tlb_addr & TLB_NOTDIRTY) {
            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two store_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this. ;-)
         */
        if (unlikely(need_swap)) {
            store_memop(haddr, val, op ^ MO_BSWAP);
        } else {
            store_memop(haddr, val, op);
        }
        return;
    }

    /* Handle slow unaligned access (it spans two pages or IO). */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
        int i;
        uintptr_t index2;
        CPUTLBEntry *entry2;
        target_ulong page2, tlb_addr2;
        size_t size2;

        /*
         * Reached either because the access spans two pages, or via the
         * goto above when a slow-path (watchpoint/MMIO) access is not
         * naturally aligned.
         */
    do_unaligned_access:
        /*
         * Ensure the second page is in the TLB. Note that the first page
         * is already guaranteed to be filled, and that the second page
         * cannot evict the first.
         */
        page2 = (addr + size) & TARGET_PAGE_MASK;
        /* size2 = number of bytes that land on the second page. */
        size2 = (addr + size) & ~TARGET_PAGE_MASK;
        index2 = tlb_index(env, mmu_idx, page2);
        entry2 = tlb_entry(env, mmu_idx, page2);
        tlb_addr2 = tlb_addr_write(entry2);
        if (!tlb_hit_page(tlb_addr2, page2)) {
            if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
                tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
                         mmu_idx, retaddr);
                /* tlb_fill may have evicted/moved the entry; re-lookup. */
                index2 = tlb_index(env, mmu_idx, page2);
                entry2 = tlb_entry(env, mmu_idx, page2);
            }
            tlb_addr2 = tlb_addr_write(entry2);
        }

        /*
         * Handle watchpoints.  Since this may trap, all checks
         * must happen before any store.  Note the first check uses the
         * iotlb attrs of the first page's entry (index).
         */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            cpu_check_watchpoint(env_cpu(env), addr, size - size2,
                                 env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
                                 BP_MEM_WRITE, retaddr);
        }
        if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
            cpu_check_watchpoint(env_cpu(env), page2, size2,
                                 env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
                                 BP_MEM_WRITE, retaddr);
        }

        /*
         * XXX: not efficient, but simple.
         * This loop must go in the forward direction to avoid issues
         * with self-modifying code in Windows 64-bit.
         */
        for (i = 0; i < size; ++i) {
            uint8_t val8;
            if (memop_big_endian(op)) {
                /* Big-endian extract: most significant byte first. */
                val8 = val >> (((size - 1) * 8) - (i * 8));
            } else {
                /* Little-endian extract: least significant byte first. */
                val8 = val >> (i * 8);
            }
            helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
        }
        return;
    }

    /* Aligned RAM fast path. */
    haddr = (void *)((uintptr_t)addr + entry->addend);
    store_memop(haddr, val, op);
}

/*
 * Store entry points called from TCG-generated code.  Each forwards to
 * store_helper with the MemOp that matches its name (size/endianness).
 */
void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
                        TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_UB);
}

void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUW);
}

void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUW);
}

void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUL);
}

void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUL);
}

void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEQ);
}

void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEQ);
}

/*
 * Store Helpers for cpu_ldst.h
 */

/*
 * Common implementation of the cpu_st*_mmuidx_ra functions: wrap
 * store_helper with the "before" trace event and the post-store
 * plugin memory callback.
 */
static inline void QEMU_ALWAYS_INLINE
cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
                 int mmu_idx, uintptr_t retaddr, MemOp op)
{
    TCGMemOpIdx oi;
    uint16_t meminfo;

    meminfo = trace_mem_get_info(op, mmu_idx, true);
    trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);

    oi = make_memop_idx(op, mmu_idx);
    store_helper(env, addr, val, oi, retaddr, op);

    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
}

void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
}

void cpu_stw_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUW);
}

void cpu_stl_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUL);
}

void cpu_stq_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
                       int mmu_idx, uintptr_t retaddr)
{
    cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEQ);
}

/*
 * The *_data_ra variants use the current data-access mmu index
 * (cpu_mmu_index with ifetch == false).
 */
void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stw_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stw_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stl_data_ra(CPUArchState *env, target_ulong ptr,
                     uint32_t val, uintptr_t retaddr)
{
    cpu_stl_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

void cpu_stq_data_ra(CPUArchState *env, target_ulong ptr,
                     uint64_t val, uintptr_t retaddr)
{
    cpu_stq_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
}

/*
 * The *_data variants pass a zero retaddr; NOTE(review): presumably this
 * marks a call that does not originate from translated code — confirm
 * against the cpu_ldst.h documentation.
 */
void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stb_data_ra(env, ptr, val, 0);
}

void cpu_stw_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stw_data_ra(env, ptr, val, 0);
}

void cpu_stl_data(CPUArchState *env, target_ulong ptr, uint32_t val)
{
    cpu_stl_data_ra(env, ptr, val, 0);
}

void cpu_stq_data(CPUArchState *env, target_ulong ptr, uint64_t val)
{
    cpu_stq_data_ra(env, ptr, val, 0);
}

/* First set of helpers allows passing in of OI and RETADDR.  This makes
   them callable from other helpers. */

#define EXTRA_ARGS     , TCGMemOpIdx oi, uintptr_t retaddr
#define ATOMIC_NAME(X) \
    HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
#define ATOMIC_MMU_DECLS
#define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, oi, retaddr)
#define ATOMIC_MMU_CLEANUP
#define ATOMIC_MMU_IDX   get_mmuidx(oi)

#include "atomic_common.inc.c"

/* Instantiate the atomic helpers for each access size. */
#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif

/* Second set of helpers are directly callable from TCG as helpers.  */

#undef EXTRA_ARGS
#undef ATOMIC_NAME
#undef ATOMIC_MMU_LOOKUP
#define EXTRA_ARGS         , TCGMemOpIdx oi
#define ATOMIC_NAME(X)     HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
#define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, oi, GETPC())

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif
#undef ATOMIC_MMU_IDX

/* Code access functions.
*/ 2243 2244 static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr, 2245 TCGMemOpIdx oi, uintptr_t retaddr) 2246 { 2247 return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code); 2248 } 2249 2250 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr) 2251 { 2252 TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true)); 2253 return full_ldub_code(env, addr, oi, 0); 2254 } 2255 2256 static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr, 2257 TCGMemOpIdx oi, uintptr_t retaddr) 2258 { 2259 return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code); 2260 } 2261 2262 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr) 2263 { 2264 TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true)); 2265 return full_lduw_code(env, addr, oi, 0); 2266 } 2267 2268 static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr, 2269 TCGMemOpIdx oi, uintptr_t retaddr) 2270 { 2271 return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code); 2272 } 2273 2274 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr) 2275 { 2276 TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true)); 2277 return full_ldl_code(env, addr, oi, 0); 2278 } 2279 2280 static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr, 2281 TCGMemOpIdx oi, uintptr_t retaddr) 2282 { 2283 return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code); 2284 } 2285 2286 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr) 2287 { 2288 TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true)); 2289 return full_ldq_code(env, addr, oi, 0); 2290 } 2291