1 /* 2 * Common CPU TLB handling 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qemu/main-loop.h" 22 #include "cpu.h" 23 #include "exec/exec-all.h" 24 #include "exec/memory.h" 25 #include "exec/address-spaces.h" 26 #include "exec/cpu_ldst.h" 27 #include "exec/cputlb.h" 28 #include "exec/memory-internal.h" 29 #include "exec/ram_addr.h" 30 #include "tcg/tcg.h" 31 #include "qemu/error-report.h" 32 #include "exec/log.h" 33 #include "exec/helper-proto.h" 34 #include "qemu/atomic.h" 35 #include "qemu/atomic128.h" 36 #include "exec/translate-all.h" 37 #include "trace/trace-root.h" 38 #include "trace/mem.h" 39 #ifdef CONFIG_PLUGIN 40 #include "qemu/plugin-memory.h" 41 #endif 42 43 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ 44 /* #define DEBUG_TLB */ 45 /* #define DEBUG_TLB_LOG */ 46 47 #ifdef DEBUG_TLB 48 # define DEBUG_TLB_GATE 1 49 # ifdef DEBUG_TLB_LOG 50 # define DEBUG_TLB_LOG_GATE 1 51 # else 52 # define DEBUG_TLB_LOG_GATE 0 53 # endif 54 #else 55 # define DEBUG_TLB_GATE 0 56 # define DEBUG_TLB_LOG_GATE 0 57 #endif 58 59 #define tlb_debug(fmt, ...) 
do { \ 60 if (DEBUG_TLB_LOG_GATE) { \ 61 qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \ 62 ## __VA_ARGS__); \ 63 } else if (DEBUG_TLB_GATE) { \ 64 fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \ 65 } \ 66 } while (0) 67 68 #define assert_cpu_is_self(cpu) do { \ 69 if (DEBUG_TLB_GATE) { \ 70 g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \ 71 } \ 72 } while (0) 73 74 /* run_on_cpu_data.target_ptr should always be big enough for a 75 * target_ulong even on 32 bit builds */ 76 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data)); 77 78 /* We currently can't handle more than 16 bits in the MMUIDX bitmask. 79 */ 80 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); 81 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1) 82 83 static inline size_t tlb_n_entries(CPUTLBDescFast *fast) 84 { 85 return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1; 86 } 87 88 static inline size_t sizeof_tlb(CPUTLBDescFast *fast) 89 { 90 return fast->mask + (1 << CPU_TLB_ENTRY_BITS); 91 } 92 93 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, 94 size_t max_entries) 95 { 96 desc->window_begin_ns = ns; 97 desc->window_max_entries = max_entries; 98 } 99 100 /** 101 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary 102 * @desc: The CPUTLBDesc portion of the TLB 103 * @fast: The CPUTLBDescFast portion of the same TLB 104 * 105 * Called with tlb_lock_held. 106 * 107 * We have two main constraints when resizing a TLB: (1) we only resize it 108 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing 109 * the array or unnecessarily flushing it), which means we do not control how 110 * frequently the resizing can occur; (2) we don't have access to the guest's 111 * future scheduling decisions, and therefore have to decide the magnitude of 112 * the resize based on past observations. 113 * 114 * In general, a memory-hungry process can benefit greatly from an appropriately 115 * sized TLB, since a guest TLB miss is very expensive. 
This doesn't mean that 116 * we just have to make the TLB as large as possible; while an oversized TLB 117 * results in minimal TLB miss rates, it also takes longer to be flushed 118 * (flushes can be _very_ frequent), and the reduced locality can also hurt 119 * performance. 120 * 121 * To achieve near-optimal performance for all kinds of workloads, we: 122 * 123 * 1. Aggressively increase the size of the TLB when the use rate of the 124 * TLB being flushed is high, since it is likely that in the near future this 125 * memory-hungry process will execute again, and its memory hungriness will 126 * probably be similar. 127 * 128 * 2. Slowly reduce the size of the TLB as the use rate declines over a 129 * reasonably large time window. The rationale is that if in such a time window 130 * we have not observed a high TLB use rate, it is likely that we won't observe 131 * it in the near future. In that case, once a time window expires we downsize 132 * the TLB to match the maximum use rate observed in the window. 133 * 134 * 3. Try to keep the maximum use rate in a time window in the 30-70% range, 135 * since in that range performance is likely near-optimal. Recall that the TLB 136 * is direct mapped, so we want the use rate to be low (or at least not too 137 * high), since otherwise we are likely to have a significant amount of 138 * conflict misses. 
139 */ 140 static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast, 141 int64_t now) 142 { 143 size_t old_size = tlb_n_entries(fast); 144 size_t rate; 145 size_t new_size = old_size; 146 int64_t window_len_ms = 100; 147 int64_t window_len_ns = window_len_ms * 1000 * 1000; 148 bool window_expired = now > desc->window_begin_ns + window_len_ns; 149 150 if (desc->n_used_entries > desc->window_max_entries) { 151 desc->window_max_entries = desc->n_used_entries; 152 } 153 rate = desc->window_max_entries * 100 / old_size; 154 155 if (rate > 70) { 156 new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS); 157 } else if (rate < 30 && window_expired) { 158 size_t ceil = pow2ceil(desc->window_max_entries); 159 size_t expected_rate = desc->window_max_entries * 100 / ceil; 160 161 /* 162 * Avoid undersizing when the max number of entries seen is just below 163 * a pow2. For instance, if max_entries == 1025, the expected use rate 164 * would be 1025/2048==50%. However, if max_entries == 1023, we'd get 165 * 1023/1024==99.9% use rate, so we'd likely end up doubling the size 166 * later. Thus, make sure that the expected use rate remains below 70%. 167 * (and since we double the size, that means the lowest rate we'd 168 * expect to get is 35%, which is still in the 30-70% range where 169 * we consider that the size is appropriate.) 170 */ 171 if (expected_rate > 70) { 172 ceil *= 2; 173 } 174 new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS); 175 } 176 177 if (new_size == old_size) { 178 if (window_expired) { 179 tlb_window_reset(desc, now, desc->n_used_entries); 180 } 181 return; 182 } 183 184 g_free(fast->table); 185 g_free(desc->iotlb); 186 187 tlb_window_reset(desc, now, 0); 188 /* desc->n_used_entries is cleared by the caller */ 189 fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; 190 fast->table = g_try_new(CPUTLBEntry, new_size); 191 desc->iotlb = g_try_new(CPUIOTLBEntry, new_size); 192 193 /* 194 * If the allocations fail, try smaller sizes. 
We just freed some 195 * memory, so going back to half of new_size has a good chance of working. 196 * Increased memory pressure elsewhere in the system might cause the 197 * allocations to fail though, so we progressively reduce the allocation 198 * size, aborting if we cannot even allocate the smallest TLB we support. 199 */ 200 while (fast->table == NULL || desc->iotlb == NULL) { 201 if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) { 202 error_report("%s: %s", __func__, strerror(errno)); 203 abort(); 204 } 205 new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS); 206 fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; 207 208 g_free(fast->table); 209 g_free(desc->iotlb); 210 fast->table = g_try_new(CPUTLBEntry, new_size); 211 desc->iotlb = g_try_new(CPUIOTLBEntry, new_size); 212 } 213 } 214 215 static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast) 216 { 217 desc->n_used_entries = 0; 218 desc->large_page_addr = -1; 219 desc->large_page_mask = -1; 220 desc->vindex = 0; 221 memset(fast->table, -1, sizeof_tlb(fast)); 222 memset(desc->vtable, -1, sizeof(desc->vtable)); 223 } 224 225 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx, 226 int64_t now) 227 { 228 CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx]; 229 CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx]; 230 231 tlb_mmu_resize_locked(desc, fast, now); 232 tlb_mmu_flush_locked(desc, fast); 233 } 234 235 static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now) 236 { 237 size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS; 238 239 tlb_window_reset(desc, now, 0); 240 desc->n_used_entries = 0; 241 fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS; 242 fast->table = g_new(CPUTLBEntry, n_entries); 243 desc->iotlb = g_new(CPUIOTLBEntry, n_entries); 244 tlb_mmu_flush_locked(desc, fast); 245 } 246 247 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx) 248 { 249 env_tlb(env)->d[mmu_idx].n_used_entries++; 250 } 251 252 static inline void 
tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx) 253 { 254 env_tlb(env)->d[mmu_idx].n_used_entries--; 255 } 256 257 void tlb_init(CPUState *cpu) 258 { 259 CPUArchState *env = cpu->env_ptr; 260 int64_t now = get_clock_realtime(); 261 int i; 262 263 qemu_spin_init(&env_tlb(env)->c.lock); 264 265 /* All tlbs are initialized flushed. */ 266 env_tlb(env)->c.dirty = 0; 267 268 for (i = 0; i < NB_MMU_MODES; i++) { 269 tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now); 270 } 271 } 272 273 void tlb_destroy(CPUState *cpu) 274 { 275 CPUArchState *env = cpu->env_ptr; 276 int i; 277 278 qemu_spin_destroy(&env_tlb(env)->c.lock); 279 for (i = 0; i < NB_MMU_MODES; i++) { 280 CPUTLBDesc *desc = &env_tlb(env)->d[i]; 281 CPUTLBDescFast *fast = &env_tlb(env)->f[i]; 282 283 g_free(fast->table); 284 g_free(desc->iotlb); 285 } 286 } 287 288 /* flush_all_helper: run fn across all cpus 289 * 290 * If the wait flag is set then the src cpu's helper will be queued as 291 * "safe" work and the loop exited creating a synchronisation point 292 * where all queued work will be finished before execution starts 293 * again. 
294 */ 295 static void flush_all_helper(CPUState *src, run_on_cpu_func fn, 296 run_on_cpu_data d) 297 { 298 CPUState *cpu; 299 300 CPU_FOREACH(cpu) { 301 if (cpu != src) { 302 async_run_on_cpu(cpu, fn, d); 303 } 304 } 305 } 306 307 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide) 308 { 309 CPUState *cpu; 310 size_t full = 0, part = 0, elide = 0; 311 312 CPU_FOREACH(cpu) { 313 CPUArchState *env = cpu->env_ptr; 314 315 full += qatomic_read(&env_tlb(env)->c.full_flush_count); 316 part += qatomic_read(&env_tlb(env)->c.part_flush_count); 317 elide += qatomic_read(&env_tlb(env)->c.elide_flush_count); 318 } 319 *pfull = full; 320 *ppart = part; 321 *pelide = elide; 322 } 323 324 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) 325 { 326 CPUArchState *env = cpu->env_ptr; 327 uint16_t asked = data.host_int; 328 uint16_t all_dirty, work, to_clean; 329 int64_t now = get_clock_realtime(); 330 331 assert_cpu_is_self(cpu); 332 333 tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked); 334 335 qemu_spin_lock(&env_tlb(env)->c.lock); 336 337 all_dirty = env_tlb(env)->c.dirty; 338 to_clean = asked & all_dirty; 339 all_dirty &= ~to_clean; 340 env_tlb(env)->c.dirty = all_dirty; 341 342 for (work = to_clean; work != 0; work &= work - 1) { 343 int mmu_idx = ctz32(work); 344 tlb_flush_one_mmuidx_locked(env, mmu_idx, now); 345 } 346 347 qemu_spin_unlock(&env_tlb(env)->c.lock); 348 349 cpu_tb_jmp_cache_clear(cpu); 350 351 if (to_clean == ALL_MMUIDX_BITS) { 352 qatomic_set(&env_tlb(env)->c.full_flush_count, 353 env_tlb(env)->c.full_flush_count + 1); 354 } else { 355 qatomic_set(&env_tlb(env)->c.part_flush_count, 356 env_tlb(env)->c.part_flush_count + ctpop16(to_clean)); 357 if (to_clean != asked) { 358 qatomic_set(&env_tlb(env)->c.elide_flush_count, 359 env_tlb(env)->c.elide_flush_count + 360 ctpop16(asked & ~to_clean)); 361 } 362 } 363 } 364 365 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap) 366 { 367 tlb_debug("mmu_idx: 0x%" PRIx16 "\n", 
idxmap); 368 369 if (cpu->created && !qemu_cpu_is_self(cpu)) { 370 async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work, 371 RUN_ON_CPU_HOST_INT(idxmap)); 372 } else { 373 tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap)); 374 } 375 } 376 377 void tlb_flush(CPUState *cpu) 378 { 379 tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS); 380 } 381 382 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap) 383 { 384 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work; 385 386 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap); 387 388 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); 389 fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap)); 390 } 391 392 void tlb_flush_all_cpus(CPUState *src_cpu) 393 { 394 tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS); 395 } 396 397 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap) 398 { 399 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work; 400 401 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap); 402 403 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); 404 async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); 405 } 406 407 void tlb_flush_all_cpus_synced(CPUState *src_cpu) 408 { 409 tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS); 410 } 411 412 static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry, 413 target_ulong page, target_ulong mask) 414 { 415 page &= mask; 416 mask &= TARGET_PAGE_MASK | TLB_INVALID_MASK; 417 418 return (page == (tlb_entry->addr_read & mask) || 419 page == (tlb_addr_write(tlb_entry) & mask) || 420 page == (tlb_entry->addr_code & mask)); 421 } 422 423 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, 424 target_ulong page) 425 { 426 return tlb_hit_page_mask_anyprot(tlb_entry, page, -1); 427 } 428 429 /** 430 * tlb_entry_is_empty - return true if the entry is not in use 431 * @te: pointer to CPUTLBEntry 432 */ 433 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te) 434 { 435 return te->addr_read 
== -1 && te->addr_write == -1 && te->addr_code == -1; 436 } 437 438 /* Called with tlb_c.lock held */ 439 static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry, 440 target_ulong page, 441 target_ulong mask) 442 { 443 if (tlb_hit_page_mask_anyprot(tlb_entry, page, mask)) { 444 memset(tlb_entry, -1, sizeof(*tlb_entry)); 445 return true; 446 } 447 return false; 448 } 449 450 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, 451 target_ulong page) 452 { 453 return tlb_flush_entry_mask_locked(tlb_entry, page, -1); 454 } 455 456 /* Called with tlb_c.lock held */ 457 static void tlb_flush_vtlb_page_mask_locked(CPUArchState *env, int mmu_idx, 458 target_ulong page, 459 target_ulong mask) 460 { 461 CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx]; 462 int k; 463 464 assert_cpu_is_self(env_cpu(env)); 465 for (k = 0; k < CPU_VTLB_SIZE; k++) { 466 if (tlb_flush_entry_mask_locked(&d->vtable[k], page, mask)) { 467 tlb_n_used_entries_dec(env, mmu_idx); 468 } 469 } 470 } 471 472 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, 473 target_ulong page) 474 { 475 tlb_flush_vtlb_page_mask_locked(env, mmu_idx, page, -1); 476 } 477 478 static void tlb_flush_page_locked(CPUArchState *env, int midx, 479 target_ulong page) 480 { 481 target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr; 482 target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask; 483 484 /* Check if we need to flush due to large pages. 
*/ 485 if ((page & lp_mask) == lp_addr) { 486 tlb_debug("forcing full flush midx %d (" 487 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", 488 midx, lp_addr, lp_mask); 489 tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); 490 } else { 491 if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) { 492 tlb_n_used_entries_dec(env, midx); 493 } 494 tlb_flush_vtlb_page_locked(env, midx, page); 495 } 496 } 497 498 /** 499 * tlb_flush_page_by_mmuidx_async_0: 500 * @cpu: cpu on which to flush 501 * @addr: page of virtual address to flush 502 * @idxmap: set of mmu_idx to flush 503 * 504 * Helper for tlb_flush_page_by_mmuidx and friends, flush one page 505 * at @addr from the tlbs indicated by @idxmap from @cpu. 506 */ 507 static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu, 508 target_ulong addr, 509 uint16_t idxmap) 510 { 511 CPUArchState *env = cpu->env_ptr; 512 int mmu_idx; 513 514 assert_cpu_is_self(cpu); 515 516 tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap); 517 518 qemu_spin_lock(&env_tlb(env)->c.lock); 519 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 520 if ((idxmap >> mmu_idx) & 1) { 521 tlb_flush_page_locked(env, mmu_idx, addr); 522 } 523 } 524 qemu_spin_unlock(&env_tlb(env)->c.lock); 525 526 tb_flush_jmp_cache(cpu, addr); 527 } 528 529 /** 530 * tlb_flush_page_by_mmuidx_async_1: 531 * @cpu: cpu on which to flush 532 * @data: encoded addr + idxmap 533 * 534 * Helper for tlb_flush_page_by_mmuidx and friends, called through 535 * async_run_on_cpu. The idxmap parameter is encoded in the page 536 * offset of the target_ptr field. This limits the set of mmu_idx 537 * that can be passed via this method. 
538 */ 539 static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu, 540 run_on_cpu_data data) 541 { 542 target_ulong addr_and_idxmap = (target_ulong) data.target_ptr; 543 target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK; 544 uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK; 545 546 tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap); 547 } 548 549 typedef struct { 550 target_ulong addr; 551 uint16_t idxmap; 552 } TLBFlushPageByMMUIdxData; 553 554 /** 555 * tlb_flush_page_by_mmuidx_async_2: 556 * @cpu: cpu on which to flush 557 * @data: allocated addr + idxmap 558 * 559 * Helper for tlb_flush_page_by_mmuidx and friends, called through 560 * async_run_on_cpu. The addr+idxmap parameters are stored in a 561 * TLBFlushPageByMMUIdxData structure that has been allocated 562 * specifically for this helper. Free the structure when done. 563 */ 564 static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu, 565 run_on_cpu_data data) 566 { 567 TLBFlushPageByMMUIdxData *d = data.host_ptr; 568 569 tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap); 570 g_free(d); 571 } 572 573 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap) 574 { 575 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap); 576 577 /* This should already be page aligned */ 578 addr &= TARGET_PAGE_MASK; 579 580 if (qemu_cpu_is_self(cpu)) { 581 tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap); 582 } else if (idxmap < TARGET_PAGE_SIZE) { 583 /* 584 * Most targets have only a few mmu_idx. In the case where 585 * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid 586 * allocating memory for this operation. 587 */ 588 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1, 589 RUN_ON_CPU_TARGET_PTR(addr | idxmap)); 590 } else { 591 TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1); 592 593 /* Otherwise allocate a structure, freed by the worker. 
*/ 594 d->addr = addr; 595 d->idxmap = idxmap; 596 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2, 597 RUN_ON_CPU_HOST_PTR(d)); 598 } 599 } 600 601 void tlb_flush_page(CPUState *cpu, target_ulong addr) 602 { 603 tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS); 604 } 605 606 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr, 607 uint16_t idxmap) 608 { 609 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap); 610 611 /* This should already be page aligned */ 612 addr &= TARGET_PAGE_MASK; 613 614 /* 615 * Allocate memory to hold addr+idxmap only when needed. 616 * See tlb_flush_page_by_mmuidx for details. 617 */ 618 if (idxmap < TARGET_PAGE_SIZE) { 619 flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1, 620 RUN_ON_CPU_TARGET_PTR(addr | idxmap)); 621 } else { 622 CPUState *dst_cpu; 623 624 /* Allocate a separate data block for each destination cpu. */ 625 CPU_FOREACH(dst_cpu) { 626 if (dst_cpu != src_cpu) { 627 TLBFlushPageByMMUIdxData *d 628 = g_new(TLBFlushPageByMMUIdxData, 1); 629 630 d->addr = addr; 631 d->idxmap = idxmap; 632 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2, 633 RUN_ON_CPU_HOST_PTR(d)); 634 } 635 } 636 } 637 638 tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap); 639 } 640 641 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr) 642 { 643 tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS); 644 } 645 646 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu, 647 target_ulong addr, 648 uint16_t idxmap) 649 { 650 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap); 651 652 /* This should already be page aligned */ 653 addr &= TARGET_PAGE_MASK; 654 655 /* 656 * Allocate memory to hold addr+idxmap only when needed. 657 * See tlb_flush_page_by_mmuidx for details. 
658 */ 659 if (idxmap < TARGET_PAGE_SIZE) { 660 flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1, 661 RUN_ON_CPU_TARGET_PTR(addr | idxmap)); 662 async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1, 663 RUN_ON_CPU_TARGET_PTR(addr | idxmap)); 664 } else { 665 CPUState *dst_cpu; 666 TLBFlushPageByMMUIdxData *d; 667 668 /* Allocate a separate data block for each destination cpu. */ 669 CPU_FOREACH(dst_cpu) { 670 if (dst_cpu != src_cpu) { 671 d = g_new(TLBFlushPageByMMUIdxData, 1); 672 d->addr = addr; 673 d->idxmap = idxmap; 674 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2, 675 RUN_ON_CPU_HOST_PTR(d)); 676 } 677 } 678 679 d = g_new(TLBFlushPageByMMUIdxData, 1); 680 d->addr = addr; 681 d->idxmap = idxmap; 682 async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2, 683 RUN_ON_CPU_HOST_PTR(d)); 684 } 685 } 686 687 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr) 688 { 689 tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS); 690 } 691 692 static void tlb_flush_page_bits_locked(CPUArchState *env, int midx, 693 target_ulong page, unsigned bits) 694 { 695 CPUTLBDesc *d = &env_tlb(env)->d[midx]; 696 CPUTLBDescFast *f = &env_tlb(env)->f[midx]; 697 target_ulong mask = MAKE_64BIT_MASK(0, bits); 698 699 /* 700 * If @bits is smaller than the tlb size, there may be multiple entries 701 * within the TLB; otherwise all addresses that match under @mask hit 702 * the same TLB entry. 703 * 704 * TODO: Perhaps allow bits to be a few bits less than the size. 705 * For now, just flush the entire TLB. 706 */ 707 if (mask < f->mask) { 708 tlb_debug("forcing full flush midx %d (" 709 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", 710 midx, page, mask); 711 tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); 712 return; 713 } 714 715 /* Check if we need to flush due to large pages. 
*/ 716 if ((page & d->large_page_mask) == d->large_page_addr) { 717 tlb_debug("forcing full flush midx %d (" 718 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", 719 midx, d->large_page_addr, d->large_page_mask); 720 tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); 721 return; 722 } 723 724 if (tlb_flush_entry_mask_locked(tlb_entry(env, midx, page), page, mask)) { 725 tlb_n_used_entries_dec(env, midx); 726 } 727 tlb_flush_vtlb_page_mask_locked(env, midx, page, mask); 728 } 729 730 typedef struct { 731 target_ulong addr; 732 uint16_t idxmap; 733 uint16_t bits; 734 } TLBFlushPageBitsByMMUIdxData; 735 736 static void 737 tlb_flush_page_bits_by_mmuidx_async_0(CPUState *cpu, 738 TLBFlushPageBitsByMMUIdxData d) 739 { 740 CPUArchState *env = cpu->env_ptr; 741 int mmu_idx; 742 743 assert_cpu_is_self(cpu); 744 745 tlb_debug("page addr:" TARGET_FMT_lx "/%u mmu_map:0x%x\n", 746 d.addr, d.bits, d.idxmap); 747 748 qemu_spin_lock(&env_tlb(env)->c.lock); 749 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 750 if ((d.idxmap >> mmu_idx) & 1) { 751 tlb_flush_page_bits_locked(env, mmu_idx, d.addr, d.bits); 752 } 753 } 754 qemu_spin_unlock(&env_tlb(env)->c.lock); 755 756 tb_flush_jmp_cache(cpu, d.addr); 757 } 758 759 static bool encode_pbm_to_runon(run_on_cpu_data *out, 760 TLBFlushPageBitsByMMUIdxData d) 761 { 762 /* We need 6 bits to hold to hold @bits up to 63. 
*/ 763 if (d.idxmap <= MAKE_64BIT_MASK(0, TARGET_PAGE_BITS - 6)) { 764 *out = RUN_ON_CPU_TARGET_PTR(d.addr | (d.idxmap << 6) | d.bits); 765 return true; 766 } 767 return false; 768 } 769 770 static TLBFlushPageBitsByMMUIdxData 771 decode_runon_to_pbm(run_on_cpu_data data) 772 { 773 target_ulong addr_map_bits = (target_ulong) data.target_ptr; 774 return (TLBFlushPageBitsByMMUIdxData){ 775 .addr = addr_map_bits & TARGET_PAGE_MASK, 776 .idxmap = (addr_map_bits & ~TARGET_PAGE_MASK) >> 6, 777 .bits = addr_map_bits & 0x3f 778 }; 779 } 780 781 static void tlb_flush_page_bits_by_mmuidx_async_1(CPUState *cpu, 782 run_on_cpu_data runon) 783 { 784 tlb_flush_page_bits_by_mmuidx_async_0(cpu, decode_runon_to_pbm(runon)); 785 } 786 787 static void tlb_flush_page_bits_by_mmuidx_async_2(CPUState *cpu, 788 run_on_cpu_data data) 789 { 790 TLBFlushPageBitsByMMUIdxData *d = data.host_ptr; 791 tlb_flush_page_bits_by_mmuidx_async_0(cpu, *d); 792 g_free(d); 793 } 794 795 void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr, 796 uint16_t idxmap, unsigned bits) 797 { 798 TLBFlushPageBitsByMMUIdxData d; 799 run_on_cpu_data runon; 800 801 /* If all bits are significant, this devolves to tlb_flush_page. */ 802 if (bits >= TARGET_LONG_BITS) { 803 tlb_flush_page_by_mmuidx(cpu, addr, idxmap); 804 return; 805 } 806 /* If no page bits are significant, this devolves to tlb_flush. */ 807 if (bits < TARGET_PAGE_BITS) { 808 tlb_flush_by_mmuidx(cpu, idxmap); 809 return; 810 } 811 812 /* This should already be page aligned */ 813 d.addr = addr & TARGET_PAGE_MASK; 814 d.idxmap = idxmap; 815 d.bits = bits; 816 817 if (qemu_cpu_is_self(cpu)) { 818 tlb_flush_page_bits_by_mmuidx_async_0(cpu, d); 819 } else if (encode_pbm_to_runon(&runon, d)) { 820 async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon); 821 } else { 822 TLBFlushPageBitsByMMUIdxData *p 823 = g_new(TLBFlushPageBitsByMMUIdxData, 1); 824 825 /* Otherwise allocate a structure, freed by the worker. 
*/ 826 *p = d; 827 async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_2, 828 RUN_ON_CPU_HOST_PTR(p)); 829 } 830 } 831 832 void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu, 833 target_ulong addr, 834 uint16_t idxmap, 835 unsigned bits) 836 { 837 TLBFlushPageBitsByMMUIdxData d; 838 run_on_cpu_data runon; 839 840 /* If all bits are significant, this devolves to tlb_flush_page. */ 841 if (bits >= TARGET_LONG_BITS) { 842 tlb_flush_page_by_mmuidx_all_cpus(src_cpu, addr, idxmap); 843 return; 844 } 845 /* If no page bits are significant, this devolves to tlb_flush. */ 846 if (bits < TARGET_PAGE_BITS) { 847 tlb_flush_by_mmuidx_all_cpus(src_cpu, idxmap); 848 return; 849 } 850 851 /* This should already be page aligned */ 852 d.addr = addr & TARGET_PAGE_MASK; 853 d.idxmap = idxmap; 854 d.bits = bits; 855 856 if (encode_pbm_to_runon(&runon, d)) { 857 flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon); 858 } else { 859 CPUState *dst_cpu; 860 TLBFlushPageBitsByMMUIdxData *p; 861 862 /* Allocate a separate data block for each destination cpu. */ 863 CPU_FOREACH(dst_cpu) { 864 if (dst_cpu != src_cpu) { 865 p = g_new(TLBFlushPageBitsByMMUIdxData, 1); 866 *p = d; 867 async_run_on_cpu(dst_cpu, 868 tlb_flush_page_bits_by_mmuidx_async_2, 869 RUN_ON_CPU_HOST_PTR(p)); 870 } 871 } 872 } 873 874 tlb_flush_page_bits_by_mmuidx_async_0(src_cpu, d); 875 } 876 877 void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu, 878 target_ulong addr, 879 uint16_t idxmap, 880 unsigned bits) 881 { 882 TLBFlushPageBitsByMMUIdxData d; 883 run_on_cpu_data runon; 884 885 /* If all bits are significant, this devolves to tlb_flush_page. */ 886 if (bits >= TARGET_LONG_BITS) { 887 tlb_flush_page_by_mmuidx_all_cpus_synced(src_cpu, addr, idxmap); 888 return; 889 } 890 /* If no page bits are significant, this devolves to tlb_flush. 
*/ 891 if (bits < TARGET_PAGE_BITS) { 892 tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, idxmap); 893 return; 894 } 895 896 /* This should already be page aligned */ 897 d.addr = addr & TARGET_PAGE_MASK; 898 d.idxmap = idxmap; 899 d.bits = bits; 900 901 if (encode_pbm_to_runon(&runon, d)) { 902 flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon); 903 async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, 904 runon); 905 } else { 906 CPUState *dst_cpu; 907 TLBFlushPageBitsByMMUIdxData *p; 908 909 /* Allocate a separate data block for each destination cpu. */ 910 CPU_FOREACH(dst_cpu) { 911 if (dst_cpu != src_cpu) { 912 p = g_new(TLBFlushPageBitsByMMUIdxData, 1); 913 *p = d; 914 async_run_on_cpu(dst_cpu, tlb_flush_page_bits_by_mmuidx_async_2, 915 RUN_ON_CPU_HOST_PTR(p)); 916 } 917 } 918 919 p = g_new(TLBFlushPageBitsByMMUIdxData, 1); 920 *p = d; 921 async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_2, 922 RUN_ON_CPU_HOST_PTR(p)); 923 } 924 } 925 926 /* update the TLBs so that writes to code in the virtual page 'addr' 927 can be detected */ 928 void tlb_protect_code(ram_addr_t ram_addr) 929 { 930 cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE, 931 DIRTY_MEMORY_CODE); 932 } 933 934 /* update the TLB so that writes in physical page 'phys_addr' are no longer 935 tested for self modifying code */ 936 void tlb_unprotect_code(ram_addr_t ram_addr) 937 { 938 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE); 939 } 940 941 942 /* 943 * Dirty write flag handling 944 * 945 * When the TCG code writes to a location it looks up the address in 946 * the TLB and uses that data to compute the final address. If any of 947 * the lower bits of the address are set then the slow path is forced. 948 * There are a number of reasons to do this but for normal RAM the 949 * most usual is detecting writes to code regions which may invalidate 950 * generated code. 
951 * 952 * Other vCPUs might be reading their TLBs during guest execution, so we update 953 * te->addr_write with qatomic_set. We don't need to worry about this for 954 * oversized guests as MTTCG is disabled for them. 955 * 956 * Called with tlb_c.lock held. 957 */ 958 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry, 959 uintptr_t start, uintptr_t length) 960 { 961 uintptr_t addr = tlb_entry->addr_write; 962 963 if ((addr & (TLB_INVALID_MASK | TLB_MMIO | 964 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) { 965 addr &= TARGET_PAGE_MASK; 966 addr += tlb_entry->addend; 967 if ((addr - start) < length) { 968 #if TCG_OVERSIZED_GUEST 969 tlb_entry->addr_write |= TLB_NOTDIRTY; 970 #else 971 qatomic_set(&tlb_entry->addr_write, 972 tlb_entry->addr_write | TLB_NOTDIRTY); 973 #endif 974 } 975 } 976 } 977 978 /* 979 * Called with tlb_c.lock held. 980 * Called only from the vCPU context, i.e. the TLB's owner thread. 981 */ 982 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s) 983 { 984 *d = *s; 985 } 986 987 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of 988 * the target vCPU). 989 * We must take tlb_c.lock to avoid racing with another vCPU update. The only 990 * thing actually updated is the target TLB entry ->addr_write flags. 
991 */ 992 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) 993 { 994 CPUArchState *env; 995 996 int mmu_idx; 997 998 env = cpu->env_ptr; 999 qemu_spin_lock(&env_tlb(env)->c.lock); 1000 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 1001 unsigned int i; 1002 unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]); 1003 1004 for (i = 0; i < n; i++) { 1005 tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i], 1006 start1, length); 1007 } 1008 1009 for (i = 0; i < CPU_VTLB_SIZE; i++) { 1010 tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i], 1011 start1, length); 1012 } 1013 } 1014 qemu_spin_unlock(&env_tlb(env)->c.lock); 1015 } 1016 1017 /* Called with tlb_c.lock held */ 1018 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry, 1019 target_ulong vaddr) 1020 { 1021 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) { 1022 tlb_entry->addr_write = vaddr; 1023 } 1024 } 1025 1026 /* update the TLB corresponding to virtual page vaddr 1027 so that it is no longer dirty */ 1028 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) 1029 { 1030 CPUArchState *env = cpu->env_ptr; 1031 int mmu_idx; 1032 1033 assert_cpu_is_self(cpu); 1034 1035 vaddr &= TARGET_PAGE_MASK; 1036 qemu_spin_lock(&env_tlb(env)->c.lock); 1037 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 1038 tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr); 1039 } 1040 1041 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 1042 int k; 1043 for (k = 0; k < CPU_VTLB_SIZE; k++) { 1044 tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr); 1045 } 1046 } 1047 qemu_spin_unlock(&env_tlb(env)->c.lock); 1048 } 1049 1050 /* Our TLB does not support large pages, so remember the area covered by 1051 large pages and trigger a full TLB flush if these are invalidated. 
*/
static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
                               target_ulong vaddr, target_ulong size)
{
    CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
    target_ulong region_addr = desc->large_page_addr;
    target_ulong region_mask = ~(size - 1);

    if (region_addr != (target_ulong)-1) {
        /*
         * A region is already recorded: widen it until it also covers
         * the new page.  This is a compromise between unnecessary
         * flushes and the cost of maintaining a full variable size TLB.
         */
        region_mask &= desc->large_page_mask;
        while (((region_addr ^ vaddr) & region_mask) != 0) {
            region_mask <<= 1;
        }
    } else {
        /* No previous large page.  */
        region_addr = vaddr;
    }

    desc->large_page_addr = region_addr & region_mask;
    desc->large_page_mask = region_mask;
}

/* Add a new TLB entry. At most one entry for a given virtual address
 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
 * supplied size is only used by tlb_flush_page.
 *
 * Called from TCG-generated code, which is under an RCU read-side
 * critical section.
*/
void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
                             hwaddr paddr, MemTxAttrs attrs, int prot,
                             int mmu_idx, target_ulong size)
{
    CPUArchState *env = cpu->env_ptr;
    CPUTLB *tlb = env_tlb(env);
    CPUTLBDesc *desc = &tlb->d[mmu_idx];
    MemoryRegionSection *section;
    unsigned int index;
    target_ulong address;       /* comparator used for read and code */
    target_ulong write_address; /* comparator used for write */
    uintptr_t addend;
    CPUTLBEntry *te, tn;
    hwaddr iotlb, xlat, sz, paddr_page;
    target_ulong vaddr_page;
    int asidx = cpu_asidx_from_attrs(cpu, attrs);
    int wp_flags;
    bool is_ram, is_romd;

    assert_cpu_is_self(cpu);

    /*
     * Anything larger than a target page is recorded as a large page;
     * the entry installed below still covers a single page only.
     */
    if (size <= TARGET_PAGE_SIZE) {
        sz = TARGET_PAGE_SIZE;
    } else {
        tlb_add_large_page(env, mmu_idx, vaddr, size);
        sz = size;
    }
    vaddr_page = vaddr & TARGET_PAGE_MASK;
    paddr_page = paddr & TARGET_PAGE_MASK;

    /* Note: the translation may update xlat, sz and prot. */
    section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
                                                &xlat, &sz, attrs, &prot);
    assert(sz >= TARGET_PAGE_SIZE);

    tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
              " prot=%x idx=%d\n",
              vaddr, paddr, prot, mmu_idx);

    address = vaddr_page;
    if (size < TARGET_PAGE_SIZE) {
        /* Repeat the MMU check and TLB fill on every access.  */
        address |= TLB_INVALID_MASK;
    }
    if (attrs.byte_swap) {
        address |= TLB_BSWAP;
    }

    is_ram = memory_region_is_ram(section->mr);
    is_romd = memory_region_is_romd(section->mr);

    if (is_ram || is_romd) {
        /* RAM and ROMD both have associated host memory. */
        addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
    } else {
        /* I/O does not; force the host address to NULL. */
        addend = 0;
    }

    /* The write comparator starts from the read one and gains extra flags. */
    write_address = address;
    if (is_ram) {
        iotlb = memory_region_get_ram_addr(section->mr) + xlat;
        /*
         * Computing is_clean is expensive; avoid all that unless
         * the page is actually writable.
         */
        if (prot & PAGE_WRITE) {
            if (section->readonly) {
                write_address |= TLB_DISCARD_WRITE;
            } else if (cpu_physical_memory_is_clean(iotlb)) {
                write_address |= TLB_NOTDIRTY;
            }
        }
    } else {
        /* I/O or ROMD */
        iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
        /*
         * Writes to romd devices must go through MMIO to enable write.
         * Reads to romd devices go through the ram_ptr found above,
         * but of course reads to I/O must go through MMIO.
         */
        write_address |= TLB_MMIO;
        if (!is_romd) {
            address = write_address;
        }
    }

    wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
                                              TARGET_PAGE_SIZE);

    index = tlb_index(env, mmu_idx, vaddr_page);
    te = tlb_entry(env, mmu_idx, vaddr_page);

    /*
     * Hold the TLB lock for the rest of the function. We could acquire/release
     * the lock several times in the function, but it is faster to amortize the
     * acquisition cost by acquiring it just once. Note that this leads to
     * a longer critical section, but this is not a concern since the TLB lock
     * is unlikely to be contended.
     */
    qemu_spin_lock(&tlb->c.lock);

    /* Note that the tlb is no longer clean.  */
    tlb->c.dirty |= 1 << mmu_idx;

    /* Make sure there's no cached translation for the new page.  */
    tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);

    /*
     * Only evict the old entry to the victim tlb if it's for a
     * different page; otherwise just overwrite the stale data.
     */
    if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
        /* Victim slots are reused round-robin via desc->vindex. */
        unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
        CPUTLBEntry *tv = &desc->vtable[vidx];

        /* Evict the old entry into the victim tlb.  */
        copy_tlb_helper_locked(tv, te);
        desc->viotlb[vidx] = desc->iotlb[index];
        tlb_n_used_entries_dec(env, mmu_idx);
    }

    /* refill the tlb */
    /*
     * At this point iotlb contains a physical section number in the lower
     * TARGET_PAGE_BITS, and either
     *  + the ram_addr_t of the page base of the target RAM (RAM)
     *  + the offset within section->mr of the page base (I/O, ROMD)
     * We subtract the vaddr_page (which is page aligned and thus won't
     * disturb the low bits) to give an offset which can be added to the
     * (non-page-aligned) vaddr of the eventual memory access to get
     * the MemoryRegion offset for the access. Note that the vaddr we
     * subtract here is that of the page base, and not the same as the
     * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
     */
    desc->iotlb[index].addr = iotlb - vaddr_page;
    desc->iotlb[index].attrs = attrs;

    /* Now calculate the new entry */
    /* addend is biased so that guest vaddr + addend gives the host address. */
    tn.addend = addend - vaddr_page;
    if (prot & PAGE_READ) {
        tn.addr_read = address;
        if (wp_flags & BP_MEM_READ) {
            tn.addr_read |= TLB_WATCHPOINT;
        }
    } else {
        /* -1 is stored for an access kind that is not permitted. */
        tn.addr_read = -1;
    }

    if (prot & PAGE_EXEC) {
        tn.addr_code = address;
    } else {
        tn.addr_code = -1;
    }

    tn.addr_write = -1;
    if (prot & PAGE_WRITE) {
        tn.addr_write = write_address;
        if (prot & PAGE_WRITE_INV) {
            tn.addr_write |= TLB_INVALID_MASK;
        }
        if (wp_flags & BP_MEM_WRITE) {
            tn.addr_write |= TLB_WATCHPOINT;
        }
    }

    /*
     * NOTE(review): the used-entry count is incremented even when the slot
     * already held an entry for this same page (no decrement happened
     * above in that case) -- confirm this is intended.
     */
    copy_tlb_helper_locked(te, &tn);
    tlb_n_used_entries_inc(env, mmu_idx);
    qemu_spin_unlock(&tlb->c.lock);
}

/* Add a new TLB entry, but without specifying the memory
 * transaction attributes to be used.
1253 */ 1254 void tlb_set_page(CPUState *cpu, target_ulong vaddr, 1255 hwaddr paddr, int prot, 1256 int mmu_idx, target_ulong size) 1257 { 1258 tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED, 1259 prot, mmu_idx, size); 1260 } 1261 1262 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr) 1263 { 1264 ram_addr_t ram_addr; 1265 1266 ram_addr = qemu_ram_addr_from_host(ptr); 1267 if (ram_addr == RAM_ADDR_INVALID) { 1268 error_report("Bad ram pointer %p", ptr); 1269 abort(); 1270 } 1271 return ram_addr; 1272 } 1273 1274 /* 1275 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the 1276 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must 1277 * be discarded and looked up again (e.g. via tlb_entry()). 1278 */ 1279 static void tlb_fill(CPUState *cpu, target_ulong addr, int size, 1280 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) 1281 { 1282 CPUClass *cc = CPU_GET_CLASS(cpu); 1283 bool ok; 1284 1285 /* 1286 * This is not a probe, so only valid return is success; failure 1287 * should result in exception + longjmp to the cpu loop. 
1288 */ 1289 ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr); 1290 assert(ok); 1291 } 1292 1293 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, 1294 int mmu_idx, target_ulong addr, uintptr_t retaddr, 1295 MMUAccessType access_type, MemOp op) 1296 { 1297 CPUState *cpu = env_cpu(env); 1298 hwaddr mr_offset; 1299 MemoryRegionSection *section; 1300 MemoryRegion *mr; 1301 uint64_t val; 1302 bool locked = false; 1303 MemTxResult r; 1304 1305 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); 1306 mr = section->mr; 1307 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; 1308 cpu->mem_io_pc = retaddr; 1309 if (!cpu->can_do_io) { 1310 cpu_io_recompile(cpu, retaddr); 1311 } 1312 1313 if (!qemu_mutex_iothread_locked()) { 1314 qemu_mutex_lock_iothread(); 1315 locked = true; 1316 } 1317 r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs); 1318 if (r != MEMTX_OK) { 1319 hwaddr physaddr = mr_offset + 1320 section->offset_within_address_space - 1321 section->offset_within_region; 1322 1323 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type, 1324 mmu_idx, iotlbentry->attrs, r, retaddr); 1325 } 1326 if (locked) { 1327 qemu_mutex_unlock_iothread(); 1328 } 1329 1330 return val; 1331 } 1332 1333 /* 1334 * Save a potentially trashed IOTLB entry for later lookup by plugin. 1335 * This is read by tlb_plugin_lookup if the iotlb entry doesn't match 1336 * because of the side effect of io_writex changing memory layout. 
1337 */ 1338 static void save_iotlb_data(CPUState *cs, hwaddr addr, 1339 MemoryRegionSection *section, hwaddr mr_offset) 1340 { 1341 #ifdef CONFIG_PLUGIN 1342 SavedIOTLB *saved = &cs->saved_iotlb; 1343 saved->addr = addr; 1344 saved->section = section; 1345 saved->mr_offset = mr_offset; 1346 #endif 1347 } 1348 1349 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, 1350 int mmu_idx, uint64_t val, target_ulong addr, 1351 uintptr_t retaddr, MemOp op) 1352 { 1353 CPUState *cpu = env_cpu(env); 1354 hwaddr mr_offset; 1355 MemoryRegionSection *section; 1356 MemoryRegion *mr; 1357 bool locked = false; 1358 MemTxResult r; 1359 1360 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); 1361 mr = section->mr; 1362 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; 1363 if (!cpu->can_do_io) { 1364 cpu_io_recompile(cpu, retaddr); 1365 } 1366 cpu->mem_io_pc = retaddr; 1367 1368 /* 1369 * The memory_region_dispatch may trigger a flush/resize 1370 * so for plugins we save the iotlb_data just in case. 
1371 */ 1372 save_iotlb_data(cpu, iotlbentry->addr, section, mr_offset); 1373 1374 if (!qemu_mutex_iothread_locked()) { 1375 qemu_mutex_lock_iothread(); 1376 locked = true; 1377 } 1378 r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs); 1379 if (r != MEMTX_OK) { 1380 hwaddr physaddr = mr_offset + 1381 section->offset_within_address_space - 1382 section->offset_within_region; 1383 1384 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), 1385 MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r, 1386 retaddr); 1387 } 1388 if (locked) { 1389 qemu_mutex_unlock_iothread(); 1390 } 1391 } 1392 1393 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs) 1394 { 1395 #if TCG_OVERSIZED_GUEST 1396 return *(target_ulong *)((uintptr_t)entry + ofs); 1397 #else 1398 /* ofs might correspond to .addr_write, so use qatomic_read */ 1399 return qatomic_read((target_ulong *)((uintptr_t)entry + ofs)); 1400 #endif 1401 } 1402 1403 /* Return true if ADDR is present in the victim tlb, and has been copied 1404 back to the main tlb. */ 1405 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, 1406 size_t elt_ofs, target_ulong page) 1407 { 1408 size_t vidx; 1409 1410 assert_cpu_is_self(env_cpu(env)); 1411 for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { 1412 CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx]; 1413 target_ulong cmp; 1414 1415 /* elt_ofs might correspond to .addr_write, so use qatomic_read */ 1416 #if TCG_OVERSIZED_GUEST 1417 cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); 1418 #else 1419 cmp = qatomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs)); 1420 #endif 1421 1422 if (cmp == page) { 1423 /* Found entry in victim tlb, swap tlb and iotlb. 
*/ 1424 CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index]; 1425 1426 qemu_spin_lock(&env_tlb(env)->c.lock); 1427 copy_tlb_helper_locked(&tmptlb, tlb); 1428 copy_tlb_helper_locked(tlb, vtlb); 1429 copy_tlb_helper_locked(vtlb, &tmptlb); 1430 qemu_spin_unlock(&env_tlb(env)->c.lock); 1431 1432 CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1433 CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx]; 1434 tmpio = *io; *io = *vio; *vio = tmpio; 1435 return true; 1436 } 1437 } 1438 return false; 1439 } 1440 1441 /* Macro to call the above, with local variables from the use context. */ 1442 #define VICTIM_TLB_HIT(TY, ADDR) \ 1443 victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \ 1444 (ADDR) & TARGET_PAGE_MASK) 1445 1446 /* 1447 * Return a ram_addr_t for the virtual address for execution. 1448 * 1449 * Return -1 if we can't translate and execute from an entire page 1450 * of RAM. This will force us to execute by loading and translating 1451 * one insn at a time, without caching. 1452 * 1453 * NOTE: This function will trigger an exception if the page is 1454 * not executable. 1455 */ 1456 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, 1457 void **hostp) 1458 { 1459 uintptr_t mmu_idx = cpu_mmu_index(env, true); 1460 uintptr_t index = tlb_index(env, mmu_idx, addr); 1461 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1462 void *p; 1463 1464 if (unlikely(!tlb_hit(entry->addr_code, addr))) { 1465 if (!VICTIM_TLB_HIT(addr_code, addr)) { 1466 tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0); 1467 index = tlb_index(env, mmu_idx, addr); 1468 entry = tlb_entry(env, mmu_idx, addr); 1469 1470 if (unlikely(entry->addr_code & TLB_INVALID_MASK)) { 1471 /* 1472 * The MMU protection covers a smaller range than a target 1473 * page, so we must redo the MMU check for every insn. 
1474 */ 1475 return -1; 1476 } 1477 } 1478 assert(tlb_hit(entry->addr_code, addr)); 1479 } 1480 1481 if (unlikely(entry->addr_code & TLB_MMIO)) { 1482 /* The region is not backed by RAM. */ 1483 if (hostp) { 1484 *hostp = NULL; 1485 } 1486 return -1; 1487 } 1488 1489 p = (void *)((uintptr_t)addr + entry->addend); 1490 if (hostp) { 1491 *hostp = p; 1492 } 1493 return qemu_ram_addr_from_host_nofail(p); 1494 } 1495 1496 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) 1497 { 1498 return get_page_addr_code_hostp(env, addr, NULL); 1499 } 1500 1501 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, 1502 CPUIOTLBEntry *iotlbentry, uintptr_t retaddr) 1503 { 1504 ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr; 1505 1506 trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size); 1507 1508 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { 1509 struct page_collection *pages 1510 = page_collection_lock(ram_addr, ram_addr + size); 1511 tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr); 1512 page_collection_unlock(pages); 1513 } 1514 1515 /* 1516 * Set both VGA and migration bits for simplicity and to remove 1517 * the notdirty callback faster. 1518 */ 1519 cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE); 1520 1521 /* We remove the notdirty callback only if the code has been flushed. 
*/ 1522 if (!cpu_physical_memory_is_clean(ram_addr)) { 1523 trace_memory_notdirty_set_dirty(mem_vaddr); 1524 tlb_set_dirty(cpu, mem_vaddr); 1525 } 1526 } 1527 1528 static int probe_access_internal(CPUArchState *env, target_ulong addr, 1529 int fault_size, MMUAccessType access_type, 1530 int mmu_idx, bool nonfault, 1531 void **phost, uintptr_t retaddr) 1532 { 1533 uintptr_t index = tlb_index(env, mmu_idx, addr); 1534 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1535 target_ulong tlb_addr, page_addr; 1536 size_t elt_ofs; 1537 int flags; 1538 1539 switch (access_type) { 1540 case MMU_DATA_LOAD: 1541 elt_ofs = offsetof(CPUTLBEntry, addr_read); 1542 break; 1543 case MMU_DATA_STORE: 1544 elt_ofs = offsetof(CPUTLBEntry, addr_write); 1545 break; 1546 case MMU_INST_FETCH: 1547 elt_ofs = offsetof(CPUTLBEntry, addr_code); 1548 break; 1549 default: 1550 g_assert_not_reached(); 1551 } 1552 tlb_addr = tlb_read_ofs(entry, elt_ofs); 1553 1554 page_addr = addr & TARGET_PAGE_MASK; 1555 if (!tlb_hit_page(tlb_addr, page_addr)) { 1556 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) { 1557 CPUState *cs = env_cpu(env); 1558 CPUClass *cc = CPU_GET_CLASS(cs); 1559 1560 if (!cc->tlb_fill(cs, addr, fault_size, access_type, 1561 mmu_idx, nonfault, retaddr)) { 1562 /* Non-faulting page table read failed. */ 1563 *phost = NULL; 1564 return TLB_INVALID_MASK; 1565 } 1566 1567 /* TLB resize via tlb_fill may have moved the entry. */ 1568 entry = tlb_entry(env, mmu_idx, addr); 1569 } 1570 tlb_addr = tlb_read_ofs(entry, elt_ofs); 1571 } 1572 flags = tlb_addr & TLB_FLAGS_MASK; 1573 1574 /* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */ 1575 if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) { 1576 *phost = NULL; 1577 return TLB_MMIO; 1578 } 1579 1580 /* Everything else is RAM. 
*/ 1581 *phost = (void *)((uintptr_t)addr + entry->addend); 1582 return flags; 1583 } 1584 1585 int probe_access_flags(CPUArchState *env, target_ulong addr, 1586 MMUAccessType access_type, int mmu_idx, 1587 bool nonfault, void **phost, uintptr_t retaddr) 1588 { 1589 int flags; 1590 1591 flags = probe_access_internal(env, addr, 0, access_type, mmu_idx, 1592 nonfault, phost, retaddr); 1593 1594 /* Handle clean RAM pages. */ 1595 if (unlikely(flags & TLB_NOTDIRTY)) { 1596 uintptr_t index = tlb_index(env, mmu_idx, addr); 1597 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1598 1599 notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr); 1600 flags &= ~TLB_NOTDIRTY; 1601 } 1602 1603 return flags; 1604 } 1605 1606 void *probe_access(CPUArchState *env, target_ulong addr, int size, 1607 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) 1608 { 1609 void *host; 1610 int flags; 1611 1612 g_assert(-(addr | TARGET_PAGE_MASK) >= size); 1613 1614 flags = probe_access_internal(env, addr, size, access_type, mmu_idx, 1615 false, &host, retaddr); 1616 1617 /* Per the interface, size == 0 merely faults the access. */ 1618 if (size == 0) { 1619 return NULL; 1620 } 1621 1622 if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) { 1623 uintptr_t index = tlb_index(env, mmu_idx, addr); 1624 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1625 1626 /* Handle watchpoints. */ 1627 if (flags & TLB_WATCHPOINT) { 1628 int wp_access = (access_type == MMU_DATA_STORE 1629 ? BP_MEM_WRITE : BP_MEM_READ); 1630 cpu_check_watchpoint(env_cpu(env), addr, size, 1631 iotlbentry->attrs, wp_access, retaddr); 1632 } 1633 1634 /* Handle clean RAM pages. 
*/ 1635 if (flags & TLB_NOTDIRTY) { 1636 notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr); 1637 } 1638 } 1639 1640 return host; 1641 } 1642 1643 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, 1644 MMUAccessType access_type, int mmu_idx) 1645 { 1646 void *host; 1647 int flags; 1648 1649 flags = probe_access_internal(env, addr, 0, access_type, 1650 mmu_idx, true, &host, 0); 1651 1652 /* No combination of flags are expected by the caller. */ 1653 return flags ? NULL : host; 1654 } 1655 1656 #ifdef CONFIG_PLUGIN 1657 /* 1658 * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure. 1659 * This should be a hot path as we will have just looked this path up 1660 * in the softmmu lookup code (or helper). We don't handle re-fills or 1661 * checking the victim table. This is purely informational. 1662 * 1663 * This almost never fails as the memory access being instrumented 1664 * should have just filled the TLB. The one corner case is io_writex 1665 * which can cause TLB flushes and potential resizing of the TLBs 1666 * losing the information we need. In those cases we need to recover 1667 * data from a copy of the iotlbentry. As long as this always occurs 1668 * from the same thread (which a mem callback will be) this is safe. 1669 */ 1670 1671 bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx, 1672 bool is_store, struct qemu_plugin_hwaddr *data) 1673 { 1674 CPUArchState *env = cpu->env_ptr; 1675 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); 1676 uintptr_t index = tlb_index(env, mmu_idx, addr); 1677 target_ulong tlb_addr = is_store ? 
tlb_addr_write(tlbe) : tlbe->addr_read; 1678 1679 if (likely(tlb_hit(tlb_addr, addr))) { 1680 /* We must have an iotlb entry for MMIO */ 1681 if (tlb_addr & TLB_MMIO) { 1682 CPUIOTLBEntry *iotlbentry; 1683 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1684 data->is_io = true; 1685 data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); 1686 data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; 1687 } else { 1688 data->is_io = false; 1689 data->v.ram.hostaddr = addr + tlbe->addend; 1690 } 1691 return true; 1692 } else { 1693 SavedIOTLB *saved = &cpu->saved_iotlb; 1694 data->is_io = true; 1695 data->v.io.section = saved->section; 1696 data->v.io.offset = saved->mr_offset; 1697 return true; 1698 } 1699 } 1700 1701 #endif 1702 1703 /* Probe for a read-modify-write atomic operation. Do not allow unaligned 1704 * operations, or io operations to proceed. Return the host address. */ 1705 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, 1706 TCGMemOpIdx oi, uintptr_t retaddr) 1707 { 1708 size_t mmu_idx = get_mmuidx(oi); 1709 uintptr_t index = tlb_index(env, mmu_idx, addr); 1710 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); 1711 target_ulong tlb_addr = tlb_addr_write(tlbe); 1712 MemOp mop = get_memop(oi); 1713 int a_bits = get_alignment_bits(mop); 1714 int s_bits = mop & MO_SIZE; 1715 void *hostaddr; 1716 1717 /* Adjust the given return address. */ 1718 retaddr -= GETPC_ADJ; 1719 1720 /* Enforce guest required alignment. */ 1721 if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) { 1722 /* ??? Maybe indicate atomic op to cpu_unaligned_access */ 1723 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE, 1724 mmu_idx, retaddr); 1725 } 1726 1727 /* Enforce qemu required alignment. */ 1728 if (unlikely(addr & ((1 << s_bits) - 1))) { 1729 /* We get here if guest alignment was not requested, 1730 or was not enforced by cpu_unaligned_access above. 
1731 We might widen the access and emulate, but for now 1732 mark an exception and exit the cpu loop. */ 1733 goto stop_the_world; 1734 } 1735 1736 /* Check TLB entry and enforce page permissions. */ 1737 if (!tlb_hit(tlb_addr, addr)) { 1738 if (!VICTIM_TLB_HIT(addr_write, addr)) { 1739 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE, 1740 mmu_idx, retaddr); 1741 index = tlb_index(env, mmu_idx, addr); 1742 tlbe = tlb_entry(env, mmu_idx, addr); 1743 } 1744 tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; 1745 } 1746 1747 /* Notice an IO access or a needs-MMU-lookup access */ 1748 if (unlikely(tlb_addr & TLB_MMIO)) { 1749 /* There's really nothing that can be done to 1750 support this apart from stop-the-world. */ 1751 goto stop_the_world; 1752 } 1753 1754 /* Let the guest notice RMW on a write-only page. */ 1755 if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) { 1756 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD, 1757 mmu_idx, retaddr); 1758 /* Since we don't support reads and writes to different addresses, 1759 and we do have the proper page loaded for write, this shouldn't 1760 ever return. But just in case, handle via stop-the-world. */ 1761 goto stop_the_world; 1762 } 1763 1764 hostaddr = (void *)((uintptr_t)addr + tlbe->addend); 1765 1766 if (unlikely(tlb_addr & TLB_NOTDIRTY)) { 1767 notdirty_write(env_cpu(env), addr, 1 << s_bits, 1768 &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr); 1769 } 1770 1771 return hostaddr; 1772 1773 stop_the_world: 1774 cpu_loop_exit_atomic(env_cpu(env), retaddr); 1775 } 1776 1777 /* 1778 * Load Helpers 1779 * 1780 * We support two different access types. SOFTMMU_CODE_ACCESS is 1781 * specifically for reading instructions from system memory. It is 1782 * called by the translation loop and in some helpers where the code 1783 * is disassembled. It shouldn't be called directly by guest code. 
*/

/*
 * Signature shared by the full-width load helpers; load_helper() takes
 * one of these so that it can recurse for unaligned accesses.
 */
typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
                                TCGMemOpIdx oi, uintptr_t retaddr);

/*
 * Load a value of the size and endianness given by @op from host
 * address @haddr, zero-extended to 64 bits.  Any @op not listed below
 * reaches qemu_build_not_reached().
 */
static inline uint64_t QEMU_ALWAYS_INLINE
load_memop(const void *haddr, MemOp op)
{
    switch (op) {
    case MO_UB:
        return ldub_p(haddr);
    case MO_BEUW:
        return lduw_be_p(haddr);
    case MO_LEUW:
        return lduw_le_p(haddr);
    case MO_BEUL:
        return (uint32_t)ldl_be_p(haddr);
    case MO_LEUL:
        return (uint32_t)ldl_le_p(haddr);
    case MO_BEQ:
        return ldq_be_p(haddr);
    case MO_LEQ:
        return ldq_le_p(haddr);
    default:
        qemu_build_not_reached();
    }
}

/*
 * Common body of all softmmu load helpers.
 *
 * @oi:        memop and mmu index of the access
 * @retaddr:   passed on to tlb_fill(), cpu_check_watchpoint(), io_readx()
 *             and @full_load
 * @op:        the memop this instance is specialised for
 * @code_read: true to use the addr_code comparator (instruction fetch),
 *             false to use addr_read (data load)
 * @full_load: helper used to re-issue the two aligned loads that make
 *             up an unaligned access
 *
 * Returns the loaded value.
 */
static inline uint64_t QEMU_ALWAYS_INLINE
load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
            uintptr_t retaddr, MemOp op, bool code_read,
            FullLoadHelper *full_load)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
    const size_t tlb_off = code_read ?
        offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
    const MMUAccessType access_type =
        code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    uint64_t res;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, access_type,
                             mmu_idx, retaddr);
    }

    /* If the TLB entry is for a different page, reload and try again.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size,
                     access_type, mmu_idx, retaddr);
            /* tlb_fill may have resized the TLB: look the entry up again. */
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = code_read ? entry->addr_code : entry->addr_read;
        tlb_addr &= ~TLB_INVALID_MASK;
    }

    /* Handle anything that isn't just a straight memory access.  */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through full_load.  */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints.  */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out.  */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_READ, retaddr);
        }

        /* A byte swap is only meaningful for multi-byte accesses. */
        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access.  */
        if (likely(tlb_addr & TLB_MMIO)) {
            return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
                            access_type, op ^ (need_swap * MO_BSWAP));
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two load_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this.  ;-)
         */
        if (unlikely(need_swap)) {
            return load_memop(haddr, op ^ MO_BSWAP);
        }
        return load_memop(haddr, op);
    }

    /* Handle slow unaligned access (it spans two pages or IO).  */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
        target_ulong addr1, addr2;
        uint64_t r1, r2;
        unsigned shift;
    do_unaligned_access:
        /* Two size-aligned loads that together cover the access. */
        addr1 = addr & ~((target_ulong)size - 1);
        addr2 = addr1 + size;
        r1 = full_load(env, addr1, oi, retaddr);
        r2 = full_load(env, addr2, oi, retaddr);
        /* Bit offset of the requested data within the first load. */
        shift = (addr & (size - 1)) * 8;

        if (memop_big_endian(op)) {
            /* Big-endian combine.  */
            res = (r1 << shift) | (r2 >> ((size * 8) - shift));
        } else {
            /* Little-endian combine.  */
            res = (r1 >> shift) | (r2 << ((size * 8) - shift));
        }
        return res & MAKE_64BIT_MASK(0, size * 8);
    }

    /* Fast path: a plain access that stays within one page. */
    haddr = (void *)((uintptr_t)addr + entry->addend);
    return load_memop(haddr, op);
}

/*
 * For the benefit of TCG generated code, we want to avoid the
 * complication of ABI-specific return type promotion and always
 * return a value extended to the register size of the host. This is
 * tcg_target_long, except in the case of a 32-bit host and 64-bit
 * data, and for that we always have uint64_t.
 *
 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
1925 */ 1926 1927 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr, 1928 TCGMemOpIdx oi, uintptr_t retaddr) 1929 { 1930 return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu); 1931 } 1932 1933 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, 1934 TCGMemOpIdx oi, uintptr_t retaddr) 1935 { 1936 return full_ldub_mmu(env, addr, oi, retaddr); 1937 } 1938 1939 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr, 1940 TCGMemOpIdx oi, uintptr_t retaddr) 1941 { 1942 return load_helper(env, addr, oi, retaddr, MO_LEUW, false, 1943 full_le_lduw_mmu); 1944 } 1945 1946 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr, 1947 TCGMemOpIdx oi, uintptr_t retaddr) 1948 { 1949 return full_le_lduw_mmu(env, addr, oi, retaddr); 1950 } 1951 1952 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr, 1953 TCGMemOpIdx oi, uintptr_t retaddr) 1954 { 1955 return load_helper(env, addr, oi, retaddr, MO_BEUW, false, 1956 full_be_lduw_mmu); 1957 } 1958 1959 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr, 1960 TCGMemOpIdx oi, uintptr_t retaddr) 1961 { 1962 return full_be_lduw_mmu(env, addr, oi, retaddr); 1963 } 1964 1965 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr, 1966 TCGMemOpIdx oi, uintptr_t retaddr) 1967 { 1968 return load_helper(env, addr, oi, retaddr, MO_LEUL, false, 1969 full_le_ldul_mmu); 1970 } 1971 1972 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr, 1973 TCGMemOpIdx oi, uintptr_t retaddr) 1974 { 1975 return full_le_ldul_mmu(env, addr, oi, retaddr); 1976 } 1977 1978 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr, 1979 TCGMemOpIdx oi, uintptr_t retaddr) 1980 { 1981 return load_helper(env, addr, oi, retaddr, MO_BEUL, false, 1982 full_be_ldul_mmu); 1983 } 1984 1985 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr, 1986 TCGMemOpIdx oi, uintptr_t retaddr) 
1987 { 1988 return full_be_ldul_mmu(env, addr, oi, retaddr); 1989 } 1990 1991 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, 1992 TCGMemOpIdx oi, uintptr_t retaddr) 1993 { 1994 return load_helper(env, addr, oi, retaddr, MO_LEQ, false, 1995 helper_le_ldq_mmu); 1996 } 1997 1998 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, 1999 TCGMemOpIdx oi, uintptr_t retaddr) 2000 { 2001 return load_helper(env, addr, oi, retaddr, MO_BEQ, false, 2002 helper_be_ldq_mmu); 2003 } 2004 2005 /* 2006 * Provide signed versions of the load routines as well. We can of course 2007 * avoid this for 64-bit data, or for 32-bit data on 32-bit host. 2008 */ 2009 2010 2011 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, 2012 TCGMemOpIdx oi, uintptr_t retaddr) 2013 { 2014 return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr); 2015 } 2016 2017 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr, 2018 TCGMemOpIdx oi, uintptr_t retaddr) 2019 { 2020 return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr); 2021 } 2022 2023 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr, 2024 TCGMemOpIdx oi, uintptr_t retaddr) 2025 { 2026 return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr); 2027 } 2028 2029 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr, 2030 TCGMemOpIdx oi, uintptr_t retaddr) 2031 { 2032 return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr); 2033 } 2034 2035 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, 2036 TCGMemOpIdx oi, uintptr_t retaddr) 2037 { 2038 return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr); 2039 } 2040 2041 /* 2042 * Load helpers for cpu_ldst.h. 
2043 */ 2044 2045 static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr, 2046 int mmu_idx, uintptr_t retaddr, 2047 MemOp op, FullLoadHelper *full_load) 2048 { 2049 uint16_t meminfo; 2050 TCGMemOpIdx oi; 2051 uint64_t ret; 2052 2053 meminfo = trace_mem_get_info(op, mmu_idx, false); 2054 trace_guest_mem_before_exec(env_cpu(env), addr, meminfo); 2055 2056 op &= ~MO_SIGN; 2057 oi = make_memop_idx(op, mmu_idx); 2058 ret = full_load(env, addr, oi, retaddr); 2059 2060 qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo); 2061 2062 return ret; 2063 } 2064 2065 uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2066 int mmu_idx, uintptr_t ra) 2067 { 2068 return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu); 2069 } 2070 2071 int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2072 int mmu_idx, uintptr_t ra) 2073 { 2074 return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB, 2075 full_ldub_mmu); 2076 } 2077 2078 uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2079 int mmu_idx, uintptr_t ra) 2080 { 2081 return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUW, full_be_lduw_mmu); 2082 } 2083 2084 int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2085 int mmu_idx, uintptr_t ra) 2086 { 2087 return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_BESW, 2088 full_be_lduw_mmu); 2089 } 2090 2091 uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2092 int mmu_idx, uintptr_t ra) 2093 { 2094 return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUL, full_be_ldul_mmu); 2095 } 2096 2097 uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2098 int mmu_idx, uintptr_t ra) 2099 { 2100 return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEQ, helper_be_ldq_mmu); 2101 } 2102 2103 uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2104 int mmu_idx, uintptr_t ra) 2105 { 2106 return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUW, full_le_lduw_mmu); 2107 } 2108 2109 int 
cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2110 int mmu_idx, uintptr_t ra) 2111 { 2112 return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_LESW, 2113 full_le_lduw_mmu); 2114 } 2115 2116 uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2117 int mmu_idx, uintptr_t ra) 2118 { 2119 return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUL, full_le_ldul_mmu); 2120 } 2121 2122 uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, 2123 int mmu_idx, uintptr_t ra) 2124 { 2125 return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEQ, helper_le_ldq_mmu); 2126 } 2127 2128 uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr, 2129 uintptr_t retaddr) 2130 { 2131 return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 2132 } 2133 2134 int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) 2135 { 2136 return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 2137 } 2138 2139 uint32_t cpu_lduw_be_data_ra(CPUArchState *env, target_ulong ptr, 2140 uintptr_t retaddr) 2141 { 2142 return cpu_lduw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 2143 } 2144 2145 int cpu_ldsw_be_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) 2146 { 2147 return cpu_ldsw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 2148 } 2149 2150 uint32_t cpu_ldl_be_data_ra(CPUArchState *env, target_ulong ptr, 2151 uintptr_t retaddr) 2152 { 2153 return cpu_ldl_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 2154 } 2155 2156 uint64_t cpu_ldq_be_data_ra(CPUArchState *env, target_ulong ptr, 2157 uintptr_t retaddr) 2158 { 2159 return cpu_ldq_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 2160 } 2161 2162 uint32_t cpu_lduw_le_data_ra(CPUArchState *env, target_ulong ptr, 2163 uintptr_t retaddr) 2164 { 2165 return cpu_lduw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 2166 } 2167 2168 int cpu_ldsw_le_data_ra(CPUArchState *env, target_ulong ptr, 
uintptr_t retaddr) 2169 { 2170 return cpu_ldsw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 2171 } 2172 2173 uint32_t cpu_ldl_le_data_ra(CPUArchState *env, target_ulong ptr, 2174 uintptr_t retaddr) 2175 { 2176 return cpu_ldl_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 2177 } 2178 2179 uint64_t cpu_ldq_le_data_ra(CPUArchState *env, target_ulong ptr, 2180 uintptr_t retaddr) 2181 { 2182 return cpu_ldq_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); 2183 } 2184 2185 uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr) 2186 { 2187 return cpu_ldub_data_ra(env, ptr, 0); 2188 } 2189 2190 int cpu_ldsb_data(CPUArchState *env, target_ulong ptr) 2191 { 2192 return cpu_ldsb_data_ra(env, ptr, 0); 2193 } 2194 2195 uint32_t cpu_lduw_be_data(CPUArchState *env, target_ulong ptr) 2196 { 2197 return cpu_lduw_be_data_ra(env, ptr, 0); 2198 } 2199 2200 int cpu_ldsw_be_data(CPUArchState *env, target_ulong ptr) 2201 { 2202 return cpu_ldsw_be_data_ra(env, ptr, 0); 2203 } 2204 2205 uint32_t cpu_ldl_be_data(CPUArchState *env, target_ulong ptr) 2206 { 2207 return cpu_ldl_be_data_ra(env, ptr, 0); 2208 } 2209 2210 uint64_t cpu_ldq_be_data(CPUArchState *env, target_ulong ptr) 2211 { 2212 return cpu_ldq_be_data_ra(env, ptr, 0); 2213 } 2214 2215 uint32_t cpu_lduw_le_data(CPUArchState *env, target_ulong ptr) 2216 { 2217 return cpu_lduw_le_data_ra(env, ptr, 0); 2218 } 2219 2220 int cpu_ldsw_le_data(CPUArchState *env, target_ulong ptr) 2221 { 2222 return cpu_ldsw_le_data_ra(env, ptr, 0); 2223 } 2224 2225 uint32_t cpu_ldl_le_data(CPUArchState *env, target_ulong ptr) 2226 { 2227 return cpu_ldl_le_data_ra(env, ptr, 0); 2228 } 2229 2230 uint64_t cpu_ldq_le_data(CPUArchState *env, target_ulong ptr) 2231 { 2232 return cpu_ldq_le_data_ra(env, ptr, 0); 2233 } 2234 2235 /* 2236 * Store Helpers 2237 */ 2238 2239 static inline void QEMU_ALWAYS_INLINE 2240 store_memop(void *haddr, uint64_t val, MemOp op) 2241 { 2242 switch (op) { 2243 case MO_UB: 2244 stb_p(haddr, 
val); 2245 break; 2246 case MO_BEUW: 2247 stw_be_p(haddr, val); 2248 break; 2249 case MO_LEUW: 2250 stw_le_p(haddr, val); 2251 break; 2252 case MO_BEUL: 2253 stl_be_p(haddr, val); 2254 break; 2255 case MO_LEUL: 2256 stl_le_p(haddr, val); 2257 break; 2258 case MO_BEQ: 2259 stq_be_p(haddr, val); 2260 break; 2261 case MO_LEQ: 2262 stq_le_p(haddr, val); 2263 break; 2264 default: 2265 qemu_build_not_reached(); 2266 } 2267 } 2268 2269 static void __attribute__((noinline)) 2270 store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val, 2271 uintptr_t retaddr, size_t size, uintptr_t mmu_idx, 2272 bool big_endian) 2273 { 2274 const size_t tlb_off = offsetof(CPUTLBEntry, addr_write); 2275 uintptr_t index, index2; 2276 CPUTLBEntry *entry, *entry2; 2277 target_ulong page2, tlb_addr, tlb_addr2; 2278 TCGMemOpIdx oi; 2279 size_t size2; 2280 int i; 2281 2282 /* 2283 * Ensure the second page is in the TLB. Note that the first page 2284 * is already guaranteed to be filled, and that the second page 2285 * cannot evict the first. 2286 */ 2287 page2 = (addr + size) & TARGET_PAGE_MASK; 2288 size2 = (addr + size) & ~TARGET_PAGE_MASK; 2289 index2 = tlb_index(env, mmu_idx, page2); 2290 entry2 = tlb_entry(env, mmu_idx, page2); 2291 2292 tlb_addr2 = tlb_addr_write(entry2); 2293 if (!tlb_hit_page(tlb_addr2, page2)) { 2294 if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) { 2295 tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE, 2296 mmu_idx, retaddr); 2297 index2 = tlb_index(env, mmu_idx, page2); 2298 entry2 = tlb_entry(env, mmu_idx, page2); 2299 } 2300 tlb_addr2 = tlb_addr_write(entry2); 2301 } 2302 2303 index = tlb_index(env, mmu_idx, addr); 2304 entry = tlb_entry(env, mmu_idx, addr); 2305 tlb_addr = tlb_addr_write(entry); 2306 2307 /* 2308 * Handle watchpoints. Since this may trap, all checks 2309 * must happen before any store. 
2310 */ 2311 if (unlikely(tlb_addr & TLB_WATCHPOINT)) { 2312 cpu_check_watchpoint(env_cpu(env), addr, size - size2, 2313 env_tlb(env)->d[mmu_idx].iotlb[index].attrs, 2314 BP_MEM_WRITE, retaddr); 2315 } 2316 if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) { 2317 cpu_check_watchpoint(env_cpu(env), page2, size2, 2318 env_tlb(env)->d[mmu_idx].iotlb[index2].attrs, 2319 BP_MEM_WRITE, retaddr); 2320 } 2321 2322 /* 2323 * XXX: not efficient, but simple. 2324 * This loop must go in the forward direction to avoid issues 2325 * with self-modifying code in Windows 64-bit. 2326 */ 2327 oi = make_memop_idx(MO_UB, mmu_idx); 2328 if (big_endian) { 2329 for (i = 0; i < size; ++i) { 2330 /* Big-endian extract. */ 2331 uint8_t val8 = val >> (((size - 1) * 8) - (i * 8)); 2332 helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr); 2333 } 2334 } else { 2335 for (i = 0; i < size; ++i) { 2336 /* Little-endian extract. */ 2337 uint8_t val8 = val >> (i * 8); 2338 helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr); 2339 } 2340 } 2341 } 2342 2343 static inline void QEMU_ALWAYS_INLINE 2344 store_helper(CPUArchState *env, target_ulong addr, uint64_t val, 2345 TCGMemOpIdx oi, uintptr_t retaddr, MemOp op) 2346 { 2347 uintptr_t mmu_idx = get_mmuidx(oi); 2348 uintptr_t index = tlb_index(env, mmu_idx, addr); 2349 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 2350 target_ulong tlb_addr = tlb_addr_write(entry); 2351 const size_t tlb_off = offsetof(CPUTLBEntry, addr_write); 2352 unsigned a_bits = get_alignment_bits(get_memop(oi)); 2353 void *haddr; 2354 size_t size = memop_size(op); 2355 2356 /* Handle CPU specific unaligned behaviour */ 2357 if (addr & ((1 << a_bits) - 1)) { 2358 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE, 2359 mmu_idx, retaddr); 2360 } 2361 2362 /* If the TLB entry is for a different page, reload and try again. 
*/ 2363 if (!tlb_hit(tlb_addr, addr)) { 2364 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, 2365 addr & TARGET_PAGE_MASK)) { 2366 tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE, 2367 mmu_idx, retaddr); 2368 index = tlb_index(env, mmu_idx, addr); 2369 entry = tlb_entry(env, mmu_idx, addr); 2370 } 2371 tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; 2372 } 2373 2374 /* Handle anything that isn't just a straight memory access. */ 2375 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { 2376 CPUIOTLBEntry *iotlbentry; 2377 bool need_swap; 2378 2379 /* For anything that is unaligned, recurse through byte stores. */ 2380 if ((addr & (size - 1)) != 0) { 2381 goto do_unaligned_access; 2382 } 2383 2384 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 2385 2386 /* Handle watchpoints. */ 2387 if (unlikely(tlb_addr & TLB_WATCHPOINT)) { 2388 /* On watchpoint hit, this will longjmp out. */ 2389 cpu_check_watchpoint(env_cpu(env), addr, size, 2390 iotlbentry->attrs, BP_MEM_WRITE, retaddr); 2391 } 2392 2393 need_swap = size > 1 && (tlb_addr & TLB_BSWAP); 2394 2395 /* Handle I/O access. */ 2396 if (tlb_addr & TLB_MMIO) { 2397 io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, 2398 op ^ (need_swap * MO_BSWAP)); 2399 return; 2400 } 2401 2402 /* Ignore writes to ROM. */ 2403 if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) { 2404 return; 2405 } 2406 2407 /* Handle clean RAM pages. */ 2408 if (tlb_addr & TLB_NOTDIRTY) { 2409 notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr); 2410 } 2411 2412 haddr = (void *)((uintptr_t)addr + entry->addend); 2413 2414 /* 2415 * Keep these two store_memop separate to ensure that the compiler 2416 * is able to fold the entire function to a single instruction. 2417 * There is a build-time assert inside to remind you of this. 
;-) 2418 */ 2419 if (unlikely(need_swap)) { 2420 store_memop(haddr, val, op ^ MO_BSWAP); 2421 } else { 2422 store_memop(haddr, val, op); 2423 } 2424 return; 2425 } 2426 2427 /* Handle slow unaligned access (it spans two pages or IO). */ 2428 if (size > 1 2429 && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1 2430 >= TARGET_PAGE_SIZE)) { 2431 do_unaligned_access: 2432 store_helper_unaligned(env, addr, val, retaddr, size, 2433 mmu_idx, memop_big_endian(op)); 2434 return; 2435 } 2436 2437 haddr = (void *)((uintptr_t)addr + entry->addend); 2438 store_memop(haddr, val, op); 2439 } 2440 2441 void __attribute__((noinline)) 2442 helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, 2443 TCGMemOpIdx oi, uintptr_t retaddr) 2444 { 2445 store_helper(env, addr, val, oi, retaddr, MO_UB); 2446 } 2447 2448 void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, 2449 TCGMemOpIdx oi, uintptr_t retaddr) 2450 { 2451 store_helper(env, addr, val, oi, retaddr, MO_LEUW); 2452 } 2453 2454 void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, 2455 TCGMemOpIdx oi, uintptr_t retaddr) 2456 { 2457 store_helper(env, addr, val, oi, retaddr, MO_BEUW); 2458 } 2459 2460 void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, 2461 TCGMemOpIdx oi, uintptr_t retaddr) 2462 { 2463 store_helper(env, addr, val, oi, retaddr, MO_LEUL); 2464 } 2465 2466 void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, 2467 TCGMemOpIdx oi, uintptr_t retaddr) 2468 { 2469 store_helper(env, addr, val, oi, retaddr, MO_BEUL); 2470 } 2471 2472 void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, 2473 TCGMemOpIdx oi, uintptr_t retaddr) 2474 { 2475 store_helper(env, addr, val, oi, retaddr, MO_LEQ); 2476 } 2477 2478 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, 2479 TCGMemOpIdx oi, uintptr_t retaddr) 2480 { 2481 store_helper(env, addr, val, oi, retaddr, MO_BEQ); 2482 } 2483 2484 /* 2485 
* Store Helpers for cpu_ldst.h 2486 */ 2487 2488 static inline void QEMU_ALWAYS_INLINE 2489 cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val, 2490 int mmu_idx, uintptr_t retaddr, MemOp op) 2491 { 2492 TCGMemOpIdx oi; 2493 uint16_t meminfo; 2494 2495 meminfo = trace_mem_get_info(op, mmu_idx, true); 2496 trace_guest_mem_before_exec(env_cpu(env), addr, meminfo); 2497 2498 oi = make_memop_idx(op, mmu_idx); 2499 store_helper(env, addr, val, oi, retaddr, op); 2500 2501 qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo); 2502 } 2503 2504 void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, 2505 int mmu_idx, uintptr_t retaddr) 2506 { 2507 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB); 2508 } 2509 2510 void cpu_stw_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, 2511 int mmu_idx, uintptr_t retaddr) 2512 { 2513 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUW); 2514 } 2515 2516 void cpu_stl_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, 2517 int mmu_idx, uintptr_t retaddr) 2518 { 2519 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUL); 2520 } 2521 2522 void cpu_stq_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val, 2523 int mmu_idx, uintptr_t retaddr) 2524 { 2525 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEQ); 2526 } 2527 2528 void cpu_stw_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, 2529 int mmu_idx, uintptr_t retaddr) 2530 { 2531 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUW); 2532 } 2533 2534 void cpu_stl_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, 2535 int mmu_idx, uintptr_t retaddr) 2536 { 2537 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUL); 2538 } 2539 2540 void cpu_stq_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val, 2541 int mmu_idx, uintptr_t retaddr) 2542 { 2543 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEQ); 2544 } 2545 2546 void 
cpu_stb_data_ra(CPUArchState *env, target_ulong ptr, 2547 uint32_t val, uintptr_t retaddr) 2548 { 2549 cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); 2550 } 2551 2552 void cpu_stw_be_data_ra(CPUArchState *env, target_ulong ptr, 2553 uint32_t val, uintptr_t retaddr) 2554 { 2555 cpu_stw_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); 2556 } 2557 2558 void cpu_stl_be_data_ra(CPUArchState *env, target_ulong ptr, 2559 uint32_t val, uintptr_t retaddr) 2560 { 2561 cpu_stl_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); 2562 } 2563 2564 void cpu_stq_be_data_ra(CPUArchState *env, target_ulong ptr, 2565 uint64_t val, uintptr_t retaddr) 2566 { 2567 cpu_stq_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); 2568 } 2569 2570 void cpu_stw_le_data_ra(CPUArchState *env, target_ulong ptr, 2571 uint32_t val, uintptr_t retaddr) 2572 { 2573 cpu_stw_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); 2574 } 2575 2576 void cpu_stl_le_data_ra(CPUArchState *env, target_ulong ptr, 2577 uint32_t val, uintptr_t retaddr) 2578 { 2579 cpu_stl_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); 2580 } 2581 2582 void cpu_stq_le_data_ra(CPUArchState *env, target_ulong ptr, 2583 uint64_t val, uintptr_t retaddr) 2584 { 2585 cpu_stq_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); 2586 } 2587 2588 void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val) 2589 { 2590 cpu_stb_data_ra(env, ptr, val, 0); 2591 } 2592 2593 void cpu_stw_be_data(CPUArchState *env, target_ulong ptr, uint32_t val) 2594 { 2595 cpu_stw_be_data_ra(env, ptr, val, 0); 2596 } 2597 2598 void cpu_stl_be_data(CPUArchState *env, target_ulong ptr, uint32_t val) 2599 { 2600 cpu_stl_be_data_ra(env, ptr, val, 0); 2601 } 2602 2603 void cpu_stq_be_data(CPUArchState *env, target_ulong ptr, uint64_t val) 2604 { 2605 cpu_stq_be_data_ra(env, ptr, val, 0); 2606 } 2607 2608 void cpu_stw_le_data(CPUArchState *env, target_ulong 
ptr, uint32_t val) 2609 { 2610 cpu_stw_le_data_ra(env, ptr, val, 0); 2611 } 2612 2613 void cpu_stl_le_data(CPUArchState *env, target_ulong ptr, uint32_t val) 2614 { 2615 cpu_stl_le_data_ra(env, ptr, val, 0); 2616 } 2617 2618 void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val) 2619 { 2620 cpu_stq_le_data_ra(env, ptr, val, 0); 2621 } 2622 2623 /* First set of helpers allows passing in of OI and RETADDR. This makes 2624 them callable from other helpers. */ 2625 2626 #define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr 2627 #define ATOMIC_NAME(X) \ 2628 HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu)) 2629 #define ATOMIC_MMU_DECLS 2630 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr) 2631 #define ATOMIC_MMU_CLEANUP 2632 #define ATOMIC_MMU_IDX get_mmuidx(oi) 2633 2634 #include "atomic_common.c.inc" 2635 2636 #define DATA_SIZE 1 2637 #include "atomic_template.h" 2638 2639 #define DATA_SIZE 2 2640 #include "atomic_template.h" 2641 2642 #define DATA_SIZE 4 2643 #include "atomic_template.h" 2644 2645 #ifdef CONFIG_ATOMIC64 2646 #define DATA_SIZE 8 2647 #include "atomic_template.h" 2648 #endif 2649 2650 #if HAVE_CMPXCHG128 || HAVE_ATOMIC128 2651 #define DATA_SIZE 16 2652 #include "atomic_template.h" 2653 #endif 2654 2655 /* Second set of helpers are directly callable from TCG as helpers. */ 2656 2657 #undef EXTRA_ARGS 2658 #undef ATOMIC_NAME 2659 #undef ATOMIC_MMU_LOOKUP 2660 #define EXTRA_ARGS , TCGMemOpIdx oi 2661 #define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END)) 2662 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC()) 2663 2664 #define DATA_SIZE 1 2665 #include "atomic_template.h" 2666 2667 #define DATA_SIZE 2 2668 #include "atomic_template.h" 2669 2670 #define DATA_SIZE 4 2671 #include "atomic_template.h" 2672 2673 #ifdef CONFIG_ATOMIC64 2674 #define DATA_SIZE 8 2675 #include "atomic_template.h" 2676 #endif 2677 #undef ATOMIC_MMU_IDX 2678 2679 /* Code access functions. 
*/ 2680 2681 static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr, 2682 TCGMemOpIdx oi, uintptr_t retaddr) 2683 { 2684 return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code); 2685 } 2686 2687 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr) 2688 { 2689 TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true)); 2690 return full_ldub_code(env, addr, oi, 0); 2691 } 2692 2693 static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr, 2694 TCGMemOpIdx oi, uintptr_t retaddr) 2695 { 2696 return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code); 2697 } 2698 2699 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr) 2700 { 2701 TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true)); 2702 return full_lduw_code(env, addr, oi, 0); 2703 } 2704 2705 static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr, 2706 TCGMemOpIdx oi, uintptr_t retaddr) 2707 { 2708 return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code); 2709 } 2710 2711 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr) 2712 { 2713 TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true)); 2714 return full_ldl_code(env, addr, oi, 0); 2715 } 2716 2717 static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr, 2718 TCGMemOpIdx oi, uintptr_t retaddr) 2719 { 2720 return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code); 2721 } 2722 2723 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr) 2724 { 2725 TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true)); 2726 return full_ldq_code(env, addr, oi, 0); 2727 } 2728