/*
 *  Common CPU TLB handling
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
#include "exec/cpu_ldst.h"
#include "exec/cputlb.h"
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "tcg/tcg.h"
#include "qemu/error-report.h"
#include "exec/log.h"
#include "exec/helper-proto.h"
#include "qemu/atomic.h"
#include "qemu/atomic128.h"
#include "translate-all.h"
#ifdef CONFIG_PLUGIN
#include "qemu/plugin-memory.h"
#endif

/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
/* #define DEBUG_TLB */
/* #define DEBUG_TLB_LOG */

#ifdef DEBUG_TLB
# define DEBUG_TLB_GATE 1
# ifdef DEBUG_TLB_LOG
#  define DEBUG_TLB_LOG_GATE 1
# else
#  define DEBUG_TLB_LOG_GATE 0
# endif
#else
# define DEBUG_TLB_GATE 0
# define DEBUG_TLB_LOG_GATE 0
#endif

#define tlb_debug(fmt, ...) do { \
    if (DEBUG_TLB_LOG_GATE) { \
        qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
                      ## __VA_ARGS__); \
    } else if (DEBUG_TLB_GATE) { \
        fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
    } \
} while (0)

#define assert_cpu_is_self(cpu) do {                              \
        if (DEBUG_TLB_GATE) {                                     \
            g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
        }                                                         \
    } while (0)

/* run_on_cpu_data.target_ptr should always be big enough for a
 * target_ulong even on 32 bit builds */
QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));

/* We currently can't handle more than 16 bits in the MMUIDX bitmask.
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)

static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
{
    return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
}

static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
                             size_t max_entries)
{
    desc->window_begin_ns = ns;
    desc->window_max_entries = max_entries;
}

static void tlb_dyn_init(CPUArchState *env)
{
    int i;

    for (i = 0; i < NB_MMU_MODES; i++) {
        CPUTLBDesc *desc = &env_tlb(env)->d[i];
        size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;

        tlb_window_reset(desc, get_clock_realtime(), 0);
        desc->n_used_entries = 0;
        env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
        env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries);
        env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries);
    }
}

/**
 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
 * @env: CPU that owns the TLB
 * @mmu_idx: MMU index of the TLB
 *
 * Called with tlb_lock_held.
 *
 * We have two main constraints when resizing a TLB: (1) we only resize it
 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
 * the array or unnecessarily flushing it), which means we do not control how
 * frequently the resizing can occur; (2) we don't have access to the guest's
 * future scheduling decisions, and therefore have to decide the magnitude of
 * the resize based on past observations.
 *
 * In general, a memory-hungry process can benefit greatly from an appropriately
 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
 * we just have to make the TLB as large as possible; while an oversized TLB
 * results in minimal TLB miss rates, it also takes longer to be flushed
 * (flushes can be _very_ frequent), and the reduced locality can also hurt
 * performance.
 *
 * To achieve near-optimal performance for all kinds of workloads, we:
 *
 * 1. Aggressively increase the size of the TLB when the use rate of the
 * TLB being flushed is high, since it is likely that in the near future this
 * memory-hungry process will execute again, and its memory hungriness will
 * probably be similar.
 *
 * 2. Slowly reduce the size of the TLB as the use rate declines over a
 * reasonably large time window. The rationale is that if in such a time window
 * we have not observed a high TLB use rate, it is likely that we won't observe
 * it in the near future. In that case, once a time window expires we downsize
 * the TLB to match the maximum use rate observed in the window.
 *
 * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
 * since in that range performance is likely near-optimal. Recall that the TLB
 * is direct mapped, so we want the use rate to be low (or at least not too
 * high), since otherwise we are likely to have a significant amount of
 * conflict misses.
 */
static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
{
    CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
    size_t old_size = tlb_n_entries(env, mmu_idx);
    size_t rate;
    size_t new_size = old_size;
    int64_t now = get_clock_realtime();
    int64_t window_len_ms = 100;
    int64_t window_len_ns = window_len_ms * 1000 * 1000;
    bool window_expired = now > desc->window_begin_ns + window_len_ns;

    if (desc->n_used_entries > desc->window_max_entries) {
        desc->window_max_entries = desc->n_used_entries;
    }
    rate = desc->window_max_entries * 100 / old_size;

    if (rate > 70) {
        new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
    } else if (rate < 30 && window_expired) {
        size_t ceil = pow2ceil(desc->window_max_entries);
        size_t expected_rate = desc->window_max_entries * 100 / ceil;

        /*
         * Avoid undersizing when the max number of entries seen is just below
         * a pow2. For instance, if max_entries == 1025, the expected use rate
         * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
         * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
         * later. Thus, make sure that the expected use rate remains below 70%.
         * (and since we double the size, that means the lowest rate we'd
         * expect to get is 35%, which is still in the 30-70% range where
         * we consider that the size is appropriate.)
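         *
         * Illustrative example (numbers chosen for clarity, not measured):
         * with window_max_entries == 1500, pow2ceil() yields 2048 and the
         * expected rate is 1500 * 100 / 2048 == 73%, so ceil is doubled to
         * 4096, giving an expected rate of roughly 36%.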
180 */ 181 if (expected_rate > 70) { 182 ceil *= 2; 183 } 184 new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS); 185 } 186 187 if (new_size == old_size) { 188 if (window_expired) { 189 tlb_window_reset(desc, now, desc->n_used_entries); 190 } 191 return; 192 } 193 194 g_free(env_tlb(env)->f[mmu_idx].table); 195 g_free(env_tlb(env)->d[mmu_idx].iotlb); 196 197 tlb_window_reset(desc, now, 0); 198 /* desc->n_used_entries is cleared by the caller */ 199 env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; 200 env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size); 201 env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size); 202 /* 203 * If the allocations fail, try smaller sizes. We just freed some 204 * memory, so going back to half of new_size has a good chance of working. 205 * Increased memory pressure elsewhere in the system might cause the 206 * allocations to fail though, so we progressively reduce the allocation 207 * size, aborting if we cannot even allocate the smallest TLB we support. 208 */ 209 while (env_tlb(env)->f[mmu_idx].table == NULL || 210 env_tlb(env)->d[mmu_idx].iotlb == NULL) { 211 if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) { 212 error_report("%s: %s", __func__, strerror(errno)); 213 abort(); 214 } 215 new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS); 216 env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; 217 218 g_free(env_tlb(env)->f[mmu_idx].table); 219 g_free(env_tlb(env)->d[mmu_idx].iotlb); 220 env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size); 221 env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size); 222 } 223 } 224 225 static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx) 226 { 227 tlb_mmu_resize_locked(env, mmu_idx); 228 memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx)); 229 env_tlb(env)->d[mmu_idx].n_used_entries = 0; 230 } 231 232 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx) 233 { 234 env_tlb(env)->d[mmu_idx].n_used_entries++; 235 } 236 237 static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx) 238 { 239 env_tlb(env)->d[mmu_idx].n_used_entries--; 240 } 241 242 void tlb_init(CPUState *cpu) 243 { 244 CPUArchState *env = cpu->env_ptr; 245 246 qemu_spin_init(&env_tlb(env)->c.lock); 247 248 /* Ensure that cpu_reset performs a full flush. */ 249 env_tlb(env)->c.dirty = ALL_MMUIDX_BITS; 250 251 tlb_dyn_init(env); 252 } 253 254 /* flush_all_helper: run fn across all cpus 255 * 256 * If the wait flag is set then the src cpu's helper will be queued as 257 * "safe" work and the loop exited creating a synchronisation point 258 * where all queued work will be finished before execution starts 259 * again. 
260 */ 261 static void flush_all_helper(CPUState *src, run_on_cpu_func fn, 262 run_on_cpu_data d) 263 { 264 CPUState *cpu; 265 266 CPU_FOREACH(cpu) { 267 if (cpu != src) { 268 async_run_on_cpu(cpu, fn, d); 269 } 270 } 271 } 272 273 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide) 274 { 275 CPUState *cpu; 276 size_t full = 0, part = 0, elide = 0; 277 278 CPU_FOREACH(cpu) { 279 CPUArchState *env = cpu->env_ptr; 280 281 full += atomic_read(&env_tlb(env)->c.full_flush_count); 282 part += atomic_read(&env_tlb(env)->c.part_flush_count); 283 elide += atomic_read(&env_tlb(env)->c.elide_flush_count); 284 } 285 *pfull = full; 286 *ppart = part; 287 *pelide = elide; 288 } 289 290 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx) 291 { 292 tlb_table_flush_by_mmuidx(env, mmu_idx); 293 env_tlb(env)->d[mmu_idx].large_page_addr = -1; 294 env_tlb(env)->d[mmu_idx].large_page_mask = -1; 295 env_tlb(env)->d[mmu_idx].vindex = 0; 296 memset(env_tlb(env)->d[mmu_idx].vtable, -1, 297 sizeof(env_tlb(env)->d[0].vtable)); 298 } 299 300 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) 301 { 302 CPUArchState *env = cpu->env_ptr; 303 uint16_t asked = data.host_int; 304 uint16_t all_dirty, work, to_clean; 305 306 assert_cpu_is_self(cpu); 307 308 tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked); 309 310 qemu_spin_lock(&env_tlb(env)->c.lock); 311 312 all_dirty = env_tlb(env)->c.dirty; 313 to_clean = asked & all_dirty; 314 all_dirty &= ~to_clean; 315 env_tlb(env)->c.dirty = all_dirty; 316 317 for (work = to_clean; work != 0; work &= work - 1) { 318 int mmu_idx = ctz32(work); 319 tlb_flush_one_mmuidx_locked(env, mmu_idx); 320 } 321 322 qemu_spin_unlock(&env_tlb(env)->c.lock); 323 324 cpu_tb_jmp_cache_clear(cpu); 325 326 if (to_clean == ALL_MMUIDX_BITS) { 327 atomic_set(&env_tlb(env)->c.full_flush_count, 328 env_tlb(env)->c.full_flush_count + 1); 329 } else { 330 atomic_set(&env_tlb(env)->c.part_flush_count, 331 env_tlb(env)->c.part_flush_count + ctpop16(to_clean)); 332 if (to_clean != asked) { 333 atomic_set(&env_tlb(env)->c.elide_flush_count, 334 env_tlb(env)->c.elide_flush_count + 335 ctpop16(asked & ~to_clean)); 336 } 337 } 338 } 339 340 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap) 341 { 342 tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap); 343 344 if (cpu->created && !qemu_cpu_is_self(cpu)) { 345 async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work, 346 RUN_ON_CPU_HOST_INT(idxmap)); 347 } else { 348 tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap)); 349 } 350 } 351 352 void tlb_flush(CPUState *cpu) 353 { 354 tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS); 355 } 356 357 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap) 358 { 359 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work; 360 361 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap); 362 363 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); 364 fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap)); 365 } 366 367 void tlb_flush_all_cpus(CPUState *src_cpu) 368 { 369 tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS); 370 } 371 372 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap) 373 { 374 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work; 375 376 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap); 377 378 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); 379 async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); 380 } 381 382 void tlb_flush_all_cpus_synced(CPUState *src_cpu) 383 { 384 
    tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
}

static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
                                        target_ulong page)
{
    return tlb_hit_page(tlb_entry->addr_read, page) ||
           tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
           tlb_hit_page(tlb_entry->addr_code, page);
}

/**
 * tlb_entry_is_empty - return true if the entry is not in use
 * @te: pointer to CPUTLBEntry
 */
static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
{
    return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
}

/* Called with tlb_c.lock held */
static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
                                          target_ulong page)
{
    if (tlb_hit_page_anyprot(tlb_entry, page)) {
        memset(tlb_entry, -1, sizeof(*tlb_entry));
        return true;
    }
    return false;
}

/* Called with tlb_c.lock held */
static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
                                              target_ulong page)
{
    CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
    int k;

    assert_cpu_is_self(env_cpu(env));
    for (k = 0; k < CPU_VTLB_SIZE; k++) {
        if (tlb_flush_entry_locked(&d->vtable[k], page)) {
            tlb_n_used_entries_dec(env, mmu_idx);
        }
    }
}

static void tlb_flush_page_locked(CPUArchState *env, int midx,
                                  target_ulong page)
{
    target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
    target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;

    /* Check if we need to flush due to large pages.  */
    if ((page & lp_mask) == lp_addr) {
        tlb_debug("forcing full flush midx %d ("
                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
                  midx, lp_addr, lp_mask);
        tlb_flush_one_mmuidx_locked(env, midx);
    } else {
        if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
            tlb_n_used_entries_dec(env, midx);
        }
        tlb_flush_vtlb_page_locked(env, midx, page);
    }
}

/* As we are going to hijack the bottom bits of the page address for a
 * mmuidx bit mask we need to fail to build if we can't do that
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);

static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
                                                run_on_cpu_data data)
{
    CPUArchState *env = cpu->env_ptr;
    target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
    target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
    unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
              addr, mmu_idx_bitmap);

    qemu_spin_lock(&env_tlb(env)->c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
            tlb_flush_page_locked(env, mmu_idx, addr);
        }
    }
    qemu_spin_unlock(&env_tlb(env)->c.lock);

    tb_flush_jmp_cache(cpu, addr);
}

void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
{
    target_ulong addr_and_mmu_idx;

    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);

    /* This should already be page aligned */
    addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
    addr_and_mmu_idx |= idxmap;

    if (!qemu_cpu_is_self(cpu)) {
        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work,
                         RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
    } else {
        tlb_flush_page_by_mmuidx_async_work(
            cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
    }
}

void tlb_flush_page(CPUState *cpu, target_ulong addr)
{
    tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
}

void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
                                       uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
    target_ulong addr_and_mmu_idx;

    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    /* This should already be page aligned */
    addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
    addr_and_mmu_idx |= idxmap;

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
    fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
}

void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
{
    tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
}

void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
                                              target_ulong addr,
                                              uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
    target_ulong addr_and_mmu_idx;

    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    /* This should already be page aligned */
    addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
    addr_and_mmu_idx |= idxmap;

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
    async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
}

void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
{
    tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
}

/* update the TLBs so that writes to code in the virtual page 'addr'
   can be detected */
void tlb_protect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
                                             DIRTY_MEMORY_CODE);
}

/* update the TLB so that writes in physical page 'phys_addr' are no longer
   tested for self modifying code */
void tlb_unprotect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
}


/*
 * Dirty write flag handling
 *
 * When the TCG code writes to a location it looks up the address in
 * the TLB and uses that data to compute the final address. If any of
 * the lower bits of the address are set then the slow path is forced.
 * There are a number of reasons to do this but for normal RAM the
 * most usual is detecting writes to code regions which may invalidate
 * generated code.
 *
 * Other vCPUs might be reading their TLBs during guest execution, so we update
 * te->addr_write with atomic_set. We don't need to worry about this for
 * oversized guests as MTTCG is disabled for them.
 *
 * Called with tlb_c.lock held.
 */
static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
                                         uintptr_t start, uintptr_t length)
{
    uintptr_t addr = tlb_entry->addr_write;

    if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
                 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
        addr &= TARGET_PAGE_MASK;
        addr += tlb_entry->addend;
        if ((addr - start) < length) {
#if TCG_OVERSIZED_GUEST
            tlb_entry->addr_write |= TLB_NOTDIRTY;
#else
            atomic_set(&tlb_entry->addr_write,
                       tlb_entry->addr_write | TLB_NOTDIRTY);
#endif
        }
    }
}

/*
 * Called with tlb_c.lock held.
 * Called only from the vCPU context, i.e. the TLB's owner thread.
602 */ 603 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s) 604 { 605 *d = *s; 606 } 607 608 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of 609 * the target vCPU). 610 * We must take tlb_c.lock to avoid racing with another vCPU update. The only 611 * thing actually updated is the target TLB entry ->addr_write flags. 612 */ 613 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) 614 { 615 CPUArchState *env; 616 617 int mmu_idx; 618 619 env = cpu->env_ptr; 620 qemu_spin_lock(&env_tlb(env)->c.lock); 621 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 622 unsigned int i; 623 unsigned int n = tlb_n_entries(env, mmu_idx); 624 625 for (i = 0; i < n; i++) { 626 tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i], 627 start1, length); 628 } 629 630 for (i = 0; i < CPU_VTLB_SIZE; i++) { 631 tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i], 632 start1, length); 633 } 634 } 635 qemu_spin_unlock(&env_tlb(env)->c.lock); 636 } 637 638 /* Called with tlb_c.lock held */ 639 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry, 640 target_ulong vaddr) 641 { 642 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) { 643 tlb_entry->addr_write = vaddr; 644 } 645 } 646 647 /* update the TLB corresponding to virtual page vaddr 648 so that it is no longer dirty */ 649 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) 650 { 651 CPUArchState *env = cpu->env_ptr; 652 int mmu_idx; 653 654 assert_cpu_is_self(cpu); 655 656 vaddr &= TARGET_PAGE_MASK; 657 qemu_spin_lock(&env_tlb(env)->c.lock); 658 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 659 tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr); 660 } 661 662 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 663 int k; 664 for (k = 0; k < CPU_VTLB_SIZE; k++) { 665 tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr); 666 } 667 } 668 qemu_spin_unlock(&env_tlb(env)->c.lock); 669 } 670 671 /* Our TLB does not support large pages, so remember the area covered by 672 large pages and trigger a full TLB flush if these are invalidated. */ 673 static void tlb_add_large_page(CPUArchState *env, int mmu_idx, 674 target_ulong vaddr, target_ulong size) 675 { 676 target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr; 677 target_ulong lp_mask = ~(size - 1); 678 679 if (lp_addr == (target_ulong)-1) { 680 /* No previous large page. */ 681 lp_addr = vaddr; 682 } else { 683 /* Extend the existing region to include the new page. 684 This is a compromise between unnecessary flushes and 685 the cost of maintaining a full variable size TLB. */ 686 lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask; 687 while (((lp_addr ^ vaddr) & lp_mask) != 0) { 688 lp_mask <<= 1; 689 } 690 } 691 env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask; 692 env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask; 693 } 694 695 /* Add a new TLB entry. At most one entry for a given virtual address 696 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the 697 * supplied size is only used by tlb_flush_page. 698 * 699 * Called from TCG-generated code, which is under an RCU read-side 700 * critical section. 
701 */ 702 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, 703 hwaddr paddr, MemTxAttrs attrs, int prot, 704 int mmu_idx, target_ulong size) 705 { 706 CPUArchState *env = cpu->env_ptr; 707 CPUTLB *tlb = env_tlb(env); 708 CPUTLBDesc *desc = &tlb->d[mmu_idx]; 709 MemoryRegionSection *section; 710 unsigned int index; 711 target_ulong address; 712 target_ulong write_address; 713 uintptr_t addend; 714 CPUTLBEntry *te, tn; 715 hwaddr iotlb, xlat, sz, paddr_page; 716 target_ulong vaddr_page; 717 int asidx = cpu_asidx_from_attrs(cpu, attrs); 718 int wp_flags; 719 bool is_ram, is_romd; 720 721 assert_cpu_is_self(cpu); 722 723 if (size <= TARGET_PAGE_SIZE) { 724 sz = TARGET_PAGE_SIZE; 725 } else { 726 tlb_add_large_page(env, mmu_idx, vaddr, size); 727 sz = size; 728 } 729 vaddr_page = vaddr & TARGET_PAGE_MASK; 730 paddr_page = paddr & TARGET_PAGE_MASK; 731 732 section = address_space_translate_for_iotlb(cpu, asidx, paddr_page, 733 &xlat, &sz, attrs, &prot); 734 assert(sz >= TARGET_PAGE_SIZE); 735 736 tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx 737 " prot=%x idx=%d\n", 738 vaddr, paddr, prot, mmu_idx); 739 740 address = vaddr_page; 741 if (size < TARGET_PAGE_SIZE) { 742 /* Repeat the MMU check and TLB fill on every access. */ 743 address |= TLB_INVALID_MASK; 744 } 745 if (attrs.byte_swap) { 746 address |= TLB_BSWAP; 747 } 748 749 is_ram = memory_region_is_ram(section->mr); 750 is_romd = memory_region_is_romd(section->mr); 751 752 if (is_ram || is_romd) { 753 /* RAM and ROMD both have associated host memory. */ 754 addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; 755 } else { 756 /* I/O does not; force the host address to NULL. */ 757 addend = 0; 758 } 759 760 write_address = address; 761 if (is_ram) { 762 iotlb = memory_region_get_ram_addr(section->mr) + xlat; 763 /* 764 * Computing is_clean is expensive; avoid all that unless 765 * the page is actually writable. 766 */ 767 if (prot & PAGE_WRITE) { 768 if (section->readonly) { 769 write_address |= TLB_DISCARD_WRITE; 770 } else if (cpu_physical_memory_is_clean(iotlb)) { 771 write_address |= TLB_NOTDIRTY; 772 } 773 } 774 } else { 775 /* I/O or ROMD */ 776 iotlb = memory_region_section_get_iotlb(cpu, section) + xlat; 777 /* 778 * Writes to romd devices must go through MMIO to enable write. 779 * Reads to romd devices go through the ram_ptr found above, 780 * but of course reads to I/O must go through MMIO. 781 */ 782 write_address |= TLB_MMIO; 783 if (!is_romd) { 784 address = write_address; 785 } 786 } 787 788 wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page, 789 TARGET_PAGE_SIZE); 790 791 index = tlb_index(env, mmu_idx, vaddr_page); 792 te = tlb_entry(env, mmu_idx, vaddr_page); 793 794 /* 795 * Hold the TLB lock for the rest of the function. We could acquire/release 796 * the lock several times in the function, but it is faster to amortize the 797 * acquisition cost by acquiring it just once. Note that this leads to 798 * a longer critical section, but this is not a concern since the TLB lock 799 * is unlikely to be contended. 800 */ 801 qemu_spin_lock(&tlb->c.lock); 802 803 /* Note that the tlb is no longer clean. */ 804 tlb->c.dirty |= 1 << mmu_idx; 805 806 /* Make sure there's no cached translation for the new page. */ 807 tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page); 808 809 /* 810 * Only evict the old entry to the victim tlb if it's for a 811 * different page; otherwise just overwrite the stale data. 
812 */ 813 if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) { 814 unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE; 815 CPUTLBEntry *tv = &desc->vtable[vidx]; 816 817 /* Evict the old entry into the victim tlb. */ 818 copy_tlb_helper_locked(tv, te); 819 desc->viotlb[vidx] = desc->iotlb[index]; 820 tlb_n_used_entries_dec(env, mmu_idx); 821 } 822 823 /* refill the tlb */ 824 /* 825 * At this point iotlb contains a physical section number in the lower 826 * TARGET_PAGE_BITS, and either 827 * + the ram_addr_t of the page base of the target RAM (RAM) 828 * + the offset within section->mr of the page base (I/O, ROMD) 829 * We subtract the vaddr_page (which is page aligned and thus won't 830 * disturb the low bits) to give an offset which can be added to the 831 * (non-page-aligned) vaddr of the eventual memory access to get 832 * the MemoryRegion offset for the access. Note that the vaddr we 833 * subtract here is that of the page base, and not the same as the 834 * vaddr we add back in io_readx()/io_writex()/get_page_addr_code(). 835 */ 836 desc->iotlb[index].addr = iotlb - vaddr_page; 837 desc->iotlb[index].attrs = attrs; 838 839 /* Now calculate the new entry */ 840 tn.addend = addend - vaddr_page; 841 if (prot & PAGE_READ) { 842 tn.addr_read = address; 843 if (wp_flags & BP_MEM_READ) { 844 tn.addr_read |= TLB_WATCHPOINT; 845 } 846 } else { 847 tn.addr_read = -1; 848 } 849 850 if (prot & PAGE_EXEC) { 851 tn.addr_code = address; 852 } else { 853 tn.addr_code = -1; 854 } 855 856 tn.addr_write = -1; 857 if (prot & PAGE_WRITE) { 858 tn.addr_write = write_address; 859 if (prot & PAGE_WRITE_INV) { 860 tn.addr_write |= TLB_INVALID_MASK; 861 } 862 if (wp_flags & BP_MEM_WRITE) { 863 tn.addr_write |= TLB_WATCHPOINT; 864 } 865 } 866 867 copy_tlb_helper_locked(te, &tn); 868 tlb_n_used_entries_inc(env, mmu_idx); 869 qemu_spin_unlock(&tlb->c.lock); 870 } 871 872 /* Add a new TLB entry, but without specifying the memory 873 * transaction attributes to be used. 874 */ 875 void tlb_set_page(CPUState *cpu, target_ulong vaddr, 876 hwaddr paddr, int prot, 877 int mmu_idx, target_ulong size) 878 { 879 tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED, 880 prot, mmu_idx, size); 881 } 882 883 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr) 884 { 885 ram_addr_t ram_addr; 886 887 ram_addr = qemu_ram_addr_from_host(ptr); 888 if (ram_addr == RAM_ADDR_INVALID) { 889 error_report("Bad ram pointer %p", ptr); 890 abort(); 891 } 892 return ram_addr; 893 } 894 895 /* 896 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the 897 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must 898 * be discarded and looked up again (e.g. via tlb_entry()). 899 */ 900 static void tlb_fill(CPUState *cpu, target_ulong addr, int size, 901 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) 902 { 903 CPUClass *cc = CPU_GET_CLASS(cpu); 904 bool ok; 905 906 /* 907 * This is not a probe, so only valid return is success; failure 908 * should result in exception + longjmp to the cpu loop. 
909 */ 910 ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr); 911 assert(ok); 912 } 913 914 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, 915 int mmu_idx, target_ulong addr, uintptr_t retaddr, 916 MMUAccessType access_type, MemOp op) 917 { 918 CPUState *cpu = env_cpu(env); 919 hwaddr mr_offset; 920 MemoryRegionSection *section; 921 MemoryRegion *mr; 922 uint64_t val; 923 bool locked = false; 924 MemTxResult r; 925 926 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); 927 mr = section->mr; 928 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; 929 cpu->mem_io_pc = retaddr; 930 if (!cpu->can_do_io) { 931 cpu_io_recompile(cpu, retaddr); 932 } 933 934 if (mr->global_locking && !qemu_mutex_iothread_locked()) { 935 qemu_mutex_lock_iothread(); 936 locked = true; 937 } 938 r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs); 939 if (r != MEMTX_OK) { 940 hwaddr physaddr = mr_offset + 941 section->offset_within_address_space - 942 section->offset_within_region; 943 944 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type, 945 mmu_idx, iotlbentry->attrs, r, retaddr); 946 } 947 if (locked) { 948 qemu_mutex_unlock_iothread(); 949 } 950 951 return val; 952 } 953 954 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, 955 int mmu_idx, uint64_t val, target_ulong addr, 956 uintptr_t retaddr, MemOp op) 957 { 958 CPUState *cpu = env_cpu(env); 959 hwaddr mr_offset; 960 MemoryRegionSection *section; 961 MemoryRegion *mr; 962 bool locked = false; 963 MemTxResult r; 964 965 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); 966 mr = section->mr; 967 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; 968 if (!cpu->can_do_io) { 969 cpu_io_recompile(cpu, retaddr); 970 } 971 cpu->mem_io_pc = retaddr; 972 973 if (mr->global_locking && !qemu_mutex_iothread_locked()) { 974 qemu_mutex_lock_iothread(); 975 locked = true; 976 } 977 r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs); 978 if (r != MEMTX_OK) { 979 hwaddr physaddr = mr_offset + 980 section->offset_within_address_space - 981 section->offset_within_region; 982 983 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), 984 MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r, 985 retaddr); 986 } 987 if (locked) { 988 qemu_mutex_unlock_iothread(); 989 } 990 } 991 992 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs) 993 { 994 #if TCG_OVERSIZED_GUEST 995 return *(target_ulong *)((uintptr_t)entry + ofs); 996 #else 997 /* ofs might correspond to .addr_write, so use atomic_read */ 998 return atomic_read((target_ulong *)((uintptr_t)entry + ofs)); 999 #endif 1000 } 1001 1002 /* Return true if ADDR is present in the victim tlb, and has been copied 1003 back to the main tlb. */ 1004 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, 1005 size_t elt_ofs, target_ulong page) 1006 { 1007 size_t vidx; 1008 1009 assert_cpu_is_self(env_cpu(env)); 1010 for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { 1011 CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx]; 1012 target_ulong cmp; 1013 1014 /* elt_ofs might correspond to .addr_write, so use atomic_read */ 1015 #if TCG_OVERSIZED_GUEST 1016 cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); 1017 #else 1018 cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs)); 1019 #endif 1020 1021 if (cmp == page) { 1022 /* Found entry in victim tlb, swap tlb and iotlb. 
            CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];

            qemu_spin_lock(&env_tlb(env)->c.lock);
            copy_tlb_helper_locked(&tmptlb, tlb);
            copy_tlb_helper_locked(tlb, vtlb);
            copy_tlb_helper_locked(vtlb, &tmptlb);
            qemu_spin_unlock(&env_tlb(env)->c.lock);

            CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
            CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
            tmpio = *io; *io = *vio; *vio = tmpio;
            return true;
        }
    }
    return false;
}

/* Macro to call the above, with local variables from the use context.  */
#define VICTIM_TLB_HIT(TY, ADDR) \
  victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
                 (ADDR) & TARGET_PAGE_MASK)

/*
 * Return a ram_addr_t for the virtual address for execution.
 *
 * Return -1 if we can't translate and execute from an entire page
 * of RAM.  This will force us to execute by loading and translating
 * one insn at a time, without caching.
 *
 * NOTE: This function will trigger an exception if the page is
 * not executable.
 */
tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
                                        void **hostp)
{
    uintptr_t mmu_idx = cpu_mmu_index(env, true);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    void *p;

    if (unlikely(!tlb_hit(entry->addr_code, addr))) {
        if (!VICTIM_TLB_HIT(addr_code, addr)) {
            tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);

            if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
                /*
                 * The MMU protection covers a smaller range than a target
                 * page, so we must redo the MMU check for every insn.
                 */
                return -1;
            }
        }
        assert(tlb_hit(entry->addr_code, addr));
    }

    if (unlikely(entry->addr_code & TLB_MMIO)) {
        /* The region is not backed by RAM.  */
        if (hostp) {
            *hostp = NULL;
        }
        return -1;
    }

    p = (void *)((uintptr_t)addr + entry->addend);
    if (hostp) {
        *hostp = p;
    }
    return qemu_ram_addr_from_host_nofail(p);
}

tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
{
    return get_page_addr_code_hostp(env, addr, NULL);
}

static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
                           CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
{
    ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;

    trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);

    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
        struct page_collection *pages
            = page_collection_lock(ram_addr, ram_addr + size);
        tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr);
        page_collection_unlock(pages);
    }

    /*
     * Set both VGA and migration bits for simplicity and to remove
     * the notdirty callback faster.
     */
    cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);

    /* We remove the notdirty callback only if the code has been flushed. */
    if (!cpu_physical_memory_is_clean(ram_addr)) {
        trace_memory_notdirty_set_dirty(mem_vaddr);
        tlb_set_dirty(cpu, mem_vaddr);
    }
}

/*
 * Probe for whether the specified guest access is permitted. If it is not
 * permitted then an exception will be taken in the same way as if this
 * were a real access (and we will not return).
 * If the size is 0 or the page requires I/O access, returns NULL; otherwise,
 * returns the address of the host page similar to tlb_vaddr_to_host().
 */
void *probe_access(CPUArchState *env, target_ulong addr, int size,
                   MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
{
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr;
    size_t elt_ofs;
    int wp_access;

    g_assert(-(addr | TARGET_PAGE_MASK) >= size);

    switch (access_type) {
    case MMU_DATA_LOAD:
        elt_ofs = offsetof(CPUTLBEntry, addr_read);
        wp_access = BP_MEM_READ;
        break;
    case MMU_DATA_STORE:
        elt_ofs = offsetof(CPUTLBEntry, addr_write);
        wp_access = BP_MEM_WRITE;
        break;
    case MMU_INST_FETCH:
        elt_ofs = offsetof(CPUTLBEntry, addr_code);
        wp_access = BP_MEM_READ;
        break;
    default:
        g_assert_not_reached();
    }
    tlb_addr = tlb_read_ofs(entry, elt_ofs);

    if (unlikely(!tlb_hit(tlb_addr, addr))) {
        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, retaddr);
            /* TLB resize via tlb_fill may have moved the entry. */
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_read_ofs(entry, elt_ofs);
    }

    if (!size) {
        return NULL;
    }

    if (unlikely(tlb_addr & TLB_FLAGS_MASK)) {
        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Reject I/O access, or other required slow-path.  */
        if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) {
            return NULL;
        }

        /* Handle watchpoints.  */
        if (tlb_addr & TLB_WATCHPOINT) {
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, wp_access, retaddr);
        }

        /* Handle clean RAM pages.  */
        if (tlb_addr & TLB_NOTDIRTY) {
            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
        }
    }

    return (void *)((uintptr_t)addr + entry->addend);
}

void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
                        MMUAccessType access_type, int mmu_idx)
{
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr, page;
    size_t elt_ofs;

    switch (access_type) {
    case MMU_DATA_LOAD:
        elt_ofs = offsetof(CPUTLBEntry, addr_read);
        break;
    case MMU_DATA_STORE:
        elt_ofs = offsetof(CPUTLBEntry, addr_write);
        break;
    case MMU_INST_FETCH:
        elt_ofs = offsetof(CPUTLBEntry, addr_code);
        break;
    default:
        g_assert_not_reached();
    }

    page = addr & TARGET_PAGE_MASK;
    tlb_addr = tlb_read_ofs(entry, elt_ofs);

    if (!tlb_hit_page(tlb_addr, page)) {
        uintptr_t index = tlb_index(env, mmu_idx, addr);

        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) {
            CPUState *cs = env_cpu(env);
            CPUClass *cc = CPU_GET_CLASS(cs);

            if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) {
                /* Non-faulting page table read failed.  */
                return NULL;
            }

            /* TLB resize via tlb_fill may have moved the entry.  */
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_read_ofs(entry, elt_ofs);
    }

    if (tlb_addr & ~TARGET_PAGE_MASK) {
        /* IO access */
        return NULL;
    }

    return (void *)((uintptr_t)addr + entry->addend);
}


#ifdef CONFIG_PLUGIN
/*
 * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
 * This should be a hot path as we will have just looked this path up
 * in the softmmu lookup code (or helper). We don't handle re-fills or
 * checking the victim table. This is purely informational.
 *
 * This should never fail as the memory access being instrumented
 * should have just filled the TLB.
 */

bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
                       bool is_store, struct qemu_plugin_hwaddr *data)
{
    CPUArchState *env = cpu->env_ptr;
    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;

    if (likely(tlb_hit(tlb_addr, addr))) {
        /* We must have an iotlb entry for MMIO */
        if (tlb_addr & TLB_MMIO) {
            CPUIOTLBEntry *iotlbentry;
            iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
            data->is_io = true;
            data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
            data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
        } else {
            data->is_io = false;
            data->v.ram.hostaddr = addr + tlbe->addend;
        }
        return true;
    }
    return false;
}

#endif

/* Probe for a read-modify-write atomic operation.  Do not allow unaligned
 * operations, or io operations to proceed.  Return the host address.  */
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    size_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(tlbe);
    MemOp mop = get_memop(oi);
    int a_bits = get_alignment_bits(mop);
    int s_bits = mop & MO_SIZE;
    void *hostaddr;

    /* Adjust the given return address.  */
    retaddr -= GETPC_ADJ;

    /* Enforce guest required alignment.  */
    if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
        /* ??? Maybe indicate atomic op to cpu_unaligned_access */
        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* Enforce qemu required alignment.  */
    if (unlikely(addr & ((1 << s_bits) - 1))) {
        /* We get here if guest alignment was not requested,
           or was not enforced by cpu_unaligned_access above.
           We might widen the access and emulate, but for now
           mark an exception and exit the cpu loop.  */
        goto stop_the_world;
    }

    /* Check TLB entry and enforce page permissions.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!VICTIM_TLB_HIT(addr_write, addr)) {
            tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            tlbe = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
    }

    /* Notice an IO access or a needs-MMU-lookup access */
    if (unlikely(tlb_addr & TLB_MMIO)) {
        /* There's really nothing that can be done to
           support this apart from stop-the-world.  */
        goto stop_the_world;
    }

    /* Let the guest notice RMW on a write-only page.  */
    if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
        tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
                 mmu_idx, retaddr);
        /* Since we don't support reads and writes to different addresses,
           and we do have the proper page loaded for write, this shouldn't
           ever return.  But just in case, handle via stop-the-world.  */
        goto stop_the_world;
    }

    hostaddr = (void *)((uintptr_t)addr + tlbe->addend);

    if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
        notdirty_write(env_cpu(env), addr, 1 << s_bits,
                       &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
    }

    return hostaddr;

 stop_the_world:
    cpu_loop_exit_atomic(env_cpu(env), retaddr);
}

/*
 * Load Helpers
 *
 * We support two different access types. SOFTMMU_CODE_ACCESS is
 * specifically for reading instructions from system memory. It is
 * called by the translation loop and in some helpers where the code
 * is disassembled. It shouldn't be called directly by guest code.
 */

typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
                                TCGMemOpIdx oi, uintptr_t retaddr);

static inline uint64_t QEMU_ALWAYS_INLINE
load_memop(const void *haddr, MemOp op)
{
    switch (op) {
    case MO_UB:
        return ldub_p(haddr);
    case MO_BEUW:
        return lduw_be_p(haddr);
    case MO_LEUW:
        return lduw_le_p(haddr);
    case MO_BEUL:
        return (uint32_t)ldl_be_p(haddr);
    case MO_LEUL:
        return (uint32_t)ldl_le_p(haddr);
    case MO_BEQ:
        return ldq_be_p(haddr);
    case MO_LEQ:
        return ldq_le_p(haddr);
    default:
        qemu_build_not_reached();
    }
}

static inline uint64_t QEMU_ALWAYS_INLINE
load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
            uintptr_t retaddr, MemOp op, bool code_read,
            FullLoadHelper *full_load)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
    const size_t tlb_off = code_read ?
        offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
    const MMUAccessType access_type =
        code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    uint64_t res;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, access_type,
                             mmu_idx, retaddr);
    }

    /* If the TLB entry is for a different page, reload and try again.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size,
                     access_type, mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = code_read ? entry->addr_code : entry->addr_read;
        tlb_addr &= ~TLB_INVALID_MASK;
    }

    /* Handle anything that isn't just a straight memory access.  */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through full_load.  */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints.  */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out.  */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_READ, retaddr);
        }

        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access.  */
        if (likely(tlb_addr & TLB_MMIO)) {
            return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
                            access_type, op ^ (need_swap * MO_BSWAP));
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two load_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this.  ;-)
         */
        if (unlikely(need_swap)) {
            return load_memop(haddr, op ^ MO_BSWAP);
        }
        return load_memop(haddr, op);
    }

    /* Handle slow unaligned access (it spans two pages or IO).  */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
        target_ulong addr1, addr2;
        uint64_t r1, r2;
        unsigned shift;
    do_unaligned_access:
        addr1 = addr & ~((target_ulong)size - 1);
        addr2 = addr1 + size;
        r1 = full_load(env, addr1, oi, retaddr);
        r2 = full_load(env, addr2, oi, retaddr);
        shift = (addr & (size - 1)) * 8;

        if (memop_big_endian(op)) {
            /* Big-endian combine.  */
            res = (r1 << shift) | (r2 >> ((size * 8) - shift));
        } else {
            /* Little-endian combine.  */
            res = (r1 >> shift) | (r2 << ((size * 8) - shift));
        }
        return res & MAKE_64BIT_MASK(0, size * 8);
    }

    haddr = (void *)((uintptr_t)addr + entry->addend);
    return load_memop(haddr, op);
}

/*
 * For the benefit of TCG generated code, we want to avoid the
 * complication of ABI-specific return type promotion and always
 * return a value extended to the register size of the host. This is
 * tcg_target_long, except in the case of a 32-bit host and 64-bit
 * data, and for that we always have uint64_t.
 *
 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
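 *
 * For example, helper_le_lduw_mmu() returns its 16-bit result
 * zero-extended to tcg_target_ulong, so even a sub-word load fills a
 * whole host register.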
 */

static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
                              TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
}

tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
                                     TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_ldub_mmu(env, addr, oi, retaddr);
}

static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
                       full_le_lduw_mmu);
}

tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_le_lduw_mmu(env, addr, oi, retaddr);
}

static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
                       full_be_lduw_mmu);
}

tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_be_lduw_mmu(env, addr, oi, retaddr);
}

static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
                       full_le_ldul_mmu);
}

tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_le_ldul_mmu(env, addr, oi, retaddr);
}

static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
                       full_be_ldul_mmu);
}

tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_be_ldul_mmu(env, addr, oi, retaddr);
}

uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
                           TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
                       helper_le_ldq_mmu);
}

uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
                           TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
                       helper_be_ldq_mmu);
}

/*
 * Provide signed versions of the load routines as well.  We can of course
 * avoid this for 64-bit data, or for 32-bit data on 32-bit host.
 */


tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
                                     TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
                                    TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
}

/*
 * Store Helpers
 */

static inline void QEMU_ALWAYS_INLINE
store_memop(void *haddr, uint64_t val, MemOp op)
{
    switch (op) {
    case MO_UB:
        stb_p(haddr, val);
        break;
    case MO_BEUW:
        stw_be_p(haddr, val);
        break;
    case MO_LEUW:
        stw_le_p(haddr, val);
        break;
    case MO_BEUL:
        stl_be_p(haddr, val);
        break;
    case MO_LEUL:
        stl_le_p(haddr, val);
        break;
    case MO_BEQ:
        stq_be_p(haddr, val);
        break;
    case MO_LEQ:
        stq_le_p(haddr, val);
        break;
    default:
        qemu_build_not_reached();
    }
}

static inline void QEMU_ALWAYS_INLINE
store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
             TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(entry);
    const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* If the TLB entry is for a different page, reload and try again.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
                            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
    }

    /* Handle anything that isn't just a straight memory access.  */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through byte stores.  */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints.  */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out.  */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
        }

        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access.  */
        if (tlb_addr & TLB_MMIO) {
            io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
                      op ^ (need_swap * MO_BSWAP));
            return;
        }

        /* Ignore writes to ROM.  */
        if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
            return;
        }

        /* Handle clean RAM pages.  */
        if (tlb_addr & TLB_NOTDIRTY) {
            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two store_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this.  ;-)
         */
        if (unlikely(need_swap)) {
            store_memop(haddr, val, op ^ MO_BSWAP);
        } else {
            store_memop(haddr, val, op);
        }
        return;
    }

    /* Handle slow unaligned access (it spans two pages or IO).  */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
        int i;
        uintptr_t index2;
        CPUTLBEntry *entry2;
        target_ulong page2, tlb_addr2;
        size_t size2;

    do_unaligned_access:
        /*
         * Ensure the second page is in the TLB.  Note that the first page
         * is already guaranteed to be filled, and that the second page
         * cannot evict the first.
         */
        page2 = (addr + size) & TARGET_PAGE_MASK;
        size2 = (addr + size) & ~TARGET_PAGE_MASK;
        index2 = tlb_index(env, mmu_idx, page2);
        entry2 = tlb_entry(env, mmu_idx, page2);
        tlb_addr2 = tlb_addr_write(entry2);
        if (!tlb_hit_page(tlb_addr2, page2)) {
            if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
                tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
                         mmu_idx, retaddr);
                index2 = tlb_index(env, mmu_idx, page2);
                entry2 = tlb_entry(env, mmu_idx, page2);
            }
            tlb_addr2 = tlb_addr_write(entry2);
        }

        /*
         * Handle watchpoints.  Since this may trap, all checks
         * must happen before any store.
         */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            cpu_check_watchpoint(env_cpu(env), addr, size - size2,
                                 env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
                                 BP_MEM_WRITE, retaddr);
        }
        if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
            cpu_check_watchpoint(env_cpu(env), page2, size2,
                                 env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
                                 BP_MEM_WRITE, retaddr);
        }

        /*
         * XXX: not efficient, but simple.
         * This loop must go in the forward direction to avoid issues
         * with self-modifying code in Windows 64-bit.
         */
        for (i = 0; i < size; ++i) {
            uint8_t val8;
            if (memop_big_endian(op)) {
                /* Big-endian extract.  */
                val8 = val >> (((size - 1) * 8) - (i * 8));
            } else {
                /* Little-endian extract.  */
void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
                        TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_UB);
}

void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUW);
}

void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUW);
}

void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEUL);
}

void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEUL);
}

void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_LEQ);
}

void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                       TCGMemOpIdx oi, uintptr_t retaddr)
{
    store_helper(env, addr, val, oi, retaddr, MO_BEQ);
}

/* First set of helpers allows passing in of OI and RETADDR.  This makes
   them callable from other helpers.  */

#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
#define ATOMIC_NAME(X) \
    HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
#define ATOMIC_MMU_DECLS
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
#define ATOMIC_MMU_CLEANUP
#define ATOMIC_MMU_IDX get_mmuidx(oi)

#include "atomic_common.inc.c"

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif

/* Second set of helpers are directly callable from TCG as helpers.  */

#undef EXTRA_ARGS
#undef ATOMIC_NAME
#undef ATOMIC_MMU_LOOKUP
#define EXTRA_ARGS , TCGMemOpIdx oi
#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC())

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif
#undef ATOMIC_MMU_IDX
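/*
 * For illustration (the generated names come from atomic_template.h and
 * atomic_common.inc.c, so treat the exact spelling as an assumption of
 * this note): each DATA_SIZE inclusion above stamps out one family of
 * atomic helpers.  With the first macro set, DATA_SIZE 4 expands
 * ATOMIC_NAME(cmpxchg) to roughly
 *
 *     helper_atomic_cmpxchgl_le_mmu(env, addr, cmpv, newv, oi, retaddr)
 *
 * (plus a big-endian twin), i.e. helpers that take an explicit
 * TCGMemOpIdx and return address so they can be called from other
 * helpers.  The second macro set drops the explicit retaddr and lets
 * ATOMIC_MMU_LOOKUP recover it with GETPC(), which makes those variants
 * directly callable from TCG-generated code.
 */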
/* Code access functions.  */

static uint64_t full_ldub_cmmu(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_cmmu);
}

uint8_t helper_ret_ldub_cmmu(CPUArchState *env, target_ulong addr,
                             TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_ldub_cmmu(env, addr, oi, retaddr);
}

int8_t helper_ret_ldsb_cmmu(CPUArchState *env, target_ulong addr,
                            TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int8_t) full_ldub_cmmu(env, addr, oi, retaddr);
}

static uint64_t full_le_lduw_cmmu(CPUArchState *env, target_ulong addr,
                                  TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEUW, true,
                       full_le_lduw_cmmu);
}

uint16_t helper_le_lduw_cmmu(CPUArchState *env, target_ulong addr,
                             TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_le_lduw_cmmu(env, addr, oi, retaddr);
}

int16_t helper_le_ldsw_cmmu(CPUArchState *env, target_ulong addr,
                            TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int16_t) full_le_lduw_cmmu(env, addr, oi, retaddr);
}

static uint64_t full_be_lduw_cmmu(CPUArchState *env, target_ulong addr,
                                  TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEUW, true,
                       full_be_lduw_cmmu);
}

uint16_t helper_be_lduw_cmmu(CPUArchState *env, target_ulong addr,
                             TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_be_lduw_cmmu(env, addr, oi, retaddr);
}

int16_t helper_be_ldsw_cmmu(CPUArchState *env, target_ulong addr,
                            TCGMemOpIdx oi, uintptr_t retaddr)
{
    return (int16_t) full_be_lduw_cmmu(env, addr, oi, retaddr);
}

static uint64_t full_le_ldul_cmmu(CPUArchState *env, target_ulong addr,
                                  TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEUL, true,
                       full_le_ldul_cmmu);
}

uint32_t helper_le_ldl_cmmu(CPUArchState *env, target_ulong addr,
                            TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_le_ldul_cmmu(env, addr, oi, retaddr);
}

static uint64_t full_be_ldul_cmmu(CPUArchState *env, target_ulong addr,
                                  TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEUL, true,
                       full_be_ldul_cmmu);
}

uint32_t helper_be_ldl_cmmu(CPUArchState *env, target_ulong addr,
                            TCGMemOpIdx oi, uintptr_t retaddr)
{
    return full_be_ldul_cmmu(env, addr, oi, retaddr);
}

uint64_t helper_le_ldq_cmmu(CPUArchState *env, target_ulong addr,
                            TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_LEQ, true,
                       helper_le_ldq_cmmu);
}

uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr,
                            TCGMemOpIdx oi, uintptr_t retaddr)
{
    return load_helper(env, addr, oi, retaddr, MO_BEQ, true,
                       helper_be_ldq_cmmu);
}
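/*
 * For illustration: the *_cmmu helpers above mirror the data-load helpers
 * earlier in this file, but pass true for load_helper()'s code-read flag,
 * so the lookup is made against the TLB entry's code-access address
 * (addr_code) rather than the data-read one.  They serve guest
 * instruction fetches performed by the translator (presumably reached
 * through the cpu_ld*_code() accessors) rather than data accesses made
 * by the guest itself.
 */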