1 /* 2 * Common CPU TLB handling 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qemu/main-loop.h" 22 #include "cpu.h" 23 #include "exec/exec-all.h" 24 #include "exec/memory.h" 25 #include "exec/address-spaces.h" 26 #include "exec/cpu_ldst.h" 27 #include "exec/cputlb.h" 28 #include "exec/memory-internal.h" 29 #include "exec/ram_addr.h" 30 #include "tcg/tcg.h" 31 #include "qemu/error-report.h" 32 #include "exec/log.h" 33 #include "exec/helper-proto.h" 34 #include "qemu/atomic.h" 35 #include "qemu/atomic128.h" 36 #include "translate-all.h" 37 38 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ 39 /* #define DEBUG_TLB */ 40 /* #define DEBUG_TLB_LOG */ 41 42 #ifdef DEBUG_TLB 43 # define DEBUG_TLB_GATE 1 44 # ifdef DEBUG_TLB_LOG 45 # define DEBUG_TLB_LOG_GATE 1 46 # else 47 # define DEBUG_TLB_LOG_GATE 0 48 # endif 49 #else 50 # define DEBUG_TLB_GATE 0 51 # define DEBUG_TLB_LOG_GATE 0 52 #endif 53 54 #define tlb_debug(fmt, ...) do { \ 55 if (DEBUG_TLB_LOG_GATE) { \ 56 qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \ 57 ## __VA_ARGS__); \ 58 } else if (DEBUG_TLB_GATE) { \ 59 fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \ 60 } \ 61 } while (0) 62 63 #define assert_cpu_is_self(cpu) do { \ 64 if (DEBUG_TLB_GATE) { \ 65 g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \ 66 } \ 67 } while (0) 68 69 /* run_on_cpu_data.target_ptr should always be big enough for a 70 * target_ulong even on 32 bit builds */ 71 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data)); 72 73 /* We currently can't handle more than 16 bits in the MMUIDX bitmask. 74 */ 75 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); 76 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1) 77 78 static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx) 79 { 80 return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS); 81 } 82 83 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, 84 size_t max_entries) 85 { 86 desc->window_begin_ns = ns; 87 desc->window_max_entries = max_entries; 88 } 89 90 static void tlb_dyn_init(CPUArchState *env) 91 { 92 int i; 93 94 for (i = 0; i < NB_MMU_MODES; i++) { 95 CPUTLBDesc *desc = &env_tlb(env)->d[i]; 96 size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS; 97 98 tlb_window_reset(desc, get_clock_realtime(), 0); 99 desc->n_used_entries = 0; 100 env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS; 101 env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries); 102 env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries); 103 } 104 } 105 106 /** 107 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary 108 * @env: CPU that owns the TLB 109 * @mmu_idx: MMU index of the TLB 110 * 111 * Called with tlb_lock_held. 112 * 113 * We have two main constraints when resizing a TLB: (1) we only resize it 114 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing 115 * the array or unnecessarily flushing it), which means we do not control how 116 * frequently the resizing can occur; (2) we don't have access to the guest's 117 * future scheduling decisions, and therefore have to decide the magnitude of 118 * the resize based on past observations. 119 * 120 * In general, a memory-hungry process can benefit greatly from an appropriately 121 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that 122 * we just have to make the TLB as large as possible; while an oversized TLB 123 * results in minimal TLB miss rates, it also takes longer to be flushed 124 * (flushes can be _very_ frequent), and the reduced locality can also hurt 125 * performance. 126 * 127 * To achieve near-optimal performance for all kinds of workloads, we: 128 * 129 * 1. Aggressively increase the size of the TLB when the use rate of the 130 * TLB being flushed is high, since it is likely that in the near future this 131 * memory-hungry process will execute again, and its memory hungriness will 132 * probably be similar. 133 * 134 * 2. Slowly reduce the size of the TLB as the use rate declines over a 135 * reasonably large time window. The rationale is that if in such a time window 136 * we have not observed a high TLB use rate, it is likely that we won't observe 137 * it in the near future. In that case, once a time window expires we downsize 138 * the TLB to match the maximum use rate observed in the window. 139 * 140 * 3. Try to keep the maximum use rate in a time window in the 30-70% range, 141 * since in that range performance is likely near-optimal. Recall that the TLB 142 * is direct mapped, so we want the use rate to be low (or at least not too 143 * high), since otherwise we are likely to have a significant amount of 144 * conflict misses. 145 */ 146 static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx) 147 { 148 CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx]; 149 size_t old_size = tlb_n_entries(env, mmu_idx); 150 size_t rate; 151 size_t new_size = old_size; 152 int64_t now = get_clock_realtime(); 153 int64_t window_len_ms = 100; 154 int64_t window_len_ns = window_len_ms * 1000 * 1000; 155 bool window_expired = now > desc->window_begin_ns + window_len_ns; 156 157 if (desc->n_used_entries > desc->window_max_entries) { 158 desc->window_max_entries = desc->n_used_entries; 159 } 160 rate = desc->window_max_entries * 100 / old_size; 161 162 if (rate > 70) { 163 new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS); 164 } else if (rate < 30 && window_expired) { 165 size_t ceil = pow2ceil(desc->window_max_entries); 166 size_t expected_rate = desc->window_max_entries * 100 / ceil; 167 168 /* 169 * Avoid undersizing when the max number of entries seen is just below 170 * a pow2. For instance, if max_entries == 1025, the expected use rate 171 * would be 1025/2048==50%. However, if max_entries == 1023, we'd get 172 * 1023/1024==99.9% use rate, so we'd likely end up doubling the size 173 * later. Thus, make sure that the expected use rate remains below 70%. 174 * (and since we double the size, that means the lowest rate we'd 175 * expect to get is 35%, which is still in the 30-70% range where 176 * we consider that the size is appropriate.) 177 */ 178 if (expected_rate > 70) { 179 ceil *= 2; 180 } 181 new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS); 182 } 183 184 if (new_size == old_size) { 185 if (window_expired) { 186 tlb_window_reset(desc, now, desc->n_used_entries); 187 } 188 return; 189 } 190 191 g_free(env_tlb(env)->f[mmu_idx].table); 192 g_free(env_tlb(env)->d[mmu_idx].iotlb); 193 194 tlb_window_reset(desc, now, 0); 195 /* desc->n_used_entries is cleared by the caller */ 196 env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; 197 env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size); 198 env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size); 199 /* 200 * If the allocations fail, try smaller sizes. We just freed some 201 * memory, so going back to half of new_size has a good chance of working. 202 * Increased memory pressure elsewhere in the system might cause the 203 * allocations to fail though, so we progressively reduce the allocation 204 * size, aborting if we cannot even allocate the smallest TLB we support. 205 */ 206 while (env_tlb(env)->f[mmu_idx].table == NULL || 207 env_tlb(env)->d[mmu_idx].iotlb == NULL) { 208 if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) { 209 error_report("%s: %s", __func__, strerror(errno)); 210 abort(); 211 } 212 new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS); 213 env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; 214 215 g_free(env_tlb(env)->f[mmu_idx].table); 216 g_free(env_tlb(env)->d[mmu_idx].iotlb); 217 env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size); 218 env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size); 219 } 220 } 221 222 static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx) 223 { 224 tlb_mmu_resize_locked(env, mmu_idx); 225 memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx)); 226 env_tlb(env)->d[mmu_idx].n_used_entries = 0; 227 } 228 229 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx) 230 { 231 env_tlb(env)->d[mmu_idx].n_used_entries++; 232 } 233 234 static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx) 235 { 236 env_tlb(env)->d[mmu_idx].n_used_entries--; 237 } 238 239 void tlb_init(CPUState *cpu) 240 { 241 CPUArchState *env = cpu->env_ptr; 242 243 qemu_spin_init(&env_tlb(env)->c.lock); 244 245 /* Ensure that cpu_reset performs a full flush. */ 246 env_tlb(env)->c.dirty = ALL_MMUIDX_BITS; 247 248 tlb_dyn_init(env); 249 } 250 251 /* flush_all_helper: run fn across all cpus 252 * 253 * If the wait flag is set then the src cpu's helper will be queued as 254 * "safe" work and the loop exited creating a synchronisation point 255 * where all queued work will be finished before execution starts 256 * again. 257 */ 258 static void flush_all_helper(CPUState *src, run_on_cpu_func fn, 259 run_on_cpu_data d) 260 { 261 CPUState *cpu; 262 263 CPU_FOREACH(cpu) { 264 if (cpu != src) { 265 async_run_on_cpu(cpu, fn, d); 266 } 267 } 268 } 269 270 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide) 271 { 272 CPUState *cpu; 273 size_t full = 0, part = 0, elide = 0; 274 275 CPU_FOREACH(cpu) { 276 CPUArchState *env = cpu->env_ptr; 277 278 full += atomic_read(&env_tlb(env)->c.full_flush_count); 279 part += atomic_read(&env_tlb(env)->c.part_flush_count); 280 elide += atomic_read(&env_tlb(env)->c.elide_flush_count); 281 } 282 *pfull = full; 283 *ppart = part; 284 *pelide = elide; 285 } 286 287 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx) 288 { 289 tlb_table_flush_by_mmuidx(env, mmu_idx); 290 env_tlb(env)->d[mmu_idx].large_page_addr = -1; 291 env_tlb(env)->d[mmu_idx].large_page_mask = -1; 292 env_tlb(env)->d[mmu_idx].vindex = 0; 293 memset(env_tlb(env)->d[mmu_idx].vtable, -1, 294 sizeof(env_tlb(env)->d[0].vtable)); 295 } 296 297 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) 298 { 299 CPUArchState *env = cpu->env_ptr; 300 uint16_t asked = data.host_int; 301 uint16_t all_dirty, work, to_clean; 302 303 assert_cpu_is_self(cpu); 304 305 tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked); 306 307 qemu_spin_lock(&env_tlb(env)->c.lock); 308 309 all_dirty = env_tlb(env)->c.dirty; 310 to_clean = asked & all_dirty; 311 all_dirty &= ~to_clean; 312 env_tlb(env)->c.dirty = all_dirty; 313 314 for (work = to_clean; work != 0; work &= work - 1) { 315 int mmu_idx = ctz32(work); 316 tlb_flush_one_mmuidx_locked(env, mmu_idx); 317 } 318 319 qemu_spin_unlock(&env_tlb(env)->c.lock); 320 321 cpu_tb_jmp_cache_clear(cpu); 322 323 if (to_clean == ALL_MMUIDX_BITS) { 324 atomic_set(&env_tlb(env)->c.full_flush_count, 325 env_tlb(env)->c.full_flush_count + 1); 326 } else { 327 atomic_set(&env_tlb(env)->c.part_flush_count, 328 env_tlb(env)->c.part_flush_count + ctpop16(to_clean)); 329 if (to_clean != asked) { 330 atomic_set(&env_tlb(env)->c.elide_flush_count, 331 env_tlb(env)->c.elide_flush_count + 332 ctpop16(asked & ~to_clean)); 333 } 334 } 335 } 336 337 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap) 338 { 339 tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap); 340 341 if (cpu->created && !qemu_cpu_is_self(cpu)) { 342 async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work, 343 RUN_ON_CPU_HOST_INT(idxmap)); 344 } else { 345 tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap)); 346 } 347 } 348 349 void tlb_flush(CPUState *cpu) 350 { 351 tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS); 352 } 353 354 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap) 355 { 356 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work; 357 358 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap); 359 360 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); 361 fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap)); 362 } 363 364 void tlb_flush_all_cpus(CPUState *src_cpu) 365 { 366 tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS); 367 } 368 369 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap) 370 { 371 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work; 372 373 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap); 374 375 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); 376 async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); 377 } 378 379 void tlb_flush_all_cpus_synced(CPUState *src_cpu) 380 { 381 tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS); 382 } 383 384 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, 385 target_ulong page) 386 { 387 return tlb_hit_page(tlb_entry->addr_read, page) || 388 tlb_hit_page(tlb_addr_write(tlb_entry), page) || 389 tlb_hit_page(tlb_entry->addr_code, page); 390 } 391 392 /** 393 * tlb_entry_is_empty - return true if the entry is not in use 394 * @te: pointer to CPUTLBEntry 395 */ 396 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te) 397 { 398 return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1; 399 } 400 401 /* Called with tlb_c.lock held */ 402 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, 403 target_ulong page) 404 { 405 if (tlb_hit_page_anyprot(tlb_entry, page)) { 406 memset(tlb_entry, -1, sizeof(*tlb_entry)); 407 return true; 408 } 409 return false; 410 } 411 412 /* Called with tlb_c.lock held */ 413 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, 414 target_ulong page) 415 { 416 CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx]; 417 int k; 418 419 assert_cpu_is_self(env_cpu(env)); 420 for (k = 0; k < CPU_VTLB_SIZE; k++) { 421 if (tlb_flush_entry_locked(&d->vtable[k], page)) { 422 tlb_n_used_entries_dec(env, mmu_idx); 423 } 424 } 425 } 426 427 static void tlb_flush_page_locked(CPUArchState *env, int midx, 428 target_ulong page) 429 { 430 target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr; 431 target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask; 432 433 /* Check if we need to flush due to large pages. */ 434 if ((page & lp_mask) == lp_addr) { 435 tlb_debug("forcing full flush midx %d (" 436 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", 437 midx, lp_addr, lp_mask); 438 tlb_flush_one_mmuidx_locked(env, midx); 439 } else { 440 if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) { 441 tlb_n_used_entries_dec(env, midx); 442 } 443 tlb_flush_vtlb_page_locked(env, midx, page); 444 } 445 } 446 447 /* As we are going to hijack the bottom bits of the page address for a 448 * mmuidx bit mask we need to fail to build if we can't do that 449 */ 450 QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN); 451 452 static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu, 453 run_on_cpu_data data) 454 { 455 CPUArchState *env = cpu->env_ptr; 456 target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr; 457 target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK; 458 unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS; 459 int mmu_idx; 460 461 assert_cpu_is_self(cpu); 462 463 tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n", 464 addr, mmu_idx_bitmap); 465 466 qemu_spin_lock(&env_tlb(env)->c.lock); 467 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 468 if (test_bit(mmu_idx, &mmu_idx_bitmap)) { 469 tlb_flush_page_locked(env, mmu_idx, addr); 470 } 471 } 472 qemu_spin_unlock(&env_tlb(env)->c.lock); 473 474 tb_flush_jmp_cache(cpu, addr); 475 } 476 477 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap) 478 { 479 target_ulong addr_and_mmu_idx; 480 481 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap); 482 483 /* This should already be page aligned */ 484 addr_and_mmu_idx = addr & TARGET_PAGE_MASK; 485 addr_and_mmu_idx |= idxmap; 486 487 if (!qemu_cpu_is_self(cpu)) { 488 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work, 489 RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); 490 } else { 491 tlb_flush_page_by_mmuidx_async_work( 492 cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); 493 } 494 } 495 496 void tlb_flush_page(CPUState *cpu, target_ulong addr) 497 { 498 tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS); 499 } 500 501 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr, 502 uint16_t idxmap) 503 { 504 const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work; 505 target_ulong addr_and_mmu_idx; 506 507 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap); 508 509 /* This should already be page aligned */ 510 addr_and_mmu_idx = addr & TARGET_PAGE_MASK; 511 addr_and_mmu_idx |= idxmap; 512 513 flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); 514 fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); 515 } 516 517 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr) 518 { 519 tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS); 520 } 521 522 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu, 523 target_ulong addr, 524 uint16_t idxmap) 525 { 526 const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work; 527 target_ulong addr_and_mmu_idx; 528 529 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap); 530 531 /* This should already be page aligned */ 532 addr_and_mmu_idx = addr & TARGET_PAGE_MASK; 533 addr_and_mmu_idx |= idxmap; 534 535 flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); 536 async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); 537 } 538 539 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr) 540 { 541 tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS); 542 } 543 544 /* update the TLBs so that writes to code in the virtual page 'addr' 545 can be detected */ 546 void tlb_protect_code(ram_addr_t ram_addr) 547 { 548 cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE, 549 DIRTY_MEMORY_CODE); 550 } 551 552 /* update the TLB so that writes in physical page 'phys_addr' are no longer 553 tested for self modifying code */ 554 void tlb_unprotect_code(ram_addr_t ram_addr) 555 { 556 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE); 557 } 558 559 560 /* 561 * Dirty write flag handling 562 * 563 * When the TCG code writes to a location it looks up the address in 564 * the TLB and uses that data to compute the final address. If any of 565 * the lower bits of the address are set then the slow path is forced. 566 * There are a number of reasons to do this but for normal RAM the 567 * most usual is detecting writes to code regions which may invalidate 568 * generated code. 569 * 570 * Other vCPUs might be reading their TLBs during guest execution, so we update 571 * te->addr_write with atomic_set. We don't need to worry about this for 572 * oversized guests as MTTCG is disabled for them. 573 * 574 * Called with tlb_c.lock held. 575 */ 576 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry, 577 uintptr_t start, uintptr_t length) 578 { 579 uintptr_t addr = tlb_entry->addr_write; 580 581 if ((addr & (TLB_INVALID_MASK | TLB_MMIO | 582 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) { 583 addr &= TARGET_PAGE_MASK; 584 addr += tlb_entry->addend; 585 if ((addr - start) < length) { 586 #if TCG_OVERSIZED_GUEST 587 tlb_entry->addr_write |= TLB_NOTDIRTY; 588 #else 589 atomic_set(&tlb_entry->addr_write, 590 tlb_entry->addr_write | TLB_NOTDIRTY); 591 #endif 592 } 593 } 594 } 595 596 /* 597 * Called with tlb_c.lock held. 598 * Called only from the vCPU context, i.e. the TLB's owner thread. 599 */ 600 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s) 601 { 602 *d = *s; 603 } 604 605 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of 606 * the target vCPU). 607 * We must take tlb_c.lock to avoid racing with another vCPU update. The only 608 * thing actually updated is the target TLB entry ->addr_write flags. 609 */ 610 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) 611 { 612 CPUArchState *env; 613 614 int mmu_idx; 615 616 env = cpu->env_ptr; 617 qemu_spin_lock(&env_tlb(env)->c.lock); 618 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 619 unsigned int i; 620 unsigned int n = tlb_n_entries(env, mmu_idx); 621 622 for (i = 0; i < n; i++) { 623 tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i], 624 start1, length); 625 } 626 627 for (i = 0; i < CPU_VTLB_SIZE; i++) { 628 tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i], 629 start1, length); 630 } 631 } 632 qemu_spin_unlock(&env_tlb(env)->c.lock); 633 } 634 635 /* Called with tlb_c.lock held */ 636 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry, 637 target_ulong vaddr) 638 { 639 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) { 640 tlb_entry->addr_write = vaddr; 641 } 642 } 643 644 /* update the TLB corresponding to virtual page vaddr 645 so that it is no longer dirty */ 646 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) 647 { 648 CPUArchState *env = cpu->env_ptr; 649 int mmu_idx; 650 651 assert_cpu_is_self(cpu); 652 653 vaddr &= TARGET_PAGE_MASK; 654 qemu_spin_lock(&env_tlb(env)->c.lock); 655 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 656 tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr); 657 } 658 659 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { 660 int k; 661 for (k = 0; k < CPU_VTLB_SIZE; k++) { 662 tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr); 663 } 664 } 665 qemu_spin_unlock(&env_tlb(env)->c.lock); 666 } 667 668 /* Our TLB does not support large pages, so remember the area covered by 669 large pages and trigger a full TLB flush if these are invalidated. */ 670 static void tlb_add_large_page(CPUArchState *env, int mmu_idx, 671 target_ulong vaddr, target_ulong size) 672 { 673 target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr; 674 target_ulong lp_mask = ~(size - 1); 675 676 if (lp_addr == (target_ulong)-1) { 677 /* No previous large page. */ 678 lp_addr = vaddr; 679 } else { 680 /* Extend the existing region to include the new page. 681 This is a compromise between unnecessary flushes and 682 the cost of maintaining a full variable size TLB. */ 683 lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask; 684 while (((lp_addr ^ vaddr) & lp_mask) != 0) { 685 lp_mask <<= 1; 686 } 687 } 688 env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask; 689 env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask; 690 } 691 692 /* Add a new TLB entry. At most one entry for a given virtual address 693 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the 694 * supplied size is only used by tlb_flush_page. 695 * 696 * Called from TCG-generated code, which is under an RCU read-side 697 * critical section. 698 */ 699 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, 700 hwaddr paddr, MemTxAttrs attrs, int prot, 701 int mmu_idx, target_ulong size) 702 { 703 CPUArchState *env = cpu->env_ptr; 704 CPUTLB *tlb = env_tlb(env); 705 CPUTLBDesc *desc = &tlb->d[mmu_idx]; 706 MemoryRegionSection *section; 707 unsigned int index; 708 target_ulong address; 709 target_ulong write_address; 710 uintptr_t addend; 711 CPUTLBEntry *te, tn; 712 hwaddr iotlb, xlat, sz, paddr_page; 713 target_ulong vaddr_page; 714 int asidx = cpu_asidx_from_attrs(cpu, attrs); 715 int wp_flags; 716 bool is_ram, is_romd; 717 718 assert_cpu_is_self(cpu); 719 720 if (size <= TARGET_PAGE_SIZE) { 721 sz = TARGET_PAGE_SIZE; 722 } else { 723 tlb_add_large_page(env, mmu_idx, vaddr, size); 724 sz = size; 725 } 726 vaddr_page = vaddr & TARGET_PAGE_MASK; 727 paddr_page = paddr & TARGET_PAGE_MASK; 728 729 section = address_space_translate_for_iotlb(cpu, asidx, paddr_page, 730 &xlat, &sz, attrs, &prot); 731 assert(sz >= TARGET_PAGE_SIZE); 732 733 tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx 734 " prot=%x idx=%d\n", 735 vaddr, paddr, prot, mmu_idx); 736 737 address = vaddr_page; 738 if (size < TARGET_PAGE_SIZE) { 739 /* Repeat the MMU check and TLB fill on every access. */ 740 address |= TLB_INVALID_MASK; 741 } 742 if (attrs.byte_swap) { 743 address |= TLB_BSWAP; 744 } 745 746 is_ram = memory_region_is_ram(section->mr); 747 is_romd = memory_region_is_romd(section->mr); 748 749 if (is_ram || is_romd) { 750 /* RAM and ROMD both have associated host memory. */ 751 addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; 752 } else { 753 /* I/O does not; force the host address to NULL. */ 754 addend = 0; 755 } 756 757 write_address = address; 758 if (is_ram) { 759 iotlb = memory_region_get_ram_addr(section->mr) + xlat; 760 /* 761 * Computing is_clean is expensive; avoid all that unless 762 * the page is actually writable. 763 */ 764 if (prot & PAGE_WRITE) { 765 if (section->readonly) { 766 write_address |= TLB_DISCARD_WRITE; 767 } else if (cpu_physical_memory_is_clean(iotlb)) { 768 write_address |= TLB_NOTDIRTY; 769 } 770 } 771 } else { 772 /* I/O or ROMD */ 773 iotlb = memory_region_section_get_iotlb(cpu, section) + xlat; 774 /* 775 * Writes to romd devices must go through MMIO to enable write. 776 * Reads to romd devices go through the ram_ptr found above, 777 * but of course reads to I/O must go through MMIO. 778 */ 779 write_address |= TLB_MMIO; 780 if (!is_romd) { 781 address = write_address; 782 } 783 } 784 785 wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page, 786 TARGET_PAGE_SIZE); 787 788 index = tlb_index(env, mmu_idx, vaddr_page); 789 te = tlb_entry(env, mmu_idx, vaddr_page); 790 791 /* 792 * Hold the TLB lock for the rest of the function. We could acquire/release 793 * the lock several times in the function, but it is faster to amortize the 794 * acquisition cost by acquiring it just once. Note that this leads to 795 * a longer critical section, but this is not a concern since the TLB lock 796 * is unlikely to be contended. 797 */ 798 qemu_spin_lock(&tlb->c.lock); 799 800 /* Note that the tlb is no longer clean. */ 801 tlb->c.dirty |= 1 << mmu_idx; 802 803 /* Make sure there's no cached translation for the new page. */ 804 tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page); 805 806 /* 807 * Only evict the old entry to the victim tlb if it's for a 808 * different page; otherwise just overwrite the stale data. 809 */ 810 if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) { 811 unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE; 812 CPUTLBEntry *tv = &desc->vtable[vidx]; 813 814 /* Evict the old entry into the victim tlb. */ 815 copy_tlb_helper_locked(tv, te); 816 desc->viotlb[vidx] = desc->iotlb[index]; 817 tlb_n_used_entries_dec(env, mmu_idx); 818 } 819 820 /* refill the tlb */ 821 /* 822 * At this point iotlb contains a physical section number in the lower 823 * TARGET_PAGE_BITS, and either 824 * + the ram_addr_t of the page base of the target RAM (RAM) 825 * + the offset within section->mr of the page base (I/O, ROMD) 826 * We subtract the vaddr_page (which is page aligned and thus won't 827 * disturb the low bits) to give an offset which can be added to the 828 * (non-page-aligned) vaddr of the eventual memory access to get 829 * the MemoryRegion offset for the access. Note that the vaddr we 830 * subtract here is that of the page base, and not the same as the 831 * vaddr we add back in io_readx()/io_writex()/get_page_addr_code(). 832 */ 833 desc->iotlb[index].addr = iotlb - vaddr_page; 834 desc->iotlb[index].attrs = attrs; 835 836 /* Now calculate the new entry */ 837 tn.addend = addend - vaddr_page; 838 if (prot & PAGE_READ) { 839 tn.addr_read = address; 840 if (wp_flags & BP_MEM_READ) { 841 tn.addr_read |= TLB_WATCHPOINT; 842 } 843 } else { 844 tn.addr_read = -1; 845 } 846 847 if (prot & PAGE_EXEC) { 848 tn.addr_code = address; 849 } else { 850 tn.addr_code = -1; 851 } 852 853 tn.addr_write = -1; 854 if (prot & PAGE_WRITE) { 855 tn.addr_write = write_address; 856 if (prot & PAGE_WRITE_INV) { 857 tn.addr_write |= TLB_INVALID_MASK; 858 } 859 if (wp_flags & BP_MEM_WRITE) { 860 tn.addr_write |= TLB_WATCHPOINT; 861 } 862 } 863 864 copy_tlb_helper_locked(te, &tn); 865 tlb_n_used_entries_inc(env, mmu_idx); 866 qemu_spin_unlock(&tlb->c.lock); 867 } 868 869 /* Add a new TLB entry, but without specifying the memory 870 * transaction attributes to be used. 871 */ 872 void tlb_set_page(CPUState *cpu, target_ulong vaddr, 873 hwaddr paddr, int prot, 874 int mmu_idx, target_ulong size) 875 { 876 tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED, 877 prot, mmu_idx, size); 878 } 879 880 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr) 881 { 882 ram_addr_t ram_addr; 883 884 ram_addr = qemu_ram_addr_from_host(ptr); 885 if (ram_addr == RAM_ADDR_INVALID) { 886 error_report("Bad ram pointer %p", ptr); 887 abort(); 888 } 889 return ram_addr; 890 } 891 892 /* 893 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the 894 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must 895 * be discarded and looked up again (e.g. via tlb_entry()). 896 */ 897 static void tlb_fill(CPUState *cpu, target_ulong addr, int size, 898 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) 899 { 900 CPUClass *cc = CPU_GET_CLASS(cpu); 901 bool ok; 902 903 /* 904 * This is not a probe, so only valid return is success; failure 905 * should result in exception + longjmp to the cpu loop. 906 */ 907 ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr); 908 assert(ok); 909 } 910 911 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, 912 int mmu_idx, target_ulong addr, uintptr_t retaddr, 913 MMUAccessType access_type, MemOp op) 914 { 915 CPUState *cpu = env_cpu(env); 916 hwaddr mr_offset; 917 MemoryRegionSection *section; 918 MemoryRegion *mr; 919 uint64_t val; 920 bool locked = false; 921 MemTxResult r; 922 923 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); 924 mr = section->mr; 925 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; 926 cpu->mem_io_pc = retaddr; 927 if (!cpu->can_do_io) { 928 cpu_io_recompile(cpu, retaddr); 929 } 930 931 cpu->mem_io_access_type = access_type; 932 933 if (mr->global_locking && !qemu_mutex_iothread_locked()) { 934 qemu_mutex_lock_iothread(); 935 locked = true; 936 } 937 r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs); 938 if (r != MEMTX_OK) { 939 hwaddr physaddr = mr_offset + 940 section->offset_within_address_space - 941 section->offset_within_region; 942 943 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type, 944 mmu_idx, iotlbentry->attrs, r, retaddr); 945 } 946 if (locked) { 947 qemu_mutex_unlock_iothread(); 948 } 949 950 return val; 951 } 952 953 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, 954 int mmu_idx, uint64_t val, target_ulong addr, 955 uintptr_t retaddr, MemOp op) 956 { 957 CPUState *cpu = env_cpu(env); 958 hwaddr mr_offset; 959 MemoryRegionSection *section; 960 MemoryRegion *mr; 961 bool locked = false; 962 MemTxResult r; 963 964 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); 965 mr = section->mr; 966 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; 967 if (!cpu->can_do_io) { 968 cpu_io_recompile(cpu, retaddr); 969 } 970 cpu->mem_io_pc = retaddr; 971 972 if (mr->global_locking && !qemu_mutex_iothread_locked()) { 973 qemu_mutex_lock_iothread(); 974 locked = true; 975 } 976 r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs); 977 if (r != MEMTX_OK) { 978 hwaddr physaddr = mr_offset + 979 section->offset_within_address_space - 980 section->offset_within_region; 981 982 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), 983 MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r, 984 retaddr); 985 } 986 if (locked) { 987 qemu_mutex_unlock_iothread(); 988 } 989 } 990 991 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs) 992 { 993 #if TCG_OVERSIZED_GUEST 994 return *(target_ulong *)((uintptr_t)entry + ofs); 995 #else 996 /* ofs might correspond to .addr_write, so use atomic_read */ 997 return atomic_read((target_ulong *)((uintptr_t)entry + ofs)); 998 #endif 999 } 1000 1001 /* Return true if ADDR is present in the victim tlb, and has been copied 1002 back to the main tlb. */ 1003 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, 1004 size_t elt_ofs, target_ulong page) 1005 { 1006 size_t vidx; 1007 1008 assert_cpu_is_self(env_cpu(env)); 1009 for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { 1010 CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx]; 1011 target_ulong cmp; 1012 1013 /* elt_ofs might correspond to .addr_write, so use atomic_read */ 1014 #if TCG_OVERSIZED_GUEST 1015 cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); 1016 #else 1017 cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs)); 1018 #endif 1019 1020 if (cmp == page) { 1021 /* Found entry in victim tlb, swap tlb and iotlb. */ 1022 CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index]; 1023 1024 qemu_spin_lock(&env_tlb(env)->c.lock); 1025 copy_tlb_helper_locked(&tmptlb, tlb); 1026 copy_tlb_helper_locked(tlb, vtlb); 1027 copy_tlb_helper_locked(vtlb, &tmptlb); 1028 qemu_spin_unlock(&env_tlb(env)->c.lock); 1029 1030 CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1031 CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx]; 1032 tmpio = *io; *io = *vio; *vio = tmpio; 1033 return true; 1034 } 1035 } 1036 return false; 1037 } 1038 1039 /* Macro to call the above, with local variables from the use context. */ 1040 #define VICTIM_TLB_HIT(TY, ADDR) \ 1041 victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \ 1042 (ADDR) & TARGET_PAGE_MASK) 1043 1044 /* 1045 * Return a ram_addr_t for the virtual address for execution. 1046 * 1047 * Return -1 if we can't translate and execute from an entire page 1048 * of RAM. This will force us to execute by loading and translating 1049 * one insn at a time, without caching. 1050 * 1051 * NOTE: This function will trigger an exception if the page is 1052 * not executable. 1053 */ 1054 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) 1055 { 1056 uintptr_t mmu_idx = cpu_mmu_index(env, true); 1057 uintptr_t index = tlb_index(env, mmu_idx, addr); 1058 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1059 void *p; 1060 1061 if (unlikely(!tlb_hit(entry->addr_code, addr))) { 1062 if (!VICTIM_TLB_HIT(addr_code, addr)) { 1063 tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0); 1064 index = tlb_index(env, mmu_idx, addr); 1065 entry = tlb_entry(env, mmu_idx, addr); 1066 1067 if (unlikely(entry->addr_code & TLB_INVALID_MASK)) { 1068 /* 1069 * The MMU protection covers a smaller range than a target 1070 * page, so we must redo the MMU check for every insn. 1071 */ 1072 return -1; 1073 } 1074 } 1075 assert(tlb_hit(entry->addr_code, addr)); 1076 } 1077 1078 if (unlikely(entry->addr_code & TLB_MMIO)) { 1079 /* The region is not backed by RAM. */ 1080 return -1; 1081 } 1082 1083 p = (void *)((uintptr_t)addr + entry->addend); 1084 return qemu_ram_addr_from_host_nofail(p); 1085 } 1086 1087 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, 1088 CPUIOTLBEntry *iotlbentry, uintptr_t retaddr) 1089 { 1090 ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr; 1091 1092 trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size); 1093 1094 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { 1095 struct page_collection *pages 1096 = page_collection_lock(ram_addr, ram_addr + size); 1097 tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr); 1098 page_collection_unlock(pages); 1099 } 1100 1101 /* 1102 * Set both VGA and migration bits for simplicity and to remove 1103 * the notdirty callback faster. 1104 */ 1105 cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE); 1106 1107 /* We remove the notdirty callback only if the code has been flushed. */ 1108 if (!cpu_physical_memory_is_clean(ram_addr)) { 1109 trace_memory_notdirty_set_dirty(mem_vaddr); 1110 tlb_set_dirty(cpu, mem_vaddr); 1111 } 1112 } 1113 1114 /* 1115 * Probe for whether the specified guest access is permitted. If it is not 1116 * permitted then an exception will be taken in the same way as if this 1117 * were a real access (and we will not return). 1118 * If the size is 0 or the page requires I/O access, returns NULL; otherwise, 1119 * returns the address of the host page similar to tlb_vaddr_to_host(). 1120 */ 1121 void *probe_access(CPUArchState *env, target_ulong addr, int size, 1122 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) 1123 { 1124 uintptr_t index = tlb_index(env, mmu_idx, addr); 1125 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1126 target_ulong tlb_addr; 1127 size_t elt_ofs; 1128 int wp_access; 1129 1130 g_assert(-(addr | TARGET_PAGE_MASK) >= size); 1131 1132 switch (access_type) { 1133 case MMU_DATA_LOAD: 1134 elt_ofs = offsetof(CPUTLBEntry, addr_read); 1135 wp_access = BP_MEM_READ; 1136 break; 1137 case MMU_DATA_STORE: 1138 elt_ofs = offsetof(CPUTLBEntry, addr_write); 1139 wp_access = BP_MEM_WRITE; 1140 break; 1141 case MMU_INST_FETCH: 1142 elt_ofs = offsetof(CPUTLBEntry, addr_code); 1143 wp_access = BP_MEM_READ; 1144 break; 1145 default: 1146 g_assert_not_reached(); 1147 } 1148 tlb_addr = tlb_read_ofs(entry, elt_ofs); 1149 1150 if (unlikely(!tlb_hit(tlb_addr, addr))) { 1151 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, 1152 addr & TARGET_PAGE_MASK)) { 1153 tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, retaddr); 1154 /* TLB resize via tlb_fill may have moved the entry. */ 1155 index = tlb_index(env, mmu_idx, addr); 1156 entry = tlb_entry(env, mmu_idx, addr); 1157 } 1158 tlb_addr = tlb_read_ofs(entry, elt_ofs); 1159 } 1160 1161 if (!size) { 1162 return NULL; 1163 } 1164 1165 if (unlikely(tlb_addr & TLB_FLAGS_MASK)) { 1166 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1167 1168 /* Reject I/O access, or other required slow-path. */ 1169 if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) { 1170 return NULL; 1171 } 1172 1173 /* Handle watchpoints. */ 1174 if (tlb_addr & TLB_WATCHPOINT) { 1175 cpu_check_watchpoint(env_cpu(env), addr, size, 1176 iotlbentry->attrs, wp_access, retaddr); 1177 } 1178 1179 /* Handle clean RAM pages. */ 1180 if (tlb_addr & TLB_NOTDIRTY) { 1181 notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr); 1182 } 1183 } 1184 1185 return (void *)((uintptr_t)addr + entry->addend); 1186 } 1187 1188 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, 1189 MMUAccessType access_type, int mmu_idx) 1190 { 1191 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1192 uintptr_t tlb_addr, page; 1193 size_t elt_ofs; 1194 1195 switch (access_type) { 1196 case MMU_DATA_LOAD: 1197 elt_ofs = offsetof(CPUTLBEntry, addr_read); 1198 break; 1199 case MMU_DATA_STORE: 1200 elt_ofs = offsetof(CPUTLBEntry, addr_write); 1201 break; 1202 case MMU_INST_FETCH: 1203 elt_ofs = offsetof(CPUTLBEntry, addr_code); 1204 break; 1205 default: 1206 g_assert_not_reached(); 1207 } 1208 1209 page = addr & TARGET_PAGE_MASK; 1210 tlb_addr = tlb_read_ofs(entry, elt_ofs); 1211 1212 if (!tlb_hit_page(tlb_addr, page)) { 1213 uintptr_t index = tlb_index(env, mmu_idx, addr); 1214 1215 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) { 1216 CPUState *cs = env_cpu(env); 1217 CPUClass *cc = CPU_GET_CLASS(cs); 1218 1219 if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) { 1220 /* Non-faulting page table read failed. */ 1221 return NULL; 1222 } 1223 1224 /* TLB resize via tlb_fill may have moved the entry. */ 1225 entry = tlb_entry(env, mmu_idx, addr); 1226 } 1227 tlb_addr = tlb_read_ofs(entry, elt_ofs); 1228 } 1229 1230 if (tlb_addr & ~TARGET_PAGE_MASK) { 1231 /* IO access */ 1232 return NULL; 1233 } 1234 1235 return (void *)((uintptr_t)addr + entry->addend); 1236 } 1237 1238 /* Probe for a read-modify-write atomic operation. Do not allow unaligned 1239 * operations, or io operations to proceed. Return the host address. */ 1240 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, 1241 TCGMemOpIdx oi, uintptr_t retaddr) 1242 { 1243 size_t mmu_idx = get_mmuidx(oi); 1244 uintptr_t index = tlb_index(env, mmu_idx, addr); 1245 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); 1246 target_ulong tlb_addr = tlb_addr_write(tlbe); 1247 MemOp mop = get_memop(oi); 1248 int a_bits = get_alignment_bits(mop); 1249 int s_bits = mop & MO_SIZE; 1250 void *hostaddr; 1251 1252 /* Adjust the given return address. */ 1253 retaddr -= GETPC_ADJ; 1254 1255 /* Enforce guest required alignment. */ 1256 if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) { 1257 /* ??? Maybe indicate atomic op to cpu_unaligned_access */ 1258 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE, 1259 mmu_idx, retaddr); 1260 } 1261 1262 /* Enforce qemu required alignment. */ 1263 if (unlikely(addr & ((1 << s_bits) - 1))) { 1264 /* We get here if guest alignment was not requested, 1265 or was not enforced by cpu_unaligned_access above. 1266 We might widen the access and emulate, but for now 1267 mark an exception and exit the cpu loop. */ 1268 goto stop_the_world; 1269 } 1270 1271 /* Check TLB entry and enforce page permissions. */ 1272 if (!tlb_hit(tlb_addr, addr)) { 1273 if (!VICTIM_TLB_HIT(addr_write, addr)) { 1274 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE, 1275 mmu_idx, retaddr); 1276 index = tlb_index(env, mmu_idx, addr); 1277 tlbe = tlb_entry(env, mmu_idx, addr); 1278 } 1279 tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; 1280 } 1281 1282 /* Notice an IO access or a needs-MMU-lookup access */ 1283 if (unlikely(tlb_addr & TLB_MMIO)) { 1284 /* There's really nothing that can be done to 1285 support this apart from stop-the-world. */ 1286 goto stop_the_world; 1287 } 1288 1289 /* Let the guest notice RMW on a write-only page. */ 1290 if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) { 1291 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD, 1292 mmu_idx, retaddr); 1293 /* Since we don't support reads and writes to different addresses, 1294 and we do have the proper page loaded for write, this shouldn't 1295 ever return. But just in case, handle via stop-the-world. */ 1296 goto stop_the_world; 1297 } 1298 1299 hostaddr = (void *)((uintptr_t)addr + tlbe->addend); 1300 1301 if (unlikely(tlb_addr & TLB_NOTDIRTY)) { 1302 notdirty_write(env_cpu(env), addr, 1 << s_bits, 1303 &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr); 1304 } 1305 1306 return hostaddr; 1307 1308 stop_the_world: 1309 cpu_loop_exit_atomic(env_cpu(env), retaddr); 1310 } 1311 1312 /* 1313 * Load Helpers 1314 * 1315 * We support two different access types. SOFTMMU_CODE_ACCESS is 1316 * specifically for reading instructions from system memory. It is 1317 * called by the translation loop and in some helpers where the code 1318 * is disassembled. It shouldn't be called directly by guest code. 1319 */ 1320 1321 typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr, 1322 TCGMemOpIdx oi, uintptr_t retaddr); 1323 1324 static inline uint64_t QEMU_ALWAYS_INLINE 1325 load_memop(const void *haddr, MemOp op) 1326 { 1327 switch (op) { 1328 case MO_UB: 1329 return ldub_p(haddr); 1330 case MO_BEUW: 1331 return lduw_be_p(haddr); 1332 case MO_LEUW: 1333 return lduw_le_p(haddr); 1334 case MO_BEUL: 1335 return (uint32_t)ldl_be_p(haddr); 1336 case MO_LEUL: 1337 return (uint32_t)ldl_le_p(haddr); 1338 case MO_BEQ: 1339 return ldq_be_p(haddr); 1340 case MO_LEQ: 1341 return ldq_le_p(haddr); 1342 default: 1343 qemu_build_not_reached(); 1344 } 1345 } 1346 1347 static inline uint64_t QEMU_ALWAYS_INLINE 1348 load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, 1349 uintptr_t retaddr, MemOp op, bool code_read, 1350 FullLoadHelper *full_load) 1351 { 1352 uintptr_t mmu_idx = get_mmuidx(oi); 1353 uintptr_t index = tlb_index(env, mmu_idx, addr); 1354 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1355 target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read; 1356 const size_t tlb_off = code_read ? 1357 offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read); 1358 const MMUAccessType access_type = 1359 code_read ? MMU_INST_FETCH : MMU_DATA_LOAD; 1360 unsigned a_bits = get_alignment_bits(get_memop(oi)); 1361 void *haddr; 1362 uint64_t res; 1363 size_t size = memop_size(op); 1364 1365 /* Handle CPU specific unaligned behaviour */ 1366 if (addr & ((1 << a_bits) - 1)) { 1367 cpu_unaligned_access(env_cpu(env), addr, access_type, 1368 mmu_idx, retaddr); 1369 } 1370 1371 /* If the TLB entry is for a different page, reload and try again. */ 1372 if (!tlb_hit(tlb_addr, addr)) { 1373 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, 1374 addr & TARGET_PAGE_MASK)) { 1375 tlb_fill(env_cpu(env), addr, size, 1376 access_type, mmu_idx, retaddr); 1377 index = tlb_index(env, mmu_idx, addr); 1378 entry = tlb_entry(env, mmu_idx, addr); 1379 } 1380 tlb_addr = code_read ? entry->addr_code : entry->addr_read; 1381 tlb_addr &= ~TLB_INVALID_MASK; 1382 } 1383 1384 /* Handle anything that isn't just a straight memory access. */ 1385 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { 1386 CPUIOTLBEntry *iotlbentry; 1387 bool need_swap; 1388 1389 /* For anything that is unaligned, recurse through full_load. */ 1390 if ((addr & (size - 1)) != 0) { 1391 goto do_unaligned_access; 1392 } 1393 1394 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1395 1396 /* Handle watchpoints. */ 1397 if (unlikely(tlb_addr & TLB_WATCHPOINT)) { 1398 /* On watchpoint hit, this will longjmp out. */ 1399 cpu_check_watchpoint(env_cpu(env), addr, size, 1400 iotlbentry->attrs, BP_MEM_READ, retaddr); 1401 } 1402 1403 need_swap = size > 1 && (tlb_addr & TLB_BSWAP); 1404 1405 /* Handle I/O access. */ 1406 if (likely(tlb_addr & TLB_MMIO)) { 1407 return io_readx(env, iotlbentry, mmu_idx, addr, retaddr, 1408 access_type, op ^ (need_swap * MO_BSWAP)); 1409 } 1410 1411 haddr = (void *)((uintptr_t)addr + entry->addend); 1412 1413 /* 1414 * Keep these two load_memop separate to ensure that the compiler 1415 * is able to fold the entire function to a single instruction. 1416 * There is a build-time assert inside to remind you of this. ;-) 1417 */ 1418 if (unlikely(need_swap)) { 1419 return load_memop(haddr, op ^ MO_BSWAP); 1420 } 1421 return load_memop(haddr, op); 1422 } 1423 1424 /* Handle slow unaligned access (it spans two pages or IO). */ 1425 if (size > 1 1426 && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1 1427 >= TARGET_PAGE_SIZE)) { 1428 target_ulong addr1, addr2; 1429 uint64_t r1, r2; 1430 unsigned shift; 1431 do_unaligned_access: 1432 addr1 = addr & ~((target_ulong)size - 1); 1433 addr2 = addr1 + size; 1434 r1 = full_load(env, addr1, oi, retaddr); 1435 r2 = full_load(env, addr2, oi, retaddr); 1436 shift = (addr & (size - 1)) * 8; 1437 1438 if (memop_big_endian(op)) { 1439 /* Big-endian combine. */ 1440 res = (r1 << shift) | (r2 >> ((size * 8) - shift)); 1441 } else { 1442 /* Little-endian combine. */ 1443 res = (r1 >> shift) | (r2 << ((size * 8) - shift)); 1444 } 1445 return res & MAKE_64BIT_MASK(0, size * 8); 1446 } 1447 1448 haddr = (void *)((uintptr_t)addr + entry->addend); 1449 return load_memop(haddr, op); 1450 } 1451 1452 /* 1453 * For the benefit of TCG generated code, we want to avoid the 1454 * complication of ABI-specific return type promotion and always 1455 * return a value extended to the register size of the host. This is 1456 * tcg_target_long, except in the case of a 32-bit host and 64-bit 1457 * data, and for that we always have uint64_t. 1458 * 1459 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS. 1460 */ 1461 1462 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr, 1463 TCGMemOpIdx oi, uintptr_t retaddr) 1464 { 1465 return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu); 1466 } 1467 1468 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, 1469 TCGMemOpIdx oi, uintptr_t retaddr) 1470 { 1471 return full_ldub_mmu(env, addr, oi, retaddr); 1472 } 1473 1474 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr, 1475 TCGMemOpIdx oi, uintptr_t retaddr) 1476 { 1477 return load_helper(env, addr, oi, retaddr, MO_LEUW, false, 1478 full_le_lduw_mmu); 1479 } 1480 1481 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr, 1482 TCGMemOpIdx oi, uintptr_t retaddr) 1483 { 1484 return full_le_lduw_mmu(env, addr, oi, retaddr); 1485 } 1486 1487 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr, 1488 TCGMemOpIdx oi, uintptr_t retaddr) 1489 { 1490 return load_helper(env, addr, oi, retaddr, MO_BEUW, false, 1491 full_be_lduw_mmu); 1492 } 1493 1494 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr, 1495 TCGMemOpIdx oi, uintptr_t retaddr) 1496 { 1497 return full_be_lduw_mmu(env, addr, oi, retaddr); 1498 } 1499 1500 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr, 1501 TCGMemOpIdx oi, uintptr_t retaddr) 1502 { 1503 return load_helper(env, addr, oi, retaddr, MO_LEUL, false, 1504 full_le_ldul_mmu); 1505 } 1506 1507 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr, 1508 TCGMemOpIdx oi, uintptr_t retaddr) 1509 { 1510 return full_le_ldul_mmu(env, addr, oi, retaddr); 1511 } 1512 1513 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr, 1514 TCGMemOpIdx oi, uintptr_t retaddr) 1515 { 1516 return load_helper(env, addr, oi, retaddr, MO_BEUL, false, 1517 full_be_ldul_mmu); 1518 } 1519 1520 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr, 1521 TCGMemOpIdx oi, uintptr_t retaddr) 1522 { 1523 return full_be_ldul_mmu(env, addr, oi, retaddr); 1524 } 1525 1526 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, 1527 TCGMemOpIdx oi, uintptr_t retaddr) 1528 { 1529 return load_helper(env, addr, oi, retaddr, MO_LEQ, false, 1530 helper_le_ldq_mmu); 1531 } 1532 1533 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, 1534 TCGMemOpIdx oi, uintptr_t retaddr) 1535 { 1536 return load_helper(env, addr, oi, retaddr, MO_BEQ, false, 1537 helper_be_ldq_mmu); 1538 } 1539 1540 /* 1541 * Provide signed versions of the load routines as well. We can of course 1542 * avoid this for 64-bit data, or for 32-bit data on 32-bit host. 1543 */ 1544 1545 1546 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, 1547 TCGMemOpIdx oi, uintptr_t retaddr) 1548 { 1549 return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr); 1550 } 1551 1552 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr, 1553 TCGMemOpIdx oi, uintptr_t retaddr) 1554 { 1555 return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr); 1556 } 1557 1558 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr, 1559 TCGMemOpIdx oi, uintptr_t retaddr) 1560 { 1561 return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr); 1562 } 1563 1564 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr, 1565 TCGMemOpIdx oi, uintptr_t retaddr) 1566 { 1567 return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr); 1568 } 1569 1570 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, 1571 TCGMemOpIdx oi, uintptr_t retaddr) 1572 { 1573 return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr); 1574 } 1575 1576 /* 1577 * Store Helpers 1578 */ 1579 1580 static inline void QEMU_ALWAYS_INLINE 1581 store_memop(void *haddr, uint64_t val, MemOp op) 1582 { 1583 switch (op) { 1584 case MO_UB: 1585 stb_p(haddr, val); 1586 break; 1587 case MO_BEUW: 1588 stw_be_p(haddr, val); 1589 break; 1590 case MO_LEUW: 1591 stw_le_p(haddr, val); 1592 break; 1593 case MO_BEUL: 1594 stl_be_p(haddr, val); 1595 break; 1596 case MO_LEUL: 1597 stl_le_p(haddr, val); 1598 break; 1599 case MO_BEQ: 1600 stq_be_p(haddr, val); 1601 break; 1602 case MO_LEQ: 1603 stq_le_p(haddr, val); 1604 break; 1605 default: 1606 qemu_build_not_reached(); 1607 } 1608 } 1609 1610 static inline void QEMU_ALWAYS_INLINE 1611 store_helper(CPUArchState *env, target_ulong addr, uint64_t val, 1612 TCGMemOpIdx oi, uintptr_t retaddr, MemOp op) 1613 { 1614 uintptr_t mmu_idx = get_mmuidx(oi); 1615 uintptr_t index = tlb_index(env, mmu_idx, addr); 1616 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); 1617 target_ulong tlb_addr = tlb_addr_write(entry); 1618 const size_t tlb_off = offsetof(CPUTLBEntry, addr_write); 1619 unsigned a_bits = get_alignment_bits(get_memop(oi)); 1620 void *haddr; 1621 size_t size = memop_size(op); 1622 1623 /* Handle CPU specific unaligned behaviour */ 1624 if (addr & ((1 << a_bits) - 1)) { 1625 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE, 1626 mmu_idx, retaddr); 1627 } 1628 1629 /* If the TLB entry is for a different page, reload and try again. */ 1630 if (!tlb_hit(tlb_addr, addr)) { 1631 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, 1632 addr & TARGET_PAGE_MASK)) { 1633 tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE, 1634 mmu_idx, retaddr); 1635 index = tlb_index(env, mmu_idx, addr); 1636 entry = tlb_entry(env, mmu_idx, addr); 1637 } 1638 tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; 1639 } 1640 1641 /* Handle anything that isn't just a straight memory access. */ 1642 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { 1643 CPUIOTLBEntry *iotlbentry; 1644 bool need_swap; 1645 1646 /* For anything that is unaligned, recurse through byte stores. */ 1647 if ((addr & (size - 1)) != 0) { 1648 goto do_unaligned_access; 1649 } 1650 1651 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; 1652 1653 /* Handle watchpoints. */ 1654 if (unlikely(tlb_addr & TLB_WATCHPOINT)) { 1655 /* On watchpoint hit, this will longjmp out. */ 1656 cpu_check_watchpoint(env_cpu(env), addr, size, 1657 iotlbentry->attrs, BP_MEM_WRITE, retaddr); 1658 } 1659 1660 need_swap = size > 1 && (tlb_addr & TLB_BSWAP); 1661 1662 /* Handle I/O access. */ 1663 if (tlb_addr & TLB_MMIO) { 1664 io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, 1665 op ^ (need_swap * MO_BSWAP)); 1666 return; 1667 } 1668 1669 /* Ignore writes to ROM. */ 1670 if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) { 1671 return; 1672 } 1673 1674 /* Handle clean RAM pages. */ 1675 if (tlb_addr & TLB_NOTDIRTY) { 1676 notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr); 1677 } 1678 1679 haddr = (void *)((uintptr_t)addr + entry->addend); 1680 1681 /* 1682 * Keep these two store_memop separate to ensure that the compiler 1683 * is able to fold the entire function to a single instruction. 1684 * There is a build-time assert inside to remind you of this. ;-) 1685 */ 1686 if (unlikely(need_swap)) { 1687 store_memop(haddr, val, op ^ MO_BSWAP); 1688 } else { 1689 store_memop(haddr, val, op); 1690 } 1691 return; 1692 } 1693 1694 /* Handle slow unaligned access (it spans two pages or IO). */ 1695 if (size > 1 1696 && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1 1697 >= TARGET_PAGE_SIZE)) { 1698 int i; 1699 uintptr_t index2; 1700 CPUTLBEntry *entry2; 1701 target_ulong page2, tlb_addr2; 1702 size_t size2; 1703 1704 do_unaligned_access: 1705 /* 1706 * Ensure the second page is in the TLB. Note that the first page 1707 * is already guaranteed to be filled, and that the second page 1708 * cannot evict the first. 1709 */ 1710 page2 = (addr + size) & TARGET_PAGE_MASK; 1711 size2 = (addr + size) & ~TARGET_PAGE_MASK; 1712 index2 = tlb_index(env, mmu_idx, page2); 1713 entry2 = tlb_entry(env, mmu_idx, page2); 1714 tlb_addr2 = tlb_addr_write(entry2); 1715 if (!tlb_hit_page(tlb_addr2, page2)) { 1716 if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) { 1717 tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE, 1718 mmu_idx, retaddr); 1719 index2 = tlb_index(env, mmu_idx, page2); 1720 entry2 = tlb_entry(env, mmu_idx, page2); 1721 } 1722 tlb_addr2 = tlb_addr_write(entry2); 1723 } 1724 1725 /* 1726 * Handle watchpoints. Since this may trap, all checks 1727 * must happen before any store. 1728 */ 1729 if (unlikely(tlb_addr & TLB_WATCHPOINT)) { 1730 cpu_check_watchpoint(env_cpu(env), addr, size - size2, 1731 env_tlb(env)->d[mmu_idx].iotlb[index].attrs, 1732 BP_MEM_WRITE, retaddr); 1733 } 1734 if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) { 1735 cpu_check_watchpoint(env_cpu(env), page2, size2, 1736 env_tlb(env)->d[mmu_idx].iotlb[index2].attrs, 1737 BP_MEM_WRITE, retaddr); 1738 } 1739 1740 /* 1741 * XXX: not efficient, but simple. 1742 * This loop must go in the forward direction to avoid issues 1743 * with self-modifying code in Windows 64-bit. 1744 */ 1745 for (i = 0; i < size; ++i) { 1746 uint8_t val8; 1747 if (memop_big_endian(op)) { 1748 /* Big-endian extract. */ 1749 val8 = val >> (((size - 1) * 8) - (i * 8)); 1750 } else { 1751 /* Little-endian extract. */ 1752 val8 = val >> (i * 8); 1753 } 1754 helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr); 1755 } 1756 return; 1757 } 1758 1759 haddr = (void *)((uintptr_t)addr + entry->addend); 1760 store_memop(haddr, val, op); 1761 } 1762 1763 void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, 1764 TCGMemOpIdx oi, uintptr_t retaddr) 1765 { 1766 store_helper(env, addr, val, oi, retaddr, MO_UB); 1767 } 1768 1769 void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, 1770 TCGMemOpIdx oi, uintptr_t retaddr) 1771 { 1772 store_helper(env, addr, val, oi, retaddr, MO_LEUW); 1773 } 1774 1775 void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, 1776 TCGMemOpIdx oi, uintptr_t retaddr) 1777 { 1778 store_helper(env, addr, val, oi, retaddr, MO_BEUW); 1779 } 1780 1781 void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, 1782 TCGMemOpIdx oi, uintptr_t retaddr) 1783 { 1784 store_helper(env, addr, val, oi, retaddr, MO_LEUL); 1785 } 1786 1787 void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, 1788 TCGMemOpIdx oi, uintptr_t retaddr) 1789 { 1790 store_helper(env, addr, val, oi, retaddr, MO_BEUL); 1791 } 1792 1793 void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, 1794 TCGMemOpIdx oi, uintptr_t retaddr) 1795 { 1796 store_helper(env, addr, val, oi, retaddr, MO_LEQ); 1797 } 1798 1799 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, 1800 TCGMemOpIdx oi, uintptr_t retaddr) 1801 { 1802 store_helper(env, addr, val, oi, retaddr, MO_BEQ); 1803 } 1804 1805 /* First set of helpers allows passing in of OI and RETADDR. This makes 1806 them callable from other helpers. */ 1807 1808 #define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr 1809 #define ATOMIC_NAME(X) \ 1810 HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu)) 1811 #define ATOMIC_MMU_DECLS 1812 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr) 1813 #define ATOMIC_MMU_CLEANUP 1814 1815 #define DATA_SIZE 1 1816 #include "atomic_template.h" 1817 1818 #define DATA_SIZE 2 1819 #include "atomic_template.h" 1820 1821 #define DATA_SIZE 4 1822 #include "atomic_template.h" 1823 1824 #ifdef CONFIG_ATOMIC64 1825 #define DATA_SIZE 8 1826 #include "atomic_template.h" 1827 #endif 1828 1829 #if HAVE_CMPXCHG128 || HAVE_ATOMIC128 1830 #define DATA_SIZE 16 1831 #include "atomic_template.h" 1832 #endif 1833 1834 /* Second set of helpers are directly callable from TCG as helpers. */ 1835 1836 #undef EXTRA_ARGS 1837 #undef ATOMIC_NAME 1838 #undef ATOMIC_MMU_LOOKUP 1839 #define EXTRA_ARGS , TCGMemOpIdx oi 1840 #define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END)) 1841 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC()) 1842 1843 #define DATA_SIZE 1 1844 #include "atomic_template.h" 1845 1846 #define DATA_SIZE 2 1847 #include "atomic_template.h" 1848 1849 #define DATA_SIZE 4 1850 #include "atomic_template.h" 1851 1852 #ifdef CONFIG_ATOMIC64 1853 #define DATA_SIZE 8 1854 #include "atomic_template.h" 1855 #endif 1856 1857 /* Code access functions. */ 1858 1859 static uint64_t full_ldub_cmmu(CPUArchState *env, target_ulong addr, 1860 TCGMemOpIdx oi, uintptr_t retaddr) 1861 { 1862 return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_cmmu); 1863 } 1864 1865 uint8_t helper_ret_ldb_cmmu(CPUArchState *env, target_ulong addr, 1866 TCGMemOpIdx oi, uintptr_t retaddr) 1867 { 1868 return full_ldub_cmmu(env, addr, oi, retaddr); 1869 } 1870 1871 static uint64_t full_le_lduw_cmmu(CPUArchState *env, target_ulong addr, 1872 TCGMemOpIdx oi, uintptr_t retaddr) 1873 { 1874 return load_helper(env, addr, oi, retaddr, MO_LEUW, true, 1875 full_le_lduw_cmmu); 1876 } 1877 1878 uint16_t helper_le_ldw_cmmu(CPUArchState *env, target_ulong addr, 1879 TCGMemOpIdx oi, uintptr_t retaddr) 1880 { 1881 return full_le_lduw_cmmu(env, addr, oi, retaddr); 1882 } 1883 1884 static uint64_t full_be_lduw_cmmu(CPUArchState *env, target_ulong addr, 1885 TCGMemOpIdx oi, uintptr_t retaddr) 1886 { 1887 return load_helper(env, addr, oi, retaddr, MO_BEUW, true, 1888 full_be_lduw_cmmu); 1889 } 1890 1891 uint16_t helper_be_ldw_cmmu(CPUArchState *env, target_ulong addr, 1892 TCGMemOpIdx oi, uintptr_t retaddr) 1893 { 1894 return full_be_lduw_cmmu(env, addr, oi, retaddr); 1895 } 1896 1897 static uint64_t full_le_ldul_cmmu(CPUArchState *env, target_ulong addr, 1898 TCGMemOpIdx oi, uintptr_t retaddr) 1899 { 1900 return load_helper(env, addr, oi, retaddr, MO_LEUL, true, 1901 full_le_ldul_cmmu); 1902 } 1903 1904 uint32_t helper_le_ldl_cmmu(CPUArchState *env, target_ulong addr, 1905 TCGMemOpIdx oi, uintptr_t retaddr) 1906 { 1907 return full_le_ldul_cmmu(env, addr, oi, retaddr); 1908 } 1909 1910 static uint64_t full_be_ldul_cmmu(CPUArchState *env, target_ulong addr, 1911 TCGMemOpIdx oi, uintptr_t retaddr) 1912 { 1913 return load_helper(env, addr, oi, retaddr, MO_BEUL, true, 1914 full_be_ldul_cmmu); 1915 } 1916 1917 uint32_t helper_be_ldl_cmmu(CPUArchState *env, target_ulong addr, 1918 TCGMemOpIdx oi, uintptr_t retaddr) 1919 { 1920 return full_be_ldul_cmmu(env, addr, oi, retaddr); 1921 } 1922 1923 uint64_t helper_le_ldq_cmmu(CPUArchState *env, target_ulong addr, 1924 TCGMemOpIdx oi, uintptr_t retaddr) 1925 { 1926 return load_helper(env, addr, oi, retaddr, MO_LEQ, true, 1927 helper_le_ldq_cmmu); 1928 } 1929 1930 uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr, 1931 TCGMemOpIdx oi, uintptr_t retaddr) 1932 { 1933 return load_helper(env, addr, oi, retaddr, MO_BEQ, true, 1934 helper_be_ldq_cmmu); 1935 } 1936