/*
 * Common CPU TLB handling
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
#include "exec/cpu_ldst.h"
#include "exec/cputlb.h"
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "tcg/tcg.h"
#include "qemu/error-report.h"
#include "exec/log.h"
#include "exec/helper-proto.h"
#include "qemu/atomic.h"
#include "qemu/atomic128.h"

/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
/* #define DEBUG_TLB */
/* #define DEBUG_TLB_LOG */

#ifdef DEBUG_TLB
# define DEBUG_TLB_GATE 1
# ifdef DEBUG_TLB_LOG
#  define DEBUG_TLB_LOG_GATE 1
# else
#  define DEBUG_TLB_LOG_GATE 0
# endif
#else
# define DEBUG_TLB_GATE 0
# define DEBUG_TLB_LOG_GATE 0
#endif

#define tlb_debug(fmt, ...) do { \
    if (DEBUG_TLB_LOG_GATE) { \
        qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
                      ## __VA_ARGS__); \
    } else if (DEBUG_TLB_GATE) { \
        fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
    } \
} while (0)

#define assert_cpu_is_self(cpu) do {                              \
        if (DEBUG_TLB_GATE) {                                     \
            g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
        }                                                         \
    } while (0)

/* run_on_cpu_data.target_ptr should always be big enough for a
 * target_ulong even on 32 bit builds */
QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));

/* We currently can't handle more than 16 bits in the MMUIDX bitmask.
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)

#if TCG_TARGET_IMPLEMENTS_DYN_TLB
static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
{
    return env->tlb_mask[mmu_idx] + (1 << CPU_TLB_ENTRY_BITS);
}

static void tlb_window_reset(CPUTLBWindow *window, int64_t ns,
                             size_t max_entries)
{
    window->begin_ns = ns;
    window->max_entries = max_entries;
}
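
/*
 * Allocate the initial, default-sized TLB and IOTLB arrays for every MMU
 * index.  Note that tlb_mask[] is stored scaled by CPU_TLB_ENTRY_BITS,
 * i.e. as a byte offset into the table, so the fast path can turn a shifted
 * guest address directly into the byte offset of its TLB entry.
 */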
static void tlb_dyn_init(CPUArchState *env)
{
    int i;

    for (i = 0; i < NB_MMU_MODES; i++) {
        CPUTLBDesc *desc = &env->tlb_d[i];
        size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;

        tlb_window_reset(&desc->window, get_clock_realtime(), 0);
        desc->n_used_entries = 0;
        env->tlb_mask[i] = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
        env->tlb_table[i] = g_new(CPUTLBEntry, n_entries);
        env->iotlb[i] = g_new(CPUIOTLBEntry, n_entries);
    }
}

/**
 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
 * @env: CPU that owns the TLB
 * @mmu_idx: MMU index of the TLB
 *
 * Called with tlb_lock held.
 *
 * We have two main constraints when resizing a TLB: (1) we only resize it
 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
 * the array or unnecessarily flushing it), which means we do not control how
 * frequently the resizing can occur; (2) we don't have access to the guest's
 * future scheduling decisions, and therefore have to decide the magnitude of
 * the resize based on past observations.
 *
 * In general, a memory-hungry process can benefit greatly from an appropriately
 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
 * we just have to make the TLB as large as possible; while an oversized TLB
 * results in minimal TLB miss rates, it also takes longer to be flushed
 * (flushes can be _very_ frequent), and the reduced locality can also hurt
 * performance.
 *
 * To achieve near-optimal performance for all kinds of workloads, we:
 *
 * 1. Aggressively increase the size of the TLB when the use rate of the
 * TLB being flushed is high, since it is likely that in the near future this
 * memory-hungry process will execute again, and its memory hungriness will
 * probably be similar.
 *
 * 2. Slowly reduce the size of the TLB as the use rate declines over a
 * reasonably large time window. The rationale is that if in such a time window
 * we have not observed a high TLB use rate, it is likely that we won't observe
 * it in the near future. In that case, once a time window expires we downsize
 * the TLB to match the maximum use rate observed in the window.
 *
 * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
 * since in that range performance is likely near-optimal. Recall that the TLB
 * is direct mapped, so we want the use rate to be low (or at least not too
 * high), since otherwise we are likely to have a significant amount of
 * conflict misses.
 */
static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
{
    CPUTLBDesc *desc = &env->tlb_d[mmu_idx];
    size_t old_size = tlb_n_entries(env, mmu_idx);
    size_t rate;
    size_t new_size = old_size;
    int64_t now = get_clock_realtime();
    int64_t window_len_ms = 100;
    int64_t window_len_ns = window_len_ms * 1000 * 1000;
    bool window_expired = now > desc->window.begin_ns + window_len_ns;

    if (desc->n_used_entries > desc->window.max_entries) {
        desc->window.max_entries = desc->n_used_entries;
    }
    rate = desc->window.max_entries * 100 / old_size;

    if (rate > 70) {
        new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
    } else if (rate < 30 && window_expired) {
        size_t ceil = pow2ceil(desc->window.max_entries);
        size_t expected_rate = desc->window.max_entries * 100 / ceil;

        /*
         * Avoid undersizing when the max number of entries seen is just below
         * a pow2. For instance, if max_entries == 1025, the expected use rate
         * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
         * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
         * later. Thus, make sure that the expected use rate remains below 70%.
         * (and since we double the size, that means the lowest rate we'd
         * expect to get is 35%, which is still in the 30-70% range where
         * we consider that the size is appropriate.)
         */
        if (expected_rate > 70) {
            ceil *= 2;
        }
        new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
    }

    if (new_size == old_size) {
        if (window_expired) {
            tlb_window_reset(&desc->window, now, desc->n_used_entries);
        }
        return;
    }

    g_free(env->tlb_table[mmu_idx]);
    g_free(env->iotlb[mmu_idx]);

    tlb_window_reset(&desc->window, now, 0);
    /* desc->n_used_entries is cleared by the caller */
    env->tlb_mask[mmu_idx] = (new_size - 1) << CPU_TLB_ENTRY_BITS;
    env->tlb_table[mmu_idx] = g_try_new(CPUTLBEntry, new_size);
    env->iotlb[mmu_idx] = g_try_new(CPUIOTLBEntry, new_size);
    /*
     * If the allocations fail, try smaller sizes. We just freed some
     * memory, so going back to half of new_size has a good chance of working.
     * Increased memory pressure elsewhere in the system might cause the
     * allocations to fail though, so we progressively reduce the allocation
     * size, aborting if we cannot even allocate the smallest TLB we support.
     */
    while (env->tlb_table[mmu_idx] == NULL || env->iotlb[mmu_idx] == NULL) {
        if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
            error_report("%s: %s", __func__, strerror(errno));
            abort();
        }
        new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
        env->tlb_mask[mmu_idx] = (new_size - 1) << CPU_TLB_ENTRY_BITS;

        g_free(env->tlb_table[mmu_idx]);
        g_free(env->iotlb[mmu_idx]);
        env->tlb_table[mmu_idx] = g_try_new(CPUTLBEntry, new_size);
        env->iotlb[mmu_idx] = g_try_new(CPUIOTLBEntry, new_size);
    }
}
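
/*
 * Worked example of the sizing policy above: with old_size == 1024 and
 * window.max_entries == 768, rate == 75 (> 70), so the TLB doubles to 2048
 * (capped at 1 << CPU_TLB_DYN_MAX_BITS).  With max_entries == 200 and an
 * expired window, rate == 19 (< 30); pow2ceil(200) == 256 would give an
 * expected rate of 78%, which is > 70, so ceil is doubled and the new size
 * becomes 512 (floored at 1 << CPU_TLB_DYN_MIN_BITS).
 */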

static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx)
{
    tlb_mmu_resize_locked(env, mmu_idx);
    memset(env->tlb_table[mmu_idx], -1, sizeof_tlb(env, mmu_idx));
    env->tlb_d[mmu_idx].n_used_entries = 0;
}

static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
{
    env->tlb_d[mmu_idx].n_used_entries++;
}

static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
{
    env->tlb_d[mmu_idx].n_used_entries--;
}

#else /* !TCG_TARGET_IMPLEMENTS_DYN_TLB */

static inline void tlb_dyn_init(CPUArchState *env)
{
}

static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx)
{
    memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
}

static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
{
}

static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
{
}
#endif /* TCG_TARGET_IMPLEMENTS_DYN_TLB */

void tlb_init(CPUState *cpu)
{
    CPUArchState *env = cpu->env_ptr;

    qemu_spin_init(&env->tlb_c.lock);

    /* Ensure that cpu_reset performs a full flush.  */
    env->tlb_c.dirty = ALL_MMUIDX_BITS;

    tlb_dyn_init(env);
}
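
/*
 * The tlb_flush*() entry points below come in three flavours:
 *
 *   - plain: flush only the TLB of the given vCPU (queued asynchronously
 *     when that vCPU is not the calling thread's);
 *   - _all_cpus: queue the flush on every other vCPU and run it on the
 *     source vCPU immediately;
 *   - _all_cpus_synced: as above, but the source vCPU's flush is queued as
 *     "safe" work, so all queued flushes complete before execution resumes.
 */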

/* flush_all_helper: run fn across all cpus
 *
 * If a synchronisation point is required (see the *_synced flush variants
 * below), the src cpu's own helper is additionally queued as "safe" work,
 * creating a synchronisation point where all queued work will be finished
 * before execution starts again.
 */
static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
                             run_on_cpu_data d)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu != src) {
            async_run_on_cpu(cpu, fn, d);
        }
    }
}

void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
{
    CPUState *cpu;
    size_t full = 0, part = 0, elide = 0;

    CPU_FOREACH(cpu) {
        CPUArchState *env = cpu->env_ptr;

        full += atomic_read(&env->tlb_c.full_flush_count);
        part += atomic_read(&env->tlb_c.part_flush_count);
        elide += atomic_read(&env->tlb_c.elide_flush_count);
    }
    *pfull = full;
    *ppart = part;
    *pelide = elide;
}

static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
{
    tlb_table_flush_by_mmuidx(env, mmu_idx);
    memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
    env->tlb_d[mmu_idx].large_page_addr = -1;
    env->tlb_d[mmu_idx].large_page_mask = -1;
    env->tlb_d[mmu_idx].vindex = 0;
}

static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
{
    CPUArchState *env = cpu->env_ptr;
    uint16_t asked = data.host_int;
    uint16_t all_dirty, work, to_clean;

    assert_cpu_is_self(cpu);

    tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);

    qemu_spin_lock(&env->tlb_c.lock);

    all_dirty = env->tlb_c.dirty;
    to_clean = asked & all_dirty;
    all_dirty &= ~to_clean;
    env->tlb_c.dirty = all_dirty;

    for (work = to_clean; work != 0; work &= work - 1) {
        int mmu_idx = ctz32(work);
        tlb_flush_one_mmuidx_locked(env, mmu_idx);
    }

    qemu_spin_unlock(&env->tlb_c.lock);

    cpu_tb_jmp_cache_clear(cpu);

    if (to_clean == ALL_MMUIDX_BITS) {
        atomic_set(&env->tlb_c.full_flush_count,
                   env->tlb_c.full_flush_count + 1);
    } else {
        atomic_set(&env->tlb_c.part_flush_count,
                   env->tlb_c.part_flush_count + ctpop16(to_clean));
        if (to_clean != asked) {
            atomic_set(&env->tlb_c.elide_flush_count,
                       env->tlb_c.elide_flush_count +
                       ctpop16(asked & ~to_clean));
        }
    }
}

void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
{
    tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);

    if (cpu->created && !qemu_cpu_is_self(cpu)) {
        async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
                         RUN_ON_CPU_HOST_INT(idxmap));
    } else {
        tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
    }
}

void tlb_flush(CPUState *cpu)
{
    tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
}

void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;

    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
}

void tlb_flush_all_cpus(CPUState *src_cpu)
{
    tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
}

void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;

    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
    async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
}

void tlb_flush_all_cpus_synced(CPUState *src_cpu)
{
    tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
}
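
/*
 * Illustrative use from a target's TLB-management code (the MMU index
 * names here are hypothetical; each target defines its own):
 *
 *     tlb_flush_by_mmuidx(cs, (1 << MMU_USER_IDX) | (1 << MMU_KERNEL_IDX));
 *
 * flushes only the user and kernel translations of one vCPU, leaving the
 * TLBs of any other MMU indexes intact.
 */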

static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
                                        target_ulong page)
{
    return tlb_hit_page(tlb_entry->addr_read, page) ||
           tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
           tlb_hit_page(tlb_entry->addr_code, page);
}

/**
 * tlb_entry_is_empty - return true if the entry is not in use
 * @te: pointer to CPUTLBEntry
 */
static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
{
    return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
}

/* Called with tlb_c.lock held */
static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
                                          target_ulong page)
{
    if (tlb_hit_page_anyprot(tlb_entry, page)) {
        memset(tlb_entry, -1, sizeof(*tlb_entry));
        return true;
    }
    return false;
}

/* Called with tlb_c.lock held */
static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
                                              target_ulong page)
{
    int k;

    assert_cpu_is_self(ENV_GET_CPU(env));
    for (k = 0; k < CPU_VTLB_SIZE; k++) {
        if (tlb_flush_entry_locked(&env->tlb_v_table[mmu_idx][k], page)) {
            tlb_n_used_entries_dec(env, mmu_idx);
        }
    }
}

static void tlb_flush_page_locked(CPUArchState *env, int midx,
                                  target_ulong page)
{
    target_ulong lp_addr = env->tlb_d[midx].large_page_addr;
    target_ulong lp_mask = env->tlb_d[midx].large_page_mask;

    /* Check if we need to flush due to large pages.  */
    if ((page & lp_mask) == lp_addr) {
        tlb_debug("forcing full flush midx %d ("
                  TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
                  midx, lp_addr, lp_mask);
        tlb_flush_one_mmuidx_locked(env, midx);
    } else {
        if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
            tlb_n_used_entries_dec(env, midx);
        }
        tlb_flush_vtlb_page_locked(env, midx, page);
    }
}

/* As we are going to hijack the bottom bits of the page address for a
 * mmuidx bit mask we need to fail to build if we can't do that
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);

static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
                                                run_on_cpu_data data)
{
    CPUArchState *env = cpu->env_ptr;
    target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
    target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
    unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
              addr, mmu_idx_bitmap);

    qemu_spin_lock(&env->tlb_c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
            tlb_flush_page_locked(env, mmu_idx, addr);
        }
    }
    qemu_spin_unlock(&env->tlb_c.lock);

    tb_flush_jmp_cache(cpu, addr);
}

void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
{
    target_ulong addr_and_mmu_idx;

    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);

    /* This should already be page aligned */
    addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
    addr_and_mmu_idx |= idxmap;

    if (!qemu_cpu_is_self(cpu)) {
        async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work,
                         RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
    } else {
        tlb_flush_page_by_mmuidx_async_work(
            cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
    }
}

void tlb_flush_page(CPUState *cpu, target_ulong addr)
{
    tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
}

void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
                                       uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
    target_ulong addr_and_mmu_idx;

    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    /* This should already be page aligned */
    addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
    addr_and_mmu_idx |= idxmap;

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
    fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
}

void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
{
    tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
}

void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
                                              target_ulong addr,
                                              uint16_t idxmap)
{
    const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
    target_ulong addr_and_mmu_idx;

    tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);

    /* This should already be page aligned */
    addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
    addr_and_mmu_idx |= idxmap;

    flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
    async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
}

void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
{
    tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
}

/* update the TLBs so that writes to code in the virtual page 'addr'
   can be detected */
void tlb_protect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
                                             DIRTY_MEMORY_CODE);
}

/* update the TLB so that writes in physical page 'phys_addr' are no longer
   tested for self modifying code */
void tlb_unprotect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
}


/*
 * Dirty write flag handling
 *
 * When the TCG code writes to a location it looks up the address in
 * the TLB and uses that data to compute the final address. If any of
 * the lower bits of the address are set then the slow path is forced.
 * There are a number of reasons to do this but for normal RAM the
 * most usual is detecting writes to code regions which may invalidate
 * generated code.
 *
 * Other vCPUs might be reading their TLBs during guest execution, so we update
 * te->addr_write with atomic_set. We don't need to worry about this for
 * oversized guests as MTTCG is disabled for them.
 *
 * Called with tlb_c.lock held.
 */
static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
                                         uintptr_t start, uintptr_t length)
{
    uintptr_t addr = tlb_entry->addr_write;

    if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
        addr &= TARGET_PAGE_MASK;
        addr += tlb_entry->addend;
        if ((addr - start) < length) {
#if TCG_OVERSIZED_GUEST
            tlb_entry->addr_write |= TLB_NOTDIRTY;
#else
            atomic_set(&tlb_entry->addr_write,
                       tlb_entry->addr_write | TLB_NOTDIRTY);
#endif
        }
    }
}

/*
 * Called with tlb_c.lock held.
 * Called only from the vCPU context, i.e. the TLB's owner thread.
 */
static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
{
    *d = *s;
}

/* This is a cross vCPU call (i.e. another vCPU resetting the flags of
 * the target vCPU).
 * We must take tlb_c.lock to avoid racing with another vCPU update. The only
 * thing actually updated is the target TLB entry ->addr_write flags.
 */
void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
{
    CPUArchState *env;

    int mmu_idx;

    env = cpu->env_ptr;
    qemu_spin_lock(&env->tlb_c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        unsigned int i;
        unsigned int n = tlb_n_entries(env, mmu_idx);

        for (i = 0; i < n; i++) {
            tlb_reset_dirty_range_locked(&env->tlb_table[mmu_idx][i], start1,
                                         length);
        }

        for (i = 0; i < CPU_VTLB_SIZE; i++) {
            tlb_reset_dirty_range_locked(&env->tlb_v_table[mmu_idx][i], start1,
                                         length);
        }
    }
    qemu_spin_unlock(&env->tlb_c.lock);
}

/* Called with tlb_c.lock held */
static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
                                         target_ulong vaddr)
{
    if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
        tlb_entry->addr_write = vaddr;
    }
}

/* update the TLB corresponding to virtual page vaddr
   so that it is no longer dirty */
void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
{
    CPUArchState *env = cpu->env_ptr;
    int mmu_idx;

    assert_cpu_is_self(cpu);

    vaddr &= TARGET_PAGE_MASK;
    qemu_spin_lock(&env->tlb_c.lock);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
    }

    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        int k;
        for (k = 0; k < CPU_VTLB_SIZE; k++) {
            tlb_set_dirty1_locked(&env->tlb_v_table[mmu_idx][k], vaddr);
        }
    }
    qemu_spin_unlock(&env->tlb_c.lock);
}

/* Our TLB does not support large pages, so remember the area covered by
   large pages and trigger a full TLB flush if these are invalidated.  */
static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
                               target_ulong vaddr, target_ulong size)
{
    target_ulong lp_addr = env->tlb_d[mmu_idx].large_page_addr;
    target_ulong lp_mask = ~(size - 1);

    if (lp_addr == (target_ulong)-1) {
        /* No previous large page.  */
        lp_addr = vaddr;
    } else {
        /* Extend the existing region to include the new page.
           This is a compromise between unnecessary flushes and
           the cost of maintaining a full variable size TLB.  */
        lp_mask &= env->tlb_d[mmu_idx].large_page_mask;
        while (((lp_addr ^ vaddr) & lp_mask) != 0) {
            lp_mask <<= 1;
        }
    }
    env->tlb_d[mmu_idx].large_page_addr = lp_addr & lp_mask;
    env->tlb_d[mmu_idx].large_page_mask = lp_mask;
}
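
/*
 * Example of the widening above: if a 2MB large page at 0x40000000 is
 * already recorded and another 2MB large page at 0x40400000 is added,
 * lp_mask is shifted left until both addresses fall in the same region,
 * which here grows to 8MB (0x40000000-0x407fffff).  A later tlb_flush_page()
 * anywhere inside that region then forces a full flush of this mmu_idx
 * (see tlb_flush_page_locked() above).
 */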

/* Add a new TLB entry. At most one entry for a given virtual address
 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
 * supplied size is only used by tlb_flush_page.
 *
 * Called from TCG-generated code, which is under an RCU read-side
 * critical section.
 */
void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
                             hwaddr paddr, MemTxAttrs attrs, int prot,
                             int mmu_idx, target_ulong size)
{
    CPUArchState *env = cpu->env_ptr;
    MemoryRegionSection *section;
    unsigned int index;
    target_ulong address;
    target_ulong code_address;
    uintptr_t addend;
    CPUTLBEntry *te, tn;
    hwaddr iotlb, xlat, sz, paddr_page;
    target_ulong vaddr_page;
    int asidx = cpu_asidx_from_attrs(cpu, attrs);

    assert_cpu_is_self(cpu);

    if (size <= TARGET_PAGE_SIZE) {
        sz = TARGET_PAGE_SIZE;
    } else {
        tlb_add_large_page(env, mmu_idx, vaddr, size);
        sz = size;
    }
    vaddr_page = vaddr & TARGET_PAGE_MASK;
    paddr_page = paddr & TARGET_PAGE_MASK;

    section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
                                                &xlat, &sz, attrs, &prot);
    assert(sz >= TARGET_PAGE_SIZE);

    tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
              " prot=%x idx=%d\n",
              vaddr, paddr, prot, mmu_idx);

    address = vaddr_page;
    if (size < TARGET_PAGE_SIZE) {
        /*
         * Slow-path the TLB entries; we will repeat the MMU check and TLB
         * fill on every access.
         */
        address |= TLB_RECHECK;
    }
    if (!memory_region_is_ram(section->mr) &&
        !memory_region_is_romd(section->mr)) {
        /* IO memory case */
        address |= TLB_MMIO;
        addend = 0;
    } else {
        /* TLB_MMIO for rom/romd handled below */
        addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
    }

    code_address = address;
    iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page,
                                            paddr_page, xlat, prot, &address);

    index = tlb_index(env, mmu_idx, vaddr_page);
    te = tlb_entry(env, mmu_idx, vaddr_page);

    /*
     * Hold the TLB lock for the rest of the function. We could acquire/release
     * the lock several times in the function, but it is faster to amortize the
     * acquisition cost by acquiring it just once. Note that this leads to
     * a longer critical section, but this is not a concern since the TLB lock
     * is unlikely to be contended.
     */
    qemu_spin_lock(&env->tlb_c.lock);

    /* Note that the tlb is no longer clean.  */
    env->tlb_c.dirty |= 1 << mmu_idx;

    /* Make sure there's no cached translation for the new page.  */
    tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);

    /*
     * Only evict the old entry to the victim tlb if it's for a
     * different page; otherwise just overwrite the stale data.
     */
    if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
        unsigned vidx = env->tlb_d[mmu_idx].vindex++ % CPU_VTLB_SIZE;
        CPUTLBEntry *tv = &env->tlb_v_table[mmu_idx][vidx];

        /* Evict the old entry into the victim tlb.  */
        copy_tlb_helper_locked(tv, te);
        env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
        tlb_n_used_entries_dec(env, mmu_idx);
    }

    /* refill the tlb */
    /*
     * At this point iotlb contains a physical section number in the lower
     * TARGET_PAGE_BITS, and either
     *  + the ram_addr_t of the page base of the target RAM (if NOTDIRTY or ROM)
     *  + the offset within section->mr of the page base (otherwise)
     * We subtract the vaddr_page (which is page aligned and thus won't
     * disturb the low bits) to give an offset which can be added to the
     * (non-page-aligned) vaddr of the eventual memory access to get
     * the MemoryRegion offset for the access. Note that the vaddr we
     * subtract here is that of the page base, and not the same as the
     * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
     */
    env->iotlb[mmu_idx][index].addr = iotlb - vaddr_page;
    env->iotlb[mmu_idx][index].attrs = attrs;

    /* Now calculate the new entry */
    tn.addend = addend - vaddr_page;
    if (prot & PAGE_READ) {
        tn.addr_read = address;
    } else {
        tn.addr_read = -1;
    }

    if (prot & PAGE_EXEC) {
        tn.addr_code = code_address;
    } else {
        tn.addr_code = -1;
    }

    tn.addr_write = -1;
    if (prot & PAGE_WRITE) {
        if ((memory_region_is_ram(section->mr) && section->readonly)
            || memory_region_is_romd(section->mr)) {
            /* Write access calls the I/O callback.  */
            tn.addr_write = address | TLB_MMIO;
        } else if (memory_region_is_ram(section->mr)
                   && cpu_physical_memory_is_clean(
                       memory_region_get_ram_addr(section->mr) + xlat)) {
            tn.addr_write = address | TLB_NOTDIRTY;
        } else {
            tn.addr_write = address;
        }
        if (prot & PAGE_WRITE_INV) {
            tn.addr_write |= TLB_INVALID_MASK;
        }
    }

    copy_tlb_helper_locked(te, &tn);
    tlb_n_used_entries_inc(env, mmu_idx);
    qemu_spin_unlock(&env->tlb_c.lock);
}

/* Add a new TLB entry, but without specifying the memory
 * transaction attributes to be used.
 */
void tlb_set_page(CPUState *cpu, target_ulong vaddr,
                  hwaddr paddr, int prot,
                  int mmu_idx, target_ulong size)
{
    tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
                            prot, mmu_idx, size);
}

static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
    ram_addr_t ram_addr;

    ram_addr = qemu_ram_addr_from_host(ptr);
    if (ram_addr == RAM_ADDR_INVALID) {
        error_report("Bad ram pointer %p", ptr);
        abort();
    }
    return ram_addr;
}
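
/*
 * io_readx() and io_writex() below handle the slow path for accesses that
 * cannot simply be performed on host RAM: the iotlb entry is converted back
 * into a MemoryRegionSection and the access is dispatched through the memory
 * API, taking the BQL first for regions that require global locking.
 * TLB_RECHECK accesses repeat the MMU check first, and are completed as
 * ordinary RAM accesses when the recheck shows plain RAM.
 */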
static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                         int mmu_idx,
                         target_ulong addr, uintptr_t retaddr,
                         bool recheck, MMUAccessType access_type, int size)
{
    CPUState *cpu = ENV_GET_CPU(env);
    hwaddr mr_offset;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    uint64_t val;
    bool locked = false;
    MemTxResult r;

    if (recheck) {
        /*
         * This is a TLB_RECHECK access, where the MMU protection
         * covers a smaller range than a target page, and we must
         * repeat the MMU check here. This tlb_fill() call might
         * longjump out if this access should cause a guest exception.
         */
        CPUTLBEntry *entry;
        target_ulong tlb_addr;

        tlb_fill(cpu, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr);

        entry = tlb_entry(env, mmu_idx, addr);
        tlb_addr = entry->addr_read;
        if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
            /* RAM access */
            uintptr_t haddr = addr + entry->addend;

            return ldn_p((void *)haddr, size);
        }
        /* Fall through for handling IO accesses */
    }

    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
    mr = section->mr;
    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
    cpu->mem_io_pc = retaddr;
    if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }

    cpu->mem_io_vaddr = addr;
    cpu->mem_io_access_type = access_type;

    if (mr->global_locking && !qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        locked = true;
    }
    r = memory_region_dispatch_read(mr, mr_offset,
                                    &val, size, iotlbentry->attrs);
    if (r != MEMTX_OK) {
        hwaddr physaddr = mr_offset +
            section->offset_within_address_space -
            section->offset_within_region;

        cpu_transaction_failed(cpu, physaddr, addr, size, access_type,
                               mmu_idx, iotlbentry->attrs, r, retaddr);
    }
    if (locked) {
        qemu_mutex_unlock_iothread();
    }

    return val;
}

static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                      int mmu_idx,
                      uint64_t val, target_ulong addr,
                      uintptr_t retaddr, bool recheck, int size)
{
    CPUState *cpu = ENV_GET_CPU(env);
    hwaddr mr_offset;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    bool locked = false;
    MemTxResult r;

    if (recheck) {
        /*
         * This is a TLB_RECHECK access, where the MMU protection
         * covers a smaller range than a target page, and we must
         * repeat the MMU check here. This tlb_fill() call might
         * longjump out if this access should cause a guest exception.
         */
        CPUTLBEntry *entry;
        target_ulong tlb_addr;

        tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr);

        entry = tlb_entry(env, mmu_idx, addr);
        tlb_addr = tlb_addr_write(entry);
        if (!(tlb_addr & ~(TARGET_PAGE_MASK | TLB_RECHECK))) {
            /* RAM access */
            uintptr_t haddr = addr + entry->addend;

            stn_p((void *)haddr, size, val);
            return;
        }
        /* Fall through for handling IO accesses */
    }

    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
    mr = section->mr;
    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
    if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }
    cpu->mem_io_vaddr = addr;
    cpu->mem_io_pc = retaddr;

    if (mr->global_locking && !qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        locked = true;
    }
    r = memory_region_dispatch_write(mr, mr_offset,
                                     val, size, iotlbentry->attrs);
    if (r != MEMTX_OK) {
        hwaddr physaddr = mr_offset +
            section->offset_within_address_space -
            section->offset_within_region;

        cpu_transaction_failed(cpu, physaddr, addr, size, MMU_DATA_STORE,
                               mmu_idx, iotlbentry->attrs, r, retaddr);
    }
    if (locked) {
        qemu_mutex_unlock_iothread();
    }
}
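
/*
 * The victim TLB is a small, CPU_VTLB_SIZE-entry, fully associative cache
 * of entries recently evicted from the direct-mapped TLB.  On a miss in the
 * main table we search it before calling tlb_fill(); a hit swaps the victim
 * entry back into the main table, which keeps simple conflict misses from
 * forcing a new page-table walk.
 */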

/* Return true if ADDR is present in the victim tlb, and has been copied
   back to the main tlb.  */
static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
                           size_t elt_ofs, target_ulong page)
{
    size_t vidx;

    assert_cpu_is_self(ENV_GET_CPU(env));
    for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
        CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx];
        target_ulong cmp;

        /* elt_ofs might correspond to .addr_write, so use atomic_read */
#if TCG_OVERSIZED_GUEST
        cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
#else
        cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
#endif

        if (cmp == page) {
            /* Found entry in victim tlb, swap tlb and iotlb.  */
            CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];

            qemu_spin_lock(&env->tlb_c.lock);
            copy_tlb_helper_locked(&tmptlb, tlb);
            copy_tlb_helper_locked(tlb, vtlb);
            copy_tlb_helper_locked(vtlb, &tmptlb);
            qemu_spin_unlock(&env->tlb_c.lock);

            CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
            CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
            tmpio = *io; *io = *vio; *vio = tmpio;
            return true;
        }
    }
    return false;
}

/* Macro to call the above, with local variables from the use context.  */
#define VICTIM_TLB_HIT(TY, ADDR) \
  victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
                 (ADDR) & TARGET_PAGE_MASK)

/* NOTE: this function can trigger an exception */
/* NOTE2: the returned address is not exactly the physical address: it
 * is actually a ram_addr_t (in system mode; the user mode emulation
 * version of this function returns a guest virtual address).
 */
tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
{
    uintptr_t mmu_idx = cpu_mmu_index(env, true);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    void *p;

    if (unlikely(!tlb_hit(entry->addr_code, addr))) {
        if (!VICTIM_TLB_HIT(addr_code, addr)) {
            tlb_fill(ENV_GET_CPU(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
        }
        assert(tlb_hit(entry->addr_code, addr));
    }

    if (unlikely(entry->addr_code & (TLB_RECHECK | TLB_MMIO))) {
        /*
         * Return -1 if we can't translate and execute from an entire
         * page of RAM here, which will cause us to execute by loading
         * and translating one insn at a time, without caching:
         *  - TLB_RECHECK: means the MMU protection covers a smaller range
         *    than a target page, so we must redo the MMU check every insn
         *  - TLB_MMIO: region is not backed by RAM
         */
        return -1;
    }

    p = (void *)((uintptr_t)addr + entry->addend);
    return qemu_ram_addr_from_host_nofail(p);
}

/* Probe for whether the specified guest write access is permitted.
 * If it is not permitted then an exception will be taken in the same
 * way as if this were a real write access (and we will not return).
 * Otherwise the function will return, and there will be a valid
 * entry in the TLB for this access.
 */
void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx,
                 uintptr_t retaddr)
{
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);

    if (!tlb_hit(tlb_addr_write(entry), addr)) {
        /* TLB entry is for a different page */
        if (!VICTIM_TLB_HIT(addr_write, addr)) {
            tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE,
                     mmu_idx, retaddr);
        }
    }
}
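
/*
 * probe_write() is typically called by target helpers that must not take a
 * write fault part-way through a multi-byte update: probing up front either
 * raises the exception immediately or guarantees that a TLB entry for the
 * page is present before any data is modified.  Only the page containing
 * the probed address is checked, so accesses that may cross a page boundary
 * need to probe each page separately.
 */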

/* Probe for a read-modify-write atomic operation.  Do not allow unaligned
 * operations, or io operations to proceed.  Return the host address.  */
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr,
                               NotDirtyInfo *ndi)
{
    size_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(tlbe);
    TCGMemOp mop = get_memop(oi);
    int a_bits = get_alignment_bits(mop);
    int s_bits = mop & MO_SIZE;
    void *hostaddr;

    /* Adjust the given return address.  */
    retaddr -= GETPC_ADJ;

    /* Enforce guest required alignment.  */
    if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
        /* ??? Maybe indicate atomic op to cpu_unaligned_access */
        cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* Enforce qemu required alignment.  */
    if (unlikely(addr & ((1 << s_bits) - 1))) {
        /* We get here if guest alignment was not requested,
           or was not enforced by cpu_unaligned_access above.
           We might widen the access and emulate, but for now
           mark an exception and exit the cpu loop.  */
        goto stop_the_world;
    }

    /* Check TLB entry and enforce page permissions.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!VICTIM_TLB_HIT(addr_write, addr)) {
            tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_STORE,
                     mmu_idx, retaddr);
        }
        tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
    }

    /* Notice an IO access or a needs-MMU-lookup access */
    if (unlikely(tlb_addr & (TLB_MMIO | TLB_RECHECK))) {
        /* There's really nothing that can be done to
           support this apart from stop-the-world.  */
        goto stop_the_world;
    }

    /* Let the guest notice RMW on a write-only page.  */
    if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
        tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_LOAD,
                 mmu_idx, retaddr);
        /* Since we don't support reads and writes to different addresses,
           and we do have the proper page loaded for write, this shouldn't
           ever return.  But just in case, handle via stop-the-world.  */
        goto stop_the_world;
    }

    hostaddr = (void *)((uintptr_t)addr + tlbe->addend);

    ndi->active = false;
    if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
        ndi->active = true;
        memory_notdirty_write_prepare(ndi, ENV_GET_CPU(env), addr,
                                      qemu_ram_addr_from_host_nofail(hostaddr),
                                      1 << s_bits);
    }

    return hostaddr;

 stop_the_world:
    cpu_loop_exit_atomic(ENV_GET_CPU(env), retaddr);
}

#ifdef TARGET_WORDS_BIGENDIAN
# define TGT_BE(X)  (X)
# define TGT_LE(X)  BSWAP(X)
#else
# define TGT_BE(X)  BSWAP(X)
# define TGT_LE(X)  (X)
#endif

#define MMUSUFFIX _mmu

#define DATA_SIZE 1
#include "softmmu_template.h"

#define DATA_SIZE 2
#include "softmmu_template.h"

#define DATA_SIZE 4
#include "softmmu_template.h"

#define DATA_SIZE 8
#include "softmmu_template.h"

/* First set of helpers allows passing in of OI and RETADDR.  This makes
   them callable from other helpers.  */

#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
#define ATOMIC_NAME(X) \
    HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
#define ATOMIC_MMU_DECLS NotDirtyInfo ndi
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr, &ndi)
#define ATOMIC_MMU_CLEANUP                              \
    do {                                                \
        if (unlikely(ndi.active)) {                     \
            memory_notdirty_write_complete(&ndi);       \
        }                                               \
    } while (0)

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif

/* Second set of helpers are directly callable from TCG as helpers.  */

#undef EXTRA_ARGS
#undef ATOMIC_NAME
#undef ATOMIC_MMU_LOOKUP
#define EXTRA_ARGS         , TCGMemOpIdx oi
#define ATOMIC_NAME(X)     HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
#define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, oi, GETPC(), &ndi)

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

/* Code access functions.  */

#undef MMUSUFFIX
#define MMUSUFFIX _cmmu
#undef GETPC
#define GETPC() ((uintptr_t)0)
#define SOFTMMU_CODE_ACCESS

#define DATA_SIZE 1
#include "softmmu_template.h"

#define DATA_SIZE 2
#include "softmmu_template.h"

#define DATA_SIZE 4
#include "softmmu_template.h"

#define DATA_SIZE 8
#include "softmmu_template.h"