1 /* 2 * S390x MMU related functions 3 * 4 * Copyright (c) 2011 Alexander Graf 5 * Copyright (c) 2015 Thomas Huth, IBM Corporation 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 */ 17 18 #include "qemu/osdep.h" 19 #include "qemu/error-report.h" 20 #include "exec/address-spaces.h" 21 #include "cpu.h" 22 #include "internal.h" 23 #include "kvm_s390x.h" 24 #include "sysemu/kvm.h" 25 #include "sysemu/tcg.h" 26 #include "exec/exec-all.h" 27 #include "trace.h" 28 #include "hw/hw.h" 29 #include "hw/s390x/storage-keys.h" 30 31 /* #define DEBUG_S390 */ 32 /* #define DEBUG_S390_PTE */ 33 /* #define DEBUG_S390_STDOUT */ 34 35 #ifdef DEBUG_S390 36 #ifdef DEBUG_S390_STDOUT 37 #define DPRINTF(fmt, ...) \ 38 do { fprintf(stderr, fmt, ## __VA_ARGS__); \ 39 if (qemu_log_separate()) qemu_log(fmt, ##__VA_ARGS__); } while (0) 40 #else 41 #define DPRINTF(fmt, ...) \ 42 do { qemu_log(fmt, ## __VA_ARGS__); } while (0) 43 #endif 44 #else 45 #define DPRINTF(fmt, ...) \ 46 do { } while (0) 47 #endif 48 49 #ifdef DEBUG_S390_PTE 50 #define PTE_DPRINTF DPRINTF 51 #else 52 #define PTE_DPRINTF(fmt, ...) \ 53 do { } while (0) 54 #endif 55 56 /* Fetch/store bits in the translation exception code: */ 57 #define FS_READ 0x800 58 #define FS_WRITE 0x400 59 60 static void trigger_access_exception(CPUS390XState *env, uint32_t type, 61 uint32_t ilen, uint64_t tec) 62 { 63 S390CPU *cpu = env_archcpu(env); 64 65 if (kvm_enabled()) { 66 kvm_s390_access_exception(cpu, type, tec); 67 } else { 68 CPUState *cs = env_cpu(env); 69 if (type != PGM_ADDRESSING) { 70 stq_phys(cs->as, env->psa + offsetof(LowCore, trans_exc_code), tec); 71 } 72 trigger_pgm_exception(env, type, ilen); 73 } 74 } 75 76 static void trigger_prot_fault(CPUS390XState *env, target_ulong vaddr, 77 uint64_t asc, int rw, bool exc) 78 { 79 uint64_t tec; 80 81 tec = vaddr | (rw == MMU_DATA_STORE ? FS_WRITE : FS_READ) | 4 | asc >> 46; 82 83 DPRINTF("%s: trans_exc_code=%016" PRIx64 "\n", __func__, tec); 84 85 if (!exc) { 86 return; 87 } 88 89 trigger_access_exception(env, PGM_PROTECTION, ILEN_AUTO, tec); 90 } 91 92 static void trigger_page_fault(CPUS390XState *env, target_ulong vaddr, 93 uint32_t type, uint64_t asc, int rw, bool exc) 94 { 95 int ilen = ILEN_AUTO; 96 uint64_t tec; 97 98 tec = vaddr | (rw == MMU_DATA_STORE ? FS_WRITE : FS_READ) | asc >> 46; 99 100 DPRINTF("%s: trans_exc_code=%016" PRIx64 "\n", __func__, tec); 101 102 if (!exc) { 103 return; 104 } 105 106 /* Code accesses have an undefined ilc. */ 107 if (rw == MMU_INST_FETCH) { 108 ilen = 2; 109 } 110 111 trigger_access_exception(env, type, ilen, tec); 112 } 113 114 /* check whether the address would be proteted by Low-Address Protection */ 115 static bool is_low_address(uint64_t addr) 116 { 117 return addr <= 511 || (addr >= 4096 && addr <= 4607); 118 } 119 120 /* check whether Low-Address Protection is enabled for mmu_translate() */ 121 static bool lowprot_enabled(const CPUS390XState *env, uint64_t asc) 122 { 123 if (!(env->cregs[0] & CR0_LOWPROT)) { 124 return false; 125 } 126 if (!(env->psw.mask & PSW_MASK_DAT)) { 127 return true; 128 } 129 130 /* Check the private-space control bit */ 131 switch (asc) { 132 case PSW_ASC_PRIMARY: 133 return !(env->cregs[1] & ASCE_PRIVATE_SPACE); 134 case PSW_ASC_SECONDARY: 135 return !(env->cregs[7] & ASCE_PRIVATE_SPACE); 136 case PSW_ASC_HOME: 137 return !(env->cregs[13] & ASCE_PRIVATE_SPACE); 138 default: 139 /* We don't support access register mode */ 140 error_report("unsupported addressing mode"); 141 exit(1); 142 } 143 } 144 145 /** 146 * Translate real address to absolute (= physical) 147 * address by taking care of the prefix mapping. 148 */ 149 target_ulong mmu_real2abs(CPUS390XState *env, target_ulong raddr) 150 { 151 if (raddr < 0x2000) { 152 return raddr + env->psa; /* Map the lowcore. */ 153 } else if (raddr >= env->psa && raddr < env->psa + 0x2000) { 154 return raddr - env->psa; /* Map the 0 page. */ 155 } 156 return raddr; 157 } 158 159 /* Decode page table entry (normal 4KB page) */ 160 static int mmu_translate_pte(CPUS390XState *env, target_ulong vaddr, 161 uint64_t asc, uint64_t pt_entry, 162 target_ulong *raddr, int *flags, int rw, bool exc) 163 { 164 if (pt_entry & PAGE_INVALID) { 165 DPRINTF("%s: PTE=0x%" PRIx64 " invalid\n", __func__, pt_entry); 166 trigger_page_fault(env, vaddr, PGM_PAGE_TRANS, asc, rw, exc); 167 return -1; 168 } 169 if (pt_entry & PAGE_RES0) { 170 trigger_page_fault(env, vaddr, PGM_TRANS_SPEC, asc, rw, exc); 171 return -1; 172 } 173 if (pt_entry & PAGE_RO) { 174 *flags &= ~PAGE_WRITE; 175 } 176 177 *raddr = pt_entry & ASCE_ORIGIN; 178 179 PTE_DPRINTF("%s: PTE=0x%" PRIx64 "\n", __func__, pt_entry); 180 181 return 0; 182 } 183 184 /* Decode segment table entry */ 185 static int mmu_translate_segment(CPUS390XState *env, target_ulong vaddr, 186 uint64_t asc, uint64_t st_entry, 187 target_ulong *raddr, int *flags, int rw, 188 bool exc) 189 { 190 CPUState *cs = env_cpu(env); 191 uint64_t origin, offs, pt_entry; 192 193 if (st_entry & SEGMENT_ENTRY_RO) { 194 *flags &= ~PAGE_WRITE; 195 } 196 197 if ((st_entry & SEGMENT_ENTRY_FC) && (env->cregs[0] & CR0_EDAT)) { 198 /* Decode EDAT1 segment frame absolute address (1MB page) */ 199 *raddr = (st_entry & 0xfffffffffff00000ULL) | (vaddr & 0xfffff); 200 PTE_DPRINTF("%s: SEG=0x%" PRIx64 "\n", __func__, st_entry); 201 return 0; 202 } 203 204 /* Look up 4KB page entry */ 205 origin = st_entry & SEGMENT_ENTRY_ORIGIN; 206 offs = (vaddr & VADDR_PX) >> 9; 207 pt_entry = ldq_phys(cs->as, origin + offs); 208 PTE_DPRINTF("%s: 0x%" PRIx64 " + 0x%" PRIx64 " => 0x%016" PRIx64 "\n", 209 __func__, origin, offs, pt_entry); 210 return mmu_translate_pte(env, vaddr, asc, pt_entry, raddr, flags, rw, exc); 211 } 212 213 /* Decode region table entries */ 214 static int mmu_translate_region(CPUS390XState *env, target_ulong vaddr, 215 uint64_t asc, uint64_t entry, int level, 216 target_ulong *raddr, int *flags, int rw, 217 bool exc) 218 { 219 CPUState *cs = env_cpu(env); 220 uint64_t origin, offs, new_entry; 221 const int pchks[4] = { 222 PGM_SEGMENT_TRANS, PGM_REG_THIRD_TRANS, 223 PGM_REG_SEC_TRANS, PGM_REG_FIRST_TRANS 224 }; 225 226 PTE_DPRINTF("%s: 0x%" PRIx64 "\n", __func__, entry); 227 228 origin = entry & REGION_ENTRY_ORIGIN; 229 offs = (vaddr >> (17 + 11 * level / 4)) & 0x3ff8; 230 231 new_entry = ldq_phys(cs->as, origin + offs); 232 PTE_DPRINTF("%s: 0x%" PRIx64 " + 0x%" PRIx64 " => 0x%016" PRIx64 "\n", 233 __func__, origin, offs, new_entry); 234 235 if ((new_entry & REGION_ENTRY_INV) != 0) { 236 DPRINTF("%s: invalid region\n", __func__); 237 trigger_page_fault(env, vaddr, pchks[level / 4], asc, rw, exc); 238 return -1; 239 } 240 241 if ((new_entry & REGION_ENTRY_TYPE_MASK) != level) { 242 trigger_page_fault(env, vaddr, PGM_TRANS_SPEC, asc, rw, exc); 243 return -1; 244 } 245 246 if (level == ASCE_TYPE_SEGMENT) { 247 return mmu_translate_segment(env, vaddr, asc, new_entry, raddr, flags, 248 rw, exc); 249 } 250 251 /* Check region table offset and length */ 252 offs = (vaddr >> (28 + 11 * (level - 4) / 4)) & 3; 253 if (offs < ((new_entry & REGION_ENTRY_TF) >> 6) 254 || offs > (new_entry & REGION_ENTRY_LENGTH)) { 255 DPRINTF("%s: invalid offset or len (%lx)\n", __func__, new_entry); 256 trigger_page_fault(env, vaddr, pchks[level / 4 - 1], asc, rw, exc); 257 return -1; 258 } 259 260 if ((env->cregs[0] & CR0_EDAT) && (new_entry & REGION_ENTRY_RO)) { 261 *flags &= ~PAGE_WRITE; 262 } 263 264 /* yet another region */ 265 return mmu_translate_region(env, vaddr, asc, new_entry, level - 4, 266 raddr, flags, rw, exc); 267 } 268 269 static int mmu_translate_asce(CPUS390XState *env, target_ulong vaddr, 270 uint64_t asc, uint64_t asce, target_ulong *raddr, 271 int *flags, int rw, bool exc) 272 { 273 int level; 274 int r; 275 276 if (asce & ASCE_REAL_SPACE) { 277 /* direct mapping */ 278 *raddr = vaddr; 279 return 0; 280 } 281 282 level = asce & ASCE_TYPE_MASK; 283 switch (level) { 284 case ASCE_TYPE_REGION1: 285 if ((vaddr >> 62) > (asce & ASCE_TABLE_LENGTH)) { 286 trigger_page_fault(env, vaddr, PGM_REG_FIRST_TRANS, asc, rw, exc); 287 return -1; 288 } 289 break; 290 case ASCE_TYPE_REGION2: 291 if (vaddr & 0xffe0000000000000ULL) { 292 DPRINTF("%s: vaddr doesn't fit 0x%16" PRIx64 293 " 0xffe0000000000000ULL\n", __func__, vaddr); 294 trigger_page_fault(env, vaddr, PGM_ASCE_TYPE, asc, rw, exc); 295 return -1; 296 } 297 if ((vaddr >> 51 & 3) > (asce & ASCE_TABLE_LENGTH)) { 298 trigger_page_fault(env, vaddr, PGM_REG_SEC_TRANS, asc, rw, exc); 299 return -1; 300 } 301 break; 302 case ASCE_TYPE_REGION3: 303 if (vaddr & 0xfffffc0000000000ULL) { 304 DPRINTF("%s: vaddr doesn't fit 0x%16" PRIx64 305 " 0xfffffc0000000000ULL\n", __func__, vaddr); 306 trigger_page_fault(env, vaddr, PGM_ASCE_TYPE, asc, rw, exc); 307 return -1; 308 } 309 if ((vaddr >> 40 & 3) > (asce & ASCE_TABLE_LENGTH)) { 310 trigger_page_fault(env, vaddr, PGM_REG_THIRD_TRANS, asc, rw, exc); 311 return -1; 312 } 313 break; 314 case ASCE_TYPE_SEGMENT: 315 if (vaddr & 0xffffffff80000000ULL) { 316 DPRINTF("%s: vaddr doesn't fit 0x%16" PRIx64 317 " 0xffffffff80000000ULL\n", __func__, vaddr); 318 trigger_page_fault(env, vaddr, PGM_ASCE_TYPE, asc, rw, exc); 319 return -1; 320 } 321 if ((vaddr >> 29 & 3) > (asce & ASCE_TABLE_LENGTH)) { 322 trigger_page_fault(env, vaddr, PGM_SEGMENT_TRANS, asc, rw, exc); 323 return -1; 324 } 325 break; 326 } 327 328 r = mmu_translate_region(env, vaddr, asc, asce, level, raddr, flags, rw, 329 exc); 330 if (!r && rw == MMU_DATA_STORE && !(*flags & PAGE_WRITE)) { 331 trigger_prot_fault(env, vaddr, asc, rw, exc); 332 return -1; 333 } 334 335 return r; 336 } 337 338 static void mmu_handle_skey(target_ulong addr, int rw, int *flags) 339 { 340 static S390SKeysClass *skeyclass; 341 static S390SKeysState *ss; 342 uint8_t key; 343 int rc; 344 345 if (unlikely(addr >= ram_size)) { 346 return; 347 } 348 349 if (unlikely(!ss)) { 350 ss = s390_get_skeys_device(); 351 skeyclass = S390_SKEYS_GET_CLASS(ss); 352 } 353 354 /* 355 * Whenever we create a new TLB entry, we set the storage key reference 356 * bit. In case we allow write accesses, we set the storage key change 357 * bit. Whenever the guest changes the storage key, we have to flush the 358 * TLBs of all CPUs (the whole TLB or all affected entries), so that the 359 * next reference/change will result in an MMU fault and make us properly 360 * update the storage key here. 361 * 362 * Note 1: "record of references ... is not necessarily accurate", 363 * "change bit may be set in case no storing has occurred". 364 * -> We can set reference/change bits even on exceptions. 365 * Note 2: certain accesses seem to ignore storage keys. For example, 366 * DAT translation does not set reference bits for table accesses. 367 * 368 * TODO: key-controlled protection. Only CPU accesses make use of the 369 * PSW key. CSS accesses are different - we have to pass in the key. 370 * 371 * TODO: we have races between getting and setting the key. 372 */ 373 rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key); 374 if (rc) { 375 trace_get_skeys_nonzero(rc); 376 return; 377 } 378 379 switch (rw) { 380 case MMU_DATA_LOAD: 381 case MMU_INST_FETCH: 382 /* 383 * The TLB entry has to remain write-protected on read-faults if 384 * the storage key does not indicate a change already. Otherwise 385 * we might miss setting the change bit on write accesses. 386 */ 387 if (!(key & SK_C)) { 388 *flags &= ~PAGE_WRITE; 389 } 390 break; 391 case MMU_DATA_STORE: 392 key |= SK_C; 393 break; 394 default: 395 g_assert_not_reached(); 396 } 397 398 /* Any store/fetch sets the reference bit */ 399 key |= SK_R; 400 401 rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key); 402 if (rc) { 403 trace_set_skeys_nonzero(rc); 404 } 405 } 406 407 /** 408 * Translate a virtual (logical) address into a physical (absolute) address. 409 * @param vaddr the virtual address 410 * @param rw 0 = read, 1 = write, 2 = code fetch 411 * @param asc address space control (one of the PSW_ASC_* modes) 412 * @param raddr the translated address is stored to this pointer 413 * @param flags the PAGE_READ/WRITE/EXEC flags are stored to this pointer 414 * @param exc true = inject a program check if a fault occurred 415 * @return 0 if the translation was successful, -1 if a fault occurred 416 */ 417 int mmu_translate(CPUS390XState *env, target_ulong vaddr, int rw, uint64_t asc, 418 target_ulong *raddr, int *flags, bool exc) 419 { 420 uint64_t asce; 421 int r; 422 423 424 *flags = PAGE_READ | PAGE_WRITE | PAGE_EXEC; 425 if (is_low_address(vaddr & TARGET_PAGE_MASK) && lowprot_enabled(env, asc)) { 426 /* 427 * If any part of this page is currently protected, make sure the 428 * TLB entry will not be reused. 429 * 430 * As the protected range is always the first 512 bytes of the 431 * two first pages, we are able to catch all writes to these areas 432 * just by looking at the start address (triggering the tlb miss). 433 */ 434 *flags |= PAGE_WRITE_INV; 435 if (is_low_address(vaddr) && rw == MMU_DATA_STORE) { 436 if (exc) { 437 trigger_access_exception(env, PGM_PROTECTION, ILEN_AUTO, 0); 438 } 439 return -EACCES; 440 } 441 } 442 443 vaddr &= TARGET_PAGE_MASK; 444 445 if (!(env->psw.mask & PSW_MASK_DAT)) { 446 *raddr = vaddr; 447 goto nodat; 448 } 449 450 switch (asc) { 451 case PSW_ASC_PRIMARY: 452 PTE_DPRINTF("%s: asc=primary\n", __func__); 453 asce = env->cregs[1]; 454 break; 455 case PSW_ASC_HOME: 456 PTE_DPRINTF("%s: asc=home\n", __func__); 457 asce = env->cregs[13]; 458 break; 459 case PSW_ASC_SECONDARY: 460 PTE_DPRINTF("%s: asc=secondary\n", __func__); 461 asce = env->cregs[7]; 462 break; 463 case PSW_ASC_ACCREG: 464 default: 465 hw_error("guest switched to unknown asc mode\n"); 466 break; 467 } 468 469 /* perform the DAT translation */ 470 r = mmu_translate_asce(env, vaddr, asc, asce, raddr, flags, rw, exc); 471 if (r) { 472 return r; 473 } 474 475 nodat: 476 /* Convert real address -> absolute address */ 477 *raddr = mmu_real2abs(env, *raddr); 478 479 mmu_handle_skey(*raddr, rw, flags); 480 return 0; 481 } 482 483 /** 484 * translate_pages: Translate a set of consecutive logical page addresses 485 * to absolute addresses. This function is used for TCG and old KVM without 486 * the MEMOP interface. 487 */ 488 static int translate_pages(S390CPU *cpu, vaddr addr, int nr_pages, 489 target_ulong *pages, bool is_write) 490 { 491 uint64_t asc = cpu->env.psw.mask & PSW_MASK_ASC; 492 CPUS390XState *env = &cpu->env; 493 int ret, i, pflags; 494 495 for (i = 0; i < nr_pages; i++) { 496 ret = mmu_translate(env, addr, is_write, asc, &pages[i], &pflags, true); 497 if (ret) { 498 return ret; 499 } 500 if (!address_space_access_valid(&address_space_memory, pages[i], 501 TARGET_PAGE_SIZE, is_write, 502 MEMTXATTRS_UNSPECIFIED)) { 503 trigger_access_exception(env, PGM_ADDRESSING, ILEN_AUTO, 0); 504 return -EFAULT; 505 } 506 addr += TARGET_PAGE_SIZE; 507 } 508 509 return 0; 510 } 511 512 /** 513 * s390_cpu_virt_mem_rw: 514 * @laddr: the logical start address 515 * @ar: the access register number 516 * @hostbuf: buffer in host memory. NULL = do only checks w/o copying 517 * @len: length that should be transferred 518 * @is_write: true = write, false = read 519 * Returns: 0 on success, non-zero if an exception occurred 520 * 521 * Copy from/to guest memory using logical addresses. Note that we inject a 522 * program interrupt in case there is an error while accessing the memory. 523 * 524 * This function will always return (also for TCG), make sure to call 525 * s390_cpu_virt_mem_handle_exc() to properly exit the CPU loop. 526 */ 527 int s390_cpu_virt_mem_rw(S390CPU *cpu, vaddr laddr, uint8_t ar, void *hostbuf, 528 int len, bool is_write) 529 { 530 int currlen, nr_pages, i; 531 target_ulong *pages; 532 int ret; 533 534 if (kvm_enabled()) { 535 ret = kvm_s390_mem_op(cpu, laddr, ar, hostbuf, len, is_write); 536 if (ret >= 0) { 537 return ret; 538 } 539 } 540 541 nr_pages = (((laddr & ~TARGET_PAGE_MASK) + len - 1) >> TARGET_PAGE_BITS) 542 + 1; 543 pages = g_malloc(nr_pages * sizeof(*pages)); 544 545 ret = translate_pages(cpu, laddr, nr_pages, pages, is_write); 546 if (ret == 0 && hostbuf != NULL) { 547 /* Copy data by stepping through the area page by page */ 548 for (i = 0; i < nr_pages; i++) { 549 currlen = MIN(len, TARGET_PAGE_SIZE - (laddr % TARGET_PAGE_SIZE)); 550 cpu_physical_memory_rw(pages[i] | (laddr & ~TARGET_PAGE_MASK), 551 hostbuf, currlen, is_write); 552 laddr += currlen; 553 hostbuf += currlen; 554 len -= currlen; 555 } 556 } 557 558 g_free(pages); 559 return ret; 560 } 561 562 void s390_cpu_virt_mem_handle_exc(S390CPU *cpu, uintptr_t ra) 563 { 564 /* KVM will handle the interrupt automatically, TCG has to exit the TB */ 565 #ifdef CONFIG_TCG 566 if (tcg_enabled()) { 567 cpu_loop_exit_restore(CPU(cpu), ra); 568 } 569 #endif 570 } 571 572 /** 573 * Translate a real address into a physical (absolute) address. 574 * @param raddr the real address 575 * @param rw 0 = read, 1 = write, 2 = code fetch 576 * @param addr the translated address is stored to this pointer 577 * @param flags the PAGE_READ/WRITE/EXEC flags are stored to this pointer 578 * @return 0 if the translation was successful, < 0 if a fault occurred 579 */ 580 int mmu_translate_real(CPUS390XState *env, target_ulong raddr, int rw, 581 target_ulong *addr, int *flags) 582 { 583 const bool lowprot_enabled = env->cregs[0] & CR0_LOWPROT; 584 585 *flags = PAGE_READ | PAGE_WRITE | PAGE_EXEC; 586 if (is_low_address(raddr & TARGET_PAGE_MASK) && lowprot_enabled) { 587 /* see comment in mmu_translate() how this works */ 588 *flags |= PAGE_WRITE_INV; 589 if (is_low_address(raddr) && rw == MMU_DATA_STORE) { 590 trigger_access_exception(env, PGM_PROTECTION, ILEN_AUTO, 0); 591 return -EACCES; 592 } 593 } 594 595 *addr = mmu_real2abs(env, raddr & TARGET_PAGE_MASK); 596 597 mmu_handle_skey(*addr, rw, flags); 598 return 0; 599 } 600