1 /* 2 * guest access functions 3 * 4 * Copyright IBM Corp. 2014 5 * 6 */ 7 8 #include <linux/vmalloc.h> 9 #include <linux/mm_types.h> 10 #include <linux/err.h> 11 12 #include <asm/pgtable.h> 13 #include <asm/gmap.h> 14 #include "kvm-s390.h" 15 #include "gaccess.h" 16 #include <asm/switch_to.h> 17 18 union asce { 19 unsigned long val; 20 struct { 21 unsigned long origin : 52; /* Region- or Segment-Table Origin */ 22 unsigned long : 2; 23 unsigned long g : 1; /* Subspace Group Control */ 24 unsigned long p : 1; /* Private Space Control */ 25 unsigned long s : 1; /* Storage-Alteration-Event Control */ 26 unsigned long x : 1; /* Space-Switch-Event Control */ 27 unsigned long r : 1; /* Real-Space Control */ 28 unsigned long : 1; 29 unsigned long dt : 2; /* Designation-Type Control */ 30 unsigned long tl : 2; /* Region- or Segment-Table Length */ 31 }; 32 }; 33 34 enum { 35 ASCE_TYPE_SEGMENT = 0, 36 ASCE_TYPE_REGION3 = 1, 37 ASCE_TYPE_REGION2 = 2, 38 ASCE_TYPE_REGION1 = 3 39 }; 40 41 union region1_table_entry { 42 unsigned long val; 43 struct { 44 unsigned long rto: 52;/* Region-Table Origin */ 45 unsigned long : 2; 46 unsigned long p : 1; /* DAT-Protection Bit */ 47 unsigned long : 1; 48 unsigned long tf : 2; /* Region-Second-Table Offset */ 49 unsigned long i : 1; /* Region-Invalid Bit */ 50 unsigned long : 1; 51 unsigned long tt : 2; /* Table-Type Bits */ 52 unsigned long tl : 2; /* Region-Second-Table Length */ 53 }; 54 }; 55 56 union region2_table_entry { 57 unsigned long val; 58 struct { 59 unsigned long rto: 52;/* Region-Table Origin */ 60 unsigned long : 2; 61 unsigned long p : 1; /* DAT-Protection Bit */ 62 unsigned long : 1; 63 unsigned long tf : 2; /* Region-Third-Table Offset */ 64 unsigned long i : 1; /* Region-Invalid Bit */ 65 unsigned long : 1; 66 unsigned long tt : 2; /* Table-Type Bits */ 67 unsigned long tl : 2; /* Region-Third-Table Length */ 68 }; 69 }; 70 71 struct region3_table_entry_fc0 { 72 unsigned long sto: 52;/* Segment-Table Origin */ 73 unsigned long : 1; 74 unsigned long fc : 1; /* Format-Control */ 75 unsigned long p : 1; /* DAT-Protection Bit */ 76 unsigned long : 1; 77 unsigned long tf : 2; /* Segment-Table Offset */ 78 unsigned long i : 1; /* Region-Invalid Bit */ 79 unsigned long cr : 1; /* Common-Region Bit */ 80 unsigned long tt : 2; /* Table-Type Bits */ 81 unsigned long tl : 2; /* Segment-Table Length */ 82 }; 83 84 struct region3_table_entry_fc1 { 85 unsigned long rfaa : 33; /* Region-Frame Absolute Address */ 86 unsigned long : 14; 87 unsigned long av : 1; /* ACCF-Validity Control */ 88 unsigned long acc: 4; /* Access-Control Bits */ 89 unsigned long f : 1; /* Fetch-Protection Bit */ 90 unsigned long fc : 1; /* Format-Control */ 91 unsigned long p : 1; /* DAT-Protection Bit */ 92 unsigned long co : 1; /* Change-Recording Override */ 93 unsigned long : 2; 94 unsigned long i : 1; /* Region-Invalid Bit */ 95 unsigned long cr : 1; /* Common-Region Bit */ 96 unsigned long tt : 2; /* Table-Type Bits */ 97 unsigned long : 2; 98 }; 99 100 union region3_table_entry { 101 unsigned long val; 102 struct region3_table_entry_fc0 fc0; 103 struct region3_table_entry_fc1 fc1; 104 struct { 105 unsigned long : 53; 106 unsigned long fc : 1; /* Format-Control */ 107 unsigned long : 4; 108 unsigned long i : 1; /* Region-Invalid Bit */ 109 unsigned long cr : 1; /* Common-Region Bit */ 110 unsigned long tt : 2; /* Table-Type Bits */ 111 unsigned long : 2; 112 }; 113 }; 114 115 struct segment_entry_fc0 { 116 unsigned long pto: 53;/* Page-Table Origin */ 117 unsigned long fc : 1; /* Format-Control */ 118 unsigned long p : 1; /* DAT-Protection Bit */ 119 unsigned long : 3; 120 unsigned long i : 1; /* Segment-Invalid Bit */ 121 unsigned long cs : 1; /* Common-Segment Bit */ 122 unsigned long tt : 2; /* Table-Type Bits */ 123 unsigned long : 2; 124 }; 125 126 struct segment_entry_fc1 { 127 unsigned long sfaa : 44; /* Segment-Frame Absolute Address */ 128 unsigned long : 3; 129 unsigned long av : 1; /* ACCF-Validity Control */ 130 unsigned long acc: 4; /* Access-Control Bits */ 131 unsigned long f : 1; /* Fetch-Protection Bit */ 132 unsigned long fc : 1; /* Format-Control */ 133 unsigned long p : 1; /* DAT-Protection Bit */ 134 unsigned long co : 1; /* Change-Recording Override */ 135 unsigned long : 2; 136 unsigned long i : 1; /* Segment-Invalid Bit */ 137 unsigned long cs : 1; /* Common-Segment Bit */ 138 unsigned long tt : 2; /* Table-Type Bits */ 139 unsigned long : 2; 140 }; 141 142 union segment_table_entry { 143 unsigned long val; 144 struct segment_entry_fc0 fc0; 145 struct segment_entry_fc1 fc1; 146 struct { 147 unsigned long : 53; 148 unsigned long fc : 1; /* Format-Control */ 149 unsigned long : 4; 150 unsigned long i : 1; /* Segment-Invalid Bit */ 151 unsigned long cs : 1; /* Common-Segment Bit */ 152 unsigned long tt : 2; /* Table-Type Bits */ 153 unsigned long : 2; 154 }; 155 }; 156 157 enum { 158 TABLE_TYPE_SEGMENT = 0, 159 TABLE_TYPE_REGION3 = 1, 160 TABLE_TYPE_REGION2 = 2, 161 TABLE_TYPE_REGION1 = 3 162 }; 163 164 union page_table_entry { 165 unsigned long val; 166 struct { 167 unsigned long pfra : 52; /* Page-Frame Real Address */ 168 unsigned long z : 1; /* Zero Bit */ 169 unsigned long i : 1; /* Page-Invalid Bit */ 170 unsigned long p : 1; /* DAT-Protection Bit */ 171 unsigned long : 9; 172 }; 173 }; 174 175 /* 176 * vaddress union in order to easily decode a virtual address into its 177 * region first index, region second index etc. parts. 178 */ 179 union vaddress { 180 unsigned long addr; 181 struct { 182 unsigned long rfx : 11; 183 unsigned long rsx : 11; 184 unsigned long rtx : 11; 185 unsigned long sx : 11; 186 unsigned long px : 8; 187 unsigned long bx : 12; 188 }; 189 struct { 190 unsigned long rfx01 : 2; 191 unsigned long : 9; 192 unsigned long rsx01 : 2; 193 unsigned long : 9; 194 unsigned long rtx01 : 2; 195 unsigned long : 9; 196 unsigned long sx01 : 2; 197 unsigned long : 29; 198 }; 199 }; 200 201 /* 202 * raddress union which will contain the result (real or absolute address) 203 * after a page table walk. The rfaa, sfaa and pfra members are used to 204 * simply assign them the value of a region, segment or page table entry. 205 */ 206 union raddress { 207 unsigned long addr; 208 unsigned long rfaa : 33; /* Region-Frame Absolute Address */ 209 unsigned long sfaa : 44; /* Segment-Frame Absolute Address */ 210 unsigned long pfra : 52; /* Page-Frame Real Address */ 211 }; 212 213 union alet { 214 u32 val; 215 struct { 216 u32 reserved : 7; 217 u32 p : 1; 218 u32 alesn : 8; 219 u32 alen : 16; 220 }; 221 }; 222 223 union ald { 224 u32 val; 225 struct { 226 u32 : 1; 227 u32 alo : 24; 228 u32 all : 7; 229 }; 230 }; 231 232 struct ale { 233 unsigned long i : 1; /* ALEN-Invalid Bit */ 234 unsigned long : 5; 235 unsigned long fo : 1; /* Fetch-Only Bit */ 236 unsigned long p : 1; /* Private Bit */ 237 unsigned long alesn : 8; /* Access-List-Entry Sequence Number */ 238 unsigned long aleax : 16; /* Access-List-Entry Authorization Index */ 239 unsigned long : 32; 240 unsigned long : 1; 241 unsigned long asteo : 25; /* ASN-Second-Table-Entry Origin */ 242 unsigned long : 6; 243 unsigned long astesn : 32; /* ASTE Sequence Number */ 244 } __packed; 245 246 struct aste { 247 unsigned long i : 1; /* ASX-Invalid Bit */ 248 unsigned long ato : 29; /* Authority-Table Origin */ 249 unsigned long : 1; 250 unsigned long b : 1; /* Base-Space Bit */ 251 unsigned long ax : 16; /* Authorization Index */ 252 unsigned long atl : 12; /* Authority-Table Length */ 253 unsigned long : 2; 254 unsigned long ca : 1; /* Controlled-ASN Bit */ 255 unsigned long ra : 1; /* Reusable-ASN Bit */ 256 unsigned long asce : 64; /* Address-Space-Control Element */ 257 unsigned long ald : 32; 258 unsigned long astesn : 32; 259 /* .. more fields there */ 260 } __packed; 261 262 int ipte_lock_held(struct kvm_vcpu *vcpu) 263 { 264 if (vcpu->arch.sie_block->eca & ECA_SII) { 265 int rc; 266 267 read_lock(&vcpu->kvm->arch.sca_lock); 268 rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0; 269 read_unlock(&vcpu->kvm->arch.sca_lock); 270 return rc; 271 } 272 return vcpu->kvm->arch.ipte_lock_count != 0; 273 } 274 275 static void ipte_lock_simple(struct kvm_vcpu *vcpu) 276 { 277 union ipte_control old, new, *ic; 278 279 mutex_lock(&vcpu->kvm->arch.ipte_mutex); 280 vcpu->kvm->arch.ipte_lock_count++; 281 if (vcpu->kvm->arch.ipte_lock_count > 1) 282 goto out; 283 retry: 284 read_lock(&vcpu->kvm->arch.sca_lock); 285 ic = kvm_s390_get_ipte_control(vcpu->kvm); 286 do { 287 old = READ_ONCE(*ic); 288 if (old.k) { 289 read_unlock(&vcpu->kvm->arch.sca_lock); 290 cond_resched(); 291 goto retry; 292 } 293 new = old; 294 new.k = 1; 295 } while (cmpxchg(&ic->val, old.val, new.val) != old.val); 296 read_unlock(&vcpu->kvm->arch.sca_lock); 297 out: 298 mutex_unlock(&vcpu->kvm->arch.ipte_mutex); 299 } 300 301 static void ipte_unlock_simple(struct kvm_vcpu *vcpu) 302 { 303 union ipte_control old, new, *ic; 304 305 mutex_lock(&vcpu->kvm->arch.ipte_mutex); 306 vcpu->kvm->arch.ipte_lock_count--; 307 if (vcpu->kvm->arch.ipte_lock_count) 308 goto out; 309 read_lock(&vcpu->kvm->arch.sca_lock); 310 ic = kvm_s390_get_ipte_control(vcpu->kvm); 311 do { 312 old = READ_ONCE(*ic); 313 new = old; 314 new.k = 0; 315 } while (cmpxchg(&ic->val, old.val, new.val) != old.val); 316 read_unlock(&vcpu->kvm->arch.sca_lock); 317 wake_up(&vcpu->kvm->arch.ipte_wq); 318 out: 319 mutex_unlock(&vcpu->kvm->arch.ipte_mutex); 320 } 321 322 static void ipte_lock_siif(struct kvm_vcpu *vcpu) 323 { 324 union ipte_control old, new, *ic; 325 326 retry: 327 read_lock(&vcpu->kvm->arch.sca_lock); 328 ic = kvm_s390_get_ipte_control(vcpu->kvm); 329 do { 330 old = READ_ONCE(*ic); 331 if (old.kg) { 332 read_unlock(&vcpu->kvm->arch.sca_lock); 333 cond_resched(); 334 goto retry; 335 } 336 new = old; 337 new.k = 1; 338 new.kh++; 339 } while (cmpxchg(&ic->val, old.val, new.val) != old.val); 340 read_unlock(&vcpu->kvm->arch.sca_lock); 341 } 342 343 static void ipte_unlock_siif(struct kvm_vcpu *vcpu) 344 { 345 union ipte_control old, new, *ic; 346 347 read_lock(&vcpu->kvm->arch.sca_lock); 348 ic = kvm_s390_get_ipte_control(vcpu->kvm); 349 do { 350 old = READ_ONCE(*ic); 351 new = old; 352 new.kh--; 353 if (!new.kh) 354 new.k = 0; 355 } while (cmpxchg(&ic->val, old.val, new.val) != old.val); 356 read_unlock(&vcpu->kvm->arch.sca_lock); 357 if (!new.kh) 358 wake_up(&vcpu->kvm->arch.ipte_wq); 359 } 360 361 void ipte_lock(struct kvm_vcpu *vcpu) 362 { 363 if (vcpu->arch.sie_block->eca & ECA_SII) 364 ipte_lock_siif(vcpu); 365 else 366 ipte_lock_simple(vcpu); 367 } 368 369 void ipte_unlock(struct kvm_vcpu *vcpu) 370 { 371 if (vcpu->arch.sie_block->eca & ECA_SII) 372 ipte_unlock_siif(vcpu); 373 else 374 ipte_unlock_simple(vcpu); 375 } 376 377 static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar, 378 enum gacc_mode mode) 379 { 380 union alet alet; 381 struct ale ale; 382 struct aste aste; 383 unsigned long ald_addr, authority_table_addr; 384 union ald ald; 385 int eax, rc; 386 u8 authority_table; 387 388 if (ar >= NUM_ACRS) 389 return -EINVAL; 390 391 save_access_regs(vcpu->run->s.regs.acrs); 392 alet.val = vcpu->run->s.regs.acrs[ar]; 393 394 if (ar == 0 || alet.val == 0) { 395 asce->val = vcpu->arch.sie_block->gcr[1]; 396 return 0; 397 } else if (alet.val == 1) { 398 asce->val = vcpu->arch.sie_block->gcr[7]; 399 return 0; 400 } 401 402 if (alet.reserved) 403 return PGM_ALET_SPECIFICATION; 404 405 if (alet.p) 406 ald_addr = vcpu->arch.sie_block->gcr[5]; 407 else 408 ald_addr = vcpu->arch.sie_block->gcr[2]; 409 ald_addr &= 0x7fffffc0; 410 411 rc = read_guest_real(vcpu, ald_addr + 16, &ald.val, sizeof(union ald)); 412 if (rc) 413 return rc; 414 415 if (alet.alen / 8 > ald.all) 416 return PGM_ALEN_TRANSLATION; 417 418 if (0x7fffffff - ald.alo * 128 < alet.alen * 16) 419 return PGM_ADDRESSING; 420 421 rc = read_guest_real(vcpu, ald.alo * 128 + alet.alen * 16, &ale, 422 sizeof(struct ale)); 423 if (rc) 424 return rc; 425 426 if (ale.i == 1) 427 return PGM_ALEN_TRANSLATION; 428 if (ale.alesn != alet.alesn) 429 return PGM_ALE_SEQUENCE; 430 431 rc = read_guest_real(vcpu, ale.asteo * 64, &aste, sizeof(struct aste)); 432 if (rc) 433 return rc; 434 435 if (aste.i) 436 return PGM_ASTE_VALIDITY; 437 if (aste.astesn != ale.astesn) 438 return PGM_ASTE_SEQUENCE; 439 440 if (ale.p == 1) { 441 eax = (vcpu->arch.sie_block->gcr[8] >> 16) & 0xffff; 442 if (ale.aleax != eax) { 443 if (eax / 16 > aste.atl) 444 return PGM_EXTENDED_AUTHORITY; 445 446 authority_table_addr = aste.ato * 4 + eax / 4; 447 448 rc = read_guest_real(vcpu, authority_table_addr, 449 &authority_table, 450 sizeof(u8)); 451 if (rc) 452 return rc; 453 454 if ((authority_table & (0x40 >> ((eax & 3) * 2))) == 0) 455 return PGM_EXTENDED_AUTHORITY; 456 } 457 } 458 459 if (ale.fo == 1 && mode == GACC_STORE) 460 return PGM_PROTECTION; 461 462 asce->val = aste.asce; 463 return 0; 464 } 465 466 struct trans_exc_code_bits { 467 unsigned long addr : 52; /* Translation-exception Address */ 468 unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */ 469 unsigned long : 2; 470 unsigned long b56 : 1; 471 unsigned long : 3; 472 unsigned long b60 : 1; 473 unsigned long b61 : 1; 474 unsigned long as : 2; /* ASCE Identifier */ 475 }; 476 477 enum { 478 FSI_UNKNOWN = 0, /* Unknown wether fetch or store */ 479 FSI_STORE = 1, /* Exception was due to store operation */ 480 FSI_FETCH = 2 /* Exception was due to fetch operation */ 481 }; 482 483 enum prot_type { 484 PROT_TYPE_LA = 0, 485 PROT_TYPE_KEYC = 1, 486 PROT_TYPE_ALC = 2, 487 PROT_TYPE_DAT = 3, 488 }; 489 490 static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, 491 u8 ar, enum gacc_mode mode, enum prot_type prot) 492 { 493 struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; 494 struct trans_exc_code_bits *tec; 495 496 memset(pgm, 0, sizeof(*pgm)); 497 pgm->code = code; 498 tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; 499 500 switch (code) { 501 case PGM_PROTECTION: 502 switch (prot) { 503 case PROT_TYPE_LA: 504 tec->b56 = 1; 505 break; 506 case PROT_TYPE_KEYC: 507 tec->b60 = 1; 508 break; 509 case PROT_TYPE_ALC: 510 tec->b60 = 1; 511 /* FALL THROUGH */ 512 case PROT_TYPE_DAT: 513 tec->b61 = 1; 514 break; 515 } 516 /* FALL THROUGH */ 517 case PGM_ASCE_TYPE: 518 case PGM_PAGE_TRANSLATION: 519 case PGM_REGION_FIRST_TRANS: 520 case PGM_REGION_SECOND_TRANS: 521 case PGM_REGION_THIRD_TRANS: 522 case PGM_SEGMENT_TRANSLATION: 523 /* 524 * op_access_id only applies to MOVE_PAGE -> set bit 61 525 * exc_access_id has to be set to 0 for some instructions. Both 526 * cases have to be handled by the caller. 527 */ 528 tec->addr = gva >> PAGE_SHIFT; 529 tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH; 530 tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as; 531 /* FALL THROUGH */ 532 case PGM_ALEN_TRANSLATION: 533 case PGM_ALE_SEQUENCE: 534 case PGM_ASTE_VALIDITY: 535 case PGM_ASTE_SEQUENCE: 536 case PGM_EXTENDED_AUTHORITY: 537 /* 538 * We can always store exc_access_id, as it is 539 * undefined for non-ar cases. It is undefined for 540 * most DAT protection exceptions. 541 */ 542 pgm->exc_access_id = ar; 543 break; 544 } 545 return code; 546 } 547 548 static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, 549 unsigned long ga, u8 ar, enum gacc_mode mode) 550 { 551 int rc; 552 struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw); 553 554 if (!psw.t) { 555 asce->val = 0; 556 asce->r = 1; 557 return 0; 558 } 559 560 if (mode == GACC_IFETCH) 561 psw.as = psw.as == PSW_AS_HOME ? PSW_AS_HOME : PSW_AS_PRIMARY; 562 563 switch (psw.as) { 564 case PSW_AS_PRIMARY: 565 asce->val = vcpu->arch.sie_block->gcr[1]; 566 return 0; 567 case PSW_AS_SECONDARY: 568 asce->val = vcpu->arch.sie_block->gcr[7]; 569 return 0; 570 case PSW_AS_HOME: 571 asce->val = vcpu->arch.sie_block->gcr[13]; 572 return 0; 573 case PSW_AS_ACCREG: 574 rc = ar_translation(vcpu, asce, ar, mode); 575 if (rc > 0) 576 return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC); 577 return rc; 578 } 579 return 0; 580 } 581 582 static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) 583 { 584 return kvm_read_guest(kvm, gpa, val, sizeof(*val)); 585 } 586 587 /** 588 * guest_translate - translate a guest virtual into a guest absolute address 589 * @vcpu: virtual cpu 590 * @gva: guest virtual address 591 * @gpa: points to where guest physical (absolute) address should be stored 592 * @asce: effective asce 593 * @mode: indicates the access mode to be used 594 * 595 * Translate a guest virtual address into a guest absolute address by means 596 * of dynamic address translation as specified by the architecture. 597 * If the resulting absolute address is not available in the configuration 598 * an addressing exception is indicated and @gpa will not be changed. 599 * 600 * Returns: - zero on success; @gpa contains the resulting absolute address 601 * - a negative value if guest access failed due to e.g. broken 602 * guest mapping 603 * - a positve value if an access exception happened. In this case 604 * the returned value is the program interruption code as defined 605 * by the architecture 606 */ 607 static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, 608 unsigned long *gpa, const union asce asce, 609 enum gacc_mode mode) 610 { 611 union vaddress vaddr = {.addr = gva}; 612 union raddress raddr = {.addr = gva}; 613 union page_table_entry pte; 614 int dat_protection = 0; 615 union ctlreg0 ctlreg0; 616 unsigned long ptr; 617 int edat1, edat2; 618 619 ctlreg0.val = vcpu->arch.sie_block->gcr[0]; 620 edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8); 621 edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78); 622 if (asce.r) 623 goto real_address; 624 ptr = asce.origin * 4096; 625 switch (asce.dt) { 626 case ASCE_TYPE_REGION1: 627 if (vaddr.rfx01 > asce.tl) 628 return PGM_REGION_FIRST_TRANS; 629 ptr += vaddr.rfx * 8; 630 break; 631 case ASCE_TYPE_REGION2: 632 if (vaddr.rfx) 633 return PGM_ASCE_TYPE; 634 if (vaddr.rsx01 > asce.tl) 635 return PGM_REGION_SECOND_TRANS; 636 ptr += vaddr.rsx * 8; 637 break; 638 case ASCE_TYPE_REGION3: 639 if (vaddr.rfx || vaddr.rsx) 640 return PGM_ASCE_TYPE; 641 if (vaddr.rtx01 > asce.tl) 642 return PGM_REGION_THIRD_TRANS; 643 ptr += vaddr.rtx * 8; 644 break; 645 case ASCE_TYPE_SEGMENT: 646 if (vaddr.rfx || vaddr.rsx || vaddr.rtx) 647 return PGM_ASCE_TYPE; 648 if (vaddr.sx01 > asce.tl) 649 return PGM_SEGMENT_TRANSLATION; 650 ptr += vaddr.sx * 8; 651 break; 652 } 653 switch (asce.dt) { 654 case ASCE_TYPE_REGION1: { 655 union region1_table_entry rfte; 656 657 if (kvm_is_error_gpa(vcpu->kvm, ptr)) 658 return PGM_ADDRESSING; 659 if (deref_table(vcpu->kvm, ptr, &rfte.val)) 660 return -EFAULT; 661 if (rfte.i) 662 return PGM_REGION_FIRST_TRANS; 663 if (rfte.tt != TABLE_TYPE_REGION1) 664 return PGM_TRANSLATION_SPEC; 665 if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl) 666 return PGM_REGION_SECOND_TRANS; 667 if (edat1) 668 dat_protection |= rfte.p; 669 ptr = rfte.rto * 4096 + vaddr.rsx * 8; 670 } 671 /* fallthrough */ 672 case ASCE_TYPE_REGION2: { 673 union region2_table_entry rste; 674 675 if (kvm_is_error_gpa(vcpu->kvm, ptr)) 676 return PGM_ADDRESSING; 677 if (deref_table(vcpu->kvm, ptr, &rste.val)) 678 return -EFAULT; 679 if (rste.i) 680 return PGM_REGION_SECOND_TRANS; 681 if (rste.tt != TABLE_TYPE_REGION2) 682 return PGM_TRANSLATION_SPEC; 683 if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl) 684 return PGM_REGION_THIRD_TRANS; 685 if (edat1) 686 dat_protection |= rste.p; 687 ptr = rste.rto * 4096 + vaddr.rtx * 8; 688 } 689 /* fallthrough */ 690 case ASCE_TYPE_REGION3: { 691 union region3_table_entry rtte; 692 693 if (kvm_is_error_gpa(vcpu->kvm, ptr)) 694 return PGM_ADDRESSING; 695 if (deref_table(vcpu->kvm, ptr, &rtte.val)) 696 return -EFAULT; 697 if (rtte.i) 698 return PGM_REGION_THIRD_TRANS; 699 if (rtte.tt != TABLE_TYPE_REGION3) 700 return PGM_TRANSLATION_SPEC; 701 if (rtte.cr && asce.p && edat2) 702 return PGM_TRANSLATION_SPEC; 703 if (rtte.fc && edat2) { 704 dat_protection |= rtte.fc1.p; 705 raddr.rfaa = rtte.fc1.rfaa; 706 goto absolute_address; 707 } 708 if (vaddr.sx01 < rtte.fc0.tf) 709 return PGM_SEGMENT_TRANSLATION; 710 if (vaddr.sx01 > rtte.fc0.tl) 711 return PGM_SEGMENT_TRANSLATION; 712 if (edat1) 713 dat_protection |= rtte.fc0.p; 714 ptr = rtte.fc0.sto * 4096 + vaddr.sx * 8; 715 } 716 /* fallthrough */ 717 case ASCE_TYPE_SEGMENT: { 718 union segment_table_entry ste; 719 720 if (kvm_is_error_gpa(vcpu->kvm, ptr)) 721 return PGM_ADDRESSING; 722 if (deref_table(vcpu->kvm, ptr, &ste.val)) 723 return -EFAULT; 724 if (ste.i) 725 return PGM_SEGMENT_TRANSLATION; 726 if (ste.tt != TABLE_TYPE_SEGMENT) 727 return PGM_TRANSLATION_SPEC; 728 if (ste.cs && asce.p) 729 return PGM_TRANSLATION_SPEC; 730 if (ste.fc && edat1) { 731 dat_protection |= ste.fc1.p; 732 raddr.sfaa = ste.fc1.sfaa; 733 goto absolute_address; 734 } 735 dat_protection |= ste.fc0.p; 736 ptr = ste.fc0.pto * 2048 + vaddr.px * 8; 737 } 738 } 739 if (kvm_is_error_gpa(vcpu->kvm, ptr)) 740 return PGM_ADDRESSING; 741 if (deref_table(vcpu->kvm, ptr, &pte.val)) 742 return -EFAULT; 743 if (pte.i) 744 return PGM_PAGE_TRANSLATION; 745 if (pte.z) 746 return PGM_TRANSLATION_SPEC; 747 dat_protection |= pte.p; 748 raddr.pfra = pte.pfra; 749 real_address: 750 raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr); 751 absolute_address: 752 if (mode == GACC_STORE && dat_protection) 753 return PGM_PROTECTION; 754 if (kvm_is_error_gpa(vcpu->kvm, raddr.addr)) 755 return PGM_ADDRESSING; 756 *gpa = raddr.addr; 757 return 0; 758 } 759 760 static inline int is_low_address(unsigned long ga) 761 { 762 /* Check for address ranges 0..511 and 4096..4607 */ 763 return (ga & ~0x11fful) == 0; 764 } 765 766 static int low_address_protection_enabled(struct kvm_vcpu *vcpu, 767 const union asce asce) 768 { 769 union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]}; 770 psw_t *psw = &vcpu->arch.sie_block->gpsw; 771 772 if (!ctlreg0.lap) 773 return 0; 774 if (psw_bits(*psw).t && asce.p) 775 return 0; 776 return 1; 777 } 778 779 static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, 780 unsigned long *pages, unsigned long nr_pages, 781 const union asce asce, enum gacc_mode mode) 782 { 783 psw_t *psw = &vcpu->arch.sie_block->gpsw; 784 int lap_enabled, rc = 0; 785 786 lap_enabled = low_address_protection_enabled(vcpu, asce); 787 while (nr_pages) { 788 ga = kvm_s390_logical_to_effective(vcpu, ga); 789 if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) 790 return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode, 791 PROT_TYPE_LA); 792 ga &= PAGE_MASK; 793 if (psw_bits(*psw).t) { 794 rc = guest_translate(vcpu, ga, pages, asce, mode); 795 if (rc < 0) 796 return rc; 797 } else { 798 *pages = kvm_s390_real_to_abs(vcpu, ga); 799 if (kvm_is_error_gpa(vcpu->kvm, *pages)) 800 rc = PGM_ADDRESSING; 801 } 802 if (rc) 803 return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_DAT); 804 ga += PAGE_SIZE; 805 pages++; 806 nr_pages--; 807 } 808 return 0; 809 } 810 811 int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, 812 unsigned long len, enum gacc_mode mode) 813 { 814 psw_t *psw = &vcpu->arch.sie_block->gpsw; 815 unsigned long _len, nr_pages, gpa, idx; 816 unsigned long pages_array[2]; 817 unsigned long *pages; 818 int need_ipte_lock; 819 union asce asce; 820 int rc; 821 822 if (!len) 823 return 0; 824 ga = kvm_s390_logical_to_effective(vcpu, ga); 825 rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode); 826 if (rc) 827 return rc; 828 nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1; 829 pages = pages_array; 830 if (nr_pages > ARRAY_SIZE(pages_array)) 831 pages = vmalloc(nr_pages * sizeof(unsigned long)); 832 if (!pages) 833 return -ENOMEM; 834 need_ipte_lock = psw_bits(*psw).t && !asce.r; 835 if (need_ipte_lock) 836 ipte_lock(vcpu); 837 rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode); 838 for (idx = 0; idx < nr_pages && !rc; idx++) { 839 gpa = *(pages + idx) + (ga & ~PAGE_MASK); 840 _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); 841 if (mode == GACC_STORE) 842 rc = kvm_write_guest(vcpu->kvm, gpa, data, _len); 843 else 844 rc = kvm_read_guest(vcpu->kvm, gpa, data, _len); 845 len -= _len; 846 ga += _len; 847 data += _len; 848 } 849 if (need_ipte_lock) 850 ipte_unlock(vcpu); 851 if (nr_pages > ARRAY_SIZE(pages_array)) 852 vfree(pages); 853 return rc; 854 } 855 856 int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, 857 void *data, unsigned long len, enum gacc_mode mode) 858 { 859 unsigned long _len, gpa; 860 int rc = 0; 861 862 while (len && !rc) { 863 gpa = kvm_s390_real_to_abs(vcpu, gra); 864 _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); 865 if (mode) 866 rc = write_guest_abs(vcpu, gpa, data, _len); 867 else 868 rc = read_guest_abs(vcpu, gpa, data, _len); 869 len -= _len; 870 gra += _len; 871 data += _len; 872 } 873 return rc; 874 } 875 876 /** 877 * guest_translate_address - translate guest logical into guest absolute address 878 * 879 * Parameter semantics are the same as the ones from guest_translate. 880 * The memory contents at the guest address are not changed. 881 * 882 * Note: The IPTE lock is not taken during this function, so the caller 883 * has to take care of this. 884 */ 885 int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, 886 unsigned long *gpa, enum gacc_mode mode) 887 { 888 psw_t *psw = &vcpu->arch.sie_block->gpsw; 889 union asce asce; 890 int rc; 891 892 gva = kvm_s390_logical_to_effective(vcpu, gva); 893 rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode); 894 if (rc) 895 return rc; 896 if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) { 897 if (mode == GACC_STORE) 898 return trans_exc(vcpu, PGM_PROTECTION, gva, 0, 899 mode, PROT_TYPE_LA); 900 } 901 902 if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */ 903 rc = guest_translate(vcpu, gva, gpa, asce, mode); 904 if (rc > 0) 905 return trans_exc(vcpu, rc, gva, 0, mode, PROT_TYPE_DAT); 906 } else { 907 *gpa = kvm_s390_real_to_abs(vcpu, gva); 908 if (kvm_is_error_gpa(vcpu->kvm, *gpa)) 909 return trans_exc(vcpu, rc, gva, PGM_ADDRESSING, mode, 0); 910 } 911 912 return rc; 913 } 914 915 /** 916 * check_gva_range - test a range of guest virtual addresses for accessibility 917 */ 918 int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, 919 unsigned long length, enum gacc_mode mode) 920 { 921 unsigned long gpa; 922 unsigned long currlen; 923 int rc = 0; 924 925 ipte_lock(vcpu); 926 while (length > 0 && !rc) { 927 currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE)); 928 rc = guest_translate_address(vcpu, gva, ar, &gpa, mode); 929 gva += currlen; 930 length -= currlen; 931 } 932 ipte_unlock(vcpu); 933 934 return rc; 935 } 936 937 /** 938 * kvm_s390_check_low_addr_prot_real - check for low-address protection 939 * @gra: Guest real address 940 * 941 * Checks whether an address is subject to low-address protection and set 942 * up vcpu->arch.pgm accordingly if necessary. 943 * 944 * Return: 0 if no protection exception, or PGM_PROTECTION if protected. 945 */ 946 int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra) 947 { 948 union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]}; 949 950 if (!ctlreg0.lap || !is_low_address(gra)) 951 return 0; 952 return trans_exc(vcpu, PGM_PROTECTION, gra, 0, GACC_STORE, PROT_TYPE_LA); 953 } 954 955 /** 956 * kvm_s390_shadow_tables - walk the guest page table and create shadow tables 957 * @sg: pointer to the shadow guest address space structure 958 * @saddr: faulting address in the shadow gmap 959 * @pgt: pointer to the page table address result 960 * @fake: pgt references contiguous guest memory block, not a pgtable 961 */ 962 static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, 963 unsigned long *pgt, int *dat_protection, 964 int *fake) 965 { 966 struct gmap *parent; 967 union asce asce; 968 union vaddress vaddr; 969 unsigned long ptr; 970 int rc; 971 972 *fake = 0; 973 *dat_protection = 0; 974 parent = sg->parent; 975 vaddr.addr = saddr; 976 asce.val = sg->orig_asce; 977 ptr = asce.origin * 4096; 978 if (asce.r) { 979 *fake = 1; 980 asce.dt = ASCE_TYPE_REGION1; 981 } 982 switch (asce.dt) { 983 case ASCE_TYPE_REGION1: 984 if (vaddr.rfx01 > asce.tl && !asce.r) 985 return PGM_REGION_FIRST_TRANS; 986 break; 987 case ASCE_TYPE_REGION2: 988 if (vaddr.rfx) 989 return PGM_ASCE_TYPE; 990 if (vaddr.rsx01 > asce.tl) 991 return PGM_REGION_SECOND_TRANS; 992 break; 993 case ASCE_TYPE_REGION3: 994 if (vaddr.rfx || vaddr.rsx) 995 return PGM_ASCE_TYPE; 996 if (vaddr.rtx01 > asce.tl) 997 return PGM_REGION_THIRD_TRANS; 998 break; 999 case ASCE_TYPE_SEGMENT: 1000 if (vaddr.rfx || vaddr.rsx || vaddr.rtx) 1001 return PGM_ASCE_TYPE; 1002 if (vaddr.sx01 > asce.tl) 1003 return PGM_SEGMENT_TRANSLATION; 1004 break; 1005 } 1006 1007 switch (asce.dt) { 1008 case ASCE_TYPE_REGION1: { 1009 union region1_table_entry rfte; 1010 1011 if (*fake) { 1012 /* offset in 16EB guest memory block */ 1013 ptr = ptr + ((unsigned long) vaddr.rsx << 53UL); 1014 rfte.val = ptr; 1015 goto shadow_r2t; 1016 } 1017 rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val); 1018 if (rc) 1019 return rc; 1020 if (rfte.i) 1021 return PGM_REGION_FIRST_TRANS; 1022 if (rfte.tt != TABLE_TYPE_REGION1) 1023 return PGM_TRANSLATION_SPEC; 1024 if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl) 1025 return PGM_REGION_SECOND_TRANS; 1026 if (sg->edat_level >= 1) 1027 *dat_protection |= rfte.p; 1028 ptr = rfte.rto << 12UL; 1029 shadow_r2t: 1030 rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake); 1031 if (rc) 1032 return rc; 1033 /* fallthrough */ 1034 } 1035 case ASCE_TYPE_REGION2: { 1036 union region2_table_entry rste; 1037 1038 if (*fake) { 1039 /* offset in 8PB guest memory block */ 1040 ptr = ptr + ((unsigned long) vaddr.rtx << 42UL); 1041 rste.val = ptr; 1042 goto shadow_r3t; 1043 } 1044 rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val); 1045 if (rc) 1046 return rc; 1047 if (rste.i) 1048 return PGM_REGION_SECOND_TRANS; 1049 if (rste.tt != TABLE_TYPE_REGION2) 1050 return PGM_TRANSLATION_SPEC; 1051 if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl) 1052 return PGM_REGION_THIRD_TRANS; 1053 if (sg->edat_level >= 1) 1054 *dat_protection |= rste.p; 1055 ptr = rste.rto << 12UL; 1056 shadow_r3t: 1057 rste.p |= *dat_protection; 1058 rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake); 1059 if (rc) 1060 return rc; 1061 /* fallthrough */ 1062 } 1063 case ASCE_TYPE_REGION3: { 1064 union region3_table_entry rtte; 1065 1066 if (*fake) { 1067 /* offset in 4TB guest memory block */ 1068 ptr = ptr + ((unsigned long) vaddr.sx << 31UL); 1069 rtte.val = ptr; 1070 goto shadow_sgt; 1071 } 1072 rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val); 1073 if (rc) 1074 return rc; 1075 if (rtte.i) 1076 return PGM_REGION_THIRD_TRANS; 1077 if (rtte.tt != TABLE_TYPE_REGION3) 1078 return PGM_TRANSLATION_SPEC; 1079 if (rtte.cr && asce.p && sg->edat_level >= 2) 1080 return PGM_TRANSLATION_SPEC; 1081 if (rtte.fc && sg->edat_level >= 2) { 1082 *dat_protection |= rtte.fc0.p; 1083 *fake = 1; 1084 ptr = rtte.fc1.rfaa << 31UL; 1085 rtte.val = ptr; 1086 goto shadow_sgt; 1087 } 1088 if (vaddr.sx01 < rtte.fc0.tf || vaddr.sx01 > rtte.fc0.tl) 1089 return PGM_SEGMENT_TRANSLATION; 1090 if (sg->edat_level >= 1) 1091 *dat_protection |= rtte.fc0.p; 1092 ptr = rtte.fc0.sto << 12UL; 1093 shadow_sgt: 1094 rtte.fc0.p |= *dat_protection; 1095 rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake); 1096 if (rc) 1097 return rc; 1098 /* fallthrough */ 1099 } 1100 case ASCE_TYPE_SEGMENT: { 1101 union segment_table_entry ste; 1102 1103 if (*fake) { 1104 /* offset in 2G guest memory block */ 1105 ptr = ptr + ((unsigned long) vaddr.sx << 20UL); 1106 ste.val = ptr; 1107 goto shadow_pgt; 1108 } 1109 rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val); 1110 if (rc) 1111 return rc; 1112 if (ste.i) 1113 return PGM_SEGMENT_TRANSLATION; 1114 if (ste.tt != TABLE_TYPE_SEGMENT) 1115 return PGM_TRANSLATION_SPEC; 1116 if (ste.cs && asce.p) 1117 return PGM_TRANSLATION_SPEC; 1118 *dat_protection |= ste.fc0.p; 1119 if (ste.fc && sg->edat_level >= 1) { 1120 *fake = 1; 1121 ptr = ste.fc1.sfaa << 20UL; 1122 ste.val = ptr; 1123 goto shadow_pgt; 1124 } 1125 ptr = ste.fc0.pto << 11UL; 1126 shadow_pgt: 1127 ste.fc0.p |= *dat_protection; 1128 rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake); 1129 if (rc) 1130 return rc; 1131 } 1132 } 1133 /* Return the parent address of the page table */ 1134 *pgt = ptr; 1135 return 0; 1136 } 1137 1138 /** 1139 * kvm_s390_shadow_fault - handle fault on a shadow page table 1140 * @vcpu: virtual cpu 1141 * @sg: pointer to the shadow guest address space structure 1142 * @saddr: faulting address in the shadow gmap 1143 * 1144 * Returns: - 0 if the shadow fault was successfully resolved 1145 * - > 0 (pgm exception code) on exceptions while faulting 1146 * - -EAGAIN if the caller can retry immediately 1147 * - -EFAULT when accessing invalid guest addresses 1148 * - -ENOMEM if out of memory 1149 */ 1150 int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, 1151 unsigned long saddr) 1152 { 1153 union vaddress vaddr; 1154 union page_table_entry pte; 1155 unsigned long pgt; 1156 int dat_protection, fake; 1157 int rc; 1158 1159 down_read(&sg->mm->mmap_sem); 1160 /* 1161 * We don't want any guest-2 tables to change - so the parent 1162 * tables/pointers we read stay valid - unshadowing is however 1163 * always possible - only guest_table_lock protects us. 1164 */ 1165 ipte_lock(vcpu); 1166 1167 rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake); 1168 if (rc) 1169 rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection, 1170 &fake); 1171 1172 vaddr.addr = saddr; 1173 if (fake) { 1174 /* offset in 1MB guest memory block */ 1175 pte.val = pgt + ((unsigned long) vaddr.px << 12UL); 1176 goto shadow_page; 1177 } 1178 if (!rc) 1179 rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val); 1180 if (!rc && pte.i) 1181 rc = PGM_PAGE_TRANSLATION; 1182 if (!rc && pte.z) 1183 rc = PGM_TRANSLATION_SPEC; 1184 shadow_page: 1185 pte.p |= dat_protection; 1186 if (!rc) 1187 rc = gmap_shadow_page(sg, saddr, __pte(pte.val)); 1188 ipte_unlock(vcpu); 1189 up_read(&sg->mm->mmap_sem); 1190 return rc; 1191 } 1192