// SPDX-License-Identifier: GPL-2.0-only
/*
 * vMTRR implementation
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 * Copyright(C) 2015 Intel Corporation.
 *
 * Authors:
 *   Yaniv Kamay <yaniv@qumranet.com>
 *   Avi Kivity <avi@qumranet.com>
 *   Marcelo Tosatti <mtosatti@redhat.com>
 *   Paolo Bonzini <pbonzini@redhat.com>
 *   Xiao Guangrong <guangrong.xiao@linux.intel.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kvm_host.h>
#include <asm/mtrr.h>

#include "cpuid.h"
#include "mmu.h"

#define IA32_MTRR_DEF_TYPE_E		(1ULL << 11)
#define IA32_MTRR_DEF_TYPE_FE		(1ULL << 10)
#define IA32_MTRR_DEF_TYPE_TYPE_MASK	(0xff)

static bool is_mtrr_base_msr(unsigned int msr)
{
	/* MTRR base MSRs use even numbers, masks use odd numbers. */
	return !(msr & 0x1);
}

static bool msr_mtrr_valid(unsigned msr)
{
	switch (msr) {
	case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
	case MSR_MTRRfix64K_00000:
	case MSR_MTRRfix16K_80000:
	case MSR_MTRRfix16K_A0000:
	case MSR_MTRRfix4K_C0000:
	case MSR_MTRRfix4K_C8000:
	case MSR_MTRRfix4K_D0000:
	case MSR_MTRRfix4K_D8000:
	case MSR_MTRRfix4K_E0000:
	case MSR_MTRRfix4K_E8000:
	case MSR_MTRRfix4K_F0000:
	case MSR_MTRRfix4K_F8000:
	case MSR_MTRRdefType:
	case MSR_IA32_CR_PAT:
		return true;
	}
	return false;
}

static bool valid_mtrr_type(unsigned t)
{
	return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
}

bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	int i;
	u64 mask;

	if (!msr_mtrr_valid(msr))
		return false;

	if (msr == MSR_IA32_CR_PAT) {
		return kvm_pat_valid(data);
	} else if (msr == MSR_MTRRdefType) {
		if (data & ~0xcff)
			return false;
		return valid_mtrr_type(data & 0xff);
	} else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
		for (i = 0; i < 8; i++)
			if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
				return false;
		return true;
	}

	/* variable MTRRs */
	WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR));

	mask = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
	if ((msr & 1) == 0) {
		/* MTRR base */
		if (!valid_mtrr_type(data & 0xff))
			return false;
		mask |= 0xf00;
	} else
		/* MTRR mask */
		mask |= 0x7ff;

	return (data & mask) == 0;
}
EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
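
/*
 * A worked example of the mask logic above, assuming a hypothetical
 * vCPU with a 36-bit MAXPHYADDR (so the raw reserved-GPA mask has
 * bits 63:36 set):
 *
 *   - Writing 0x0000000080000006 to a variable MTRR base MSR passes:
 *     the type field (0x06, WB) is valid, and no bits are set in
 *     0xf00 or above bit 35.
 *   - Writing 0x0000000F80000800 to the paired mask MSR passes: only
 *     the valid bit (bit 11) and PhysMask bits 35:31 are set, so
 *     (data & (reserved_gpa_bits | 0x7ff)) == 0.
 */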

static bool mtrr_is_enabled(struct kvm_mtrr *mtrr_state)
{
	return !!(mtrr_state->deftype & IA32_MTRR_DEF_TYPE_E);
}

static bool fixed_mtrr_is_enabled(struct kvm_mtrr *mtrr_state)
{
	return !!(mtrr_state->deftype & IA32_MTRR_DEF_TYPE_FE);
}

static u8 mtrr_default_type(struct kvm_mtrr *mtrr_state)
{
	return mtrr_state->deftype & IA32_MTRR_DEF_TYPE_TYPE_MASK;
}

static u8 mtrr_disabled_type(struct kvm_vcpu *vcpu)
{
	/*
	 * Intel SDM 11.11.2.2: all MTRRs are disabled when
	 * IA32_MTRR_DEF_TYPE.E bit is cleared, and the UC
	 * memory type is applied to all of physical memory.
	 *
	 * However, virtual machines can be run with CPUID such that
	 * there are no MTRRs.  In that case, the firmware will never
	 * enable MTRRs and it is obviously undesirable to run the
	 * guest entirely with UC memory, so we use WB instead.
	 */
	if (guest_cpuid_has(vcpu, X86_FEATURE_MTRR))
		return MTRR_TYPE_UNCACHABLE;
	else
		return MTRR_TYPE_WRBACK;
}

/*
 * Three terms are used in the following code:
 * - segment, it indicates the address segments covered by fixed MTRRs.
 * - unit, it corresponds to the MSR entry in the segment.
 * - range, a range is covered by one memory cache type.
 */
struct fixed_mtrr_segment {
	u64 start;
	u64 end;

	int range_shift;

	/* the start position in kvm_mtrr.fixed_ranges[]. */
	int range_start;
};

static struct fixed_mtrr_segment fixed_seg_table[] = {
	/* MSR_MTRRfix64K_00000, 1 unit. 64K fixed mtrr. */
	{
		.start = 0x0,
		.end = 0x80000,
		.range_shift = 16, /* 64K */
		.range_start = 0,
	},

	/*
	 * MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000, 2 units,
	 * 16K fixed mtrr.
	 */
	{
		.start = 0x80000,
		.end = 0xc0000,
		.range_shift = 14, /* 16K */
		.range_start = 8,
	},

	/*
	 * MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000, 8 units,
	 * 4K fixed mtrr.
	 */
	{
		.start = 0xc0000,
		.end = 0x100000,
		.range_shift = 12, /* 4K */
		.range_start = 24,
	}
};

/*
 * Each unit is covered by one MSR; one MSR entry contains 8 ranges,
 * so the unit size is always 8 * 2^range_shift.
 */
static u64 fixed_mtrr_seg_unit_size(int seg)
{
	return 8 << fixed_seg_table[seg].range_shift;
}

static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit)
{
	switch (msr) {
	case MSR_MTRRfix64K_00000:
		*seg = 0;
		*unit = 0;
		break;
	case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000:
		*seg = 1;
		*unit = array_index_nospec(
			msr - MSR_MTRRfix16K_80000,
			MSR_MTRRfix16K_A0000 - MSR_MTRRfix16K_80000 + 1);
		break;
	case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
		*seg = 2;
		*unit = array_index_nospec(
			msr - MSR_MTRRfix4K_C0000,
			MSR_MTRRfix4K_F8000 - MSR_MTRRfix4K_C0000 + 1);
		break;
	default:
		return false;
	}

	return true;
}

static void fixed_mtrr_seg_unit_range(int seg, int unit, u64 *start, u64 *end)
{
	struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
	u64 unit_size = fixed_mtrr_seg_unit_size(seg);

	*start = mtrr_seg->start + unit * unit_size;
	*end = *start + unit_size;
	WARN_ON(*end > mtrr_seg->end);
}

static int fixed_mtrr_seg_unit_range_index(int seg, int unit)
{
	struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];

	WARN_ON(mtrr_seg->start + unit * fixed_mtrr_seg_unit_size(seg)
		> mtrr_seg->end);

	/* each unit has 8 ranges. */
	return mtrr_seg->range_start + 8 * unit;
}
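
/*
 * For illustration, consider MSR_MTRRfix16K_A0000: fixed_msr_to_seg_unit()
 * yields seg = 1, unit = 1.  The unit size is 8 << 14 = 128KB, so the unit
 * covers [0xa0000, 0xc0000) and its first range index is 8 + 8 * 1 = 16,
 * i.e. kvm_mtrr.fixed_ranges[16..23] hold its eight 16KB range types.
 */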

static int fixed_mtrr_seg_end_range_index(int seg)
{
	struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
	int n;

	n = (mtrr_seg->end - mtrr_seg->start) >> mtrr_seg->range_shift;
	return mtrr_seg->range_start + n - 1;
}

static bool fixed_msr_to_range(u32 msr, u64 *start, u64 *end)
{
	int seg, unit;

	if (!fixed_msr_to_seg_unit(msr, &seg, &unit))
		return false;

	fixed_mtrr_seg_unit_range(seg, unit, start, end);
	return true;
}

static int fixed_msr_to_range_index(u32 msr)
{
	int seg, unit;

	if (!fixed_msr_to_seg_unit(msr, &seg, &unit))
		return -1;

	return fixed_mtrr_seg_unit_range_index(seg, unit);
}

static int fixed_mtrr_addr_to_seg(u64 addr)
{
	struct fixed_mtrr_segment *mtrr_seg;
	int seg, seg_num = ARRAY_SIZE(fixed_seg_table);

	for (seg = 0; seg < seg_num; seg++) {
		mtrr_seg = &fixed_seg_table[seg];
		if (mtrr_seg->start <= addr && addr < mtrr_seg->end)
			return seg;
	}

	return -1;
}

static int fixed_mtrr_addr_seg_to_range_index(u64 addr, int seg)
{
	struct fixed_mtrr_segment *mtrr_seg;
	int index;

	mtrr_seg = &fixed_seg_table[seg];
	index = mtrr_seg->range_start;
	index += (addr - mtrr_seg->start) >> mtrr_seg->range_shift;
	return index;
}

static u64 fixed_mtrr_range_end_addr(int seg, int index)
{
	struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
	int pos = index - mtrr_seg->range_start;

	return mtrr_seg->start + ((pos + 1) << mtrr_seg->range_shift);
}

static void var_mtrr_range(struct kvm_mtrr_range *range, u64 *start, u64 *end)
{
	u64 mask;

	*start = range->base & PAGE_MASK;

	mask = range->mask & PAGE_MASK;

	/*
	 * This cannot overflow because writing to the reserved bits of
	 * variable MTRRs causes a #GP.
	 */
	*end = (*start | ~mask) + 1;
}

static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	gfn_t start, end;
	int index;

	if (msr == MSR_IA32_CR_PAT || !tdp_enabled ||
	    !kvm_arch_has_noncoherent_dma(vcpu->kvm))
		return;

	if (!mtrr_is_enabled(mtrr_state) && msr != MSR_MTRRdefType)
		return;

	/* fixed MTRRs. */
	if (fixed_msr_to_range(msr, &start, &end)) {
		if (!fixed_mtrr_is_enabled(mtrr_state))
			return;
	} else if (msr == MSR_MTRRdefType) {
		start = 0x0;
		end = ~0ULL;
	} else {
		/* variable range MTRRs. */
		index = (msr - 0x200) / 2;
		var_mtrr_range(&mtrr_state->var_ranges[index], &start, &end);
	}

	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
}

static bool var_mtrr_range_is_valid(struct kvm_mtrr_range *range)
{
	return (range->mask & (1 << 11)) != 0;
}
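
/*
 * Continuing the hypothetical 36-bit MAXPHYADDR example from above:
 * after set_var_mtrr_msr() stores base = 0x0000000080000006 and
 * mask = 0x0000000F80000800 (with the reserved bits 63:36 forced to 1),
 * var_mtrr_range() computes
 *
 *   *start = 0x80000000
 *   mask   = 0xFFFFFFFF80000000 (after & PAGE_MASK)
 *   *end   = (0x80000000 | 0x7FFFFFFF) + 1 = 0x100000000
 *
 * i.e. a 2GB write-back range covering [2GB, 4GB).
 */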

static void set_var_mtrr_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	struct kvm_mtrr_range *tmp, *cur;
	int index;

	index = (msr - 0x200) / 2;
	cur = &mtrr_state->var_ranges[index];

	/* remove the entry if it's in the list. */
	if (var_mtrr_range_is_valid(cur))
		list_del(&mtrr_state->var_ranges[index].node);

	/*
	 * Set all illegal GPA bits in the mask, since those bits must
	 * implicitly be 0.  The bits are then cleared when reading them.
	 */
	if (is_mtrr_base_msr(msr))
		cur->base = data;
	else
		cur->mask = data | kvm_vcpu_reserved_gpa_bits_raw(vcpu);

	/* add it to the list if it's enabled. */
	if (var_mtrr_range_is_valid(cur)) {
		list_for_each_entry(tmp, &mtrr_state->head, node)
			if (cur->base >= tmp->base)
				break;
		list_add_tail(&cur->node, &tmp->node);
	}
}

int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	int index;

	if (!kvm_mtrr_valid(vcpu, msr, data))
		return 1;

	index = fixed_msr_to_range_index(msr);
	if (index >= 0)
		*(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index] = data;
	else if (msr == MSR_MTRRdefType)
		vcpu->arch.mtrr_state.deftype = data;
	else if (msr == MSR_IA32_CR_PAT)
		vcpu->arch.pat = data;
	else
		set_var_mtrr_msr(vcpu, msr, data);

	update_mtrr(vcpu, msr);
	return 0;
}

int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	int index;

	/* MSR_MTRRcap is a readonly MSR. */
	if (msr == MSR_MTRRcap) {
		/*
		 * SMRR = 0
		 * WC = 1
		 * FIX = 1
		 * VCNT = KVM_NR_VAR_MTRR
		 */
		*pdata = 0x500 | KVM_NR_VAR_MTRR;
		return 0;
	}

	if (!msr_mtrr_valid(msr))
		return 1;

	index = fixed_msr_to_range_index(msr);
	if (index >= 0)
		*pdata = *(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index];
	else if (msr == MSR_MTRRdefType)
		*pdata = vcpu->arch.mtrr_state.deftype;
	else if (msr == MSR_IA32_CR_PAT)
		*pdata = vcpu->arch.pat;
	else {	/* Variable MTRRs */
		index = (msr - 0x200) / 2;
		if (is_mtrr_base_msr(msr))
			*pdata = vcpu->arch.mtrr_state.var_ranges[index].base;
		else
			*pdata = vcpu->arch.mtrr_state.var_ranges[index].mask;

		*pdata &= ~kvm_vcpu_reserved_gpa_bits_raw(vcpu);
	}

	return 0;
}
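
/*
 * For example, if KVM_NR_VAR_MTRR is 8, a guest reading MSR_MTRRcap
 * sees 0x508: WC supported (bit 10), fixed MTRRs supported (bit 8),
 * SMRR unsupported (bit 11 clear) and VCNT = 8 variable ranges.
 */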

void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu)
{
	INIT_LIST_HEAD(&vcpu->arch.mtrr_state.head);
}

struct mtrr_iter {
	/* input fields. */
	struct kvm_mtrr *mtrr_state;
	u64 start;
	u64 end;

	/* output fields. */
	int mem_type;
	/* mtrr is completely disabled? */
	bool mtrr_disabled;
	/* [start, end) is not fully covered by MTRRs? */
	bool partial_map;

	/* private fields. */
	union {
		/* used for fixed MTRRs. */
		struct {
			int index;
			int seg;
		};

		/* used for var MTRRs. */
		struct {
			struct kvm_mtrr_range *range;
			/* max address that has been covered by var MTRRs. */
			u64 start_max;
		};
	};

	bool fixed;
};

static bool mtrr_lookup_fixed_start(struct mtrr_iter *iter)
{
	int seg, index;

	if (!fixed_mtrr_is_enabled(iter->mtrr_state))
		return false;

	seg = fixed_mtrr_addr_to_seg(iter->start);
	if (seg < 0)
		return false;

	iter->fixed = true;
	index = fixed_mtrr_addr_seg_to_range_index(iter->start, seg);
	iter->index = index;
	iter->seg = seg;
	return true;
}

static bool match_var_range(struct mtrr_iter *iter,
			    struct kvm_mtrr_range *range)
{
	u64 start, end;

	var_mtrr_range(range, &start, &end);
	if (!(start >= iter->end || end <= iter->start)) {
		iter->range = range;

		/*
		 * This function is called while walking kvm_mtrr.head,
		 * which is sorted by base address, so the matched range
		 * has the minimum base address that overlaps
		 * [iter->start_max, iter->end).
		 */
		iter->partial_map |= iter->start_max < start;

		/* update the max address that has been covered. */
		iter->start_max = max(iter->start_max, end);
		return true;
	}

	return false;
}

static void __mtrr_lookup_var_next(struct mtrr_iter *iter)
{
	struct kvm_mtrr *mtrr_state = iter->mtrr_state;

	list_for_each_entry_continue(iter->range, &mtrr_state->head, node)
		if (match_var_range(iter, iter->range))
			return;

	iter->range = NULL;
	iter->partial_map |= iter->start_max < iter->end;
}

static void mtrr_lookup_var_start(struct mtrr_iter *iter)
{
	struct kvm_mtrr *mtrr_state = iter->mtrr_state;

	iter->fixed = false;
	iter->start_max = iter->start;
	iter->range = NULL;
	iter->range = list_prepare_entry(iter->range, &mtrr_state->head, node);

	__mtrr_lookup_var_next(iter);
}

static void mtrr_lookup_fixed_next(struct mtrr_iter *iter)
{
	/* terminate the lookup. */
	if (fixed_mtrr_range_end_addr(iter->seg, iter->index) >= iter->end) {
		iter->fixed = false;
		iter->range = NULL;
		return;
	}

	iter->index++;

	/* all fixed MTRRs have been looked up. */
	if (iter->index >= ARRAY_SIZE(iter->mtrr_state->fixed_ranges))
		return mtrr_lookup_var_start(iter);

	/* switch to the next segment. */
	if (iter->index > fixed_mtrr_seg_end_range_index(iter->seg))
		iter->seg++;
}

static void mtrr_lookup_var_next(struct mtrr_iter *iter)
{
	__mtrr_lookup_var_next(iter);
}

static void mtrr_lookup_start(struct mtrr_iter *iter)
{
	if (!mtrr_is_enabled(iter->mtrr_state)) {
		iter->mtrr_disabled = true;
		return;
	}

	if (!mtrr_lookup_fixed_start(iter))
		mtrr_lookup_var_start(iter);
}

static void mtrr_lookup_init(struct mtrr_iter *iter,
			     struct kvm_mtrr *mtrr_state, u64 start, u64 end)
{
	iter->mtrr_state = mtrr_state;
	iter->start = start;
	iter->end = end;
	iter->mtrr_disabled = false;
	iter->partial_map = false;
	iter->fixed = false;
	iter->range = NULL;

	mtrr_lookup_start(iter);
}

static bool mtrr_lookup_okay(struct mtrr_iter *iter)
{
	if (iter->fixed) {
		iter->mem_type = iter->mtrr_state->fixed_ranges[iter->index];
		return true;
	}

	if (iter->range) {
		iter->mem_type = iter->range->base & 0xff;
		return true;
	}

	return false;
}

static void mtrr_lookup_next(struct mtrr_iter *iter)
{
	if (iter->fixed)
		mtrr_lookup_fixed_next(iter);
	else
		mtrr_lookup_var_next(iter);
}

#define mtrr_for_each_mem_type(_iter_, _mtrr_, _gpa_start_, _gpa_end_)	\
	for (mtrr_lookup_init(_iter_, _mtrr_, _gpa_start_, _gpa_end_);	\
	     mtrr_lookup_okay(_iter_); mtrr_lookup_next(_iter_))
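
/*
 * A minimal usage sketch of the iterator (mirroring the two callers
 * below): walk every memory type that overlaps a hypothetical GPA
 * range and remember whether any of it was UC.
 *
 *	struct mtrr_iter iter;
 *	bool has_uc = false;
 *
 *	mtrr_for_each_mem_type(&iter, mtrr_state, start, end)
 *		has_uc |= iter.mem_type == MTRR_TYPE_UNCACHABLE;
 *
 * After the loop, iter.mtrr_disabled and iter.partial_map tell the
 * caller whether MTRRs were enabled at all and whether the range was
 * only partially covered.
 */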

u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	struct mtrr_iter iter;
	u64 start, end;
	int type = -1;
	const int wt_wb_mask = (1 << MTRR_TYPE_WRBACK)
			       | (1 << MTRR_TYPE_WRTHROUGH);

	start = gfn_to_gpa(gfn);
	end = start + PAGE_SIZE;

	mtrr_for_each_mem_type(&iter, mtrr_state, start, end) {
		int curr_type = iter.mem_type;

		/*
		 * Please refer to Intel SDM Volume 3: 11.11.4.1 MTRR
		 * Precedences.
		 */

		if (type == -1) {
			type = curr_type;
			continue;
		}

		/*
		 * If two or more variable memory ranges match and the
		 * memory types are identical, then that memory type is
		 * used.
		 */
		if (type == curr_type)
			continue;

		/*
		 * If two or more variable memory ranges match and one of
		 * the memory types is UC, the UC memory type is used.
		 */
		if (curr_type == MTRR_TYPE_UNCACHABLE)
			return MTRR_TYPE_UNCACHABLE;

		/*
		 * If two or more variable memory ranges match and the
		 * memory types are WT and WB, the WT memory type is used.
		 */
		if (((1 << type) & wt_wb_mask) &&
		    ((1 << curr_type) & wt_wb_mask)) {
			type = MTRR_TYPE_WRTHROUGH;
			continue;
		}

		/*
		 * For overlaps not defined by the above rules, processor
		 * behavior is undefined.
		 */

		/* We use WB for this undefined behavior. :( */
		return MTRR_TYPE_WRBACK;
	}

	if (iter.mtrr_disabled)
		return mtrr_disabled_type(vcpu);

	/* not contained in any MTRRs. */
	if (type == -1)
		return mtrr_default_type(mtrr_state);

	/*
	 * We only checked a single page, so it is impossible for it to
	 * be partially covered by MTRRs.
	 */
	WARN_ON(iter.partial_map);

	return type;
}
EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type);

bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
					  int page_num)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	struct mtrr_iter iter;
	u64 start, end;
	int type = -1;

	start = gfn_to_gpa(gfn);
	end = gfn_to_gpa(gfn + page_num);
	mtrr_for_each_mem_type(&iter, mtrr_state, start, end) {
		if (type == -1) {
			type = iter.mem_type;
			continue;
		}

		if (type != iter.mem_type)
			return false;
	}

	if (iter.mtrr_disabled)
		return true;

	if (!iter.partial_map)
		return true;

	if (type == -1)
		return true;

	return type == mtrr_default_type(mtrr_state);
}
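
/*
 * Worked example of the precedence rules in
 * kvm_mtrr_get_guest_memory_type(), for a hypothetical guest: if one
 * variable MTRR maps [2GB, 4GB) as WB and another maps [3GB, 3GB + 16MB)
 * as WT, a lookup of a page at 3GB sees both types, hits the WT/WB rule
 * and resolves to WT; had the second range been UC, UC would win instead.
 */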