/*
 * MTRR (Memory Type Range Register) cleanup
 *
 * Copyright (C) 2009 Yinghai Lu
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/sort.h>

#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/kvm_para.h>
#include "mtrr.h"

/* should be related to MTRR_VAR_RANGES nums */
#define RANGE_NUM 256

struct res_range {
	unsigned long start;
	unsigned long end;
};

static int __init
add_range(struct res_range *range, int nr_range, unsigned long start,
	  unsigned long end)
{
	/* out of slots */
	if (nr_range >= RANGE_NUM)
		return nr_range;

	range[nr_range].start = start;
	range[nr_range].end = end;

	nr_range++;

	return nr_range;
}

static int __init
add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
		     unsigned long end)
{
	int i;

	/* try to merge it with an old one */
	for (i = 0; i < nr_range; i++) {
		unsigned long final_start, final_end;
		unsigned long common_start, common_end;

		if (!range[i].end)
			continue;

		common_start = max(range[i].start, start);
		common_end = min(range[i].end, end);
		if (common_start > common_end + 1)
			continue;

		final_start = min(range[i].start, start);
		final_end = max(range[i].end, end);

		range[i].start = final_start;
		range[i].end = final_end;
		return nr_range;
	}

	/* need to add it */
	return add_range(range, nr_range, start, end);
}

static void __init
subtract_range(struct res_range *range, unsigned long start, unsigned long end)
{
	int i, j;

	for (j = 0; j < RANGE_NUM; j++) {
		if (!range[j].end)
			continue;

		if (start <= range[j].start && end >= range[j].end) {
			range[j].start = 0;
			range[j].end = 0;
			continue;
		}

		if (start <= range[j].start && end < range[j].end &&
		    range[j].start < end + 1) {
			range[j].start = end + 1;
			continue;
		}

		if (start > range[j].start && end >= range[j].end &&
		    range[j].end > start - 1) {
			range[j].end = start - 1;
			continue;
		}

		if (start > range[j].start && end < range[j].end) {
			/* find a new spare slot */
			for (i = 0; i < RANGE_NUM; i++) {
				if (range[i].end == 0)
					break;
			}
			if (i < RANGE_NUM) {
				range[i].end = range[j].end;
				range[i].start = end + 1;
			} else {
				printk(KERN_ERR "run out of slot in ranges\n");
			}
			range[j].end = start - 1;
			continue;
		}
	}
}

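/*
 * Illustrative usage sketch (not part of the original source); the PFN
 * values are made up to show how the helpers above cooperate:
 *
 *	nr_range = add_range_with_merge(range, 0, 0x000, 0x7ff);
 *	nr_range = add_range_with_merge(range, nr_range, 0x800, 0xfff);
 *		(the two ranges are adjacent, so they merge into one
 *		 entry [0x000, 0xfff])
 *	subtract_range(range, 0x800, 0xbff);
 *		(splits that entry: [0x000, 0x7ff] stays in place and
 *		 [0xc00, 0xfff] is stored in the first empty slot)
 */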
static int __init cmp_range(const void *x1, const void *x2)
{
	const struct res_range *r1 = x1;
	const struct res_range *r2 = x2;
	long start1, start2;

	start1 = r1->start;
	start2 = r2->start;

	return start1 - start2;
}

struct var_mtrr_range_state {
	unsigned long base_pfn;
	unsigned long size_pfn;
	mtrr_type type;
};

static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
static int __initdata debug_print;

static int __init
x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
		       unsigned long extra_remove_base,
		       unsigned long extra_remove_size)
{
	unsigned long base, size;
	mtrr_type type;
	int i;

	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type != MTRR_TYPE_WRBACK)
			continue;
		base = range_state[i].base_pfn;
		size = range_state[i].size_pfn;
		nr_range = add_range_with_merge(range, nr_range, base,
						base + size - 1);
	}
	if (debug_print) {
		printk(KERN_DEBUG "After WB checking\n");
		for (i = 0; i < nr_range; i++)
			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
				range[i].start, range[i].end + 1);
	}

	/* take out the UC ranges */
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type != MTRR_TYPE_UNCACHABLE &&
		    type != MTRR_TYPE_WRPROT)
			continue;
		size = range_state[i].size_pfn;
		if (!size)
			continue;
		base = range_state[i].base_pfn;
		if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed &&
		    (mtrr_state.enabled & 1)) {
			/* Var MTRR contains UC entry below 1M? Skip it: */
			printk(KERN_WARNING "WARNING: BIOS bug: VAR MTRR %d "
				"contains strange UC entry under 1M, check "
				"with your system vendor!\n", i);
			if (base + size <= (1<<(20-PAGE_SHIFT)))
				continue;
			size -= (1<<(20-PAGE_SHIFT)) - base;
			base = 1<<(20-PAGE_SHIFT);
		}
		subtract_range(range, base, base + size - 1);
	}
	if (extra_remove_size)
		subtract_range(range, extra_remove_base,
				extra_remove_base + extra_remove_size - 1);

	/* get the new number of ranges */
	nr_range = 0;
	for (i = 0; i < RANGE_NUM; i++) {
		if (!range[i].end)
			continue;
		nr_range++;
	}
	if (debug_print) {
		printk(KERN_DEBUG "After UC checking\n");
		for (i = 0; i < nr_range; i++)
			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
				range[i].start, range[i].end + 1);
	}

	/* sort the ranges */
	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
	if (debug_print) {
		printk(KERN_DEBUG "After sorting\n");
		for (i = 0; i < nr_range; i++)
			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
				range[i].start, range[i].end + 1);
	}

	/* clear out the entries that are not used */
	for (i = nr_range; i < RANGE_NUM; i++)
		memset(&range[i], 0, sizeof(range[i]));

	return nr_range;
}

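/*
 * Illustrative example (not part of the original source): with one WB
 * variable MTRR covering [0, 2G) and one UC variable MTRR covering
 * [1G, 1G + 256M), x86_get_mtrr_mem_range() first records the WB range as
 * PFNs [0, 0x7ffff], then subtracts the UC PFNs [0x40000, 0x4ffff], and
 * returns the two sorted WB pieces [0, 0x3ffff] and [0x50000, 0x7ffff].
 */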
static struct res_range __initdata range[RANGE_NUM];
static int __initdata nr_range;

#ifdef CONFIG_MTRR_SANITIZER

static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
{
	unsigned long sum;
	int i;

	sum = 0;
	for (i = 0; i < nr_range; i++)
		sum += range[i].end + 1 - range[i].start;

	return sum;
}

static int enable_mtrr_cleanup __initdata =
	CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;

static int __init disable_mtrr_cleanup_setup(char *str)
{
	enable_mtrr_cleanup = 0;
	return 0;
}
early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);

static int __init enable_mtrr_cleanup_setup(char *str)
{
	enable_mtrr_cleanup = 1;
	return 0;
}
early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);

static int __init mtrr_cleanup_debug_setup(char *str)
{
	debug_print = 1;
	return 0;
}
early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);

struct var_mtrr_state {
	unsigned long range_startk;
	unsigned long range_sizek;
	unsigned long chunk_sizek;
	unsigned long gran_sizek;
	unsigned int reg;
};

static void __init
set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
	     unsigned char type, unsigned int address_bits)
{
	u32 base_lo, base_hi, mask_lo, mask_hi;
	u64 base, mask;

	if (!sizek) {
		fill_mtrr_var_range(reg, 0, 0, 0, 0);
		return;
	}

	mask = (1ULL << address_bits) - 1;
	mask &= ~((((u64)sizek) << 10) - 1);

	base = ((u64)basek) << 10;

	base |= type;
	mask |= 0x800;

	base_lo = base & ((1ULL<<32) - 1);
	base_hi = base >> 32;

	mask_lo = mask & ((1ULL<<32) - 1);
	mask_hi = mask >> 32;

	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
}

static void __init
save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
	      unsigned char type)
{
	range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
	range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
	range_state[reg].type = type;
}

static void __init
set_var_mtrr_all(unsigned int address_bits)
{
	unsigned long basek, sizek;
	unsigned char type;
	unsigned int reg;

	for (reg = 0; reg < num_var_ranges; reg++) {
		basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
		sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
		type = range_state[reg].type;

		set_var_mtrr(reg, basek, sizek, type, address_bits);
	}
}

static unsigned long to_size_factor(unsigned long sizek, char *factorp)
{
	char factor;
	unsigned long base = sizek;

	if (base & ((1<<10) - 1)) {
		/* not MB aligned */
		factor = 'K';
	} else if (base & ((1<<20) - 1)) {
		factor = 'M';
		base >>= 10;
	} else {
		factor = 'G';
		base >>= 20;
	}

	*factorp = factor;

	return base;
}

static unsigned int __init
range_to_mtrr(unsigned int reg, unsigned long range_startk,
	      unsigned long range_sizek, unsigned char type)
{
	if (!range_sizek || (reg >= num_var_ranges))
		return reg;

	while (range_sizek) {
		unsigned long max_align, align;
		unsigned long sizek;

		/* Compute the maximum size with which I can make a range: */
		if (range_startk)
			max_align = ffs(range_startk) - 1;
		else
			max_align = 32;
		align = fls(range_sizek) - 1;
		if (align > max_align)
			align = max_align;

		sizek = 1 << align;
		if (debug_print) {
			char start_factor = 'K', size_factor = 'K';
			unsigned long start_base, size_base;

			start_base = to_size_factor(range_startk,
						    &start_factor);
			size_base = to_size_factor(sizek, &size_factor);

			printk(KERN_DEBUG "Setting variable MTRR %d, "
				"base: %ld%cB, range: %ld%cB, type %s\n",
				reg, start_base, start_factor,
				size_base, size_factor,
				(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
				    ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")
				);
		}
		save_var_mtrr(reg++, range_startk, sizek, type);
		range_startk += sizek;
		range_sizek -= sizek;
		if (reg >= num_var_ranges)
			break;
	}
	return reg;
}

static unsigned __init
range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
			unsigned long sizek)
{
	unsigned long hole_basek, hole_sizek;
	unsigned long second_basek, second_sizek;
	unsigned long range0_basek, range0_sizek;
	unsigned long range_basek, range_sizek;
	unsigned long chunk_sizek;
	unsigned long gran_sizek;

	hole_basek = 0;
	hole_sizek = 0;
	second_basek = 0;
	second_sizek = 0;
	chunk_sizek = state->chunk_sizek;
	gran_sizek = state->gran_sizek;

	/* align with the gran size, to prevent small blocks from using up MTRRs */
	range_basek = ALIGN(state->range_startk, gran_sizek);
	if ((range_basek > basek) && basek)
		return second_sizek;
	state->range_sizek -= (range_basek - state->range_startk);
	range_sizek = ALIGN(state->range_sizek, gran_sizek);

	while (range_sizek > state->range_sizek) {
		range_sizek -= gran_sizek;
		if (!range_sizek)
			return 0;
	}
	state->range_sizek = range_sizek;

	/* try to append some small hole */
	range0_basek = state->range_startk;
	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);

	/* no increase */
	if (range0_sizek == state->range_sizek) {
		if (debug_print)
			printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
				range0_basek<<10,
				(range0_basek + state->range_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, range0_basek,
				state->range_sizek, MTRR_TYPE_WRBACK);
		return 0;
	}

	/* only cut back when it is not the last range */
	if (sizek) {
		while (range0_basek + range0_sizek > (basek + sizek)) {
			if (range0_sizek >= chunk_sizek)
				range0_sizek -= chunk_sizek;
			else
				range0_sizek = 0;

			if (!range0_sizek)
				break;
		}
	}

second_try:
	range_basek = range0_basek + range0_sizek;

	/* one hole in the middle */
	if (range_basek > basek && range_basek <= (basek + sizek))
		second_sizek = range_basek - basek;

	if (range0_sizek > state->range_sizek) {

		/* one hole in the middle or at the end */
		hole_sizek = range0_sizek - state->range_sizek - second_sizek;

		/* the hole size should be less than half of the range0 size */
		if (hole_sizek >= (range0_sizek >> 1) &&
		    range0_sizek >= chunk_sizek) {
			range0_sizek -= chunk_sizek;
			second_sizek = 0;
			hole_sizek = 0;

			goto second_try;
		}
	}

	if (range0_sizek) {
		if (debug_print)
			printk(KERN_DEBUG "range0: %016lx - %016lx\n",
				range0_basek<<10,
				(range0_basek + range0_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, range0_basek,
				range0_sizek, MTRR_TYPE_WRBACK);
	}

	if (range0_sizek < state->range_sizek) {
		/* need to handle the leftover */
		range_sizek = state->range_sizek - range0_sizek;

		if (debug_print)
			printk(KERN_DEBUG "range: %016lx - %016lx\n",
				range_basek<<10,
				(range_basek + range_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, range_basek,
				range_sizek, MTRR_TYPE_WRBACK);
	}

	if (hole_sizek) {
		hole_basek = range_basek - hole_sizek - second_sizek;
		if (debug_print)
			printk(KERN_DEBUG "hole: %016lx - %016lx\n",
				hole_basek<<10,
				(hole_basek + hole_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, hole_basek,
				hole_sizek, MTRR_TYPE_UNCACHABLE);
	}

	return second_sizek;
}

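/*
 * Illustrative example (not part of the original source): a 960M
 * write-back range at 0, with a 256M chunk size and 64K granularity,
 * is rounded up to a 1G "range0" and covered as one 1G WB MTRR plus a
 * 64M UC MTRR punching the [960M, 1024M) hole, i.e. two registers
 * instead of the four WB MTRRs (512M + 256M + 128M + 64M) a plain
 * power-of-two split would need.
 */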
static void __init
set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
		   unsigned long size_pfn)
{
	unsigned long basek, sizek;
	unsigned long second_sizek = 0;

	if (state->reg >= num_var_ranges)
		return;

	basek = base_pfn << (PAGE_SHIFT - 10);
	sizek = size_pfn << (PAGE_SHIFT - 10);

	/* See if I can merge with the last range: */
	if ((basek <= 1024) ||
	    (state->range_startk + state->range_sizek == basek)) {
		unsigned long endk = basek + sizek;
		state->range_sizek = endk - state->range_startk;
		return;
	}
	/* Write the range mtrrs: */
	if (state->range_sizek != 0)
		second_sizek = range_to_mtrr_with_hole(state, basek, sizek);

	/* Allocate an msr: */
	state->range_startk = basek + second_sizek;
	state->range_sizek = sizek - second_sizek;
}

/* minimum size of an mtrr block that can take a hole: */
static u64 mtrr_chunk_size __initdata = (256ULL<<20);

static int __init parse_mtrr_chunk_size_opt(char *p)
{
	if (!p)
		return -EINVAL;
	mtrr_chunk_size = memparse(p, &p);
	return 0;
}
early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);

/* granularity of an mtrr block: */
static u64 mtrr_gran_size __initdata;

static int __init parse_mtrr_gran_size_opt(char *p)
{
	if (!p)
		return -EINVAL;
	mtrr_gran_size = memparse(p, &p);
	return 0;
}
early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);

static int nr_mtrr_spare_reg __initdata =
	CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;

static int __init parse_mtrr_spare_reg(char *arg)
{
	if (arg)
		nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
	return 0;
}

early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);

static int __init
x86_setup_var_mtrrs(struct res_range *range, int nr_range,
		    u64 chunk_size, u64 gran_size)
{
	struct var_mtrr_state var_state;
	int i;
	int num_reg;

	var_state.range_startk = 0;
	var_state.range_sizek = 0;
	var_state.reg = 0;
	var_state.chunk_sizek = chunk_size >> 10;
	var_state.gran_sizek = gran_size >> 10;

	memset(range_state, 0, sizeof(range_state));

	/* Write the ranges: */
	for (i = 0; i < nr_range; i++)
		set_var_mtrr_range(&var_state, range[i].start,
				   range[i].end - range[i].start + 1);

	/* Write the last range: */
	if (var_state.range_sizek != 0)
		range_to_mtrr_with_hole(&var_state, 0, 0);

	num_reg = var_state.reg;
	/* Clear out the extra MTRRs: */
	while (var_state.reg < num_var_ranges) {
		save_var_mtrr(var_state.reg, 0, 0, 0);
		var_state.reg++;
	}

	return num_reg;
}

struct mtrr_cleanup_result {
	unsigned long gran_sizek;
	unsigned long chunk_sizek;
	unsigned long lose_cover_sizek;
	unsigned int num_reg;
	int bad;
};

/*
 * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G	(16 values)
 * chunk_size: gran_size, ..., 2G
 * so we need 16 + 15 + ... + 1 = (1 + 16) * 8 = 136 result slots
 */
#define NUM_RESULT	136
#define PSHIFT		(PAGE_SHIFT - 10)

static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
static unsigned long __initdata min_loss_pfn[RANGE_NUM];

static void __init print_out_mtrr_range_state(void)
{
	int i;
	char start_factor = 'K', size_factor = 'K';
	unsigned long start_base, size_base;
	mtrr_type type;

	for (i = 0; i < num_var_ranges; i++) {

		size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
		if (!size_base)
			continue;

		size_base = to_size_factor(size_base, &size_factor);
		start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
		start_base = to_size_factor(start_base, &start_factor);
		type = range_state[i].type;

		printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
			i, start_base, start_factor,
			size_base, size_factor,
			(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
			    ((type == MTRR_TYPE_WRPROT) ? "WP" :
			     ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
			);
	}
}

static int __init mtrr_need_cleanup(void)
{
	int i;
	mtrr_type type;
	unsigned long size;
	/* extra one for the all-zero (unused) entries */
	int num[MTRR_NUM_TYPES + 1];

	/* check the number of entries */
	memset(num, 0, sizeof(num));
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		size = range_state[i].size_pfn;
		if (type >= MTRR_NUM_TYPES)
			continue;
		if (!size)
			type = MTRR_NUM_TYPES;
		if (type == MTRR_TYPE_WRPROT)
			type = MTRR_TYPE_UNCACHABLE;
		num[type]++;
	}

	/* check if we got UC entries */
	if (!num[MTRR_TYPE_UNCACHABLE])
		return 0;

	/* check if we only had WB and UC */
	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
	    num_var_ranges - num[MTRR_NUM_TYPES])
		return 0;

	return 1;
}

static unsigned long __initdata range_sums;

static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
					 unsigned long extra_remove_base,
					 unsigned long extra_remove_size,
					 int i)
{
	int num_reg;
	static struct res_range range_new[RANGE_NUM];
	static int nr_range_new;
	unsigned long range_sums_new;

	/* convert the ranges to var ranges state: */
	num_reg = x86_setup_var_mtrrs(range, nr_range,
				      chunk_size, gran_size);

	/* we got a new setting in range_state, check it: */
	memset(range_new, 0, sizeof(range_new));
	nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
				extra_remove_base, extra_remove_size);
	range_sums_new = sum_ranges(range_new, nr_range_new);

	result[i].chunk_sizek = chunk_size >> 10;
	result[i].gran_sizek = gran_size >> 10;
	result[i].num_reg = num_reg;
	if (range_sums < range_sums_new) {
		result[i].lose_cover_sizek =
			(range_sums_new - range_sums) << PSHIFT;
		result[i].bad = 1;
	} else
		result[i].lose_cover_sizek =
			(range_sums - range_sums_new) << PSHIFT;

	/* double check it: */
	if (!result[i].bad && !result[i].lose_cover_sizek) {
		if (nr_range_new != nr_range ||
		    memcmp(range, range_new, sizeof(range)))
			result[i].bad = 1;
	}

	if (!result[i].bad && (range_sums - range_sums_new <
			       min_loss_pfn[num_reg])) {
		min_loss_pfn[num_reg] = range_sums - range_sums_new;
	}
}

static void __init mtrr_print_out_one_result(int i)
{
	char gran_factor, chunk_factor, lose_factor;
	unsigned long gran_base, chunk_base, lose_base;

	gran_base = to_size_factor(result[i].gran_sizek, &gran_factor);
	chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor);
	lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor);

	printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
		result[i].bad ? "*BAD*" : " ",
		gran_base, gran_factor, chunk_base, chunk_factor);
	printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
		result[i].num_reg, result[i].bad ? "-" : "",
		lose_base, lose_factor);
}

static int __init mtrr_search_optimal_index(void)
{
	int i;
	int num_reg_good;
	int index_good;

	if (nr_mtrr_spare_reg >= num_var_ranges)
		nr_mtrr_spare_reg = num_var_ranges - 1;

	num_reg_good = -1;
	for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
		if (!min_loss_pfn[i])
			num_reg_good = i;
	}

	index_good = -1;
	if (num_reg_good != -1) {
		for (i = 0; i < NUM_RESULT; i++) {
			if (!result[i].bad &&
			    result[i].num_reg == num_reg_good &&
			    !result[i].lose_cover_sizek) {
				index_good = i;
				break;
			}
		}
	}

	return index_good;
}

int __init mtrr_cleanup(unsigned address_bits)
{
	unsigned long extra_remove_base, extra_remove_size;
	unsigned long base, size, def, dummy;
	mtrr_type type;
	u64 chunk_size, gran_size;
	int index_good;
	int i;

	if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
		return 0;

	rdmsr(MTRRdefType_MSR, def, dummy);
	def &= 0xff;
	if (def != MTRR_TYPE_UNCACHABLE)
		return 0;

	/* get it and store it aside: */
	memset(range_state, 0, sizeof(range_state));
	for (i = 0; i < num_var_ranges; i++) {
		mtrr_if->get(i, &base, &size, &type);
		range_state[i].base_pfn = base;
		range_state[i].size_pfn = size;
		range_state[i].type = type;
	}

	/* check if we need to handle it and can handle it: */
	if (!mtrr_need_cleanup())
		return 0;

	/* print the original var MTRRs at first, for debugging: */
	printk(KERN_DEBUG "original variable MTRRs\n");
	print_out_mtrr_range_state();

	memset(range, 0, sizeof(range));
	extra_remove_size = 0;
	extra_remove_base = 1 << (32 - PAGE_SHIFT);
	if (mtrr_tom2)
		extra_remove_size =
			(mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
	nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
					  extra_remove_size);
	/*
	 * [0, 1M) should always be covered by a var mtrr with WB,
	 * and the fixed mtrrs take effect before the var mtrrs for it:
	 */
	nr_range = add_range_with_merge(range, nr_range, 0,
					(1ULL<<(20 - PAGE_SHIFT)) - 1);
	/* sort the ranges: */
	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);

	range_sums = sum_ranges(range, nr_range);
	printk(KERN_INFO "total RAM covered: %ldM\n",
	       range_sums >> (20 - PAGE_SHIFT));

	if (mtrr_chunk_size && mtrr_gran_size) {
		i = 0;
		mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
				      extra_remove_base, extra_remove_size, i);

		mtrr_print_out_one_result(i);

		if (!result[i].bad) {
			set_var_mtrr_all(address_bits);
			printk(KERN_DEBUG "New variable MTRRs\n");
			print_out_mtrr_range_state();
			return 1;
		}
		printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
		       "will find optimal one\n");
	}

	i = 0;
	memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
	memset(result, 0, sizeof(result));
	for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {

		for (chunk_size = gran_size; chunk_size < (1ULL<<32);
		     chunk_size <<= 1) {

			if (i >= NUM_RESULT)
				continue;

			mtrr_calc_range_state(chunk_size, gran_size,
				      extra_remove_base, extra_remove_size, i);
			if (debug_print) {
				mtrr_print_out_one_result(i);
				printk(KERN_INFO "\n");
			}

			i++;
		}
	}

	/* try to find the optimal index: */
	index_good = mtrr_search_optimal_index();

	if (index_good != -1) {
		printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
		i = index_good;
		mtrr_print_out_one_result(i);

		/* convert the ranges to var ranges state: */
		chunk_size = result[i].chunk_sizek;
		chunk_size <<= 10;
		gran_size = result[i].gran_sizek;
		gran_size <<= 10;
		x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
		set_var_mtrr_all(address_bits);
		printk(KERN_DEBUG "New variable MTRRs\n");
		print_out_mtrr_range_state();
		return 1;
	} else {
		/* print out all of them: */
		for (i = 0; i < NUM_RESULT; i++)
			mtrr_print_out_one_result(i);
	}

	printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
	printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");

	return 0;
}
#else
int __init mtrr_cleanup(unsigned address_bits)
{
	return 0;
}
#endif

static int disable_mtrr_trim;

static int __init disable_mtrr_trim_setup(char *str)
{
	disable_mtrr_trim = 1;
	return 0;
}
early_param("disable_mtrr_trim", disable_mtrr_trim_setup);

/*
 * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
 * for memory >4GB. Check for that here.
 * Note this won't check whether the MTRRs < 4GB (where the magic bit does
 * not apply) are wrong, but so far we don't know of any such case in the
 * wild.
 */
#define Tom2Enabled		(1U << 21)
#define Tom2ForceMemTypeWB	(1U << 22)

int __init amd_special_default_mtrr(void)
{
	u32 l, h;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
		return 0;
	if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
		return 0;
	/* In case some hypervisor doesn't pass SYSCFG through: */
	if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
		return 0;
	/*
	 * Memory between 4GB and top of mem is forced WB by this magic bit.
	 * Reserved before K8RevF, but should be zero there.
	 */
	if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
		 (Tom2Enabled | Tom2ForceMemTypeWB))
		return 1;
	return 0;
}

static u64 __init real_trim_memory(unsigned long start_pfn,
				   unsigned long limit_pfn)
{
	u64 trim_start, trim_size;

	trim_start = start_pfn;
	trim_start <<= PAGE_SHIFT;

	trim_size = limit_pfn;
	trim_size <<= PAGE_SHIFT;
	trim_size -= trim_start;

	return e820_update_range(trim_start, trim_size, E820_RAM,
				 E820_RESERVED);
}

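/*
 * Illustrative example (not part of the original source): with 4K pages,
 * real_trim_memory(0x100000, 0x140000) converts the physical range
 * [4G, 5G) from E820_RAM to E820_RESERVED, i.e. trim_start = 4G and
 * trim_size = 1G.
 */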
/**
 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
 * @end_pfn: ending page frame number
 *
 * Some buggy BIOSes don't set up the MTRRs properly for systems with certain
 * memory configurations.  This routine checks that the highest MTRR matches
 * the end of memory, to make sure the MTRRs having a write back type cover
 * all of the memory the kernel is intending to use.  If not, it'll trim any
 * memory off the end by adjusting end_pfn, removing it from the kernel's
 * allocation pools, warning the user with an obnoxious message.
 */
int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
{
	unsigned long i, base, size, highest_pfn = 0, def, dummy;
	mtrr_type type;
	u64 total_trim_size;

	/* extra one for the all-zero (unused) entries */
	int num[MTRR_NUM_TYPES + 1];

	/*
	 * Make sure we only trim uncachable memory on machines that
	 * support the Intel MTRR architecture:
	 */
	if (!is_cpu(INTEL) || disable_mtrr_trim)
		return 0;

	rdmsr(MTRRdefType_MSR, def, dummy);
	def &= 0xff;
	if (def != MTRR_TYPE_UNCACHABLE)
		return 0;

	/* get it and store it aside: */
	memset(range_state, 0, sizeof(range_state));
	for (i = 0; i < num_var_ranges; i++) {
		mtrr_if->get(i, &base, &size, &type);
		range_state[i].base_pfn = base;
		range_state[i].size_pfn = size;
		range_state[i].type = type;
	}

	/* Find the highest cached pfn: */
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type != MTRR_TYPE_WRBACK)
			continue;
		base = range_state[i].base_pfn;
		size = range_state[i].size_pfn;
		if (highest_pfn < base + size)
			highest_pfn = base + size;
	}

	/* kvm/qemu doesn't have the mtrrs set right, don't trim them all: */
	if (!highest_pfn) {
		printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
		return 0;
	}

	/* check the number of entries: */
	memset(num, 0, sizeof(num));
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type >= MTRR_NUM_TYPES)
			continue;
		size = range_state[i].size_pfn;
		if (!size)
			type = MTRR_NUM_TYPES;
		num[type]++;
	}

	/* no entry for WB? */
	if (!num[MTRR_TYPE_WRBACK])
		return 0;

	/* check if we only had WB and UC: */
	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
	    num_var_ranges - num[MTRR_NUM_TYPES])
		return 0;

	memset(range, 0, sizeof(range));
	nr_range = 0;
	if (mtrr_tom2) {
		range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
		range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
		if (highest_pfn < range[nr_range].end + 1)
			highest_pfn = range[nr_range].end + 1;
		nr_range++;
	}
	nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);

	total_trim_size = 0;

	/* check the head: */
	if (range[0].start)
		total_trim_size += real_trim_memory(0, range[0].start);

	/* check the holes: */
	for (i = 0; i < nr_range - 1; i++) {
		if (range[i].end + 1 < range[i+1].start)
			total_trim_size += real_trim_memory(range[i].end + 1,
							    range[i+1].start);
	}

	/* check the top: */
	i = nr_range - 1;
	if (range[i].end + 1 < end_pfn)
		total_trim_size += real_trim_memory(range[i].end + 1,
						    end_pfn);

	if (total_trim_size) {
		printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
			" all of memory, losing %lluMB of RAM.\n",
			total_trim_size >> 20);

		if (!changed_by_mtrr_cleanup)
			WARN_ON(1);

		printk(KERN_INFO "update e820 for mtrr\n");
		update_e820();

		return 1;
	}

	return 0;
}