1 /* 2 * MTRR (Memory Type Range Register) cleanup 3 * 4 * Copyright (C) 2009 Yinghai Lu 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Library General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Library General Public License for more details. 15 * 16 * You should have received a copy of the GNU Library General Public 17 * License along with this library; if not, write to the Free 18 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 19 */ 20 #include <linux/module.h> 21 #include <linux/init.h> 22 #include <linux/pci.h> 23 #include <linux/smp.h> 24 #include <linux/cpu.h> 25 #include <linux/sort.h> 26 #include <linux/mutex.h> 27 #include <linux/uaccess.h> 28 #include <linux/kvm_para.h> 29 30 #include <asm/processor.h> 31 #include <asm/e820.h> 32 #include <asm/mtrr.h> 33 #include <asm/msr.h> 34 35 #include "mtrr.h" 36 37 struct res_range { 38 unsigned long start; 39 unsigned long end; 40 }; 41 42 struct var_mtrr_range_state { 43 unsigned long base_pfn; 44 unsigned long size_pfn; 45 mtrr_type type; 46 }; 47 48 struct var_mtrr_state { 49 unsigned long range_startk; 50 unsigned long range_sizek; 51 unsigned long chunk_sizek; 52 unsigned long gran_sizek; 53 unsigned int reg; 54 }; 55 56 /* Should be related to MTRR_VAR_RANGES nums */ 57 #define RANGE_NUM 256 58 59 static struct res_range __initdata range[RANGE_NUM]; 60 static int __initdata nr_range; 61 62 static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; 63 64 static int __initdata debug_print; 65 #define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) 66 67 68 static int __init 69 add_range(struct res_range *range, int nr_range, 70 unsigned long start, unsigned long end) 71 { 72 /* Out of slots: */ 73 if (nr_range >= RANGE_NUM) 74 return nr_range; 75 76 range[nr_range].start = start; 77 range[nr_range].end = end; 78 79 nr_range++; 80 81 return nr_range; 82 } 83 84 static int __init 85 add_range_with_merge(struct res_range *range, int nr_range, 86 unsigned long start, unsigned long end) 87 { 88 int i; 89 90 /* Try to merge it with old one: */ 91 for (i = 0; i < nr_range; i++) { 92 unsigned long final_start, final_end; 93 unsigned long common_start, common_end; 94 95 if (!range[i].end) 96 continue; 97 98 common_start = max(range[i].start, start); 99 common_end = min(range[i].end, end); 100 if (common_start > common_end + 1) 101 continue; 102 103 final_start = min(range[i].start, start); 104 final_end = max(range[i].end, end); 105 106 range[i].start = final_start; 107 range[i].end = final_end; 108 return nr_range; 109 } 110 111 /* Need to add it: */ 112 return add_range(range, nr_range, start, end); 113 } 114 115 static void __init 116 subtract_range(struct res_range *range, unsigned long start, unsigned long end) 117 { 118 int i, j; 119 120 for (j = 0; j < RANGE_NUM; j++) { 121 if (!range[j].end) 122 continue; 123 124 if (start <= range[j].start && end >= range[j].end) { 125 range[j].start = 0; 126 range[j].end = 0; 127 continue; 128 } 129 130 if (start <= range[j].start && end < range[j].end && 131 range[j].start < end + 1) { 132 range[j].start = end + 1; 133 continue; 134 } 135 136 137 if (start > range[j].start && end >= range[j].end && 138 range[j].end > start - 1) { 139 range[j].end = start - 1; 140 continue; 141 } 142 143 if (start > range[j].start && end < range[j].end) { 144 /* Find the new spare: */ 145 for (i = 0; i < RANGE_NUM; i++) { 146 if (range[i].end == 0) 147 break; 148 } 149 if (i < RANGE_NUM) { 150 range[i].end = range[j].end; 151 range[i].start = end + 1; 152 } else { 153 printk(KERN_ERR "run of slot in ranges\n"); 154 } 155 range[j].end = start - 1; 156 continue; 157 } 158 } 159 } 160 161 static int __init cmp_range(const void *x1, const void *x2) 162 { 163 const struct res_range *r1 = x1; 164 const struct res_range *r2 = x2; 165 long start1, start2; 166 167 start1 = r1->start; 168 start2 = r2->start; 169 170 return start1 - start2; 171 } 172 173 #define BIOS_BUG_MSG KERN_WARNING \ 174 "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" 175 176 static int __init 177 x86_get_mtrr_mem_range(struct res_range *range, int nr_range, 178 unsigned long extra_remove_base, 179 unsigned long extra_remove_size) 180 { 181 unsigned long base, size; 182 mtrr_type type; 183 int i; 184 185 for (i = 0; i < num_var_ranges; i++) { 186 type = range_state[i].type; 187 if (type != MTRR_TYPE_WRBACK) 188 continue; 189 base = range_state[i].base_pfn; 190 size = range_state[i].size_pfn; 191 nr_range = add_range_with_merge(range, nr_range, base, 192 base + size - 1); 193 } 194 if (debug_print) { 195 printk(KERN_DEBUG "After WB checking\n"); 196 for (i = 0; i < nr_range; i++) 197 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", 198 range[i].start, range[i].end + 1); 199 } 200 201 /* Take out UC ranges: */ 202 for (i = 0; i < num_var_ranges; i++) { 203 type = range_state[i].type; 204 if (type != MTRR_TYPE_UNCACHABLE && 205 type != MTRR_TYPE_WRPROT) 206 continue; 207 size = range_state[i].size_pfn; 208 if (!size) 209 continue; 210 base = range_state[i].base_pfn; 211 if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed && 212 (mtrr_state.enabled & 1)) { 213 /* Var MTRR contains UC entry below 1M? Skip it: */ 214 printk(BIOS_BUG_MSG, i); 215 if (base + size <= (1<<(20-PAGE_SHIFT))) 216 continue; 217 size -= (1<<(20-PAGE_SHIFT)) - base; 218 base = 1<<(20-PAGE_SHIFT); 219 } 220 subtract_range(range, base, base + size - 1); 221 } 222 if (extra_remove_size) 223 subtract_range(range, extra_remove_base, 224 extra_remove_base + extra_remove_size - 1); 225 226 /* get new range num */ 227 nr_range = 0; 228 for (i = 0; i < RANGE_NUM; i++) { 229 if (!range[i].end) 230 continue; 231 nr_range++; 232 } 233 if (debug_print) { 234 printk(KERN_DEBUG "After UC checking\n"); 235 for (i = 0; i < nr_range; i++) 236 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", 237 range[i].start, range[i].end + 1); 238 } 239 240 /* sort the ranges */ 241 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); 242 if (debug_print) { 243 printk(KERN_DEBUG "After sorting\n"); 244 for (i = 0; i < nr_range; i++) 245 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", 246 range[i].start, range[i].end + 1); 247 } 248 249 /* clear those is not used */ 250 for (i = nr_range; i < RANGE_NUM; i++) 251 memset(&range[i], 0, sizeof(range[i])); 252 253 return nr_range; 254 } 255 256 #ifdef CONFIG_MTRR_SANITIZER 257 258 static unsigned long __init sum_ranges(struct res_range *range, int nr_range) 259 { 260 unsigned long sum = 0; 261 int i; 262 263 for (i = 0; i < nr_range; i++) 264 sum += range[i].end + 1 - range[i].start; 265 266 return sum; 267 } 268 269 static int enable_mtrr_cleanup __initdata = 270 CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT; 271 272 static int __init disable_mtrr_cleanup_setup(char *str) 273 { 274 enable_mtrr_cleanup = 0; 275 return 0; 276 } 277 early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); 278 279 static int __init enable_mtrr_cleanup_setup(char *str) 280 { 281 enable_mtrr_cleanup = 1; 282 return 0; 283 } 284 early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); 285 286 static int __init mtrr_cleanup_debug_setup(char *str) 287 { 288 debug_print = 1; 289 return 0; 290 } 291 early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); 292 293 static void __init 294 set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, 295 unsigned char type, unsigned int address_bits) 296 { 297 u32 base_lo, base_hi, mask_lo, mask_hi; 298 u64 base, mask; 299 300 if (!sizek) { 301 fill_mtrr_var_range(reg, 0, 0, 0, 0); 302 return; 303 } 304 305 mask = (1ULL << address_bits) - 1; 306 mask &= ~((((u64)sizek) << 10) - 1); 307 308 base = ((u64)basek) << 10; 309 310 base |= type; 311 mask |= 0x800; 312 313 base_lo = base & ((1ULL<<32) - 1); 314 base_hi = base >> 32; 315 316 mask_lo = mask & ((1ULL<<32) - 1); 317 mask_hi = mask >> 32; 318 319 fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi); 320 } 321 322 static void __init 323 save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, 324 unsigned char type) 325 { 326 range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10); 327 range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10); 328 range_state[reg].type = type; 329 } 330 331 static void __init set_var_mtrr_all(unsigned int address_bits) 332 { 333 unsigned long basek, sizek; 334 unsigned char type; 335 unsigned int reg; 336 337 for (reg = 0; reg < num_var_ranges; reg++) { 338 basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10); 339 sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10); 340 type = range_state[reg].type; 341 342 set_var_mtrr(reg, basek, sizek, type, address_bits); 343 } 344 } 345 346 static unsigned long to_size_factor(unsigned long sizek, char *factorp) 347 { 348 unsigned long base = sizek; 349 char factor; 350 351 if (base & ((1<<10) - 1)) { 352 /* Not MB-aligned: */ 353 factor = 'K'; 354 } else if (base & ((1<<20) - 1)) { 355 factor = 'M'; 356 base >>= 10; 357 } else { 358 factor = 'G'; 359 base >>= 20; 360 } 361 362 *factorp = factor; 363 364 return base; 365 } 366 367 static unsigned int __init 368 range_to_mtrr(unsigned int reg, unsigned long range_startk, 369 unsigned long range_sizek, unsigned char type) 370 { 371 if (!range_sizek || (reg >= num_var_ranges)) 372 return reg; 373 374 while (range_sizek) { 375 unsigned long max_align, align; 376 unsigned long sizek; 377 378 /* Compute the maximum size with which we can make a range: */ 379 if (range_startk) 380 max_align = ffs(range_startk) - 1; 381 else 382 max_align = 32; 383 384 align = fls(range_sizek) - 1; 385 if (align > max_align) 386 align = max_align; 387 388 sizek = 1 << align; 389 if (debug_print) { 390 char start_factor = 'K', size_factor = 'K'; 391 unsigned long start_base, size_base; 392 393 start_base = to_size_factor(range_startk, &start_factor); 394 size_base = to_size_factor(sizek, &size_factor); 395 396 Dprintk("Setting variable MTRR %d, " 397 "base: %ld%cB, range: %ld%cB, type %s\n", 398 reg, start_base, start_factor, 399 size_base, size_factor, 400 (type == MTRR_TYPE_UNCACHABLE) ? "UC" : 401 ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other") 402 ); 403 } 404 save_var_mtrr(reg++, range_startk, sizek, type); 405 range_startk += sizek; 406 range_sizek -= sizek; 407 if (reg >= num_var_ranges) 408 break; 409 } 410 return reg; 411 } 412 413 static unsigned __init 414 range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, 415 unsigned long sizek) 416 { 417 unsigned long hole_basek, hole_sizek; 418 unsigned long second_basek, second_sizek; 419 unsigned long range0_basek, range0_sizek; 420 unsigned long range_basek, range_sizek; 421 unsigned long chunk_sizek; 422 unsigned long gran_sizek; 423 424 hole_basek = 0; 425 hole_sizek = 0; 426 second_basek = 0; 427 second_sizek = 0; 428 chunk_sizek = state->chunk_sizek; 429 gran_sizek = state->gran_sizek; 430 431 /* Align with gran size, prevent small block used up MTRRs: */ 432 range_basek = ALIGN(state->range_startk, gran_sizek); 433 if ((range_basek > basek) && basek) 434 return second_sizek; 435 436 state->range_sizek -= (range_basek - state->range_startk); 437 range_sizek = ALIGN(state->range_sizek, gran_sizek); 438 439 while (range_sizek > state->range_sizek) { 440 range_sizek -= gran_sizek; 441 if (!range_sizek) 442 return 0; 443 } 444 state->range_sizek = range_sizek; 445 446 /* Try to append some small hole: */ 447 range0_basek = state->range_startk; 448 range0_sizek = ALIGN(state->range_sizek, chunk_sizek); 449 450 /* No increase: */ 451 if (range0_sizek == state->range_sizek) { 452 Dprintk("rangeX: %016lx - %016lx\n", 453 range0_basek<<10, 454 (range0_basek + state->range_sizek)<<10); 455 state->reg = range_to_mtrr(state->reg, range0_basek, 456 state->range_sizek, MTRR_TYPE_WRBACK); 457 return 0; 458 } 459 460 /* Only cut back when it is not the last: */ 461 if (sizek) { 462 while (range0_basek + range0_sizek > (basek + sizek)) { 463 if (range0_sizek >= chunk_sizek) 464 range0_sizek -= chunk_sizek; 465 else 466 range0_sizek = 0; 467 468 if (!range0_sizek) 469 break; 470 } 471 } 472 473 second_try: 474 range_basek = range0_basek + range0_sizek; 475 476 /* One hole in the middle: */ 477 if (range_basek > basek && range_basek <= (basek + sizek)) 478 second_sizek = range_basek - basek; 479 480 if (range0_sizek > state->range_sizek) { 481 482 /* One hole in middle or at the end: */ 483 hole_sizek = range0_sizek - state->range_sizek - second_sizek; 484 485 /* Hole size should be less than half of range0 size: */ 486 if (hole_sizek >= (range0_sizek >> 1) && 487 range0_sizek >= chunk_sizek) { 488 range0_sizek -= chunk_sizek; 489 second_sizek = 0; 490 hole_sizek = 0; 491 492 goto second_try; 493 } 494 } 495 496 if (range0_sizek) { 497 Dprintk("range0: %016lx - %016lx\n", 498 range0_basek<<10, 499 (range0_basek + range0_sizek)<<10); 500 state->reg = range_to_mtrr(state->reg, range0_basek, 501 range0_sizek, MTRR_TYPE_WRBACK); 502 } 503 504 if (range0_sizek < state->range_sizek) { 505 /* Need to handle left over range: */ 506 range_sizek = state->range_sizek - range0_sizek; 507 508 Dprintk("range: %016lx - %016lx\n", 509 range_basek<<10, 510 (range_basek + range_sizek)<<10); 511 512 state->reg = range_to_mtrr(state->reg, range_basek, 513 range_sizek, MTRR_TYPE_WRBACK); 514 } 515 516 if (hole_sizek) { 517 hole_basek = range_basek - hole_sizek - second_sizek; 518 Dprintk("hole: %016lx - %016lx\n", 519 hole_basek<<10, 520 (hole_basek + hole_sizek)<<10); 521 state->reg = range_to_mtrr(state->reg, hole_basek, 522 hole_sizek, MTRR_TYPE_UNCACHABLE); 523 } 524 525 return second_sizek; 526 } 527 528 static void __init 529 set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, 530 unsigned long size_pfn) 531 { 532 unsigned long basek, sizek; 533 unsigned long second_sizek = 0; 534 535 if (state->reg >= num_var_ranges) 536 return; 537 538 basek = base_pfn << (PAGE_SHIFT - 10); 539 sizek = size_pfn << (PAGE_SHIFT - 10); 540 541 /* See if I can merge with the last range: */ 542 if ((basek <= 1024) || 543 (state->range_startk + state->range_sizek == basek)) { 544 unsigned long endk = basek + sizek; 545 state->range_sizek = endk - state->range_startk; 546 return; 547 } 548 /* Write the range mtrrs: */ 549 if (state->range_sizek != 0) 550 second_sizek = range_to_mtrr_with_hole(state, basek, sizek); 551 552 /* Allocate an msr: */ 553 state->range_startk = basek + second_sizek; 554 state->range_sizek = sizek - second_sizek; 555 } 556 557 /* Mininum size of mtrr block that can take hole: */ 558 static u64 mtrr_chunk_size __initdata = (256ULL<<20); 559 560 static int __init parse_mtrr_chunk_size_opt(char *p) 561 { 562 if (!p) 563 return -EINVAL; 564 mtrr_chunk_size = memparse(p, &p); 565 return 0; 566 } 567 early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); 568 569 /* Granularity of mtrr of block: */ 570 static u64 mtrr_gran_size __initdata; 571 572 static int __init parse_mtrr_gran_size_opt(char *p) 573 { 574 if (!p) 575 return -EINVAL; 576 mtrr_gran_size = memparse(p, &p); 577 return 0; 578 } 579 early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); 580 581 static unsigned long nr_mtrr_spare_reg __initdata = 582 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; 583 584 static int __init parse_mtrr_spare_reg(char *arg) 585 { 586 if (arg) 587 nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); 588 return 0; 589 } 590 early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); 591 592 static int __init 593 x86_setup_var_mtrrs(struct res_range *range, int nr_range, 594 u64 chunk_size, u64 gran_size) 595 { 596 struct var_mtrr_state var_state; 597 int num_reg; 598 int i; 599 600 var_state.range_startk = 0; 601 var_state.range_sizek = 0; 602 var_state.reg = 0; 603 var_state.chunk_sizek = chunk_size >> 10; 604 var_state.gran_sizek = gran_size >> 10; 605 606 memset(range_state, 0, sizeof(range_state)); 607 608 /* Write the range: */ 609 for (i = 0; i < nr_range; i++) { 610 set_var_mtrr_range(&var_state, range[i].start, 611 range[i].end - range[i].start + 1); 612 } 613 614 /* Write the last range: */ 615 if (var_state.range_sizek != 0) 616 range_to_mtrr_with_hole(&var_state, 0, 0); 617 618 num_reg = var_state.reg; 619 /* Clear out the extra MTRR's: */ 620 while (var_state.reg < num_var_ranges) { 621 save_var_mtrr(var_state.reg, 0, 0, 0); 622 var_state.reg++; 623 } 624 625 return num_reg; 626 } 627 628 struct mtrr_cleanup_result { 629 unsigned long gran_sizek; 630 unsigned long chunk_sizek; 631 unsigned long lose_cover_sizek; 632 unsigned int num_reg; 633 int bad; 634 }; 635 636 /* 637 * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G 638 * chunk size: gran_size, ..., 2G 639 * so we need (1+16)*8 640 */ 641 #define NUM_RESULT 136 642 #define PSHIFT (PAGE_SHIFT - 10) 643 644 static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; 645 static unsigned long __initdata min_loss_pfn[RANGE_NUM]; 646 647 static void __init print_out_mtrr_range_state(void) 648 { 649 char start_factor = 'K', size_factor = 'K'; 650 unsigned long start_base, size_base; 651 mtrr_type type; 652 int i; 653 654 for (i = 0; i < num_var_ranges; i++) { 655 656 size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); 657 if (!size_base) 658 continue; 659 660 size_base = to_size_factor(size_base, &size_factor), 661 start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); 662 start_base = to_size_factor(start_base, &start_factor), 663 type = range_state[i].type; 664 665 printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", 666 i, start_base, start_factor, 667 size_base, size_factor, 668 (type == MTRR_TYPE_UNCACHABLE) ? "UC" : 669 ((type == MTRR_TYPE_WRPROT) ? "WP" : 670 ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")) 671 ); 672 } 673 } 674 675 static int __init mtrr_need_cleanup(void) 676 { 677 int i; 678 mtrr_type type; 679 unsigned long size; 680 /* Extra one for all 0: */ 681 int num[MTRR_NUM_TYPES + 1]; 682 683 /* Check entries number: */ 684 memset(num, 0, sizeof(num)); 685 for (i = 0; i < num_var_ranges; i++) { 686 type = range_state[i].type; 687 size = range_state[i].size_pfn; 688 if (type >= MTRR_NUM_TYPES) 689 continue; 690 if (!size) 691 type = MTRR_NUM_TYPES; 692 if (type == MTRR_TYPE_WRPROT) 693 type = MTRR_TYPE_UNCACHABLE; 694 num[type]++; 695 } 696 697 /* Check if we got UC entries: */ 698 if (!num[MTRR_TYPE_UNCACHABLE]) 699 return 0; 700 701 /* Check if we only had WB and UC */ 702 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != 703 num_var_ranges - num[MTRR_NUM_TYPES]) 704 return 0; 705 706 return 1; 707 } 708 709 static unsigned long __initdata range_sums; 710 711 static void __init 712 mtrr_calc_range_state(u64 chunk_size, u64 gran_size, 713 unsigned long x_remove_base, 714 unsigned long x_remove_size, int i) 715 { 716 static struct res_range range_new[RANGE_NUM]; 717 unsigned long range_sums_new; 718 static int nr_range_new; 719 int num_reg; 720 721 /* Convert ranges to var ranges state: */ 722 num_reg = x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); 723 724 /* We got new setting in range_state, check it: */ 725 memset(range_new, 0, sizeof(range_new)); 726 nr_range_new = x86_get_mtrr_mem_range(range_new, 0, 727 x_remove_base, x_remove_size); 728 range_sums_new = sum_ranges(range_new, nr_range_new); 729 730 result[i].chunk_sizek = chunk_size >> 10; 731 result[i].gran_sizek = gran_size >> 10; 732 result[i].num_reg = num_reg; 733 734 if (range_sums < range_sums_new) { 735 result[i].lose_cover_sizek = (range_sums_new - range_sums) << PSHIFT; 736 result[i].bad = 1; 737 } else { 738 result[i].lose_cover_sizek = (range_sums - range_sums_new) << PSHIFT; 739 } 740 741 /* Double check it: */ 742 if (!result[i].bad && !result[i].lose_cover_sizek) { 743 if (nr_range_new != nr_range || memcmp(range, range_new, sizeof(range))) 744 result[i].bad = 1; 745 } 746 747 if (!result[i].bad && (range_sums - range_sums_new < min_loss_pfn[num_reg])) 748 min_loss_pfn[num_reg] = range_sums - range_sums_new; 749 } 750 751 static void __init mtrr_print_out_one_result(int i) 752 { 753 unsigned long gran_base, chunk_base, lose_base; 754 char gran_factor, chunk_factor, lose_factor; 755 756 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), 757 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), 758 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), 759 760 pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t", 761 result[i].bad ? "*BAD*" : " ", 762 gran_base, gran_factor, chunk_base, chunk_factor); 763 pr_cont("num_reg: %d \tlose cover RAM: %s%ld%c\n", 764 result[i].num_reg, result[i].bad ? "-" : "", 765 lose_base, lose_factor); 766 } 767 768 static int __init mtrr_search_optimal_index(void) 769 { 770 int num_reg_good; 771 int index_good; 772 int i; 773 774 if (nr_mtrr_spare_reg >= num_var_ranges) 775 nr_mtrr_spare_reg = num_var_ranges - 1; 776 777 num_reg_good = -1; 778 for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { 779 if (!min_loss_pfn[i]) 780 num_reg_good = i; 781 } 782 783 index_good = -1; 784 if (num_reg_good != -1) { 785 for (i = 0; i < NUM_RESULT; i++) { 786 if (!result[i].bad && 787 result[i].num_reg == num_reg_good && 788 !result[i].lose_cover_sizek) { 789 index_good = i; 790 break; 791 } 792 } 793 } 794 795 return index_good; 796 } 797 798 int __init mtrr_cleanup(unsigned address_bits) 799 { 800 unsigned long x_remove_base, x_remove_size; 801 unsigned long base, size, def, dummy; 802 u64 chunk_size, gran_size; 803 mtrr_type type; 804 int index_good; 805 int i; 806 807 if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) 808 return 0; 809 810 rdmsr(MSR_MTRRdefType, def, dummy); 811 def &= 0xff; 812 if (def != MTRR_TYPE_UNCACHABLE) 813 return 0; 814 815 /* Get it and store it aside: */ 816 memset(range_state, 0, sizeof(range_state)); 817 for (i = 0; i < num_var_ranges; i++) { 818 mtrr_if->get(i, &base, &size, &type); 819 range_state[i].base_pfn = base; 820 range_state[i].size_pfn = size; 821 range_state[i].type = type; 822 } 823 824 /* Check if we need handle it and can handle it: */ 825 if (!mtrr_need_cleanup()) 826 return 0; 827 828 /* Print original var MTRRs at first, for debugging: */ 829 printk(KERN_DEBUG "original variable MTRRs\n"); 830 print_out_mtrr_range_state(); 831 832 memset(range, 0, sizeof(range)); 833 x_remove_size = 0; 834 x_remove_base = 1 << (32 - PAGE_SHIFT); 835 if (mtrr_tom2) 836 x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base; 837 838 nr_range = x86_get_mtrr_mem_range(range, 0, x_remove_base, x_remove_size); 839 /* 840 * [0, 1M) should always be covered by var mtrr with WB 841 * and fixed mtrrs should take effect before var mtrr for it: 842 */ 843 nr_range = add_range_with_merge(range, nr_range, 0, 844 (1ULL<<(20 - PAGE_SHIFT)) - 1); 845 /* Sort the ranges: */ 846 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); 847 848 range_sums = sum_ranges(range, nr_range); 849 printk(KERN_INFO "total RAM coverred: %ldM\n", 850 range_sums >> (20 - PAGE_SHIFT)); 851 852 if (mtrr_chunk_size && mtrr_gran_size) { 853 i = 0; 854 mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size, 855 x_remove_base, x_remove_size, i); 856 857 mtrr_print_out_one_result(i); 858 859 if (!result[i].bad) { 860 set_var_mtrr_all(address_bits); 861 printk(KERN_DEBUG "New variable MTRRs\n"); 862 print_out_mtrr_range_state(); 863 return 1; 864 } 865 printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " 866 "will find optimal one\n"); 867 } 868 869 i = 0; 870 memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); 871 memset(result, 0, sizeof(result)); 872 for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { 873 874 for (chunk_size = gran_size; chunk_size < (1ULL<<32); 875 chunk_size <<= 1) { 876 877 if (i >= NUM_RESULT) 878 continue; 879 880 mtrr_calc_range_state(chunk_size, gran_size, 881 x_remove_base, x_remove_size, i); 882 if (debug_print) { 883 mtrr_print_out_one_result(i); 884 printk(KERN_INFO "\n"); 885 } 886 887 i++; 888 } 889 } 890 891 /* Try to find the optimal index: */ 892 index_good = mtrr_search_optimal_index(); 893 894 if (index_good != -1) { 895 printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); 896 i = index_good; 897 mtrr_print_out_one_result(i); 898 899 /* Convert ranges to var ranges state: */ 900 chunk_size = result[i].chunk_sizek; 901 chunk_size <<= 10; 902 gran_size = result[i].gran_sizek; 903 gran_size <<= 10; 904 x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); 905 set_var_mtrr_all(address_bits); 906 printk(KERN_DEBUG "New variable MTRRs\n"); 907 print_out_mtrr_range_state(); 908 return 1; 909 } else { 910 /* print out all */ 911 for (i = 0; i < NUM_RESULT; i++) 912 mtrr_print_out_one_result(i); 913 } 914 915 printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); 916 printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n"); 917 918 return 0; 919 } 920 #else 921 int __init mtrr_cleanup(unsigned address_bits) 922 { 923 return 0; 924 } 925 #endif 926 927 static int disable_mtrr_trim; 928 929 static int __init disable_mtrr_trim_setup(char *str) 930 { 931 disable_mtrr_trim = 1; 932 return 0; 933 } 934 early_param("disable_mtrr_trim", disable_mtrr_trim_setup); 935 936 /* 937 * Newer AMD K8s and later CPUs have a special magic MSR way to force WB 938 * for memory >4GB. Check for that here. 939 * Note this won't check if the MTRRs < 4GB where the magic bit doesn't 940 * apply to are wrong, but so far we don't know of any such case in the wild. 941 */ 942 #define Tom2Enabled (1U << 21) 943 #define Tom2ForceMemTypeWB (1U << 22) 944 945 int __init amd_special_default_mtrr(void) 946 { 947 u32 l, h; 948 949 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) 950 return 0; 951 if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) 952 return 0; 953 /* In case some hypervisor doesn't pass SYSCFG through: */ 954 if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) 955 return 0; 956 /* 957 * Memory between 4GB and top of mem is forced WB by this magic bit. 958 * Reserved before K8RevF, but should be zero there. 959 */ 960 if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) == 961 (Tom2Enabled | Tom2ForceMemTypeWB)) 962 return 1; 963 return 0; 964 } 965 966 static u64 __init 967 real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn) 968 { 969 u64 trim_start, trim_size; 970 971 trim_start = start_pfn; 972 trim_start <<= PAGE_SHIFT; 973 974 trim_size = limit_pfn; 975 trim_size <<= PAGE_SHIFT; 976 trim_size -= trim_start; 977 978 return e820_update_range(trim_start, trim_size, E820_RAM, E820_RESERVED); 979 } 980 981 /** 982 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs 983 * @end_pfn: ending page frame number 984 * 985 * Some buggy BIOSes don't setup the MTRRs properly for systems with certain 986 * memory configurations. This routine checks that the highest MTRR matches 987 * the end of memory, to make sure the MTRRs having a write back type cover 988 * all of the memory the kernel is intending to use. If not, it'll trim any 989 * memory off the end by adjusting end_pfn, removing it from the kernel's 990 * allocation pools, warning the user with an obnoxious message. 991 */ 992 int __init mtrr_trim_uncached_memory(unsigned long end_pfn) 993 { 994 unsigned long i, base, size, highest_pfn = 0, def, dummy; 995 mtrr_type type; 996 u64 total_trim_size; 997 /* extra one for all 0 */ 998 int num[MTRR_NUM_TYPES + 1]; 999 1000 /* 1001 * Make sure we only trim uncachable memory on machines that 1002 * support the Intel MTRR architecture: 1003 */ 1004 if (!is_cpu(INTEL) || disable_mtrr_trim) 1005 return 0; 1006 1007 rdmsr(MSR_MTRRdefType, def, dummy); 1008 def &= 0xff; 1009 if (def != MTRR_TYPE_UNCACHABLE) 1010 return 0; 1011 1012 /* Get it and store it aside: */ 1013 memset(range_state, 0, sizeof(range_state)); 1014 for (i = 0; i < num_var_ranges; i++) { 1015 mtrr_if->get(i, &base, &size, &type); 1016 range_state[i].base_pfn = base; 1017 range_state[i].size_pfn = size; 1018 range_state[i].type = type; 1019 } 1020 1021 /* Find highest cached pfn: */ 1022 for (i = 0; i < num_var_ranges; i++) { 1023 type = range_state[i].type; 1024 if (type != MTRR_TYPE_WRBACK) 1025 continue; 1026 base = range_state[i].base_pfn; 1027 size = range_state[i].size_pfn; 1028 if (highest_pfn < base + size) 1029 highest_pfn = base + size; 1030 } 1031 1032 /* kvm/qemu doesn't have mtrr set right, don't trim them all: */ 1033 if (!highest_pfn) { 1034 printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n"); 1035 return 0; 1036 } 1037 1038 /* Check entries number: */ 1039 memset(num, 0, sizeof(num)); 1040 for (i = 0; i < num_var_ranges; i++) { 1041 type = range_state[i].type; 1042 if (type >= MTRR_NUM_TYPES) 1043 continue; 1044 size = range_state[i].size_pfn; 1045 if (!size) 1046 type = MTRR_NUM_TYPES; 1047 num[type]++; 1048 } 1049 1050 /* No entry for WB? */ 1051 if (!num[MTRR_TYPE_WRBACK]) 1052 return 0; 1053 1054 /* Check if we only had WB and UC: */ 1055 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != 1056 num_var_ranges - num[MTRR_NUM_TYPES]) 1057 return 0; 1058 1059 memset(range, 0, sizeof(range)); 1060 nr_range = 0; 1061 if (mtrr_tom2) { 1062 range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); 1063 range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; 1064 if (highest_pfn < range[nr_range].end + 1) 1065 highest_pfn = range[nr_range].end + 1; 1066 nr_range++; 1067 } 1068 nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); 1069 1070 /* Check the head: */ 1071 total_trim_size = 0; 1072 if (range[0].start) 1073 total_trim_size += real_trim_memory(0, range[0].start); 1074 1075 /* Check the holes: */ 1076 for (i = 0; i < nr_range - 1; i++) { 1077 if (range[i].end + 1 < range[i+1].start) 1078 total_trim_size += real_trim_memory(range[i].end + 1, 1079 range[i+1].start); 1080 } 1081 1082 /* Check the top: */ 1083 i = nr_range - 1; 1084 if (range[i].end + 1 < end_pfn) 1085 total_trim_size += real_trim_memory(range[i].end + 1, 1086 end_pfn); 1087 1088 if (total_trim_size) { 1089 pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20); 1090 1091 if (!changed_by_mtrr_cleanup) 1092 WARN_ON(1); 1093 1094 pr_info("update e820 for mtrr\n"); 1095 update_e820(); 1096 1097 return 1; 1098 } 1099 1100 return 0; 1101 } 1102