/*
 * MTRR (Memory Type Range Register) cleanup
 *
 * Copyright (C) 2009 Yinghai Lu
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/sort.h>
#include <linux/mutex.h>
#include <linux/uaccess.h>
#include <linux/kvm_para.h>

#include <asm/processor.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/msr.h>

#include "mtrr.h"

struct res_range {
	unsigned long start;
	unsigned long end;
};

struct var_mtrr_range_state {
	unsigned long base_pfn;
	unsigned long size_pfn;
	mtrr_type type;
};

struct var_mtrr_state {
	unsigned long range_startk;
	unsigned long range_sizek;
	unsigned long chunk_sizek;
	unsigned long gran_sizek;
	unsigned int reg;
};

/* Should be related to MTRR_VAR_RANGES nums */
#define RANGE_NUM	256

static struct res_range __initdata range[RANGE_NUM];
static int __initdata nr_range;

static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];

static int __initdata debug_print;
#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)

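/*
 * Small helpers for the res_range list above: add_range() appends an
 * entry, add_range_with_merge() folds a new [start, end] into an existing
 * overlapping or adjacent entry when possible, and subtract_range()
 * punches a hole out of every entry it intersects.
 */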
static int __init
add_range(struct res_range *range, int nr_range,
	  unsigned long start, unsigned long end)
{
	/* Out of slots: */
	if (nr_range >= RANGE_NUM)
		return nr_range;

	range[nr_range].start = start;
	range[nr_range].end = end;

	nr_range++;

	return nr_range;
}

static int __init
add_range_with_merge(struct res_range *range, int nr_range,
		     unsigned long start, unsigned long end)
{
	int i;

	/* Try to merge it with an existing one: */
	for (i = 0; i < nr_range; i++) {
		unsigned long final_start, final_end;
		unsigned long common_start, common_end;

		if (!range[i].end)
			continue;

		common_start = max(range[i].start, start);
		common_end = min(range[i].end, end);
		if (common_start > common_end + 1)
			continue;

		final_start = min(range[i].start, start);
		final_end = max(range[i].end, end);

		range[i].start = final_start;
		range[i].end = final_end;
		return nr_range;
	}

	/* Need to add it: */
	return add_range(range, nr_range, start, end);
}

static void __init
subtract_range(struct res_range *range, unsigned long start, unsigned long end)
{
	int i, j;

	for (j = 0; j < RANGE_NUM; j++) {
		if (!range[j].end)
			continue;

		if (start <= range[j].start && end >= range[j].end) {
			range[j].start = 0;
			range[j].end = 0;
			continue;
		}

		if (start <= range[j].start && end < range[j].end &&
		    range[j].start < end + 1) {
			range[j].start = end + 1;
			continue;
		}

		if (start > range[j].start && end >= range[j].end &&
		    range[j].end > start - 1) {
			range[j].end = start - 1;
			continue;
		}

		if (start > range[j].start && end < range[j].end) {
			/* Find the new spare: */
			for (i = 0; i < RANGE_NUM; i++) {
				if (range[i].end == 0)
					break;
			}
			if (i < RANGE_NUM) {
				range[i].end = range[j].end;
				range[i].start = end + 1;
			} else {
				printk(KERN_ERR "run out of slots in ranges\n");
			}
			range[j].end = start - 1;
			continue;
		}
	}
}

static int __init cmp_range(const void *x1, const void *x2)
{
	const struct res_range *r1 = x1;
	const struct res_range *r2 = x2;
	long start1, start2;

	start1 = r1->start;
	start2 = r2->start;

	return start1 - start2;
}

static int __init clean_sort_range(struct res_range *range, int az)
{
	int i, j, k = az - 1, nr_range = 0;

	/* Move used entries to the front, clearing the vacated slots: */
	for (i = 0; i < k; i++) {
		if (range[i].end)
			continue;
		for (j = k; j > i; j--) {
			if (range[j].end) {
				k = j;
				break;
			}
		}
		if (j == i)
			break;
		range[i].start = range[k].start;
		range[i].end = range[k].end;
		range[k].start = 0;
		range[k].end = 0;
		k--;
	}
	/* Count the used entries: */
	for (i = 0; i < az; i++) {
		if (!range[i].end) {
			nr_range = i;
			break;
		}
	}

	/* Sort them: */
	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);

	return nr_range;
}

#define BIOS_BUG_MSG KERN_WARNING \
	"WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"

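/*
 * Build the list of RAM ranges covered by write-back variable MTRRs,
 * subtract any UC/WP ranges and the caller-supplied "extra remove"
 * window, then sort the result. Returns the number of ranges.
 */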
static int __init
x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
		       unsigned long extra_remove_base,
		       unsigned long extra_remove_size)
{
	unsigned long base, size;
	mtrr_type type;
	int i;

	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type != MTRR_TYPE_WRBACK)
			continue;
		base = range_state[i].base_pfn;
		size = range_state[i].size_pfn;
		nr_range = add_range_with_merge(range, nr_range, base,
						base + size - 1);
	}
	if (debug_print) {
		printk(KERN_DEBUG "After WB checking\n");
		for (i = 0; i < nr_range; i++)
			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
				range[i].start, range[i].end + 1);
	}

	/* Take out UC ranges: */
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type != MTRR_TYPE_UNCACHABLE &&
		    type != MTRR_TYPE_WRPROT)
			continue;
		size = range_state[i].size_pfn;
		if (!size)
			continue;
		base = range_state[i].base_pfn;
		if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed &&
		    (mtrr_state.enabled & 1)) {
			/* Var MTRR contains UC entry below 1M? Skip it: */
			printk(BIOS_BUG_MSG, i);
			if (base + size <= (1<<(20-PAGE_SHIFT)))
				continue;
			size -= (1<<(20-PAGE_SHIFT)) - base;
			base = 1<<(20-PAGE_SHIFT);
		}
		subtract_range(range, base, base + size - 1);
	}
	if (extra_remove_size)
		subtract_range(range, extra_remove_base,
			       extra_remove_base + extra_remove_size - 1);

	if (debug_print) {
		printk(KERN_DEBUG "After UC checking\n");
		for (i = 0; i < RANGE_NUM; i++) {
			if (!range[i].end)
				continue;
			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
				range[i].start, range[i].end + 1);
		}
	}

	/* Sort the ranges: */
	nr_range = clean_sort_range(range, RANGE_NUM);
	if (debug_print) {
		printk(KERN_DEBUG "After sorting\n");
		for (i = 0; i < nr_range; i++)
			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
				range[i].start, range[i].end + 1);
	}

	/* Clear out the entries that are not used: */
	for (i = nr_range; i < RANGE_NUM; i++)
		memset(&range[i], 0, sizeof(range[i]));

	return nr_range;
}

#ifdef CONFIG_MTRR_SANITIZER

static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
{
	unsigned long sum = 0;
	int i;

	for (i = 0; i < nr_range; i++)
		sum += range[i].end + 1 - range[i].start;

	return sum;
}

static int enable_mtrr_cleanup __initdata =
	CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;

static int __init disable_mtrr_cleanup_setup(char *str)
{
	enable_mtrr_cleanup = 0;
	return 0;
}
early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);

static int __init enable_mtrr_cleanup_setup(char *str)
{
	enable_mtrr_cleanup = 1;
	return 0;
}
early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);

static int __init mtrr_cleanup_debug_setup(char *str)
{
	debug_print = 1;
	return 0;
}
early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);

static void __init
set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
	     unsigned char type, unsigned int address_bits)
{
	u32 base_lo, base_hi, mask_lo, mask_hi;
	u64 base, mask;

	if (!sizek) {
		fill_mtrr_var_range(reg, 0, 0, 0, 0);
		return;
	}

	mask = (1ULL << address_bits) - 1;
	mask &= ~((((u64)sizek) << 10) - 1);

	base = ((u64)basek) << 10;

	base |= type;
	mask |= 0x800;

	base_lo = base & ((1ULL<<32) - 1);
	base_hi = base >> 32;

	mask_lo = mask & ((1ULL<<32) - 1);
	mask_hi = mask >> 32;

	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
}

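/*
 * save_var_mtrr() only records a proposed setting in range_state[] (in
 * page frames); set_var_mtrr_all() later pushes the whole range_state[]
 * array into the hardware via set_var_mtrr().
 */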
static void __init
save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
	      unsigned char type)
{
	range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
	range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
	range_state[reg].type = type;
}

static void __init set_var_mtrr_all(unsigned int address_bits)
{
	unsigned long basek, sizek;
	unsigned char type;
	unsigned int reg;

	for (reg = 0; reg < num_var_ranges; reg++) {
		basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
		sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
		type = range_state[reg].type;

		set_var_mtrr(reg, basek, sizek, type, address_bits);
	}
}

static unsigned long to_size_factor(unsigned long sizek, char *factorp)
{
	unsigned long base = sizek;
	char factor;

	if (base & ((1<<10) - 1)) {
		/* Not MB-aligned: */
		factor = 'K';
	} else if (base & ((1<<20) - 1)) {
		factor = 'M';
		base >>= 10;
	} else {
		factor = 'G';
		base >>= 20;
	}

	*factorp = factor;

	return base;
}

/*
 * Split range_startk/range_sizek into naturally aligned power-of-two
 * blocks and record one variable MTRR of the given type for each,
 * starting at register 'reg'. Returns the next free register index.
 */
static unsigned int __init
range_to_mtrr(unsigned int reg, unsigned long range_startk,
	      unsigned long range_sizek, unsigned char type)
{
	if (!range_sizek || (reg >= num_var_ranges))
		return reg;

	while (range_sizek) {
		unsigned long max_align, align;
		unsigned long sizek;

		/* Compute the maximum size with which we can make a range: */
		if (range_startk)
			max_align = ffs(range_startk) - 1;
		else
			max_align = 32;

		align = fls(range_sizek) - 1;
		if (align > max_align)
			align = max_align;

		sizek = 1 << align;
		if (debug_print) {
			char start_factor = 'K', size_factor = 'K';
			unsigned long start_base, size_base;

			start_base = to_size_factor(range_startk, &start_factor);
			size_base = to_size_factor(sizek, &size_factor);

			Dprintk("Setting variable MTRR %d, "
				"base: %ld%cB, range: %ld%cB, type %s\n",
				reg, start_base, start_factor,
				size_base, size_factor,
				(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
				   ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")
				);
		}
		save_var_mtrr(reg++, range_startk, sizek, type);
		range_startk += sizek;
		range_sizek -= sizek;
		if (reg >= num_var_ranges)
			break;
	}
	return reg;
}

static unsigned __init
range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
			unsigned long sizek)
{
	unsigned long hole_basek, hole_sizek;
	unsigned long second_basek, second_sizek;
	unsigned long range0_basek, range0_sizek;
	unsigned long range_basek, range_sizek;
	unsigned long chunk_sizek;
	unsigned long gran_sizek;

	hole_basek = 0;
	hole_sizek = 0;
	second_basek = 0;
	second_sizek = 0;
	chunk_sizek = state->chunk_sizek;
	gran_sizek = state->gran_sizek;

	/* Align with gran size to prevent small blocks from using up MTRRs: */
	range_basek = ALIGN(state->range_startk, gran_sizek);
	if ((range_basek > basek) && basek)
		return second_sizek;

	state->range_sizek -= (range_basek - state->range_startk);
	range_sizek = ALIGN(state->range_sizek, gran_sizek);

	while (range_sizek > state->range_sizek) {
		range_sizek -= gran_sizek;
		if (!range_sizek)
			return 0;
	}
	state->range_sizek = range_sizek;

	/* Try to append some small hole: */
	range0_basek = state->range_startk;
	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);

	/* No increase: */
	if (range0_sizek == state->range_sizek) {
		Dprintk("rangeX: %016lx - %016lx\n",
			range0_basek<<10,
			(range0_basek + state->range_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, range0_basek,
				state->range_sizek, MTRR_TYPE_WRBACK);
		return 0;
	}

	/* Only cut back when it is not the last one: */
	if (sizek) {
		while (range0_basek + range0_sizek > (basek + sizek)) {
			if (range0_sizek >= chunk_sizek)
				range0_sizek -= chunk_sizek;
			else
				range0_sizek = 0;

			if (!range0_sizek)
				break;
		}
	}

second_try:
	range_basek = range0_basek + range0_sizek;

	/* One hole in the middle: */
	if (range_basek > basek && range_basek <= (basek + sizek))
		second_sizek = range_basek - basek;

	if (range0_sizek > state->range_sizek) {

		/* One hole in the middle or at the end: */
		hole_sizek = range0_sizek - state->range_sizek - second_sizek;

		/* Hole size should be less than half of range0 size: */
		if (hole_sizek >= (range0_sizek >> 1) &&
		    range0_sizek >= chunk_sizek) {
			range0_sizek -= chunk_sizek;
			second_sizek = 0;
			hole_sizek = 0;

			goto second_try;
		}
	}

	if (range0_sizek) {
		Dprintk("range0: %016lx - %016lx\n",
			range0_basek<<10,
			(range0_basek + range0_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, range0_basek,
				range0_sizek, MTRR_TYPE_WRBACK);
	}

	if (range0_sizek < state->range_sizek) {
		/* Need to handle the left over range: */
		range_sizek = state->range_sizek - range0_sizek;

		Dprintk("range: %016lx - %016lx\n",
			range_basek<<10,
			(range_basek + range_sizek)<<10);

		state->reg = range_to_mtrr(state->reg, range_basek,
				range_sizek, MTRR_TYPE_WRBACK);
	}

	if (hole_sizek) {
		hole_basek = range_basek - hole_sizek - second_sizek;
		Dprintk("hole: %016lx - %016lx\n",
			hole_basek<<10,
			(hole_basek + hole_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, hole_basek,
				hole_sizek, MTRR_TYPE_UNCACHABLE);
	}

	return second_sizek;
}

/*
 * Accumulate contiguous RAM ranges (and anything below 1M) into the
 * running range in 'state'; once a gap is seen, flush the accumulated
 * range to MTRRs via range_to_mtrr_with_hole() and start a new one.
 */
static void __init
set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
		   unsigned long size_pfn)
{
	unsigned long basek, sizek;
	unsigned long second_sizek = 0;

	if (state->reg >= num_var_ranges)
		return;

	basek = base_pfn << (PAGE_SHIFT - 10);
	sizek = size_pfn << (PAGE_SHIFT - 10);

	/* See if I can merge with the last range: */
	if ((basek <= 1024) ||
	    (state->range_startk + state->range_sizek == basek)) {
		unsigned long endk = basek + sizek;
		state->range_sizek = endk - state->range_startk;
		return;
	}
	/* Write the range mtrrs: */
	if (state->range_sizek != 0)
		second_sizek = range_to_mtrr_with_hole(state, basek, sizek);

	/* Allocate an msr: */
	state->range_startk = basek + second_sizek;
	state->range_sizek = sizek - second_sizek;
}

/* Minimum size of an mtrr block that can take a hole: */
static u64 mtrr_chunk_size __initdata = (256ULL<<20);

static int __init parse_mtrr_chunk_size_opt(char *p)
{
	if (!p)
		return -EINVAL;
	mtrr_chunk_size = memparse(p, &p);
	return 0;
}
early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);

/* Granularity of an mtrr block: */
static u64 mtrr_gran_size __initdata;

static int __init parse_mtrr_gran_size_opt(char *p)
{
	if (!p)
		return -EINVAL;
	mtrr_gran_size = memparse(p, &p);
	return 0;
}
early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);

static unsigned long nr_mtrr_spare_reg __initdata =
	CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;

static int __init parse_mtrr_spare_reg(char *arg)
{
	if (arg)
		nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
	return 0;
}
early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);

static int __init
x86_setup_var_mtrrs(struct res_range *range, int nr_range,
		    u64 chunk_size, u64 gran_size)
{
	struct var_mtrr_state var_state;
	int num_reg;
	int i;

	var_state.range_startk = 0;
	var_state.range_sizek = 0;
	var_state.reg = 0;
	var_state.chunk_sizek = chunk_size >> 10;
	var_state.gran_sizek = gran_size >> 10;

	memset(range_state, 0, sizeof(range_state));

	/* Write the range: */
	for (i = 0; i < nr_range; i++) {
		set_var_mtrr_range(&var_state, range[i].start,
				   range[i].end - range[i].start + 1);
	}

	/* Write the last range: */
	if (var_state.range_sizek != 0)
		range_to_mtrr_with_hole(&var_state, 0, 0);

	num_reg = var_state.reg;
	/* Clear out the extra MTRRs: */
	while (var_state.reg < num_var_ranges) {
		save_var_mtrr(var_state.reg, 0, 0, 0);
		var_state.reg++;
	}

	return num_reg;
}

struct mtrr_cleanup_result {
	unsigned long gran_sizek;
	unsigned long chunk_sizek;
	unsigned long lose_cover_sizek;
	unsigned int num_reg;
	int bad;
};

/*
 * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G
 * chunk size: gran_size, ..., 2G
 * so we need (1+16)*8
 */
#define NUM_RESULT	136
#define PSHIFT		(PAGE_SHIFT - 10)

static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
static unsigned long __initdata min_loss_pfn[RANGE_NUM];

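/*
 * Dump the variable MTRR layout currently cached in range_state[], one
 * line per register, with base and size scaled to K/M/G.
 */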
static void __init print_out_mtrr_range_state(void)
{
	char start_factor = 'K', size_factor = 'K';
	unsigned long start_base, size_base;
	mtrr_type type;
	int i;

	for (i = 0; i < num_var_ranges; i++) {

		size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
		if (!size_base)
			continue;

		size_base = to_size_factor(size_base, &size_factor);
		start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
		start_base = to_size_factor(start_base, &start_factor);
		type = range_state[i].type;

		printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
			i, start_base, start_factor,
			size_base, size_factor,
			(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
			    ((type == MTRR_TYPE_WRPROT) ? "WP" :
			     ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
			);
	}
}

static int __init mtrr_need_cleanup(void)
{
	int i;
	mtrr_type type;
	unsigned long size;
	/* Extra one for all 0: */
	int num[MTRR_NUM_TYPES + 1];

	/* Check the number of entries: */
	memset(num, 0, sizeof(num));
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		size = range_state[i].size_pfn;
		if (type >= MTRR_NUM_TYPES)
			continue;
		if (!size)
			type = MTRR_NUM_TYPES;
		num[type]++;
	}

	/* Check if we got UC entries: */
	if (!num[MTRR_TYPE_UNCACHABLE])
		return 0;

	/* Check if we only had WB and UC: */
	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
	    num_var_ranges - num[MTRR_NUM_TYPES])
		return 0;

	return 1;
}

static unsigned long __initdata range_sums;

/*
 * Try one (chunk_size, gran_size) candidate: lay out variable MTRRs for
 * it in range_state[], rebuild the covered-RAM map from that layout, and
 * record in result[i] how many registers it uses and how much RAM
 * coverage is lost compared to the original map.
 */
static void __init
mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
		      unsigned long x_remove_base,
		      unsigned long x_remove_size, int i)
{
	static struct res_range range_new[RANGE_NUM];
	unsigned long range_sums_new;
	static int nr_range_new;
	int num_reg;

	/* Convert ranges to var ranges state: */
	num_reg = x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);

	/* We got a new setting in range_state, check it: */
	memset(range_new, 0, sizeof(range_new));
	nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
					      x_remove_base, x_remove_size);
	range_sums_new = sum_ranges(range_new, nr_range_new);

	result[i].chunk_sizek = chunk_size >> 10;
	result[i].gran_sizek = gran_size >> 10;
	result[i].num_reg = num_reg;

	if (range_sums < range_sums_new) {
		result[i].lose_cover_sizek = (range_sums_new - range_sums) << PSHIFT;
		result[i].bad = 1;
	} else {
		result[i].lose_cover_sizek = (range_sums - range_sums_new) << PSHIFT;
	}

	/* Double check it: */
	if (!result[i].bad && !result[i].lose_cover_sizek) {
		if (nr_range_new != nr_range || memcmp(range, range_new, sizeof(range)))
			result[i].bad = 1;
	}

	if (!result[i].bad && (range_sums - range_sums_new < min_loss_pfn[num_reg]))
		min_loss_pfn[num_reg] = range_sums - range_sums_new;
}

static void __init mtrr_print_out_one_result(int i)
{
	unsigned long gran_base, chunk_base, lose_base;
	char gran_factor, chunk_factor, lose_factor;

	gran_base = to_size_factor(result[i].gran_sizek, &gran_factor);
	chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor);
	lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor);

	pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t",
		result[i].bad ? "*BAD*" : " ",
		gran_base, gran_factor, chunk_base, chunk_factor);
	pr_cont("num_reg: %d \tlose cover RAM: %s%ld%c\n",
		result[i].num_reg, result[i].bad ? "-" : "",
		lose_base, lose_factor);
}

/*
 * Pick the candidate that loses no RAM coverage while leaving at least
 * nr_mtrr_spare_reg variable MTRRs unused; returns -1 if none qualifies.
 */
static int __init mtrr_search_optimal_index(void)
{
	int num_reg_good;
	int index_good;
	int i;

	if (nr_mtrr_spare_reg >= num_var_ranges)
		nr_mtrr_spare_reg = num_var_ranges - 1;

	num_reg_good = -1;
	for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
		if (!min_loss_pfn[i])
			num_reg_good = i;
	}

	index_good = -1;
	if (num_reg_good != -1) {
		for (i = 0; i < NUM_RESULT; i++) {
			if (!result[i].bad &&
			    result[i].num_reg == num_reg_good &&
			    !result[i].lose_cover_sizek) {
				index_good = i;
				break;
			}
		}
	}

	return index_good;
}

int __init mtrr_cleanup(unsigned address_bits)
{
	unsigned long x_remove_base, x_remove_size;
	unsigned long base, size, def, dummy;
	u64 chunk_size, gran_size;
	mtrr_type type;
	int index_good;
	int i;

	if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
		return 0;

	rdmsr(MSR_MTRRdefType, def, dummy);
	def &= 0xff;
	if (def != MTRR_TYPE_UNCACHABLE)
		return 0;

	/* Get it and store it aside: */
	memset(range_state, 0, sizeof(range_state));
	for (i = 0; i < num_var_ranges; i++) {
		mtrr_if->get(i, &base, &size, &type);
		range_state[i].base_pfn = base;
		range_state[i].size_pfn = size;
		range_state[i].type = type;
	}

	/* Check if we need to handle it and can handle it: */
	if (!mtrr_need_cleanup())
		return 0;

	/* Print original var MTRRs first, for debugging: */
	printk(KERN_DEBUG "original variable MTRRs\n");
	print_out_mtrr_range_state();

	memset(range, 0, sizeof(range));
	x_remove_size = 0;
	x_remove_base = 1 << (32 - PAGE_SHIFT);
	if (mtrr_tom2)
		x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base;

	nr_range = x86_get_mtrr_mem_range(range, 0, x_remove_base, x_remove_size);
	/*
	 * [0, 1M) should always be covered by var mtrr with WB
	 * and fixed mtrrs should take effect before var mtrr for it:
	 */
	nr_range = add_range_with_merge(range, nr_range, 0,
					(1ULL<<(20 - PAGE_SHIFT)) - 1);
	/* Sort the ranges: */
	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);

	range_sums = sum_ranges(range, nr_range);
	printk(KERN_INFO "total RAM covered: %ldM\n",
	       range_sums >> (20 - PAGE_SHIFT));

	if (mtrr_chunk_size && mtrr_gran_size) {
		i = 0;
		mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
				      x_remove_base, x_remove_size, i);

		mtrr_print_out_one_result(i);

		if (!result[i].bad) {
			set_var_mtrr_all(address_bits);
			printk(KERN_DEBUG "New variable MTRRs\n");
			print_out_mtrr_range_state();
			return 1;
		}
		printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
		       "will find optimal one\n");
	}

	i = 0;
	memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
	memset(result, 0, sizeof(result));
	for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {

		for (chunk_size = gran_size; chunk_size < (1ULL<<32);
		     chunk_size <<= 1) {

			if (i >= NUM_RESULT)
				continue;

			mtrr_calc_range_state(chunk_size, gran_size,
					      x_remove_base, x_remove_size, i);
			if (debug_print) {
				mtrr_print_out_one_result(i);
				printk(KERN_INFO "\n");
			}

			i++;
		}
	}

	/* Try to find the optimal index: */
	index_good = mtrr_search_optimal_index();

	if (index_good != -1) {
		printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
		i = index_good;
		mtrr_print_out_one_result(i);

		/* Convert ranges to var ranges state: */
		chunk_size = result[i].chunk_sizek;
		chunk_size <<= 10;
		gran_size = result[i].gran_sizek;
		gran_size <<= 10;
		x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
		set_var_mtrr_all(address_bits);
		printk(KERN_DEBUG "New variable MTRRs\n");
		print_out_mtrr_range_state();
		return 1;
	} else {
		/* Print out all of them: */
		for (i = 0; i < NUM_RESULT; i++)
			mtrr_print_out_one_result(i);
	}

	printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
	printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");

	return 0;
}
#else
int __init mtrr_cleanup(unsigned address_bits)
{
	return 0;
}
#endif

static int disable_mtrr_trim;

static int __init disable_mtrr_trim_setup(char *str)
{
	disable_mtrr_trim = 1;
	return 0;
}
early_param("disable_mtrr_trim", disable_mtrr_trim_setup);

/*
 * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
 * for memory >4GB. Check for that here.
 * Note this won't check whether the MTRRs below 4GB (where the magic bit
 * doesn't apply) are wrong, but so far we don't know of any such case in
 * the wild.
 */
#define Tom2Enabled		(1U << 21)
#define Tom2ForceMemTypeWB	(1U << 22)

int __init amd_special_default_mtrr(void)
{
	u32 l, h;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
		return 0;
	if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
		return 0;
	/* In case some hypervisor doesn't pass SYSCFG through: */
	if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
		return 0;
	/*
	 * Memory between 4GB and top of mem is forced WB by this magic bit.
	 * Reserved before K8RevF, but should be zero there.
	 */
	if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
	    (Tom2Enabled | Tom2ForceMemTypeWB))
		return 1;
	return 0;
}

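/*
 * Convert the e820 entries covering [start_pfn, limit_pfn) from RAM to
 * reserved and return the amount of address space that was changed.
 */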
static u64 __init
real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn)
{
	u64 trim_start, trim_size;

	trim_start = start_pfn;
	trim_start <<= PAGE_SHIFT;

	trim_size = limit_pfn;
	trim_size <<= PAGE_SHIFT;
	trim_size -= trim_start;

	return e820_update_range(trim_start, trim_size, E820_RAM, E820_RESERVED);
}

/**
 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
 * @end_pfn: ending page frame number
 *
 * Some buggy BIOSes don't set up the MTRRs properly for systems with certain
 * memory configurations. This routine checks that the highest MTRR matches
 * the end of memory, to make sure the MTRRs having a write back type cover
 * all of the memory the kernel is intending to use. If not, it'll trim any
 * memory off the end by adjusting end_pfn, removing it from the kernel's
 * allocation pools, warning the user with an obnoxious message.
 */
int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
{
	unsigned long i, base, size, highest_pfn = 0, def, dummy;
	mtrr_type type;
	u64 total_trim_size;
	/* extra one for all 0 */
	int num[MTRR_NUM_TYPES + 1];

	/*
	 * Make sure we only trim uncachable memory on machines that
	 * support the Intel MTRR architecture:
	 */
	if (!is_cpu(INTEL) || disable_mtrr_trim)
		return 0;

	rdmsr(MSR_MTRRdefType, def, dummy);
	def &= 0xff;
	if (def != MTRR_TYPE_UNCACHABLE)
		return 0;

	/* Get it and store it aside: */
	memset(range_state, 0, sizeof(range_state));
	for (i = 0; i < num_var_ranges; i++) {
		mtrr_if->get(i, &base, &size, &type);
		range_state[i].base_pfn = base;
		range_state[i].size_pfn = size;
		range_state[i].type = type;
	}

	/* Find the highest cached pfn: */
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type != MTRR_TYPE_WRBACK)
			continue;
		base = range_state[i].base_pfn;
		size = range_state[i].size_pfn;
		if (highest_pfn < base + size)
			highest_pfn = base + size;
	}

	/* kvm/qemu doesn't have the MTRRs set up right; don't trim all of memory: */
	if (!highest_pfn) {
		printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
		return 0;
	}

	/* Check the number of entries: */
	memset(num, 0, sizeof(num));
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type >= MTRR_NUM_TYPES)
			continue;
		size = range_state[i].size_pfn;
		if (!size)
			type = MTRR_NUM_TYPES;
		num[type]++;
	}

	/* No entry for WB? */
	if (!num[MTRR_TYPE_WRBACK])
		return 0;

	/* Check if we only had WB and UC: */
	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
	    num_var_ranges - num[MTRR_NUM_TYPES])
		return 0;

	memset(range, 0, sizeof(range));
	nr_range = 0;
	if (mtrr_tom2) {
		range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
		range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
		if (highest_pfn < range[nr_range].end + 1)
			highest_pfn = range[nr_range].end + 1;
		nr_range++;
	}
	nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);

	/* Check the head: */
	total_trim_size = 0;
	if (range[0].start)
		total_trim_size += real_trim_memory(0, range[0].start);

	/* Check the holes: */
	for (i = 0; i < nr_range - 1; i++) {
		if (range[i].end + 1 < range[i+1].start)
			total_trim_size += real_trim_memory(range[i].end + 1,
							    range[i+1].start);
	}

	/* Check the top: */
	i = nr_range - 1;
	if (range[i].end + 1 < end_pfn)
		total_trim_size += real_trim_memory(range[i].end + 1,
						    end_pfn);

	if (total_trim_size) {
		pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20);

		if (!changed_by_mtrr_cleanup)
			WARN_ON(1);

		pr_info("update e820 for mtrr\n");
		update_e820();

		return 1;
	}

	return 0;
}