1 /* Generic MTRR (Memory Type Range Register) driver. 2 3 Copyright (C) 1997-2000 Richard Gooch 4 Copyright (c) 2002 Patrick Mochel 5 6 This library is free software; you can redistribute it and/or 7 modify it under the terms of the GNU Library General Public 8 License as published by the Free Software Foundation; either 9 version 2 of the License, or (at your option) any later version. 10 11 This library is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 Library General Public License for more details. 15 16 You should have received a copy of the GNU Library General Public 17 License along with this library; if not, write to the Free 18 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 19 20 Richard Gooch may be reached by email at rgooch@atnf.csiro.au 21 The postal address is: 22 Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. 23 24 Source: "Pentium Pro Family Developer's Manual, Volume 3: 25 Operating System Writer's Guide" (Intel document number 242692), 26 section 11.11.7 27 28 This was cleaned and made readable by Patrick Mochel <mochel@osdl.org> 29 on 6-7 March 2002. 30 Source: Intel Architecture Software Developers Manual, Volume 3: 31 System Programming Guide; Section 9.11. (1997 edition - PPro). 32 */ 33 34 #include <linux/types.h> /* FIXME: kvm_para.h needs this */ 35 36 #include <linux/stop_machine.h> 37 #include <linux/kvm_para.h> 38 #include <linux/uaccess.h> 39 #include <linux/export.h> 40 #include <linux/mutex.h> 41 #include <linux/init.h> 42 #include <linux/sort.h> 43 #include <linux/cpu.h> 44 #include <linux/pci.h> 45 #include <linux/smp.h> 46 #include <linux/syscore_ops.h> 47 #include <linux/rcupdate.h> 48 49 #include <asm/cacheinfo.h> 50 #include <asm/cpufeature.h> 51 #include <asm/e820/api.h> 52 #include <asm/mtrr.h> 53 #include <asm/msr.h> 54 #include <asm/memtype.h> 55 56 #include "mtrr.h" 57 58 /* arch_phys_wc_add returns an MTRR register index plus this offset. */ 59 #define MTRR_TO_PHYS_WC_OFFSET 1000 60 61 u32 num_var_ranges; 62 63 unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; 64 DEFINE_MUTEX(mtrr_mutex); 65 66 const struct mtrr_ops *mtrr_if; 67 68 /* Returns non-zero if we have the write-combining memory type */ 69 static int have_wrcomb(void) 70 { 71 struct pci_dev *dev; 72 73 dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL); 74 if (dev != NULL) { 75 /* 76 * ServerWorks LE chipsets < rev 6 have problems with 77 * write-combining. Don't allow it and leave room for other 78 * chipsets to be tagged 79 */ 80 if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && 81 dev->device == PCI_DEVICE_ID_SERVERWORKS_LE && 82 dev->revision <= 5) { 83 pr_info("Serverworks LE rev < 6 detected. Write-combining disabled.\n"); 84 pci_dev_put(dev); 85 return 0; 86 } 87 /* 88 * Intel 450NX errata # 23. Non ascending cacheline evictions to 89 * write combining memory may resulting in data corruption 90 */ 91 if (dev->vendor == PCI_VENDOR_ID_INTEL && 92 dev->device == PCI_DEVICE_ID_INTEL_82451NX) { 93 pr_info("Intel 450NX MMC detected. Write-combining disabled.\n"); 94 pci_dev_put(dev); 95 return 0; 96 } 97 pci_dev_put(dev); 98 } 99 return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0; 100 } 101 102 static void __init init_table(void) 103 { 104 int i, max; 105 106 max = num_var_ranges; 107 for (i = 0; i < max; i++) 108 mtrr_usage_table[i] = 1; 109 } 110 111 struct set_mtrr_data { 112 unsigned long smp_base; 113 unsigned long smp_size; 114 unsigned int smp_reg; 115 mtrr_type smp_type; 116 }; 117 118 /** 119 * mtrr_rendezvous_handler - Work done in the synchronization handler. Executed 120 * by all the CPUs. 121 * @info: pointer to mtrr configuration data 122 * 123 * Returns nothing. 124 */ 125 static int mtrr_rendezvous_handler(void *info) 126 { 127 struct set_mtrr_data *data = info; 128 129 mtrr_if->set(data->smp_reg, data->smp_base, 130 data->smp_size, data->smp_type); 131 return 0; 132 } 133 134 static inline int types_compatible(mtrr_type type1, mtrr_type type2) 135 { 136 return type1 == MTRR_TYPE_UNCACHABLE || 137 type2 == MTRR_TYPE_UNCACHABLE || 138 (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) || 139 (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH); 140 } 141 142 /** 143 * set_mtrr - update mtrrs on all processors 144 * @reg: mtrr in question 145 * @base: mtrr base 146 * @size: mtrr size 147 * @type: mtrr type 148 * 149 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: 150 * 151 * 1. Queue work to do the following on all processors: 152 * 2. Disable Interrupts 153 * 3. Wait for all procs to do so 154 * 4. Enter no-fill cache mode 155 * 5. Flush caches 156 * 6. Clear PGE bit 157 * 7. Flush all TLBs 158 * 8. Disable all range registers 159 * 9. Update the MTRRs 160 * 10. Enable all range registers 161 * 11. Flush all TLBs and caches again 162 * 12. Enter normal cache mode and reenable caching 163 * 13. Set PGE 164 * 14. Wait for buddies to catch up 165 * 15. Enable interrupts. 166 * 167 * What does that mean for us? Well, stop_machine() will ensure that 168 * the rendezvous handler is started on each CPU. And in lockstep they 169 * do the state transition of disabling interrupts, updating MTRR's 170 * (the CPU vendors may each do it differently, so we call mtrr_if->set() 171 * callback and let them take care of it.) and enabling interrupts. 172 * 173 * Note that the mechanism is the same for UP systems, too; all the SMP stuff 174 * becomes nops. 175 */ 176 static void set_mtrr(unsigned int reg, unsigned long base, unsigned long size, 177 mtrr_type type) 178 { 179 struct set_mtrr_data data = { .smp_reg = reg, 180 .smp_base = base, 181 .smp_size = size, 182 .smp_type = type 183 }; 184 185 stop_machine_cpuslocked(mtrr_rendezvous_handler, &data, cpu_online_mask); 186 187 generic_rebuild_map(); 188 } 189 190 /** 191 * mtrr_add_page - Add a memory type region 192 * @base: Physical base address of region in pages (in units of 4 kB!) 193 * @size: Physical size of region in pages (4 kB) 194 * @type: Type of MTRR desired 195 * @increment: If this is true do usage counting on the region 196 * 197 * Memory type region registers control the caching on newer Intel and 198 * non Intel processors. This function allows drivers to request an 199 * MTRR is added. The details and hardware specifics of each processor's 200 * implementation are hidden from the caller, but nevertheless the 201 * caller should expect to need to provide a power of two size on an 202 * equivalent power of two boundary. 203 * 204 * If the region cannot be added either because all regions are in use 205 * or the CPU cannot support it a negative value is returned. On success 206 * the register number for this entry is returned, but should be treated 207 * as a cookie only. 208 * 209 * On a multiprocessor machine the changes are made to all processors. 210 * This is required on x86 by the Intel processors. 211 * 212 * The available types are 213 * 214 * %MTRR_TYPE_UNCACHABLE - No caching 215 * 216 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever 217 * 218 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts 219 * 220 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes 221 * 222 * BUGS: Needs a quiet flag for the cases where drivers do not mind 223 * failures and do not wish system log messages to be sent. 224 */ 225 int mtrr_add_page(unsigned long base, unsigned long size, 226 unsigned int type, bool increment) 227 { 228 unsigned long lbase, lsize; 229 int i, replace, error; 230 mtrr_type ltype; 231 232 if (!mtrr_enabled()) 233 return -ENXIO; 234 235 error = mtrr_if->validate_add_page(base, size, type); 236 if (error) 237 return error; 238 239 if (type >= MTRR_NUM_TYPES) { 240 pr_warn("type: %u invalid\n", type); 241 return -EINVAL; 242 } 243 244 /* If the type is WC, check that this processor supports it */ 245 if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { 246 pr_warn("your processor doesn't support write-combining\n"); 247 return -ENOSYS; 248 } 249 250 if (!size) { 251 pr_warn("zero sized request\n"); 252 return -EINVAL; 253 } 254 255 if ((base | (base + size - 1)) >> 256 (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) { 257 pr_warn("base or size exceeds the MTRR width\n"); 258 return -EINVAL; 259 } 260 261 error = -EINVAL; 262 replace = -1; 263 264 /* No CPU hotplug when we change MTRR entries */ 265 cpus_read_lock(); 266 267 /* Search for existing MTRR */ 268 mutex_lock(&mtrr_mutex); 269 for (i = 0; i < num_var_ranges; ++i) { 270 mtrr_if->get(i, &lbase, &lsize, <ype); 271 if (!lsize || base > lbase + lsize - 1 || 272 base + size - 1 < lbase) 273 continue; 274 /* 275 * At this point we know there is some kind of 276 * overlap/enclosure 277 */ 278 if (base < lbase || base + size - 1 > lbase + lsize - 1) { 279 if (base <= lbase && 280 base + size - 1 >= lbase + lsize - 1) { 281 /* New region encloses an existing region */ 282 if (type == ltype) { 283 replace = replace == -1 ? i : -2; 284 continue; 285 } else if (types_compatible(type, ltype)) 286 continue; 287 } 288 pr_warn("0x%lx000,0x%lx000 overlaps existing 0x%lx000,0x%lx000\n", base, size, lbase, 289 lsize); 290 goto out; 291 } 292 /* New region is enclosed by an existing region */ 293 if (ltype != type) { 294 if (types_compatible(type, ltype)) 295 continue; 296 pr_warn("type mismatch for %lx000,%lx000 old: %s new: %s\n", 297 base, size, mtrr_attrib_to_str(ltype), 298 mtrr_attrib_to_str(type)); 299 goto out; 300 } 301 if (increment) 302 ++mtrr_usage_table[i]; 303 error = i; 304 goto out; 305 } 306 /* Search for an empty MTRR */ 307 i = mtrr_if->get_free_region(base, size, replace); 308 if (i >= 0) { 309 set_mtrr(i, base, size, type); 310 if (likely(replace < 0)) { 311 mtrr_usage_table[i] = 1; 312 } else { 313 mtrr_usage_table[i] = mtrr_usage_table[replace]; 314 if (increment) 315 mtrr_usage_table[i]++; 316 if (unlikely(replace != i)) { 317 set_mtrr(replace, 0, 0, 0); 318 mtrr_usage_table[replace] = 0; 319 } 320 } 321 } else { 322 pr_info("no more MTRRs available\n"); 323 } 324 error = i; 325 out: 326 mutex_unlock(&mtrr_mutex); 327 cpus_read_unlock(); 328 return error; 329 } 330 331 static int mtrr_check(unsigned long base, unsigned long size) 332 { 333 if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { 334 pr_warn("size and base must be multiples of 4 kiB\n"); 335 Dprintk("size: 0x%lx base: 0x%lx\n", size, base); 336 dump_stack(); 337 return -1; 338 } 339 return 0; 340 } 341 342 /** 343 * mtrr_add - Add a memory type region 344 * @base: Physical base address of region 345 * @size: Physical size of region 346 * @type: Type of MTRR desired 347 * @increment: If this is true do usage counting on the region 348 * 349 * Memory type region registers control the caching on newer Intel and 350 * non Intel processors. This function allows drivers to request an 351 * MTRR is added. The details and hardware specifics of each processor's 352 * implementation are hidden from the caller, but nevertheless the 353 * caller should expect to need to provide a power of two size on an 354 * equivalent power of two boundary. 355 * 356 * If the region cannot be added either because all regions are in use 357 * or the CPU cannot support it a negative value is returned. On success 358 * the register number for this entry is returned, but should be treated 359 * as a cookie only. 360 * 361 * On a multiprocessor machine the changes are made to all processors. 362 * This is required on x86 by the Intel processors. 363 * 364 * The available types are 365 * 366 * %MTRR_TYPE_UNCACHABLE - No caching 367 * 368 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever 369 * 370 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts 371 * 372 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes 373 * 374 * BUGS: Needs a quiet flag for the cases where drivers do not mind 375 * failures and do not wish system log messages to be sent. 376 */ 377 int mtrr_add(unsigned long base, unsigned long size, unsigned int type, 378 bool increment) 379 { 380 if (!mtrr_enabled()) 381 return -ENODEV; 382 if (mtrr_check(base, size)) 383 return -EINVAL; 384 return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, 385 increment); 386 } 387 388 /** 389 * mtrr_del_page - delete a memory type region 390 * @reg: Register returned by mtrr_add 391 * @base: Physical base address 392 * @size: Size of region 393 * 394 * If register is supplied then base and size are ignored. This is 395 * how drivers should call it. 396 * 397 * Releases an MTRR region. If the usage count drops to zero the 398 * register is freed and the region returns to default state. 399 * On success the register is returned, on failure a negative error 400 * code. 401 */ 402 int mtrr_del_page(int reg, unsigned long base, unsigned long size) 403 { 404 int i, max; 405 mtrr_type ltype; 406 unsigned long lbase, lsize; 407 int error = -EINVAL; 408 409 if (!mtrr_enabled()) 410 return -ENODEV; 411 412 max = num_var_ranges; 413 /* No CPU hotplug when we change MTRR entries */ 414 cpus_read_lock(); 415 mutex_lock(&mtrr_mutex); 416 if (reg < 0) { 417 /* Search for existing MTRR */ 418 for (i = 0; i < max; ++i) { 419 mtrr_if->get(i, &lbase, &lsize, <ype); 420 if (lbase == base && lsize == size) { 421 reg = i; 422 break; 423 } 424 } 425 if (reg < 0) { 426 Dprintk("no MTRR for %lx000,%lx000 found\n", base, size); 427 goto out; 428 } 429 } 430 if (reg >= max) { 431 pr_warn("register: %d too big\n", reg); 432 goto out; 433 } 434 mtrr_if->get(reg, &lbase, &lsize, <ype); 435 if (lsize < 1) { 436 pr_warn("MTRR %d not used\n", reg); 437 goto out; 438 } 439 if (mtrr_usage_table[reg] < 1) { 440 pr_warn("reg: %d has count=0\n", reg); 441 goto out; 442 } 443 if (--mtrr_usage_table[reg] < 1) 444 set_mtrr(reg, 0, 0, 0); 445 error = reg; 446 out: 447 mutex_unlock(&mtrr_mutex); 448 cpus_read_unlock(); 449 return error; 450 } 451 452 /** 453 * mtrr_del - delete a memory type region 454 * @reg: Register returned by mtrr_add 455 * @base: Physical base address 456 * @size: Size of region 457 * 458 * If register is supplied then base and size are ignored. This is 459 * how drivers should call it. 460 * 461 * Releases an MTRR region. If the usage count drops to zero the 462 * register is freed and the region returns to default state. 463 * On success the register is returned, on failure a negative error 464 * code. 465 */ 466 int mtrr_del(int reg, unsigned long base, unsigned long size) 467 { 468 if (!mtrr_enabled()) 469 return -ENODEV; 470 if (mtrr_check(base, size)) 471 return -EINVAL; 472 return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); 473 } 474 475 /** 476 * arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable 477 * @base: Physical base address 478 * @size: Size of region 479 * 480 * If PAT is available, this does nothing. If PAT is unavailable, it 481 * attempts to add a WC MTRR covering size bytes starting at base and 482 * logs an error if this fails. 483 * 484 * The called should provide a power of two size on an equivalent 485 * power of two boundary. 486 * 487 * Drivers must store the return value to pass to mtrr_del_wc_if_needed, 488 * but drivers should not try to interpret that return value. 489 */ 490 int arch_phys_wc_add(unsigned long base, unsigned long size) 491 { 492 int ret; 493 494 if (pat_enabled() || !mtrr_enabled()) 495 return 0; /* Success! (We don't need to do anything.) */ 496 497 ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true); 498 if (ret < 0) { 499 pr_warn("Failed to add WC MTRR for [%p-%p]; performance may suffer.", 500 (void *)base, (void *)(base + size - 1)); 501 return ret; 502 } 503 return ret + MTRR_TO_PHYS_WC_OFFSET; 504 } 505 EXPORT_SYMBOL(arch_phys_wc_add); 506 507 /* 508 * arch_phys_wc_del - undoes arch_phys_wc_add 509 * @handle: Return value from arch_phys_wc_add 510 * 511 * This cleans up after mtrr_add_wc_if_needed. 512 * 513 * The API guarantees that mtrr_del_wc_if_needed(error code) and 514 * mtrr_del_wc_if_needed(0) do nothing. 515 */ 516 void arch_phys_wc_del(int handle) 517 { 518 if (handle >= 1) { 519 WARN_ON(handle < MTRR_TO_PHYS_WC_OFFSET); 520 mtrr_del(handle - MTRR_TO_PHYS_WC_OFFSET, 0, 0); 521 } 522 } 523 EXPORT_SYMBOL(arch_phys_wc_del); 524 525 /* 526 * arch_phys_wc_index - translates arch_phys_wc_add's return value 527 * @handle: Return value from arch_phys_wc_add 528 * 529 * This will turn the return value from arch_phys_wc_add into an mtrr 530 * index suitable for debugging. 531 * 532 * Note: There is no legitimate use for this function, except possibly 533 * in printk line. Alas there is an illegitimate use in some ancient 534 * drm ioctls. 535 */ 536 int arch_phys_wc_index(int handle) 537 { 538 if (handle < MTRR_TO_PHYS_WC_OFFSET) 539 return -1; 540 else 541 return handle - MTRR_TO_PHYS_WC_OFFSET; 542 } 543 EXPORT_SYMBOL_GPL(arch_phys_wc_index); 544 545 int __initdata changed_by_mtrr_cleanup; 546 547 /** 548 * mtrr_bp_init - initialize MTRRs on the boot CPU 549 * 550 * This needs to be called early; before any of the other CPUs are 551 * initialized (i.e. before smp_init()). 552 */ 553 void __init mtrr_bp_init(void) 554 { 555 bool generic_mtrrs = cpu_feature_enabled(X86_FEATURE_MTRR); 556 const char *why = "(not available)"; 557 unsigned long config, dummy; 558 559 phys_hi_rsvd = GENMASK(31, boot_cpu_data.x86_phys_bits - 32); 560 561 if (!generic_mtrrs && mtrr_state.enabled) { 562 /* 563 * Software overwrite of MTRR state, only for generic case. 564 * Note that X86_FEATURE_MTRR has been reset in this case. 565 */ 566 init_table(); 567 mtrr_build_map(); 568 pr_info("MTRRs set to read-only\n"); 569 570 return; 571 } 572 573 if (generic_mtrrs) 574 mtrr_if = &generic_mtrr_ops; 575 else 576 mtrr_set_if(); 577 578 if (mtrr_enabled()) { 579 /* Get the number of variable MTRR ranges. */ 580 if (mtrr_if == &generic_mtrr_ops) 581 rdmsr(MSR_MTRRcap, config, dummy); 582 else 583 config = mtrr_if->var_regs; 584 num_var_ranges = config & MTRR_CAP_VCNT; 585 586 init_table(); 587 if (mtrr_if == &generic_mtrr_ops) { 588 /* BIOS may override */ 589 if (get_mtrr_state()) { 590 memory_caching_control |= CACHE_MTRR; 591 changed_by_mtrr_cleanup = mtrr_cleanup(); 592 mtrr_build_map(); 593 } else { 594 mtrr_if = NULL; 595 why = "by BIOS"; 596 } 597 } 598 } 599 600 if (!mtrr_enabled()) 601 pr_info("MTRRs disabled %s\n", why); 602 } 603 604 /** 605 * mtrr_save_state - Save current fixed-range MTRR state of the first 606 * cpu in cpu_online_mask. 607 */ 608 void mtrr_save_state(void) 609 { 610 int first_cpu; 611 612 if (!mtrr_enabled() || !mtrr_state.have_fixed) 613 return; 614 615 first_cpu = cpumask_first(cpu_online_mask); 616 smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1); 617 } 618 619 static int __init mtrr_init_finalize(void) 620 { 621 /* 622 * Map might exist if mtrr_overwrite_state() has been called or if 623 * mtrr_enabled() returns true. 624 */ 625 mtrr_copy_map(); 626 627 if (!mtrr_enabled()) 628 return 0; 629 630 if (memory_caching_control & CACHE_MTRR) { 631 if (!changed_by_mtrr_cleanup) 632 mtrr_state_warn(); 633 return 0; 634 } 635 636 mtrr_register_syscore(); 637 638 return 0; 639 } 640 subsys_initcall(mtrr_init_finalize); 641