#include <linux/export.h>
#include <linux/bitops.h>
#include <linux/elf.h>
#include <linux/mm.h>

#include <linux/io.h>
#include <linux/sched.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cpu.h>
#include <asm/pci-direct.h>

#ifdef CONFIG_X86_64
# include <asm/mmconfig.h>
# include <asm/cacheflush.h>
#endif

#include "cpu.h"

static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
{
	u32 gprs[8] = { 0 };
	int err;

	WARN_ONCE((boot_cpu_data.x86 != 0xf),
		  "%s should only be used on K8!\n", __func__);

	gprs[1] = msr;
	gprs[7] = 0x9c5a203a;

	err = rdmsr_safe_regs(gprs);

	*p = gprs[0] | ((u64)gprs[2] << 32);

	return err;
}

static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
{
	u32 gprs[8] = { 0 };

	WARN_ONCE((boot_cpu_data.x86 != 0xf),
		  "%s should only be used on K8!\n", __func__);

	gprs[0] = (u32)val;
	gprs[1] = msr;
	gprs[2] = val >> 32;
	gprs[7] = 0x9c5a203a;

	return wrmsr_safe_regs(gprs);
}

#ifdef CONFIG_X86_32
/*
 * B step AMD K6 before B 9730xxxx have hardware bugs that can cause
 * misexecution of code under Linux. Owners of such processors should
 * contact AMD for precise details and a CPU swap.
 *
 * See http://www.multimania.com/poulot/k6bug.html
 * and section 2.6.2 of "AMD-K6 Processor Revision Guide - Model 6"
 * (Publication # 21266  Issue Date: August 1998)
 *
 * The following test is erm.. interesting. AMD neglected to up
 * the chip setting when fixing the bug but they also tweaked some
 * performance at the same time..
 */

extern __visible void vide(void);
__asm__(".globl vide\n\t.align 4\nvide: ret");

static void init_amd_k5(struct cpuinfo_x86 *c)
{
/*
 * General Systems BIOSen alias the cpu frequency registers
 * of the Elan at 0x000df000. Unfortunately, one of the Linux
 * drivers subsequently pokes it, and changes the CPU speed.
 * Workaround: Remove the unneeded alias.
 */
#define CBAR		(0xfffc)	/* Configuration Base Address  (32-bit) */
#define CBAR_ENB	(0x80000000)
#define CBAR_KEY	(0X000000CB)
	if (c->x86_model == 9 || c->x86_model == 10) {
		if (inl(CBAR) & CBAR_ENB)
			outl(0 | CBAR_KEY, CBAR);
	}
}


static void init_amd_k6(struct cpuinfo_x86 *c)
{
	u32 l, h;
	int mbytes = get_num_physpages() >> (20-PAGE_SHIFT);

	if (c->x86_model < 6) {
		/* Based on AMD doc 20734R - June 2000 */
		if (c->x86_model == 0) {
			clear_cpu_cap(c, X86_FEATURE_APIC);
			set_cpu_cap(c, X86_FEATURE_PGE);
		}
		return;
	}

	if (c->x86_model == 6 && c->x86_mask == 1) {
		const int K6_BUG_LOOP = 1000000;
		int n;
		void (*f_vide)(void);
		unsigned long d, d2;

		printk(KERN_INFO "AMD K6 stepping B detected - ");

		/*
		 * It looks like AMD fixed the 2.6.2 bug and improved indirect
		 * calls at the same time.
		 */
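		/*
		 * The loop below is a heuristic: it times roughly 1,000,000
		 * indirect calls to the aligned 'vide' stub with RDTSC.
		 * Parts that got the fix run these calls markedly faster,
		 * so averaging more than 20 cycles per call is taken to
		 * mean the chip is still affected.
		 */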

		n = K6_BUG_LOOP;
		f_vide = vide;
		rdtscl(d);
		while (n--)
			f_vide();
		rdtscl(d2);
		d = d2-d;

		if (d > 20*K6_BUG_LOOP)
			printk(KERN_CONT
				"system stability may be impaired when more than 32 MB are used.\n");
		else
			printk(KERN_CONT "probably OK (after B9730xxxx).\n");
	}

	/* K6 with old style WHCR */
	if (c->x86_model < 8 ||
	    (c->x86_model == 8 && c->x86_mask < 8)) {
		/* We can only write allocate on the low 508Mb */
		if (mbytes > 508)
			mbytes = 508;

		rdmsr(MSR_K6_WHCR, l, h);
		if ((l&0x0000FFFF) == 0) {
			unsigned long flags;
			l = (1<<0)|((mbytes/4)<<1);
			local_irq_save(flags);
			wbinvd();
			wrmsr(MSR_K6_WHCR, l, h);
			local_irq_restore(flags);
			printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
				mbytes);
		}
		return;
	}

	if ((c->x86_model == 8 && c->x86_mask > 7) ||
	     c->x86_model == 9 || c->x86_model == 13) {
		/* The more serious chips .. */

		if (mbytes > 4092)
			mbytes = 4092;

		rdmsr(MSR_K6_WHCR, l, h);
		if ((l&0xFFFF0000) == 0) {
			unsigned long flags;
			l = ((mbytes>>2)<<22)|(1<<16);
			local_irq_save(flags);
			wbinvd();
			wrmsr(MSR_K6_WHCR, l, h);
			local_irq_restore(flags);
			printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
				mbytes);
		}

		return;
	}

	if (c->x86_model == 10) {
		/* AMD Geode LX is model 10 */
		/* placeholder for any needed mods */
		return;
	}
}

static void amd_k7_smp_check(struct cpuinfo_x86 *c)
{
	/* calling is from identify_secondary_cpu() ? */
	if (!c->cpu_index)
		return;

	/*
	 * Certain Athlons might work (for various values of 'work') in SMP
	 * but they are not certified as MP capable.
	 */
	/* Athlon 660/661 is valid. */
	if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
	    (c->x86_mask == 1)))
		return;

	/* Duron 670 is valid */
	if ((c->x86_model == 7) && (c->x86_mask == 0))
		return;

	/*
	 * Athlon 662, Duron 671, and Athlon >model 7 have capability
	 * bit. It's worth noting that the A5 stepping (662) of some
	 * Athlon XP's have the MP bit set.
	 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
	 * more.
	 */
	if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
	    ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
	     (c->x86_model > 7))
		if (cpu_has_mp)
			return;

	/* If we get here, not a certified SMP capable AMD system. */

	/*
	 * Don't taint if we are running SMP kernel on a single non-MP
	 * approved Athlon
	 */
	WARN_ONCE(1, "WARNING: This combination of AMD"
		" processors is not suitable for SMP.\n");
	add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
}

static void init_amd_k7(struct cpuinfo_x86 *c)
{
	u32 l, h;

	/*
	 * Bit 15 of Athlon specific MSR 15 needs to be 0
	 * to enable SSE on Palomino/Morgan/Barton CPU's.
	 * If the BIOS didn't enable it already, enable it here.
	 */
	if (c->x86_model >= 6 && c->x86_model <= 10) {
		if (!cpu_has(c, X86_FEATURE_XMM)) {
			printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
			msr_clear_bit(MSR_K7_HWCR, 15);
			set_cpu_cap(c, X86_FEATURE_XMM);
		}
	}

	/*
	 * It's been determined by AMD that Athlons since model 8 stepping 1
	 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
	 * As per AMD technical note 27212 0.2
	 */
	if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
		rdmsr(MSR_K7_CLK_CTL, l, h);
		if ((l & 0xfff00000) != 0x20000000) {
			printk(KERN_INFO
			    "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
					l, ((l & 0x000fffff)|0x20000000));
			wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
		}
	}

	set_cpu_cap(c, X86_FEATURE_K7);

	amd_k7_smp_check(c);
}
#endif

#ifdef CONFIG_NUMA
/*
 * To work around broken NUMA config.  Read the comment in
 * srat_detect_node().
 */
static int nearby_node(int apicid)
{
	int i, node;

	for (i = apicid - 1; i >= 0; i--) {
		node = __apicid_to_node[i];
		if (node != NUMA_NO_NODE && node_online(node))
			return node;
	}
	for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
		node = __apicid_to_node[i];
		if (node != NUMA_NO_NODE && node_online(node))
			return node;
	}
	return first_node(node_online_map); /* Shouldn't happen */
}
#endif

/*
 * Fixup core topology information for
 *  (1) AMD multi-node processors
 *      Assumption: Number of cores in each internal node is the same.
 *  (2) AMD processors supporting compute units
 */
#ifdef CONFIG_X86_HT
static void amd_get_topology(struct cpuinfo_x86 *c)
{
	u32 nodes, cores_per_cu = 1;
	u8 node_id;
	int cpu = smp_processor_id();

	/* get information required for multi-node processors */
	if (cpu_has_topoext) {
		u32 eax, ebx, ecx, edx;

		cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
		nodes = ((ecx >> 8) & 7) + 1;
		node_id = ecx & 7;

		/* get compute unit information */
		smp_num_siblings = ((ebx >> 8) & 3) + 1;
		c->compute_unit_id = ebx & 0xff;
		cores_per_cu += ((ebx >> 8) & 3);
	} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
		u64 value;

		rdmsrl(MSR_FAM10H_NODE_ID, value);
		nodes = ((value >> 3) & 7) + 1;
		node_id = value & 7;
	} else
		return;

	/* fixup multi-node processor information */
	if (nodes > 1) {
		u32 cores_per_node;
		u32 cus_per_node;

		set_cpu_cap(c, X86_FEATURE_AMD_DCM);
		cores_per_node = c->x86_max_cores / nodes;
		cus_per_node   = cores_per_node / cores_per_cu;

		/* store NodeID, use llc_shared_map to store sibling info */
		per_cpu(cpu_llc_id, cpu) = node_id;

		/* core id has to be in the [0 .. cores_per_node - 1] range */
		c->cpu_core_id %= cores_per_node;
		c->compute_unit_id %= cus_per_node;
	}
}
#endif

/*
 * On an AMD dual core setup the lower bits of the APIC id distinguish the cores.
 * Assumes number of cores is a power of two.
 */
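/*
 * Illustrative example (values picked arbitrarily, not taken from real
 * hardware): with x86_coreid_bits == 2, an initial APIC ID of 6 (0b0110)
 * yields cpu_core_id = 6 & 0x3 = 2 and phys_proc_id = 6 >> 2 = 1, i.e.
 * core 2 in package 1.
 */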
static void amd_detect_cmp(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_HT
	unsigned bits;
	int cpu = smp_processor_id();

	bits = c->x86_coreid_bits;
	/* Low order bits define the core id (index of core in socket) */
	c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
	/* Convert the initial APIC ID into the socket ID */
	c->phys_proc_id = c->initial_apicid >> bits;
	/* use socket ID also for last level cache */
	per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
	amd_get_topology(c);
#endif
}

u16 amd_get_nb_id(int cpu)
{
	u16 id = 0;
#ifdef CONFIG_SMP
	id = per_cpu(cpu_llc_id, cpu);
#endif
	return id;
}
EXPORT_SYMBOL_GPL(amd_get_nb_id);

static void srat_detect_node(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_NUMA
	int cpu = smp_processor_id();
	int node;
	unsigned apicid = c->apicid;

	node = numa_cpu_node(cpu);
	if (node == NUMA_NO_NODE)
		node = per_cpu(cpu_llc_id, cpu);

	/*
	 * On a multi-fabric platform (e.g. Numascale NumaChip) a
	 * platform-specific handler needs to be called to fixup some
	 * IDs of the CPU.
	 */
	if (x86_cpuinit.fixup_cpu_id)
		x86_cpuinit.fixup_cpu_id(c, node);

	if (!node_online(node)) {
		/*
		 * Two possibilities here:
		 *
		 * - The CPU is missing memory and no node was created.  In
		 *   that case try picking one from a nearby CPU.
		 *
		 * - The APIC IDs differ from the HyperTransport node IDs
		 *   which the K8 northbridge parsing fills in.  Assume
		 *   they are all increased by a constant offset, but in
		 *   the same order as the HT nodeids.  If that doesn't
		 *   result in a usable node fall back to the path for the
		 *   previous case.
		 *
		 * This workaround operates directly on the mapping between
		 * APIC ID and NUMA node, assuming certain relationship
		 * between APIC ID, HT node ID and NUMA topology.  As going
		 * through CPU mapping may alter the outcome, directly
		 * access __apicid_to_node[].
		 */
		int ht_nodeid = c->initial_apicid;

		if (ht_nodeid >= 0 &&
		    __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
			node = __apicid_to_node[ht_nodeid];
		/* Pick a nearby node */
		if (!node_online(node))
			node = nearby_node(apicid);
	}
	numa_set_node(cpu, node);
#endif
}

static void early_init_amd_mc(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_HT
	unsigned bits, ecx;

	/* Multi core CPU? */
	if (c->extended_cpuid_level < 0x80000008)
		return;

	ecx = cpuid_ecx(0x80000008);

	c->x86_max_cores = (ecx & 0xff) + 1;

	/* CPU telling us the core id bits shift? */
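	/*
	 * Layout of CPUID 0x80000008 ECX as used here: bits 7:0 hold the
	 * core count minus one (read above), bits 15:12 the width of the
	 * core-id part of the APIC ID (AMD's NC and ApicIdCoreIdSize
	 * fields, respectively).
	 */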
	bits = (ecx >> 12) & 0xF;

	/* Otherwise recompute */
	if (bits == 0) {
		while ((1 << bits) < c->x86_max_cores)
			bits++;
	}

	c->x86_coreid_bits = bits;
#endif
}

static void bsp_init_amd(struct cpuinfo_x86 *c)
{
	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {

		if (c->x86 > 0x10 ||
		    (c->x86 == 0x10 && c->x86_model >= 0x2)) {
			u64 val;

			rdmsrl(MSR_K7_HWCR, val);
			if (!(val & BIT(24)))
				printk(KERN_WARNING FW_BUG "TSC doesn't count "
					"with P0 frequency!\n");
		}
	}

	if (c->x86 == 0x15) {
		unsigned long upperbit;
		u32 cpuid, assoc;

		cpuid	 = cpuid_edx(0x80000005);
		assoc	 = cpuid >> 16 & 0xff;
		upperbit = ((cpuid >> 24) << 10) / assoc;

		va_align.mask	= (upperbit - 1) & PAGE_MASK;
		va_align.flags	= ALIGN_VA_32 | ALIGN_VA_64;
	}
}

static void early_init_amd(struct cpuinfo_x86 *c)
{
	early_init_amd_mc(c);

	/*
	 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
	 * with P/T states and does not stop in deep C-states
	 */
	if (c->x86_power & (1 << 8)) {
		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
		if (!check_tsc_unstable())
			set_sched_clock_stable();
	}

#ifdef CONFIG_X86_64
	set_cpu_cap(c, X86_FEATURE_SYSCALL32);
#else
	/* Set MTRR capability flag if appropriate */
	if (c->x86 == 5)
		if (c->x86_model == 13 || c->x86_model == 9 ||
		    (c->x86_model == 8 && c->x86_mask >= 8))
			set_cpu_cap(c, X86_FEATURE_K6_MTRR);
#endif
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
	/* check CPU config space for extended APIC ID */
	if (cpu_has_apic && c->x86 >= 0xf) {
		unsigned int val;
		val = read_pci_config(0, 24, 0, 0x68);
		if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
			set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
	}
#endif

	/* F16h erratum 793, CVE-2013-6885 */
	if (c->x86 == 0x16 && c->x86_model <= 0xf)
		msr_set_bit(MSR_AMD64_LS_CFG, 15);
}

static const int amd_erratum_383[];
static const int amd_erratum_400[];
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);

static void init_amd(struct cpuinfo_x86 *c)
{
	u32 dummy;
	unsigned long long value;

#ifdef CONFIG_SMP
	/*
	 * Disable TLB flush filter by setting HWCR.FFDIS on K8
	 * bit 6 of msr C001_0015
	 *
	 * Errata 63 for SH-B3 steppings
	 * Errata 122 for all steppings (F+ have it disabled by default)
	 */
	if (c->x86 == 0xf)
		msr_set_bit(MSR_K7_HWCR, 6);
#endif

	early_init_amd(c);

	/*
	 * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
	 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
	 */
	clear_cpu_cap(c, 0*32+31);

#ifdef CONFIG_X86_64
	/* On C+ stepping K8 rep microcode works well for copy/memset */
	if (c->x86 == 0xf) {
		u32 level;

		level = cpuid_eax(1);
		if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
			set_cpu_cap(c, X86_FEATURE_REP_GOOD);

		/*
		 * Some BIOSes incorrectly force this feature, but only K8
		 * revision D (model = 0x14) and later actually support it.
		 * (AMD Erratum #110, docId: 25759).
		 */
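		/*
		 * If it was forced on anyway, hide it again: drop the cap
		 * bit and clear bit 32 of MSR C001_100D via the K8-only
		 * helpers above, which is presumably where the BIOS set it.
		 */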
		if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) {
			clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
			if (!rdmsrl_amd_safe(0xc001100d, &value)) {
				value &= ~(1ULL << 32);
				wrmsrl_amd_safe(0xc001100d, value);
			}
		}

	}
	if (c->x86 >= 0x10)
		set_cpu_cap(c, X86_FEATURE_REP_GOOD);

	/* get apicid instead of initial apic id from cpuid */
	c->apicid = hard_smp_processor_id();
#else

	/*
	 * FIXME: We should handle the K5 here. Set up the write
	 * range and also turn on MSR 83 bits 4 and 31 (write alloc,
	 * no bus pipeline)
	 */

	switch (c->x86) {
	case 4:
		init_amd_k5(c);
		break;
	case 5:
		init_amd_k6(c);
		break;
	case 6: /* An Athlon/Duron */
		init_amd_k7(c);
		break;
	}

	/* K6s report MCEs but don't actually have all the MSRs */
	if (c->x86 < 6)
		clear_cpu_cap(c, X86_FEATURE_MCE);
#endif

	/* Enable workaround for FXSAVE leak */
	if (c->x86 >= 6)
		set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);

	if (!c->x86_model_id[0]) {
		switch (c->x86) {
		case 0xf:
			/* Should distinguish Models here, but this is only
			   a fallback anyways. */
			strcpy(c->x86_model_id, "Hammer");
			break;
		}
	}

	/* re-enable TopologyExtensions if switched off by BIOS */
	if ((c->x86 == 0x15) &&
	    (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) &&
	    !cpu_has(c, X86_FEATURE_TOPOEXT)) {

		if (msr_set_bit(0xc0011005, 54) > 0) {
			rdmsrl(0xc0011005, value);
			if (value & BIT_64(54)) {
				set_cpu_cap(c, X86_FEATURE_TOPOEXT);
				pr_info(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
			}
		}
	}

	/*
	 * The way access filter has a performance penalty on some workloads.
	 * Disable it on the affected CPUs.
	 */
	if ((c->x86 == 0x15) &&
	    (c->x86_model >= 0x02) && (c->x86_model < 0x20)) {

		if (!rdmsrl_safe(0xc0011021, &value) && !(value & 0x1E)) {
			value |= 0x1E;
			wrmsrl_safe(0xc0011021, value);
		}
	}

	cpu_detect_cache_sizes(c);

	/* Multi core CPU? */
	if (c->extended_cpuid_level >= 0x80000008) {
		amd_detect_cmp(c);
		srat_detect_node(c);
	}

#ifdef CONFIG_X86_32
	detect_ht(c);
#endif

	init_amd_cacheinfo(c);

	if (c->x86 >= 0xf)
		set_cpu_cap(c, X86_FEATURE_K8);

	if (cpu_has_xmm2) {
		/* MFENCE stops RDTSC speculation */
		set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
	}

#ifdef CONFIG_X86_64
	if (c->x86 == 0x10) {
		/* do this for boot cpu */
		if (c == &boot_cpu_data)
			check_enable_amd_mmconf_dmi();

		fam10h_check_enable_mmcfg();
	}

	if (c == &boot_cpu_data && c->x86 >= 0xf) {
		unsigned long long tseg;

		/*
		 * Split up direct mapping around the TSEG SMM area.
		 * Don't do it for gbpages because there seems very little
		 * benefit in doing so.
		 */
		if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
			unsigned long pfn = tseg >> PAGE_SHIFT;

			printk(KERN_DEBUG "tseg: %010llx\n", tseg);
			if (pfn_range_is_mapped(pfn, pfn + 1))
				set_memory_4k((unsigned long)__va(tseg), 1);
		}
	}
#endif

	/*
	 * Family 0x12 and above processors have APIC timer
	 * running in deep C states.
	 */
	if (c->x86 > 0x11)
		set_cpu_cap(c, X86_FEATURE_ARAT);

	if (c->x86 == 0x10) {
		/*
		 * Disable GART TLB Walk Errors on Fam10h. We do this here
		 * because this is always needed when GART is enabled, even in a
		 * kernel which has no MCE support built in.
		 * BIOS should disable GartTlbWlk Errors already. If
		 * it doesn't, do it here as suggested by the BKDG.
		 *
		 * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012
		 */
		msr_set_bit(MSR_AMD64_MCx_MASK(4), 10);

		/*
		 * On family 10h BIOS may not have properly enabled WC+ support,
		 * causing it to be converted to CD memtype. This may result in
		 * performance degradation for certain nested-paging guests.
		 * Prevent this conversion by clearing bit 24 in
		 * MSR_AMD64_BU_CFG2.
		 *
		 * NOTE: we want to use the _safe accessors so as not to #GP kvm
		 * guests on older kvm hosts.
		 */
		msr_clear_bit(MSR_AMD64_BU_CFG2, 24);

		if (cpu_has_amd_erratum(c, amd_erratum_383))
			set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
	}

	if (cpu_has_amd_erratum(c, amd_erratum_400))
		set_cpu_bug(c, X86_BUG_AMD_APIC_C1E);

	rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
}

#ifdef CONFIG_X86_32
static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
	/* AMD errata T13 (order #21922) */
	if ((c->x86 == 6)) {
		/* Duron Rev A0 */
		if (c->x86_model == 3 && c->x86_mask == 0)
			size = 64;
		/* Tbird rev A1/A2 */
		if (c->x86_model == 4 &&
			(c->x86_mask == 0 || c->x86_mask == 1))
			size = 256;
	}
	return size;
}
#endif

static void cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
{
	tlb_flushall_shift = 6;
}

static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
{
	u32 ebx, eax, ecx, edx;
	u16 mask = 0xfff;

	if (c->x86 < 0xf)
		return;

	if (c->extended_cpuid_level < 0x80000006)
		return;

	cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

	tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask;
	tlb_lli_4k[ENTRIES] = ebx & mask;

	/*
	 * K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB
	 * characteristics from the CPUID function 0x80000005 instead.
	 */
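	/*
	 * Field layout relied on here: in leaf 0x80000006, the L2 D-TLB
	 * entry counts sit in bits 27:16 and the I-TLB counts in bits 11:0
	 * of EBX (4K pages) and EAX (2M/4M pages), hence the 12-bit mask.
	 * The L1 leaf 0x80000005 uses 8-bit count fields, hence 0xff below.
	 */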
	if (c->x86 == 0xf) {
		cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
		mask = 0xff;
	}

	/* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
	if (!((eax >> 16) & mask))
		tlb_lld_2m[ENTRIES] = (cpuid_eax(0x80000005) >> 16) & 0xff;
	else
		tlb_lld_2m[ENTRIES] = (eax >> 16) & mask;

	/* a 4M entry uses two 2M entries */
	tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1;

	/* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
	if (!(eax & mask)) {
		/* Erratum 658 */
		if (c->x86 == 0x15 && c->x86_model <= 0x1f) {
			tlb_lli_2m[ENTRIES] = 1024;
		} else {
			cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
			tlb_lli_2m[ENTRIES] = eax & 0xff;
		}
	} else
		tlb_lli_2m[ENTRIES] = eax & mask;

	tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;

	cpu_set_tlb_flushall_shift(c);
}

static const struct cpu_dev amd_cpu_dev = {
	.c_vendor	= "AMD",
	.c_ident	= { "AuthenticAMD" },
#ifdef CONFIG_X86_32
	.legacy_models = {
		{ .family = 4, .model_names =
		  {
			  [3] = "486 DX/2",
			  [7] = "486 DX/2-WB",
			  [8] = "486 DX/4",
			  [9] = "486 DX/4-WB",
			  [14] = "Am5x86-WT",
			  [15] = "Am5x86-WB"
		  }
		},
	},
	.legacy_cache_size = amd_size_cache,
#endif
	.c_early_init	= early_init_amd,
	.c_detect_tlb	= cpu_detect_tlb_amd,
	.c_bsp_init	= bsp_init_amd,
	.c_init		= init_amd,
	.c_x86_vendor	= X86_VENDOR_AMD,
};

cpu_dev_register(amd_cpu_dev);

/*
 * AMD errata checking
 *
 * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or
 * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that
 * have an OSVW id assigned, which it takes as first argument. Both take a
 * variable number of family-specific model-stepping ranges created by
 * AMD_MODEL_RANGE().
 *
 * Example:
 *
 * const int amd_erratum_319[] =
 *	AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),
 *			   AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),
 *			   AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0));
 */
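/*
 * Packing sketch, derived from the macros below: AMD_MODEL_RANGE(0x10, 0x2,
 * 0x1, 0xff, 0xf) evaluates to 0x10021fff, i.e. family 0x10 with the
 * (model, stepping) range 0x021..0xfff. cpu_has_amd_erratum() then compares
 * (x86_model << 4 | x86_mask) against those 12-bit start/end fields.
 */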
#define AMD_LEGACY_ERRATUM(...)		{ -1, __VA_ARGS__, 0 }
#define AMD_OSVW_ERRATUM(osvw_id, ...)	{ osvw_id, __VA_ARGS__, 0 }
#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
	((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
#define AMD_MODEL_RANGE_FAMILY(range)	(((range) >> 24) & 0xff)
#define AMD_MODEL_RANGE_START(range)	(((range) >> 12) & 0xfff)
#define AMD_MODEL_RANGE_END(range)	((range) & 0xfff)

static const int amd_erratum_400[] =
	AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
			    AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));

static const int amd_erratum_383[] =
	AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));


static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
{
	int osvw_id = *erratum++;
	u32 range;
	u32 ms;

	if (osvw_id >= 0 && osvw_id < 65536 &&
	    cpu_has(cpu, X86_FEATURE_OSVW)) {
		u64 osvw_len;

		rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len);
		if (osvw_id < osvw_len) {
			u64 osvw_bits;

			rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6),
			       osvw_bits);
			return osvw_bits & (1ULL << (osvw_id & 0x3f));
		}
	}

	/* OSVW unavailable or ID unknown, match family-model-stepping range */
	ms = (cpu->x86_model << 4) | cpu->x86_mask;
	while ((range = *erratum++))
		if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
		    (ms >= AMD_MODEL_RANGE_START(range)) &&
		    (ms <= AMD_MODEL_RANGE_END(range)))
			return true;

	return false;
}