#include <linux/export.h>
#include <linux/bitops.h>
#include <linux/elf.h>
#include <linux/mm.h>

#include <linux/io.h>
#include <linux/sched.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cpu.h>
#include <asm/pci-direct.h>

#ifdef CONFIG_X86_64
# include <asm/mmconfig.h>
# include <asm/cacheflush.h>
#endif

#include "cpu.h"

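/*
 * Note: rdmsr_safe_regs()/wrmsr_safe_regs() take an array of the eight
 * GPRs in the order eax, ecx, edx, ebx, esp, ebp, esi, edi.  The helpers
 * below therefore put the MSR number in ecx (gprs[1]) and the value in
 * edx:eax (gprs[2]:gprs[0]).  The 0x9c5a203a constant loaded into edi
 * (gprs[7]) appears to be the "password" K8 requires for these vendor
 * specific MSR accesses.
 */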
static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
{
	u32 gprs[8] = { 0 };
	int err;

	WARN_ONCE((boot_cpu_data.x86 != 0xf),
		  "%s should only be used on K8!\n", __func__);

	gprs[1] = msr;
	gprs[7] = 0x9c5a203a;

	err = rdmsr_safe_regs(gprs);

	*p = gprs[0] | ((u64)gprs[2] << 32);

	return err;
}

static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
{
	u32 gprs[8] = { 0 };

	WARN_ONCE((boot_cpu_data.x86 != 0xf),
		  "%s should only be used on K8!\n", __func__);

	gprs[0] = (u32)val;
	gprs[1] = msr;
	gprs[2] = val >> 32;
	gprs[7] = 0x9c5a203a;

	return wrmsr_safe_regs(gprs);
}

#ifdef CONFIG_X86_32
/*
 * B step AMD K6 before B 9730xxxx have hardware bugs that can cause
 * misexecution of code under Linux. Owners of such processors should
 * contact AMD for precise details and a CPU swap.
 *
 * See http://www.multimania.com/poulot/k6bug.html
 * and section 2.6.2 of "AMD-K6 Processor Revision Guide - Model 6"
 * (Publication # 21266 Issue Date: August 1998)
 *
 * The following test is erm.. interesting. AMD neglected to up
 * the chip setting when fixing the bug but they also tweaked some
 * performance at the same time..
 */

extern __visible void vide(void);
__asm__(".globl vide\n\t.align 4\nvide: ret");

static void init_amd_k5(struct cpuinfo_x86 *c)
{
/*
 * General Systems BIOSen alias the cpu frequency registers
 * of the Elan at 0x000df000. Unfortunately, one of the Linux
 * drivers subsequently pokes it, and changes the CPU speed.
 * Workaround : Remove the unneeded alias.
 */
#define CBAR		(0xfffc) /* Configuration Base Address (32-bit) */
#define CBAR_ENB	(0x80000000)
#define CBAR_KEY	(0X000000CB)
	if (c->x86_model == 9 || c->x86_model == 10) {
		if (inl(CBAR) & CBAR_ENB)
			outl(0 | CBAR_KEY, CBAR);
	}
}


static void init_amd_k6(struct cpuinfo_x86 *c)
{
	u32 l, h;
	int mbytes = get_num_physpages() >> (20-PAGE_SHIFT);

	if (c->x86_model < 6) {
		/* Based on AMD doc 20734R - June 2000 */
		if (c->x86_model == 0) {
			clear_cpu_cap(c, X86_FEATURE_APIC);
			set_cpu_cap(c, X86_FEATURE_PGE);
		}
		return;
	}

	if (c->x86_model == 6 && c->x86_mask == 1) {
		const int K6_BUG_LOOP = 1000000;
		int n;
		void (*f_vide)(void);
		unsigned long d, d2;

		printk(KERN_INFO "AMD K6 stepping B detected - ");

		/*
		 * It looks like AMD fixed the 2.6.2 bug and improved indirect
		 * calls at the same time.
		 */

		n = K6_BUG_LOOP;
		f_vide = vide;
		rdtscl(d);
		while (n--)
			f_vide();
		rdtscl(d2);
		d = d2-d;

		if (d > 20*K6_BUG_LOOP)
			printk(KERN_CONT
				"system stability may be impaired when more than 32 MB are used.\n");
		else
			printk(KERN_CONT "probably OK (after B9730xxxx).\n");
	}

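	/*
	 * WHCR controls K6 write allocation.  In the old layout, bit 0
	 * enables it and the limit (in 4 MB units) sits directly above;
	 * later models moved both the enable and the limit into the upper
	 * half of the register, as the two branches below reflect.
	 */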
	/* K6 with old style WHCR */
	if (c->x86_model < 8 ||
	   (c->x86_model == 8 && c->x86_mask < 8)) {
		/* We can only write allocate on the low 508Mb */
		if (mbytes > 508)
			mbytes = 508;

		rdmsr(MSR_K6_WHCR, l, h);
		if ((l&0x0000FFFF) == 0) {
			unsigned long flags;
			l = (1<<0)|((mbytes/4)<<1);
			local_irq_save(flags);
			wbinvd();
			wrmsr(MSR_K6_WHCR, l, h);
			local_irq_restore(flags);
			printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
				mbytes);
		}
		return;
	}

	if ((c->x86_model == 8 && c->x86_mask > 7) ||
	     c->x86_model == 9 || c->x86_model == 13) {
		/* The more serious chips .. */

		if (mbytes > 4092)
			mbytes = 4092;

		rdmsr(MSR_K6_WHCR, l, h);
		if ((l&0xFFFF0000) == 0) {
			unsigned long flags;
			l = ((mbytes>>2)<<22)|(1<<16);
			local_irq_save(flags);
			wbinvd();
			wrmsr(MSR_K6_WHCR, l, h);
			local_irq_restore(flags);
			printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
				mbytes);
		}

		return;
	}

	if (c->x86_model == 10) {
		/* AMD Geode LX is model 10 */
		/* placeholder for any needed mods */
		return;
	}
}

static void amd_k7_smp_check(struct cpuinfo_x86 *c)
{
	/* called from identify_secondary_cpu()? */
	if (!c->cpu_index)
		return;

	/*
	 * Certain Athlons might work (for various values of 'work') in SMP
	 * but they are not certified as MP capable.
	 */
	/* Athlon 660/661 is valid. */
	if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
	    (c->x86_mask == 1)))
		return;

	/* Duron 670 is valid */
	if ((c->x86_model == 7) && (c->x86_mask == 0))
		return;

	/*
	 * Athlon 662, Duron 671, and Athlon >model 7 have the MP capability
	 * bit.  It's worth noting that the A5 stepping (662) of some
	 * Athlon XPs has the MP bit set.
	 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
	 * more.
	 */
	if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
	    ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
	     (c->x86_model > 7))
		if (cpu_has_mp)
			return;

	/* If we get here, not a certified SMP capable AMD system. */

	/*
	 * Don't taint if we are running SMP kernel on a single non-MP
	 * approved Athlon
	 */
	WARN_ONCE(1, "WARNING: This combination of AMD"
		" processors is not suitable for SMP.\n");
	add_taint(TAINT_UNSAFE_SMP, LOCKDEP_NOW_UNRELIABLE);
}

static void init_amd_k7(struct cpuinfo_x86 *c)
{
	u32 l, h;

	/*
	 * Bit 15 of the Athlon-specific MSR 15 needs to be 0
	 * to enable SSE on Palomino/Morgan/Barton CPUs.
	 * If the BIOS didn't enable it already, enable it here.
	 */
	if (c->x86_model >= 6 && c->x86_model <= 10) {
		if (!cpu_has(c, X86_FEATURE_XMM)) {
			printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
			rdmsr(MSR_K7_HWCR, l, h);
			l &= ~0x00008000;
			wrmsr(MSR_K7_HWCR, l, h);
			set_cpu_cap(c, X86_FEATURE_XMM);
		}
	}

	/*
	 * It's been determined by AMD that Athlons since model 8 stepping 1
	 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx,
	 * as per AMD technical note 27212 0.2.
	 */
	if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
		rdmsr(MSR_K7_CLK_CTL, l, h);
		if ((l & 0xfff00000) != 0x20000000) {
			printk(KERN_INFO
			    "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
				l, ((l & 0x000fffff)|0x20000000));
			wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
		}
	}

	set_cpu_cap(c, X86_FEATURE_K7);

	amd_k7_smp_check(c);
}
#endif

#ifdef CONFIG_NUMA
/*
 * To work around a broken NUMA config.  Read the comment in
 * srat_detect_node().
 */
static int nearby_node(int apicid)
{
	int i, node;

	for (i = apicid - 1; i >= 0; i--) {
		node = __apicid_to_node[i];
		if (node != NUMA_NO_NODE && node_online(node))
			return node;
	}
	for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
		node = __apicid_to_node[i];
		if (node != NUMA_NO_NODE && node_online(node))
			return node;
	}
	return first_node(node_online_map); /* Shouldn't happen */
}
#endif

/*
 * Fixup core topology information for
 *  (1) AMD multi-node processors
 *      Assumption: Number of cores in each internal node is the same.
 *  (2) AMD processors supporting compute units
 */
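/*
 * With TOPOEXT, CPUID leaf 0x8000001e supplies the fields used below:
 * ECX[7:0] is the node ID and ECX[10:8] the number of nodes per
 * processor minus one; EBX[7:0] is the compute unit ID and EBX[9:8]
 * the number of cores per compute unit minus one.  Without TOPOEXT,
 * the node ID and count come from MSR_FAM10H_NODE_ID instead.
 */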
#ifdef CONFIG_X86_HT
static void amd_get_topology(struct cpuinfo_x86 *c)
{
	u32 nodes, cores_per_cu = 1;
	u8 node_id;
	int cpu = smp_processor_id();

	/* get information required for multi-node processors */
	if (cpu_has_topoext) {
		u32 eax, ebx, ecx, edx;

		cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
		nodes = ((ecx >> 8) & 7) + 1;
		node_id = ecx & 7;

		/* get compute unit information */
		smp_num_siblings = ((ebx >> 8) & 3) + 1;
		c->compute_unit_id = ebx & 0xff;
		cores_per_cu += ((ebx >> 8) & 3);
	} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
		u64 value;

		rdmsrl(MSR_FAM10H_NODE_ID, value);
		nodes = ((value >> 3) & 7) + 1;
		node_id = value & 7;
	} else
		return;

	/* fixup multi-node processor information */
	if (nodes > 1) {
		u32 cores_per_node;
		u32 cus_per_node;

		set_cpu_cap(c, X86_FEATURE_AMD_DCM);
		cores_per_node = c->x86_max_cores / nodes;
		cus_per_node = cores_per_node / cores_per_cu;

		/* store NodeID, use llc_shared_map to store sibling info */
		per_cpu(cpu_llc_id, cpu) = node_id;

		/* core id has to be in the [0 .. cores_per_node - 1] range */
		c->cpu_core_id %= cores_per_node;
		c->compute_unit_id %= cus_per_node;
	}
}
#endif

/*
 * On an AMD dual core setup the lower bits of the APIC id distinguish the cores.
 * Assumes number of cores is a power of two.
 */
static void amd_detect_cmp(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_HT
	unsigned bits;
	int cpu = smp_processor_id();

	bits = c->x86_coreid_bits;
	/* Low order bits define the core id (index of core in socket) */
	c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
	/* Convert the initial APIC ID into the socket ID */
	c->phys_proc_id = c->initial_apicid >> bits;
	/* use socket ID also for last level cache */
	per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
	amd_get_topology(c);
#endif
}

u16 amd_get_nb_id(int cpu)
{
	u16 id = 0;
#ifdef CONFIG_SMP
	id = per_cpu(cpu_llc_id, cpu);
#endif
	return id;
}
EXPORT_SYMBOL_GPL(amd_get_nb_id);

static void srat_detect_node(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_NUMA
	int cpu = smp_processor_id();
	int node;
	unsigned apicid = c->apicid;

	node = numa_cpu_node(cpu);
	if (node == NUMA_NO_NODE)
		node = per_cpu(cpu_llc_id, cpu);

	/*
	 * On multi-fabric platform (e.g. Numascale NumaChip) a
	 * platform-specific handler needs to be called to fixup some
	 * IDs of the CPU.
	 */
	if (x86_cpuinit.fixup_cpu_id)
		x86_cpuinit.fixup_cpu_id(c, node);

	if (!node_online(node)) {
		/*
		 * Two possibilities here:
		 *
		 * - The CPU is missing memory and no node was created.  In
		 *   that case try picking one from a nearby CPU.
		 *
		 * - The APIC IDs differ from the HyperTransport node IDs
		 *   which the K8 northbridge parsing fills in.  Assume
		 *   they are all increased by a constant offset, but in
		 *   the same order as the HT nodeids.  If that doesn't
		 *   result in a usable node fall back to the path for the
		 *   previous case.
		 *
		 * This workaround operates directly on the mapping between
		 * APIC ID and NUMA node, assuming a certain relationship
		 * between APIC ID, HT node ID and NUMA topology.  As going
		 * through CPU mapping may alter the outcome, directly
		 * access __apicid_to_node[].
		 */
		int ht_nodeid = c->initial_apicid;

		if (ht_nodeid >= 0 &&
		    __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
			node = __apicid_to_node[ht_nodeid];
		/* Pick a nearby node */
		if (!node_online(node))
			node = nearby_node(apicid);
	}
	numa_set_node(cpu, node);
#endif
}

static void early_init_amd_mc(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_HT
	unsigned bits, ecx;

	/* Multi core CPU? */
	if (c->extended_cpuid_level < 0x80000008)
		return;

	ecx = cpuid_ecx(0x80000008);

	c->x86_max_cores = (ecx & 0xff) + 1;

	/* CPU telling us the core id bits shift? */
	bits = (ecx >> 12) & 0xF;

	/* Otherwise recompute */
	if (bits == 0) {
		while ((1 << bits) < c->x86_max_cores)
			bits++;
	}

	c->x86_coreid_bits = bits;
#endif
}

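/*
 * On family 0x15 the two cores of a compute unit share the L1
 * instruction cache.  CPUID 0x80000005 EDX reports that cache's size
 * (bits 31:24, in KB) and associativity (bits 23:16); dividing one by
 * the other gives the way size, which bsp_init_amd() uses below to
 * derive va_align, the mmap() alignment that is meant to avoid
 * aliasing penalties between the siblings.
 */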
static void bsp_init_amd(struct cpuinfo_x86 *c)
{
	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {

		if (c->x86 > 0x10 ||
		    (c->x86 == 0x10 && c->x86_model >= 0x2)) {
			u64 val;

			rdmsrl(MSR_K7_HWCR, val);
			if (!(val & BIT(24)))
				printk(KERN_WARNING FW_BUG "TSC doesn't count "
					"with P0 frequency!\n");
		}
	}

	if (c->x86 == 0x15) {
		unsigned long upperbit;
		u32 cpuid, assoc;

		cpuid = cpuid_edx(0x80000005);
		assoc = cpuid >> 16 & 0xff;
		upperbit = ((cpuid >> 24) << 10) / assoc;

		va_align.mask = (upperbit - 1) & PAGE_MASK;
		va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
	}
}

static void early_init_amd(struct cpuinfo_x86 *c)
{
	early_init_amd_mc(c);

	/*
	 * c->x86_power is 8000_0007 edx. Bit 8 set means the TSC runs at a
	 * constant rate with P/T states and does not stop in deep C-states.
	 */
	if (c->x86_power & (1 << 8)) {
		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
		if (!check_tsc_unstable())
			set_sched_clock_stable();
	}

#ifdef CONFIG_X86_64
	set_cpu_cap(c, X86_FEATURE_SYSCALL32);
#else
	/* Set MTRR capability flag if appropriate */
	if (c->x86 == 5)
		if (c->x86_model == 13 || c->x86_model == 9 ||
		    (c->x86_model == 8 && c->x86_mask >= 8))
			set_cpu_cap(c, X86_FEATURE_K6_MTRR);
#endif
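	/*
	 * The extended APIC ID is carried in the northbridge's PCI config
	 * space: device 0x18 function 0, register 0x68 (Link Transaction
	 * Control).  Bits 17 and 18 appear to be the ApicExtBrdCst and
	 * ApicExtId enables; only when both are set is the full 8-bit
	 * APIC ID in use.
	 */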
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
	/* check CPU config space for extended APIC ID */
	if (cpu_has_apic && c->x86 >= 0xf) {
		unsigned int val;
		val = read_pci_config(0, 24, 0, 0x68);
		if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
			set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
	}
#endif

	/* F16h erratum 793, CVE-2013-6885 */
	if (c->x86 == 0x16 && c->x86_model <= 0xf) {
		u64 val;

		rdmsrl(MSR_AMD64_LS_CFG, val);
		if (!(val & BIT(15)))
			wrmsrl(MSR_AMD64_LS_CFG, val | BIT(15));
	}

}

static const int amd_erratum_383[];
static const int amd_erratum_400[];
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);

static void init_amd(struct cpuinfo_x86 *c)
{
	u32 dummy;
	unsigned long long value;

#ifdef CONFIG_SMP
	/*
	 * Disable TLB flush filter by setting HWCR.FFDIS on K8
	 * bit 6 of msr C001_0015
	 *
	 * Errata 63 for SH-B3 steppings
	 * Errata 122 for all steppings (F+ have it disabled by default)
	 */
	if (c->x86 == 0xf) {
		rdmsrl(MSR_K7_HWCR, value);
		value |= 1 << 6;
		wrmsrl(MSR_K7_HWCR, value);
	}
#endif

	early_init_amd(c);

	/*
	 * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
	 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
	 */
	clear_cpu_cap(c, 0*32+31);

#ifdef CONFIG_X86_64
	/* On C+ stepping K8 rep microcode works well for copy/memset */
	if (c->x86 == 0xf) {
		u32 level;

		level = cpuid_eax(1);
		if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
			set_cpu_cap(c, X86_FEATURE_REP_GOOD);

		/*
		 * Some BIOSes incorrectly force this feature, but only K8
		 * revision D (model = 0x14) and later actually support it.
		 * (AMD Erratum #110, docId: 25759).
		 */
		if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) {
			clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
			if (!rdmsrl_amd_safe(0xc001100d, &value)) {
				value &= ~(1ULL << 32);
				wrmsrl_amd_safe(0xc001100d, value);
			}
		}

	}
	if (c->x86 >= 0x10)
		set_cpu_cap(c, X86_FEATURE_REP_GOOD);

	/* get apicid instead of initial apic id from cpuid */
	c->apicid = hard_smp_processor_id();
#else

	/*
	 * FIXME: We should handle the K5 here. Set up the write
	 * range and also turn on MSR 83 bits 4 and 31 (write alloc,
	 * no bus pipeline)
	 */

	switch (c->x86) {
	case 4:
		init_amd_k5(c);
		break;
	case 5:
		init_amd_k6(c);
		break;
	case 6: /* An Athlon/Duron */
		init_amd_k7(c);
		break;
	}

	/* K6s report MCEs but don't actually have all the MSRs */
	if (c->x86 < 6)
		clear_cpu_cap(c, X86_FEATURE_MCE);
#endif

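	/*
	 * On these parts FXSAVE reportedly leaves FOP/FIP/FDP untouched
	 * unless an unmasked x87 exception is pending, so stale pointers
	 * from one task can become visible to the next.  The flag set
	 * below tells the FPU context-switch code to work around that.
	 */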
	/* Enable workaround for FXSAVE leak */
	if (c->x86 >= 6)
		set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);

	if (!c->x86_model_id[0]) {
		switch (c->x86) {
		case 0xf:
			/* Should distinguish Models here, but this is only
			   a fallback anyways. */
			strcpy(c->x86_model_id, "Hammer");
			break;
		}
	}

	/* re-enable TopologyExtensions if switched off by BIOS */
	if ((c->x86 == 0x15) &&
	    (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) &&
	    !cpu_has(c, X86_FEATURE_TOPOEXT)) {

		if (!rdmsrl_safe(0xc0011005, &value)) {
			value |= 1ULL << 54;
			wrmsrl_safe(0xc0011005, value);
			rdmsrl(0xc0011005, value);
			if (value & (1ULL << 54)) {
				set_cpu_cap(c, X86_FEATURE_TOPOEXT);
				printk(KERN_INFO FW_INFO "CPU: Re-enabling "
					"disabled Topology Extensions Support\n");
			}
		}
	}

	/*
	 * The way access filter has a performance penalty on some workloads.
	 * Disable it on the affected CPUs.
	 */
	if ((c->x86 == 0x15) &&
	    (c->x86_model >= 0x02) && (c->x86_model < 0x20)) {

		if (!rdmsrl_safe(0xc0011021, &value) && !(value & 0x1E)) {
			value |= 0x1E;
			wrmsrl_safe(0xc0011021, value);
		}
	}

	cpu_detect_cache_sizes(c);

	/* Multi core CPU? */
	if (c->extended_cpuid_level >= 0x80000008) {
		amd_detect_cmp(c);
		srat_detect_node(c);
	}

#ifdef CONFIG_X86_32
	detect_ht(c);
#endif

	init_amd_cacheinfo(c);

	if (c->x86 >= 0xf)
		set_cpu_cap(c, X86_FEATURE_K8);

	if (cpu_has_xmm2) {
		/* MFENCE stops RDTSC speculation */
		set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
	}

#ifdef CONFIG_X86_64
	if (c->x86 == 0x10) {
		/* do this for boot cpu */
		if (c == &boot_cpu_data)
			check_enable_amd_mmconf_dmi();

		fam10h_check_enable_mmcfg();
	}

	if (c == &boot_cpu_data && c->x86 >= 0xf) {
		unsigned long long tseg;

		/*
		 * Split up direct mapping around the TSEG SMM area.
		 * Don't do it for gbpages because there seems very little
		 * benefit in doing so.
		 */
		if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
			unsigned long pfn = tseg >> PAGE_SHIFT;

			printk(KERN_DEBUG "tseg: %010llx\n", tseg);
			if (pfn_range_is_mapped(pfn, pfn + 1))
				set_memory_4k((unsigned long)__va(tseg), 1);
		}
	}
#endif

	/*
	 * Family 0x12 and above processors have APIC timer
	 * running in deep C states.
	 */
	if (c->x86 > 0x11)
		set_cpu_cap(c, X86_FEATURE_ARAT);

	if (c->x86 == 0x10) {
		/*
		 * Disable GART TLB Walk Errors on Fam10h. We do this here
		 * because this is always needed when GART is enabled, even in a
		 * kernel which has no MCE support built in.
		 * The BIOS should disable GartTlbWlk Errors itself. If
		 * it doesn't, do it here as suggested by the BKDG.
		 *
		 * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012
		 */
		u64 mask;
		int err;

		err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask);
		if (err == 0) {
			mask |= (1 << 10);
			wrmsrl_safe(MSR_AMD64_MCx_MASK(4), mask);
		}

		/*
		 * On family 10h BIOS may not have properly enabled WC+ support,
		 * causing it to be converted to CD memtype. This may result in
		 * performance degradation for certain nested-paging guests.
		 * Prevent this conversion by clearing bit 24 in
		 * MSR_AMD64_BU_CFG2.
		 *
		 * NOTE: we want to use the _safe accessors so as not to #GP kvm
		 * guests on older kvm hosts.
		 */

		rdmsrl_safe(MSR_AMD64_BU_CFG2, &value);
		value &= ~(1ULL << 24);
		wrmsrl_safe(MSR_AMD64_BU_CFG2, value);

		if (cpu_has_amd_erratum(c, amd_erratum_383))
			set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
	}

	if (cpu_has_amd_erratum(c, amd_erratum_400))
		set_cpu_bug(c, X86_BUG_AMD_APIC_C1E);

	rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
}

#ifdef CONFIG_X86_32
static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
	/* AMD errata T13 (order #21922) */
	if ((c->x86 == 6)) {
		/* Duron Rev A0 */
		if (c->x86_model == 3 && c->x86_mask == 0)
			size = 64;
		/* Tbird rev A1/A2 */
		if (c->x86_model == 4 &&
			(c->x86_mask == 0 || c->x86_mask == 1))
			size = 256;
	}
	return size;
}
#endif

static void cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
{
	tlb_flushall_shift = 6;
}

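/*
 * CPUID 0x80000006 describes the L2 TLBs: EBX holds the 4K entry counts
 * (instruction in the low 12 bits, data in bits 27:16) and EAX the
 * 2M/4M counts in the same layout.  A zero count means that L2 TLB is
 * disabled or absent, in which case the L1 figures from CPUID
 * 0x80000005 are used instead, as the fallbacks below do.
 */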
static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
{
	u32 ebx, eax, ecx, edx;
	u16 mask = 0xfff;

	if (c->x86 < 0xf)
		return;

	if (c->extended_cpuid_level < 0x80000006)
		return;

	cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

	tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask;
	tlb_lli_4k[ENTRIES] = ebx & mask;

	/*
	 * K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB
	 * characteristics from the CPUID function 0x80000005 instead.
	 */
	if (c->x86 == 0xf) {
		cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
		mask = 0xff;
	}

	/* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
	if (!((eax >> 16) & mask))
		tlb_lld_2m[ENTRIES] = (cpuid_eax(0x80000005) >> 16) & 0xff;
	else
		tlb_lld_2m[ENTRIES] = (eax >> 16) & mask;

	/* a 4M entry uses two 2M entries */
	tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1;

	/* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
	if (!(eax & mask)) {
		/* Erratum 658 */
		if (c->x86 == 0x15 && c->x86_model <= 0x1f) {
			tlb_lli_2m[ENTRIES] = 1024;
		} else {
			cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
			tlb_lli_2m[ENTRIES] = eax & 0xff;
		}
	} else
		tlb_lli_2m[ENTRIES] = eax & mask;

	tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;

	cpu_set_tlb_flushall_shift(c);
}

static const struct cpu_dev amd_cpu_dev = {
	.c_vendor	= "AMD",
	.c_ident	= { "AuthenticAMD" },
#ifdef CONFIG_X86_32
	.legacy_models = {
		{ .family = 4, .model_names =
		  {
			[3] = "486 DX/2",
			[7] = "486 DX/2-WB",
			[8] = "486 DX/4",
			[9] = "486 DX/4-WB",
			[14] = "Am5x86-WT",
			[15] = "Am5x86-WB"
		  }
		},
	},
	.legacy_cache_size = amd_size_cache,
#endif
	.c_early_init	= early_init_amd,
	.c_detect_tlb	= cpu_detect_tlb_amd,
	.c_bsp_init	= bsp_init_amd,
	.c_init		= init_amd,
	.c_x86_vendor	= X86_VENDOR_AMD,
};

cpu_dev_register(amd_cpu_dev);

/*
 * AMD errata checking
 *
 * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or
 * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that
 * have an OSVW id assigned, which it takes as first argument. Both take a
 * variable number of family-specific model-stepping ranges created by
 * AMD_MODEL_RANGE().
 *
 * Example:
 *
 * const int amd_erratum_319[] =
 *	AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),
 *			   AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),
 *			   AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0));
 */

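/*
 * Each entry produced by AMD_MODEL_RANGE() packs a family plus a
 * (model, stepping) window into one int:
 *
 *   bits 31-24: family
 *   bits 23-12: first (model << 4 | stepping) covered
 *   bits 11-0:  last  (model << 4 | stepping) covered
 *
 * cpu_has_amd_erratum() compares the CPU's own (model << 4 | stepping)
 * against that window when no OSVW information is available.
 */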
#define AMD_LEGACY_ERRATUM(...)		{ -1, __VA_ARGS__, 0 }
#define AMD_OSVW_ERRATUM(osvw_id, ...)	{ osvw_id, __VA_ARGS__, 0 }
#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
	((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
#define AMD_MODEL_RANGE_FAMILY(range)	(((range) >> 24) & 0xff)
#define AMD_MODEL_RANGE_START(range)	(((range) >> 12) & 0xfff)
#define AMD_MODEL_RANGE_END(range)	((range) & 0xfff)

static const int amd_erratum_400[] =
	AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
			    AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));

static const int amd_erratum_383[] =
	AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));


static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
{
	int osvw_id = *erratum++;
	u32 range;
	u32 ms;

	if (osvw_id >= 0 && osvw_id < 65536 &&
	    cpu_has(cpu, X86_FEATURE_OSVW)) {
		u64 osvw_len;

		rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len);
		if (osvw_id < osvw_len) {
			u64 osvw_bits;

			rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6),
			       osvw_bits);
			return osvw_bits & (1ULL << (osvw_id & 0x3f));
		}
	}

	/* OSVW unavailable or ID unknown, match family-model-stepping range */
	ms = (cpu->x86_model << 4) | cpu->x86_mask;
	while ((range = *erratum++))
		if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
		    (ms >= AMD_MODEL_RANGE_START(range)) &&
		    (ms <= AMD_MODEL_RANGE_END(range)))
			return true;

	return false;
}