// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/pgtable.h>

#include <linux/string.h>
#include <linux/bitops.h>
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/thread_info.h>
#include <linux/init.h>
#include <linux/uaccess.h>

#include <asm/cpufeature.h>
#include <asm/msr.h>
#include <asm/bugs.h>
#include <asm/cpu.h>
#include <asm/intel-family.h>
#include <asm/microcode_intel.h>
#include <asm/hwcap2.h>
#include <asm/elf.h>
#include <asm/cpu_device_id.h>
#include <asm/cmdline.h>
#include <asm/traps.h>
#include <asm/resctrl.h>
#include <asm/numa.h>
#include <asm/thermal.h>

#ifdef CONFIG_X86_64
#include <linux/topology.h>
#endif

#include "cpu.h"

#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/mpspec.h>
#include <asm/apic.h>
#endif

enum split_lock_detect_state {
	sld_off = 0,
	sld_warn,
	sld_fatal,
};

/*
 * Default to sld_off because most systems do not support split lock detection.
 * sld_state_setup() will switch this to sld_warn on systems that support
 * split lock/bus lock detect, unless there is a command line override.
 */
static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
static u64 msr_test_ctrl_cache __ro_after_init;

/*
 * With a name like MSR_TEST_CTL it should go without saying, but don't touch
 * MSR_TEST_CTL unless the CPU is one of the whitelisted models. Writing it
 * on CPUs that do not support SLD can cause fireworks, even when writing '0'.
 */
static bool cpu_model_supports_sld __ro_after_init;

/*
 * Processors which have self-snooping capability can handle conflicting
 * memory types across CPUs by snooping their own cache. However, there exist
 * CPU models on which having conflicting memory types still leads to
 * unpredictable behavior, machine check errors, or hangs. Clear this
 * feature to prevent its use on machines with known errata.
 */
static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c)
{
	switch (c->x86_model) {
	case INTEL_FAM6_CORE_YONAH:
	case INTEL_FAM6_CORE2_MEROM:
	case INTEL_FAM6_CORE2_MEROM_L:
	case INTEL_FAM6_CORE2_PENRYN:
	case INTEL_FAM6_CORE2_DUNNINGTON:
	case INTEL_FAM6_NEHALEM:
	case INTEL_FAM6_NEHALEM_G:
	case INTEL_FAM6_NEHALEM_EP:
	case INTEL_FAM6_NEHALEM_EX:
	case INTEL_FAM6_WESTMERE:
	case INTEL_FAM6_WESTMERE_EP:
	case INTEL_FAM6_SANDYBRIDGE:
		setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP);
	}
}

static bool ring3mwait_disabled __read_mostly;

static int __init ring3mwait_disable(char *__unused)
{
	ring3mwait_disabled = true;
	return 0;
}
__setup("ring3mwait=disable", ring3mwait_disable);

static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
{
	/*
	 * Ring 3 MONITOR/MWAIT feature cannot be detected without
	 * cpu model and family comparison.
	 */
	if (c->x86 != 6)
		return;
	switch (c->x86_model) {
	case INTEL_FAM6_XEON_PHI_KNL:
	case INTEL_FAM6_XEON_PHI_KNM:
		break;
	default:
		return;
	}

	if (ring3mwait_disabled)
		return;

	set_cpu_cap(c, X86_FEATURE_RING3MWAIT);
	this_cpu_or(msr_misc_features_shadow,
		    1UL << MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT);

	if (c == &boot_cpu_data)
		ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
}

/*
 * Early microcode releases for the Spectre v2 mitigation were broken.
 * Information taken from:
 * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf
 * - https://kb.vmware.com/s/article/52345
 * - Microcode revisions observed in the wild
 * - Release note from 20180108 microcode release
 */
struct sku_microcode {
	u8 model;
	u8 stepping;
	u32 microcode;
};
static const struct sku_microcode spectre_bad_microcodes[] = {
	{ INTEL_FAM6_KABYLAKE,		0x0B,	0x80 },
	{ INTEL_FAM6_KABYLAKE,		0x0A,	0x80 },
	{ INTEL_FAM6_KABYLAKE,		0x09,	0x80 },
	{ INTEL_FAM6_KABYLAKE_L,	0x0A,	0x80 },
	{ INTEL_FAM6_KABYLAKE_L,	0x09,	0x80 },
	{ INTEL_FAM6_SKYLAKE_X,		0x03,	0x0100013e },
	{ INTEL_FAM6_SKYLAKE_X,		0x04,	0x0200003c },
	{ INTEL_FAM6_BROADWELL,		0x04,	0x28 },
	{ INTEL_FAM6_BROADWELL_G,	0x01,	0x1b },
	{ INTEL_FAM6_BROADWELL_D,	0x02,	0x14 },
	{ INTEL_FAM6_BROADWELL_D,	0x03,	0x07000011 },
	{ INTEL_FAM6_BROADWELL_X,	0x01,	0x0b000025 },
	{ INTEL_FAM6_HASWELL_L,		0x01,	0x21 },
	{ INTEL_FAM6_HASWELL_G,		0x01,	0x18 },
	{ INTEL_FAM6_HASWELL,		0x03,	0x23 },
	{ INTEL_FAM6_HASWELL_X,		0x02,	0x3b },
	{ INTEL_FAM6_HASWELL_X,		0x04,	0x10 },
	{ INTEL_FAM6_IVYBRIDGE_X,	0x04,	0x42a },
	/* Observed in the wild */
	{ INTEL_FAM6_SANDYBRIDGE_X,	0x06,	0x61b },
	{ INTEL_FAM6_SANDYBRIDGE_X,	0x07,	0x712 },
};

static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
{
	int i;

	/*
	 * We know that hypervisors lie to us about the microcode version,
	 * so we may as well hope that they are running the correct version.
	 */
	if (cpu_has(c, X86_FEATURE_HYPERVISOR))
		return false;

	if (c->x86 != 6)
		return false;

	for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
		if (c->x86_model == spectre_bad_microcodes[i].model &&
		    c->x86_stepping == spectre_bad_microcodes[i].stepping)
			return (c->microcode <= spectre_bad_microcodes[i].microcode);
	}
	return false;
}

static void early_init_intel(struct cpuinfo_x86 *c)
{
	u64 misc_enable;

	/* Unmask CPUID levels if masked: */
	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
		if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
				  MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) {
			c->cpuid_level = cpuid_eax(0);
			get_cpu_cap(c);
		}
	}

	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
	    (c->x86 == 0x6 && c->x86_model >= 0x0e))
		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);

	if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64))
		c->microcode = intel_get_microcode_revision();

	/* Now if any of them are set, check the blacklist and clear the lot */
	if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
	     cpu_has(c, X86_FEATURE_INTEL_STIBP) ||
	     cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
	     cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
		pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
		setup_clear_cpu_cap(X86_FEATURE_IBRS);
		setup_clear_cpu_cap(X86_FEATURE_IBPB);
		setup_clear_cpu_cap(X86_FEATURE_STIBP);
		setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
		setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL);
		setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
		setup_clear_cpu_cap(X86_FEATURE_SSBD);
		setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD);
	}

	/*
	 * Atom erratum AAE44/AAF40/AAG38/AAH41:
	 *
	 * A race condition between speculative fetches and invalidating
	 * a large page.
	 * This is worked around in microcode, but we need the microcode
	 * to have already been loaded... so if it is not, recommend a
	 * BIOS update and disable large pages.
	 */
	if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 &&
	    c->microcode < 0x20e) {
		pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
		clear_cpu_cap(c, X86_FEATURE_PSE);
	}

#ifdef CONFIG_X86_64
	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
#else
	/* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
	if (c->x86 == 15 && c->x86_cache_alignment == 64)
		c->x86_cache_alignment = 128;
#endif

	/* CPUID workaround for 0F33/0F34 CPU */
	if (c->x86 == 0xF && c->x86_model == 0x3 &&
	    (c->x86_stepping == 0x3 || c->x86_stepping == 0x4))
		c->x86_phys_bits = 36;

	/*
	 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
	 * with P/T states and does not stop in deep C-states.
	 *
	 * It is also reliable across cores and sockets. (but not across
	 * cabinets - we turn it off in that case explicitly.)
	 */
	if (c->x86_power & (1 << 8)) {
		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
	}

	/* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
	if (c->x86 == 6) {
		switch (c->x86_model) {
		case INTEL_FAM6_ATOM_SALTWELL_MID:
		case INTEL_FAM6_ATOM_SALTWELL_TABLET:
		case INTEL_FAM6_ATOM_SILVERMONT_MID:
		case INTEL_FAM6_ATOM_AIRMONT_NP:
			set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3);
			break;
		default:
			break;
		}
	}

	/*
	 * There is a known erratum on Pentium III and Core Solo
	 * and Core Duo CPUs.
	 * " Page with PAT set to WC while associated MTRR is UC
	 * may consolidate to UC "
	 * Because of this erratum, it is better to stick with
	 * setting WC in MTRR rather than using PAT on these CPUs.
	 *
	 * Enable PAT WC only on P4, Core 2 or later CPUs.
	 */
	if (c->x86 == 6 && c->x86_model < 15)
		clear_cpu_cap(c, X86_FEATURE_PAT);

	/*
	 * If fast string is not enabled in IA32_MISC_ENABLE for any reason,
	 * clear the fast string and enhanced fast string CPU capabilities.
	 */
	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
		if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
			pr_info("Disabled fast string operations\n");
			setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
			setup_clear_cpu_cap(X86_FEATURE_ERMS);
		}
	}

	/*
	 * Intel Quark Core DevMan_001.pdf section 6.4.11
	 * "The operating system also is required to invalidate (i.e., flush)
	 * the TLB when any changes are made to any of the page table entries.
	 * The operating system must reload CR3 to cause the TLB to be flushed"
	 *
	 * As a result, boot_cpu_has(X86_FEATURE_PGE) in arch/x86/include/asm/tlbflush.h
	 * should be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE
	 * to be modified.
	 */
	if (c->x86 == 5 && c->x86_model == 9) {
		pr_info("Disabling PGE capability bit\n");
		setup_clear_cpu_cap(X86_FEATURE_PGE);
	}

	if (c->cpuid_level >= 0x00000001) {
		u32 eax, ebx, ecx, edx;

		cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
		/*
		 * If HTT (EDX[28]) is set, EBX[16:23] contains the number of
		 * apicids which are reserved per package. Store the resulting
		 * shift value for the package management code.
		 */
		if (edx & (1U << 28))
			c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
	}

	check_memory_type_self_snoop_errata(c);

	/*
	 * Get the number of SMT siblings early from the extended topology
	 * leaf, if available. Otherwise try the legacy SMT detection.
	 */
	if (detect_extended_topology_early(c) < 0)
		detect_ht_early(c);
}

static void bsp_init_intel(struct cpuinfo_x86 *c)
{
	resctrl_cpu_detect(c);
}

#ifdef CONFIG_X86_32
/*
 * Early probe support logic for ppro memory erratum #50
 *
 * This is called before we do cpu ident work
 */

int ppro_with_ram_bug(void)
{
	/* Uses data from early_cpu_detect now */
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
	    boot_cpu_data.x86 == 6 &&
	    boot_cpu_data.x86_model == 1 &&
	    boot_cpu_data.x86_stepping < 8) {
		pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
		return 1;
	}
	return 0;
}

static void intel_smp_check(struct cpuinfo_x86 *c)
{
	/* called from identify_secondary_cpu()? */
	if (!c->cpu_index)
		return;

	/*
	 * Mask B, Pentium, but not Pentium MMX
	 */
	if (c->x86 == 5 &&
	    c->x86_stepping >= 1 && c->x86_stepping <= 4 &&
	    c->x86_model <= 3) {
		/*
		 * Remember we have B step Pentia with bugs
		 */
		WARN_ONCE(1, "WARNING: SMP operation may be unreliable "
			     "with B stepping processors.\n");
	}
}

static int forcepae;
static int __init forcepae_setup(char *__unused)
{
	forcepae = 1;
	return 1;
}
__setup("forcepae", forcepae_setup);

static void intel_workarounds(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_F00F_BUG
	/*
	 * All models of Pentium and Pentium with MMX technology CPUs
	 * have the F0 0F bug, which lets nonprivileged users lock up the
	 * system. Announce that the fault handler will be checking for it.
	 * The Quark is also family 5, but does not have the same bug.
	 */
	clear_cpu_bug(c, X86_BUG_F00F);
	if (c->x86 == 5 && c->x86_model < 9) {
		static int f00f_workaround_enabled;

		set_cpu_bug(c, X86_BUG_F00F);
		if (!f00f_workaround_enabled) {
			pr_notice("Intel Pentium with F0 0F bug - workaround enabled.\n");
			f00f_workaround_enabled = 1;
		}
	}
#endif

	/*
	 * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until
	 * model 3 mask 3
	 */
	if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633)
		clear_cpu_cap(c, X86_FEATURE_SEP);

	/*
	 * PAE CPUID issue: many Pentium M report no PAE but may have a
	 * functionally usable PAE implementation.
	 * Forcefully enable PAE if kernel parameter "forcepae" is present.
	 */
	if (forcepae) {
		pr_warn("PAE forced!\n");
		set_cpu_cap(c, X86_FEATURE_PAE);
		add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
	}

	/*
	 * P4 Xeon erratum 037 workaround.
	 * Hardware prefetcher may cause stale data to be loaded into the cache.
	 */
	if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) {
		if (msr_set_bit(MSR_IA32_MISC_ENABLE,
				MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) {
			pr_info("CPU: C0 stepping P4 Xeon detected.\n");
			pr_info("CPU: Disabling hardware prefetching (Erratum 037)\n");
		}
	}

	/*
	 * See if we have a good local APIC by checking for buggy Pentia, i.e.
	 * all B steppings and the C2 stepping of P54C when using their
	 * integrated APIC (see 11AP erratum in "Pentium Processor
	 * Specification Update").
	 */
	if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
	    (c->x86_stepping < 0x6 || c->x86_stepping == 0xb))
		set_cpu_bug(c, X86_BUG_11AP);

#ifdef CONFIG_X86_INTEL_USERCOPY
	/*
	 * Set up the preferred alignment for movsl bulk memory moves
	 */
	switch (c->x86) {
	case 4:		/* 486: untested */
		break;
	case 5:		/* Old Pentia: untested */
		break;
	case 6:		/* PII/PIII only like movsl with 8-byte alignment */
		movsl_mask.mask = 7;
		break;
	case 15:	/* P4 is OK down to 8-byte alignment */
		movsl_mask.mask = 7;
		break;
	}
#endif

	intel_smp_check(c);
}
#else
static void intel_workarounds(struct cpuinfo_x86 *c)
{
}
#endif

static void srat_detect_node(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_NUMA
	unsigned node;
	int cpu = smp_processor_id();

	/*
	 * Don't do the funky fallback heuristics the AMD version employs
	 * for now.
	 */
	node = numa_cpu_node(cpu);
	if (node == NUMA_NO_NODE || !node_online(node)) {
		/* reuse the value from init_cpu_to_node() */
		node = cpu_to_node(cpu);
	}
	numa_set_node(cpu, node);
#endif
}

#define MSR_IA32_TME_ACTIVATE		0x982

/* Helpers to access TME_ACTIVATE MSR */
#define TME_ACTIVATE_LOCKED(x)		(x & 0x1)
#define TME_ACTIVATE_ENABLED(x)		(x & 0x2)

#define TME_ACTIVATE_POLICY(x)		((x >> 4) & 0xf)	/* Bits 7:4 */
#define TME_ACTIVATE_POLICY_AES_XTS_128	0

#define TME_ACTIVATE_KEYID_BITS(x)	((x >> 32) & 0xf)	/* Bits 35:32 */

#define TME_ACTIVATE_CRYPTO_ALGS(x)	((x >> 48) & 0xffff)	/* Bits 63:48 */
#define TME_ACTIVATE_CRYPTO_AES_XTS_128	1

/* Values for mktme_status (SW only construct) */
#define MKTME_ENABLED			0
#define MKTME_DISABLED			1
#define MKTME_UNINITIALIZED		2
static int mktme_status = MKTME_UNINITIALIZED;

static void detect_tme(struct cpuinfo_x86 *c)
{
	u64 tme_activate, tme_policy, tme_crypto_algs;
	int keyid_bits = 0, nr_keyids = 0;
	static u64 tme_activate_cpu0 = 0;

	rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);

	if (mktme_status != MKTME_UNINITIALIZED) {
		if (tme_activate != tme_activate_cpu0) {
			/* Broken BIOS? */
			pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
			pr_err_once("x86/tme: MKTME is not usable\n");
			mktme_status = MKTME_DISABLED;

			/* Proceed. We may need to exclude bits from x86_phys_bits. */
		}
	} else {
		tme_activate_cpu0 = tme_activate;
	}

	if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
		pr_info_once("x86/tme: not enabled by BIOS\n");
		mktme_status = MKTME_DISABLED;
		return;
	}

	if (mktme_status != MKTME_UNINITIALIZED)
		goto detect_keyid_bits;

	pr_info("x86/tme: enabled by BIOS\n");

	tme_policy = TME_ACTIVATE_POLICY(tme_activate);
	if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
		pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);

	tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
	if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
		pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
		       tme_crypto_algs);
		mktme_status = MKTME_DISABLED;
	}
detect_keyid_bits:
	keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
	nr_keyids = (1UL << keyid_bits) - 1;
	if (nr_keyids) {
		pr_info_once("x86/mktme: enabled by BIOS\n");
		pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
	} else {
		pr_info_once("x86/mktme: disabled by BIOS\n");
	}

	if (mktme_status == MKTME_UNINITIALIZED) {
		/* MKTME is usable */
		mktme_status = MKTME_ENABLED;
	}

	/*
	 * KeyID bits effectively lower the number of physical address
	 * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
	 */
	c->x86_phys_bits -= keyid_bits;
}

static void init_cpuid_fault(struct cpuinfo_x86 *c)
{
	u64 msr;

	if (!rdmsrl_safe(MSR_PLATFORM_INFO, &msr)) {
		if (msr & MSR_PLATFORM_INFO_CPUID_FAULT)
			set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
	}
}

static void init_intel_misc_features(struct cpuinfo_x86 *c)
{
	u64 msr;

	if (rdmsrl_safe(MSR_MISC_FEATURES_ENABLES, &msr))
		return;

	/* Clear all MISC features */
	this_cpu_write(msr_misc_features_shadow, 0);

	/* Check features and update capabilities and shadow control bits */
	init_cpuid_fault(c);
	probe_xeon_phi_r3mwait(c);

	msr = this_cpu_read(msr_misc_features_shadow);
	wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
}

static void split_lock_init(void);
static void bus_lock_init(void);

static void init_intel(struct cpuinfo_x86 *c)
{
	early_init_intel(c);

	intel_workarounds(c);

	/*
	 * Detect the extended topology information if available. This
	 * will reinitialise the initial_apicid which will be used
	 * in init_intel_cacheinfo()
	 */
	detect_extended_topology(c);

	if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
		/*
		 * let's use the legacy cpuid vector 0x1 and 0x4 for topology
		 * detection.
		 */
		detect_num_cpu_cores(c);
#ifdef CONFIG_X86_32
		detect_ht(c);
#endif
	}

	init_intel_cacheinfo(c);

	if (c->cpuid_level > 9) {
		unsigned eax = cpuid_eax(10);

		/* Check for version and the number of counters */
		if ((eax & 0xff) && (((eax >> 8) & 0xff) > 1))
			set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
	}

	if (cpu_has(c, X86_FEATURE_XMM2))
		set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);

	if (boot_cpu_has(X86_FEATURE_DS)) {
		unsigned int l1, l2;

		rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
		if (!(l1 & (1 << 11)))
			set_cpu_cap(c, X86_FEATURE_BTS);
		if (!(l1 & (1 << 12)))
			set_cpu_cap(c, X86_FEATURE_PEBS);
	}

	if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) &&
	    (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
		set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);

	if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_MWAIT) &&
	    c->x86_model == INTEL_FAM6_ATOM_GOLDMONT)
		set_cpu_bug(c, X86_BUG_MONITOR);

#ifdef CONFIG_X86_64
	if (c->x86 == 15)
		c->x86_cache_alignment = c->x86_clflush_size * 2;
	if (c->x86 == 6)
		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
#else
	/*
	 * Names for the Pentium II/Celeron processors
	 * detectable only by also checking the cache size.
	 * Dixon is NOT a Celeron.
	 */
	if (c->x86 == 6) {
		unsigned int l2 = c->x86_cache_size;
		char *p = NULL;

		switch (c->x86_model) {
		case 5:
			if (l2 == 0)
				p = "Celeron (Covington)";
			else if (l2 == 256)
				p = "Mobile Pentium II (Dixon)";
			break;

		case 6:
			if (l2 == 128)
				p = "Celeron (Mendocino)";
			else if (c->x86_stepping == 0 || c->x86_stepping == 5)
				p = "Celeron-A";
			break;

		case 8:
			if (l2 == 128)
				p = "Celeron (Coppermine)";
			break;
		}

		if (p)
			strcpy(c->x86_model_id, p);
	}

	if (c->x86 == 15)
		set_cpu_cap(c, X86_FEATURE_P4);
	if (c->x86 == 6)
		set_cpu_cap(c, X86_FEATURE_P3);
#endif

	/* Work around errata */
	srat_detect_node(c);

	init_ia32_feat_ctl(c);

	if (cpu_has(c, X86_FEATURE_TME))
		detect_tme(c);

	init_intel_misc_features(c);

	if (tsx_ctrl_state == TSX_CTRL_ENABLE)
		tsx_enable();
	if (tsx_ctrl_state == TSX_CTRL_DISABLE)
		tsx_disable();

	split_lock_init();
	bus_lock_init();

	intel_init_thermal(c);
}

#ifdef CONFIG_X86_32
static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
	/*
	 * Intel PIII Tualatin. This comes in two flavours.
	 * One has 256kb of cache, the other 512. We have no way
	 * to determine which, so we use a boottime override
	 * for the 512kb model, and assume 256 otherwise.
	 */
	if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0))
		size = 256;

	/*
	 * Intel Quark SoC X1000 contains a 4-way set associative
	 * 16K cache with a 16 byte cache line and 256 lines per tag
	 */
	if ((c->x86 == 5) && (c->x86_model == 9))
		size = 16;
	return size;
}
#endif

#define TLB_INST_4K		0x01
#define TLB_INST_4M		0x02
#define TLB_INST_2M_4M		0x03

#define TLB_INST_ALL		0x05
#define TLB_INST_1G		0x06

#define TLB_DATA_4K		0x11
#define TLB_DATA_4M		0x12
#define TLB_DATA_2M_4M		0x13
#define TLB_DATA_4K_4M		0x14

#define TLB_DATA_1G		0x16

#define TLB_DATA0_4K		0x21
#define TLB_DATA0_4M		0x22
#define TLB_DATA0_2M_4M		0x23

#define STLB_4K			0x41
#define STLB_4K_2M		0x42

static const struct _tlb_table intel_tlb_table[] = {
	{ 0x01, TLB_INST_4K,		32,	" TLB_INST 4 KByte pages, 4-way set associative" },
	{ 0x02, TLB_INST_4M,		2,	" TLB_INST 4 MByte pages, full associative" },
	{ 0x03, TLB_DATA_4K,		64,	" TLB_DATA 4 KByte pages, 4-way set associative" },
	{ 0x04, TLB_DATA_4M,		8,	" TLB_DATA 4 MByte pages, 4-way set associative" },
	{ 0x05, TLB_DATA_4M,		32,	" TLB_DATA 4 MByte pages, 4-way set associative" },
	{ 0x0b, TLB_INST_4M,		4,	" TLB_INST 4 MByte pages, 4-way set associative" },
	{ 0x4f, TLB_INST_4K,		32,	" TLB_INST 4 KByte pages" },
	{ 0x50, TLB_INST_ALL,		64,	" TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
	{ 0x51, TLB_INST_ALL,		128,	" TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
	{ 0x52, TLB_INST_ALL,		256,	" TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
	{ 0x55, TLB_INST_2M_4M,		7,	" TLB_INST 2-MByte or 4-MByte pages, fully associative" },
	{ 0x56, TLB_DATA0_4M,		16,	" TLB_DATA0 4 MByte pages, 4-way set associative" },
	{ 0x57, TLB_DATA0_4K,		16,	" TLB_DATA0 4 KByte pages, 4-way associative" },
	{ 0x59, TLB_DATA0_4K,		16,	" TLB_DATA0 4 KByte pages, fully associative" },
	{ 0x5a, TLB_DATA0_2M_4M,	32,	" TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative" },
	{ 0x5b, TLB_DATA_4K_4M,		64,	" TLB_DATA 4 KByte and 4 MByte pages" },
	{ 0x5c, TLB_DATA_4K_4M,		128,	" TLB_DATA 4 KByte and 4 MByte pages" },
	{ 0x5d, TLB_DATA_4K_4M,		256,	" TLB_DATA 4 KByte and 4 MByte pages" },
	{ 0x61, TLB_INST_4K,		48,	" TLB_INST 4 KByte pages, full associative" },
	{ 0x63, TLB_DATA_1G,		4,	" TLB_DATA 1 GByte pages, 4-way set associative" },
	{ 0x6b, TLB_DATA_4K,		256,	" TLB_DATA 4 KByte pages, 8-way associative" },
	{ 0x6c, TLB_DATA_2M_4M,		128,	" TLB_DATA 2 MByte or 4 MByte pages, 8-way associative" },
	{ 0x6d, TLB_DATA_1G,		16,	" TLB_DATA 1 GByte pages, fully associative" },
	{ 0x76, TLB_INST_2M_4M,		8,	" TLB_INST 2-MByte or 4-MByte pages, fully associative" },
	{ 0xb0, TLB_INST_4K,		128,	" TLB_INST 4 KByte pages, 4-way set associative" },
	{ 0xb1, TLB_INST_2M_4M,		4,	" TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" },
	{ 0xb2, TLB_INST_4K,		64,	" TLB_INST 4KByte pages, 4-way set associative" },
	{ 0xb3, TLB_DATA_4K,		128,	" TLB_DATA 4 KByte pages, 4-way set associative" },
	{ 0xb4, TLB_DATA_4K,		256,	" TLB_DATA 4 KByte pages, 4-way associative" },
	{ 0xb5, TLB_INST_4K,		64,	" TLB_INST 4 KByte pages, 8-way set associative" },
	{ 0xb6, TLB_INST_4K,		128,	" TLB_INST 4 KByte pages, 8-way set associative" },
	{ 0xba, TLB_DATA_4K,		64,	" TLB_DATA 4 KByte pages, 4-way associative" },
	{ 0xc0, TLB_DATA_4K_4M,		8,	" TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" },
	{ 0xc1,
	  STLB_4K_2M,		1024,	" STLB 4 KByte and 2 MByte pages, 8-way associative" },
	{ 0xc2, TLB_DATA_2M_4M,		16,	" TLB_DATA 2 MByte/4MByte pages, 4-way associative" },
	{ 0xca, STLB_4K,		512,	" STLB 4 KByte pages, 4-way associative" },
	{ 0x00, 0, 0 }
};

static void intel_tlb_lookup(const unsigned char desc)
{
	unsigned char k;

	if (desc == 0)
		return;

	/* look up this descriptor in the table */
	for (k = 0; intel_tlb_table[k].descriptor != desc &&
	     intel_tlb_table[k].descriptor != 0; k++)
		;

	if (intel_tlb_table[k].tlb_type == 0)
		return;

	switch (intel_tlb_table[k].tlb_type) {
	case STLB_4K:
		if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
		if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
		break;
	case STLB_4K_2M:
		if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
		if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
		if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
		if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries;
		if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
		if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
		break;
	case TLB_INST_ALL:
		if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
		if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
		if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
		break;
	case TLB_INST_4K:
		if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
		break;
	case TLB_INST_4M:
		if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
		break;
	case TLB_INST_2M_4M:
		if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
		if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
		break;
	case TLB_DATA_4K:
	case TLB_DATA0_4K:
		if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
		break;
	case TLB_DATA_4M:
	case TLB_DATA0_4M:
		if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
		break;
	case TLB_DATA_2M_4M:
	case TLB_DATA0_2M_4M:
		if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries;
		if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
		break;
	case TLB_DATA_1G:
		if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries;
		break;
	}
}

static void intel_detect_tlb(struct cpuinfo_x86 *c)
{
	int i, j, n;
	unsigned int regs[4];
	unsigned char *desc = (unsigned char *)regs;

	if (c->cpuid_level < 2)
		return;

	/* Number of times to iterate */
	n = cpuid_eax(2) & 0xFF;

	for (i = 0; i < n; i++) {
		cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

		/* If bit 31 is set, this is an unknown format */
		for (j = 0; j < 3; j++)
			if (regs[j] & (1 << 31))
				regs[j] = 0;

		/* Byte 0 is level count, not a descriptor */
		for (j = 1; j < 16; j++)
			intel_tlb_lookup(desc[j]);
	}
}

static const struct cpu_dev intel_cpu_dev = {
	.c_vendor	= "Intel",
	.c_ident	= { "GenuineIntel" },
#ifdef CONFIG_X86_32
	.legacy_models = {
		{ .family = 4, .model_names =
		  {
			  [0] = "486 DX-25/33",
			  [1] = "486 DX-50",
			  [2] = "486 SX",
			  [3] = "486 DX/2",
			  [4] = "486 SL",
			  [5] = "486 SX/2",
			  [7] = "486 DX/2-WB",
			  [8] = "486 DX/4",
			  [9] = "486 DX/4-WB"
		  }
		},
		{ .family = 5, .model_names =
		  {
			  [0] = "Pentium 60/66 A-step",
			  [1] = "Pentium 60/66",
			  [2] = "Pentium 75 - 200",
			  [3] = "OverDrive PODP5V83",
			  [4] = "Pentium MMX",
			  [7] = "Mobile Pentium 75 - 200",
			  [8] = "Mobile Pentium MMX",
			  [9] = "Quark SoC X1000",
		  }
		},
		{ .family = 6, .model_names =
		  {
			  [0] = "Pentium Pro A-step",
			  [1] = "Pentium Pro",
			  [3] = "Pentium II (Klamath)",
			  [4] = "Pentium II (Deschutes)",
			  [5] = "Pentium II (Deschutes)",
			  [6] = "Mobile Pentium II",
			  [7] = "Pentium III (Katmai)",
			  [8] = "Pentium III (Coppermine)",
			  [10] = "Pentium III (Cascades)",
			  [11] = "Pentium III (Tualatin)",
		  }
		},
		{ .family = 15, .model_names =
		  {
			  [0] = "Pentium 4 (Unknown)",
			  [1] = "Pentium 4 (Willamette)",
			  [2] = "Pentium 4 (Northwood)",
			  [4] = "Pentium 4 (Foster)",
			  [5] = "Pentium 4 (Foster)",
		  }
		},
	},
	.legacy_cache_size = intel_size_cache,
#endif
	.c_detect_tlb	= intel_detect_tlb,
	.c_early_init	= early_init_intel,
	.c_bsp_init	= bsp_init_intel,
	.c_init		= init_intel,
	.c_x86_vendor	= X86_VENDOR_INTEL,
};

cpu_dev_register(intel_cpu_dev);

#undef pr_fmt
#define pr_fmt(fmt) "x86/split lock detection: " fmt

static const struct {
	const char			*option;
	enum split_lock_detect_state	state;
} sld_options[] __initconst = {
	{ "off",	sld_off   },
	{ "warn",	sld_warn  },
	{ "fatal",	sld_fatal },
};

static inline bool match_option(const char *arg, int arglen, const char *opt)
{
	int len = strlen(opt);

	return len == arglen && !strncmp(arg, opt, len);
}

static bool split_lock_verify_msr(bool on)
{
	u64 ctrl, tmp;

	if (rdmsrl_safe(MSR_TEST_CTRL, &ctrl))
		return false;
	if (on)
		ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
	else
		ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
	if (wrmsrl_safe(MSR_TEST_CTRL, ctrl))
		return false;
	rdmsrl(MSR_TEST_CTRL, tmp);
	return ctrl == tmp;
}

static void __init sld_state_setup(void)
{
	enum split_lock_detect_state state = sld_warn;
	char arg[20];
	int i, ret;

	if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
	    !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
		return;

	ret = cmdline_find_option(boot_command_line, "split_lock_detect",
				  arg, sizeof(arg));
	if (ret >= 0) {
		for (i = 0; i < ARRAY_SIZE(sld_options); i++) {
			if (match_option(arg, ret, sld_options[i].option)) {
				state = sld_options[i].state;
				break;
			}
		}
	}
	sld_state = state;
}

static void __init __split_lock_setup(void)
{
	if (!split_lock_verify_msr(false)) {
		pr_info("MSR access failed: Disabled\n");
		return;
	}

	rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);

	if (!split_lock_verify_msr(true)) {
		pr_info("MSR access failed: Disabled\n");
		return;
	}

	/* Restore the MSR to its cached value. */
	wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);

	setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
}

/*
 * MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking
 * is not implemented as one thread could undo the setting of the other
 * thread immediately after dropping the lock anyway.
 */
static void sld_update_msr(bool on)
{
	u64 test_ctrl_val = msr_test_ctrl_cache;

	if (on)
		test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;

	wrmsrl(MSR_TEST_CTRL, test_ctrl_val);
}

static void split_lock_init(void)
{
	if (cpu_model_supports_sld)
		split_lock_verify_msr(sld_state != sld_off);
}

static void split_lock_warn(unsigned long ip)
{
	pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
			    current->comm, current->pid, ip);

	/*
	 * Disable the split lock detection for this task so it can make
	 * progress and set TIF_SLD so the detection is re-enabled via
	 * switch_to_sld() when the task is scheduled out.
	 */
	sld_update_msr(false);
	set_tsk_thread_flag(current, TIF_SLD);
}

bool handle_guest_split_lock(unsigned long ip)
{
	if (sld_state == sld_warn) {
		split_lock_warn(ip);
		return true;
	}

	pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n",
		     current->comm, current->pid,
		     sld_state == sld_fatal ? "fatal" : "bogus", ip);

	current->thread.error_code = 0;
	current->thread.trap_nr = X86_TRAP_AC;
	force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
	return false;
}
EXPORT_SYMBOL_GPL(handle_guest_split_lock);

static void bus_lock_init(void)
{
	u64 val;

	/*
	 * Warn and fatal are handled by #AC for split lock if #AC for
	 * split lock is supported.
	 */
	if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) ||
	    (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
	     (sld_state == sld_warn || sld_state == sld_fatal)) ||
	    sld_state == sld_off)
		return;

	/*
	 * Enable #DB for bus lock. All bus locks are handled in #DB except
	 * split locks are handled in #AC in the fatal case.
	 */
	rdmsrl(MSR_IA32_DEBUGCTLMSR, val);
	val |= DEBUGCTLMSR_BUS_LOCK_DETECT;
	wrmsrl(MSR_IA32_DEBUGCTLMSR, val);
}

bool handle_user_split_lock(struct pt_regs *regs, long error_code)
{
	if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
		return false;
	split_lock_warn(regs->ip);
	return true;
}

void handle_bus_lock(struct pt_regs *regs)
{
	switch (sld_state) {
	case sld_off:
		break;
	case sld_warn:
		pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n",
				    current->comm, current->pid, regs->ip);
		break;
	case sld_fatal:
		force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
		break;
	}
}

/*
 * This function is called only when switching between tasks with
 * different split-lock detection modes. It sets the MSR for the
 * mode of the new task. This is right most of the time, but since
 * the MSR is shared by hyperthreads on a physical core there can
 * be glitches when the two threads need different modes.
 */
void switch_to_sld(unsigned long tifn)
{
	sld_update_msr(!(tifn & _TIF_SLD));
}

/*
 * Bits in the IA32_CORE_CAPABILITIES are not architectural, so they should
 * only be trusted if it is confirmed that a CPU model implements a
 * specific feature at a particular bit position.
 *
 * The possible driver data field values:
 *
 * - 0: CPU models that are known to have the per-core split-lock detection
 *	feature even though they do not enumerate IA32_CORE_CAPABILITIES.
 *
 * - 1: CPU models which may enumerate IA32_CORE_CAPABILITIES and if so use
 *	bit 5 to enumerate the per-core split-lock detection feature.
 */
static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		0),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,		0),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		0),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT,	1),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	1),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L,	1),
	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,		1),
	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,		1),
	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	1),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		1),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		1),
	{}
};

static void __init split_lock_setup(struct cpuinfo_x86 *c)
{
	const struct x86_cpu_id *m;
	u64 ia32_core_caps;

	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return;

	m = x86_match_cpu(split_lock_cpu_ids);
	if (!m)
		return;

	switch (m->driver_data) {
	case 0:
		break;
	case 1:
		if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES))
			return;
		rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps);
		if (!(ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT))
			return;
		break;
	default:
		return;
	}

	cpu_model_supports_sld = true;
	__split_lock_setup();
}

static void sld_state_show(void)
{
	if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
	    !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
		return;

	switch (sld_state) {
	case sld_off:
		pr_info("disabled\n");
		break;
	case sld_warn:
		if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
			pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n");
		else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
			pr_info("#DB: warning on user-space bus_locks\n");
		break;
	case sld_fatal:
		if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
			pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n");
		} else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
			pr_info("#DB: sending SIGBUS on user-space bus_locks%s\n",
				boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ?
				" from non-WB" : "");
		}
		break;
	}
}

void __init sld_setup(struct cpuinfo_x86 *c)
{
	split_lock_setup(c);
	sld_state_setup();
	sld_state_show();
}

#define X86_HYBRID_CPU_TYPE_ID_SHIFT	24

/**
 * get_this_hybrid_cpu_type() - Get the type of this hybrid CPU
 *
 * Returns the CPU type [31:24] (i.e., Atom or Core) of a CPU in
 * a hybrid processor. If the processor is not hybrid, returns 0.
 */
u8 get_this_hybrid_cpu_type(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
		return 0;

	return cpuid_eax(0x0000001a) >> X86_HYBRID_CPU_TYPE_ID_SHIFT;
}