// SPDX-License-Identifier: GPL-2.0
/*
 * Routines to identify caches on Intel CPU.
 *
 * Changes:
 * Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 * Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
 */

#include <linux/slab.h>
#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/capability.h>
#include <linux/sysfs.h>
#include <linux/pci.h>

#include <asm/cpufeature.h>
#include <asm/cacheinfo.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>

#include "cpu.h"

#define LVL_1_INST	1
#define LVL_1_DATA	2
#define LVL_2		3
#define LVL_3		4
#define LVL_TRACE	5

/* Shared last level cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);

/* Shared L2 cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);

struct _cache_table {
	unsigned char descriptor;
	char cache_type;
	short size;
};

#define MB(x)	((x) * 1024)

/* All the cache descriptor types we care about (no TLB or
   trace cache entries) */

static const struct _cache_table cache_table[] =
{
	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
	{ 0x21, LVL_2, 256 },		/* 8-way set assoc, 64 byte line size */
	{ 0x22, LVL_3, 512 },		/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x23, LVL_3, MB(1) },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x25, LVL_3, MB(2) },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x29, LVL_3, MB(4) },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x39, LVL_2, 128 },		/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3a, LVL_2, 192 },		/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3b, LVL_2, 128 },		/* 2-way set assoc, sectored cache, 64 byte line size */
	{ 0x3c, LVL_2, 256 },		/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3d, LVL_2, 384 },		/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3e, LVL_2, 512 },		/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3f, LVL_2, 256 },		/* 2-way set assoc, 64 byte line size */
	{ 0x41, LVL_2, 128 },		/* 4-way set assoc, 32 byte line size */
	{ 0x42, LVL_2, 256 },		/* 4-way set assoc, 32 byte line size */
	{ 0x43, LVL_2, 512 },		/* 4-way set assoc, 32 byte line size */
	{ 0x44, LVL_2, MB(1) },		/* 4-way set assoc, 32 byte line size */
	{ 0x45, LVL_2, MB(2) },		/* 4-way set assoc, 32 byte line size */
	{ 0x46, LVL_3, MB(4) },		/* 4-way set assoc, 64 byte line size */
	{ 0x47, LVL_3, MB(8) },		/* 8-way set assoc, 64 byte line size */
	{ 0x48, LVL_2, MB(3) },		/* 12-way set assoc, 64 byte line size */
	{ 0x49, LVL_3, MB(4) },		/* 16-way set assoc, 64 byte line size */
	{ 0x4a, LVL_3, MB(6) },		/* 12-way set assoc, 64 byte line size */
	{ 0x4b, LVL_3, MB(8) },		/* 16-way set assoc, 64 byte line size */
	{ 0x4c, LVL_3, MB(12) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4d, LVL_3, MB(16) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4e, LVL_2, MB(6) },		/* 24-way set assoc, 64 byte line size */
	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x70, LVL_TRACE, 12 },	/* 8-way set assoc */
	{ 0x71, LVL_TRACE, 16 },	/* 8-way set assoc */
	{ 0x72, LVL_TRACE, 32 },	/* 8-way set assoc */
	{ 0x73, LVL_TRACE, 64 },	/* 8-way set assoc */
	{ 0x78, LVL_2, MB(1) },		/* 4-way set assoc, 64 byte line size */
	{ 0x79, LVL_2, 128 },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7a, LVL_2, 256 },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7b, LVL_2, 512 },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7c, LVL_2, MB(1) },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7d, LVL_2, MB(2) },		/* 8-way set assoc, 64 byte line size */
	{ 0x7f, LVL_2, 512 },		/* 2-way set assoc, 64 byte line size */
	{ 0x80, LVL_2, 512 },		/* 8-way set assoc, 64 byte line size */
	{ 0x82, LVL_2, 256 },		/* 8-way set assoc, 32 byte line size */
	{ 0x83, LVL_2, 512 },		/* 8-way set assoc, 32 byte line size */
	{ 0x84, LVL_2, MB(1) },		/* 8-way set assoc, 32 byte line size */
	{ 0x85, LVL_2, MB(2) },		/* 8-way set assoc, 32 byte line size */
	{ 0x86, LVL_2, 512 },		/* 4-way set assoc, 64 byte line size */
	{ 0x87, LVL_2, MB(1) },		/* 8-way set assoc, 64 byte line size */
	{ 0xd0, LVL_3, 512 },		/* 4-way set assoc, 64 byte line size */
	{ 0xd1, LVL_3, MB(1) },		/* 4-way set assoc, 64 byte line size */
	{ 0xd2, LVL_3, MB(2) },		/* 4-way set assoc, 64 byte line size */
	{ 0xd6, LVL_3, MB(1) },		/* 8-way set assoc, 64 byte line size */
	{ 0xd7, LVL_3, MB(2) },		/* 8-way set assoc, 64 byte line size */
	{ 0xd8, LVL_3, MB(4) },		/* 12-way set assoc, 64 byte line size */
	{ 0xdc, LVL_3, MB(2) },		/* 12-way set assoc, 64 byte line size */
	{ 0xdd, LVL_3, MB(4) },		/* 12-way set assoc, 64 byte line size */
	{ 0xde, LVL_3, MB(8) },		/* 12-way set assoc, 64 byte line size */
	{ 0xe2, LVL_3, MB(2) },		/* 16-way set assoc, 64 byte line size */
	{ 0xe3, LVL_3, MB(4) },		/* 16-way set assoc, 64 byte line size */
	{ 0xe4, LVL_3, MB(8) },		/* 16-way set assoc, 64 byte line size */
	{ 0xea, LVL_3, MB(12) },	/* 24-way set assoc, 64 byte line size */
	{ 0xeb, LVL_3, MB(18) },	/* 24-way set assoc, 64 byte line size */
	{ 0xec, LVL_3, MB(24) },	/* 24-way set assoc, 64 byte line size */
	{ 0x00, 0, 0}
};


enum _cache_type {
	CTYPE_NULL = 0,
	CTYPE_DATA = 1,
	CTYPE_INST = 2,
	CTYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
	struct {
		enum _cache_type	type:5;
		unsigned int		level:3;
		unsigned int		is_self_initializing:1;
		unsigned int		is_fully_associative:1;
		unsigned int		reserved:4;
		unsigned int		num_threads_sharing:12;
		unsigned int		num_cores_on_die:6;
	} split;
	u32 full;
};

union _cpuid4_leaf_ebx {
	struct {
		unsigned int		coherency_line_size:12;
		unsigned int		physical_line_partition:10;
		unsigned int		ways_of_associativity:10;
	} split;
	u32 full;
};

union _cpuid4_leaf_ecx {
	struct {
		unsigned int		number_of_sets:32;
	} split;
	u32 full;
};

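/*
 * Geometry as reported by the deterministic cache leaf.  Each field is
 * stored minus one, and the total cache size follows from
 *   size = sets * line_size * partitions * ways.
 * Illustrative example: 64 sets, 64-byte lines, 1 partition and 8 ways
 * give a 32 KB cache.
 */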
struct _cpuid4_info_regs {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned int id;
	unsigned long size;
	struct amd_northbridge *nb;
};

static unsigned short num_cache_leaves;

/* AMD doesn't have CPUID4. Emulate it here to report the same
   information to the user. This makes some assumptions about the machine:
   L2 not shared, no SMT etc. that is currently true on AMD CPUs.

   In theory the TLBs could be reported as fake type (they are in "dummy").
   Maybe later */
union l1_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:8;
		unsigned assoc:8;
		unsigned size_in_kb:8;
	};
	unsigned val;
};

union l2_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned size_in_kb:16;
	};
	unsigned val;
};

union l3_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned res:2;
		unsigned size_encoded:14;
	};
	unsigned val;
};

static const unsigned short assocs[] = {
	[1] = 1,
	[2] = 2,
	[4] = 4,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};

static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[] = { 1, 2, 3, 3 };

static const enum cache_type cache_type_map[] = {
	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
	[CTYPE_DATA] = CACHE_TYPE_DATA,
	[CTYPE_INST] = CACHE_TYPE_INST,
	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
};

static void
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
		     union _cpuid4_leaf_ebx *ebx,
		     union _cpuid4_leaf_ecx *ecx)
{
	unsigned dummy;
	unsigned line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d;
	union l2_cache l2;
	union l3_cache l3;
	union l1_cache *l1 = &l1d;

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

	switch (leaf) {
	case 1:
		l1 = &l1i;
		fallthrough;
	case 0:
		if (!l1->val)
			return;
		assoc = assocs[l1->assoc];
		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;
		break;
	case 2:
		if (!l2.val)
			return;
		assoc = assocs[l2.assoc];
		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		/* cpu_data has errata corrections for K7 applied */
		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
		break;
	case 3:
		if (!l3.val)
			return;
		assoc = assocs[l3.assoc];
		line_size = l3.line_size;
		lines_per_tag = l3.lines_per_tag;
		size_in_kb = l3.size_encoded * 512;
		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
			size_in_kb = size_in_kb >> 1;
			assoc = assoc >> 1;
		}
		break;
	default:
		return;
	}

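	/*
	 * Fill in the registers in CPUID(4) format.  Note that leaf 4
	 * reports ways, line size, partitions and sets minus one, so e.g.
	 * (illustrative numbers) a 512 KB, 16-way cache with 64-byte lines
	 * yields number_of_sets = 512 * 1024 / 64 / 16 - 1 = 511.
	 */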
	eax->split.is_self_initializing = 1;
	eax->split.type = types[leaf];
	eax->split.level = levels[leaf];
	eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;

	if (assoc == 0xffff)
		eax->split.is_fully_associative = 1;
	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assoc - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
		(ebx->split.ways_of_associativity + 1) - 1;
}

#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)

/*
 * L3 cache descriptors
 */
static void amd_calc_l3_indices(struct amd_northbridge *nb)
{
	struct amd_l3_cache *l3 = &nb->l3_cache;
	unsigned int sc0, sc1, sc2, sc3;
	u32 val = 0;

	pci_read_config_dword(nb->misc, 0x1C4, &val);

	/* calculate subcache sizes */
	l3->subcaches[0] = sc0 = !(val & BIT(0));
	l3->subcaches[1] = sc1 = !(val & BIT(4));

	if (boot_cpu_data.x86 == 0x15) {
		l3->subcaches[0] = sc0 += !(val & BIT(1));
		l3->subcaches[1] = sc1 += !(val & BIT(5));
	}

	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));

	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}

/*
 * check whether a slot used for disabling an L3 index is occupied.
 * @l3: L3 cache descriptor
 * @slot: slot number (0..1)
 *
 * @returns: the disabled index if used or negative value if slot free.
 */
static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
{
	unsigned int reg = 0;

	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);

	/* check whether this slot is activated already */
	if (reg & (3UL << 30))
		return reg & 0xfff;

	return -1;
}

static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
				  unsigned int slot)
{
	int index;
	struct amd_northbridge *nb = this_leaf->priv;

	index = amd_get_l3_disable_slot(nb, slot);
	if (index >= 0)
		return sprintf(buf, "%d\n", index);

	return sprintf(buf, "FREE\n");
}

#define SHOW_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)	\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return show_cache_disable(this_leaf, buf, slot);		\
}
SHOW_CACHE_DISABLE(0)
SHOW_CACHE_DISABLE(1)

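/*
 * An L3 index is disabled by writing it into one of the two index-disable
 * slots in the northbridge (PCI config registers 0x1BC and 0x1C0).  The
 * write below is repeated for each populated subcache (selected via bits
 * 21:20 of the register value); bit 30 marks the slot as used and bit 31
 * is set once the affected cachelines have been flushed.
 */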
static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
				 unsigned slot, unsigned long idx)
{
	int i;

	idx |= BIT(30);

	/*
	 * disable index in all 4 subcaches
	 */
	for (i = 0; i < 4; i++) {
		u32 reg = idx | (i << 20);

		if (!nb->l3_cache.subcaches[i])
			continue;

		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);

		/*
		 * We need to WBINVD on a core on the node containing the L3
		 * cache whose indices we disable, therefore a simple wbinvd()
		 * is not sufficient.
		 */
		wbinvd_on_cpu(cpu);

		reg |= BIT(31);
		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
	}
}

/*
 * disable a L3 cache index by using a disable-slot
 *
 * @l3:    L3 cache descriptor
 * @cpu:   A CPU on the node containing the L3 cache
 * @slot:  slot number (0..1)
 * @index: index to disable
 *
 * @return: 0 on success, error status on failure
 */
static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
				   unsigned slot, unsigned long index)
{
	int ret = 0;

	/* check if @slot is already used or the index is already disabled */
	ret = amd_get_l3_disable_slot(nb, slot);
	if (ret >= 0)
		return -EEXIST;

	if (index > nb->l3_cache.indices)
		return -EINVAL;

	/* check whether the other slot has disabled the same index already */
	if (index == amd_get_l3_disable_slot(nb, !slot))
		return -EEXIST;

	amd_l3_disable_index(nb, cpu, slot, index);

	return 0;
}

static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
				   const char *buf, size_t count,
				   unsigned int slot)
{
	unsigned long val = 0;
	int cpu, err = 0;
	struct amd_northbridge *nb = this_leaf->priv;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	cpu = cpumask_first(&this_leaf->shared_cpu_map);

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
	if (err) {
		if (err == -EEXIST)
			pr_warn("L3 slot %d in use/index already disabled!\n",
				slot);
		return err;
	}
	return count;
}

#define STORE_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_store(struct device *dev,			\
			     struct device_attribute *attr,		\
			     const char *buf, size_t count)		\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return store_cache_disable(this_leaf, buf, count, slot);	\
}
STORE_CACHE_DISABLE(0)
STORE_CACHE_DISABLE(1)

static ssize_t subcaches_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);

	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
}

static ssize_t subcaches_store(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t count)
{
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
	unsigned long val;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (kstrtoul(buf, 16, &val) < 0)
		return -EINVAL;

	if (amd_set_subcaches(cpu, val))
		return -EINVAL;

	return count;
}

static DEVICE_ATTR_RW(cache_disable_0);
static DEVICE_ATTR_RW(cache_disable_1);
static DEVICE_ATTR_RW(subcaches);

static umode_t
cache_private_attrs_is_visible(struct kobject *kobj,
			       struct attribute *attr, int unused)
{
	struct device *dev = kobj_to_dev(kobj);
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	umode_t mode = attr->mode;

	if (!this_leaf->priv)
		return 0;

	if ((attr == &dev_attr_subcaches.attr) &&
	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		return mode;

	if ((attr == &dev_attr_cache_disable_0.attr ||
	     attr == &dev_attr_cache_disable_1.attr) &&
	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		return mode;

	return 0;
}

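/*
 * The attribute group below is exposed via cache_get_priv_group() and is
 * only populated (see init_amd_l3_attrs()) when the northbridge supports
 * L3 index-disable and/or L3 partitioning.
 */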
static struct attribute_group cache_private_group = {
	.is_visible = cache_private_attrs_is_visible,
};

static void init_amd_l3_attrs(void)
{
	int n = 1;
	static struct attribute **amd_l3_attrs;

	if (amd_l3_attrs) /* already initialized */
		return;

	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		n += 2;
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		n += 1;

	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
	if (!amd_l3_attrs)
		return;

	n = 0;
	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
	}
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;

	cache_private_group.attrs = amd_l3_attrs;
}

const struct attribute_group *
cache_get_priv_group(struct cacheinfo *this_leaf)
{
	struct amd_northbridge *nb = this_leaf->priv;

	if (this_leaf->level < 3 || !nb)
		return NULL;

	if (nb && nb->l3_cache.indices)
		init_amd_l3_attrs();

	return &cache_private_group;
}

static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
{
	int node;

	/* only for L3, and not in virtualized environments */
	if (index < 3)
		return;

	node = topology_die_id(smp_processor_id());
	this_leaf->nb = node_to_amd_nb(node);
	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
		amd_calc_l3_indices(this_leaf->nb);
}
#else
#define amd_init_l3_cache(x, y)
#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */

static int
cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
{
	union _cpuid4_leaf_eax	eax;
	union _cpuid4_leaf_ebx	ebx;
	union _cpuid4_leaf_ecx	ecx;
	unsigned		edx;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
			cpuid_count(0x8000001d, index, &eax.full,
				    &ebx.full, &ecx.full, &edx);
		else
			amd_cpuid4(index, &eax, &ebx, &ecx);
		amd_init_l3_cache(this_leaf, index);
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
		cpuid_count(0x8000001d, index, &eax.full,
			    &ebx.full, &ecx.full, &edx);
		amd_init_l3_cache(this_leaf, index);
	} else {
		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
	}

	if (eax.split.type == CTYPE_NULL)
		return -EIO; /* better error ? */

	this_leaf->eax = eax;
	this_leaf->ebx = ebx;
	this_leaf->ecx = ecx;
	this_leaf->size = (ecx.split.number_of_sets          + 1) *
			  (ebx.split.coherency_line_size     + 1) *
			  (ebx.split.physical_line_partition + 1) *
			  (ebx.split.ways_of_associativity   + 1);
	return 0;
}

static int find_num_cache_leaves(struct cpuinfo_x86 *c)
{
	unsigned int		eax, ebx, ecx, edx, op;
	union _cpuid4_leaf_eax	cache_eax;
	int			i = -1;

	if (c->x86_vendor == X86_VENDOR_AMD ||
	    c->x86_vendor == X86_VENDOR_HYGON)
		op = 0x8000001d;
	else
		op = 4;

	do {
		++i;
		/* Do cpuid(op) loop to find out num_cache_leaves */
		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CTYPE_NULL);
	return i;
}

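/*
 * Worked example for the family 0x17+ derivation below (illustrative
 * numbers): if the last cache leaf reports 16 threads sharing the L3
 * (num_threads_sharing == 15), get_count_order(16) == 4, so all CPUs
 * whose APIC IDs differ only in the low 4 bits get the same cpu_llc_id.
 */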
void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu)
{
	/*
	 * We may have multiple LLCs if L3 caches exist, so check if we
	 * have an L3 cache by looking at the L3 cache CPUID leaf.
	 */
	if (!cpuid_edx(0x80000006))
		return;

	if (c->x86 < 0x17) {
		/* LLC is at the node level. */
		per_cpu(cpu_llc_id, cpu) = c->cpu_die_id;
	} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
		/*
		 * LLC is at the core complex level.
		 * Core complex ID is ApicId[3] for these processors.
		 */
		per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
	} else {
		/*
		 * LLC ID is calculated from the number of threads sharing the
		 * cache.
		 */
		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
		u32 llc_index = find_num_cache_leaves(c) - 1;

		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
		if (eax)
			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;

		if (num_sharing_cache) {
			int bits = get_count_order(num_sharing_cache);

			per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
		}
	}
}

void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu)
{
	/*
	 * We may have multiple LLCs if L3 caches exist, so check if we
	 * have an L3 cache by looking at the L3 cache CPUID leaf.
	 */
	if (!cpuid_edx(0x80000006))
		return;

	/*
	 * LLC is at the core complex level.
	 * Core complex ID is ApicId[3] for these processors.
	 */
	per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
}

void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{

	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		num_cache_leaves = find_num_cache_leaves(c);
	} else if (c->extended_cpuid_level >= 0x80000006) {
		if (cpuid_edx(0x80000006) & 0xf000)
			num_cache_leaves = 4;
		else
			num_cache_leaves = 3;
	}
}

void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
{
	num_cache_leaves = find_num_cache_leaves(c);
}

void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	/* Cache sizes */
	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
#ifdef CONFIG_SMP
	unsigned int cpu = c->cpu_index;
#endif

	if (c->cpuid_level > 3) {
		static int is_initialized;

		if (is_initialized == 0) {
			/* Init num_cache_leaves from boot CPU */
			num_cache_leaves = find_num_cache_leaves(c);
			is_initialized++;
		}

		/*
		 * Whenever possible use cpuid(4), deterministic cache
		 * parameters cpuid leaf to find the cache details
		 */
		for (i = 0; i < num_cache_leaves; i++) {
			struct _cpuid4_info_regs this_leaf = {};
			int retval;

			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
			if (retval < 0)
				continue;

			switch (this_leaf.eax.split.level) {
			case 1:
				if (this_leaf.eax.split.type == CTYPE_DATA)
					new_l1d = this_leaf.size/1024;
				else if (this_leaf.eax.split.type == CTYPE_INST)
					new_l1i = this_leaf.size/1024;
				break;
			case 2:
				new_l2 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l2_id = c->apicid & ~((1 << index_msb) - 1);
				break;
			case 3:
				new_l3 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l3_id = c->apicid & ~((1 << index_msb) - 1);
				break;
			default:
				break;
			}
		}
	}
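	/*
	 * Illustrative CPUID(2) example for the legacy path below: if EAX
	 * reads 0x665b5001 (hypothetical value), the low byte 0x01 is the
	 * iteration count and is skipped, 0x50 and 0x5b are not in
	 * cache_table[] and are ignored, and 0x66 adds an 8 KB L1 data cache.
	 */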
	/*
	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
	 * trace cache
	 */
	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
		/* supports eax=2 call */
		int j, n;
		unsigned int regs[4];
		unsigned char *dp = (unsigned char *)regs;
		int only_trace = 0;

		if (num_cache_leaves != 0 && c->x86 == 15)
			only_trace = 1;

		/* Number of times to iterate */
		n = cpuid_eax(2) & 0xFF;

		for (i = 0 ; i < n ; i++) {
			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

			/* If bit 31 is set, this is an unknown format */
			for (j = 0 ; j < 3 ; j++)
				if (regs[j] & (1 << 31))
					regs[j] = 0;

			/* Byte 0 is level count, not a descriptor */
			for (j = 1 ; j < 16 ; j++) {
				unsigned char des = dp[j];
				unsigned char k = 0;

				/* look up this descriptor in the table */
				while (cache_table[k].descriptor != 0) {
					if (cache_table[k].descriptor == des) {
						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
							break;
						switch (cache_table[k].cache_type) {
						case LVL_1_INST:
							l1i += cache_table[k].size;
							break;
						case LVL_1_DATA:
							l1d += cache_table[k].size;
							break;
						case LVL_2:
							l2 += cache_table[k].size;
							break;
						case LVL_3:
							l3 += cache_table[k].size;
							break;
						case LVL_TRACE:
							trace += cache_table[k].size;
							break;
						}

						break;
					}

					k++;
				}
			}
		}
	}

	if (new_l1d)
		l1d = new_l1d;

	if (new_l1i)
		l1i = new_l1i;

	if (new_l2) {
		l2 = new_l2;
#ifdef CONFIG_SMP
		per_cpu(cpu_llc_id, cpu) = l2_id;
		per_cpu(cpu_l2c_id, cpu) = l2_id;
#endif
	}

	if (new_l3) {
		l3 = new_l3;
#ifdef CONFIG_SMP
		per_cpu(cpu_llc_id, cpu) = l3_id;
#endif
	}

#ifdef CONFIG_SMP
	/*
	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
	 * turn means that the only possibility is SMT (as indicated in
	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
	 * c->phys_proc_id.
	 */
	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
#endif

	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

	if (!l2)
		cpu_detect_cache_sizes(c);
}

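/*
 * Illustrative example for the TOPOEXT path below: if a leaf reports
 * num_threads_sharing == 1 (i.e. two threads) and the current CPU has
 * APIC ID 5, then first = 5 - (5 % 2) = 4 and last = 5, so the CPUs with
 * APIC IDs 4 and 5 end up in each other's shared_cpu_map for that index.
 */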
static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
				    struct _cpuid4_info_regs *base)
{
	struct cpu_cacheinfo *this_cpu_ci;
	struct cacheinfo *this_leaf;
	int i, sibling;

	/*
	 * For L3, always use the pre-calculated cpu_llc_shared_mask
	 * to derive shared_cpu_map.
	 */
	if (index == 3) {
		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;
			this_leaf = this_cpu_ci->info_list + index;
			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
				if (!cpu_online(sibling))
					continue;
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
			}
		}
	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		unsigned int apicid, nshared, first, last;

		nshared = base->eax.split.num_threads_sharing + 1;
		apicid = cpu_data(cpu).apicid;
		first = apicid - (apicid % nshared);
		last = first + nshared - 1;

		for_each_online_cpu(i) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;

			apicid = cpu_data(i).apicid;
			if ((apicid < first) || (apicid > last))
				continue;

			this_leaf = this_cpu_ci->info_list + index;

			for_each_online_cpu(sibling) {
				apicid = cpu_data(sibling).apicid;
				if ((apicid < first) || (apicid > last))
					continue;
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
			}
		}
	} else
		return 0;

	return 1;
}

static void __cache_cpumap_setup(unsigned int cpu, int index,
				 struct _cpuid4_info_regs *base)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf, *sibling_leaf;
	unsigned long num_threads_sharing;
	int index_msb, i;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	if (c->x86_vendor == X86_VENDOR_AMD ||
	    c->x86_vendor == X86_VENDOR_HYGON) {
		if (__cache_amd_cpumap_setup(cpu, index, base))
			return;
	}

	this_leaf = this_cpu_ci->info_list + index;
	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;

	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
	if (num_threads_sharing == 1)
		return;

	index_msb = get_count_order(num_threads_sharing);

	for_each_online_cpu(i)
		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);

			if (i == cpu || !sib_cpu_ci->info_list)
				continue;/* skip if itself or no cacheinfo */
			sibling_leaf = sib_cpu_ci->info_list + index;
			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
		}
}

static void ci_leaf_init(struct cacheinfo *this_leaf,
			 struct _cpuid4_info_regs *base)
{
	this_leaf->id = base->id;
	this_leaf->attributes = CACHE_ID;
	this_leaf->level = base->eax.split.level;
	this_leaf->type = cache_type_map[base->eax.split.type];
	this_leaf->coherency_line_size =
				base->ebx.split.coherency_line_size + 1;
	this_leaf->ways_of_associativity =
				base->ebx.split.ways_of_associativity + 1;
	this_leaf->size = base->size;
	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
	this_leaf->physical_line_partition =
				base->ebx.split.physical_line_partition + 1;
	this_leaf->priv = base->nb;
}

int init_cache_level(unsigned int cpu)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);

	if (!num_cache_leaves)
		return -ENOENT;
	if (!this_cpu_ci)
		return -EINVAL;
	this_cpu_ci->num_levels = 3;
	this_cpu_ci->num_leaves = num_cache_leaves;
	return 0;
}

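/*
 * init_cache_level() and populate_cache_leaves() are the x86 hooks used by
 * the generic cacheinfo code (drivers/base/cacheinfo.c) to build the
 * per-CPU view exported under /sys/devices/system/cpu/cpuN/cache.
 */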
/*
 * The max shared threads number comes from CPUID.4:EAX[25-14] with input
 * ECX as cache index. Then right shift apicid by the number's order to get
 * cache id for this cache node.
 */
static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
{
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	unsigned long num_threads_sharing;
	int index_msb;

	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
	index_msb = get_count_order(num_threads_sharing);
	id4_regs->id = c->apicid >> index_msb;
}

int populate_cache_leaves(unsigned int cpu)
{
	unsigned int idx, ret;
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
	struct _cpuid4_info_regs id4_regs = {};

	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
		if (ret)
			return ret;
		get_cache_id(cpu, &id4_regs);
		ci_leaf_init(this_leaf++, &id4_regs);
		__cache_cpumap_setup(cpu, idx, &id4_regs);
	}
	this_cpu_ci->cpu_map_populated = true;

	return 0;
}