xref: /openbmc/linux/arch/x86/kernel/cpu/cacheinfo.c (revision f21e49be)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *	Routines to identify caches on Intel CPU.
4  *
5  *	Changes:
6  *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
7  *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
8  *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
9  */
10 
11 #include <linux/slab.h>
12 #include <linux/cacheinfo.h>
13 #include <linux/cpu.h>
14 #include <linux/sched.h>
15 #include <linux/capability.h>
16 #include <linux/sysfs.h>
17 #include <linux/pci.h>
18 
19 #include <asm/cpufeature.h>
20 #include <asm/cacheinfo.h>
21 #include <asm/amd_nb.h>
22 #include <asm/smp.h>
23 
24 #include "cpu.h"
25 
/* Cache classes stored in the cache_type field of struct _cache_table. */
#define LVL_1_INST	1	/* summed into the L1 instruction cache size */
#define LVL_1_DATA	2	/* summed into the L1 data cache size */
#define LVL_2		3	/* summed into the L2 cache size */
#define LVL_3		4	/* summed into the L3 cache size */
#define LVL_TRACE	5	/* summed into the trace cache size */
31 
/*
 * One row of the CPUID(2) descriptor lookup table: maps a one-byte
 * descriptor to a cache class and a size.
 */
struct _cache_table {
	unsigned char descriptor;	/* descriptor byte to match; 0 ends the table */
	char cache_type;		/* one of the LVL_* constants */
	short size;			/* cache size, in KB (MB(x) expands to x * 1024) */
};
37 
/* Scale x by 1024, so megabyte-sized entries of cache_table[] are in KB too. */
#define MB(x)	((x) * 1024)
39 
/*
 * All the cache descriptor types we care about (no TLB entries).
 *
 * Each row is { descriptor, LVL_* class, size }.  The table is scanned
 * linearly and is terminated by the all-zero row at the end.
 */

static const struct _cache_table cache_table[] =
{
	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
	{ 0x00, 0, 0}
};
119 
120 
/* Values of the 'type' bit-field in union _cpuid4_leaf_eax. */
enum _cache_type {
	CTYPE_NULL = 0,		/* no cache at this leaf; ends leaf enumeration */
	CTYPE_DATA = 1,
	CTYPE_INST = 2,
	CTYPE_UNIFIED = 3
};
127 
/*
 * EAX of a CPUID(4)-style cache leaf, accessible either as the raw 32-bit
 * register value ('full') or through its bit-fields ('split').
 */
union _cpuid4_leaf_eax {
	struct {
		enum _cache_type	type:5;
		unsigned int		level:3;
		unsigned int		is_self_initializing:1;
		unsigned int		is_fully_associative:1;
		unsigned int		reserved:4;
		/* stored as "count - 1": users in this file add 1 before use */
		unsigned int		num_threads_sharing:12;
		unsigned int		num_cores_on_die:6;
	} split;
	u32 full;
};
140 
/*
 * EBX of a CPUID(4)-style cache leaf.  Each field is stored as "value - 1";
 * users in this file add 1 before use.
 */
union _cpuid4_leaf_ebx {
	struct {
		unsigned int		coherency_line_size:12;
		unsigned int		physical_line_partition:10;
		unsigned int		ways_of_associativity:10;
	} split;
	u32 full;
};
149 
/*
 * ECX of a CPUID(4)-style cache leaf: the number of sets, stored as
 * "value - 1" (users in this file add 1 before use).
 */
union _cpuid4_leaf_ecx {
	struct {
		unsigned int		number_of_sets:32;
	} split;
	u32 full;
};
156 
/* Decoded description of one cache leaf, as gathered by this file. */
struct _cpuid4_info_regs {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned int id;		/* cache id, filled in by get_cache_id() */
	unsigned long size;		/* sets * line size * partitions * ways */
	struct amd_northbridge *nb;	/* set by amd_init_l3_cache() for leaf index >= 3 */
};
165 
/*
 * Number of cache leaves, set by the init_*_cacheinfo() routines and handed
 * to the cacheinfo core through init_cache_level().
 */
static unsigned short num_cache_leaves;
167 
/*
 * AMD doesn't have CPUID4. Emulate it here to report the same
 * information to the user.  This makes some assumptions about the machine:
 * L2 not shared, no SMT etc. that is currently true on AMD CPUs.
 *
 * In theory the TLBs could be reported as fake type (they are in "dummy").
 * Maybe later.
 */

/*
 * L1 cache descriptor as read by amd_cpuid4() from CPUID(0x80000005):
 * ECX describes the L1 data cache, EDX the L1 instruction cache.
 */
union l1_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:8;
		unsigned assoc:8;
		unsigned size_in_kb:8;
	};
	unsigned val;
};
183 
/* L2 cache descriptor as read by amd_cpuid4() from CPUID(0x80000006) ECX. */
union l2_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;	/* encoded; decoded through assocs[] */
		unsigned size_in_kb:16;
	};
	unsigned val;
};
193 
/* L3 cache descriptor as read by amd_cpuid4() from CPUID(0x80000006) EDX. */
union l3_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;	/* encoded; decoded through assocs[] */
		unsigned res:2;
		unsigned size_encoded:14;	/* amd_cpuid4() multiplies this by 512 to get KB */
	};
	unsigned val;
};
204 
/*
 * Decode table for the 4-bit L2/L3 associativity field of
 * CPUID(0x80000006): index = encoded field value, entry = number of ways.
 *
 * Encodings 3 (3-way) and 5 (6-way) are defined by the AMD manuals and used
 * to be missing here, which made such caches decode as 0 ways.  Entries left
 * at 0 (0, 7, 9) are not decoded by this table.
 */
static const unsigned short assocs[] = {
	[1] = 1,
	[2] = 2,
	[3] = 3,
	[4] = 4,
	[5] = 6,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};
218 
/*
 * Indexed by the emulated CPUID(4) leaf number (0..3) in amd_cpuid4():
 * levels[] gives the cache level, types[] the CTYPE_* value
 * (data, instruction, unified, unified).
 */
static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[] = { 1, 2, 3, 3 };
221 
/* Translates the CPUID cache type (CTYPE_*) to the generic cacheinfo type. */
static const enum cache_type cache_type_map[] = {
	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
	[CTYPE_DATA] = CACHE_TYPE_DATA,
	[CTYPE_INST] = CACHE_TYPE_INST,
	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
};
228 
229 static void
230 amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
231 		     union _cpuid4_leaf_ebx *ebx,
232 		     union _cpuid4_leaf_ecx *ecx)
233 {
234 	unsigned dummy;
235 	unsigned line_size, lines_per_tag, assoc, size_in_kb;
236 	union l1_cache l1i, l1d;
237 	union l2_cache l2;
238 	union l3_cache l3;
239 	union l1_cache *l1 = &l1d;
240 
241 	eax->full = 0;
242 	ebx->full = 0;
243 	ecx->full = 0;
244 
245 	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
246 	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
247 
248 	switch (leaf) {
249 	case 1:
250 		l1 = &l1i;
251 		fallthrough;
252 	case 0:
253 		if (!l1->val)
254 			return;
255 		assoc = assocs[l1->assoc];
256 		line_size = l1->line_size;
257 		lines_per_tag = l1->lines_per_tag;
258 		size_in_kb = l1->size_in_kb;
259 		break;
260 	case 2:
261 		if (!l2.val)
262 			return;
263 		assoc = assocs[l2.assoc];
264 		line_size = l2.line_size;
265 		lines_per_tag = l2.lines_per_tag;
266 		/* cpu_data has errata corrections for K7 applied */
267 		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
268 		break;
269 	case 3:
270 		if (!l3.val)
271 			return;
272 		assoc = assocs[l3.assoc];
273 		line_size = l3.line_size;
274 		lines_per_tag = l3.lines_per_tag;
275 		size_in_kb = l3.size_encoded * 512;
276 		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
277 			size_in_kb = size_in_kb >> 1;
278 			assoc = assoc >> 1;
279 		}
280 		break;
281 	default:
282 		return;
283 	}
284 
285 	eax->split.is_self_initializing = 1;
286 	eax->split.type = types[leaf];
287 	eax->split.level = levels[leaf];
288 	eax->split.num_threads_sharing = 0;
289 	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
290 
291 
292 	if (assoc == 0xffff)
293 		eax->split.is_fully_associative = 1;
294 	ebx->split.coherency_line_size = line_size - 1;
295 	ebx->split.ways_of_associativity = assoc - 1;
296 	ebx->split.physical_line_partition = lines_per_tag - 1;
297 	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
298 		(ebx->split.ways_of_associativity + 1) - 1;
299 }
300 
301 #if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
302 
303 /*
304  * L3 cache descriptors
305  */
306 static void amd_calc_l3_indices(struct amd_northbridge *nb)
307 {
308 	struct amd_l3_cache *l3 = &nb->l3_cache;
309 	unsigned int sc0, sc1, sc2, sc3;
310 	u32 val = 0;
311 
312 	pci_read_config_dword(nb->misc, 0x1C4, &val);
313 
314 	/* calculate subcache sizes */
315 	l3->subcaches[0] = sc0 = !(val & BIT(0));
316 	l3->subcaches[1] = sc1 = !(val & BIT(4));
317 
318 	if (boot_cpu_data.x86 == 0x15) {
319 		l3->subcaches[0] = sc0 += !(val & BIT(1));
320 		l3->subcaches[1] = sc1 += !(val & BIT(5));
321 	}
322 
323 	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
324 	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
325 
326 	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
327 }
328 
329 /*
330  * check whether a slot used for disabling an L3 index is occupied.
331  * @l3: L3 cache descriptor
332  * @slot: slot number (0..1)
333  *
334  * @returns: the disabled index if used or negative value if slot free.
335  */
336 static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
337 {
338 	unsigned int reg = 0;
339 
340 	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
341 
342 	/* check whether this slot is activated already */
343 	if (reg & (3UL << 30))
344 		return reg & 0xfff;
345 
346 	return -1;
347 }
348 
349 static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
350 				  unsigned int slot)
351 {
352 	int index;
353 	struct amd_northbridge *nb = this_leaf->priv;
354 
355 	index = amd_get_l3_disable_slot(nb, slot);
356 	if (index >= 0)
357 		return sprintf(buf, "%d\n", index);
358 
359 	return sprintf(buf, "FREE\n");
360 }
361 
/*
 * Generates cache_disable_<slot>_show(), the sysfs show callback for one
 * disable slot: it fetches the cacheinfo leaf from the device's drvdata and
 * forwards to show_cache_disable().  Instantiated for slots 0 and 1.
 */
#define SHOW_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)	\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return show_cache_disable(this_leaf, buf, slot);		\
}
SHOW_CACHE_DISABLE(0)
SHOW_CACHE_DISABLE(1)
372 
373 static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
374 				 unsigned slot, unsigned long idx)
375 {
376 	int i;
377 
378 	idx |= BIT(30);
379 
380 	/*
381 	 *  disable index in all 4 subcaches
382 	 */
383 	for (i = 0; i < 4; i++) {
384 		u32 reg = idx | (i << 20);
385 
386 		if (!nb->l3_cache.subcaches[i])
387 			continue;
388 
389 		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
390 
391 		/*
392 		 * We need to WBINVD on a core on the node containing the L3
393 		 * cache which indices we disable therefore a simple wbinvd()
394 		 * is not sufficient.
395 		 */
396 		wbinvd_on_cpu(cpu);
397 
398 		reg |= BIT(31);
399 		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
400 	}
401 }
402 
403 /*
404  * disable a L3 cache index by using a disable-slot
405  *
406  * @l3:    L3 cache descriptor
407  * @cpu:   A CPU on the node containing the L3 cache
408  * @slot:  slot number (0..1)
409  * @index: index to disable
410  *
411  * @return: 0 on success, error status on failure
412  */
413 static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
414 			    unsigned slot, unsigned long index)
415 {
416 	int ret = 0;
417 
418 	/*  check if @slot is already used or the index is already disabled */
419 	ret = amd_get_l3_disable_slot(nb, slot);
420 	if (ret >= 0)
421 		return -EEXIST;
422 
423 	if (index > nb->l3_cache.indices)
424 		return -EINVAL;
425 
426 	/* check whether the other slot has disabled the same index already */
427 	if (index == amd_get_l3_disable_slot(nb, !slot))
428 		return -EEXIST;
429 
430 	amd_l3_disable_index(nb, cpu, slot, index);
431 
432 	return 0;
433 }
434 
435 static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
436 				   const char *buf, size_t count,
437 				   unsigned int slot)
438 {
439 	unsigned long val = 0;
440 	int cpu, err = 0;
441 	struct amd_northbridge *nb = this_leaf->priv;
442 
443 	if (!capable(CAP_SYS_ADMIN))
444 		return -EPERM;
445 
446 	cpu = cpumask_first(&this_leaf->shared_cpu_map);
447 
448 	if (kstrtoul(buf, 10, &val) < 0)
449 		return -EINVAL;
450 
451 	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
452 	if (err) {
453 		if (err == -EEXIST)
454 			pr_warn("L3 slot %d in use/index already disabled!\n",
455 				   slot);
456 		return err;
457 	}
458 	return count;
459 }
460 
/*
 * Generates cache_disable_<slot>_store(), the sysfs store callback for one
 * disable slot: it fetches the cacheinfo leaf from the device's drvdata and
 * forwards to store_cache_disable().  Instantiated for slots 0 and 1.
 */
#define STORE_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_store(struct device *dev,			\
			     struct device_attribute *attr,		\
			     const char *buf, size_t count)		\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return store_cache_disable(this_leaf, buf, count, slot);	\
}
STORE_CACHE_DISABLE(0)
STORE_CACHE_DISABLE(1)
472 
473 static ssize_t subcaches_show(struct device *dev,
474 			      struct device_attribute *attr, char *buf)
475 {
476 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
477 	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
478 
479 	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
480 }
481 
482 static ssize_t subcaches_store(struct device *dev,
483 			       struct device_attribute *attr,
484 			       const char *buf, size_t count)
485 {
486 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
487 	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
488 	unsigned long val;
489 
490 	if (!capable(CAP_SYS_ADMIN))
491 		return -EPERM;
492 
493 	if (kstrtoul(buf, 16, &val) < 0)
494 		return -EINVAL;
495 
496 	if (amd_set_subcaches(cpu, val))
497 		return -EINVAL;
498 
499 	return count;
500 }
501 
/*
 * Read/write device attributes; DEVICE_ATTR_RW(x) binds x_show()/x_store()
 * defined above to dev_attr_x.
 */
static DEVICE_ATTR_RW(cache_disable_0);
static DEVICE_ATTR_RW(cache_disable_1);
static DEVICE_ATTR_RW(subcaches);
505 
506 static umode_t
507 cache_private_attrs_is_visible(struct kobject *kobj,
508 			       struct attribute *attr, int unused)
509 {
510 	struct device *dev = kobj_to_dev(kobj);
511 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
512 	umode_t mode = attr->mode;
513 
514 	if (!this_leaf->priv)
515 		return 0;
516 
517 	if ((attr == &dev_attr_subcaches.attr) &&
518 	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
519 		return mode;
520 
521 	if ((attr == &dev_attr_cache_disable_0.attr ||
522 	     attr == &dev_attr_cache_disable_1.attr) &&
523 	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
524 		return mode;
525 
526 	return 0;
527 }
528 
/*
 * Attribute group returned by cache_get_priv_group().  Its .attrs array is
 * filled in at run time by init_amd_l3_attrs().
 */
static struct attribute_group cache_private_group = {
	.is_visible = cache_private_attrs_is_visible,
};
532 
533 static void init_amd_l3_attrs(void)
534 {
535 	int n = 1;
536 	static struct attribute **amd_l3_attrs;
537 
538 	if (amd_l3_attrs) /* already initialized */
539 		return;
540 
541 	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
542 		n += 2;
543 	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
544 		n += 1;
545 
546 	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
547 	if (!amd_l3_attrs)
548 		return;
549 
550 	n = 0;
551 	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
552 		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
553 		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
554 	}
555 	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
556 		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
557 
558 	cache_private_group.attrs = amd_l3_attrs;
559 }
560 
561 const struct attribute_group *
562 cache_get_priv_group(struct cacheinfo *this_leaf)
563 {
564 	struct amd_northbridge *nb = this_leaf->priv;
565 
566 	if (this_leaf->level < 3 || !nb)
567 		return NULL;
568 
569 	if (nb && nb->l3_cache.indices)
570 		init_amd_l3_attrs();
571 
572 	return &cache_private_group;
573 }
574 
575 static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
576 {
577 	int node;
578 
579 	/* only for L3, and not in virtualized environments */
580 	if (index < 3)
581 		return;
582 
583 	node = topology_die_id(smp_processor_id());
584 	this_leaf->nb = node_to_amd_nb(node);
585 	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
586 		amd_calc_l3_indices(this_leaf->nb);
587 }
588 #else
/* Without CONFIG_AMD_NB and CONFIG_SYSFS there is no L3 northbridge setup. */
#define amd_init_l3_cache(x, y)
590 #endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
591 
592 static int
593 cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
594 {
595 	union _cpuid4_leaf_eax	eax;
596 	union _cpuid4_leaf_ebx	ebx;
597 	union _cpuid4_leaf_ecx	ecx;
598 	unsigned		edx;
599 
600 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
601 		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
602 			cpuid_count(0x8000001d, index, &eax.full,
603 				    &ebx.full, &ecx.full, &edx);
604 		else
605 			amd_cpuid4(index, &eax, &ebx, &ecx);
606 		amd_init_l3_cache(this_leaf, index);
607 	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
608 		cpuid_count(0x8000001d, index, &eax.full,
609 			    &ebx.full, &ecx.full, &edx);
610 		amd_init_l3_cache(this_leaf, index);
611 	} else {
612 		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
613 	}
614 
615 	if (eax.split.type == CTYPE_NULL)
616 		return -EIO; /* better error ? */
617 
618 	this_leaf->eax = eax;
619 	this_leaf->ebx = ebx;
620 	this_leaf->ecx = ecx;
621 	this_leaf->size = (ecx.split.number_of_sets          + 1) *
622 			  (ebx.split.coherency_line_size     + 1) *
623 			  (ebx.split.physical_line_partition + 1) *
624 			  (ebx.split.ways_of_associativity   + 1);
625 	return 0;
626 }
627 
628 static int find_num_cache_leaves(struct cpuinfo_x86 *c)
629 {
630 	unsigned int		eax, ebx, ecx, edx, op;
631 	union _cpuid4_leaf_eax	cache_eax;
632 	int 			i = -1;
633 
634 	if (c->x86_vendor == X86_VENDOR_AMD ||
635 	    c->x86_vendor == X86_VENDOR_HYGON)
636 		op = 0x8000001d;
637 	else
638 		op = 4;
639 
640 	do {
641 		++i;
642 		/* Do cpuid(op) loop to find out num_cache_leaves */
643 		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
644 		cache_eax.full = eax;
645 	} while (cache_eax.split.type != CTYPE_NULL);
646 	return i;
647 }
648 
649 void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu)
650 {
651 	/*
652 	 * We may have multiple LLCs if L3 caches exist, so check if we
653 	 * have an L3 cache by looking at the L3 cache CPUID leaf.
654 	 */
655 	if (!cpuid_edx(0x80000006))
656 		return;
657 
658 	if (c->x86 < 0x17) {
659 		/* LLC is at the node level. */
660 		per_cpu(cpu_llc_id, cpu) = c->cpu_die_id;
661 	} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
662 		/*
663 		 * LLC is at the core complex level.
664 		 * Core complex ID is ApicId[3] for these processors.
665 		 */
666 		per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
667 	} else {
668 		/*
669 		 * LLC ID is calculated from the number of threads sharing the
670 		 * cache.
671 		 * */
672 		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
673 		u32 llc_index = find_num_cache_leaves(c) - 1;
674 
675 		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
676 		if (eax)
677 			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
678 
679 		if (num_sharing_cache) {
680 			int bits = get_count_order(num_sharing_cache);
681 
682 			per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
683 		}
684 	}
685 }
686 
687 void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu)
688 {
689 	/*
690 	 * We may have multiple LLCs if L3 caches exist, so check if we
691 	 * have an L3 cache by looking at the L3 cache CPUID leaf.
692 	 */
693 	if (!cpuid_edx(0x80000006))
694 		return;
695 
696 	/*
697 	 * LLC is at the core complex level.
698 	 * Core complex ID is ApicId[3] for these processors.
699 	 */
700 	per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
701 }
702 
703 void init_amd_cacheinfo(struct cpuinfo_x86 *c)
704 {
705 
706 	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
707 		num_cache_leaves = find_num_cache_leaves(c);
708 	} else if (c->extended_cpuid_level >= 0x80000006) {
709 		if (cpuid_edx(0x80000006) & 0xf000)
710 			num_cache_leaves = 4;
711 		else
712 			num_cache_leaves = 3;
713 	}
714 }
715 
716 void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
717 {
718 	num_cache_leaves = find_num_cache_leaves(c);
719 }
720 
721 void init_intel_cacheinfo(struct cpuinfo_x86 *c)
722 {
723 	/* Cache sizes */
724 	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
725 	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
726 	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
727 	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
728 #ifdef CONFIG_SMP
729 	unsigned int cpu = c->cpu_index;
730 #endif
731 
732 	if (c->cpuid_level > 3) {
733 		static int is_initialized;
734 
735 		if (is_initialized == 0) {
736 			/* Init num_cache_leaves from boot CPU */
737 			num_cache_leaves = find_num_cache_leaves(c);
738 			is_initialized++;
739 		}
740 
741 		/*
742 		 * Whenever possible use cpuid(4), deterministic cache
743 		 * parameters cpuid leaf to find the cache details
744 		 */
745 		for (i = 0; i < num_cache_leaves; i++) {
746 			struct _cpuid4_info_regs this_leaf = {};
747 			int retval;
748 
749 			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
750 			if (retval < 0)
751 				continue;
752 
753 			switch (this_leaf.eax.split.level) {
754 			case 1:
755 				if (this_leaf.eax.split.type == CTYPE_DATA)
756 					new_l1d = this_leaf.size/1024;
757 				else if (this_leaf.eax.split.type == CTYPE_INST)
758 					new_l1i = this_leaf.size/1024;
759 				break;
760 			case 2:
761 				new_l2 = this_leaf.size/1024;
762 				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
763 				index_msb = get_count_order(num_threads_sharing);
764 				l2_id = c->apicid & ~((1 << index_msb) - 1);
765 				break;
766 			case 3:
767 				new_l3 = this_leaf.size/1024;
768 				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
769 				index_msb = get_count_order(num_threads_sharing);
770 				l3_id = c->apicid & ~((1 << index_msb) - 1);
771 				break;
772 			default:
773 				break;
774 			}
775 		}
776 	}
777 	/*
778 	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
779 	 * trace cache
780 	 */
781 	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
782 		/* supports eax=2  call */
783 		int j, n;
784 		unsigned int regs[4];
785 		unsigned char *dp = (unsigned char *)regs;
786 		int only_trace = 0;
787 
788 		if (num_cache_leaves != 0 && c->x86 == 15)
789 			only_trace = 1;
790 
791 		/* Number of times to iterate */
792 		n = cpuid_eax(2) & 0xFF;
793 
794 		for (i = 0 ; i < n ; i++) {
795 			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
796 
797 			/* If bit 31 is set, this is an unknown format */
798 			for (j = 0 ; j < 3 ; j++)
799 				if (regs[j] & (1 << 31))
800 					regs[j] = 0;
801 
802 			/* Byte 0 is level count, not a descriptor */
803 			for (j = 1 ; j < 16 ; j++) {
804 				unsigned char des = dp[j];
805 				unsigned char k = 0;
806 
807 				/* look up this descriptor in the table */
808 				while (cache_table[k].descriptor != 0) {
809 					if (cache_table[k].descriptor == des) {
810 						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
811 							break;
812 						switch (cache_table[k].cache_type) {
813 						case LVL_1_INST:
814 							l1i += cache_table[k].size;
815 							break;
816 						case LVL_1_DATA:
817 							l1d += cache_table[k].size;
818 							break;
819 						case LVL_2:
820 							l2 += cache_table[k].size;
821 							break;
822 						case LVL_3:
823 							l3 += cache_table[k].size;
824 							break;
825 						case LVL_TRACE:
826 							trace += cache_table[k].size;
827 							break;
828 						}
829 
830 						break;
831 					}
832 
833 					k++;
834 				}
835 			}
836 		}
837 	}
838 
839 	if (new_l1d)
840 		l1d = new_l1d;
841 
842 	if (new_l1i)
843 		l1i = new_l1i;
844 
845 	if (new_l2) {
846 		l2 = new_l2;
847 #ifdef CONFIG_SMP
848 		per_cpu(cpu_llc_id, cpu) = l2_id;
849 		per_cpu(cpu_l2c_id, cpu) = l2_id;
850 #endif
851 	}
852 
853 	if (new_l3) {
854 		l3 = new_l3;
855 #ifdef CONFIG_SMP
856 		per_cpu(cpu_llc_id, cpu) = l3_id;
857 #endif
858 	}
859 
860 #ifdef CONFIG_SMP
861 	/*
862 	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
863 	 * turns means that the only possibility is SMT (as indicated in
864 	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
865 	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
866 	 * c->phys_proc_id.
867 	 */
868 	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
869 		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
870 #endif
871 
872 	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
873 
874 	if (!l2)
875 		cpu_detect_cache_sizes(c);
876 }
877 
878 static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
879 				    struct _cpuid4_info_regs *base)
880 {
881 	struct cpu_cacheinfo *this_cpu_ci;
882 	struct cacheinfo *this_leaf;
883 	int i, sibling;
884 
885 	/*
886 	 * For L3, always use the pre-calculated cpu_llc_shared_mask
887 	 * to derive shared_cpu_map.
888 	 */
889 	if (index == 3) {
890 		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
891 			this_cpu_ci = get_cpu_cacheinfo(i);
892 			if (!this_cpu_ci->info_list)
893 				continue;
894 			this_leaf = this_cpu_ci->info_list + index;
895 			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
896 				if (!cpu_online(sibling))
897 					continue;
898 				cpumask_set_cpu(sibling,
899 						&this_leaf->shared_cpu_map);
900 			}
901 		}
902 	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
903 		unsigned int apicid, nshared, first, last;
904 
905 		nshared = base->eax.split.num_threads_sharing + 1;
906 		apicid = cpu_data(cpu).apicid;
907 		first = apicid - (apicid % nshared);
908 		last = first + nshared - 1;
909 
910 		for_each_online_cpu(i) {
911 			this_cpu_ci = get_cpu_cacheinfo(i);
912 			if (!this_cpu_ci->info_list)
913 				continue;
914 
915 			apicid = cpu_data(i).apicid;
916 			if ((apicid < first) || (apicid > last))
917 				continue;
918 
919 			this_leaf = this_cpu_ci->info_list + index;
920 
921 			for_each_online_cpu(sibling) {
922 				apicid = cpu_data(sibling).apicid;
923 				if ((apicid < first) || (apicid > last))
924 					continue;
925 				cpumask_set_cpu(sibling,
926 						&this_leaf->shared_cpu_map);
927 			}
928 		}
929 	} else
930 		return 0;
931 
932 	return 1;
933 }
934 
935 static void __cache_cpumap_setup(unsigned int cpu, int index,
936 				 struct _cpuid4_info_regs *base)
937 {
938 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
939 	struct cacheinfo *this_leaf, *sibling_leaf;
940 	unsigned long num_threads_sharing;
941 	int index_msb, i;
942 	struct cpuinfo_x86 *c = &cpu_data(cpu);
943 
944 	if (c->x86_vendor == X86_VENDOR_AMD ||
945 	    c->x86_vendor == X86_VENDOR_HYGON) {
946 		if (__cache_amd_cpumap_setup(cpu, index, base))
947 			return;
948 	}
949 
950 	this_leaf = this_cpu_ci->info_list + index;
951 	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
952 
953 	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
954 	if (num_threads_sharing == 1)
955 		return;
956 
957 	index_msb = get_count_order(num_threads_sharing);
958 
959 	for_each_online_cpu(i)
960 		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
961 			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
962 
963 			if (i == cpu || !sib_cpu_ci->info_list)
964 				continue;/* skip if itself or no cacheinfo */
965 			sibling_leaf = sib_cpu_ci->info_list + index;
966 			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
967 			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
968 		}
969 }
970 
971 static void ci_leaf_init(struct cacheinfo *this_leaf,
972 			 struct _cpuid4_info_regs *base)
973 {
974 	this_leaf->id = base->id;
975 	this_leaf->attributes = CACHE_ID;
976 	this_leaf->level = base->eax.split.level;
977 	this_leaf->type = cache_type_map[base->eax.split.type];
978 	this_leaf->coherency_line_size =
979 				base->ebx.split.coherency_line_size + 1;
980 	this_leaf->ways_of_associativity =
981 				base->ebx.split.ways_of_associativity + 1;
982 	this_leaf->size = base->size;
983 	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
984 	this_leaf->physical_line_partition =
985 				base->ebx.split.physical_line_partition + 1;
986 	this_leaf->priv = base->nb;
987 }
988 
989 int init_cache_level(unsigned int cpu)
990 {
991 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
992 
993 	if (!num_cache_leaves)
994 		return -ENOENT;
995 	if (!this_cpu_ci)
996 		return -EINVAL;
997 	this_cpu_ci->num_levels = 3;
998 	this_cpu_ci->num_leaves = num_cache_leaves;
999 	return 0;
1000 }
1001 
1002 /*
1003  * The max shared threads number comes from CPUID.4:EAX[25-14] with input
1004  * ECX as cache index. Then right shift apicid by the number's order to get
1005  * cache id for this cache node.
1006  */
1007 static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
1008 {
1009 	struct cpuinfo_x86 *c = &cpu_data(cpu);
1010 	unsigned long num_threads_sharing;
1011 	int index_msb;
1012 
1013 	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
1014 	index_msb = get_count_order(num_threads_sharing);
1015 	id4_regs->id = c->apicid >> index_msb;
1016 }
1017 
1018 int populate_cache_leaves(unsigned int cpu)
1019 {
1020 	unsigned int idx, ret;
1021 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
1022 	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
1023 	struct _cpuid4_info_regs id4_regs = {};
1024 
1025 	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
1026 		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
1027 		if (ret)
1028 			return ret;
1029 		get_cache_id(cpu, &id4_regs);
1030 		ci_leaf_init(this_leaf++, &id4_regs);
1031 		__cache_cpumap_setup(cpu, idx, &id4_regs);
1032 	}
1033 	this_cpu_ci->cpu_map_populated = true;
1034 
1035 	return 0;
1036 }
1037