// SPDX-License-Identifier: GPL-2.0
/*
 *	Routines to identify caches on Intel CPUs.
 *
 *	Changes:
 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
 */

#include <linux/slab.h>
#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/capability.h>
#include <linux/sysfs.h>
#include <linux/pci.h>

#include <asm/cpufeature.h>
#include <asm/cacheinfo.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>

#include "cpu.h"

#define LVL_1_INST	1
#define LVL_1_DATA	2
#define LVL_2		3
#define LVL_3		4
#define LVL_TRACE	5

struct _cache_table {
	unsigned char descriptor;
	char cache_type;
	short size;
};

#define MB(x)	((x) * 1024)

/*
 * All the cache descriptor types we care about (no TLB or
 * trace cache entries).
 */
static const struct _cache_table cache_table[] =
{
	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
	{ 0x00, 0, 0}
};

enum _cache_type {
	CTYPE_NULL = 0,
	CTYPE_DATA = 1,
	CTYPE_INST = 2,
	CTYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
	struct {
		enum _cache_type	type:5;
		unsigned int		level:3;
		unsigned int		is_self_initializing:1;
		unsigned int		is_fully_associative:1;
		unsigned int		reserved:4;
		unsigned int		num_threads_sharing:12;
		unsigned int		num_cores_on_die:6;
	} split;
	u32 full;
};

union _cpuid4_leaf_ebx {
	struct {
		unsigned int		coherency_line_size:12;
		unsigned int		physical_line_partition:10;
		unsigned int		ways_of_associativity:10;
	} split;
	u32 full;
};

union _cpuid4_leaf_ecx {
	struct {
		unsigned int		number_of_sets:32;
	} split;
	u32 full;
};

struct _cpuid4_info_regs {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned int id;
	unsigned long size;
	struct amd_northbridge *nb;
};

static unsigned short num_cache_leaves;

/*
 * AMD doesn't have CPUID4. Emulate it here to report the same
 * information to the user. This makes some assumptions about the machine:
 * L2 not shared, no SMT etc. that is currently true on AMD CPUs.
 *
 * In theory the TLBs could be reported as fake type (they are in "dummy").
 * Maybe later.
 */
union l1_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:8;
		unsigned assoc:8;
		unsigned size_in_kb:8;
	};
	unsigned val;
};

union l2_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned size_in_kb:16;
	};
	unsigned val;
};

union l3_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned res:2;
		unsigned size_encoded:14;
	};
	unsigned val;
};

static const unsigned short assocs[] = {
	[1] = 1,
	[2] = 2,
	[4] = 4,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};

static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[] = { 1, 2, 3, 3 };

static const enum cache_type cache_type_map[] = {
	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
	[CTYPE_DATA] = CACHE_TYPE_DATA,
	[CTYPE_INST] = CACHE_TYPE_INST,
	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
};

static void
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
		     union _cpuid4_leaf_ebx *ebx,
		     union _cpuid4_leaf_ecx *ecx)
{
	unsigned dummy;
	unsigned line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d;
	union l2_cache l2;
	union l3_cache l3;
	union l1_cache *l1 = &l1d;

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

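	/*
	 * CPUID 0x80000005 reports the L1 caches (ECX = L1D, EDX = L1I);
	 * 0x80000006 reports the L2 (ECX) and L3 (EDX) caches.
	 */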
	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

	switch (leaf) {
	case 1:
		l1 = &l1i;
		/* fall through */
	case 0:
		if (!l1->val)
			return;
		assoc = assocs[l1->assoc];
		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;
		break;
	case 2:
		if (!l2.val)
			return;
		assoc = assocs[l2.assoc];
		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		/* cpu_data has errata corrections for K7 applied */
		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
		break;
	case 3:
		if (!l3.val)
			return;
		assoc = assocs[l3.assoc];
		line_size = l3.line_size;
		lines_per_tag = l3.lines_per_tag;
		size_in_kb = l3.size_encoded * 512;
		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
			size_in_kb = size_in_kb >> 1;
			assoc = assoc >> 1;
		}
		break;
	default:
		return;
	}

	eax->split.is_self_initializing = 1;
	eax->split.type = types[leaf];
	eax->split.level = levels[leaf];
	eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;

	if (assoc == 0xffff)
		eax->split.is_fully_associative = 1;
	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assoc - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
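	/*
	 * number_of_sets is encoded minus one, as in the genuine CPUID(4)
	 * leaf: sets = size / (line size * associativity).
	 */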
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
		(ebx->split.ways_of_associativity + 1) - 1;
}

#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)

/*
 * L3 cache descriptors
 */
static void amd_calc_l3_indices(struct amd_northbridge *nb)
{
	struct amd_l3_cache *l3 = &nb->l3_cache;
	unsigned int sc0, sc1, sc2, sc3;
	u32 val = 0;

	pci_read_config_dword(nb->misc, 0x1C4, &val);

	/* calculate subcache sizes */
	l3->subcaches[0] = sc0 = !(val & BIT(0));
	l3->subcaches[1] = sc1 = !(val & BIT(4));

	if (boot_cpu_data.x86 == 0x15) {
		l3->subcaches[0] = sc0 += !(val & BIT(1));
		l3->subcaches[1] = sc1 += !(val & BIT(5));
	}

	l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9));
	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));

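	/*
	 * Size the index space to the largest subcache: each counted portion
	 * contributes 1024 indices, stored as a zero-based maximum.
	 */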
	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}

/*
 * check whether a slot used for disabling an L3 index is occupied.
 * @nb: amd_northbridge structure holding the L3 cache descriptor
 * @slot: slot number (0..1)
 *
 * @returns: the disabled index if used or negative value if slot free.
 */
static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
{
	unsigned int reg = 0;

	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);

	/* check whether this slot is activated already */
	if (reg & (3UL << 30))
		return reg & 0xfff;

	return -1;
}

static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
				  unsigned int slot)
{
	int index;
	struct amd_northbridge *nb = this_leaf->priv;

	index = amd_get_l3_disable_slot(nb, slot);
	if (index >= 0)
		return sprintf(buf, "%d\n", index);

	return sprintf(buf, "FREE\n");
}

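/*
 * Generate the sysfs ->show() handlers cache_disable_0_show() and
 * cache_disable_1_show() for the two L3 index-disable slots.
 */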
#define SHOW_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)	\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return show_cache_disable(this_leaf, buf, slot);		\
}
SHOW_CACHE_DISABLE(0)
SHOW_CACHE_DISABLE(1)

static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
				 unsigned slot, unsigned long idx)
{
	int i;

	idx |= BIT(30);

	/*
	 * disable index in all 4 subcaches
	 */
	for (i = 0; i < 4; i++) {
		u32 reg = idx | (i << 20);

		if (!nb->l3_cache.subcaches[i])
			continue;

		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);

		/*
		 * We need to WBINVD on a core on the node containing the L3
		 * cache whose indices we disable; a simple wbinvd() is
		 * therefore not sufficient.
		 */
		wbinvd_on_cpu(cpu);

		reg |= BIT(31);
		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
	}
}

/*
 * disable an L3 cache index by using a disable-slot
 *
 * @nb:    amd_northbridge structure holding the L3 cache
 * @cpu:   A CPU on the node containing the L3 cache
 * @slot:  slot number (0..1)
 * @index: index to disable
 *
 * @return: 0 on success, error status on failure
 */
static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
			    unsigned slot, unsigned long index)
{
	int ret = 0;

	/* check if @slot is already used or the index is already disabled */
	ret = amd_get_l3_disable_slot(nb, slot);
	if (ret >= 0)
		return -EEXIST;

	if (index > nb->l3_cache.indices)
		return -EINVAL;

	/* check whether the other slot has disabled the same index already */
	if (index == amd_get_l3_disable_slot(nb, !slot))
		return -EEXIST;

	amd_l3_disable_index(nb, cpu, slot, index);

	return 0;
}

static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
				   const char *buf, size_t count,
				   unsigned int slot)
{
	unsigned long val = 0;
	int cpu, err = 0;
	struct amd_northbridge *nb = this_leaf->priv;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	cpu = cpumask_first(&this_leaf->shared_cpu_map);

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
	if (err) {
		if (err == -EEXIST)
			pr_warn("L3 slot %d in use/index already disabled!\n",
				slot);
		return err;
	}
	return count;
}

#define STORE_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_store(struct device *dev,			\
			     struct device_attribute *attr,		\
			     const char *buf, size_t count)		\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return store_cache_disable(this_leaf, buf, count, slot);	\
}
STORE_CACHE_DISABLE(0)
STORE_CACHE_DISABLE(1)

static ssize_t subcaches_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);

	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
}

static ssize_t subcaches_store(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t count)
{
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
	unsigned long val;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (kstrtoul(buf, 16, &val) < 0)
		return -EINVAL;

	if (amd_set_subcaches(cpu, val))
		return -EINVAL;

	return count;
}

static DEVICE_ATTR_RW(cache_disable_0);
static DEVICE_ATTR_RW(cache_disable_1);
static DEVICE_ATTR_RW(subcaches);

static umode_t
cache_private_attrs_is_visible(struct kobject *kobj,
			       struct attribute *attr, int unused)
{
	struct device *dev = kobj_to_dev(kobj);
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	umode_t mode = attr->mode;

	if (!this_leaf->priv)
		return 0;

	if ((attr == &dev_attr_subcaches.attr) &&
	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		return mode;

	if ((attr == &dev_attr_cache_disable_0.attr ||
	     attr == &dev_attr_cache_disable_1.attr) &&
	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		return mode;

	return 0;
}

static struct attribute_group cache_private_group = {
	.is_visible = cache_private_attrs_is_visible,
};

static void init_amd_l3_attrs(void)
{
	int n = 1;
	static struct attribute **amd_l3_attrs;

	if (amd_l3_attrs) /* already initialized */
		return;

	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		n += 2;
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		n += 1;

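	/* n includes one extra slot; kcalloc() zero-fills it so the array stays NULL-terminated. */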
	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
	if (!amd_l3_attrs)
		return;

	n = 0;
	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
	}
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;

	cache_private_group.attrs = amd_l3_attrs;
}

const struct attribute_group *
cache_get_priv_group(struct cacheinfo *this_leaf)
{
	struct amd_northbridge *nb = this_leaf->priv;

	if (this_leaf->level < 3 || !nb)
		return NULL;

	if (nb && nb->l3_cache.indices)
		init_amd_l3_attrs();

	return &cache_private_group;
}

static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
{
	int node;

	/* only for L3, and not in virtualized environments */
	if (index < 3)
		return;

	node = amd_get_nb_id(smp_processor_id());
	this_leaf->nb = node_to_amd_nb(node);
	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
		amd_calc_l3_indices(this_leaf->nb);
}
#else
#define amd_init_l3_cache(x, y)
#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */

static int
cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
{
	union _cpuid4_leaf_eax	eax;
	union _cpuid4_leaf_ebx	ebx;
	union _cpuid4_leaf_ecx	ecx;
	unsigned		edx;

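	/*
	 * AMD and Hygon CPUs with TOPOEXT expose leaf 0x8000001d, which has
	 * the same layout as Intel's deterministic cache leaf 4; older AMD
	 * parts fall back to the amd_cpuid4() emulation above.
	 */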
	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
			cpuid_count(0x8000001d, index, &eax.full,
				    &ebx.full, &ecx.full, &edx);
		else
			amd_cpuid4(index, &eax, &ebx, &ecx);
		amd_init_l3_cache(this_leaf, index);
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
		cpuid_count(0x8000001d, index, &eax.full,
			    &ebx.full, &ecx.full, &edx);
		amd_init_l3_cache(this_leaf, index);
	} else {
		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
	}

	if (eax.split.type == CTYPE_NULL)
		return -EIO; /* better error ? */

	this_leaf->eax = eax;
	this_leaf->ebx = ebx;
	this_leaf->ecx = ecx;
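	/*
	 * Total size in bytes: every field in leaf 4 is reported as
	 * "value - 1", hence the "+ 1" on each term below.
	 */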
	this_leaf->size = (ecx.split.number_of_sets          + 1) *
			  (ebx.split.coherency_line_size     + 1) *
			  (ebx.split.physical_line_partition + 1) *
			  (ebx.split.ways_of_associativity   + 1);
	return 0;
}

static int find_num_cache_leaves(struct cpuinfo_x86 *c)
{
	unsigned int		eax, ebx, ecx, edx, op;
	union _cpuid4_leaf_eax	cache_eax;
	int			i = -1;

	if (c->x86_vendor == X86_VENDOR_AMD ||
	    c->x86_vendor == X86_VENDOR_HYGON)
		op = 0x8000001d;
	else
		op = 4;

	do {
		++i;
		/* Do cpuid(op) loop to find out num_cache_leaves */
		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CTYPE_NULL);
	return i;
}

void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
{
	/*
	 * We may have multiple LLCs if L3 caches exist, so check if we
	 * have an L3 cache by looking at the L3 cache CPUID leaf.
	 */
	if (!cpuid_edx(0x80000006))
		return;

	if (c->x86 < 0x17) {
		/* LLC is at the node level. */
		per_cpu(cpu_llc_id, cpu) = node_id;
	} else if (c->x86 == 0x17 &&
		   c->x86_model >= 0 && c->x86_model <= 0x1F) {
		/*
		 * LLC is at the core complex level.
		 * Core complex ID is ApicId[3] for these processors.
		 */
		per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
	} else {
		/*
		 * LLC ID is calculated from the number of threads sharing the
		 * cache.
		 */
		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
		u32 llc_index = find_num_cache_leaves(c) - 1;

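		/* EAX[25:14] of leaf 0x8000001d holds "threads sharing this cache" - 1. */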
		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
		if (eax)
			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;

		if (num_sharing_cache) {
			int bits = get_count_order(num_sharing_cache);

			per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
		}
	}
}

void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
{
	/*
	 * We may have multiple LLCs if L3 caches exist, so check if we
	 * have an L3 cache by looking at the L3 cache CPUID leaf.
	 */
	if (!cpuid_edx(0x80000006))
		return;

	/*
	 * LLC is at the core complex level.
	 * Core complex ID is ApicId[3] for these processors.
	 */
	per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
}

void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		num_cache_leaves = find_num_cache_leaves(c);
	} else if (c->extended_cpuid_level >= 0x80000006) {
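		/*
		 * EDX[15:12] of leaf 0x80000006 is the L3 associativity;
		 * non-zero means an L3 exists, giving four cache leaves.
		 */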
		if (cpuid_edx(0x80000006) & 0xf000)
			num_cache_leaves = 4;
		else
			num_cache_leaves = 3;
	}
}

void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
{
	num_cache_leaves = find_num_cache_leaves(c);
}

void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	/* Cache sizes */
	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
#ifdef CONFIG_SMP
	unsigned int cpu = c->cpu_index;
#endif

	if (c->cpuid_level > 3) {
		static int is_initialized;

		if (is_initialized == 0) {
			/* Init num_cache_leaves from boot CPU */
			num_cache_leaves = find_num_cache_leaves(c);
			is_initialized++;
		}

		/*
		 * Whenever possible use cpuid(4), the deterministic cache
		 * parameters CPUID leaf, to find the cache details.
		 */
		for (i = 0; i < num_cache_leaves; i++) {
			struct _cpuid4_info_regs this_leaf = {};
			int retval;

			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
			if (retval < 0)
				continue;

			switch (this_leaf.eax.split.level) {
			case 1:
				if (this_leaf.eax.split.type == CTYPE_DATA)
					new_l1d = this_leaf.size/1024;
				else if (this_leaf.eax.split.type == CTYPE_INST)
					new_l1i = this_leaf.size/1024;
				break;
			case 2:
				new_l2 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l2_id = c->apicid & ~((1 << index_msb) - 1);
				break;
			case 3:
				new_l3 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l3_id = c->apicid & ~((1 << index_msb) - 1);
				break;
			default:
				break;
			}
		}
	}
	/*
	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
	 * the trace cache only.
	 */
	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
		/* supports eax=2  call */
		int j, n;
		unsigned int regs[4];
		unsigned char *dp = (unsigned char *)regs;
		int only_trace = 0;

		if (num_cache_leaves != 0 && c->x86 == 15)
			only_trace = 1;

		/* Number of times to iterate */
		n = cpuid_eax(2) & 0xFF;

		for (i = 0 ; i < n ; i++) {
			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

			/* If bit 31 is set, this is an unknown format */
			for (j = 0 ; j < 3 ; j++)
				if (regs[j] & (1 << 31))
					regs[j] = 0;

			/* Byte 0 is level count, not a descriptor */
			for (j = 1 ; j < 16 ; j++) {
				unsigned char des = dp[j];
				unsigned char k = 0;

				/* look up this descriptor in the table */
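				/* e.g. descriptor 0x2c decodes to a 32 KB L1 data cache */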
				while (cache_table[k].descriptor != 0) {
					if (cache_table[k].descriptor == des) {
						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
							break;
						switch (cache_table[k].cache_type) {
						case LVL_1_INST:
							l1i += cache_table[k].size;
							break;
						case LVL_1_DATA:
							l1d += cache_table[k].size;
							break;
						case LVL_2:
							l2 += cache_table[k].size;
							break;
						case LVL_3:
							l3 += cache_table[k].size;
							break;
						case LVL_TRACE:
							trace += cache_table[k].size;
							break;
						}

						break;
					}

					k++;
				}
			}
		}
	}

	if (new_l1d)
		l1d = new_l1d;

	if (new_l1i)
		l1i = new_l1i;

	if (new_l2) {
		l2 = new_l2;
#ifdef CONFIG_SMP
		per_cpu(cpu_llc_id, cpu) = l2_id;
#endif
	}

	if (new_l3) {
		l3 = new_l3;
#ifdef CONFIG_SMP
		per_cpu(cpu_llc_id, cpu) = l3_id;
#endif
	}

#ifdef CONFIG_SMP
	/*
	 * If cpu_llc_id is not yet set, this means cpuid_level < 4, which in
	 * turn means that the only possibility is SMT (as indicated in
	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
	 * c->phys_proc_id.
	 */
	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
#endif

	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

	if (!l2)
		cpu_detect_cache_sizes(c);
}

static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
				    struct _cpuid4_info_regs *base)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf;
	int i, sibling;

	/*
	 * For L3, always use the pre-calculated cpu_llc_shared_mask
	 * to derive shared_cpu_map.
	 */
	if (index == 3) {
		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;
			this_leaf = this_cpu_ci->info_list + index;
			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
				if (!cpu_online(sibling))
					continue;
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
			}
		}
	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		unsigned int apicid, nshared, first, last;

		nshared = base->eax.split.num_threads_sharing + 1;
		apicid = cpu_data(cpu).apicid;
		first = apicid - (apicid % nshared);
		last = first + nshared - 1;

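		/*
		 * Threads sharing this cache have consecutive APIC IDs in
		 * [first, last]; mark every online sibling in that window.
		 */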
		for_each_online_cpu(i) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;

			apicid = cpu_data(i).apicid;
			if ((apicid < first) || (apicid > last))
				continue;

			this_leaf = this_cpu_ci->info_list + index;

			for_each_online_cpu(sibling) {
				apicid = cpu_data(sibling).apicid;
				if ((apicid < first) || (apicid > last))
					continue;
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
			}
		}
	} else
		return 0;

	return 1;
}

static void __cache_cpumap_setup(unsigned int cpu, int index,
				 struct _cpuid4_info_regs *base)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf, *sibling_leaf;
	unsigned long num_threads_sharing;
	int index_msb, i;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	if (c->x86_vendor == X86_VENDOR_AMD ||
	    c->x86_vendor == X86_VENDOR_HYGON) {
		if (__cache_amd_cpumap_setup(cpu, index, base))
			return;
	}

	this_leaf = this_cpu_ci->info_list + index;
	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;

	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
	if (num_threads_sharing == 1)
		return;

	index_msb = get_count_order(num_threads_sharing);

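	/* CPUs whose APIC IDs match above bit index_msb share this cache. */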
	for_each_online_cpu(i)
		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);

			if (i == cpu || !sib_cpu_ci->info_list)
				continue; /* skip if itself or no cacheinfo */
			sibling_leaf = sib_cpu_ci->info_list + index;
			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
		}
}

static void ci_leaf_init(struct cacheinfo *this_leaf,
			 struct _cpuid4_info_regs *base)
{
	this_leaf->id = base->id;
	this_leaf->attributes = CACHE_ID;
	this_leaf->level = base->eax.split.level;
	this_leaf->type = cache_type_map[base->eax.split.type];
	this_leaf->coherency_line_size =
				base->ebx.split.coherency_line_size + 1;
	this_leaf->ways_of_associativity =
				base->ebx.split.ways_of_associativity + 1;
	this_leaf->size = base->size;
	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
	this_leaf->physical_line_partition =
				base->ebx.split.physical_line_partition + 1;
	this_leaf->priv = base->nb;
}

static int __init_cache_level(unsigned int cpu)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);

	if (!num_cache_leaves)
		return -ENOENT;
	if (!this_cpu_ci)
		return -EINVAL;
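	/* x86 enumeration here assumes at most three cache levels (L1..L3). */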
	this_cpu_ci->num_levels = 3;
	this_cpu_ci->num_leaves = num_cache_leaves;
	return 0;
}

/*
 * The maximum number of threads sharing a cache comes from
 * CPUID.4:EAX[25:14], with ECX selecting the cache index. Right-shifting
 * the APIC ID by that number's order gives the cache id for this cache
 * node.
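 *
 * Example (hypothetical values): if EAX[25:14] reads 15, then 16 threads
 * share the cache, index_msb is 4, and the cache id is apicid >> 4.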
 */
static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
{
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	unsigned long num_threads_sharing;
	int index_msb;

	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
	index_msb = get_count_order(num_threads_sharing);
	id4_regs->id = c->apicid >> index_msb;
}

static int __populate_cache_leaves(unsigned int cpu)
{
	unsigned int idx;
	int ret;
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
	struct _cpuid4_info_regs id4_regs = {};

	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
		if (ret)
			return ret;
		get_cache_id(cpu, &id4_regs);
		ci_leaf_init(this_leaf++, &id4_regs);
		__cache_cpumap_setup(cpu, idx, &id4_regs);
	}
	this_cpu_ci->cpu_map_populated = true;

	return 0;
}

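/*
 * DEFINE_SMP_CALL_CACHE_FUNCTION() (from <linux/cacheinfo.h>) generates
 * init_cache_level() and populate_cache_leaves() wrappers that run the
 * __-prefixed helpers above on the target CPU.
 */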
DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
1039