xref: /openbmc/linux/arch/x86/kernel/cpu/cacheinfo.c (revision f43e47c090dc7fe32d5410d8740c3a004eb2676f)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *	Routines to identify caches on Intel CPU.
4  *
5  *	Changes:
6  *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
7  *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
8  *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
9  */
10 
11 #include <linux/slab.h>
12 #include <linux/cacheinfo.h>
13 #include <linux/cpu.h>
14 #include <linux/sched.h>
15 #include <linux/capability.h>
16 #include <linux/sysfs.h>
17 #include <linux/pci.h>
18 
19 #include <asm/cpufeature.h>
20 #include <asm/cacheinfo.h>
21 #include <asm/amd_nb.h>
22 #include <asm/smp.h>
23 
24 #include "cpu.h"
25 
/* Cache kinds stored in cache_table[].cache_type for CPUID(2) descriptors. */
#define LVL_1_INST	1	/* L1 instruction cache */
#define LVL_1_DATA	2	/* L1 data cache */
#define LVL_2		3	/* L2 cache */
#define LVL_3		4	/* L3 cache */
#define LVL_TRACE	5	/* trace cache */
31 
/* Per-CPU mask of the CPUs sharing this CPU's last level cache. */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);

/* Per-CPU mask of the CPUs sharing this CPU's L2 cache. */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);
37 
/* One CPUID(2) cache descriptor and the cache it stands for. */
struct _cache_table {
	unsigned char descriptor;	/* descriptor byte returned by CPUID(2) */
	char cache_type;		/* one of the LVL_* constants */
	short size;			/* cache size in KB */
};
43 
/* Express a size given in megabytes in the table's unit (KB). */
#define MB(x)	((x) * 1024)
45 
46 /* All the cache descriptor types we care about (no TLB or
47    trace cache entries) */
48 
49 static const struct _cache_table cache_table[] =
50 {
51 	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
52 	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
53 	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
54 	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
55 	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
56 	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
57 	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
58 	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
59 	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
60 	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
61 	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
62 	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
63 	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
64 	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
65 	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
66 	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
67 	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
68 	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
69 	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
70 	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
71 	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
72 	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
73 	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
74 	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
75 	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
76 	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
77 	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
78 	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
79 	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
80 	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
81 	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
82 	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
83 	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
84 	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
85 	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
86 	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
87 	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
88 	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
89 	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
90 	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
91 	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
92 	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
93 	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
94 	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
95 	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
96 	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
97 	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
98 	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
99 	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
100 	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
101 	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
102 	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
103 	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
104 	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
105 	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
106 	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
107 	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
108 	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
109 	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
110 	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
111 	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
112 	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
113 	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
114 	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
115 	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
116 	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
117 	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
118 	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
119 	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
120 	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
121 	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
122 	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
123 	{ 0x00, 0, 0}
124 };
125 
126 
/* Cache type as encoded in the low bits of CPUID(4) EAX. */
enum _cache_type {
	CTYPE_NULL	= 0,	/* no (more) caches */
	CTYPE_DATA	= 1,
	CTYPE_INST	= 2,
	CTYPE_UNIFIED	= 3,
};
133 
134 union _cpuid4_leaf_eax {
135 	struct {
136 		enum _cache_type	type:5;
137 		unsigned int		level:3;
138 		unsigned int		is_self_initializing:1;
139 		unsigned int		is_fully_associative:1;
140 		unsigned int		reserved:4;
141 		unsigned int		num_threads_sharing:12;
142 		unsigned int		num_cores_on_die:6;
143 	} split;
144 	u32 full;
145 };
146 
147 union _cpuid4_leaf_ebx {
148 	struct {
149 		unsigned int		coherency_line_size:12;
150 		unsigned int		physical_line_partition:10;
151 		unsigned int		ways_of_associativity:10;
152 	} split;
153 	u32 full;
154 };
155 
156 union _cpuid4_leaf_ecx {
157 	struct {
158 		unsigned int		number_of_sets:32;
159 	} split;
160 	u32 full;
161 };
162 
163 struct _cpuid4_info_regs {
164 	union _cpuid4_leaf_eax eax;
165 	union _cpuid4_leaf_ebx ebx;
166 	union _cpuid4_leaf_ecx ecx;
167 	unsigned int id;
168 	unsigned long size;
169 	struct amd_northbridge *nb;
170 };
171 
/*
 * Number of cache leaves, set by the init_*_cacheinfo() functions and
 * reported to the cacheinfo core by init_cache_level().
 */
static unsigned short num_cache_leaves;
173 
/*
 * AMD doesn't have CPUID4.  Emulate it here to report the same
 * information to the user.  This makes some assumptions about the machine:
 * L2 not shared, no SMT etc., which is currently true on AMD CPUs.
 *
 * In theory the TLBs could be reported as a fake type (they are in "dummy").
 * Maybe later.
 */

/* One L1 cache register of CPUID(0x80000005), as read by amd_cpuid4(). */
union l1_cache {
	struct {
		unsigned int line_size:8;
		unsigned int lines_per_tag:8;
		unsigned int assoc:8;
		unsigned int size_in_kb:8;
	};
	unsigned int val;
};
189 
/* L2 cache register of CPUID(0x80000006), as read by amd_cpuid4(). */
union l2_cache {
	struct {
		unsigned int line_size:8;
		unsigned int lines_per_tag:4;
		unsigned int assoc:4;		/* encoded, decoded via assocs[] */
		unsigned int size_in_kb:16;
	};
	unsigned int val;
};
199 
/* L3 cache register of CPUID(0x80000006), as read by amd_cpuid4(). */
union l3_cache {
	struct {
		unsigned int line_size:8;
		unsigned int lines_per_tag:4;
		unsigned int assoc:4;		/* encoded, decoded via assocs[] */
		unsigned int res:2;
		unsigned int size_encoded:14;	/* in units of 512 KB */
	};
	unsigned int val;
};
210 
/*
 * Decode the 4-bit L2/L3 associativity encoding of CPUID(0x80000006) into
 * a number of ways.  Encodings without an entry (0, 7, 9) decode to 0.
 *
 * The table used to lack the encodings for 3-way (0x3) and 6-way (0x5)
 * caches, so such caches were reported with a bogus associativity.
 */
static const unsigned short assocs[] = {
	[1] = 1,
	[2] = 2,
	[3] = 3,
	[4] = 4,
	[5] = 6,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};
224 
/*
 * Cache level and CPUID(4) cache type reported by amd_cpuid4() for each
 * emulated leaf index (0: L1d, 1: L1i, 2: L2, 3: L3).
 */
static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[]  = { 1, 2, 3, 3 };
227 
228 static const enum cache_type cache_type_map[] = {
229 	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
230 	[CTYPE_DATA] = CACHE_TYPE_DATA,
231 	[CTYPE_INST] = CACHE_TYPE_INST,
232 	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
233 };
234 
235 static void
236 amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
237 		     union _cpuid4_leaf_ebx *ebx,
238 		     union _cpuid4_leaf_ecx *ecx)
239 {
240 	unsigned dummy;
241 	unsigned line_size, lines_per_tag, assoc, size_in_kb;
242 	union l1_cache l1i, l1d;
243 	union l2_cache l2;
244 	union l3_cache l3;
245 	union l1_cache *l1 = &l1d;
246 
247 	eax->full = 0;
248 	ebx->full = 0;
249 	ecx->full = 0;
250 
251 	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
252 	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
253 
254 	switch (leaf) {
255 	case 1:
256 		l1 = &l1i;
257 		fallthrough;
258 	case 0:
259 		if (!l1->val)
260 			return;
261 		assoc = assocs[l1->assoc];
262 		line_size = l1->line_size;
263 		lines_per_tag = l1->lines_per_tag;
264 		size_in_kb = l1->size_in_kb;
265 		break;
266 	case 2:
267 		if (!l2.val)
268 			return;
269 		assoc = assocs[l2.assoc];
270 		line_size = l2.line_size;
271 		lines_per_tag = l2.lines_per_tag;
272 		/* cpu_data has errata corrections for K7 applied */
273 		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
274 		break;
275 	case 3:
276 		if (!l3.val)
277 			return;
278 		assoc = assocs[l3.assoc];
279 		line_size = l3.line_size;
280 		lines_per_tag = l3.lines_per_tag;
281 		size_in_kb = l3.size_encoded * 512;
282 		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
283 			size_in_kb = size_in_kb >> 1;
284 			assoc = assoc >> 1;
285 		}
286 		break;
287 	default:
288 		return;
289 	}
290 
291 	eax->split.is_self_initializing = 1;
292 	eax->split.type = types[leaf];
293 	eax->split.level = levels[leaf];
294 	eax->split.num_threads_sharing = 0;
295 	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
296 
297 
298 	if (assoc == 0xffff)
299 		eax->split.is_fully_associative = 1;
300 	ebx->split.coherency_line_size = line_size - 1;
301 	ebx->split.ways_of_associativity = assoc - 1;
302 	ebx->split.physical_line_partition = lines_per_tag - 1;
303 	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
304 		(ebx->split.ways_of_associativity + 1) - 1;
305 }
306 
307 #if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
308 
309 /*
310  * L3 cache descriptors
311  */
312 static void amd_calc_l3_indices(struct amd_northbridge *nb)
313 {
314 	struct amd_l3_cache *l3 = &nb->l3_cache;
315 	unsigned int sc0, sc1, sc2, sc3;
316 	u32 val = 0;
317 
318 	pci_read_config_dword(nb->misc, 0x1C4, &val);
319 
320 	/* calculate subcache sizes */
321 	l3->subcaches[0] = sc0 = !(val & BIT(0));
322 	l3->subcaches[1] = sc1 = !(val & BIT(4));
323 
324 	if (boot_cpu_data.x86 == 0x15) {
325 		l3->subcaches[0] = sc0 += !(val & BIT(1));
326 		l3->subcaches[1] = sc1 += !(val & BIT(5));
327 	}
328 
329 	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
330 	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
331 
332 	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
333 }
334 
335 /*
336  * check whether a slot used for disabling an L3 index is occupied.
337  * @l3: L3 cache descriptor
338  * @slot: slot number (0..1)
339  *
340  * @returns: the disabled index if used or negative value if slot free.
341  */
342 static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
343 {
344 	unsigned int reg = 0;
345 
346 	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
347 
348 	/* check whether this slot is activated already */
349 	if (reg & (3UL << 30))
350 		return reg & 0xfff;
351 
352 	return -1;
353 }
354 
355 static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
356 				  unsigned int slot)
357 {
358 	int index;
359 	struct amd_northbridge *nb = this_leaf->priv;
360 
361 	index = amd_get_l3_disable_slot(nb, slot);
362 	if (index >= 0)
363 		return sprintf(buf, "%d\n", index);
364 
365 	return sprintf(buf, "FREE\n");
366 }
367 
368 #define SHOW_CACHE_DISABLE(slot)					\
369 static ssize_t								\
370 cache_disable_##slot##_show(struct device *dev,				\
371 			    struct device_attribute *attr, char *buf)	\
372 {									\
373 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
374 	return show_cache_disable(this_leaf, buf, slot);		\
375 }
376 SHOW_CACHE_DISABLE(0)
377 SHOW_CACHE_DISABLE(1)
378 
379 static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
380 				 unsigned slot, unsigned long idx)
381 {
382 	int i;
383 
384 	idx |= BIT(30);
385 
386 	/*
387 	 *  disable index in all 4 subcaches
388 	 */
389 	for (i = 0; i < 4; i++) {
390 		u32 reg = idx | (i << 20);
391 
392 		if (!nb->l3_cache.subcaches[i])
393 			continue;
394 
395 		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
396 
397 		/*
398 		 * We need to WBINVD on a core on the node containing the L3
399 		 * cache which indices we disable therefore a simple wbinvd()
400 		 * is not sufficient.
401 		 */
402 		wbinvd_on_cpu(cpu);
403 
404 		reg |= BIT(31);
405 		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
406 	}
407 }
408 
409 /*
410  * disable a L3 cache index by using a disable-slot
411  *
412  * @l3:    L3 cache descriptor
413  * @cpu:   A CPU on the node containing the L3 cache
414  * @slot:  slot number (0..1)
415  * @index: index to disable
416  *
417  * @return: 0 on success, error status on failure
418  */
419 static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
420 			    unsigned slot, unsigned long index)
421 {
422 	int ret = 0;
423 
424 	/*  check if @slot is already used or the index is already disabled */
425 	ret = amd_get_l3_disable_slot(nb, slot);
426 	if (ret >= 0)
427 		return -EEXIST;
428 
429 	if (index > nb->l3_cache.indices)
430 		return -EINVAL;
431 
432 	/* check whether the other slot has disabled the same index already */
433 	if (index == amd_get_l3_disable_slot(nb, !slot))
434 		return -EEXIST;
435 
436 	amd_l3_disable_index(nb, cpu, slot, index);
437 
438 	return 0;
439 }
440 
441 static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
442 				   const char *buf, size_t count,
443 				   unsigned int slot)
444 {
445 	unsigned long val = 0;
446 	int cpu, err = 0;
447 	struct amd_northbridge *nb = this_leaf->priv;
448 
449 	if (!capable(CAP_SYS_ADMIN))
450 		return -EPERM;
451 
452 	cpu = cpumask_first(&this_leaf->shared_cpu_map);
453 
454 	if (kstrtoul(buf, 10, &val) < 0)
455 		return -EINVAL;
456 
457 	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
458 	if (err) {
459 		if (err == -EEXIST)
460 			pr_warn("L3 slot %d in use/index already disabled!\n",
461 				   slot);
462 		return err;
463 	}
464 	return count;
465 }
466 
467 #define STORE_CACHE_DISABLE(slot)					\
468 static ssize_t								\
469 cache_disable_##slot##_store(struct device *dev,			\
470 			     struct device_attribute *attr,		\
471 			     const char *buf, size_t count)		\
472 {									\
473 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
474 	return store_cache_disable(this_leaf, buf, count, slot);	\
475 }
476 STORE_CACHE_DISABLE(0)
477 STORE_CACHE_DISABLE(1)
478 
479 static ssize_t subcaches_show(struct device *dev,
480 			      struct device_attribute *attr, char *buf)
481 {
482 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
483 	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
484 
485 	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
486 }
487 
488 static ssize_t subcaches_store(struct device *dev,
489 			       struct device_attribute *attr,
490 			       const char *buf, size_t count)
491 {
492 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
493 	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
494 	unsigned long val;
495 
496 	if (!capable(CAP_SYS_ADMIN))
497 		return -EPERM;
498 
499 	if (kstrtoul(buf, 16, &val) < 0)
500 		return -EINVAL;
501 
502 	if (amd_set_subcaches(cpu, val))
503 		return -EINVAL;
504 
505 	return count;
506 }
507 
/* Read/write device attributes backed by the *_show()/*_store() handlers. */
static DEVICE_ATTR_RW(cache_disable_0);
static DEVICE_ATTR_RW(cache_disable_1);
static DEVICE_ATTR_RW(subcaches);
511 
512 static umode_t
513 cache_private_attrs_is_visible(struct kobject *kobj,
514 			       struct attribute *attr, int unused)
515 {
516 	struct device *dev = kobj_to_dev(kobj);
517 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
518 	umode_t mode = attr->mode;
519 
520 	if (!this_leaf->priv)
521 		return 0;
522 
523 	if ((attr == &dev_attr_subcaches.attr) &&
524 	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
525 		return mode;
526 
527 	if ((attr == &dev_attr_cache_disable_0.attr ||
528 	     attr == &dev_attr_cache_disable_1.attr) &&
529 	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
530 		return mode;
531 
532 	return 0;
533 }
534 
535 static struct attribute_group cache_private_group = {
536 	.is_visible = cache_private_attrs_is_visible,
537 };
538 
539 static void init_amd_l3_attrs(void)
540 {
541 	int n = 1;
542 	static struct attribute **amd_l3_attrs;
543 
544 	if (amd_l3_attrs) /* already initialized */
545 		return;
546 
547 	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
548 		n += 2;
549 	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
550 		n += 1;
551 
552 	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
553 	if (!amd_l3_attrs)
554 		return;
555 
556 	n = 0;
557 	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
558 		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
559 		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
560 	}
561 	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
562 		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
563 
564 	cache_private_group.attrs = amd_l3_attrs;
565 }
566 
567 const struct attribute_group *
568 cache_get_priv_group(struct cacheinfo *this_leaf)
569 {
570 	struct amd_northbridge *nb = this_leaf->priv;
571 
572 	if (this_leaf->level < 3 || !nb)
573 		return NULL;
574 
575 	if (nb && nb->l3_cache.indices)
576 		init_amd_l3_attrs();
577 
578 	return &cache_private_group;
579 }
580 
581 static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
582 {
583 	int node;
584 
585 	/* only for L3, and not in virtualized environments */
586 	if (index < 3)
587 		return;
588 
589 	node = topology_die_id(smp_processor_id());
590 	this_leaf->nb = node_to_amd_nb(node);
591 	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
592 		amd_calc_l3_indices(this_leaf->nb);
593 }
594 #else
/* Without CONFIG_AMD_NB && CONFIG_SYSFS there is nothing to set up. */
#define amd_init_l3_cache(x, y)
596 #endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
597 
598 static int
599 cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
600 {
601 	union _cpuid4_leaf_eax	eax;
602 	union _cpuid4_leaf_ebx	ebx;
603 	union _cpuid4_leaf_ecx	ecx;
604 	unsigned		edx;
605 
606 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
607 		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
608 			cpuid_count(0x8000001d, index, &eax.full,
609 				    &ebx.full, &ecx.full, &edx);
610 		else
611 			amd_cpuid4(index, &eax, &ebx, &ecx);
612 		amd_init_l3_cache(this_leaf, index);
613 	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
614 		cpuid_count(0x8000001d, index, &eax.full,
615 			    &ebx.full, &ecx.full, &edx);
616 		amd_init_l3_cache(this_leaf, index);
617 	} else {
618 		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
619 	}
620 
621 	if (eax.split.type == CTYPE_NULL)
622 		return -EIO; /* better error ? */
623 
624 	this_leaf->eax = eax;
625 	this_leaf->ebx = ebx;
626 	this_leaf->ecx = ecx;
627 	this_leaf->size = (ecx.split.number_of_sets          + 1) *
628 			  (ebx.split.coherency_line_size     + 1) *
629 			  (ebx.split.physical_line_partition + 1) *
630 			  (ebx.split.ways_of_associativity   + 1);
631 	return 0;
632 }
633 
634 static int find_num_cache_leaves(struct cpuinfo_x86 *c)
635 {
636 	unsigned int		eax, ebx, ecx, edx, op;
637 	union _cpuid4_leaf_eax	cache_eax;
638 	int 			i = -1;
639 
640 	if (c->x86_vendor == X86_VENDOR_AMD ||
641 	    c->x86_vendor == X86_VENDOR_HYGON)
642 		op = 0x8000001d;
643 	else
644 		op = 4;
645 
646 	do {
647 		++i;
648 		/* Do cpuid(op) loop to find out num_cache_leaves */
649 		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
650 		cache_eax.full = eax;
651 	} while (cache_eax.split.type != CTYPE_NULL);
652 	return i;
653 }
654 
655 void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu)
656 {
657 	/*
658 	 * We may have multiple LLCs if L3 caches exist, so check if we
659 	 * have an L3 cache by looking at the L3 cache CPUID leaf.
660 	 */
661 	if (!cpuid_edx(0x80000006))
662 		return;
663 
664 	if (c->x86 < 0x17) {
665 		/* LLC is at the node level. */
666 		per_cpu(cpu_llc_id, cpu) = c->cpu_die_id;
667 	} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
668 		/*
669 		 * LLC is at the core complex level.
670 		 * Core complex ID is ApicId[3] for these processors.
671 		 */
672 		per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
673 	} else {
674 		/*
675 		 * LLC ID is calculated from the number of threads sharing the
676 		 * cache.
677 		 * */
678 		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
679 		u32 llc_index = find_num_cache_leaves(c) - 1;
680 
681 		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
682 		if (eax)
683 			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
684 
685 		if (num_sharing_cache) {
686 			int bits = get_count_order(num_sharing_cache);
687 
688 			per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
689 		}
690 	}
691 }
692 
693 void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu)
694 {
695 	/*
696 	 * We may have multiple LLCs if L3 caches exist, so check if we
697 	 * have an L3 cache by looking at the L3 cache CPUID leaf.
698 	 */
699 	if (!cpuid_edx(0x80000006))
700 		return;
701 
702 	/*
703 	 * LLC is at the core complex level.
704 	 * Core complex ID is ApicId[3] for these processors.
705 	 */
706 	per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
707 }
708 
709 void init_amd_cacheinfo(struct cpuinfo_x86 *c)
710 {
711 
712 	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
713 		num_cache_leaves = find_num_cache_leaves(c);
714 	} else if (c->extended_cpuid_level >= 0x80000006) {
715 		if (cpuid_edx(0x80000006) & 0xf000)
716 			num_cache_leaves = 4;
717 		else
718 			num_cache_leaves = 3;
719 	}
720 }
721 
722 void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
723 {
724 	num_cache_leaves = find_num_cache_leaves(c);
725 }
726 
727 void init_intel_cacheinfo(struct cpuinfo_x86 *c)
728 {
729 	/* Cache sizes */
730 	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
731 	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
732 	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
733 	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
734 #ifdef CONFIG_SMP
735 	unsigned int cpu = c->cpu_index;
736 #endif
737 
738 	if (c->cpuid_level > 3) {
739 		static int is_initialized;
740 
741 		if (is_initialized == 0) {
742 			/* Init num_cache_leaves from boot CPU */
743 			num_cache_leaves = find_num_cache_leaves(c);
744 			is_initialized++;
745 		}
746 
747 		/*
748 		 * Whenever possible use cpuid(4), deterministic cache
749 		 * parameters cpuid leaf to find the cache details
750 		 */
751 		for (i = 0; i < num_cache_leaves; i++) {
752 			struct _cpuid4_info_regs this_leaf = {};
753 			int retval;
754 
755 			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
756 			if (retval < 0)
757 				continue;
758 
759 			switch (this_leaf.eax.split.level) {
760 			case 1:
761 				if (this_leaf.eax.split.type == CTYPE_DATA)
762 					new_l1d = this_leaf.size/1024;
763 				else if (this_leaf.eax.split.type == CTYPE_INST)
764 					new_l1i = this_leaf.size/1024;
765 				break;
766 			case 2:
767 				new_l2 = this_leaf.size/1024;
768 				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
769 				index_msb = get_count_order(num_threads_sharing);
770 				l2_id = c->apicid & ~((1 << index_msb) - 1);
771 				break;
772 			case 3:
773 				new_l3 = this_leaf.size/1024;
774 				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
775 				index_msb = get_count_order(num_threads_sharing);
776 				l3_id = c->apicid & ~((1 << index_msb) - 1);
777 				break;
778 			default:
779 				break;
780 			}
781 		}
782 	}
783 	/*
784 	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
785 	 * trace cache
786 	 */
787 	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
788 		/* supports eax=2  call */
789 		int j, n;
790 		unsigned int regs[4];
791 		unsigned char *dp = (unsigned char *)regs;
792 		int only_trace = 0;
793 
794 		if (num_cache_leaves != 0 && c->x86 == 15)
795 			only_trace = 1;
796 
797 		/* Number of times to iterate */
798 		n = cpuid_eax(2) & 0xFF;
799 
800 		for (i = 0 ; i < n ; i++) {
801 			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
802 
803 			/* If bit 31 is set, this is an unknown format */
804 			for (j = 0 ; j < 3 ; j++)
805 				if (regs[j] & (1 << 31))
806 					regs[j] = 0;
807 
808 			/* Byte 0 is level count, not a descriptor */
809 			for (j = 1 ; j < 16 ; j++) {
810 				unsigned char des = dp[j];
811 				unsigned char k = 0;
812 
813 				/* look up this descriptor in the table */
814 				while (cache_table[k].descriptor != 0) {
815 					if (cache_table[k].descriptor == des) {
816 						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
817 							break;
818 						switch (cache_table[k].cache_type) {
819 						case LVL_1_INST:
820 							l1i += cache_table[k].size;
821 							break;
822 						case LVL_1_DATA:
823 							l1d += cache_table[k].size;
824 							break;
825 						case LVL_2:
826 							l2 += cache_table[k].size;
827 							break;
828 						case LVL_3:
829 							l3 += cache_table[k].size;
830 							break;
831 						case LVL_TRACE:
832 							trace += cache_table[k].size;
833 							break;
834 						}
835 
836 						break;
837 					}
838 
839 					k++;
840 				}
841 			}
842 		}
843 	}
844 
845 	if (new_l1d)
846 		l1d = new_l1d;
847 
848 	if (new_l1i)
849 		l1i = new_l1i;
850 
851 	if (new_l2) {
852 		l2 = new_l2;
853 #ifdef CONFIG_SMP
854 		per_cpu(cpu_llc_id, cpu) = l2_id;
855 		per_cpu(cpu_l2c_id, cpu) = l2_id;
856 #endif
857 	}
858 
859 	if (new_l3) {
860 		l3 = new_l3;
861 #ifdef CONFIG_SMP
862 		per_cpu(cpu_llc_id, cpu) = l3_id;
863 #endif
864 	}
865 
866 #ifdef CONFIG_SMP
867 	/*
868 	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
869 	 * turns means that the only possibility is SMT (as indicated in
870 	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
871 	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
872 	 * c->phys_proc_id.
873 	 */
874 	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
875 		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
876 #endif
877 
878 	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
879 
880 	if (!l2)
881 		cpu_detect_cache_sizes(c);
882 }
883 
884 static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
885 				    struct _cpuid4_info_regs *base)
886 {
887 	struct cpu_cacheinfo *this_cpu_ci;
888 	struct cacheinfo *this_leaf;
889 	int i, sibling;
890 
891 	/*
892 	 * For L3, always use the pre-calculated cpu_llc_shared_mask
893 	 * to derive shared_cpu_map.
894 	 */
895 	if (index == 3) {
896 		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
897 			this_cpu_ci = get_cpu_cacheinfo(i);
898 			if (!this_cpu_ci->info_list)
899 				continue;
900 			this_leaf = this_cpu_ci->info_list + index;
901 			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
902 				if (!cpu_online(sibling))
903 					continue;
904 				cpumask_set_cpu(sibling,
905 						&this_leaf->shared_cpu_map);
906 			}
907 		}
908 	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
909 		unsigned int apicid, nshared, first, last;
910 
911 		nshared = base->eax.split.num_threads_sharing + 1;
912 		apicid = cpu_data(cpu).apicid;
913 		first = apicid - (apicid % nshared);
914 		last = first + nshared - 1;
915 
916 		for_each_online_cpu(i) {
917 			this_cpu_ci = get_cpu_cacheinfo(i);
918 			if (!this_cpu_ci->info_list)
919 				continue;
920 
921 			apicid = cpu_data(i).apicid;
922 			if ((apicid < first) || (apicid > last))
923 				continue;
924 
925 			this_leaf = this_cpu_ci->info_list + index;
926 
927 			for_each_online_cpu(sibling) {
928 				apicid = cpu_data(sibling).apicid;
929 				if ((apicid < first) || (apicid > last))
930 					continue;
931 				cpumask_set_cpu(sibling,
932 						&this_leaf->shared_cpu_map);
933 			}
934 		}
935 	} else
936 		return 0;
937 
938 	return 1;
939 }
940 
941 static void __cache_cpumap_setup(unsigned int cpu, int index,
942 				 struct _cpuid4_info_regs *base)
943 {
944 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
945 	struct cacheinfo *this_leaf, *sibling_leaf;
946 	unsigned long num_threads_sharing;
947 	int index_msb, i;
948 	struct cpuinfo_x86 *c = &cpu_data(cpu);
949 
950 	if (c->x86_vendor == X86_VENDOR_AMD ||
951 	    c->x86_vendor == X86_VENDOR_HYGON) {
952 		if (__cache_amd_cpumap_setup(cpu, index, base))
953 			return;
954 	}
955 
956 	this_leaf = this_cpu_ci->info_list + index;
957 	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
958 
959 	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
960 	if (num_threads_sharing == 1)
961 		return;
962 
963 	index_msb = get_count_order(num_threads_sharing);
964 
965 	for_each_online_cpu(i)
966 		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
967 			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
968 
969 			if (i == cpu || !sib_cpu_ci->info_list)
970 				continue;/* skip if itself or no cacheinfo */
971 			sibling_leaf = sib_cpu_ci->info_list + index;
972 			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
973 			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
974 		}
975 }
976 
977 static void ci_leaf_init(struct cacheinfo *this_leaf,
978 			 struct _cpuid4_info_regs *base)
979 {
980 	this_leaf->id = base->id;
981 	this_leaf->attributes = CACHE_ID;
982 	this_leaf->level = base->eax.split.level;
983 	this_leaf->type = cache_type_map[base->eax.split.type];
984 	this_leaf->coherency_line_size =
985 				base->ebx.split.coherency_line_size + 1;
986 	this_leaf->ways_of_associativity =
987 				base->ebx.split.ways_of_associativity + 1;
988 	this_leaf->size = base->size;
989 	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
990 	this_leaf->physical_line_partition =
991 				base->ebx.split.physical_line_partition + 1;
992 	this_leaf->priv = base->nb;
993 }
994 
995 int init_cache_level(unsigned int cpu)
996 {
997 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
998 
999 	if (!num_cache_leaves)
1000 		return -ENOENT;
1001 	if (!this_cpu_ci)
1002 		return -EINVAL;
1003 	this_cpu_ci->num_levels = 3;
1004 	this_cpu_ci->num_leaves = num_cache_leaves;
1005 	return 0;
1006 }
1007 
1008 /*
1009  * The max shared threads number comes from CPUID.4:EAX[25-14] with input
1010  * ECX as cache index. Then right shift apicid by the number's order to get
1011  * cache id for this cache node.
1012  */
1013 static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
1014 {
1015 	struct cpuinfo_x86 *c = &cpu_data(cpu);
1016 	unsigned long num_threads_sharing;
1017 	int index_msb;
1018 
1019 	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
1020 	index_msb = get_count_order(num_threads_sharing);
1021 	id4_regs->id = c->apicid >> index_msb;
1022 }
1023 
1024 int populate_cache_leaves(unsigned int cpu)
1025 {
1026 	unsigned int idx, ret;
1027 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
1028 	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
1029 	struct _cpuid4_info_regs id4_regs = {};
1030 
1031 	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
1032 		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
1033 		if (ret)
1034 			return ret;
1035 		get_cache_id(cpu, &id4_regs);
1036 		ci_leaf_init(this_leaf++, &id4_regs);
1037 		__cache_cpumap_setup(cpu, idx, &id4_regs);
1038 	}
1039 	this_cpu_ci->cpu_map_populated = true;
1040 
1041 	return 0;
1042 }
1043