xref: /openbmc/linux/arch/x86/kernel/cpu/centaur.c (revision 8a10bc9d)
1 #include <linux/bitops.h>
2 #include <linux/kernel.h>
3 
4 #include <asm/processor.h>
5 #include <asm/e820.h>
6 #include <asm/mtrr.h>
7 #include <asm/msr.h>
8 
9 #include "cpu.h"
10 
11 #ifdef CONFIG_X86_OOSTORE
12 
/*
 * Round x down to a power of two.
 *
 * Returns the largest power of two that is less than or equal to x,
 * or 0 when x is 0.
 *
 * The probe is compared against x/2 instead of being shifted until it
 * exceeds x: for x >= 0x80000000 no 32-bit value exceeds x by a power of
 * two, so a probe shifted that far would wrap to 0 and the loop would
 * never terminate.
 */
static u32 power2(u32 x)
{
	u32 s = 1;

	if (!x)
		return 0;

	while (s <= x / 2)
		s <<= 1;

	return s;
}
22 
23 
24 /*
25  * Set up an actual MCR
26  */
27 static void centaur_mcr_insert(int reg, u32 base, u32 size, int key)
28 {
29 	u32 lo, hi;
30 
31 	hi = base & ~0xFFF;
32 	lo = ~(size-1);		/* Size is a power of 2 so this makes a mask */
33 	lo &= ~0xFFF;		/* Remove the ctrl value bits */
34 	lo |= key;		/* Attribute we wish to set */
35 	wrmsr(reg+MSR_IDT_MCR0, lo, hi);
36 	mtrr_centaur_report_mcr(reg, lo, hi);	/* Tell the mtrr driver */
37 }
38 
39 /*
40  * Figure what we can cover with MCR's
41  *
42  * Shortcut: We know you can't put 4Gig of RAM on a winchip
43  */
44 static u32 ramtop(void)
45 {
46 	u32 clip = 0xFFFFFFFFUL;
47 	u32 top = 0;
48 	int i;
49 
50 	for (i = 0; i < e820.nr_map; i++) {
51 		unsigned long start, end;
52 
53 		if (e820.map[i].addr > 0xFFFFFFFFUL)
54 			continue;
55 		/*
56 		 * Don't MCR over reserved space. Ignore the ISA hole
57 		 * we frob around that catastrophe already
58 		 */
59 		if (e820.map[i].type == E820_RESERVED) {
60 			if (e820.map[i].addr >= 0x100000UL &&
61 			    e820.map[i].addr < clip)
62 				clip = e820.map[i].addr;
63 			continue;
64 		}
65 		start = e820.map[i].addr;
66 		end = e820.map[i].addr + e820.map[i].size;
67 		if (start >= end)
68 			continue;
69 		if (end > top)
70 			top = end;
71 	}
72 	/*
73 	 * Everything below 'top' should be RAM except for the ISA hole.
74 	 * Because of the limited MCR's we want to map NV/ACPI into our
75 	 * MCR range for gunk in RAM
76 	 *
77 	 * Clip might cause us to MCR insufficient RAM but that is an
78 	 * acceptable failure mode and should only bite obscure boxes with
79 	 * a VESA hole at 15Mb
80 	 *
81 	 * The second case Clip sometimes kicks in is when the EBDA is marked
82 	 * as reserved. Again we fail safe with reasonable results
83 	 */
84 	if (top > clip)
85 		top = clip;
86 
87 	return top;
88 }
89 
90 /*
91  * Compute a set of MCR's to give maximum coverage
92  */
93 static int centaur_mcr_compute(int nr, int key)
94 {
95 	u32 mem = ramtop();
96 	u32 root = power2(mem);
97 	u32 base = root;
98 	u32 top = root;
99 	u32 floor = 0;
100 	int ct = 0;
101 
102 	while (ct < nr) {
103 		u32 fspace = 0;
104 		u32 high;
105 		u32 low;
106 
107 		/*
108 		 * Find the largest block we will fill going upwards
109 		 */
110 		high = power2(mem-top);
111 
112 		/*
113 		 * Find the largest block we will fill going downwards
114 		 */
115 		low = base/2;
116 
117 		/*
118 		 * Don't fill below 1Mb going downwards as there
119 		 * is an ISA hole in the way.
120 		 */
121 		if (base <= 1024*1024)
122 			low = 0;
123 
124 		/*
125 		 * See how much space we could cover by filling below
126 		 * the ISA hole
127 		 */
128 
129 		if (floor == 0)
130 			fspace = 512*1024;
131 		else if (floor == 512*1024)
132 			fspace = 128*1024;
133 
134 		/* And forget ROM space */
135 
136 		/*
137 		 * Now install the largest coverage we get
138 		 */
139 		if (fspace > high && fspace > low) {
140 			centaur_mcr_insert(ct, floor, fspace, key);
141 			floor += fspace;
142 		} else if (high > low) {
143 			centaur_mcr_insert(ct, top, high, key);
144 			top += high;
145 		} else if (low > 0) {
146 			base -= low;
147 			centaur_mcr_insert(ct, base, low, key);
148 		} else
149 			break;
150 		ct++;
151 	}
152 	/*
153 	 * We loaded ct values. We now need to set the mask. The caller
154 	 * must do this bit.
155 	 */
156 	return ct;
157 }
158 
159 static void centaur_create_optimal_mcr(void)
160 {
161 	int used;
162 	int i;
163 
164 	/*
165 	 * Allocate up to 6 mcrs to mark as much of ram as possible
166 	 * as write combining and weak write ordered.
167 	 *
168 	 * To experiment with: Linux never uses stack operations for
169 	 * mmio spaces so we could globally enable stack operation wc
170 	 *
171 	 * Load the registers with type 31 - full write combining, all
172 	 * writes weakly ordered.
173 	 */
174 	used = centaur_mcr_compute(6, 31);
175 
176 	/*
177 	 * Wipe unused MCRs
178 	 */
179 	for (i = used; i < 8; i++)
180 		wrmsr(MSR_IDT_MCR0+i, 0, 0);
181 }
182 
183 static void winchip2_create_optimal_mcr(void)
184 {
185 	u32 lo, hi;
186 	int used;
187 	int i;
188 
189 	/*
190 	 * Allocate up to 6 mcrs to mark as much of ram as possible
191 	 * as write combining, weak store ordered.
192 	 *
193 	 * Load the registers with type 25
194 	 *	8	-	weak write ordering
195 	 *	16	-	weak read ordering
196 	 *	1	-	write combining
197 	 */
198 	used = centaur_mcr_compute(6, 25);
199 
200 	/*
201 	 * Mark the registers we are using.
202 	 */
203 	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
204 	for (i = 0; i < used; i++)
205 		lo |= 1<<(9+i);
206 	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
207 
208 	/*
209 	 * Wipe unused MCRs
210 	 */
211 
212 	for (i = used; i < 8; i++)
213 		wrmsr(MSR_IDT_MCR0+i, 0, 0);
214 }
215 
216 /*
217  * Handle the MCR key on the Winchip 2.
218  */
219 static void winchip2_unprotect_mcr(void)
220 {
221 	u32 lo, hi;
222 	u32 key;
223 
224 	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
225 	lo &= ~0x1C0;	/* blank bits 8-6 */
226 	key = (lo>>17) & 7;
227 	lo |= key<<6;	/* replace with unlock key */
228 	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
229 }
230 
231 static void winchip2_protect_mcr(void)
232 {
233 	u32 lo, hi;
234 
235 	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
236 	lo &= ~0x1C0;	/* blank bits 8-6 */
237 	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
238 }
239 #endif /* CONFIG_X86_OOSTORE */
240 
/* ACE unit: flags tested in CPUID 0xC0000001 EDX, and its enable bit */
#define ACE_PRESENT	0x00000040	/* bit 6 */
#define ACE_ENABLED	0x00000080	/* bit 7 */
#define ACE_FCR		0x10000000	/* bit 28, in MSR_VIA_FCR */

/* RNG unit: flags tested in CPUID 0xC0000001 EDX, and its enable bit */
#define RNG_PRESENT	0x00000004	/* bit 2 */
#define RNG_ENABLED	0x00000008	/* bit 3 */
#define RNG_ENABLE	0x00000040	/* bit 6, in MSR_VIA_RNG */
248 
249 static void init_c3(struct cpuinfo_x86 *c)
250 {
251 	u32  lo, hi;
252 
253 	/* Test for Centaur Extended Feature Flags presence */
254 	if (cpuid_eax(0xC0000000) >= 0xC0000001) {
255 		u32 tmp = cpuid_edx(0xC0000001);
256 
257 		/* enable ACE unit, if present and disabled */
258 		if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) {
259 			rdmsr(MSR_VIA_FCR, lo, hi);
260 			lo |= ACE_FCR;		/* enable ACE unit */
261 			wrmsr(MSR_VIA_FCR, lo, hi);
262 			printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n");
263 		}
264 
265 		/* enable RNG unit, if present and disabled */
266 		if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) {
267 			rdmsr(MSR_VIA_RNG, lo, hi);
268 			lo |= RNG_ENABLE;	/* enable RNG unit */
269 			wrmsr(MSR_VIA_RNG, lo, hi);
270 			printk(KERN_INFO "CPU: Enabled h/w RNG\n");
271 		}
272 
273 		/* store Centaur Extended Feature Flags as
274 		 * word 5 of the CPU capability bit array
275 		 */
276 		c->x86_capability[5] = cpuid_edx(0xC0000001);
277 	}
278 #ifdef CONFIG_X86_32
279 	/* Cyrix III family needs CX8 & PGE explicitly enabled. */
280 	if (c->x86_model >= 6 && c->x86_model <= 13) {
281 		rdmsr(MSR_VIA_FCR, lo, hi);
282 		lo |= (1<<1 | 1<<7);
283 		wrmsr(MSR_VIA_FCR, lo, hi);
284 		set_cpu_cap(c, X86_FEATURE_CX8);
285 	}
286 
287 	/* Before Nehemiah, the C3's had 3dNOW! */
288 	if (c->x86_model >= 6 && c->x86_model < 9)
289 		set_cpu_cap(c, X86_FEATURE_3DNOW);
290 #endif
291 	if (c->x86 == 0x6 && c->x86_model >= 0xf) {
292 		c->x86_cache_alignment = c->x86_clflush_size * 2;
293 		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
294 	}
295 
296 	cpu_detect_cache_sizes(c);
297 }
298 
/*
 * Bit masks that init_centaur() combines into the values it sets and
 * clears in the low word of MSR_IDT_FCR1.
 *
 * DTLOCK and EDCTLB are two names for the same bit (8).
 */
enum {
	ECX8		= 0x000002,	/* bit 1 */
	EIERRINT	= 0x000004,	/* bit 2 */
	DPM		= 0x000008,	/* bit 3 */
	DMCE		= 0x000010,	/* bit 4 */
	DSTPCLK		= 0x000020,	/* bit 5 */
	ELINEAR		= 0x000040,	/* bit 6 */
	DSMC		= 0x000080,	/* bit 7 */
	DTLOCK		= 0x000100,	/* bit 8 */
	EDCTLB		= 0x000100,	/* bit 8 */
	EMMX		= 0x000200,	/* bit 9 */
	DPDC		= 0x000800,	/* bit 11 */
	EBRPRED		= 0x001000,	/* bit 12 */
	DIC		= 0x002000,	/* bit 13 */
	DDC		= 0x004000,	/* bit 14 */
	DNA		= 0x008000,	/* bit 15 */
	ERETSTK		= 0x010000,	/* bit 16 */
	E2MMX		= 0x080000,	/* bit 19 */
	EAMD3D		= 0x100000,	/* bit 20 */
};
319 
320 static void early_init_centaur(struct cpuinfo_x86 *c)
321 {
322 	switch (c->x86) {
323 #ifdef CONFIG_X86_32
324 	case 5:
325 		/* Emulate MTRRs using Centaur's MCR. */
326 		set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
327 		break;
328 #endif
329 	case 6:
330 		if (c->x86_model >= 0xf)
331 			set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
332 		break;
333 	}
334 #ifdef CONFIG_X86_64
335 	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
336 #endif
337 }
338 
339 static void init_centaur(struct cpuinfo_x86 *c)
340 {
341 #ifdef CONFIG_X86_32
342 	char *name;
343 	u32  fcr_set = 0;
344 	u32  fcr_clr = 0;
345 	u32  lo, hi, newlo;
346 	u32  aa, bb, cc, dd;
347 
348 	/*
349 	 * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
350 	 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
351 	 */
352 	clear_cpu_cap(c, 0*32+31);
353 #endif
354 	early_init_centaur(c);
355 	switch (c->x86) {
356 #ifdef CONFIG_X86_32
357 	case 5:
358 		switch (c->x86_model) {
359 		case 4:
360 			name = "C6";
361 			fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
362 			fcr_clr = DPDC;
363 			printk(KERN_NOTICE "Disabling bugged TSC.\n");
364 			clear_cpu_cap(c, X86_FEATURE_TSC);
365 #ifdef CONFIG_X86_OOSTORE
366 			centaur_create_optimal_mcr();
367 			/*
368 			 * Enable:
369 			 *	write combining on non-stack, non-string
370 			 *	write combining on string, all types
371 			 *	weak write ordering
372 			 *
373 			 * The C6 original lacks weak read order
374 			 *
375 			 * Note 0x120 is write only on Winchip 1
376 			 */
377 			wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0);
378 #endif
379 			break;
380 		case 8:
381 			switch (c->x86_mask) {
382 			default:
383 			name = "2";
384 				break;
385 			case 7 ... 9:
386 				name = "2A";
387 				break;
388 			case 10 ... 15:
389 				name = "2B";
390 				break;
391 			}
392 			fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
393 				  E2MMX|EAMD3D;
394 			fcr_clr = DPDC;
395 #ifdef CONFIG_X86_OOSTORE
396 			winchip2_unprotect_mcr();
397 			winchip2_create_optimal_mcr();
398 			rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
399 			/*
400 			 * Enable:
401 			 *	write combining on non-stack, non-string
402 			 *	write combining on string, all types
403 			 *	weak write ordering
404 			 */
405 			lo |= 31;
406 			wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
407 			winchip2_protect_mcr();
408 #endif
409 			break;
410 		case 9:
411 			name = "3";
412 			fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
413 				  E2MMX|EAMD3D;
414 			fcr_clr = DPDC;
415 #ifdef CONFIG_X86_OOSTORE
416 			winchip2_unprotect_mcr();
417 			winchip2_create_optimal_mcr();
418 			rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
419 			/*
420 			 * Enable:
421 			 *	write combining on non-stack, non-string
422 			 *	write combining on string, all types
423 			 *	weak write ordering
424 			 */
425 			lo |= 31;
426 			wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
427 			winchip2_protect_mcr();
428 #endif
429 			break;
430 		default:
431 			name = "??";
432 		}
433 
434 		rdmsr(MSR_IDT_FCR1, lo, hi);
435 		newlo = (lo|fcr_set) & (~fcr_clr);
436 
437 		if (newlo != lo) {
438 			printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n",
439 				lo, newlo);
440 			wrmsr(MSR_IDT_FCR1, newlo, hi);
441 		} else {
442 			printk(KERN_INFO "Centaur FCR is 0x%X\n", lo);
443 		}
444 		/* Emulate MTRRs using Centaur's MCR. */
445 		set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
446 		/* Report CX8 */
447 		set_cpu_cap(c, X86_FEATURE_CX8);
448 		/* Set 3DNow! on Winchip 2 and above. */
449 		if (c->x86_model >= 8)
450 			set_cpu_cap(c, X86_FEATURE_3DNOW);
451 		/* See if we can find out some more. */
452 		if (cpuid_eax(0x80000000) >= 0x80000005) {
453 			/* Yes, we can. */
454 			cpuid(0x80000005, &aa, &bb, &cc, &dd);
455 			/* Add L1 data and code cache sizes. */
456 			c->x86_cache_size = (cc>>24)+(dd>>24);
457 		}
458 		sprintf(c->x86_model_id, "WinChip %s", name);
459 		break;
460 #endif
461 	case 6:
462 		init_c3(c);
463 		break;
464 	}
465 #ifdef CONFIG_X86_64
466 	set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
467 #endif
468 }
469 
470 #ifdef CONFIG_X86_32
471 static unsigned int
472 centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
473 {
474 	/* VIA C3 CPUs (670-68F) need further shifting. */
475 	if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
476 		size >>= 8;
477 
478 	/*
479 	 * There's also an erratum in Nehemiah stepping 1, which
480 	 * returns '65KB' instead of '64KB'
481 	 *  - Note, it seems this may only be in engineering samples.
482 	 */
483 	if ((c->x86 == 6) && (c->x86_model == 9) &&
484 				(c->x86_mask == 1) && (size == 65))
485 		size -= 1;
486 	return size;
487 }
488 #endif
489 
490 static const struct cpu_dev centaur_cpu_dev = {
491 	.c_vendor	= "Centaur",
492 	.c_ident	= { "CentaurHauls" },
493 	.c_early_init	= early_init_centaur,
494 	.c_init		= init_centaur,
495 #ifdef CONFIG_X86_32
496 	.legacy_cache_size = centaur_size_cache,
497 #endif
498 	.c_x86_vendor	= X86_VENDOR_CENTAUR,
499 };
500 
501 cpu_dev_register(centaur_cpu_dev);
502