/*
 * CPU setup for Centaur/VIA processors: IDT WinChip C6/2/3 (family 5)
 * and VIA Cyrix III / C3 (family 6).  Handles MCR (MTRR-like memory
 * configuration register) programming on the WinChip family and
 * feature enabling (ACE crypto, hardware RNG, CX8, 3DNow!) on the C3.
 */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/bitops.h>

#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/e820.h>
#include <asm/mtrr.h>

#include "cpu.h"

#ifdef CONFIG_X86_OOSTORE

/*
 * Largest power of two <= x (0 when x == 0).  Used to size MCR
 * ranges, which must cover power-of-two sized blocks.
 */
static u32 __cpuinit power2(u32 x)
{
	u32 s = 1;

	while (s <= x)
		s <<= 1;

	return s >>= 1;
}


/*
 * Set up an actual MCR: MCRs pair a 4K-aligned base (high word) with
 * a size mask plus attribute key (low word).
 */
static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key)
{
	u32 lo, hi;

	hi = base & ~0xFFF;
	lo = ~(size-1);		/* Size is a power of 2 so this makes a mask */
	lo &= ~0xFFF;		/* Remove the ctrl value bits */
	lo |= key;		/* Attribute we wish to set */
	wrmsr(reg+MSR_IDT_MCR0, lo, hi);
	mtrr_centaur_report_mcr(reg, lo, hi);	/* Tell the mtrr driver */
}

/*
 * Figure what we can cover with MCR's
 *
 * Shortcut: We know you can't put 4Gig of RAM on a winchip
 */
static u32 __cpuinit ramtop(void)
{
	u32 clip = 0xFFFFFFFFUL;
	u32 top = 0;
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		unsigned long start, end;

		/* Regions entirely above 4G cannot matter on these CPUs */
		if (e820.map[i].addr > 0xFFFFFFFFUL)
			continue;
		/*
		 * Don't MCR over reserved space. Ignore the ISA hole
		 * we frob around that catastrophe already
		 */
		if (e820.map[i].type == E820_RESERVED) {
			if (e820.map[i].addr >= 0x100000UL &&
			    e820.map[i].addr < clip)
				clip = e820.map[i].addr;
			continue;
		}
		start = e820.map[i].addr;
		end = e820.map[i].addr + e820.map[i].size;
		if (start >= end)
			continue;
		if (end > top)
			top = end;
	}
	/*
	 * Everything below 'top' should be RAM except for the ISA hole.
	 * Because of the limited MCR's we want to map NV/ACPI into our
	 * MCR range for gunk in RAM
	 *
	 * Clip might cause us to MCR insufficient RAM but that is an
	 * acceptable failure mode and should only bite obscure boxes with
	 * a VESA hole at 15Mb
	 *
	 * The second case Clip sometimes kicks in is when the EBDA is marked
	 * as reserved. Again we fail safe with reasonable results
	 */
	if (top > clip)
		top = clip;

	return top;
}

/*
 * Compute a set of MCR's to give maximum coverage.
 *
 * Greedy: start from the largest power-of-two block of RAM, then on
 * each pass install whichever is biggest of (a) the next block going
 * up from 'top', (b) half the base going down, or (c) the remaining
 * sub-ISA-hole space.  Returns the number of MCRs programmed; the
 * caller still has to set the enable mask.
 */
static int __cpuinit centaur_mcr_compute(int nr, int key)
{
	u32 mem = ramtop();
	u32 root = power2(mem);
	u32 base = root;
	u32 top = root;
	u32 floor = 0;
	int ct = 0;

	while (ct < nr) {
		u32 fspace = 0;
		u32 high;
		u32 low;

		/*
		 * Find the largest block we will fill going upwards
		 */
		high = power2(mem-top);

		/*
		 * Find the largest block we will fill going downwards
		 */
		low = base/2;

		/*
		 * Don't fill below 1Mb going downwards as there
		 * is an ISA hole in the way.
		 */
		if (base <= 1024*1024)
			low = 0;

		/*
		 * See how much space we could cover by filling below
		 * the ISA hole
		 */

		if (floor == 0)
			fspace = 512*1024;
		else if (floor == 512*1024)
			fspace = 128*1024;

		/* And forget ROM space */

		/*
		 * Now install the largest coverage we get
		 */
		if (fspace > high && fspace > low) {
			centaur_mcr_insert(ct, floor, fspace, key);
			floor += fspace;
		} else if (high > low) {
			centaur_mcr_insert(ct, top, high, key);
			top += high;
		} else if (low > 0) {
			base -= low;
			centaur_mcr_insert(ct, base, low, key);
		} else
			break;
		ct++;
	}
	/*
	 * We loaded ct values. We now need to set the mask. The caller
	 * must do this bit.
	 */
	return ct;
}

static void __cpuinit centaur_create_optimal_mcr(void)
{
	int used;
	int i;

	/*
	 * Allocate up to 6 mcrs to mark as much of ram as possible
	 * as write combining and weak write ordered.
	 *
	 * To experiment with: Linux never uses stack operations for
	 * mmio spaces so we could globally enable stack operation wc
	 *
	 * Load the registers with type 31 - full write combining, all
	 * writes weakly ordered.
	 */
	used = centaur_mcr_compute(6, 31);

	/*
	 * Wipe unused MCRs (the hardware has 8 in total)
	 */
	for (i = used; i < 8; i++)
		wrmsr(MSR_IDT_MCR0+i, 0, 0);
}

static void __cpuinit winchip2_create_optimal_mcr(void)
{
	u32 lo, hi;
	int used;
	int i;

	/*
	 * Allocate up to 6 mcrs to mark as much of ram as possible
	 * as write combining, weak store ordered.
	 *
	 * Load the registers with type 25:
	 *	8	-	weak write ordering
	 *	16	-	weak read ordering
	 *	1	-	write combining
	 */
	used = centaur_mcr_compute(6, 25);

	/*
	 * Mark the registers we are using: on the WinChip 2 each MCR
	 * must also be enabled via bits 9+ of MCR_CTRL.
	 */
	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
	for (i = 0; i < used; i++)
		lo |= 1<<(9+i);
	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);

	/*
	 * Wipe unused MCRs
	 */

	for (i = used; i < 8; i++)
		wrmsr(MSR_IDT_MCR0+i, 0, 0);
}

/*
 * Handle the MCR key on the Winchip 2.
 *
 * The MCRs are write-protected by a 3-bit key read from bits 19-17 of
 * MCR_CTRL; writing that key into bits 8-6 unlocks them.
 */
static void __cpuinit winchip2_unprotect_mcr(void)
{
	u32 lo, hi;
	u32 key;

	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
	lo &= ~0x1C0;	/* blank bits 8-6 */
	key = (lo>>17) & 7;
	lo |= key<<6;	/* replace with unlock key */
	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
}

/* Re-protect the MCRs by clearing the key field again. */
static void __cpuinit winchip2_protect_mcr(void)
{
	u32 lo, hi;

	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
	lo &= ~0x1C0;	/* blank bits 8-6 */
	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
}
#endif /* CONFIG_X86_OOSTORE */

/* Bits in the CPUID 0xC0000001 EDX feature word */
#define ACE_PRESENT	(1 << 6)
#define ACE_ENABLED	(1 << 7)
#define ACE_FCR		(1 << 28)	/* MSR_VIA_FCR */

#define RNG_PRESENT	(1 << 2)
#define RNG_ENABLED	(1 << 3)
#define RNG_ENABLE	(1 << 6)	/* MSR_VIA_RNG */

/*
 * VIA C3 (family 6) setup: turn on the ACE crypto engine and hardware
 * RNG where the extended CPUID leaf reports them present but disabled,
 * and enable CX8/PGE and 3DNow! on the models that need it.
 */
static void __cpuinit init_c3(struct cpuinfo_x86 *c)
{
	u32 lo, hi;

	/* Test for Centaur Extended Feature Flags presence */
	if (cpuid_eax(0xC0000000) >= 0xC0000001) {
		u32 tmp = cpuid_edx(0xC0000001);

		/* enable ACE unit, if present and disabled */
		if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) {
			rdmsr(MSR_VIA_FCR, lo, hi);
			lo |= ACE_FCR;		/* enable ACE unit */
			wrmsr(MSR_VIA_FCR, lo, hi);
			printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n");
		}

		/* enable RNG unit, if present and disabled */
		if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) {
			rdmsr(MSR_VIA_RNG, lo, hi);
			lo |= RNG_ENABLE;	/* enable RNG unit */
			wrmsr(MSR_VIA_RNG, lo, hi);
			printk(KERN_INFO "CPU: Enabled h/w RNG\n");
		}

		/* store Centaur Extended Feature Flags as
		 * word 5 of the CPU capability bit array
		 */
		c->x86_capability[5] = cpuid_edx(0xC0000001);
	}

	/* Cyrix III family needs CX8 & PGE explicitly enabled. */
	if (c->x86_model >= 6 && c->x86_model <= 9) {
		rdmsr(MSR_VIA_FCR, lo, hi);
		lo |= (1<<1 | 1<<7);	/* FCR bits 1 and 7 - presumably CX8/PGE enables; see model docs */
		wrmsr(MSR_VIA_FCR, lo, hi);
		set_cpu_cap(c, X86_FEATURE_CX8);
	}

	/* Before Nehemiah, the C3's had 3dNOW! */
	if (c->x86_model >= 6 && c->x86_model < 9)
		set_cpu_cap(c, X86_FEATURE_3DNOW);

	get_model_name(c);
	display_cacheinfo(c);
}

/*
 * Bits in MSR_IDT_FCR1 on the WinChip family.  E* bits enable a
 * feature, D* bits disable one.  Note DTLOCK and EDCTLB both name
 * bit 8 (the same FCR bit serves both purposes, per the definitions
 * below).
 */
enum {
	ECX8		= 1<<1,
	EIERRINT	= 1<<2,
	DPM		= 1<<3,
	DMCE		= 1<<4,
	DSTPCLK		= 1<<5,
	ELINEAR		= 1<<6,
	DSMC		= 1<<7,
	DTLOCK		= 1<<8,
	EDCTLB		= 1<<8,
	EMMX		= 1<<9,
	DPDC		= 1<<11,
	EBRPRED		= 1<<12,
	DIC		= 1<<13,
	DDC		= 1<<14,
	DNA		= 1<<15,
	ERETSTK		= 1<<16,
	E2MMX		= 1<<19,
	EAMD3D		= 1<<20,
};

static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
{
	switch (c->x86) {
	case 5:
		/* Emulate MTRRs using Centaur's MCR. */
		set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
		break;
	}
}

/*
 * Main Centaur setup: for family 5 (WinChip C6/2/3) pick the per-model
 * FCR bits to set/clear, program the MCRs where CONFIG_X86_OOSTORE is
 * enabled, and fill in the model name and L1 cache size; family 6 is
 * handed off to init_c3().
 */
static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
{

	char *name;
	u32 fcr_set = 0;
	u32 fcr_clr = 0;
	u32 lo, hi, newlo;
	u32 aa, bb, cc, dd;

	/*
	 * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
	 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
	 */
	clear_cpu_cap(c, 0*32+31);

	switch (c->x86) {
	case 5:
		switch (c->x86_model) {
		case 4:
			name = "C6";
			fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
			fcr_clr = DPDC;
			printk(KERN_NOTICE "Disabling bugged TSC.\n");
			clear_cpu_cap(c, X86_FEATURE_TSC);
#ifdef CONFIG_X86_OOSTORE
			centaur_create_optimal_mcr();
			/*
			 * Enable:
			 *	write combining on non-stack, non-string
			 *	write combining on string, all types
			 *	weak write ordering
			 *
			 * The C6 original lacks weak read order
			 *
			 * Note 0x120 is write only on Winchip 1
			 */
			wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0);
#endif
			break;
		case 8:
			/* WinChip 2 revision is told apart by stepping */
			switch (c->x86_mask) {
			default:
				name = "2";
				break;
			case 7 ... 9:
				name = "2A";
				break;
			case 10 ... 15:
				name = "2B";
				break;
			}
			fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
				  E2MMX|EAMD3D;
			fcr_clr = DPDC;
#ifdef CONFIG_X86_OOSTORE
			winchip2_unprotect_mcr();
			winchip2_create_optimal_mcr();
			rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
			/*
			 * Enable:
			 *	write combining on non-stack, non-string
			 *	write combining on string, all types
			 *	weak write ordering
			 */
			lo |= 31;
			wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
			winchip2_protect_mcr();
#endif
			break;
		case 9:
			name = "3";
			fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
				  E2MMX|EAMD3D;
			fcr_clr = DPDC;
#ifdef CONFIG_X86_OOSTORE
			winchip2_unprotect_mcr();
			winchip2_create_optimal_mcr();
			rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
			/*
			 * Enable:
			 *	write combining on non-stack, non-string
			 *	write combining on string, all types
			 *	weak write ordering
			 */
			lo |= 31;
			wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
			winchip2_protect_mcr();
#endif
			break;
		default:
			name = "??";
		}

		/* Apply the per-model FCR changes, only writing if needed */
		rdmsr(MSR_IDT_FCR1, lo, hi);
		newlo = (lo|fcr_set) & (~fcr_clr);

		if (newlo != lo) {
			printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n",
				lo, newlo);
			wrmsr(MSR_IDT_FCR1, newlo, hi);
		} else {
			printk(KERN_INFO "Centaur FCR is 0x%X\n", lo);
		}
		/* Emulate MTRRs using Centaur's MCR. */
		set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
		/* Report CX8 */
		set_cpu_cap(c, X86_FEATURE_CX8);
		/* Set 3DNow! on Winchip 2 and above. */
		if (c->x86_model >= 8)
			set_cpu_cap(c, X86_FEATURE_3DNOW);
		/* See if we can find out some more. */
		if (cpuid_eax(0x80000000) >= 0x80000005) {
			/* Yes, we can. */
			cpuid(0x80000005, &aa, &bb, &cc, &dd);
			/* Add L1 data and code cache sizes. */
			c->x86_cache_size = (cc>>24)+(dd>>24);
		}
		sprintf(c->x86_model_id, "WinChip %s", name);
		break;

	case 6:
		init_c3(c);
		break;
	}
}

/*
 * Correct the raw cache size reported by CPUID for known-buggy models.
 */
static unsigned int __cpuinit
centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
	/* VIA C3 CPUs (670-68F) need further shifting. */
	if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
		size >>= 8;

	/*
	 * There's also an erratum in Nehemiah stepping 1, which
	 * returns '65KB' instead of '64KB'
	 * - Note, it seems this may only be in engineering samples.
	 */
	if ((c->x86 == 6) && (c->x86_model == 9) &&
	    (c->x86_mask == 1) && (size == 65))
		size -= 1;

	return size;
}

static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
	.c_vendor	= "Centaur",
	.c_ident	= { "CentaurHauls" },
	.c_early_init	= early_init_centaur,
	.c_init		= init_centaur,
	.c_size_cache	= centaur_size_cache,
};

cpu_vendor_dev_register(X86_VENDOR_CENTAUR, &centaur_cpu_dev);