1 #include <linux/kernel.h> 2 #include <linux/init.h> 3 #include <linux/bitops.h> 4 5 #include <asm/processor.h> 6 #include <asm/msr.h> 7 #include <asm/e820.h> 8 #include <asm/mtrr.h> 9 10 #include "cpu.h" 11 12 #ifdef CONFIG_X86_OOSTORE 13 14 static u32 __cpuinit power2(u32 x) 15 { 16 u32 s = 1; 17 18 while (s <= x) 19 s <<= 1; 20 21 return s >>= 1; 22 } 23 24 25 /* 26 * Set up an actual MCR 27 */ 28 static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key) 29 { 30 u32 lo, hi; 31 32 hi = base & ~0xFFF; 33 lo = ~(size-1); /* Size is a power of 2 so this makes a mask */ 34 lo &= ~0xFFF; /* Remove the ctrl value bits */ 35 lo |= key; /* Attribute we wish to set */ 36 wrmsr(reg+MSR_IDT_MCR0, lo, hi); 37 mtrr_centaur_report_mcr(reg, lo, hi); /* Tell the mtrr driver */ 38 } 39 40 /* 41 * Figure what we can cover with MCR's 42 * 43 * Shortcut: We know you can't put 4Gig of RAM on a winchip 44 */ 45 static u32 __cpuinit ramtop(void) 46 { 47 u32 clip = 0xFFFFFFFFUL; 48 u32 top = 0; 49 int i; 50 51 for (i = 0; i < e820.nr_map; i++) { 52 unsigned long start, end; 53 54 if (e820.map[i].addr > 0xFFFFFFFFUL) 55 continue; 56 /* 57 * Don't MCR over reserved space. Ignore the ISA hole 58 * we frob around that catastrophe already 59 */ 60 if (e820.map[i].type == E820_RESERVED) { 61 if (e820.map[i].addr >= 0x100000UL && 62 e820.map[i].addr < clip) 63 clip = e820.map[i].addr; 64 continue; 65 } 66 start = e820.map[i].addr; 67 end = e820.map[i].addr + e820.map[i].size; 68 if (start >= end) 69 continue; 70 if (end > top) 71 top = end; 72 } 73 /* 74 * Everything below 'top' should be RAM except for the ISA hole. 75 * Because of the limited MCR's we want to map NV/ACPI into our 76 * MCR range for gunk in RAM 77 * 78 * Clip might cause us to MCR insufficient RAM but that is an 79 * acceptable failure mode and should only bite obscure boxes with 80 * a VESA hole at 15Mb 81 * 82 * The second case Clip sometimes kicks in is when the EBDA is marked 83 * as reserved. Again we fail safe with reasonable results 84 */ 85 if (top > clip) 86 top = clip; 87 88 return top; 89 } 90 91 /* 92 * Compute a set of MCR's to give maximum coverage 93 */ 94 static int __cpuinit centaur_mcr_compute(int nr, int key) 95 { 96 u32 mem = ramtop(); 97 u32 root = power2(mem); 98 u32 base = root; 99 u32 top = root; 100 u32 floor = 0; 101 int ct = 0; 102 103 while (ct < nr) { 104 u32 fspace = 0; 105 u32 high; 106 u32 low; 107 108 /* 109 * Find the largest block we will fill going upwards 110 */ 111 high = power2(mem-top); 112 113 /* 114 * Find the largest block we will fill going downwards 115 */ 116 low = base/2; 117 118 /* 119 * Don't fill below 1Mb going downwards as there 120 * is an ISA hole in the way. 121 */ 122 if (base <= 1024*1024) 123 low = 0; 124 125 /* 126 * See how much space we could cover by filling below 127 * the ISA hole 128 */ 129 130 if (floor == 0) 131 fspace = 512*1024; 132 else if (floor == 512*1024) 133 fspace = 128*1024; 134 135 /* And forget ROM space */ 136 137 /* 138 * Now install the largest coverage we get 139 */ 140 if (fspace > high && fspace > low) { 141 centaur_mcr_insert(ct, floor, fspace, key); 142 floor += fspace; 143 } else if (high > low) { 144 centaur_mcr_insert(ct, top, high, key); 145 top += high; 146 } else if (low > 0) { 147 base -= low; 148 centaur_mcr_insert(ct, base, low, key); 149 } else 150 break; 151 ct++; 152 } 153 /* 154 * We loaded ct values. We now need to set the mask. The caller 155 * must do this bit. 156 */ 157 return ct; 158 } 159 160 static void __cpuinit centaur_create_optimal_mcr(void) 161 { 162 int used; 163 int i; 164 165 /* 166 * Allocate up to 6 mcrs to mark as much of ram as possible 167 * as write combining and weak write ordered. 168 * 169 * To experiment with: Linux never uses stack operations for 170 * mmio spaces so we could globally enable stack operation wc 171 * 172 * Load the registers with type 31 - full write combining, all 173 * writes weakly ordered. 174 */ 175 used = centaur_mcr_compute(6, 31); 176 177 /* 178 * Wipe unused MCRs 179 */ 180 for (i = used; i < 8; i++) 181 wrmsr(MSR_IDT_MCR0+i, 0, 0); 182 } 183 184 static void __cpuinit winchip2_create_optimal_mcr(void) 185 { 186 u32 lo, hi; 187 int used; 188 int i; 189 190 /* 191 * Allocate up to 6 mcrs to mark as much of ram as possible 192 * as write combining, weak store ordered. 193 * 194 * Load the registers with type 25 195 * 8 - weak write ordering 196 * 16 - weak read ordering 197 * 1 - write combining 198 */ 199 used = centaur_mcr_compute(6, 25); 200 201 /* 202 * Mark the registers we are using. 203 */ 204 rdmsr(MSR_IDT_MCR_CTRL, lo, hi); 205 for (i = 0; i < used; i++) 206 lo |= 1<<(9+i); 207 wrmsr(MSR_IDT_MCR_CTRL, lo, hi); 208 209 /* 210 * Wipe unused MCRs 211 */ 212 213 for (i = used; i < 8; i++) 214 wrmsr(MSR_IDT_MCR0+i, 0, 0); 215 } 216 217 /* 218 * Handle the MCR key on the Winchip 2. 219 */ 220 static void __cpuinit winchip2_unprotect_mcr(void) 221 { 222 u32 lo, hi; 223 u32 key; 224 225 rdmsr(MSR_IDT_MCR_CTRL, lo, hi); 226 lo &= ~0x1C0; /* blank bits 8-6 */ 227 key = (lo>>17) & 7; 228 lo |= key<<6; /* replace with unlock key */ 229 wrmsr(MSR_IDT_MCR_CTRL, lo, hi); 230 } 231 232 static void __cpuinit winchip2_protect_mcr(void) 233 { 234 u32 lo, hi; 235 236 rdmsr(MSR_IDT_MCR_CTRL, lo, hi); 237 lo &= ~0x1C0; /* blank bits 8-6 */ 238 wrmsr(MSR_IDT_MCR_CTRL, lo, hi); 239 } 240 #endif /* CONFIG_X86_OOSTORE */ 241 242 #define ACE_PRESENT (1 << 6) 243 #define ACE_ENABLED (1 << 7) 244 #define ACE_FCR (1 << 28) /* MSR_VIA_FCR */ 245 246 #define RNG_PRESENT (1 << 2) 247 #define RNG_ENABLED (1 << 3) 248 #define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ 249 250 static void __cpuinit init_c3(struct cpuinfo_x86 *c) 251 { 252 u32 lo, hi; 253 254 /* Test for Centaur Extended Feature Flags presence */ 255 if (cpuid_eax(0xC0000000) >= 0xC0000001) { 256 u32 tmp = cpuid_edx(0xC0000001); 257 258 /* enable ACE unit, if present and disabled */ 259 if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) { 260 rdmsr(MSR_VIA_FCR, lo, hi); 261 lo |= ACE_FCR; /* enable ACE unit */ 262 wrmsr(MSR_VIA_FCR, lo, hi); 263 printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n"); 264 } 265 266 /* enable RNG unit, if present and disabled */ 267 if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) { 268 rdmsr(MSR_VIA_RNG, lo, hi); 269 lo |= RNG_ENABLE; /* enable RNG unit */ 270 wrmsr(MSR_VIA_RNG, lo, hi); 271 printk(KERN_INFO "CPU: Enabled h/w RNG\n"); 272 } 273 274 /* store Centaur Extended Feature Flags as 275 * word 5 of the CPU capability bit array 276 */ 277 c->x86_capability[5] = cpuid_edx(0xC0000001); 278 } 279 280 /* Cyrix III family needs CX8 & PGE explicitly enabled. */ 281 if (c->x86_model >= 6 && c->x86_model <= 9) { 282 rdmsr(MSR_VIA_FCR, lo, hi); 283 lo |= (1<<1 | 1<<7); 284 wrmsr(MSR_VIA_FCR, lo, hi); 285 set_cpu_cap(c, X86_FEATURE_CX8); 286 } 287 288 /* Before Nehemiah, the C3's had 3dNOW! */ 289 if (c->x86_model >= 6 && c->x86_model < 9) 290 set_cpu_cap(c, X86_FEATURE_3DNOW); 291 292 get_model_name(c); 293 display_cacheinfo(c); 294 } 295 296 enum { 297 ECX8 = 1<<1, 298 EIERRINT = 1<<2, 299 DPM = 1<<3, 300 DMCE = 1<<4, 301 DSTPCLK = 1<<5, 302 ELINEAR = 1<<6, 303 DSMC = 1<<7, 304 DTLOCK = 1<<8, 305 EDCTLB = 1<<8, 306 EMMX = 1<<9, 307 DPDC = 1<<11, 308 EBRPRED = 1<<12, 309 DIC = 1<<13, 310 DDC = 1<<14, 311 DNA = 1<<15, 312 ERETSTK = 1<<16, 313 E2MMX = 1<<19, 314 EAMD3D = 1<<20, 315 }; 316 317 static void __cpuinit init_centaur(struct cpuinfo_x86 *c) 318 { 319 320 char *name; 321 u32 fcr_set = 0; 322 u32 fcr_clr = 0; 323 u32 lo, hi, newlo; 324 u32 aa, bb, cc, dd; 325 326 /* 327 * Bit 31 in normal CPUID used for nonstandard 3DNow ID; 328 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway 329 */ 330 clear_cpu_cap(c, 0*32+31); 331 332 switch (c->x86) { 333 case 5: 334 switch (c->x86_model) { 335 case 4: 336 name = "C6"; 337 fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK; 338 fcr_clr = DPDC; 339 printk(KERN_NOTICE "Disabling bugged TSC.\n"); 340 clear_cpu_cap(c, X86_FEATURE_TSC); 341 #ifdef CONFIG_X86_OOSTORE 342 centaur_create_optimal_mcr(); 343 /* 344 * Enable: 345 * write combining on non-stack, non-string 346 * write combining on string, all types 347 * weak write ordering 348 * 349 * The C6 original lacks weak read order 350 * 351 * Note 0x120 is write only on Winchip 1 352 */ 353 wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); 354 #endif 355 break; 356 case 8: 357 switch (c->x86_mask) { 358 default: 359 name = "2"; 360 break; 361 case 7 ... 9: 362 name = "2A"; 363 break; 364 case 10 ... 15: 365 name = "2B"; 366 break; 367 } 368 fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK| 369 E2MMX|EAMD3D; 370 fcr_clr = DPDC; 371 #ifdef CONFIG_X86_OOSTORE 372 winchip2_unprotect_mcr(); 373 winchip2_create_optimal_mcr(); 374 rdmsr(MSR_IDT_MCR_CTRL, lo, hi); 375 /* 376 * Enable: 377 * write combining on non-stack, non-string 378 * write combining on string, all types 379 * weak write ordering 380 */ 381 lo |= 31; 382 wrmsr(MSR_IDT_MCR_CTRL, lo, hi); 383 winchip2_protect_mcr(); 384 #endif 385 break; 386 case 9: 387 name = "3"; 388 fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK| 389 E2MMX|EAMD3D; 390 fcr_clr = DPDC; 391 #ifdef CONFIG_X86_OOSTORE 392 winchip2_unprotect_mcr(); 393 winchip2_create_optimal_mcr(); 394 rdmsr(MSR_IDT_MCR_CTRL, lo, hi); 395 /* 396 * Enable: 397 * write combining on non-stack, non-string 398 * write combining on string, all types 399 * weak write ordering 400 */ 401 lo |= 31; 402 wrmsr(MSR_IDT_MCR_CTRL, lo, hi); 403 winchip2_protect_mcr(); 404 #endif 405 break; 406 default: 407 name = "??"; 408 } 409 410 rdmsr(MSR_IDT_FCR1, lo, hi); 411 newlo = (lo|fcr_set) & (~fcr_clr); 412 413 if (newlo != lo) { 414 printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", 415 lo, newlo); 416 wrmsr(MSR_IDT_FCR1, newlo, hi); 417 } else { 418 printk(KERN_INFO "Centaur FCR is 0x%X\n", lo); 419 } 420 /* Emulate MTRRs using Centaur's MCR. */ 421 set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); 422 /* Report CX8 */ 423 set_cpu_cap(c, X86_FEATURE_CX8); 424 /* Set 3DNow! on Winchip 2 and above. */ 425 if (c->x86_model >= 8) 426 set_cpu_cap(c, X86_FEATURE_3DNOW); 427 /* See if we can find out some more. */ 428 if (cpuid_eax(0x80000000) >= 0x80000005) { 429 /* Yes, we can. */ 430 cpuid(0x80000005, &aa, &bb, &cc, &dd); 431 /* Add L1 data and code cache sizes. */ 432 c->x86_cache_size = (cc>>24)+(dd>>24); 433 } 434 sprintf(c->x86_model_id, "WinChip %s", name); 435 break; 436 437 case 6: 438 init_c3(c); 439 break; 440 } 441 } 442 443 static unsigned int __cpuinit 444 centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) 445 { 446 /* VIA C3 CPUs (670-68F) need further shifting. */ 447 if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) 448 size >>= 8; 449 450 /* 451 * There's also an erratum in Nehemiah stepping 1, which 452 * returns '65KB' instead of '64KB' 453 * - Note, it seems this may only be in engineering samples. 454 */ 455 if ((c->x86 == 6) && (c->x86_model == 9) && 456 (c->x86_mask == 1) && (size == 65)) 457 size -= 1; 458 459 return size; 460 } 461 462 static struct cpu_dev centaur_cpu_dev __cpuinitdata = { 463 .c_vendor = "Centaur", 464 .c_ident = { "CentaurHauls" }, 465 .c_init = init_centaur, 466 .c_size_cache = centaur_size_cache, 467 }; 468 469 cpu_vendor_dev_register(X86_VENDOR_CENTAUR, ¢aur_cpu_dev); 470