1 #include <linux/bitops.h> 2 #include <linux/kernel.h> 3 #include <linux/init.h> 4 5 #include <asm/processor.h> 6 #include <asm/e820.h> 7 #include <asm/mtrr.h> 8 #include <asm/msr.h> 9 10 #include "cpu.h" 11 12 #ifdef CONFIG_X86_OOSTORE 13 14 static u32 __cpuinit power2(u32 x) 15 { 16 u32 s = 1; 17 18 while (s <= x) 19 s <<= 1; 20 21 return s >>= 1; 22 } 23 24 25 /* 26 * Set up an actual MCR 27 */ 28 static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key) 29 { 30 u32 lo, hi; 31 32 hi = base & ~0xFFF; 33 lo = ~(size-1); /* Size is a power of 2 so this makes a mask */ 34 lo &= ~0xFFF; /* Remove the ctrl value bits */ 35 lo |= key; /* Attribute we wish to set */ 36 wrmsr(reg+MSR_IDT_MCR0, lo, hi); 37 mtrr_centaur_report_mcr(reg, lo, hi); /* Tell the mtrr driver */ 38 } 39 40 /* 41 * Figure what we can cover with MCR's 42 * 43 * Shortcut: We know you can't put 4Gig of RAM on a winchip 44 */ 45 static u32 __cpuinit ramtop(void) 46 { 47 u32 clip = 0xFFFFFFFFUL; 48 u32 top = 0; 49 int i; 50 51 for (i = 0; i < e820.nr_map; i++) { 52 unsigned long start, end; 53 54 if (e820.map[i].addr > 0xFFFFFFFFUL) 55 continue; 56 /* 57 * Don't MCR over reserved space. Ignore the ISA hole 58 * we frob around that catastrophe already 59 */ 60 if (e820.map[i].type == E820_RESERVED) { 61 if (e820.map[i].addr >= 0x100000UL && 62 e820.map[i].addr < clip) 63 clip = e820.map[i].addr; 64 continue; 65 } 66 start = e820.map[i].addr; 67 end = e820.map[i].addr + e820.map[i].size; 68 if (start >= end) 69 continue; 70 if (end > top) 71 top = end; 72 } 73 /* 74 * Everything below 'top' should be RAM except for the ISA hole. 75 * Because of the limited MCR's we want to map NV/ACPI into our 76 * MCR range for gunk in RAM 77 * 78 * Clip might cause us to MCR insufficient RAM but that is an 79 * acceptable failure mode and should only bite obscure boxes with 80 * a VESA hole at 15Mb 81 * 82 * The second case Clip sometimes kicks in is when the EBDA is marked 83 * as reserved. Again we fail safe with reasonable results 84 */ 85 if (top > clip) 86 top = clip; 87 88 return top; 89 } 90 91 /* 92 * Compute a set of MCR's to give maximum coverage 93 */ 94 static int __cpuinit centaur_mcr_compute(int nr, int key) 95 { 96 u32 mem = ramtop(); 97 u32 root = power2(mem); 98 u32 base = root; 99 u32 top = root; 100 u32 floor = 0; 101 int ct = 0; 102 103 while (ct < nr) { 104 u32 fspace = 0; 105 u32 high; 106 u32 low; 107 108 /* 109 * Find the largest block we will fill going upwards 110 */ 111 high = power2(mem-top); 112 113 /* 114 * Find the largest block we will fill going downwards 115 */ 116 low = base/2; 117 118 /* 119 * Don't fill below 1Mb going downwards as there 120 * is an ISA hole in the way. 121 */ 122 if (base <= 1024*1024) 123 low = 0; 124 125 /* 126 * See how much space we could cover by filling below 127 * the ISA hole 128 */ 129 130 if (floor == 0) 131 fspace = 512*1024; 132 else if (floor == 512*1024) 133 fspace = 128*1024; 134 135 /* And forget ROM space */ 136 137 /* 138 * Now install the largest coverage we get 139 */ 140 if (fspace > high && fspace > low) { 141 centaur_mcr_insert(ct, floor, fspace, key); 142 floor += fspace; 143 } else if (high > low) { 144 centaur_mcr_insert(ct, top, high, key); 145 top += high; 146 } else if (low > 0) { 147 base -= low; 148 centaur_mcr_insert(ct, base, low, key); 149 } else 150 break; 151 ct++; 152 } 153 /* 154 * We loaded ct values. We now need to set the mask. The caller 155 * must do this bit. 156 */ 157 return ct; 158 } 159 160 static void __cpuinit centaur_create_optimal_mcr(void) 161 { 162 int used; 163 int i; 164 165 /* 166 * Allocate up to 6 mcrs to mark as much of ram as possible 167 * as write combining and weak write ordered. 168 * 169 * To experiment with: Linux never uses stack operations for 170 * mmio spaces so we could globally enable stack operation wc 171 * 172 * Load the registers with type 31 - full write combining, all 173 * writes weakly ordered. 174 */ 175 used = centaur_mcr_compute(6, 31); 176 177 /* 178 * Wipe unused MCRs 179 */ 180 for (i = used; i < 8; i++) 181 wrmsr(MSR_IDT_MCR0+i, 0, 0); 182 } 183 184 static void __cpuinit winchip2_create_optimal_mcr(void) 185 { 186 u32 lo, hi; 187 int used; 188 int i; 189 190 /* 191 * Allocate up to 6 mcrs to mark as much of ram as possible 192 * as write combining, weak store ordered. 193 * 194 * Load the registers with type 25 195 * 8 - weak write ordering 196 * 16 - weak read ordering 197 * 1 - write combining 198 */ 199 used = centaur_mcr_compute(6, 25); 200 201 /* 202 * Mark the registers we are using. 203 */ 204 rdmsr(MSR_IDT_MCR_CTRL, lo, hi); 205 for (i = 0; i < used; i++) 206 lo |= 1<<(9+i); 207 wrmsr(MSR_IDT_MCR_CTRL, lo, hi); 208 209 /* 210 * Wipe unused MCRs 211 */ 212 213 for (i = used; i < 8; i++) 214 wrmsr(MSR_IDT_MCR0+i, 0, 0); 215 } 216 217 /* 218 * Handle the MCR key on the Winchip 2. 219 */ 220 static void __cpuinit winchip2_unprotect_mcr(void) 221 { 222 u32 lo, hi; 223 u32 key; 224 225 rdmsr(MSR_IDT_MCR_CTRL, lo, hi); 226 lo &= ~0x1C0; /* blank bits 8-6 */ 227 key = (lo>>17) & 7; 228 lo |= key<<6; /* replace with unlock key */ 229 wrmsr(MSR_IDT_MCR_CTRL, lo, hi); 230 } 231 232 static void __cpuinit winchip2_protect_mcr(void) 233 { 234 u32 lo, hi; 235 236 rdmsr(MSR_IDT_MCR_CTRL, lo, hi); 237 lo &= ~0x1C0; /* blank bits 8-6 */ 238 wrmsr(MSR_IDT_MCR_CTRL, lo, hi); 239 } 240 #endif /* CONFIG_X86_OOSTORE */ 241 242 #define ACE_PRESENT (1 << 6) 243 #define ACE_ENABLED (1 << 7) 244 #define ACE_FCR (1 << 28) /* MSR_VIA_FCR */ 245 246 #define RNG_PRESENT (1 << 2) 247 #define RNG_ENABLED (1 << 3) 248 #define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ 249 250 static void __cpuinit init_c3(struct cpuinfo_x86 *c) 251 { 252 u32 lo, hi; 253 254 /* Test for Centaur Extended Feature Flags presence */ 255 if (cpuid_eax(0xC0000000) >= 0xC0000001) { 256 u32 tmp = cpuid_edx(0xC0000001); 257 258 /* enable ACE unit, if present and disabled */ 259 if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) { 260 rdmsr(MSR_VIA_FCR, lo, hi); 261 lo |= ACE_FCR; /* enable ACE unit */ 262 wrmsr(MSR_VIA_FCR, lo, hi); 263 printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n"); 264 } 265 266 /* enable RNG unit, if present and disabled */ 267 if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) { 268 rdmsr(MSR_VIA_RNG, lo, hi); 269 lo |= RNG_ENABLE; /* enable RNG unit */ 270 wrmsr(MSR_VIA_RNG, lo, hi); 271 printk(KERN_INFO "CPU: Enabled h/w RNG\n"); 272 } 273 274 /* store Centaur Extended Feature Flags as 275 * word 5 of the CPU capability bit array 276 */ 277 c->x86_capability[5] = cpuid_edx(0xC0000001); 278 } 279 #ifdef CONFIG_X86_32 280 /* Cyrix III family needs CX8 & PGE explicitly enabled. */ 281 if (c->x86_model >= 6 && c->x86_model <= 13) { 282 rdmsr(MSR_VIA_FCR, lo, hi); 283 lo |= (1<<1 | 1<<7); 284 wrmsr(MSR_VIA_FCR, lo, hi); 285 set_cpu_cap(c, X86_FEATURE_CX8); 286 } 287 288 /* Before Nehemiah, the C3's had 3dNOW! */ 289 if (c->x86_model >= 6 && c->x86_model < 9) 290 set_cpu_cap(c, X86_FEATURE_3DNOW); 291 #endif 292 if (c->x86 == 0x6 && c->x86_model >= 0xf) { 293 c->x86_cache_alignment = c->x86_clflush_size * 2; 294 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 295 } 296 297 cpu_detect_cache_sizes(c); 298 } 299 300 enum { 301 ECX8 = 1<<1, 302 EIERRINT = 1<<2, 303 DPM = 1<<3, 304 DMCE = 1<<4, 305 DSTPCLK = 1<<5, 306 ELINEAR = 1<<6, 307 DSMC = 1<<7, 308 DTLOCK = 1<<8, 309 EDCTLB = 1<<8, 310 EMMX = 1<<9, 311 DPDC = 1<<11, 312 EBRPRED = 1<<12, 313 DIC = 1<<13, 314 DDC = 1<<14, 315 DNA = 1<<15, 316 ERETSTK = 1<<16, 317 E2MMX = 1<<19, 318 EAMD3D = 1<<20, 319 }; 320 321 static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) 322 { 323 switch (c->x86) { 324 #ifdef CONFIG_X86_32 325 case 5: 326 /* Emulate MTRRs using Centaur's MCR. */ 327 set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); 328 break; 329 #endif 330 case 6: 331 if (c->x86_model >= 0xf) 332 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 333 break; 334 } 335 #ifdef CONFIG_X86_64 336 set_cpu_cap(c, X86_FEATURE_SYSENTER32); 337 #endif 338 } 339 340 static void __cpuinit init_centaur(struct cpuinfo_x86 *c) 341 { 342 #ifdef CONFIG_X86_32 343 char *name; 344 u32 fcr_set = 0; 345 u32 fcr_clr = 0; 346 u32 lo, hi, newlo; 347 u32 aa, bb, cc, dd; 348 349 /* 350 * Bit 31 in normal CPUID used for nonstandard 3DNow ID; 351 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway 352 */ 353 clear_cpu_cap(c, 0*32+31); 354 #endif 355 early_init_centaur(c); 356 switch (c->x86) { 357 #ifdef CONFIG_X86_32 358 case 5: 359 switch (c->x86_model) { 360 case 4: 361 name = "C6"; 362 fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK; 363 fcr_clr = DPDC; 364 printk(KERN_NOTICE "Disabling bugged TSC.\n"); 365 clear_cpu_cap(c, X86_FEATURE_TSC); 366 #ifdef CONFIG_X86_OOSTORE 367 centaur_create_optimal_mcr(); 368 /* 369 * Enable: 370 * write combining on non-stack, non-string 371 * write combining on string, all types 372 * weak write ordering 373 * 374 * The C6 original lacks weak read order 375 * 376 * Note 0x120 is write only on Winchip 1 377 */ 378 wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); 379 #endif 380 break; 381 case 8: 382 switch (c->x86_mask) { 383 default: 384 name = "2"; 385 break; 386 case 7 ... 9: 387 name = "2A"; 388 break; 389 case 10 ... 15: 390 name = "2B"; 391 break; 392 } 393 fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK| 394 E2MMX|EAMD3D; 395 fcr_clr = DPDC; 396 #ifdef CONFIG_X86_OOSTORE 397 winchip2_unprotect_mcr(); 398 winchip2_create_optimal_mcr(); 399 rdmsr(MSR_IDT_MCR_CTRL, lo, hi); 400 /* 401 * Enable: 402 * write combining on non-stack, non-string 403 * write combining on string, all types 404 * weak write ordering 405 */ 406 lo |= 31; 407 wrmsr(MSR_IDT_MCR_CTRL, lo, hi); 408 winchip2_protect_mcr(); 409 #endif 410 break; 411 case 9: 412 name = "3"; 413 fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK| 414 E2MMX|EAMD3D; 415 fcr_clr = DPDC; 416 #ifdef CONFIG_X86_OOSTORE 417 winchip2_unprotect_mcr(); 418 winchip2_create_optimal_mcr(); 419 rdmsr(MSR_IDT_MCR_CTRL, lo, hi); 420 /* 421 * Enable: 422 * write combining on non-stack, non-string 423 * write combining on string, all types 424 * weak write ordering 425 */ 426 lo |= 31; 427 wrmsr(MSR_IDT_MCR_CTRL, lo, hi); 428 winchip2_protect_mcr(); 429 #endif 430 break; 431 default: 432 name = "??"; 433 } 434 435 rdmsr(MSR_IDT_FCR1, lo, hi); 436 newlo = (lo|fcr_set) & (~fcr_clr); 437 438 if (newlo != lo) { 439 printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", 440 lo, newlo); 441 wrmsr(MSR_IDT_FCR1, newlo, hi); 442 } else { 443 printk(KERN_INFO "Centaur FCR is 0x%X\n", lo); 444 } 445 /* Emulate MTRRs using Centaur's MCR. */ 446 set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); 447 /* Report CX8 */ 448 set_cpu_cap(c, X86_FEATURE_CX8); 449 /* Set 3DNow! on Winchip 2 and above. */ 450 if (c->x86_model >= 8) 451 set_cpu_cap(c, X86_FEATURE_3DNOW); 452 /* See if we can find out some more. */ 453 if (cpuid_eax(0x80000000) >= 0x80000005) { 454 /* Yes, we can. */ 455 cpuid(0x80000005, &aa, &bb, &cc, &dd); 456 /* Add L1 data and code cache sizes. */ 457 c->x86_cache_size = (cc>>24)+(dd>>24); 458 } 459 sprintf(c->x86_model_id, "WinChip %s", name); 460 break; 461 #endif 462 case 6: 463 init_c3(c); 464 break; 465 } 466 #ifdef CONFIG_X86_64 467 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); 468 #endif 469 } 470 471 static unsigned int __cpuinit 472 centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) 473 { 474 #ifdef CONFIG_X86_32 475 /* VIA C3 CPUs (670-68F) need further shifting. */ 476 if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) 477 size >>= 8; 478 479 /* 480 * There's also an erratum in Nehemiah stepping 1, which 481 * returns '65KB' instead of '64KB' 482 * - Note, it seems this may only be in engineering samples. 483 */ 484 if ((c->x86 == 6) && (c->x86_model == 9) && 485 (c->x86_mask == 1) && (size == 65)) 486 size -= 1; 487 #endif 488 return size; 489 } 490 491 static const struct cpu_dev __cpuinitconst centaur_cpu_dev = { 492 .c_vendor = "Centaur", 493 .c_ident = { "CentaurHauls" }, 494 .c_early_init = early_init_centaur, 495 .c_init = init_centaur, 496 .c_size_cache = centaur_size_cache, 497 .c_x86_vendor = X86_VENDOR_CENTAUR, 498 }; 499 500 cpu_dev_register(centaur_cpu_dev); 501