1 /* 2 * (C) Copyright 2008-2011 3 * Graeme Russ, <graeme.russ@gmail.com> 4 * 5 * (C) Copyright 2002 6 * Daniel Engström, Omicron Ceti AB, <daniel@omicron.se> 7 * 8 * (C) Copyright 2002 9 * Sysgo Real-Time Solutions, GmbH <www.elinos.com> 10 * Marius Groeger <mgroeger@sysgo.de> 11 * 12 * (C) Copyright 2002 13 * Sysgo Real-Time Solutions, GmbH <www.elinos.com> 14 * Alex Zuepke <azu@sysgo.de> 15 * 16 * Part of this file is adapted from coreboot 17 * src/arch/x86/lib/cpu.c 18 * 19 * SPDX-License-Identifier: GPL-2.0+ 20 */ 21 22 #include <common.h> 23 #include <malloc.h> 24 #include <asm/control_regs.h> 25 #include <asm/cpu.h> 26 #include <asm/mp.h> 27 #include <asm/msr.h> 28 #include <asm/mtrr.h> 29 #include <asm/processor-flags.h> 30 31 DECLARE_GLOBAL_DATA_PTR; 32 33 /* 34 * Constructor for a conventional segment GDT (or LDT) entry 35 * This is a macro so it can be used in initialisers 36 */ 37 #define GDT_ENTRY(flags, base, limit) \ 38 ((((base) & 0xff000000ULL) << (56-24)) | \ 39 (((flags) & 0x0000f0ffULL) << 40) | \ 40 (((limit) & 0x000f0000ULL) << (48-16)) | \ 41 (((base) & 0x00ffffffULL) << 16) | \ 42 (((limit) & 0x0000ffffULL))) 43 44 struct gdt_ptr { 45 u16 len; 46 u32 ptr; 47 } __packed; 48 49 struct cpu_device_id { 50 unsigned vendor; 51 unsigned device; 52 }; 53 54 struct cpuinfo_x86 { 55 uint8_t x86; /* CPU family */ 56 uint8_t x86_vendor; /* CPU vendor */ 57 uint8_t x86_model; 58 uint8_t x86_mask; 59 }; 60 61 /* 62 * List of cpu vendor strings along with their normalized 63 * id values. 64 */ 65 static const struct { 66 int vendor; 67 const char *name; 68 } x86_vendors[] = { 69 { X86_VENDOR_INTEL, "GenuineIntel", }, 70 { X86_VENDOR_CYRIX, "CyrixInstead", }, 71 { X86_VENDOR_AMD, "AuthenticAMD", }, 72 { X86_VENDOR_UMC, "UMC UMC UMC ", }, 73 { X86_VENDOR_NEXGEN, "NexGenDriven", }, 74 { X86_VENDOR_CENTAUR, "CentaurHauls", }, 75 { X86_VENDOR_RISE, "RiseRiseRise", }, 76 { X86_VENDOR_TRANSMETA, "GenuineTMx86", }, 77 { X86_VENDOR_TRANSMETA, "TransmetaCPU", }, 78 { X86_VENDOR_NSC, "Geode by NSC", }, 79 { X86_VENDOR_SIS, "SiS SiS SiS ", }, 80 }; 81 82 static void load_ds(u32 segment) 83 { 84 asm volatile("movl %0, %%ds" : : "r" (segment * X86_GDT_ENTRY_SIZE)); 85 } 86 87 static void load_es(u32 segment) 88 { 89 asm volatile("movl %0, %%es" : : "r" (segment * X86_GDT_ENTRY_SIZE)); 90 } 91 92 static void load_fs(u32 segment) 93 { 94 asm volatile("movl %0, %%fs" : : "r" (segment * X86_GDT_ENTRY_SIZE)); 95 } 96 97 static void load_gs(u32 segment) 98 { 99 asm volatile("movl %0, %%gs" : : "r" (segment * X86_GDT_ENTRY_SIZE)); 100 } 101 102 static void load_ss(u32 segment) 103 { 104 asm volatile("movl %0, %%ss" : : "r" (segment * X86_GDT_ENTRY_SIZE)); 105 } 106 107 static void load_gdt(const u64 *boot_gdt, u16 num_entries) 108 { 109 struct gdt_ptr gdt; 110 111 gdt.len = (num_entries * X86_GDT_ENTRY_SIZE) - 1; 112 gdt.ptr = (ulong)boot_gdt; 113 114 asm volatile("lgdtl %0\n" : : "m" (gdt)); 115 } 116 117 void arch_setup_gd(gd_t *new_gd) 118 { 119 u64 *gdt_addr; 120 121 gdt_addr = new_gd->arch.gdt; 122 123 /* 124 * CS: code, read/execute, 4 GB, base 0 125 * 126 * Some OS (like VxWorks) requires GDT entry 1 to be the 32-bit CS 127 */ 128 gdt_addr[X86_GDT_ENTRY_UNUSED] = GDT_ENTRY(0xc09b, 0, 0xfffff); 129 gdt_addr[X86_GDT_ENTRY_32BIT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff); 130 131 /* DS: data, read/write, 4 GB, base 0 */ 132 gdt_addr[X86_GDT_ENTRY_32BIT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff); 133 134 /* FS: data, read/write, 4 GB, base (Global Data Pointer) */ 135 new_gd->arch.gd_addr = new_gd; 136 gdt_addr[X86_GDT_ENTRY_32BIT_FS] = GDT_ENTRY(0xc093, 137 (ulong)&new_gd->arch.gd_addr, 0xfffff); 138 139 /* 16-bit CS: code, read/execute, 64 kB, base 0 */ 140 gdt_addr[X86_GDT_ENTRY_16BIT_CS] = GDT_ENTRY(0x009b, 0, 0x0ffff); 141 142 /* 16-bit DS: data, read/write, 64 kB, base 0 */ 143 gdt_addr[X86_GDT_ENTRY_16BIT_DS] = GDT_ENTRY(0x0093, 0, 0x0ffff); 144 145 gdt_addr[X86_GDT_ENTRY_16BIT_FLAT_CS] = GDT_ENTRY(0x809b, 0, 0xfffff); 146 gdt_addr[X86_GDT_ENTRY_16BIT_FLAT_DS] = GDT_ENTRY(0x8093, 0, 0xfffff); 147 148 load_gdt(gdt_addr, X86_GDT_NUM_ENTRIES); 149 load_ds(X86_GDT_ENTRY_32BIT_DS); 150 load_es(X86_GDT_ENTRY_32BIT_DS); 151 load_gs(X86_GDT_ENTRY_32BIT_DS); 152 load_ss(X86_GDT_ENTRY_32BIT_DS); 153 load_fs(X86_GDT_ENTRY_32BIT_FS); 154 } 155 156 #ifdef CONFIG_HAVE_FSP 157 /* 158 * Setup FSP execution environment GDT 159 * 160 * Per Intel FSP external architecture specification, before calling any FSP 161 * APIs, we need make sure the system is in flat 32-bit mode and both the code 162 * and data selectors should have full 4GB access range. Here we reuse the one 163 * we used in arch/x86/cpu/start16.S, and reload the segement registers. 164 */ 165 void setup_fsp_gdt(void) 166 { 167 load_gdt((const u64 *)(gdt_rom + CONFIG_RESET_SEG_START), 4); 168 load_ds(X86_GDT_ENTRY_32BIT_DS); 169 load_ss(X86_GDT_ENTRY_32BIT_DS); 170 load_es(X86_GDT_ENTRY_32BIT_DS); 171 load_fs(X86_GDT_ENTRY_32BIT_DS); 172 load_gs(X86_GDT_ENTRY_32BIT_DS); 173 } 174 #endif 175 176 /* 177 * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected 178 * by the fact that they preserve the flags across the division of 5/2. 179 * PII and PPro exhibit this behavior too, but they have cpuid available. 180 */ 181 182 /* 183 * Perform the Cyrix 5/2 test. A Cyrix won't change 184 * the flags, while other 486 chips will. 185 */ 186 static inline int test_cyrix_52div(void) 187 { 188 unsigned int test; 189 190 __asm__ __volatile__( 191 "sahf\n\t" /* clear flags (%eax = 0x0005) */ 192 "div %b2\n\t" /* divide 5 by 2 */ 193 "lahf" /* store flags into %ah */ 194 : "=a" (test) 195 : "0" (5), "q" (2) 196 : "cc"); 197 198 /* AH is 0x02 on Cyrix after the divide.. */ 199 return (unsigned char) (test >> 8) == 0x02; 200 } 201 202 /* 203 * Detect a NexGen CPU running without BIOS hypercode new enough 204 * to have CPUID. (Thanks to Herbert Oppmann) 205 */ 206 static int deep_magic_nexgen_probe(void) 207 { 208 int ret; 209 210 __asm__ __volatile__ ( 211 " movw $0x5555, %%ax\n" 212 " xorw %%dx,%%dx\n" 213 " movw $2, %%cx\n" 214 " divw %%cx\n" 215 " movl $0, %%eax\n" 216 " jnz 1f\n" 217 " movl $1, %%eax\n" 218 "1:\n" 219 : "=a" (ret) : : "cx", "dx"); 220 return ret; 221 } 222 223 static bool has_cpuid(void) 224 { 225 return flag_is_changeable_p(X86_EFLAGS_ID); 226 } 227 228 static bool has_mtrr(void) 229 { 230 return cpuid_edx(0x00000001) & (1 << 12) ? true : false; 231 } 232 233 static int build_vendor_name(char *vendor_name) 234 { 235 struct cpuid_result result; 236 result = cpuid(0x00000000); 237 unsigned int *name_as_ints = (unsigned int *)vendor_name; 238 239 name_as_ints[0] = result.ebx; 240 name_as_ints[1] = result.edx; 241 name_as_ints[2] = result.ecx; 242 243 return result.eax; 244 } 245 246 static void identify_cpu(struct cpu_device_id *cpu) 247 { 248 char vendor_name[16]; 249 int i; 250 251 vendor_name[0] = '\0'; /* Unset */ 252 cpu->device = 0; /* fix gcc 4.4.4 warning */ 253 254 /* Find the id and vendor_name */ 255 if (!has_cpuid()) { 256 /* Its a 486 if we can modify the AC flag */ 257 if (flag_is_changeable_p(X86_EFLAGS_AC)) 258 cpu->device = 0x00000400; /* 486 */ 259 else 260 cpu->device = 0x00000300; /* 386 */ 261 if ((cpu->device == 0x00000400) && test_cyrix_52div()) { 262 memcpy(vendor_name, "CyrixInstead", 13); 263 /* If we ever care we can enable cpuid here */ 264 } 265 /* Detect NexGen with old hypercode */ 266 else if (deep_magic_nexgen_probe()) 267 memcpy(vendor_name, "NexGenDriven", 13); 268 } 269 if (has_cpuid()) { 270 int cpuid_level; 271 272 cpuid_level = build_vendor_name(vendor_name); 273 vendor_name[12] = '\0'; 274 275 /* Intel-defined flags: level 0x00000001 */ 276 if (cpuid_level >= 0x00000001) { 277 cpu->device = cpuid_eax(0x00000001); 278 } else { 279 /* Have CPUID level 0 only unheard of */ 280 cpu->device = 0x00000400; 281 } 282 } 283 cpu->vendor = X86_VENDOR_UNKNOWN; 284 for (i = 0; i < ARRAY_SIZE(x86_vendors); i++) { 285 if (memcmp(vendor_name, x86_vendors[i].name, 12) == 0) { 286 cpu->vendor = x86_vendors[i].vendor; 287 break; 288 } 289 } 290 } 291 292 static inline void get_fms(struct cpuinfo_x86 *c, uint32_t tfms) 293 { 294 c->x86 = (tfms >> 8) & 0xf; 295 c->x86_model = (tfms >> 4) & 0xf; 296 c->x86_mask = tfms & 0xf; 297 if (c->x86 == 0xf) 298 c->x86 += (tfms >> 20) & 0xff; 299 if (c->x86 >= 0x6) 300 c->x86_model += ((tfms >> 16) & 0xF) << 4; 301 } 302 303 u32 cpu_get_family_model(void) 304 { 305 return gd->arch.x86_device & 0x0fff0ff0; 306 } 307 308 u32 cpu_get_stepping(void) 309 { 310 return gd->arch.x86_mask; 311 } 312 313 int x86_cpu_init_f(void) 314 { 315 const u32 em_rst = ~X86_CR0_EM; 316 const u32 mp_ne_set = X86_CR0_MP | X86_CR0_NE; 317 318 if (ll_boot_init()) { 319 /* initialize FPU, reset EM, set MP and NE */ 320 asm ("fninit\n" \ 321 "movl %%cr0, %%eax\n" \ 322 "andl %0, %%eax\n" \ 323 "orl %1, %%eax\n" \ 324 "movl %%eax, %%cr0\n" \ 325 : : "i" (em_rst), "i" (mp_ne_set) : "eax"); 326 } 327 328 /* identify CPU via cpuid and store the decoded info into gd->arch */ 329 if (has_cpuid()) { 330 struct cpu_device_id cpu; 331 struct cpuinfo_x86 c; 332 333 identify_cpu(&cpu); 334 get_fms(&c, cpu.device); 335 gd->arch.x86 = c.x86; 336 gd->arch.x86_vendor = cpu.vendor; 337 gd->arch.x86_model = c.x86_model; 338 gd->arch.x86_mask = c.x86_mask; 339 gd->arch.x86_device = cpu.device; 340 341 gd->arch.has_mtrr = has_mtrr(); 342 } 343 /* Don't allow PCI region 3 to use memory in the 2-4GB memory hole */ 344 gd->pci_ram_top = 0x80000000U; 345 346 /* Configure fixed range MTRRs for some legacy regions */ 347 if (gd->arch.has_mtrr) { 348 u64 mtrr_cap; 349 350 mtrr_cap = native_read_msr(MTRR_CAP_MSR); 351 if (mtrr_cap & MTRR_CAP_FIX) { 352 /* Mark the VGA RAM area as uncacheable */ 353 native_write_msr(MTRR_FIX_16K_A0000_MSR, 354 MTRR_FIX_TYPE(MTRR_TYPE_UNCACHEABLE), 355 MTRR_FIX_TYPE(MTRR_TYPE_UNCACHEABLE)); 356 357 /* 358 * Mark the PCI ROM area as cacheable to improve ROM 359 * execution performance. 360 */ 361 native_write_msr(MTRR_FIX_4K_C0000_MSR, 362 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK), 363 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK)); 364 native_write_msr(MTRR_FIX_4K_C8000_MSR, 365 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK), 366 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK)); 367 native_write_msr(MTRR_FIX_4K_D0000_MSR, 368 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK), 369 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK)); 370 native_write_msr(MTRR_FIX_4K_D8000_MSR, 371 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK), 372 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK)); 373 374 /* Enable the fixed range MTRRs */ 375 msr_setbits_64(MTRR_DEF_TYPE_MSR, MTRR_DEF_TYPE_FIX_EN); 376 } 377 } 378 379 #ifdef CONFIG_I8254_TIMER 380 /* Set up the i8254 timer if required */ 381 i8254_init(); 382 #endif 383 384 return 0; 385 } 386 387 void x86_enable_caches(void) 388 { 389 unsigned long cr0; 390 391 cr0 = read_cr0(); 392 cr0 &= ~(X86_CR0_NW | X86_CR0_CD); 393 write_cr0(cr0); 394 wbinvd(); 395 } 396 void enable_caches(void) __attribute__((weak, alias("x86_enable_caches"))); 397 398 void x86_disable_caches(void) 399 { 400 unsigned long cr0; 401 402 cr0 = read_cr0(); 403 cr0 |= X86_CR0_NW | X86_CR0_CD; 404 wbinvd(); 405 write_cr0(cr0); 406 wbinvd(); 407 } 408 void disable_caches(void) __attribute__((weak, alias("x86_disable_caches"))); 409 410 int dcache_status(void) 411 { 412 return !(read_cr0() & X86_CR0_CD); 413 } 414 415 void cpu_enable_paging_pae(ulong cr3) 416 { 417 __asm__ __volatile__( 418 /* Load the page table address */ 419 "movl %0, %%cr3\n" 420 /* Enable pae */ 421 "movl %%cr4, %%eax\n" 422 "orl $0x00000020, %%eax\n" 423 "movl %%eax, %%cr4\n" 424 /* Enable paging */ 425 "movl %%cr0, %%eax\n" 426 "orl $0x80000000, %%eax\n" 427 "movl %%eax, %%cr0\n" 428 : 429 : "r" (cr3) 430 : "eax"); 431 } 432 433 void cpu_disable_paging_pae(void) 434 { 435 /* Turn off paging */ 436 __asm__ __volatile__ ( 437 /* Disable paging */ 438 "movl %%cr0, %%eax\n" 439 "andl $0x7fffffff, %%eax\n" 440 "movl %%eax, %%cr0\n" 441 /* Disable pae */ 442 "movl %%cr4, %%eax\n" 443 "andl $0xffffffdf, %%eax\n" 444 "movl %%eax, %%cr4\n" 445 : 446 : 447 : "eax"); 448 } 449 450 static bool can_detect_long_mode(void) 451 { 452 return cpuid_eax(0x80000000) > 0x80000000UL; 453 } 454 455 static bool has_long_mode(void) 456 { 457 return cpuid_edx(0x80000001) & (1 << 29) ? true : false; 458 } 459 460 int cpu_has_64bit(void) 461 { 462 return has_cpuid() && can_detect_long_mode() && 463 has_long_mode(); 464 } 465 466 #define PAGETABLE_SIZE (6 * 4096) 467 468 /** 469 * build_pagetable() - build a flat 4GiB page table structure for 64-bti mode 470 * 471 * @pgtable: Pointer to a 24iKB block of memory 472 */ 473 static void build_pagetable(uint32_t *pgtable) 474 { 475 uint i; 476 477 memset(pgtable, '\0', PAGETABLE_SIZE); 478 479 /* Level 4 needs a single entry */ 480 pgtable[0] = (ulong)&pgtable[1024] + 7; 481 482 /* Level 3 has one 64-bit entry for each GiB of memory */ 483 for (i = 0; i < 4; i++) 484 pgtable[1024 + i * 2] = (ulong)&pgtable[2048] + 0x1000 * i + 7; 485 486 /* Level 2 has 2048 64-bit entries, each repesenting 2MiB */ 487 for (i = 0; i < 2048; i++) 488 pgtable[2048 + i * 2] = 0x183 + (i << 21UL); 489 } 490 491 int cpu_jump_to_64bit(ulong setup_base, ulong target) 492 { 493 uint32_t *pgtable; 494 495 pgtable = memalign(4096, PAGETABLE_SIZE); 496 if (!pgtable) 497 return -ENOMEM; 498 499 build_pagetable(pgtable); 500 cpu_call64((ulong)pgtable, setup_base, target); 501 free(pgtable); 502 503 return -EFAULT; 504 } 505 506 /* 507 * Jump from SPL to U-Boot 508 * 509 * This function is work-in-progress with many issues to resolve. 510 * 511 * It works by setting up several regions: 512 * ptr - a place to put the code that jumps into 64-bit mode 513 * gdt - a place to put the global descriptor table 514 * pgtable - a place to put the page tables 515 * 516 * The cpu_call64() code is copied from ROM and then manually patched so that 517 * it has the correct GDT address in RAM. U-Boot is copied from ROM into 518 * its pre-relocation address. Then we jump to the cpu_call64() code in RAM, 519 * which changes to 64-bit mode and starts U-Boot. 520 */ 521 int cpu_jump_to_64bit_uboot(ulong target) 522 { 523 typedef void (*func_t)(ulong pgtable, ulong setup_base, ulong target); 524 uint32_t *pgtable; 525 func_t func; 526 527 /* TODO(sjg@chromium.org): Find a better place for this */ 528 pgtable = (uint32_t *)0x1000000; 529 if (!pgtable) 530 return -ENOMEM; 531 532 build_pagetable(pgtable); 533 534 /* TODO(sjg@chromium.org): Find a better place for this */ 535 char *ptr = (char *)0x3000000; 536 char *gdt = (char *)0x3100000; 537 538 extern char gdt64[]; 539 540 memcpy(ptr, cpu_call64, 0x1000); 541 memcpy(gdt, gdt64, 0x100); 542 543 /* 544 * TODO(sjg@chromium.org): This manually inserts the pointers into 545 * the code. Tidy this up to avoid this. 546 */ 547 func = (func_t)ptr; 548 ulong ofs = (ulong)cpu_call64 - (ulong)ptr; 549 *(ulong *)(ptr + 7) = (ulong)gdt; 550 *(ulong *)(ptr + 0xc) = (ulong)gdt + 2; 551 *(ulong *)(ptr + 0x13) = (ulong)gdt; 552 *(ulong *)(ptr + 0x117 - 0xd4) -= ofs; 553 554 /* 555 * Copy U-Boot from ROM 556 * TODO(sjg@chromium.org): Figure out a way to get the text base 557 * correctly here, and in the device-tree binman definition. 558 * 559 * Also consider using FIT so we get the correct image length and 560 * parameters. 561 */ 562 memcpy((char *)target, (char *)0xfff00000, 0x100000); 563 564 /* Jump to U-Boot */ 565 func((ulong)pgtable, 0, (ulong)target); 566 567 return -EFAULT; 568 } 569 570 #ifdef CONFIG_SMP 571 static int enable_smis(struct udevice *cpu, void *unused) 572 { 573 return 0; 574 } 575 576 static struct mp_flight_record mp_steps[] = { 577 MP_FR_BLOCK_APS(mp_init_cpu, NULL, mp_init_cpu, NULL), 578 /* Wait for APs to finish initialization before proceeding */ 579 MP_FR_BLOCK_APS(NULL, NULL, enable_smis, NULL), 580 }; 581 582 int x86_mp_init(void) 583 { 584 struct mp_params mp_params; 585 586 mp_params.parallel_microcode_load = 0, 587 mp_params.flight_plan = &mp_steps[0]; 588 mp_params.num_records = ARRAY_SIZE(mp_steps); 589 mp_params.microcode_pointer = 0; 590 591 if (mp_init(&mp_params)) { 592 printf("Warning: MP init failure\n"); 593 return -EIO; 594 } 595 596 return 0; 597 } 598 #endif 599