1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * (C) Copyright 2008-2011 4 * Graeme Russ, <graeme.russ@gmail.com> 5 * 6 * (C) Copyright 2002 7 * Daniel Engström, Omicron Ceti AB, <daniel@omicron.se> 8 * 9 * (C) Copyright 2002 10 * Sysgo Real-Time Solutions, GmbH <www.elinos.com> 11 * Marius Groeger <mgroeger@sysgo.de> 12 * 13 * (C) Copyright 2002 14 * Sysgo Real-Time Solutions, GmbH <www.elinos.com> 15 * Alex Zuepke <azu@sysgo.de> 16 * 17 * Part of this file is adapted from coreboot 18 * src/arch/x86/lib/cpu.c 19 */ 20 21 #include <common.h> 22 #include <malloc.h> 23 #include <asm/control_regs.h> 24 #include <asm/cpu.h> 25 #include <asm/mp.h> 26 #include <asm/msr.h> 27 #include <asm/mtrr.h> 28 #include <asm/processor-flags.h> 29 30 DECLARE_GLOBAL_DATA_PTR; 31 32 /* 33 * Constructor for a conventional segment GDT (or LDT) entry 34 * This is a macro so it can be used in initialisers 35 */ 36 #define GDT_ENTRY(flags, base, limit) \ 37 ((((base) & 0xff000000ULL) << (56-24)) | \ 38 (((flags) & 0x0000f0ffULL) << 40) | \ 39 (((limit) & 0x000f0000ULL) << (48-16)) | \ 40 (((base) & 0x00ffffffULL) << 16) | \ 41 (((limit) & 0x0000ffffULL))) 42 43 struct gdt_ptr { 44 u16 len; 45 u32 ptr; 46 } __packed; 47 48 struct cpu_device_id { 49 unsigned vendor; 50 unsigned device; 51 }; 52 53 struct cpuinfo_x86 { 54 uint8_t x86; /* CPU family */ 55 uint8_t x86_vendor; /* CPU vendor */ 56 uint8_t x86_model; 57 uint8_t x86_mask; 58 }; 59 60 /* 61 * List of cpu vendor strings along with their normalized 62 * id values. 63 */ 64 static const struct { 65 int vendor; 66 const char *name; 67 } x86_vendors[] = { 68 { X86_VENDOR_INTEL, "GenuineIntel", }, 69 { X86_VENDOR_CYRIX, "CyrixInstead", }, 70 { X86_VENDOR_AMD, "AuthenticAMD", }, 71 { X86_VENDOR_UMC, "UMC UMC UMC ", }, 72 { X86_VENDOR_NEXGEN, "NexGenDriven", }, 73 { X86_VENDOR_CENTAUR, "CentaurHauls", }, 74 { X86_VENDOR_RISE, "RiseRiseRise", }, 75 { X86_VENDOR_TRANSMETA, "GenuineTMx86", }, 76 { X86_VENDOR_TRANSMETA, "TransmetaCPU", }, 77 { X86_VENDOR_NSC, "Geode by NSC", }, 78 { X86_VENDOR_SIS, "SiS SiS SiS ", }, 79 }; 80 81 static void load_ds(u32 segment) 82 { 83 asm volatile("movl %0, %%ds" : : "r" (segment * X86_GDT_ENTRY_SIZE)); 84 } 85 86 static void load_es(u32 segment) 87 { 88 asm volatile("movl %0, %%es" : : "r" (segment * X86_GDT_ENTRY_SIZE)); 89 } 90 91 static void load_fs(u32 segment) 92 { 93 asm volatile("movl %0, %%fs" : : "r" (segment * X86_GDT_ENTRY_SIZE)); 94 } 95 96 static void load_gs(u32 segment) 97 { 98 asm volatile("movl %0, %%gs" : : "r" (segment * X86_GDT_ENTRY_SIZE)); 99 } 100 101 static void load_ss(u32 segment) 102 { 103 asm volatile("movl %0, %%ss" : : "r" (segment * X86_GDT_ENTRY_SIZE)); 104 } 105 106 static void load_gdt(const u64 *boot_gdt, u16 num_entries) 107 { 108 struct gdt_ptr gdt; 109 110 gdt.len = (num_entries * X86_GDT_ENTRY_SIZE) - 1; 111 gdt.ptr = (ulong)boot_gdt; 112 113 asm volatile("lgdtl %0\n" : : "m" (gdt)); 114 } 115 116 void arch_setup_gd(gd_t *new_gd) 117 { 118 u64 *gdt_addr; 119 120 gdt_addr = new_gd->arch.gdt; 121 122 /* 123 * CS: code, read/execute, 4 GB, base 0 124 * 125 * Some OS (like VxWorks) requires GDT entry 1 to be the 32-bit CS 126 */ 127 gdt_addr[X86_GDT_ENTRY_UNUSED] = GDT_ENTRY(0xc09b, 0, 0xfffff); 128 gdt_addr[X86_GDT_ENTRY_32BIT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff); 129 130 /* DS: data, read/write, 4 GB, base 0 */ 131 gdt_addr[X86_GDT_ENTRY_32BIT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff); 132 133 /* FS: data, read/write, 4 GB, base (Global Data Pointer) */ 134 new_gd->arch.gd_addr = new_gd; 135 gdt_addr[X86_GDT_ENTRY_32BIT_FS] = GDT_ENTRY(0xc093, 136 (ulong)&new_gd->arch.gd_addr, 0xfffff); 137 138 /* 16-bit CS: code, read/execute, 64 kB, base 0 */ 139 gdt_addr[X86_GDT_ENTRY_16BIT_CS] = GDT_ENTRY(0x009b, 0, 0x0ffff); 140 141 /* 16-bit DS: data, read/write, 64 kB, base 0 */ 142 gdt_addr[X86_GDT_ENTRY_16BIT_DS] = GDT_ENTRY(0x0093, 0, 0x0ffff); 143 144 gdt_addr[X86_GDT_ENTRY_16BIT_FLAT_CS] = GDT_ENTRY(0x809b, 0, 0xfffff); 145 gdt_addr[X86_GDT_ENTRY_16BIT_FLAT_DS] = GDT_ENTRY(0x8093, 0, 0xfffff); 146 147 load_gdt(gdt_addr, X86_GDT_NUM_ENTRIES); 148 load_ds(X86_GDT_ENTRY_32BIT_DS); 149 load_es(X86_GDT_ENTRY_32BIT_DS); 150 load_gs(X86_GDT_ENTRY_32BIT_DS); 151 load_ss(X86_GDT_ENTRY_32BIT_DS); 152 load_fs(X86_GDT_ENTRY_32BIT_FS); 153 } 154 155 #ifdef CONFIG_HAVE_FSP 156 /* 157 * Setup FSP execution environment GDT 158 * 159 * Per Intel FSP external architecture specification, before calling any FSP 160 * APIs, we need make sure the system is in flat 32-bit mode and both the code 161 * and data selectors should have full 4GB access range. Here we reuse the one 162 * we used in arch/x86/cpu/start16.S, and reload the segement registers. 163 */ 164 void setup_fsp_gdt(void) 165 { 166 load_gdt((const u64 *)(gdt_rom + CONFIG_RESET_SEG_START), 4); 167 load_ds(X86_GDT_ENTRY_32BIT_DS); 168 load_ss(X86_GDT_ENTRY_32BIT_DS); 169 load_es(X86_GDT_ENTRY_32BIT_DS); 170 load_fs(X86_GDT_ENTRY_32BIT_DS); 171 load_gs(X86_GDT_ENTRY_32BIT_DS); 172 } 173 #endif 174 175 /* 176 * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected 177 * by the fact that they preserve the flags across the division of 5/2. 178 * PII and PPro exhibit this behavior too, but they have cpuid available. 179 */ 180 181 /* 182 * Perform the Cyrix 5/2 test. A Cyrix won't change 183 * the flags, while other 486 chips will. 184 */ 185 static inline int test_cyrix_52div(void) 186 { 187 unsigned int test; 188 189 __asm__ __volatile__( 190 "sahf\n\t" /* clear flags (%eax = 0x0005) */ 191 "div %b2\n\t" /* divide 5 by 2 */ 192 "lahf" /* store flags into %ah */ 193 : "=a" (test) 194 : "0" (5), "q" (2) 195 : "cc"); 196 197 /* AH is 0x02 on Cyrix after the divide.. */ 198 return (unsigned char) (test >> 8) == 0x02; 199 } 200 201 /* 202 * Detect a NexGen CPU running without BIOS hypercode new enough 203 * to have CPUID. (Thanks to Herbert Oppmann) 204 */ 205 static int deep_magic_nexgen_probe(void) 206 { 207 int ret; 208 209 __asm__ __volatile__ ( 210 " movw $0x5555, %%ax\n" 211 " xorw %%dx,%%dx\n" 212 " movw $2, %%cx\n" 213 " divw %%cx\n" 214 " movl $0, %%eax\n" 215 " jnz 1f\n" 216 " movl $1, %%eax\n" 217 "1:\n" 218 : "=a" (ret) : : "cx", "dx"); 219 return ret; 220 } 221 222 static bool has_cpuid(void) 223 { 224 return flag_is_changeable_p(X86_EFLAGS_ID); 225 } 226 227 static bool has_mtrr(void) 228 { 229 return cpuid_edx(0x00000001) & (1 << 12) ? true : false; 230 } 231 232 static int build_vendor_name(char *vendor_name) 233 { 234 struct cpuid_result result; 235 result = cpuid(0x00000000); 236 unsigned int *name_as_ints = (unsigned int *)vendor_name; 237 238 name_as_ints[0] = result.ebx; 239 name_as_ints[1] = result.edx; 240 name_as_ints[2] = result.ecx; 241 242 return result.eax; 243 } 244 245 static void identify_cpu(struct cpu_device_id *cpu) 246 { 247 char vendor_name[16]; 248 int i; 249 250 vendor_name[0] = '\0'; /* Unset */ 251 cpu->device = 0; /* fix gcc 4.4.4 warning */ 252 253 /* Find the id and vendor_name */ 254 if (!has_cpuid()) { 255 /* Its a 486 if we can modify the AC flag */ 256 if (flag_is_changeable_p(X86_EFLAGS_AC)) 257 cpu->device = 0x00000400; /* 486 */ 258 else 259 cpu->device = 0x00000300; /* 386 */ 260 if ((cpu->device == 0x00000400) && test_cyrix_52div()) { 261 memcpy(vendor_name, "CyrixInstead", 13); 262 /* If we ever care we can enable cpuid here */ 263 } 264 /* Detect NexGen with old hypercode */ 265 else if (deep_magic_nexgen_probe()) 266 memcpy(vendor_name, "NexGenDriven", 13); 267 } 268 if (has_cpuid()) { 269 int cpuid_level; 270 271 cpuid_level = build_vendor_name(vendor_name); 272 vendor_name[12] = '\0'; 273 274 /* Intel-defined flags: level 0x00000001 */ 275 if (cpuid_level >= 0x00000001) { 276 cpu->device = cpuid_eax(0x00000001); 277 } else { 278 /* Have CPUID level 0 only unheard of */ 279 cpu->device = 0x00000400; 280 } 281 } 282 cpu->vendor = X86_VENDOR_UNKNOWN; 283 for (i = 0; i < ARRAY_SIZE(x86_vendors); i++) { 284 if (memcmp(vendor_name, x86_vendors[i].name, 12) == 0) { 285 cpu->vendor = x86_vendors[i].vendor; 286 break; 287 } 288 } 289 } 290 291 static inline void get_fms(struct cpuinfo_x86 *c, uint32_t tfms) 292 { 293 c->x86 = (tfms >> 8) & 0xf; 294 c->x86_model = (tfms >> 4) & 0xf; 295 c->x86_mask = tfms & 0xf; 296 if (c->x86 == 0xf) 297 c->x86 += (tfms >> 20) & 0xff; 298 if (c->x86 >= 0x6) 299 c->x86_model += ((tfms >> 16) & 0xF) << 4; 300 } 301 302 u32 cpu_get_family_model(void) 303 { 304 return gd->arch.x86_device & 0x0fff0ff0; 305 } 306 307 u32 cpu_get_stepping(void) 308 { 309 return gd->arch.x86_mask; 310 } 311 312 int x86_cpu_init_f(void) 313 { 314 const u32 em_rst = ~X86_CR0_EM; 315 const u32 mp_ne_set = X86_CR0_MP | X86_CR0_NE; 316 317 if (ll_boot_init()) { 318 /* initialize FPU, reset EM, set MP and NE */ 319 asm ("fninit\n" \ 320 "movl %%cr0, %%eax\n" \ 321 "andl %0, %%eax\n" \ 322 "orl %1, %%eax\n" \ 323 "movl %%eax, %%cr0\n" \ 324 : : "i" (em_rst), "i" (mp_ne_set) : "eax"); 325 } 326 327 /* identify CPU via cpuid and store the decoded info into gd->arch */ 328 if (has_cpuid()) { 329 struct cpu_device_id cpu; 330 struct cpuinfo_x86 c; 331 332 identify_cpu(&cpu); 333 get_fms(&c, cpu.device); 334 gd->arch.x86 = c.x86; 335 gd->arch.x86_vendor = cpu.vendor; 336 gd->arch.x86_model = c.x86_model; 337 gd->arch.x86_mask = c.x86_mask; 338 gd->arch.x86_device = cpu.device; 339 340 gd->arch.has_mtrr = has_mtrr(); 341 } 342 /* Don't allow PCI region 3 to use memory in the 2-4GB memory hole */ 343 gd->pci_ram_top = 0x80000000U; 344 345 /* Configure fixed range MTRRs for some legacy regions */ 346 if (gd->arch.has_mtrr) { 347 u64 mtrr_cap; 348 349 mtrr_cap = native_read_msr(MTRR_CAP_MSR); 350 if (mtrr_cap & MTRR_CAP_FIX) { 351 /* Mark the VGA RAM area as uncacheable */ 352 native_write_msr(MTRR_FIX_16K_A0000_MSR, 353 MTRR_FIX_TYPE(MTRR_TYPE_UNCACHEABLE), 354 MTRR_FIX_TYPE(MTRR_TYPE_UNCACHEABLE)); 355 356 /* 357 * Mark the PCI ROM area as cacheable to improve ROM 358 * execution performance. 359 */ 360 native_write_msr(MTRR_FIX_4K_C0000_MSR, 361 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK), 362 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK)); 363 native_write_msr(MTRR_FIX_4K_C8000_MSR, 364 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK), 365 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK)); 366 native_write_msr(MTRR_FIX_4K_D0000_MSR, 367 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK), 368 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK)); 369 native_write_msr(MTRR_FIX_4K_D8000_MSR, 370 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK), 371 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK)); 372 373 /* Enable the fixed range MTRRs */ 374 msr_setbits_64(MTRR_DEF_TYPE_MSR, MTRR_DEF_TYPE_FIX_EN); 375 } 376 } 377 378 #ifdef CONFIG_I8254_TIMER 379 /* Set up the i8254 timer if required */ 380 i8254_init(); 381 #endif 382 383 return 0; 384 } 385 386 void x86_enable_caches(void) 387 { 388 unsigned long cr0; 389 390 cr0 = read_cr0(); 391 cr0 &= ~(X86_CR0_NW | X86_CR0_CD); 392 write_cr0(cr0); 393 wbinvd(); 394 } 395 void enable_caches(void) __attribute__((weak, alias("x86_enable_caches"))); 396 397 void x86_disable_caches(void) 398 { 399 unsigned long cr0; 400 401 cr0 = read_cr0(); 402 cr0 |= X86_CR0_NW | X86_CR0_CD; 403 wbinvd(); 404 write_cr0(cr0); 405 wbinvd(); 406 } 407 void disable_caches(void) __attribute__((weak, alias("x86_disable_caches"))); 408 409 int dcache_status(void) 410 { 411 return !(read_cr0() & X86_CR0_CD); 412 } 413 414 void cpu_enable_paging_pae(ulong cr3) 415 { 416 __asm__ __volatile__( 417 /* Load the page table address */ 418 "movl %0, %%cr3\n" 419 /* Enable pae */ 420 "movl %%cr4, %%eax\n" 421 "orl $0x00000020, %%eax\n" 422 "movl %%eax, %%cr4\n" 423 /* Enable paging */ 424 "movl %%cr0, %%eax\n" 425 "orl $0x80000000, %%eax\n" 426 "movl %%eax, %%cr0\n" 427 : 428 : "r" (cr3) 429 : "eax"); 430 } 431 432 void cpu_disable_paging_pae(void) 433 { 434 /* Turn off paging */ 435 __asm__ __volatile__ ( 436 /* Disable paging */ 437 "movl %%cr0, %%eax\n" 438 "andl $0x7fffffff, %%eax\n" 439 "movl %%eax, %%cr0\n" 440 /* Disable pae */ 441 "movl %%cr4, %%eax\n" 442 "andl $0xffffffdf, %%eax\n" 443 "movl %%eax, %%cr4\n" 444 : 445 : 446 : "eax"); 447 } 448 449 static bool can_detect_long_mode(void) 450 { 451 return cpuid_eax(0x80000000) > 0x80000000UL; 452 } 453 454 static bool has_long_mode(void) 455 { 456 return cpuid_edx(0x80000001) & (1 << 29) ? true : false; 457 } 458 459 int cpu_has_64bit(void) 460 { 461 return has_cpuid() && can_detect_long_mode() && 462 has_long_mode(); 463 } 464 465 #define PAGETABLE_SIZE (6 * 4096) 466 467 /** 468 * build_pagetable() - build a flat 4GiB page table structure for 64-bti mode 469 * 470 * @pgtable: Pointer to a 24iKB block of memory 471 */ 472 static void build_pagetable(uint32_t *pgtable) 473 { 474 uint i; 475 476 memset(pgtable, '\0', PAGETABLE_SIZE); 477 478 /* Level 4 needs a single entry */ 479 pgtable[0] = (ulong)&pgtable[1024] + 7; 480 481 /* Level 3 has one 64-bit entry for each GiB of memory */ 482 for (i = 0; i < 4; i++) 483 pgtable[1024 + i * 2] = (ulong)&pgtable[2048] + 0x1000 * i + 7; 484 485 /* Level 2 has 2048 64-bit entries, each repesenting 2MiB */ 486 for (i = 0; i < 2048; i++) 487 pgtable[2048 + i * 2] = 0x183 + (i << 21UL); 488 } 489 490 int cpu_jump_to_64bit(ulong setup_base, ulong target) 491 { 492 uint32_t *pgtable; 493 494 pgtable = memalign(4096, PAGETABLE_SIZE); 495 if (!pgtable) 496 return -ENOMEM; 497 498 build_pagetable(pgtable); 499 cpu_call64((ulong)pgtable, setup_base, target); 500 free(pgtable); 501 502 return -EFAULT; 503 } 504 505 /* 506 * Jump from SPL to U-Boot 507 * 508 * This function is work-in-progress with many issues to resolve. 509 * 510 * It works by setting up several regions: 511 * ptr - a place to put the code that jumps into 64-bit mode 512 * gdt - a place to put the global descriptor table 513 * pgtable - a place to put the page tables 514 * 515 * The cpu_call64() code is copied from ROM and then manually patched so that 516 * it has the correct GDT address in RAM. U-Boot is copied from ROM into 517 * its pre-relocation address. Then we jump to the cpu_call64() code in RAM, 518 * which changes to 64-bit mode and starts U-Boot. 519 */ 520 int cpu_jump_to_64bit_uboot(ulong target) 521 { 522 typedef void (*func_t)(ulong pgtable, ulong setup_base, ulong target); 523 uint32_t *pgtable; 524 func_t func; 525 526 /* TODO(sjg@chromium.org): Find a better place for this */ 527 pgtable = (uint32_t *)0x1000000; 528 if (!pgtable) 529 return -ENOMEM; 530 531 build_pagetable(pgtable); 532 533 /* TODO(sjg@chromium.org): Find a better place for this */ 534 char *ptr = (char *)0x3000000; 535 char *gdt = (char *)0x3100000; 536 537 extern char gdt64[]; 538 539 memcpy(ptr, cpu_call64, 0x1000); 540 memcpy(gdt, gdt64, 0x100); 541 542 /* 543 * TODO(sjg@chromium.org): This manually inserts the pointers into 544 * the code. Tidy this up to avoid this. 545 */ 546 func = (func_t)ptr; 547 ulong ofs = (ulong)cpu_call64 - (ulong)ptr; 548 *(ulong *)(ptr + 7) = (ulong)gdt; 549 *(ulong *)(ptr + 0xc) = (ulong)gdt + 2; 550 *(ulong *)(ptr + 0x13) = (ulong)gdt; 551 *(ulong *)(ptr + 0x117 - 0xd4) -= ofs; 552 553 /* 554 * Copy U-Boot from ROM 555 * TODO(sjg@chromium.org): Figure out a way to get the text base 556 * correctly here, and in the device-tree binman definition. 557 * 558 * Also consider using FIT so we get the correct image length and 559 * parameters. 560 */ 561 memcpy((char *)target, (char *)0xfff00000, 0x100000); 562 563 /* Jump to U-Boot */ 564 func((ulong)pgtable, 0, (ulong)target); 565 566 return -EFAULT; 567 } 568 569 #ifdef CONFIG_SMP 570 static int enable_smis(struct udevice *cpu, void *unused) 571 { 572 return 0; 573 } 574 575 static struct mp_flight_record mp_steps[] = { 576 MP_FR_BLOCK_APS(mp_init_cpu, NULL, mp_init_cpu, NULL), 577 /* Wait for APs to finish initialization before proceeding */ 578 MP_FR_BLOCK_APS(NULL, NULL, enable_smis, NULL), 579 }; 580 581 int x86_mp_init(void) 582 { 583 struct mp_params mp_params; 584 585 mp_params.parallel_microcode_load = 0, 586 mp_params.flight_plan = &mp_steps[0]; 587 mp_params.num_records = ARRAY_SIZE(mp_steps); 588 mp_params.microcode_pointer = 0; 589 590 if (mp_init(&mp_params)) { 591 printf("Warning: MP init failure\n"); 592 return -EIO; 593 } 594 595 return 0; 596 } 597 #endif 598