1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2015 Google, Inc 4 * 5 * Based on code from the coreboot file of the same name 6 */ 7 8 #include <common.h> 9 #include <cpu.h> 10 #include <dm.h> 11 #include <errno.h> 12 #include <malloc.h> 13 #include <qfw.h> 14 #include <asm/atomic.h> 15 #include <asm/cpu.h> 16 #include <asm/interrupt.h> 17 #include <asm/lapic.h> 18 #include <asm/microcode.h> 19 #include <asm/mp.h> 20 #include <asm/msr.h> 21 #include <asm/mtrr.h> 22 #include <asm/processor.h> 23 #include <asm/sipi.h> 24 #include <dm/device-internal.h> 25 #include <dm/uclass-internal.h> 26 #include <dm/lists.h> 27 #include <dm/root.h> 28 #include <linux/linkage.h> 29 30 DECLARE_GLOBAL_DATA_PTR; 31 32 /* Total CPUs include BSP */ 33 static int num_cpus; 34 35 /* This also needs to match the sipi.S assembly code for saved MSR encoding */ 36 struct saved_msr { 37 uint32_t index; 38 uint32_t lo; 39 uint32_t hi; 40 } __packed; 41 42 43 struct mp_flight_plan { 44 int num_records; 45 struct mp_flight_record *records; 46 }; 47 48 static struct mp_flight_plan mp_info; 49 50 struct cpu_map { 51 struct udevice *dev; 52 int apic_id; 53 int err_code; 54 }; 55 56 static inline void barrier_wait(atomic_t *b) 57 { 58 while (atomic_read(b) == 0) 59 asm("pause"); 60 mfence(); 61 } 62 63 static inline void release_barrier(atomic_t *b) 64 { 65 mfence(); 66 atomic_set(b, 1); 67 } 68 69 static inline void stop_this_cpu(void) 70 { 71 /* Called by an AP when it is ready to halt and wait for a new task */ 72 for (;;) 73 cpu_hlt(); 74 } 75 76 /* Returns 1 if timeout waiting for APs. 0 if target APs found */ 77 static int wait_for_aps(atomic_t *val, int target, int total_delay, 78 int delay_step) 79 { 80 int timeout = 0; 81 int delayed = 0; 82 83 while (atomic_read(val) != target) { 84 udelay(delay_step); 85 delayed += delay_step; 86 if (delayed >= total_delay) { 87 timeout = 1; 88 break; 89 } 90 } 91 92 return timeout; 93 } 94 95 static void ap_do_flight_plan(struct udevice *cpu) 96 { 97 int i; 98 99 for (i = 0; i < mp_info.num_records; i++) { 100 struct mp_flight_record *rec = &mp_info.records[i]; 101 102 atomic_inc(&rec->cpus_entered); 103 barrier_wait(&rec->barrier); 104 105 if (rec->ap_call != NULL) 106 rec->ap_call(cpu, rec->ap_arg); 107 } 108 } 109 110 static int find_cpu_by_apic_id(int apic_id, struct udevice **devp) 111 { 112 struct udevice *dev; 113 114 *devp = NULL; 115 for (uclass_find_first_device(UCLASS_CPU, &dev); 116 dev; 117 uclass_find_next_device(&dev)) { 118 struct cpu_platdata *plat = dev_get_parent_platdata(dev); 119 120 if (plat->cpu_id == apic_id) { 121 *devp = dev; 122 return 0; 123 } 124 } 125 126 return -ENOENT; 127 } 128 129 /* 130 * By the time APs call ap_init() caching has been setup, and microcode has 131 * been loaded 132 */ 133 static void ap_init(unsigned int cpu_index) 134 { 135 struct udevice *dev; 136 int apic_id; 137 int ret; 138 139 /* Ensure the local apic is enabled */ 140 enable_lapic(); 141 142 apic_id = lapicid(); 143 ret = find_cpu_by_apic_id(apic_id, &dev); 144 if (ret) { 145 debug("Unknown CPU apic_id %x\n", apic_id); 146 goto done; 147 } 148 149 debug("AP: slot %d apic_id %x, dev %s\n", cpu_index, apic_id, 150 dev ? dev->name : "(apic_id not found)"); 151 152 /* Walk the flight plan */ 153 ap_do_flight_plan(dev); 154 155 /* Park the AP */ 156 debug("parking\n"); 157 done: 158 stop_this_cpu(); 159 } 160 161 static const unsigned int fixed_mtrrs[NUM_FIXED_MTRRS] = { 162 MTRR_FIX_64K_00000_MSR, MTRR_FIX_16K_80000_MSR, MTRR_FIX_16K_A0000_MSR, 163 MTRR_FIX_4K_C0000_MSR, MTRR_FIX_4K_C8000_MSR, MTRR_FIX_4K_D0000_MSR, 164 MTRR_FIX_4K_D8000_MSR, MTRR_FIX_4K_E0000_MSR, MTRR_FIX_4K_E8000_MSR, 165 MTRR_FIX_4K_F0000_MSR, MTRR_FIX_4K_F8000_MSR, 166 }; 167 168 static inline struct saved_msr *save_msr(int index, struct saved_msr *entry) 169 { 170 msr_t msr; 171 172 msr = msr_read(index); 173 entry->index = index; 174 entry->lo = msr.lo; 175 entry->hi = msr.hi; 176 177 /* Return the next entry */ 178 entry++; 179 return entry; 180 } 181 182 static int save_bsp_msrs(char *start, int size) 183 { 184 int msr_count; 185 int num_var_mtrrs; 186 struct saved_msr *msr_entry; 187 int i; 188 msr_t msr; 189 190 /* Determine number of MTRRs need to be saved */ 191 msr = msr_read(MTRR_CAP_MSR); 192 num_var_mtrrs = msr.lo & 0xff; 193 194 /* 2 * num_var_mtrrs for base and mask. +1 for IA32_MTRR_DEF_TYPE */ 195 msr_count = 2 * num_var_mtrrs + NUM_FIXED_MTRRS + 1; 196 197 if ((msr_count * sizeof(struct saved_msr)) > size) { 198 printf("Cannot mirror all %d msrs\n", msr_count); 199 return -ENOSPC; 200 } 201 202 msr_entry = (void *)start; 203 for (i = 0; i < NUM_FIXED_MTRRS; i++) 204 msr_entry = save_msr(fixed_mtrrs[i], msr_entry); 205 206 for (i = 0; i < num_var_mtrrs; i++) { 207 msr_entry = save_msr(MTRR_PHYS_BASE_MSR(i), msr_entry); 208 msr_entry = save_msr(MTRR_PHYS_MASK_MSR(i), msr_entry); 209 } 210 211 msr_entry = save_msr(MTRR_DEF_TYPE_MSR, msr_entry); 212 213 return msr_count; 214 } 215 216 static int load_sipi_vector(atomic_t **ap_countp, int num_cpus) 217 { 218 struct sipi_params_16bit *params16; 219 struct sipi_params *params; 220 static char msr_save[512]; 221 char *stack; 222 ulong addr; 223 int code_len; 224 int size; 225 int ret; 226 227 /* Copy in the code */ 228 code_len = ap_start16_code_end - ap_start16; 229 debug("Copying SIPI code to %x: %d bytes\n", AP_DEFAULT_BASE, 230 code_len); 231 memcpy((void *)AP_DEFAULT_BASE, ap_start16, code_len); 232 233 addr = AP_DEFAULT_BASE + (ulong)sipi_params_16bit - (ulong)ap_start16; 234 params16 = (struct sipi_params_16bit *)addr; 235 params16->ap_start = (uint32_t)ap_start; 236 params16->gdt = (uint32_t)gd->arch.gdt; 237 params16->gdt_limit = X86_GDT_SIZE - 1; 238 debug("gdt = %x, gdt_limit = %x\n", params16->gdt, params16->gdt_limit); 239 240 params = (struct sipi_params *)sipi_params; 241 debug("SIPI 32-bit params at %p\n", params); 242 params->idt_ptr = (uint32_t)x86_get_idt(); 243 244 params->stack_size = CONFIG_AP_STACK_SIZE; 245 size = params->stack_size * num_cpus; 246 stack = memalign(4096, size); 247 if (!stack) 248 return -ENOMEM; 249 params->stack_top = (u32)(stack + size); 250 #if !defined(CONFIG_QEMU) && !defined(CONFIG_HAVE_FSP) && \ 251 !defined(CONFIG_INTEL_MID) 252 params->microcode_ptr = ucode_base; 253 debug("Microcode at %x\n", params->microcode_ptr); 254 #endif 255 params->msr_table_ptr = (u32)msr_save; 256 ret = save_bsp_msrs(msr_save, sizeof(msr_save)); 257 if (ret < 0) 258 return ret; 259 params->msr_count = ret; 260 261 params->c_handler = (uint32_t)&ap_init; 262 263 *ap_countp = ¶ms->ap_count; 264 atomic_set(*ap_countp, 0); 265 debug("SIPI vector is ready\n"); 266 267 return 0; 268 } 269 270 static int check_cpu_devices(int expected_cpus) 271 { 272 int i; 273 274 for (i = 0; i < expected_cpus; i++) { 275 struct udevice *dev; 276 int ret; 277 278 ret = uclass_find_device(UCLASS_CPU, i, &dev); 279 if (ret) { 280 debug("Cannot find CPU %d in device tree\n", i); 281 return ret; 282 } 283 } 284 285 return 0; 286 } 287 288 /* Returns 1 for timeout. 0 on success */ 289 static int apic_wait_timeout(int total_delay, const char *msg) 290 { 291 int total = 0; 292 293 if (!(lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) 294 return 0; 295 296 debug("Waiting for %s...", msg); 297 while (lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY) { 298 udelay(50); 299 total += 50; 300 if (total >= total_delay) { 301 debug("timed out: aborting\n"); 302 return -ETIMEDOUT; 303 } 304 } 305 debug("done\n"); 306 307 return 0; 308 } 309 310 static int start_aps(int ap_count, atomic_t *num_aps) 311 { 312 int sipi_vector; 313 /* Max location is 4KiB below 1MiB */ 314 const int max_vector_loc = ((1 << 20) - (1 << 12)) >> 12; 315 316 if (ap_count == 0) 317 return 0; 318 319 /* The vector is sent as a 4k aligned address in one byte */ 320 sipi_vector = AP_DEFAULT_BASE >> 12; 321 322 if (sipi_vector > max_vector_loc) { 323 printf("SIPI vector too large! 0x%08x\n", 324 sipi_vector); 325 return -1; 326 } 327 328 debug("Attempting to start %d APs\n", ap_count); 329 330 if (apic_wait_timeout(1000, "ICR not to be busy")) 331 return -ETIMEDOUT; 332 333 /* Send INIT IPI to all but self */ 334 lapic_write(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0)); 335 lapic_write(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT | 336 LAPIC_DM_INIT); 337 debug("Waiting for 10ms after sending INIT\n"); 338 mdelay(10); 339 340 /* Send 1st SIPI */ 341 if (apic_wait_timeout(1000, "ICR not to be busy")) 342 return -ETIMEDOUT; 343 344 lapic_write(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0)); 345 lapic_write(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT | 346 LAPIC_DM_STARTUP | sipi_vector); 347 if (apic_wait_timeout(10000, "first SIPI to complete")) 348 return -ETIMEDOUT; 349 350 /* Wait for CPUs to check in up to 200 us */ 351 wait_for_aps(num_aps, ap_count, 200, 15); 352 353 /* Send 2nd SIPI */ 354 if (apic_wait_timeout(1000, "ICR not to be busy")) 355 return -ETIMEDOUT; 356 357 lapic_write(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0)); 358 lapic_write(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT | 359 LAPIC_DM_STARTUP | sipi_vector); 360 if (apic_wait_timeout(10000, "second SIPI to complete")) 361 return -ETIMEDOUT; 362 363 /* Wait for CPUs to check in */ 364 if (wait_for_aps(num_aps, ap_count, 10000, 50)) { 365 debug("Not all APs checked in: %d/%d\n", 366 atomic_read(num_aps), ap_count); 367 return -1; 368 } 369 370 return 0; 371 } 372 373 static int bsp_do_flight_plan(struct udevice *cpu, struct mp_params *mp_params) 374 { 375 int i; 376 int ret = 0; 377 const int timeout_us = 100000; 378 const int step_us = 100; 379 int num_aps = num_cpus - 1; 380 381 for (i = 0; i < mp_params->num_records; i++) { 382 struct mp_flight_record *rec = &mp_params->flight_plan[i]; 383 384 /* Wait for APs if the record is not released */ 385 if (atomic_read(&rec->barrier) == 0) { 386 /* Wait for the APs to check in */ 387 if (wait_for_aps(&rec->cpus_entered, num_aps, 388 timeout_us, step_us)) { 389 debug("MP record %d timeout\n", i); 390 ret = -1; 391 } 392 } 393 394 if (rec->bsp_call != NULL) 395 rec->bsp_call(cpu, rec->bsp_arg); 396 397 release_barrier(&rec->barrier); 398 } 399 return ret; 400 } 401 402 static int init_bsp(struct udevice **devp) 403 { 404 char processor_name[CPU_MAX_NAME_LEN]; 405 int apic_id; 406 int ret; 407 408 cpu_get_name(processor_name); 409 debug("CPU: %s\n", processor_name); 410 411 apic_id = lapicid(); 412 ret = find_cpu_by_apic_id(apic_id, devp); 413 if (ret) { 414 printf("Cannot find boot CPU, APIC ID %d\n", apic_id); 415 return ret; 416 } 417 418 return 0; 419 } 420 421 #ifdef CONFIG_QFW 422 static int qemu_cpu_fixup(void) 423 { 424 int ret; 425 int cpu_num; 426 int cpu_online; 427 struct udevice *dev, *pdev; 428 struct cpu_platdata *plat; 429 char *cpu; 430 431 /* first we need to find '/cpus' */ 432 for (device_find_first_child(dm_root(), &pdev); 433 pdev; 434 device_find_next_child(&pdev)) { 435 if (!strcmp(pdev->name, "cpus")) 436 break; 437 } 438 if (!pdev) { 439 printf("unable to find cpus device\n"); 440 return -ENODEV; 441 } 442 443 /* calculate cpus that are already bound */ 444 cpu_num = 0; 445 for (uclass_find_first_device(UCLASS_CPU, &dev); 446 dev; 447 uclass_find_next_device(&dev)) { 448 cpu_num++; 449 } 450 451 /* get actual cpu number */ 452 cpu_online = qemu_fwcfg_online_cpus(); 453 if (cpu_online < 0) { 454 printf("unable to get online cpu number: %d\n", cpu_online); 455 return cpu_online; 456 } 457 458 /* bind addtional cpus */ 459 dev = NULL; 460 for (; cpu_num < cpu_online; cpu_num++) { 461 /* 462 * allocate device name here as device_bind_driver() does 463 * not copy device name, 8 bytes are enough for 464 * sizeof("cpu@") + 3 digits cpu number + '\0' 465 */ 466 cpu = malloc(8); 467 if (!cpu) { 468 printf("unable to allocate device name\n"); 469 return -ENOMEM; 470 } 471 sprintf(cpu, "cpu@%d", cpu_num); 472 ret = device_bind_driver(pdev, "cpu_qemu", cpu, &dev); 473 if (ret) { 474 printf("binding cpu@%d failed: %d\n", cpu_num, ret); 475 return ret; 476 } 477 plat = dev_get_parent_platdata(dev); 478 plat->cpu_id = cpu_num; 479 } 480 return 0; 481 } 482 #endif 483 484 int mp_init(struct mp_params *p) 485 { 486 int num_aps; 487 atomic_t *ap_count; 488 struct udevice *cpu; 489 int ret; 490 491 /* This will cause the CPUs devices to be bound */ 492 struct uclass *uc; 493 ret = uclass_get(UCLASS_CPU, &uc); 494 if (ret) 495 return ret; 496 497 #ifdef CONFIG_QFW 498 ret = qemu_cpu_fixup(); 499 if (ret) 500 return ret; 501 #endif 502 503 ret = init_bsp(&cpu); 504 if (ret) { 505 debug("Cannot init boot CPU: err=%d\n", ret); 506 return ret; 507 } 508 509 if (p == NULL || p->flight_plan == NULL || p->num_records < 1) { 510 printf("Invalid MP parameters\n"); 511 return -1; 512 } 513 514 num_cpus = cpu_get_count(cpu); 515 if (num_cpus < 0) { 516 debug("Cannot get number of CPUs: err=%d\n", num_cpus); 517 return num_cpus; 518 } 519 520 if (num_cpus < 2) 521 debug("Warning: Only 1 CPU is detected\n"); 522 523 ret = check_cpu_devices(num_cpus); 524 if (ret) 525 debug("Warning: Device tree does not describe all CPUs. Extra ones will not be started correctly\n"); 526 527 /* Copy needed parameters so that APs have a reference to the plan */ 528 mp_info.num_records = p->num_records; 529 mp_info.records = p->flight_plan; 530 531 /* Load the SIPI vector */ 532 ret = load_sipi_vector(&ap_count, num_cpus); 533 if (ap_count == NULL) 534 return -1; 535 536 /* 537 * Make sure SIPI data hits RAM so the APs that come up will see 538 * the startup code even if the caches are disabled 539 */ 540 wbinvd(); 541 542 /* Start the APs providing number of APs and the cpus_entered field */ 543 num_aps = num_cpus - 1; 544 ret = start_aps(num_aps, ap_count); 545 if (ret) { 546 mdelay(1000); 547 debug("%d/%d eventually checked in?\n", atomic_read(ap_count), 548 num_aps); 549 return ret; 550 } 551 552 /* Walk the flight plan for the BSP */ 553 ret = bsp_do_flight_plan(cpu, p); 554 if (ret) { 555 debug("CPU init failed: err=%d\n", ret); 556 return ret; 557 } 558 559 return 0; 560 } 561 562 int mp_init_cpu(struct udevice *cpu, void *unused) 563 { 564 struct cpu_platdata *plat = dev_get_parent_platdata(cpu); 565 566 /* 567 * Multiple APs are brought up simultaneously and they may get the same 568 * seq num in the uclass_resolve_seq() during device_probe(). To avoid 569 * this, set req_seq to the reg number in the device tree in advance. 570 */ 571 cpu->req_seq = fdtdec_get_int(gd->fdt_blob, dev_of_offset(cpu), "reg", 572 -1); 573 plat->ucode_version = microcode_read_rev(); 574 plat->device_id = gd->arch.x86_device; 575 576 return device_probe(cpu); 577 } 578