1 /* 2 * Copyright (C) 2015 Google, Inc 3 * 4 * SPDX-License-Identifier: GPL-2.0+ 5 * 6 * Based on code from the coreboot file of the same name 7 */ 8 9 #include <common.h> 10 #include <cpu.h> 11 #include <dm.h> 12 #include <errno.h> 13 #include <malloc.h> 14 #include <qfw.h> 15 #include <asm/atomic.h> 16 #include <asm/cpu.h> 17 #include <asm/interrupt.h> 18 #include <asm/lapic.h> 19 #include <asm/microcode.h> 20 #include <asm/mp.h> 21 #include <asm/msr.h> 22 #include <asm/mtrr.h> 23 #include <asm/processor.h> 24 #include <asm/sipi.h> 25 #include <dm/device-internal.h> 26 #include <dm/uclass-internal.h> 27 #include <dm/lists.h> 28 #include <dm/root.h> 29 #include <linux/linkage.h> 30 31 DECLARE_GLOBAL_DATA_PTR; 32 33 /* Total CPUs include BSP */ 34 static int num_cpus; 35 36 /* This also needs to match the sipi.S assembly code for saved MSR encoding */ 37 struct saved_msr { 38 uint32_t index; 39 uint32_t lo; 40 uint32_t hi; 41 } __packed; 42 43 44 struct mp_flight_plan { 45 int num_records; 46 struct mp_flight_record *records; 47 }; 48 49 static struct mp_flight_plan mp_info; 50 51 struct cpu_map { 52 struct udevice *dev; 53 int apic_id; 54 int err_code; 55 }; 56 57 static inline void barrier_wait(atomic_t *b) 58 { 59 while (atomic_read(b) == 0) 60 asm("pause"); 61 mfence(); 62 } 63 64 static inline void release_barrier(atomic_t *b) 65 { 66 mfence(); 67 atomic_set(b, 1); 68 } 69 70 static inline void stop_this_cpu(void) 71 { 72 /* Called by an AP when it is ready to halt and wait for a new task */ 73 for (;;) 74 cpu_hlt(); 75 } 76 77 /* Returns 1 if timeout waiting for APs. 0 if target APs found */ 78 static int wait_for_aps(atomic_t *val, int target, int total_delay, 79 int delay_step) 80 { 81 int timeout = 0; 82 int delayed = 0; 83 84 while (atomic_read(val) != target) { 85 udelay(delay_step); 86 delayed += delay_step; 87 if (delayed >= total_delay) { 88 timeout = 1; 89 break; 90 } 91 } 92 93 return timeout; 94 } 95 96 static void ap_do_flight_plan(struct udevice *cpu) 97 { 98 int i; 99 100 for (i = 0; i < mp_info.num_records; i++) { 101 struct mp_flight_record *rec = &mp_info.records[i]; 102 103 atomic_inc(&rec->cpus_entered); 104 barrier_wait(&rec->barrier); 105 106 if (rec->ap_call != NULL) 107 rec->ap_call(cpu, rec->ap_arg); 108 } 109 } 110 111 static int find_cpu_by_apic_id(int apic_id, struct udevice **devp) 112 { 113 struct udevice *dev; 114 115 *devp = NULL; 116 for (uclass_find_first_device(UCLASS_CPU, &dev); 117 dev; 118 uclass_find_next_device(&dev)) { 119 struct cpu_platdata *plat = dev_get_parent_platdata(dev); 120 121 if (plat->cpu_id == apic_id) { 122 *devp = dev; 123 return 0; 124 } 125 } 126 127 return -ENOENT; 128 } 129 130 /* 131 * By the time APs call ap_init() caching has been setup, and microcode has 132 * been loaded 133 */ 134 static void ap_init(unsigned int cpu_index) 135 { 136 struct udevice *dev; 137 int apic_id; 138 int ret; 139 140 /* Ensure the local apic is enabled */ 141 enable_lapic(); 142 143 apic_id = lapicid(); 144 ret = find_cpu_by_apic_id(apic_id, &dev); 145 if (ret) { 146 debug("Unknown CPU apic_id %x\n", apic_id); 147 goto done; 148 } 149 150 debug("AP: slot %d apic_id %x, dev %s\n", cpu_index, apic_id, 151 dev ? dev->name : "(apic_id not found)"); 152 153 /* Walk the flight plan */ 154 ap_do_flight_plan(dev); 155 156 /* Park the AP */ 157 debug("parking\n"); 158 done: 159 stop_this_cpu(); 160 } 161 162 static const unsigned int fixed_mtrrs[NUM_FIXED_MTRRS] = { 163 MTRR_FIX_64K_00000_MSR, MTRR_FIX_16K_80000_MSR, MTRR_FIX_16K_A0000_MSR, 164 MTRR_FIX_4K_C0000_MSR, MTRR_FIX_4K_C8000_MSR, MTRR_FIX_4K_D0000_MSR, 165 MTRR_FIX_4K_D8000_MSR, MTRR_FIX_4K_E0000_MSR, MTRR_FIX_4K_E8000_MSR, 166 MTRR_FIX_4K_F0000_MSR, MTRR_FIX_4K_F8000_MSR, 167 }; 168 169 static inline struct saved_msr *save_msr(int index, struct saved_msr *entry) 170 { 171 msr_t msr; 172 173 msr = msr_read(index); 174 entry->index = index; 175 entry->lo = msr.lo; 176 entry->hi = msr.hi; 177 178 /* Return the next entry */ 179 entry++; 180 return entry; 181 } 182 183 static int save_bsp_msrs(char *start, int size) 184 { 185 int msr_count; 186 int num_var_mtrrs; 187 struct saved_msr *msr_entry; 188 int i; 189 msr_t msr; 190 191 /* Determine number of MTRRs need to be saved */ 192 msr = msr_read(MTRR_CAP_MSR); 193 num_var_mtrrs = msr.lo & 0xff; 194 195 /* 2 * num_var_mtrrs for base and mask. +1 for IA32_MTRR_DEF_TYPE */ 196 msr_count = 2 * num_var_mtrrs + NUM_FIXED_MTRRS + 1; 197 198 if ((msr_count * sizeof(struct saved_msr)) > size) { 199 printf("Cannot mirror all %d msrs\n", msr_count); 200 return -ENOSPC; 201 } 202 203 msr_entry = (void *)start; 204 for (i = 0; i < NUM_FIXED_MTRRS; i++) 205 msr_entry = save_msr(fixed_mtrrs[i], msr_entry); 206 207 for (i = 0; i < num_var_mtrrs; i++) { 208 msr_entry = save_msr(MTRR_PHYS_BASE_MSR(i), msr_entry); 209 msr_entry = save_msr(MTRR_PHYS_MASK_MSR(i), msr_entry); 210 } 211 212 msr_entry = save_msr(MTRR_DEF_TYPE_MSR, msr_entry); 213 214 return msr_count; 215 } 216 217 static int load_sipi_vector(atomic_t **ap_countp, int num_cpus) 218 { 219 struct sipi_params_16bit *params16; 220 struct sipi_params *params; 221 static char msr_save[512]; 222 char *stack; 223 ulong addr; 224 int code_len; 225 int size; 226 int ret; 227 228 /* Copy in the code */ 229 code_len = ap_start16_code_end - ap_start16; 230 debug("Copying SIPI code to %x: %d bytes\n", AP_DEFAULT_BASE, 231 code_len); 232 memcpy((void *)AP_DEFAULT_BASE, ap_start16, code_len); 233 234 addr = AP_DEFAULT_BASE + (ulong)sipi_params_16bit - (ulong)ap_start16; 235 params16 = (struct sipi_params_16bit *)addr; 236 params16->ap_start = (uint32_t)ap_start; 237 params16->gdt = (uint32_t)gd->arch.gdt; 238 params16->gdt_limit = X86_GDT_SIZE - 1; 239 debug("gdt = %x, gdt_limit = %x\n", params16->gdt, params16->gdt_limit); 240 241 params = (struct sipi_params *)sipi_params; 242 debug("SIPI 32-bit params at %p\n", params); 243 params->idt_ptr = (uint32_t)x86_get_idt(); 244 245 params->stack_size = CONFIG_AP_STACK_SIZE; 246 size = params->stack_size * num_cpus; 247 stack = memalign(4096, size); 248 if (!stack) 249 return -ENOMEM; 250 params->stack_top = (u32)(stack + size); 251 #if !defined(CONFIG_QEMU) && !defined(CONFIG_HAVE_FSP) && \ 252 !defined(CONFIG_INTEL_MID) 253 params->microcode_ptr = ucode_base; 254 debug("Microcode at %x\n", params->microcode_ptr); 255 #endif 256 params->msr_table_ptr = (u32)msr_save; 257 ret = save_bsp_msrs(msr_save, sizeof(msr_save)); 258 if (ret < 0) 259 return ret; 260 params->msr_count = ret; 261 262 params->c_handler = (uint32_t)&ap_init; 263 264 *ap_countp = ¶ms->ap_count; 265 atomic_set(*ap_countp, 0); 266 debug("SIPI vector is ready\n"); 267 268 return 0; 269 } 270 271 static int check_cpu_devices(int expected_cpus) 272 { 273 int i; 274 275 for (i = 0; i < expected_cpus; i++) { 276 struct udevice *dev; 277 int ret; 278 279 ret = uclass_find_device(UCLASS_CPU, i, &dev); 280 if (ret) { 281 debug("Cannot find CPU %d in device tree\n", i); 282 return ret; 283 } 284 } 285 286 return 0; 287 } 288 289 /* Returns 1 for timeout. 0 on success */ 290 static int apic_wait_timeout(int total_delay, const char *msg) 291 { 292 int total = 0; 293 294 if (!(lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) 295 return 0; 296 297 debug("Waiting for %s...", msg); 298 while (lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY) { 299 udelay(50); 300 total += 50; 301 if (total >= total_delay) { 302 debug("timed out: aborting\n"); 303 return -ETIMEDOUT; 304 } 305 } 306 debug("done\n"); 307 308 return 0; 309 } 310 311 static int start_aps(int ap_count, atomic_t *num_aps) 312 { 313 int sipi_vector; 314 /* Max location is 4KiB below 1MiB */ 315 const int max_vector_loc = ((1 << 20) - (1 << 12)) >> 12; 316 317 if (ap_count == 0) 318 return 0; 319 320 /* The vector is sent as a 4k aligned address in one byte */ 321 sipi_vector = AP_DEFAULT_BASE >> 12; 322 323 if (sipi_vector > max_vector_loc) { 324 printf("SIPI vector too large! 0x%08x\n", 325 sipi_vector); 326 return -1; 327 } 328 329 debug("Attempting to start %d APs\n", ap_count); 330 331 if (apic_wait_timeout(1000, "ICR not to be busy")) 332 return -ETIMEDOUT; 333 334 /* Send INIT IPI to all but self */ 335 lapic_write(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0)); 336 lapic_write(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT | 337 LAPIC_DM_INIT); 338 debug("Waiting for 10ms after sending INIT\n"); 339 mdelay(10); 340 341 /* Send 1st SIPI */ 342 if (apic_wait_timeout(1000, "ICR not to be busy")) 343 return -ETIMEDOUT; 344 345 lapic_write(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0)); 346 lapic_write(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT | 347 LAPIC_DM_STARTUP | sipi_vector); 348 if (apic_wait_timeout(10000, "first SIPI to complete")) 349 return -ETIMEDOUT; 350 351 /* Wait for CPUs to check in up to 200 us */ 352 wait_for_aps(num_aps, ap_count, 200, 15); 353 354 /* Send 2nd SIPI */ 355 if (apic_wait_timeout(1000, "ICR not to be busy")) 356 return -ETIMEDOUT; 357 358 lapic_write(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0)); 359 lapic_write(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT | 360 LAPIC_DM_STARTUP | sipi_vector); 361 if (apic_wait_timeout(10000, "second SIPI to complete")) 362 return -ETIMEDOUT; 363 364 /* Wait for CPUs to check in */ 365 if (wait_for_aps(num_aps, ap_count, 10000, 50)) { 366 debug("Not all APs checked in: %d/%d\n", 367 atomic_read(num_aps), ap_count); 368 return -1; 369 } 370 371 return 0; 372 } 373 374 static int bsp_do_flight_plan(struct udevice *cpu, struct mp_params *mp_params) 375 { 376 int i; 377 int ret = 0; 378 const int timeout_us = 100000; 379 const int step_us = 100; 380 int num_aps = num_cpus - 1; 381 382 for (i = 0; i < mp_params->num_records; i++) { 383 struct mp_flight_record *rec = &mp_params->flight_plan[i]; 384 385 /* Wait for APs if the record is not released */ 386 if (atomic_read(&rec->barrier) == 0) { 387 /* Wait for the APs to check in */ 388 if (wait_for_aps(&rec->cpus_entered, num_aps, 389 timeout_us, step_us)) { 390 debug("MP record %d timeout\n", i); 391 ret = -1; 392 } 393 } 394 395 if (rec->bsp_call != NULL) 396 rec->bsp_call(cpu, rec->bsp_arg); 397 398 release_barrier(&rec->barrier); 399 } 400 return ret; 401 } 402 403 static int init_bsp(struct udevice **devp) 404 { 405 char processor_name[CPU_MAX_NAME_LEN]; 406 int apic_id; 407 int ret; 408 409 cpu_get_name(processor_name); 410 debug("CPU: %s\n", processor_name); 411 412 apic_id = lapicid(); 413 ret = find_cpu_by_apic_id(apic_id, devp); 414 if (ret) { 415 printf("Cannot find boot CPU, APIC ID %d\n", apic_id); 416 return ret; 417 } 418 419 return 0; 420 } 421 422 #ifdef CONFIG_QFW 423 static int qemu_cpu_fixup(void) 424 { 425 int ret; 426 int cpu_num; 427 int cpu_online; 428 struct udevice *dev, *pdev; 429 struct cpu_platdata *plat; 430 char *cpu; 431 432 /* first we need to find '/cpus' */ 433 for (device_find_first_child(dm_root(), &pdev); 434 pdev; 435 device_find_next_child(&pdev)) { 436 if (!strcmp(pdev->name, "cpus")) 437 break; 438 } 439 if (!pdev) { 440 printf("unable to find cpus device\n"); 441 return -ENODEV; 442 } 443 444 /* calculate cpus that are already bound */ 445 cpu_num = 0; 446 for (uclass_find_first_device(UCLASS_CPU, &dev); 447 dev; 448 uclass_find_next_device(&dev)) { 449 cpu_num++; 450 } 451 452 /* get actual cpu number */ 453 cpu_online = qemu_fwcfg_online_cpus(); 454 if (cpu_online < 0) { 455 printf("unable to get online cpu number: %d\n", cpu_online); 456 return cpu_online; 457 } 458 459 /* bind addtional cpus */ 460 dev = NULL; 461 for (; cpu_num < cpu_online; cpu_num++) { 462 /* 463 * allocate device name here as device_bind_driver() does 464 * not copy device name, 8 bytes are enough for 465 * sizeof("cpu@") + 3 digits cpu number + '\0' 466 */ 467 cpu = malloc(8); 468 if (!cpu) { 469 printf("unable to allocate device name\n"); 470 return -ENOMEM; 471 } 472 sprintf(cpu, "cpu@%d", cpu_num); 473 ret = device_bind_driver(pdev, "cpu_qemu", cpu, &dev); 474 if (ret) { 475 printf("binding cpu@%d failed: %d\n", cpu_num, ret); 476 return ret; 477 } 478 plat = dev_get_parent_platdata(dev); 479 plat->cpu_id = cpu_num; 480 } 481 return 0; 482 } 483 #endif 484 485 int mp_init(struct mp_params *p) 486 { 487 int num_aps; 488 atomic_t *ap_count; 489 struct udevice *cpu; 490 int ret; 491 492 /* This will cause the CPUs devices to be bound */ 493 struct uclass *uc; 494 ret = uclass_get(UCLASS_CPU, &uc); 495 if (ret) 496 return ret; 497 498 #ifdef CONFIG_QFW 499 ret = qemu_cpu_fixup(); 500 if (ret) 501 return ret; 502 #endif 503 504 ret = init_bsp(&cpu); 505 if (ret) { 506 debug("Cannot init boot CPU: err=%d\n", ret); 507 return ret; 508 } 509 510 if (p == NULL || p->flight_plan == NULL || p->num_records < 1) { 511 printf("Invalid MP parameters\n"); 512 return -1; 513 } 514 515 num_cpus = cpu_get_count(cpu); 516 if (num_cpus < 0) { 517 debug("Cannot get number of CPUs: err=%d\n", num_cpus); 518 return num_cpus; 519 } 520 521 if (num_cpus < 2) 522 debug("Warning: Only 1 CPU is detected\n"); 523 524 ret = check_cpu_devices(num_cpus); 525 if (ret) 526 debug("Warning: Device tree does not describe all CPUs. Extra ones will not be started correctly\n"); 527 528 /* Copy needed parameters so that APs have a reference to the plan */ 529 mp_info.num_records = p->num_records; 530 mp_info.records = p->flight_plan; 531 532 /* Load the SIPI vector */ 533 ret = load_sipi_vector(&ap_count, num_cpus); 534 if (ap_count == NULL) 535 return -1; 536 537 /* 538 * Make sure SIPI data hits RAM so the APs that come up will see 539 * the startup code even if the caches are disabled 540 */ 541 wbinvd(); 542 543 /* Start the APs providing number of APs and the cpus_entered field */ 544 num_aps = num_cpus - 1; 545 ret = start_aps(num_aps, ap_count); 546 if (ret) { 547 mdelay(1000); 548 debug("%d/%d eventually checked in?\n", atomic_read(ap_count), 549 num_aps); 550 return ret; 551 } 552 553 /* Walk the flight plan for the BSP */ 554 ret = bsp_do_flight_plan(cpu, p); 555 if (ret) { 556 debug("CPU init failed: err=%d\n", ret); 557 return ret; 558 } 559 560 return 0; 561 } 562 563 int mp_init_cpu(struct udevice *cpu, void *unused) 564 { 565 struct cpu_platdata *plat = dev_get_parent_platdata(cpu); 566 567 /* 568 * Multiple APs are brought up simultaneously and they may get the same 569 * seq num in the uclass_resolve_seq() during device_probe(). To avoid 570 * this, set req_seq to the reg number in the device tree in advance. 571 */ 572 cpu->req_seq = fdtdec_get_int(gd->fdt_blob, dev_of_offset(cpu), "reg", 573 -1); 574 plat->ucode_version = microcode_read_rev(); 575 plat->device_id = gd->arch.x86_device; 576 577 return device_probe(cpu); 578 } 579