/*
 * Copyright (C) 2015 Google, Inc
 *
 * SPDX-License-Identifier:	GPL-2.0+
 *
 * Based on code from the coreboot file of the same name
 */

#include <common.h>
#include <cpu.h>
#include <dm.h>
#include <errno.h>
#include <malloc.h>
#include <asm/atomic.h>
#include <asm/cpu.h>
#include <asm/fw_cfg.h>
#include <asm/interrupt.h>
#include <asm/lapic.h>
#include <asm/mp.h>
#include <asm/msr.h>
#include <asm/mtrr.h>
#include <asm/processor.h>
#include <asm/sipi.h>
#include <dm/device-internal.h>
#include <dm/lists.h>
#include <dm/root.h>
#include <dm/uclass-internal.h>
#include <linux/linkage.h>

DECLARE_GLOBAL_DATA_PTR;

/* Total CPUs include BSP */
static int num_cpus;

/* This also needs to match the sipi.S assembly code for saved MSR encoding */
struct saved_msr {
	uint32_t index;
	uint32_t lo;
	uint32_t hi;
} __packed;

struct mp_flight_plan {
	int num_records;
	struct mp_flight_record *records;
};

static struct mp_flight_plan mp_info;

struct cpu_map {
	struct udevice *dev;
	int apic_id;
	int err_code;
};

static inline void barrier_wait(atomic_t *b)
{
	while (atomic_read(b) == 0)
		asm("pause");
	mfence();
}

static inline void release_barrier(atomic_t *b)
{
	mfence();
	atomic_set(b, 1);
}

static inline void stop_this_cpu(void)
{
	/* Called by an AP when it is ready to halt and wait for a new task */
	for (;;)
		cpu_hlt();
}

/* Returns 1 if timeout waiting for APs. 0 if target APs found */
static int wait_for_aps(atomic_t *val, int target, int total_delay,
			int delay_step)
{
	int timeout = 0;
	int delayed = 0;

	while (atomic_read(val) != target) {
		udelay(delay_step);
		delayed += delay_step;
		if (delayed >= total_delay) {
			timeout = 1;
			break;
		}
	}

	return timeout;
}

static void ap_do_flight_plan(struct udevice *cpu)
{
	int i;

	for (i = 0; i < mp_info.num_records; i++) {
		struct mp_flight_record *rec = &mp_info.records[i];

		atomic_inc(&rec->cpus_entered);
		barrier_wait(&rec->barrier);

		if (rec->ap_call != NULL)
			rec->ap_call(cpu, rec->ap_arg);
	}
}

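/*
 * Find the CPU device whose parent platform data cpu_id (which holds the
 * APIC ID on x86) matches @apic_id. This uses the 'find' uclass calls so
 * it works before the devices have been probed. Returns 0 and fills @devp
 * on success, -ENOENT if no bound CPU device has that APIC ID.
 */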
static int find_cpu_by_apic_id(int apic_id, struct udevice **devp)
{
	struct udevice *dev;

	*devp = NULL;
	for (uclass_find_first_device(UCLASS_CPU, &dev);
	     dev;
	     uclass_find_next_device(&dev)) {
		struct cpu_platdata *plat = dev_get_parent_platdata(dev);

		if (plat->cpu_id == apic_id) {
			*devp = dev;
			return 0;
		}
	}

	return -ENOENT;
}

/*
 * By the time APs call ap_init() caching has been set up, and microcode has
 * been loaded
 */
static void ap_init(unsigned int cpu_index)
{
	struct udevice *dev;
	int apic_id;
	int ret;

	/* Ensure the local APIC is enabled */
	enable_lapic();

	apic_id = lapicid();
	ret = find_cpu_by_apic_id(apic_id, &dev);
	if (ret) {
		debug("Unknown CPU apic_id %x\n", apic_id);
		goto done;
	}

	debug("AP: slot %d apic_id %x, dev %s\n", cpu_index, apic_id,
	      dev ? dev->name : "(apic_id not found)");

	/* Walk the flight plan */
	ap_do_flight_plan(dev);

	/* Park the AP */
	debug("parking\n");
done:
	stop_this_cpu();
}

static const unsigned int fixed_mtrrs[NUM_FIXED_MTRRS] = {
	MTRR_FIX_64K_00000_MSR, MTRR_FIX_16K_80000_MSR, MTRR_FIX_16K_A0000_MSR,
	MTRR_FIX_4K_C0000_MSR, MTRR_FIX_4K_C8000_MSR, MTRR_FIX_4K_D0000_MSR,
	MTRR_FIX_4K_D8000_MSR, MTRR_FIX_4K_E0000_MSR, MTRR_FIX_4K_E8000_MSR,
	MTRR_FIX_4K_F0000_MSR, MTRR_FIX_4K_F8000_MSR,
};

static inline struct saved_msr *save_msr(int index, struct saved_msr *entry)
{
	msr_t msr;

	msr = msr_read(index);
	entry->index = index;
	entry->lo = msr.lo;
	entry->hi = msr.hi;

	/* Return the next entry */
	entry++;
	return entry;
}

static int save_bsp_msrs(char *start, int size)
{
	int msr_count;
	int num_var_mtrrs;
	struct saved_msr *msr_entry;
	int i;
	msr_t msr;

	/* Determine the number of MTRRs that need to be saved */
	msr = msr_read(MTRR_CAP_MSR);
	num_var_mtrrs = msr.lo & 0xff;

	/* 2 * num_var_mtrrs for base and mask. +1 for IA32_MTRR_DEF_TYPE */
	msr_count = 2 * num_var_mtrrs + NUM_FIXED_MTRRS + 1;

	if ((msr_count * sizeof(struct saved_msr)) > size) {
		printf("Cannot mirror all %d msrs.\n", msr_count);
		return -ENOSPC;
	}

	msr_entry = (void *)start;
	for (i = 0; i < NUM_FIXED_MTRRS; i++)
		msr_entry = save_msr(fixed_mtrrs[i], msr_entry);

	for (i = 0; i < num_var_mtrrs; i++) {
		msr_entry = save_msr(MTRR_PHYS_BASE_MSR(i), msr_entry);
		msr_entry = save_msr(MTRR_PHYS_MASK_MSR(i), msr_entry);
	}

	msr_entry = save_msr(MTRR_DEF_TYPE_MSR, msr_entry);

	return msr_count;
}

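/*
 * Prepare the real-mode trampoline that the APs execute when they receive
 * a Startup IPI: copy the 16-bit code from sipi.S to AP_DEFAULT_BASE
 * (which must be 4KB-aligned and below 1MB so it can be encoded as a SIPI
 * vector) and fill in its parameter block with the GDT, IDT, per-AP stack
 * area and the BSP's saved MSRs, so that each AP can switch to 32-bit mode
 * and call ap_init() with the same MTRR configuration as the BSP.
 */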
static int load_sipi_vector(atomic_t **ap_countp, int num_cpus)
{
	struct sipi_params_16bit *params16;
	struct sipi_params *params;
	static char msr_save[512];
	char *stack;
	ulong addr;
	int code_len;
	int size;
	int ret;

	/* Copy in the code */
	code_len = ap_start16_code_end - ap_start16;
	debug("Copying SIPI code to %x: %d bytes\n", AP_DEFAULT_BASE,
	      code_len);
	memcpy((void *)AP_DEFAULT_BASE, ap_start16, code_len);

	addr = AP_DEFAULT_BASE + (ulong)sipi_params_16bit - (ulong)ap_start16;
	params16 = (struct sipi_params_16bit *)addr;
	params16->ap_start = (uint32_t)ap_start;
	params16->gdt = (uint32_t)gd->arch.gdt;
	params16->gdt_limit = X86_GDT_SIZE - 1;
	debug("gdt = %x, gdt_limit = %x\n", params16->gdt, params16->gdt_limit);

	params = (struct sipi_params *)sipi_params;
	debug("SIPI 32-bit params at %p\n", params);
	params->idt_ptr = (uint32_t)x86_get_idt();

	params->stack_size = CONFIG_AP_STACK_SIZE;
	size = params->stack_size * num_cpus;
	stack = memalign(4096, size);
	if (!stack)
		return -ENOMEM;
	params->stack_top = (u32)(stack + size);

	params->microcode_ptr = 0;
	params->msr_table_ptr = (u32)msr_save;
	ret = save_bsp_msrs(msr_save, sizeof(msr_save));
	if (ret < 0)
		return ret;
	params->msr_count = ret;

	params->c_handler = (uint32_t)&ap_init;

	*ap_countp = &params->ap_count;
	atomic_set(*ap_countp, 0);
	debug("SIPI vector is ready\n");

	return 0;
}

static int check_cpu_devices(int expected_cpus)
{
	int i;

	for (i = 0; i < expected_cpus; i++) {
		struct udevice *dev;
		int ret;

		ret = uclass_find_device(UCLASS_CPU, i, &dev);
		if (ret) {
			debug("Cannot find CPU %d in device tree\n", i);
			return ret;
		}
	}

	return 0;
}

/* Returns 1 for timeout. 0 on success */
static int apic_wait_timeout(int total_delay, int delay_step)
{
	int total = 0;
	int timeout = 0;

	while (lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY) {
		udelay(delay_step);
		total += delay_step;
		if (total >= total_delay) {
			timeout = 1;
			break;
		}
	}

	return timeout;
}

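/*
 * Start the APs with the standard INIT/SIPI/SIPI sequence: send an INIT
 * IPI to everything but the BSP, wait 10ms, then send two Startup IPIs
 * carrying the 4KB page number of the trampoline, checking that the ICR
 * delivery-status bit is clear before each step. Each AP bumps the shared
 * counter (the ap_count field set up by load_sipi_vector()) as it comes
 * up, which is how the BSP counts check-ins in @num_aps.
 */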
static int start_aps(int ap_count, atomic_t *num_aps)
{
	int sipi_vector;
	/* Max location is 4KiB below 1MiB */
	const int max_vector_loc = ((1 << 20) - (1 << 12)) >> 12;

	if (ap_count == 0)
		return 0;

	/* The vector is sent as a 4k-aligned address in one byte */
	sipi_vector = AP_DEFAULT_BASE >> 12;

	if (sipi_vector > max_vector_loc) {
		printf("SIPI vector too large! 0x%08x\n",
		       sipi_vector);
		return -1;
	}

	debug("Attempting to start %d APs\n", ap_count);

	if (lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY) {
		debug("Waiting for ICR not to be busy...");
		if (apic_wait_timeout(1000, 50)) {
			debug("timed out. Aborting.\n");
			return -1;
		} else {
			debug("done.\n");
		}
	}

	/* Send INIT IPI to all but self */
	lapic_write(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
	lapic_write(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
		    LAPIC_DM_INIT);
	debug("Waiting for 10ms after sending INIT.\n");
	mdelay(10);

	/* Send 1st SIPI */
	if (lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY) {
		debug("Waiting for ICR not to be busy...");
		if (apic_wait_timeout(1000, 50)) {
			debug("timed out. Aborting.\n");
			return -1;
		} else {
			debug("done.\n");
		}
	}

	lapic_write(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
	lapic_write(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
		    LAPIC_DM_STARTUP | sipi_vector);
	debug("Waiting for 1st SIPI to complete...");
	if (apic_wait_timeout(10000, 50)) {
		debug("timed out.\n");
		return -1;
	} else {
		debug("done.\n");
	}

	/* Wait for CPUs to check in up to 200 us */
	wait_for_aps(num_aps, ap_count, 200, 15);

	/* Send 2nd SIPI */
	if (lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY) {
		debug("Waiting for ICR not to be busy...");
		if (apic_wait_timeout(1000, 50)) {
			debug("timed out. Aborting.\n");
			return -1;
		} else {
			debug("done.\n");
		}
	}

	lapic_write(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
	lapic_write(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
		    LAPIC_DM_STARTUP | sipi_vector);
	debug("Waiting for 2nd SIPI to complete...");
	if (apic_wait_timeout(10000, 50)) {
		debug("timed out.\n");
		return -1;
	} else {
		debug("done.\n");
	}

	/* Wait for CPUs to check in */
	if (wait_for_aps(num_aps, ap_count, 10000, 50)) {
		debug("Not all APs checked in: %d/%d.\n",
		      atomic_read(num_aps), ap_count);
		return -1;
	}

	return 0;
}

static int bsp_do_flight_plan(struct udevice *cpu, struct mp_params *mp_params)
{
	int i;
	int ret = 0;
	const int timeout_us = 100000;
	const int step_us = 100;
	int num_aps = num_cpus - 1;

	for (i = 0; i < mp_params->num_records; i++) {
		struct mp_flight_record *rec = &mp_params->flight_plan[i];

		/* Wait for APs if the record is not released */
		if (atomic_read(&rec->barrier) == 0) {
			/* Wait for the APs to check in */
			if (wait_for_aps(&rec->cpus_entered, num_aps,
					 timeout_us, step_us)) {
				debug("MP record %d timeout.\n", i);
				ret = -1;
			}
		}

		if (rec->bsp_call != NULL)
			rec->bsp_call(cpu, rec->bsp_arg);

		release_barrier(&rec->barrier);
	}

	return ret;
}

static int init_bsp(struct udevice **devp)
{
	char processor_name[CPU_MAX_NAME_LEN];
	int apic_id;
	int ret;

	cpu_get_name(processor_name);
	debug("CPU: %s.\n", processor_name);

	lapic_setup();

	apic_id = lapicid();
	ret = find_cpu_by_apic_id(apic_id, devp);
	if (ret) {
		printf("Cannot find boot CPU, APIC ID %d\n", apic_id);
		return ret;
	}

	return 0;
}

#ifdef CONFIG_QEMU
static int qemu_cpu_fixup(void)
{
	int ret;
	int cpu_num;
	int cpu_online;
	struct udevice *dev, *pdev;
	struct cpu_platdata *plat;
	char *cpu;

	/* First find the '/cpus' device */
	for (device_find_first_child(dm_root(), &pdev);
	     pdev;
	     device_find_next_child(&pdev)) {
		if (!strcmp(pdev->name, "cpus"))
			break;
	}
	if (!pdev) {
		printf("unable to find cpus device\n");
		return -ENODEV;
	}

	/* Count the CPUs that are already bound */
	cpu_num = 0;
	for (uclass_find_first_device(UCLASS_CPU, &dev);
	     dev;
	     uclass_find_next_device(&dev)) {
		cpu_num++;
	}

	/* Get the actual number of online CPUs */
	cpu_online = qemu_fwcfg_online_cpus();
	if (cpu_online < 0) {
		printf("unable to get online cpu number: %d\n", cpu_online);
		return cpu_online;
	}

	/* Bind the additional CPUs */
	dev = NULL;
	for (; cpu_num < cpu_online; cpu_num++) {
		/*
		 * Allocate the device name here as device_bind_driver()
		 * does not copy it. 8 bytes are enough for "cpu@" plus up
		 * to a 3-digit cpu number and the '\0' terminator.
		 */
		cpu = malloc(8);
		if (!cpu) {
			printf("unable to allocate device name\n");
			return -ENOMEM;
		}
		sprintf(cpu, "cpu@%d", cpu_num);
		ret = device_bind_driver(pdev, "cpu_qemu", cpu, &dev);
		if (ret) {
			printf("binding cpu@%d failed: %d\n", cpu_num, ret);
			return ret;
		}
		plat = dev_get_parent_platdata(dev);
		plat->cpu_id = cpu_num;
	}

	return 0;
}
#endif

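/*
 * mp_init() brings up the APs and runs the flight plan: it binds and
 * checks the CPU devices, loads the SIPI trampoline, starts the APs and
 * then walks the flight plan on the BSP while the APs walk it in
 * ap_do_flight_plan(). A minimal flight plan might look like this
 * (illustrative only; the MP_FLIGHT_RECORD() helper comes from asm/mp.h
 * and other mp_params fields are left out of this sketch):
 *
 *	static struct mp_flight_record mp_steps[] = {
 *		MP_FLIGHT_RECORD(0, NULL, NULL, NULL, NULL),
 *	};
 *
 *	struct mp_params params;
 *
 *	params.flight_plan = &mp_steps[0];
 *	params.num_records = ARRAY_SIZE(mp_steps);
 *	mp_init(&params);
 */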
int mp_init(struct mp_params *p)
{
	int num_aps;
	atomic_t *ap_count;
	struct udevice *cpu;
	struct uclass *uc;
	int ret;

	/* This will cause the CPU devices to be bound */
	ret = uclass_get(UCLASS_CPU, &uc);
	if (ret)
		return ret;

#ifdef CONFIG_QEMU
	ret = qemu_cpu_fixup();
	if (ret)
		return ret;
#endif

	ret = init_bsp(&cpu);
	if (ret) {
		debug("Cannot init boot CPU: err=%d\n", ret);
		return ret;
	}

	if (p == NULL || p->flight_plan == NULL || p->num_records < 1) {
		printf("Invalid MP parameters\n");
		return -1;
	}

	num_cpus = cpu_get_count(cpu);
	if (num_cpus < 0) {
		debug("Cannot get number of CPUs: err=%d\n", num_cpus);
		return num_cpus;
	}

	if (num_cpus < 2)
		debug("Warning: Only 1 CPU is detected\n");

	ret = check_cpu_devices(num_cpus);
	if (ret)
		debug("Warning: Device tree does not describe all CPUs. Extra ones will not be started correctly\n");

	/* Copy the needed parameters so that APs have a reference to the plan */
	mp_info.num_records = p->num_records;
	mp_info.records = p->flight_plan;

	/* Load the SIPI vector */
	ret = load_sipi_vector(&ap_count, num_cpus);
	if (ret || !ap_count)
		return -1;

	/*
	 * Make sure the SIPI data hits RAM so the APs that come up will see
	 * the start-up code even if the caches are disabled
	 */
	wbinvd();

	/* Start the APs, providing the number of APs and the cpus_entered field */
	num_aps = num_cpus - 1;
	ret = start_aps(num_aps, ap_count);
	if (ret) {
		mdelay(1000);
		debug("%d/%d eventually checked in?\n", atomic_read(ap_count),
		      num_aps);
		return ret;
	}

	/* Walk the flight plan for the BSP */
	ret = bsp_do_flight_plan(cpu, p);
	if (ret) {
		debug("CPU init failed: err=%d\n", ret);
		return ret;
	}

	return 0;
}

int mp_init_cpu(struct udevice *cpu, void *unused)
{
	/*
	 * Multiple APs are brought up simultaneously and they may get the
	 * same seq num in uclass_resolve_seq() during device_probe(). To
	 * avoid this, set req_seq to the reg number from the device tree
	 * in advance.
	 */
	cpu->req_seq = fdtdec_get_int(gd->fdt_blob, cpu->of_offset, "reg", -1);

	return device_probe(cpu);
}