xref: /openbmc/u-boot/arch/x86/cpu/mp_init.c (revision 29b103c7)
/*
 * Copyright (C) 2015 Google, Inc
 *
 * SPDX-License-Identifier:	GPL-2.0+
 *
 * Based on code from the coreboot file of the same name
 */

#include <common.h>
#include <cpu.h>
#include <dm.h>
#include <errno.h>
#include <malloc.h>
#include <asm/atomic.h>
#include <asm/cpu.h>
#include <asm/interrupt.h>
#include <asm/lapic.h>
#include <asm/mp.h>
#include <asm/msr.h>
#include <asm/mtrr.h>
#include <asm/processor.h>
#include <asm/sipi.h>
#include <dm/device-internal.h>
#include <dm/uclass-internal.h>
#include <linux/linkage.h>

/* Needed for gd->arch.gdt below */
DECLARE_GLOBAL_DATA_PTR;

/* Total number of CPUs, including the BSP */
static int num_cpus;

/* This must match the saved-MSR encoding used by the sipi.S assembly code */
struct saved_msr {
	uint32_t index;
	uint32_t lo;
	uint32_t hi;
} __packed;
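
/*
 * A flight plan is a list of 'records', each naming a callback to run on
 * the BSP and one to run on every AP. The CPUs move through the records in
 * lock-step: each AP checks in to a record and blocks on its barrier, and
 * the BSP releases the barrier once its own callback has run. An
 * illustrative plan, assuming the MP_FR_BLOCK_APS() helper macro from
 * <asm/mp.h>, might look like:
 *
 *	static struct mp_flight_record mp_steps[] = {
 *		MP_FR_BLOCK_APS(mp_init_cpu, NULL, mp_init_cpu, NULL),
 *		MP_FR_BLOCK_APS(NULL, NULL, NULL, NULL),
 *	};
 */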
struct mp_flight_plan {
	int num_records;
	struct mp_flight_record *records;
};

static struct mp_flight_plan mp_info;

/* Maps a CPU device to its APIC ID, recording any error during setup */
struct cpu_map {
	struct udevice *dev;
	int apic_id;
	int err_code;
};

static inline void barrier_wait(atomic_t *b)
{
	while (atomic_read(b) == 0)
		asm("pause");
	mfence();
}

static inline void release_barrier(atomic_t *b)
{
	mfence();
	atomic_set(b, 1);
}
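
/*
 * Together these two helpers form a one-shot release barrier between the
 * BSP and the APs. The pairing, roughly, is:
 *
 *	AP				BSP
 *	atomic_inc(&rec->cpus_entered)
 *	barrier_wait(&rec->barrier)	wait_for_aps(&rec->cpus_entered, ...)
 *	  (spins on 'pause')		rec->bsp_call(...)
 *					release_barrier(&rec->barrier)
 *	rec->ap_call(...)
 *
 * The mfence() calls order the flag update against the work done on each
 * side of the barrier.
 */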

static inline void stop_this_cpu(void)
{
	/* Called by an AP when it is ready to halt and wait for a new task */
	for (;;)
		cpu_hlt();
}

/*
 * Returns 1 on timeout while waiting for the APs, 0 once the target number
 * of APs has checked in
 */
static int wait_for_aps(atomic_t *val, int target, int total_delay,
			int delay_step)
{
	int timeout = 0;
	int delayed = 0;

	while (atomic_read(val) != target) {
		udelay(delay_step);
		delayed += delay_step;
		if (delayed >= total_delay) {
			timeout = 1;
			break;
		}
	}

	return timeout;
}

static void ap_do_flight_plan(struct udevice *cpu)
{
	int i;

	for (i = 0; i < mp_info.num_records; i++) {
		struct mp_flight_record *rec = &mp_info.records[i];

		atomic_inc(&rec->cpus_entered);
		barrier_wait(&rec->barrier);

		if (rec->ap_call != NULL)
			rec->ap_call(cpu, rec->ap_arg);
	}
}

static int find_cpu_by_apic_id(int apic_id, struct udevice **devp)
{
	struct udevice *dev;

	*devp = NULL;
	for (uclass_find_first_device(UCLASS_CPU, &dev);
	     dev;
	     uclass_find_next_device(&dev)) {
		struct cpu_platdata *plat = dev_get_parent_platdata(dev);

		if (plat->cpu_id == apic_id) {
			*devp = dev;
			return 0;
		}
	}

	return -ENOENT;
}

/*
 * By the time the APs call ap_init(), caching has been set up and
 * microcode has been loaded
 */
static void ap_init(unsigned int cpu_index)
{
	struct udevice *dev;
	int apic_id;
	int ret;

	/* Ensure the local APIC is enabled */
	enable_lapic();

	apic_id = lapicid();
	ret = find_cpu_by_apic_id(apic_id, &dev);
	if (ret) {
		debug("Unknown CPU apic_id %x\n", apic_id);
		goto done;
	}

	debug("AP: slot %d apic_id %x, dev %s\n", cpu_index, apic_id,
	      dev ? dev->name : "(apic_id not found)");

	/* Walk the flight plan */
	ap_do_flight_plan(dev);

	/* Park the AP */
	debug("parking\n");
done:
	stop_this_cpu();
}

static const unsigned int fixed_mtrrs[NUM_FIXED_MTRRS] = {
	MTRR_FIX_64K_00000_MSR, MTRR_FIX_16K_80000_MSR, MTRR_FIX_16K_A0000_MSR,
	MTRR_FIX_4K_C0000_MSR, MTRR_FIX_4K_C8000_MSR, MTRR_FIX_4K_D0000_MSR,
	MTRR_FIX_4K_D8000_MSR, MTRR_FIX_4K_E0000_MSR, MTRR_FIX_4K_E8000_MSR,
	MTRR_FIX_4K_F0000_MSR, MTRR_FIX_4K_F8000_MSR,
};
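
/*
 * The 11 fixed-range MTRRs above cover the first 1MiB of address space:
 * one register with 64KiB granules for 00000-7ffff, two with 16KiB
 * granules for 80000-bffff and eight with 4KiB granules for c0000-fffff.
 */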

static inline struct saved_msr *save_msr(int index, struct saved_msr *entry)
{
	msr_t msr;

	msr = msr_read(index);
	entry->index = index;
	entry->lo = msr.lo;
	entry->hi = msr.hi;

	/* Return the next entry */
	entry++;
	return entry;
}

static int save_bsp_msrs(char *start, int size)
{
	int msr_count;
	int num_var_mtrrs;
	struct saved_msr *msr_entry;
	int i;
	msr_t msr;

	/* Determine the number of MTRRs that need to be saved */
	msr = msr_read(MTRR_CAP_MSR);
	num_var_mtrrs = msr.lo & 0xff;

	/* 2 * num_var_mtrrs for base and mask, +1 for IA32_MTRR_DEF_TYPE */
	msr_count = 2 * num_var_mtrrs + NUM_FIXED_MTRRS + 1;

	if ((msr_count * sizeof(struct saved_msr)) > size) {
		printf("Cannot mirror all %d MSRs\n", msr_count);
		return -ENOSPC;
	}

	msr_entry = (void *)start;
	for (i = 0; i < NUM_FIXED_MTRRS; i++)
		msr_entry = save_msr(fixed_mtrrs[i], msr_entry);

	for (i = 0; i < num_var_mtrrs; i++) {
		msr_entry = save_msr(MTRR_PHYS_BASE_MSR(i), msr_entry);
		msr_entry = save_msr(MTRR_PHYS_MASK_MSR(i), msr_entry);
	}

	msr_entry = save_msr(MTRR_DEF_TYPE_MSR, msr_entry);

	return msr_count;
}
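
/*
 * The table built by save_bsp_msrs() is a packed array of struct saved_msr:
 * the fixed-range MTRRs first, then a {PHYSBASE, PHYSMASK} pair per
 * variable MTRR, then MTRR_DEF_TYPE. On each AP the sipi.S startup code
 * replays the table so the AP's memory types match the BSP's; the restore
 * loop is roughly equivalent to this C sketch:
 *
 *	for (i = 0; i < msr_count; i++) {
 *		msr_t msr = { .lo = entry[i].lo, .hi = entry[i].hi };
 *		msr_write(entry[i].index, msr);
 *	}
 */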

static int load_sipi_vector(atomic_t **ap_countp)
{
	struct sipi_params_16bit *params16;
	struct sipi_params *params;
	static char msr_save[512];
	char *stack;
	ulong addr;
	int code_len;
	int size;
	int ret;

	/* Copy in the code */
	code_len = ap_start16_code_end - ap_start16;
	debug("Copying SIPI code to %x: %d bytes\n", AP_DEFAULT_BASE,
	      code_len);
	memcpy((void *)AP_DEFAULT_BASE, ap_start16, code_len);

	addr = AP_DEFAULT_BASE + (ulong)sipi_params_16bit - (ulong)ap_start16;
	params16 = (struct sipi_params_16bit *)addr;
	params16->ap_start = (uint32_t)ap_start;
	params16->gdt = (uint32_t)gd->arch.gdt;
	params16->gdt_limit = X86_GDT_SIZE - 1;
	debug("gdt = %x, gdt_limit = %x\n", params16->gdt, params16->gdt_limit);

	params = (struct sipi_params *)sipi_params;
	debug("SIPI 32-bit params at %p\n", params);
	params->idt_ptr = (uint32_t)x86_get_idt();

	params->stack_size = CONFIG_AP_STACK_SIZE;
	size = params->stack_size * CONFIG_MAX_CPUS;
	/* memalign() takes the alignment first: 4KiB-align the stack block */
	stack = memalign(4096, size);
	if (!stack)
		return -ENOMEM;
	params->stack_top = (u32)(stack + size);

	params->microcode_ptr = 0;
	params->msr_table_ptr = (u32)msr_save;
	ret = save_bsp_msrs(msr_save, sizeof(msr_save));
	if (ret < 0)
		return ret;
	params->msr_count = ret;

	params->c_handler = (uint32_t)&ap_init;

	*ap_countp = &params->ap_count;
	atomic_set(*ap_countp, 0);
	debug("SIPI vector is ready\n");

	return 0;
}
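
/*
 * After load_sipi_vector() the real-mode trampoline and its parameter
 * block both live in the copy at AP_DEFAULT_BASE, roughly:
 *
 *	AP_DEFAULT_BASE + 0:	ap_start16 (16-bit entry, executed on SIPI)
 *	AP_DEFAULT_BASE + off:	struct sipi_params_16bit (gdt, ap_start, ...)
 *
 * where 'off' is sipi_params_16bit's offset within the copied code. The
 * 32-bit struct sipi_params is patched in place in the U-Boot image itself
 * and is picked up once an AP has jumped to ap_start.
 */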

static int check_cpu_devices(int expected_cpus)
{
	int i;

	for (i = 0; i < expected_cpus; i++) {
		struct udevice *dev;
		int ret;

		ret = uclass_find_device(UCLASS_CPU, i, &dev);
		if (ret) {
			debug("Cannot find CPU %d in device tree\n", i);
			return ret;
		}
	}

	return 0;
}

/* Returns 1 on timeout, 0 once the ICR delivery-status (busy) bit clears */
static int apic_wait_timeout(int total_delay, int delay_step)
{
	int total = 0;
	int timeout = 0;

	while (lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY) {
		udelay(delay_step);
		total += delay_step;
		if (total >= total_delay) {
			timeout = 1;
			break;
		}
	}

	return timeout;
}
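
/*
 * start_aps() below implements Intel's 'universal start-up algorithm':
 * broadcast an INIT IPI to every CPU but self, wait 10ms, then send two
 * STARTUP IPIs carrying the page number of the real-mode entry point
 * (AP_DEFAULT_BASE >> 12), polling the ICR busy bit between steps and
 * giving the APs a window to check in after each SIPI.
 */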

static int start_aps(int ap_count, atomic_t *num_aps)
{
	int sipi_vector;
	/* Max location is 4KiB below 1MiB */
	const int max_vector_loc = ((1 << 20) - (1 << 12)) >> 12;

	if (ap_count == 0)
		return 0;

	/* The vector is sent as the page number of a 4KiB-aligned address */
	sipi_vector = AP_DEFAULT_BASE >> 12;

	if (sipi_vector > max_vector_loc) {
		printf("SIPI vector too large! 0x%08x\n", sipi_vector);
		return -1;
	}

	debug("Attempting to start %d APs\n", ap_count);

	if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
		debug("Waiting for ICR not to be busy...");
		if (apic_wait_timeout(1000, 50)) {
			debug("timed out. Aborting.\n");
			return -1;
		} else {
			debug("done.\n");
		}
	}

	/* Send INIT IPI to all but self */
	lapic_write(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
	lapic_write(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
		    LAPIC_DM_INIT);
	debug("Waiting for 10ms after sending INIT.\n");
	mdelay(10);

	/* Send 1st SIPI */
	if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
		debug("Waiting for ICR not to be busy...");
		if (apic_wait_timeout(1000, 50)) {
			debug("timed out. Aborting.\n");
			return -1;
		} else {
			debug("done.\n");
		}
	}

	lapic_write(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
	lapic_write(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
		    LAPIC_DM_STARTUP | sipi_vector);
	debug("Waiting for 1st SIPI to complete...");
	if (apic_wait_timeout(10000, 50)) {
		debug("timed out.\n");
		return -1;
	} else {
		debug("done.\n");
	}

	/* Wait up to 200us for the APs to check in */
	wait_for_aps(num_aps, ap_count, 200, 15);

	/* Send 2nd SIPI */
	if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
		debug("Waiting for ICR not to be busy...");
		if (apic_wait_timeout(1000, 50)) {
			debug("timed out. Aborting.\n");
			return -1;
		} else {
			debug("done.\n");
		}
	}

	lapic_write(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
	lapic_write(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
		    LAPIC_DM_STARTUP | sipi_vector);
	debug("Waiting for 2nd SIPI to complete...");
	if (apic_wait_timeout(10000, 50)) {
		debug("timed out.\n");
		return -1;
	} else {
		debug("done.\n");
	}

	/* Wait for the remaining APs to check in */
	if (wait_for_aps(num_aps, ap_count, 10000, 50)) {
		debug("Not all APs checked in: %d/%d.\n",
		      atomic_read(num_aps), ap_count);
		return -1;
	}

	return 0;
}

static int bsp_do_flight_plan(struct udevice *cpu, struct mp_params *mp_params)
{
	int i;
	int ret = 0;
	const int timeout_us = 100000;
	const int step_us = 100;
	int num_aps = num_cpus - 1;

	for (i = 0; i < mp_params->num_records; i++) {
		struct mp_flight_record *rec = &mp_params->flight_plan[i];

		/* Wait for the APs if the barrier has not yet been released */
		if (atomic_read(&rec->barrier) == 0) {
			/* Wait for the APs to check in */
			if (wait_for_aps(&rec->cpus_entered, num_aps,
					 timeout_us, step_us)) {
				debug("MP record %d timeout.\n", i);
				ret = -1;
			}
		}

		if (rec->bsp_call != NULL)
			rec->bsp_call(cpu, rec->bsp_arg);

		release_barrier(&rec->barrier);
	}

	return ret;
}

static int init_bsp(struct udevice **devp)
{
	char processor_name[CPU_MAX_NAME_LEN];
	int apic_id;
	int ret;

	cpu_get_name(processor_name);
	debug("CPU: %s.\n", processor_name);

	lapic_setup();

	apic_id = lapicid();
	ret = find_cpu_by_apic_id(apic_id, devp);
	if (ret) {
		printf("Cannot find boot CPU, APIC ID %d\n", apic_id);
		return ret;
	}

	return 0;
}

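/*
 * Typical usage: build a flight plan (such as the mp_steps sketch above
 * struct mp_flight_plan) and hand it in. Illustrative only; any mp_params
 * fields beyond flight_plan/num_records depend on the platform:
 *
 *	struct mp_params mp_params;
 *
 *	mp_params.flight_plan = &mp_steps[0];
 *	mp_params.num_records = ARRAY_SIZE(mp_steps);
 *	ret = mp_init(&mp_params);
 */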
int mp_init(struct mp_params *p)
{
	int num_aps;
	atomic_t *ap_count;
	struct udevice *cpu;
	struct uclass *uc;
	int ret;

	/* This will cause the CPU devices to be bound */
	ret = uclass_get(UCLASS_CPU, &uc);
	if (ret)
		return ret;

	ret = init_bsp(&cpu);
	if (ret) {
		debug("Cannot init boot CPU: err=%d\n", ret);
		return ret;
	}

	if (p == NULL || p->flight_plan == NULL || p->num_records < 1) {
		printf("Invalid MP parameters\n");
		return -1;
	}

	num_cpus = cpu_get_count(cpu);
	if (num_cpus < 0) {
		debug("Cannot get number of CPUs: err=%d\n", num_cpus);
		return num_cpus;
	}

	if (num_cpus < 2)
		debug("Warning: Only 1 CPU is detected\n");

	ret = check_cpu_devices(num_cpus);
	if (ret)
		debug("Warning: Device tree does not describe all CPUs. Extra ones will not be started correctly\n");

	/* Copy the parameters so that the APs have a reference to the plan */
	mp_info.num_records = p->num_records;
	mp_info.records = p->flight_plan;

	/* Load the SIPI vector */
	ret = load_sipi_vector(&ap_count);
	if (ret)
		return ret;
	if (ap_count == NULL)
		return -1;

	/*
	 * Make sure the SIPI data hits RAM so the APs that come up will see
	 * the startup code even if the caches are disabled
	 */
	wbinvd();

	/* Start the APs, passing the expected count and the check-in counter */
	num_aps = num_cpus - 1;
	ret = start_aps(num_aps, ap_count);
	if (ret) {
		/* Allow stragglers extra time before reporting the count */
		mdelay(1000);
		debug("%d/%d eventually checked in?\n", atomic_read(ap_count),
		      num_aps);
		return ret;
	}

	/* Walk the flight plan for the BSP */
	ret = bsp_do_flight_plan(cpu, p);
	if (ret) {
		debug("CPU init failed: err=%d\n", ret);
		return ret;
	}

	return 0;
}

int mp_init_cpu(struct udevice *cpu, void *unused)
{
	return device_probe(cpu);
}