xref: /openbmc/u-boot/arch/x86/cpu/mp_init.c (revision 63d54a67)
1 /*
2  * Copyright (C) 2015 Google, Inc
3  *
4  * SPDX-License-Identifier:	GPL-2.0+
5  *
6  * Based on code from the coreboot file of the same name
7  */
8 
9 #include <common.h>
10 #include <cpu.h>
11 #include <dm.h>
12 #include <errno.h>
13 #include <malloc.h>
14 #include <asm/atomic.h>
15 #include <asm/cpu.h>
16 #include <asm/interrupt.h>
17 #include <asm/lapic.h>
18 #include <asm/mp.h>
19 #include <asm/mtrr.h>
20 #include <asm/sipi.h>
21 #include <dm/device-internal.h>
22 #include <dm/uclass-internal.h>
23 #include <linux/linkage.h>
24 
/* Total CPUs include BSP */
static int num_cpus;

/* This also needs to match the sipi.S assembly code for saved MSR encoding */
struct saved_msr {
	uint32_t index;	/* MSR number */
	uint32_t lo;	/* low 32 bits of the MSR value */
	uint32_t hi;	/* high 32 bits of the MSR value */
} __packed;


/* Flight plan shared with the APs via mp_info */
struct mp_flight_plan {
	int num_records;			/* number of entries in records[] */
	struct mp_flight_record *records;	/* caller-owned record array */
};

/* Copy of the caller's plan, published by mp_init() for ap_init() to walk */
static struct mp_flight_plan mp_info;

/* NOTE(review): struct cpu_map is not referenced in this file chunk */
struct cpu_map {
	struct udevice *dev;
	int apic_id;
	int err_code;
};
48 
49 static inline void barrier_wait(atomic_t *b)
50 {
51 	while (atomic_read(b) == 0)
52 		asm("pause");
53 	mfence();
54 }
55 
/*
 * Release a barrier: fence first so every store made before the release
 * is visible to the waiters, then open the barrier by setting it to 1.
 */
static inline void release_barrier(atomic_t *b)
{
	mfence();
	atomic_set(b, 1);
}
61 
62 /* Returns 1 if timeout waiting for APs. 0 if target APs found */
63 static int wait_for_aps(atomic_t *val, int target, int total_delay,
64 			int delay_step)
65 {
66 	int timeout = 0;
67 	int delayed = 0;
68 
69 	while (atomic_read(val) != target) {
70 		udelay(delay_step);
71 		delayed += delay_step;
72 		if (delayed >= total_delay) {
73 			timeout = 1;
74 			break;
75 		}
76 	}
77 
78 	return timeout;
79 }
80 
81 static void ap_do_flight_plan(struct udevice *cpu)
82 {
83 	int i;
84 
85 	for (i = 0; i < mp_info.num_records; i++) {
86 		struct mp_flight_record *rec = &mp_info.records[i];
87 
88 		atomic_inc(&rec->cpus_entered);
89 		barrier_wait(&rec->barrier);
90 
91 		if (rec->ap_call != NULL)
92 			rec->ap_call(cpu, rec->ap_arg);
93 	}
94 }
95 
96 static int find_cpu_by_apid_id(int apic_id, struct udevice **devp)
97 {
98 	struct udevice *dev;
99 
100 	*devp = NULL;
101 	for (uclass_find_first_device(UCLASS_CPU, &dev);
102 	     dev;
103 	     uclass_find_next_device(&dev)) {
104 		struct cpu_platdata *plat = dev_get_parent_platdata(dev);
105 
106 		if (plat->cpu_id == apic_id) {
107 			*devp = dev;
108 			return 0;
109 		}
110 	}
111 
112 	return -ENOENT;
113 }
114 
/*
 * By the time APs call ap_init() caching has been setup, and microcode has
 * been loaded
 *
 * C entry point for an AP; load_sipi_vector() installs it as c_handler for
 * the SIPI trampoline. Never returns: the AP walks the flight plan and
 * parks, or parks immediately if its APIC ID has no matching CPU device.
 *
 * @cpu_index: slot number assigned to this AP (used only for debug output)
 */
static void ap_init(unsigned int cpu_index)
{
	struct udevice *dev;
	int apic_id;
	int ret;

	/* Ensure the local apic is enabled */
	enable_lapic();

	apic_id = lapicid();
	ret = find_cpu_by_apid_id(apic_id, &dev);
	if (ret) {
		/* No CPU device matches this APIC ID; park without running */
		debug("Unknown CPU apic_id %x\n", apic_id);
		goto done;
	}

	debug("AP: slot %d apic_id %x, dev %s\n", cpu_index, apic_id,
	      dev ? dev->name : "(apic_id not found)");

	/* Walk the flight plan */
	ap_do_flight_plan(dev);

	/* Park the AP */
	debug("parking\n");
done:
	stop_this_cpu();
}
146 
/*
 * MSR indexes of the fixed-range MTRRs mirrored to the APs by
 * save_bsp_msrs(), in the order they are saved
 */
static const unsigned int fixed_mtrrs[NUM_FIXED_MTRRS] = {
	MTRR_FIX_64K_00000_MSR, MTRR_FIX_16K_80000_MSR, MTRR_FIX_16K_A0000_MSR,
	MTRR_FIX_4K_C0000_MSR, MTRR_FIX_4K_C8000_MSR, MTRR_FIX_4K_D0000_MSR,
	MTRR_FIX_4K_D8000_MSR, MTRR_FIX_4K_E0000_MSR, MTRR_FIX_4K_E8000_MSR,
	MTRR_FIX_4K_F0000_MSR, MTRR_FIX_4K_F8000_MSR,
};
153 
154 static inline struct saved_msr *save_msr(int index, struct saved_msr *entry)
155 {
156 	msr_t msr;
157 
158 	msr = msr_read(index);
159 	entry->index = index;
160 	entry->lo = msr.lo;
161 	entry->hi = msr.hi;
162 
163 	/* Return the next entry */
164 	entry++;
165 	return entry;
166 }
167 
168 static int save_bsp_msrs(char *start, int size)
169 {
170 	int msr_count;
171 	int num_var_mtrrs;
172 	struct saved_msr *msr_entry;
173 	int i;
174 	msr_t msr;
175 
176 	/* Determine number of MTRRs need to be saved */
177 	msr = msr_read(MTRR_CAP_MSR);
178 	num_var_mtrrs = msr.lo & 0xff;
179 
180 	/* 2 * num_var_mtrrs for base and mask. +1 for IA32_MTRR_DEF_TYPE */
181 	msr_count = 2 * num_var_mtrrs + NUM_FIXED_MTRRS + 1;
182 
183 	if ((msr_count * sizeof(struct saved_msr)) > size) {
184 		printf("Cannot mirror all %d msrs.\n", msr_count);
185 		return -ENOSPC;
186 	}
187 
188 	msr_entry = (void *)start;
189 	for (i = 0; i < NUM_FIXED_MTRRS; i++)
190 		msr_entry = save_msr(fixed_mtrrs[i], msr_entry);
191 
192 	for (i = 0; i < num_var_mtrrs; i++) {
193 		msr_entry = save_msr(MTRR_PHYS_BASE_MSR(i), msr_entry);
194 		msr_entry = save_msr(MTRR_PHYS_MASK_MSR(i), msr_entry);
195 	}
196 
197 	msr_entry = save_msr(MTRR_DEF_TYPE_MSR, msr_entry);
198 
199 	return msr_count;
200 }
201 
202 static int load_sipi_vector(atomic_t **ap_countp)
203 {
204 	struct sipi_params_16bit *params16;
205 	struct sipi_params *params;
206 	static char msr_save[512];
207 	char *stack;
208 	ulong addr;
209 	int code_len;
210 	int size;
211 	int ret;
212 
213 	/* Copy in the code */
214 	code_len = ap_start16_code_end - ap_start16;
215 	debug("Copying SIPI code to %x: %d bytes\n", AP_DEFAULT_BASE,
216 	      code_len);
217 	memcpy((void *)AP_DEFAULT_BASE, ap_start16, code_len);
218 
219 	addr = AP_DEFAULT_BASE + (ulong)sipi_params_16bit - (ulong)ap_start16;
220 	params16 = (struct sipi_params_16bit *)addr;
221 	params16->ap_start = (uint32_t)ap_start;
222 	params16->gdt = (uint32_t)gd->arch.gdt;
223 	params16->gdt_limit = X86_GDT_SIZE - 1;
224 	debug("gdt = %x, gdt_limit = %x\n", params16->gdt, params16->gdt_limit);
225 
226 	params = (struct sipi_params *)sipi_params;
227 	debug("SIPI 32-bit params at %p\n", params);
228 	params->idt_ptr = (uint32_t)x86_get_idt();
229 
230 	params->stack_size = CONFIG_AP_STACK_SIZE;
231 	size = params->stack_size * CONFIG_MAX_CPUS;
232 	stack = memalign(size, 4096);
233 	if (!stack)
234 		return -ENOMEM;
235 	params->stack_top = (u32)(stack + size);
236 
237 	params->microcode_ptr = 0;
238 	params->msr_table_ptr = (u32)msr_save;
239 	ret = save_bsp_msrs(msr_save, sizeof(msr_save));
240 	if (ret < 0)
241 		return ret;
242 	params->msr_count = ret;
243 
244 	params->c_handler = (uint32_t)&ap_init;
245 
246 	*ap_countp = &params->ap_count;
247 	atomic_set(*ap_countp, 0);
248 	debug("SIPI vector is ready\n");
249 
250 	return 0;
251 }
252 
253 static int check_cpu_devices(int expected_cpus)
254 {
255 	int i;
256 
257 	for (i = 0; i < expected_cpus; i++) {
258 		struct udevice *dev;
259 		int ret;
260 
261 		ret = uclass_find_device(UCLASS_CPU, i, &dev);
262 		if (ret) {
263 			debug("Cannot find CPU %d in device tree\n", i);
264 			return ret;
265 		}
266 	}
267 
268 	return 0;
269 }
270 
271 /* Returns 1 for timeout. 0 on success */
272 static int apic_wait_timeout(int total_delay, int delay_step)
273 {
274 	int total = 0;
275 	int timeout = 0;
276 
277 	while (lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY) {
278 		udelay(delay_step);
279 		total += delay_step;
280 		if (total >= total_delay) {
281 			timeout = 1;
282 			break;
283 		}
284 	}
285 
286 	return timeout;
287 }
288 
289 static int start_aps(int ap_count, atomic_t *num_aps)
290 {
291 	int sipi_vector;
292 	/* Max location is 4KiB below 1MiB */
293 	const int max_vector_loc = ((1 << 20) - (1 << 12)) >> 12;
294 
295 	if (ap_count == 0)
296 		return 0;
297 
298 	/* The vector is sent as a 4k aligned address in one byte */
299 	sipi_vector = AP_DEFAULT_BASE >> 12;
300 
301 	if (sipi_vector > max_vector_loc) {
302 		printf("SIPI vector too large! 0x%08x\n",
303 		       sipi_vector);
304 		return -1;
305 	}
306 
307 	debug("Attempting to start %d APs\n", ap_count);
308 
309 	if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
310 		debug("Waiting for ICR not to be busy...");
311 		if (apic_wait_timeout(1000, 50)) {
312 			debug("timed out. Aborting.\n");
313 			return -1;
314 		} else {
315 			debug("done.\n");
316 		}
317 	}
318 
319 	/* Send INIT IPI to all but self */
320 	lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
321 	lapic_write_around(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
322 			   LAPIC_DM_INIT);
323 	debug("Waiting for 10ms after sending INIT.\n");
324 	mdelay(10);
325 
326 	/* Send 1st SIPI */
327 	if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
328 		debug("Waiting for ICR not to be busy...");
329 		if (apic_wait_timeout(1000, 50)) {
330 			debug("timed out. Aborting.\n");
331 			return -1;
332 		} else {
333 			debug("done.\n");
334 		}
335 	}
336 
337 	lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
338 	lapic_write_around(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
339 			   LAPIC_DM_STARTUP | sipi_vector);
340 	debug("Waiting for 1st SIPI to complete...");
341 	if (apic_wait_timeout(10000, 50)) {
342 		debug("timed out.\n");
343 		return -1;
344 	} else {
345 		debug("done.\n");
346 	}
347 
348 	/* Wait for CPUs to check in up to 200 us */
349 	wait_for_aps(num_aps, ap_count, 200, 15);
350 
351 	/* Send 2nd SIPI */
352 	if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
353 		debug("Waiting for ICR not to be busy...");
354 		if (apic_wait_timeout(1000, 50)) {
355 			debug("timed out. Aborting.\n");
356 			return -1;
357 		} else {
358 			debug("done.\n");
359 		}
360 	}
361 
362 	lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
363 	lapic_write_around(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
364 			   LAPIC_DM_STARTUP | sipi_vector);
365 	debug("Waiting for 2nd SIPI to complete...");
366 	if (apic_wait_timeout(10000, 50)) {
367 		debug("timed out.\n");
368 		return -1;
369 	} else {
370 		debug("done.\n");
371 	}
372 
373 	/* Wait for CPUs to check in */
374 	if (wait_for_aps(num_aps, ap_count, 10000, 50)) {
375 		debug("Not all APs checked in: %d/%d.\n",
376 		      atomic_read(num_aps), ap_count);
377 		return -1;
378 	}
379 
380 	return 0;
381 }
382 
/*
 * Execute the flight plan on the BSP. For each record: wait (with timeout)
 * for all APs to check in if the record has not already been released, run
 * the BSP callback, then release the barrier so the APs proceed to their
 * own callback for that record.
 *
 * @cpu:       BSP CPU device, passed to each bsp_call
 * @mp_params: flight plan to execute
 * @return 0 on success, -1 if any record timed out waiting for APs (later
 *         records are still executed and their barriers still released)
 */
static int bsp_do_flight_plan(struct udevice *cpu, struct mp_params *mp_params)
{
	int i;
	int ret = 0;
	const int timeout_us = 100000;	/* up to 100ms per record */
	const int step_us = 100;
	int num_aps = num_cpus - 1;	/* num_cpus was set by mp_init() */

	for (i = 0; i < mp_params->num_records; i++) {
		struct mp_flight_record *rec = &mp_params->flight_plan[i];

		/* Wait for APs if the record is not released */
		if (atomic_read(&rec->barrier) == 0) {
			/* Wait for the APs to check in */
			if (wait_for_aps(&rec->cpus_entered, num_aps,
					 timeout_us, step_us)) {
				debug("MP record %d timeout.\n", i);
				ret = -1;
			}
		}

		if (rec->bsp_call != NULL)
			rec->bsp_call(cpu, rec->bsp_arg);

		/* Let the APs run their ap_call for this record */
		release_barrier(&rec->barrier);
	}
	return ret;
}
411 
412 static int init_bsp(struct udevice **devp)
413 {
414 	char processor_name[CPU_MAX_NAME_LEN];
415 	int apic_id;
416 	int ret;
417 
418 	cpu_get_name(processor_name);
419 	debug("CPU: %s.\n", processor_name);
420 
421 	lapic_setup();
422 
423 	apic_id = lapicid();
424 	ret = find_cpu_by_apid_id(apic_id, devp);
425 	if (ret) {
426 		printf("Cannot find boot CPU, APIC ID %d\n", apic_id);
427 		return ret;
428 	}
429 
430 	return 0;
431 }
432 
433 int mp_init(struct mp_params *p)
434 {
435 	int num_aps;
436 	atomic_t *ap_count;
437 	struct udevice *cpu;
438 	int ret;
439 
440 	/* This will cause the CPUs devices to be bound */
441 	struct uclass *uc;
442 	ret = uclass_get(UCLASS_CPU, &uc);
443 	if (ret)
444 		return ret;
445 
446 	ret = init_bsp(&cpu);
447 	if (ret) {
448 		debug("Cannot init boot CPU: err=%d\n", ret);
449 		return ret;
450 	}
451 
452 	if (p == NULL || p->flight_plan == NULL || p->num_records < 1) {
453 		printf("Invalid MP parameters\n");
454 		return -1;
455 	}
456 
457 	num_cpus = cpu_get_count(cpu);
458 	if (num_cpus < 0) {
459 		debug("Cannot get number of CPUs: err=%d\n", num_cpus);
460 		return num_cpus;
461 	}
462 
463 	if (num_cpus < 2)
464 		debug("Warning: Only 1 CPU is detected\n");
465 
466 	ret = check_cpu_devices(num_cpus);
467 	if (ret)
468 		debug("Warning: Device tree does not describe all CPUs. Extra ones will not be started correctly\n");
469 
470 	/* Copy needed parameters so that APs have a reference to the plan */
471 	mp_info.num_records = p->num_records;
472 	mp_info.records = p->flight_plan;
473 
474 	/* Load the SIPI vector */
475 	ret = load_sipi_vector(&ap_count);
476 	if (ap_count == NULL)
477 		return -1;
478 
479 	/*
480 	 * Make sure SIPI data hits RAM so the APs that come up will see
481 	 * the startup code even if the caches are disabled
482 	 */
483 	wbinvd();
484 
485 	/* Start the APs providing number of APs and the cpus_entered field */
486 	num_aps = num_cpus - 1;
487 	ret = start_aps(num_aps, ap_count);
488 	if (ret) {
489 		mdelay(1000);
490 		debug("%d/%d eventually checked in?\n", atomic_read(ap_count),
491 		      num_aps);
492 		return ret;
493 	}
494 
495 	/* Walk the flight plan for the BSP */
496 	ret = bsp_do_flight_plan(cpu, p);
497 	if (ret) {
498 		debug("CPU init failed: err=%d\n", ret);
499 		return ret;
500 	}
501 
502 	return 0;
503 }
504 
/*
 * Probe (initialise) a single CPU device; suitable as a per-CPU callback.
 *
 * @cpu:    CPU device to probe
 * @unused: unused callback argument
 * @return 0 on success, -ve error from device_probe() on failure
 */
int mp_init_cpu(struct udevice *cpu, void *unused)
{
	return device_probe(cpu);
}
509