xref: /openbmc/u-boot/board/synopsys/hsdk/hsdk.c (revision 1a68faac)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Copyright (C) 2018 Synopsys, Inc. All rights reserved.
4  * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
5  */
6 
7 #include <common.h>
8 #include <config.h>
9 #include <linux/printk.h>
10 #include <linux/kernel.h>
11 #include <linux/io.h>
12 #include <asm/arcregs.h>
13 #include <fdt_support.h>
14 #include <dwmmc.h>
15 #include <malloc.h>
16 #include <usb.h>
17 
18 #include "clk-lib.h"
19 #include "env-lib.h"
20 
21 DECLARE_GLOBAL_DATA_PTR;
22 
/* Mask with one bit set for each of the NR_CPUS cores */
#define ALL_CPU_MASK		GENMASK(NR_CPUS - 1, 0)
/* ID of the core U-Boot itself must run on (see check_master_cpu_id()) */
#define MASTER_CPU_ID		0
/* Shift applied to ICCM / DCCM / non-volatile-limit values before AUX write */
#define APERTURE_SHIFT		28
/* core_iccm / core_dccm value meaning "leave this CCM unmapped" */
#define NO_CCM			0x10
/* Value a slave CPU stores to cross_cpu_data.ready_flag when it is set up */
#define SLAVE_CPU_READY		0x12345678
/* Slave CPU progress markers stored to cross_cpu_data.status[] */
#define BOOTSTAGE_1		1 /* after SP, FP setup, before HW init */
#define BOOTSTAGE_2		2 /* after HW init, before self halt */
#define BOOTSTAGE_3		3 /* after self halt */
#define BOOTSTAGE_4		4 /* before app launch */
#define BOOTSTAGE_5		5 /* after app launch, unreachable */

/* Address the slave boot address is written to */
#define RESET_VECTOR_ADDR	0x0

#define CREG_BASE		(ARC_PERIPHERAL_BASE + 0x1000)
#define CREG_CPU_START		(CREG_BASE + 0x400)
/* Bits of CREG_CPU_START that select which cores get kicked */
#define CREG_CPU_START_MASK	0xF

#define SDIO_BASE		(ARC_PERIPHERAL_BASE + 0xA000)
#define SDIO_UHS_REG_EXT	(SDIO_BASE + 0x108)
/*
 * Unsigned constant on purpose: (2 << 30) shifts into the sign bit of a
 * signed int, which is undefined behaviour in C.
 */
#define SDIO_UHS_REG_EXT_DIV_2	(2U << 30)
43 
/*
 * Uncached access macros.
 *
 * arc_read_uncached_32() - load a 32-bit word from @ptr with the ld.di
 * instruction and yield it as the value of the expression.
 */
#define arc_read_uncached_32(ptr)			\
({							\
	unsigned int __val;				\
	__asm__ __volatile__(				\
		"ld.di	%[val], [%[addr]]\n"		\
		: [val] "=r" (__val)			\
		: [addr] "r" (ptr));			\
	__val;						\
})
54 
/*
 * arc_write_uncached_32() - store the 32-bit value @data to @ptr with the
 * st.di instruction. The expression has no value.
 */
#define arc_write_uncached_32(ptr, data)		\
({							\
	__asm__ __volatile__(				\
		"st.di	%[val], [%[addr]]\n"		\
		: /* no output */			\
		: [val] "r" (data), [addr] "r" (ptr));	\
})
62 
/*
 * Per-core settings, one slot per CPU. The iccm / dccm slots are filled
 * through env_map_core ("core_iccm", "core_dccm") and the entry slots through
 * env_map_go ("core_entry") or directly by board_jump_and_run().
 */
struct hsdk_env_core_ctl {
	/* address each core jumps to when it is launched */
	u32_env entry[NR_CPUS];
	/* ICCM aperture number, or NO_CCM to leave ICCM unmapped */
	u32_env iccm[NR_CPUS];
	/* DCCM aperture number, or NO_CCM to leave DCCM unmapped */
	u32_env dccm[NR_CPUS];
};
68 
/*
 * Cluster-wide settings. All u32_env members are filled from the environment
 * (or command arguments) through the env_map_* tables in this file.
 */
struct hsdk_env_common_ctl {
	/* set by "hsdk_go halt": halt the master right before the app starts */
	bool halt_on_boot;
	/* bitmask of the CPUs to use, bit N = CPU N */
	u32_env core_mask;
	/* clock rates as passed to soc_clk_ctl() with CLK_MHZ */
	u32_env cpu_freq;
	u32_env axi_freq;
	u32_env tun_freq;
	/* aperture number programmed into the non-volatile / cache limit */
	u32_env nvlim;
	/* non-zero = enable the instruction / data cache */
	u32_env icache;
	u32_env dcache;
};
79 
/*
 * Uncached cross-CPU structure. All CPUs must access the fields of this
 * structure only with the arc_read_uncached_32() / arc_write_uncached_32()
 * accessors (which implement the ld.di / st.di instructions). Simultaneous
 * cached and uncached access to this area will lead to data loss.
 * We flush all data caches in board_early_init_r() as we don't want to have
 * any dirty line in L1d$ or SL$ in this area.
 */
struct hsdk_cross_cpu {
	/* slave CPU ready flag */
	u32 ready_flag;
	/* address of the area, which can be used for stack by slave CPU */
	u32 stack_ptr;
	/* slave CPU status - bootstage number */
	s32 status[NR_CPUS];

	/*
	 * Slave CPU data - a copy of the corresponding fields in the
	 * hsdk_env_core_ctl and hsdk_env_common_ctl structures which are
	 * required for slave CPU initialization.
	 * These fields are populated by copying from the hsdk_env_core_ctl
	 * and hsdk_env_common_ctl structures with the sync_cross_cpu_data()
	 * function.
	 */
	u32 entry[NR_CPUS];
	u32 iccm[NR_CPUS];
	u32 dccm[NR_CPUS];

	u32 core_mask;
	u32 icache;
	u32 dcache;

	/* trailing padding of ARCH_DMA_MINALIGN bytes; never accessed */
	u8 cache_padding[ARCH_DMA_MINALIGN];
} __aligned(ARCH_DMA_MINALIGN);
114 
/*
 * Place for slave CPUs temporary stack. do_init_slave_cpu() hands each slave
 * the address slave_stack + 64 * cpu_id as its initial stack pointer.
 */
static u32 slave_stack[256 * NR_CPUS] __aligned(ARCH_DMA_MINALIGN);

/* Settings parsed from the environment / command arguments (master only) */
static struct hsdk_env_common_ctl env_common = {};
static struct hsdk_env_core_ctl env_core = {};
/* Shared with slave CPUs; use the uncached accessors only */
static struct hsdk_cross_cpu cross_cpu_data;
121 
/*
 * Common (not per-core) variables consumed by hsdk_init, see prepare_cpus().
 * NOTE(review): struct env_map_common is declared in env-lib.h; the columns
 * look like { name, number base, mandatory flag, min, max, destination } -
 * confirm against that header.
 */
static const struct env_map_common env_map_common[] = {
	{ "core_mask",	ENV_HEX, true,	0x1, 0xF,	&env_common.core_mask },
	{ "non_volatile_limit", ENV_HEX, true, 0, 0xF,	&env_common.nvlim },
	{ "icache_ena",	ENV_HEX, true,	0, 1,		&env_common.icache },
	{ "dcache_ena",	ENV_HEX, true,	0, 1,		&env_common.dcache },
	{}
};
129 
/*
 * Clock variables consumed by "hsdk_clock set", see do_hsdk_clock_set().
 * The values are passed to soc_clk_ctl() together with CLK_MHZ.
 * NOTE(review): column meaning as for env_map_common - confirm in env-lib.h.
 */
static const struct env_map_common env_map_clock[] = {
	{ "cpu_freq",	ENV_DEC, false,	100, 1000,	&env_common.cpu_freq },
	{ "axi_freq",	ENV_DEC, false,	200, 800,	&env_common.axi_freq },
	{ "tun_freq",	ENV_DEC, false,	0, 150,		&env_common.tun_freq },
	{}
};
136 
/*
 * Per-core CCM variables consumed by hsdk_init, see prepare_cpus().
 * NOTE(review): struct env_map_percpu is declared in env-lib.h; the two brace
 * lists look like per-CPU minimum and maximum values - confirm there.
 */
static const struct env_map_percpu env_map_core[] = {
	{ "core_iccm", ENV_HEX, true, {NO_CCM, 0, NO_CCM, 0}, {NO_CCM, 0xF, NO_CCM, 0xF}, &env_core.iccm },
	{ "core_dccm", ENV_HEX, true, {NO_CCM, 0, NO_CCM, 0}, {NO_CCM, 0xF, NO_CCM, 0xF}, &env_core.dccm },
	{}
};
142 
/*
 * "core_mask" only, used by hsdk_go and board_prep_linux(). Unlike the entry
 * in env_map_common the third column is false here; board_prep_linux() falls
 * back to ALL_CPU_MASK when the variable was not set.
 */
static const struct env_map_common env_map_mask[] = {
	{ "core_mask",	ENV_HEX, false,	0x1, 0xF,	&env_common.core_mask },
	{}
};
147 
/*
 * Per-core entry points consumed by hsdk_go, see do_hsdk_go().
 * NOTE(review): column meaning as for env_map_core - confirm in env-lib.h.
 */
static const struct env_map_percpu env_map_go[] = {
	{ "core_entry", ENV_HEX, true, {0, 0, 0, 0}, {U32_MAX, U32_MAX, U32_MAX, U32_MAX}, &env_core.entry },
	{}
};
152 
153 static void sync_cross_cpu_data(void)
154 {
155 	u32 value;
156 
157 	for (u32 i = 0; i < NR_CPUS; i++) {
158 		value = env_core.entry[i].val;
159 		arc_write_uncached_32(&cross_cpu_data.entry[i], value);
160 	}
161 
162 	for (u32 i = 0; i < NR_CPUS; i++) {
163 		value = env_core.iccm[i].val;
164 		arc_write_uncached_32(&cross_cpu_data.iccm[i], value);
165 	}
166 
167 	for (u32 i = 0; i < NR_CPUS; i++) {
168 		value = env_core.dccm[i].val;
169 		arc_write_uncached_32(&cross_cpu_data.dccm[i], value);
170 	}
171 
172 	value = env_common.core_mask.val;
173 	arc_write_uncached_32(&cross_cpu_data.core_mask, value);
174 
175 	value = env_common.icache.val;
176 	arc_write_uncached_32(&cross_cpu_data.icache, value);
177 
178 	value = env_common.dcache.val;
179 	arc_write_uncached_32(&cross_cpu_data.dcache, value);
180 }
181 
182 /* Can be used only on master CPU */
183 static bool is_cpu_used(u32 cpu_id)
184 {
185 	return !!(env_common.core_mask.val & BIT(cpu_id));
186 }
187 
188 /* TODO: add ICCM BCR and DCCM BCR runtime check */
189 static void init_slave_cpu_func(u32 core)
190 {
191 	u32 val;
192 
193 	/* Remap ICCM to another memory region if it exists */
194 	val = arc_read_uncached_32(&cross_cpu_data.iccm[core]);
195 	if (val != NO_CCM)
196 		write_aux_reg(ARC_AUX_ICCM_BASE, val << APERTURE_SHIFT);
197 
198 	/* Remap DCCM to another memory region if it exists */
199 	val = arc_read_uncached_32(&cross_cpu_data.dccm[core]);
200 	if (val != NO_CCM)
201 		write_aux_reg(ARC_AUX_DCCM_BASE, val << APERTURE_SHIFT);
202 
203 	if (arc_read_uncached_32(&cross_cpu_data.icache))
204 		icache_enable();
205 	else
206 		icache_disable();
207 
208 	if (arc_read_uncached_32(&cross_cpu_data.dcache))
209 		dcache_enable();
210 	else
211 		dcache_disable();
212 }
213 
214 static void init_cluster_nvlim(void)
215 {
216 	u32 val = env_common.nvlim.val << APERTURE_SHIFT;
217 
218 	flush_dcache_all();
219 	write_aux_reg(ARC_AUX_NON_VOLATILE_LIMIT, val);
220 	write_aux_reg(AUX_AUX_CACHE_LIMIT, val);
221 	flush_n_invalidate_dcache_all();
222 }
223 
224 static void init_master_icache(void)
225 {
226 	if (icache_status()) {
227 		/* I$ is enabled - we need to disable it */
228 		if (!env_common.icache.val)
229 			icache_disable();
230 	} else {
231 		/* I$ is disabled - we need to enable it */
232 		if (env_common.icache.val) {
233 			icache_enable();
234 
235 			/* invalidate I$ right after enable */
236 			invalidate_icache_all();
237 		}
238 	}
239 }
240 
241 static void init_master_dcache(void)
242 {
243 	if (dcache_status()) {
244 		/* D$ is enabled - we need to disable it */
245 		if (!env_common.dcache.val)
246 			dcache_disable();
247 	} else {
248 		/* D$ is disabled - we need to enable it */
249 		if (env_common.dcache.val)
250 			dcache_enable();
251 
252 		/* TODO: probably we need ti invalidate D$ right after enable */
253 	}
254 }
255 
/*
 * Last step before control leaves U-Boot: disable interrupts, then run
 * sync_n_cleanup_cache_all(). Always returns 0.
 */
static int cleanup_before_go(void)
{
	disable_interrupts();
	sync_n_cleanup_cache_all();

	return 0;
}
263 
/*
 * Store @addr at RESET_VECTOR_ADDR so that a slave CPU starts executing
 * there once it is kicked.
 */
void slave_cpu_set_boot_addr(u32 addr)
{
	/* All cores have reset vector pointing to 0 */
	writel(addr, (void __iomem *)RESET_VECTOR_ADDR);

	/* Make sure other cores see written value in memory */
	sync_n_cleanup_cache_all();
}
272 
/* Halt the calling CPU by issuing the FLAG instruction with operand 1 */
static inline void halt_this_cpu(void)
{
	__builtin_arc_flag(1);
}
277 
278 static void smp_kick_cpu_x(u32 cpu_id)
279 {
280 	int cmd = readl((void __iomem *)CREG_CPU_START);
281 
282 	if (cpu_id > NR_CPUS)
283 		return;
284 
285 	cmd &= ~CREG_CPU_START_MASK;
286 	cmd |= (1 << cpu_id);
287 	writel(cmd, (void __iomem *)CREG_CPU_START);
288 }
289 
290 static u32 prepare_cpu_ctart_reg(void)
291 {
292 	int cmd = readl((void __iomem *)CREG_CPU_START);
293 
294 	cmd &= ~CREG_CPU_START_MASK;
295 
296 	return cmd | env_common.core_mask.val;
297 }
298 
/*
 * Slave CPU entry for configuration.
 *
 * do_init_slave_cpu() passes this function's address to
 * slave_cpu_set_boot_addr() before kicking a slave. The function is declared
 * naked and therefore loads SP / FP by hand from cross_cpu_data.stack_ptr
 * before doing anything else. Progress is reported through
 * cross_cpu_data.status[] (BOOTSTAGE_1 .. BOOTSTAGE_5), and all shared data
 * is accessed with the uncached accessors. It never returns.
 */
__attribute__((naked, noreturn, flatten)) noinline void hsdk_core_init_f(void)
{
	/* Load the stack address published by the master into SP and FP */
	__asm__ __volatile__(
		"ld.di	r8,	[%0]\n"
		"mov	%%sp,	r8\n"
		"mov	%%fp,	%%sp\n"
		: /* no output */
		: "r" (&cross_cpu_data.stack_ptr));

	invalidate_icache_all();

	arc_write_uncached_32(&cross_cpu_data.status[CPU_ID_GET()], BOOTSTAGE_1);
	init_slave_cpu_func(CPU_ID_GET());

	/* Tell the master (polling in do_init_slave_cpu()) that we are set up */
	arc_write_uncached_32(&cross_cpu_data.ready_flag, SLAVE_CPU_READY);
	arc_write_uncached_32(&cross_cpu_data.status[CPU_ID_GET()], BOOTSTAGE_2);

	/* Halt the processor until the master kicks us again */
	halt_this_cpu();

	/*
	 * 3 NOPs after FLAG 1 instruction are no longer required for ARCv2
	 * cores but we leave them for debug purposes.
	 */
	__builtin_arc_nop();
	__builtin_arc_nop();
	__builtin_arc_nop();

	arc_write_uncached_32(&cross_cpu_data.status[CPU_ID_GET()], BOOTSTAGE_3);

	/* get the updated entry - invalidate i$ */
	invalidate_icache_all();

	arc_write_uncached_32(&cross_cpu_data.status[CPU_ID_GET()], BOOTSTAGE_4);

	/* Run our program */
	((void (*)(void))(arc_read_uncached_32(&cross_cpu_data.entry[CPU_ID_GET()])))();

	/* This bootstage is unreachable as we don't return from app we launch */
	arc_write_uncached_32(&cross_cpu_data.status[CPU_ID_GET()], BOOTSTAGE_5);

	/* Something went terribly wrong */
	while (true)
		halt_this_cpu();
}
345 
346 static void clear_cross_cpu_data(void)
347 {
348 	arc_write_uncached_32(&cross_cpu_data.ready_flag, 0);
349 	arc_write_uncached_32(&cross_cpu_data.stack_ptr, 0);
350 
351 	for (u32 i = 0; i < NR_CPUS; i++)
352 		arc_write_uncached_32(&cross_cpu_data.status[i], 0);
353 }
354 
355 static noinline void do_init_slave_cpu(u32 cpu_id)
356 {
357 	/* attempts number for check clave CPU ready_flag */
358 	u32 attempts = 100;
359 	u32 stack_ptr = (u32)(slave_stack + (64 * cpu_id));
360 
361 	if (cpu_id >= NR_CPUS)
362 		return;
363 
364 	arc_write_uncached_32(&cross_cpu_data.ready_flag, 0);
365 
366 	/* Use global unique place for each slave cpu stack */
367 	arc_write_uncached_32(&cross_cpu_data.stack_ptr, stack_ptr);
368 
369 	debug("CPU %u: stack pool base: %p\n", cpu_id, slave_stack);
370 	debug("CPU %u: current slave stack base: %x\n", cpu_id, stack_ptr);
371 	slave_cpu_set_boot_addr((u32)hsdk_core_init_f);
372 
373 	smp_kick_cpu_x(cpu_id);
374 
375 	debug("CPU %u: cross-cpu flag: %x [before timeout]\n", cpu_id,
376 	      arc_read_uncached_32(&cross_cpu_data.ready_flag));
377 
378 	while (!arc_read_uncached_32(&cross_cpu_data.ready_flag) && attempts--)
379 		mdelay(10);
380 
381 	/* Just to be sure that slave cpu is halted after it set ready_flag */
382 	mdelay(20);
383 
384 	/*
385 	 * Only print error here if we reach timeout as there is no option to
386 	 * halt slave cpu (or check that slave cpu is halted)
387 	 */
388 	if (!attempts)
389 		pr_err("CPU %u is not responding after init!\n", cpu_id);
390 
391 	/* Check current stage of slave cpu */
392 	if (arc_read_uncached_32(&cross_cpu_data.status[cpu_id]) != BOOTSTAGE_2)
393 		pr_err("CPU %u status is unexpected: %d\n", cpu_id,
394 		       arc_read_uncached_32(&cross_cpu_data.status[cpu_id]));
395 
396 	debug("CPU %u: cross-cpu flag: %x [after timeout]\n", cpu_id,
397 	      arc_read_uncached_32(&cross_cpu_data.ready_flag));
398 	debug("CPU %u: status: %d [after timeout]\n", cpu_id,
399 	      arc_read_uncached_32(&cross_cpu_data.status[cpu_id]));
400 }
401 
402 static void do_init_slave_cpus(void)
403 {
404 	clear_cross_cpu_data();
405 	sync_cross_cpu_data();
406 
407 	debug("cross_cpu_data location: %#x\n", (u32)&cross_cpu_data);
408 
409 	for (u32 i = MASTER_CPU_ID + 1; i < NR_CPUS; i++)
410 		if (is_cpu_used(i))
411 			do_init_slave_cpu(i);
412 }
413 
/* Apply the icache / dcache settings to the CPU U-Boot is running on */
static void do_init_master_cpu(void)
{
	/*
	 * Setup master caches even if master isn't used as we want to use
	 * same cache configuration on all running CPUs
	 */
	init_master_icache();
	init_master_dcache();
}
423 
/*
 * AXI master numbers. The value is the index "m" passed to the
 * CREG_AXI_M_*(m) register macros by init_memory_bridge().
 */
enum hsdk_axi_masters {
	M_HS_CORE = 0,
	M_HS_RTT,
	M_AXI_TUN,
	M_HDMI_VIDEO,
	M_HDMI_AUDIO,
	M_USB_HOST,
	M_ETHERNET,
	M_SDIO,
	M_GPU,
	M_DMAC_0,
	M_DMAC_1,
	M_DVFS
};
438 
/* Value written to the *_UPDT registers after a group of settings */
#define UPDATE_VAL	1

/*
 * m	master		AXI_M_m_SLV0	AXI_M_m_SLV1	AXI_M_m_OFFSET0	AXI_M_m_OFFSET1
 * 0	HS (CBU)	0x11111111	0x63111111	0xFEDCBA98	0x0E543210
 * 1	HS (RTT)	0x77777777	0x77777777	0xFEDCBA98	0x76543210
 * 2	AXI Tunnel	0x88888888	0x88888888	0xFEDCBA98	0x76543210
 * 3	HDMI-VIDEO	0x77777777	0x77777777	0xFEDCBA98	0x76543210
 * 4	HDMI-AUDIO	0x77777777	0x77777777	0xFEDCBA98	0x76543210
 * 5	USB-HOST	0x77777777	0x77999999	0xFEDCBA98	0x76DCBA98
 * 6	ETHERNET	0x77777777	0x77999999	0xFEDCBA98	0x76DCBA98
 * 7	SDIO		0x77777777	0x77999999	0xFEDCBA98	0x76DCBA98
 * 8	GPU		0x77777777	0x77777777	0xFEDCBA98	0x76543210
 * 9	DMAC (port #1)	0x77777777	0x77777777	0xFEDCBA98	0x76543210
 * 10	DMAC (port #2)	0x77777777	0x77777777	0xFEDCBA98	0x76543210
 * 11	DVFS		0x00000000	0x60000000	0x00000000	0x00000000
 *
 * Please read ARC HS Development IC Specification, section 17.2 for more
 * information about apertures configuration.
 * NOTE: we intentionally modify default settings in U-Boot. Default settings
 * are specified in "Table 111 CREG Address Decoder register reset values".
 */

/* Per-master register block: 0x20 bytes per master "m", starting at CREG_BASE */
#define CREG_AXI_M_SLV0(m)  ((void __iomem *)(CREG_BASE + 0x020 * (m)))
#define CREG_AXI_M_SLV1(m)  ((void __iomem *)(CREG_BASE + 0x020 * (m) + 0x004))
#define CREG_AXI_M_OFT0(m)  ((void __iomem *)(CREG_BASE + 0x020 * (m) + 0x008))
#define CREG_AXI_M_OFT1(m)  ((void __iomem *)(CREG_BASE + 0x020 * (m) + 0x00C))
#define CREG_AXI_M_UPDT(m)  ((void __iomem *)(CREG_BASE + 0x020 * (m) + 0x014))

/* BOOT register, which only the M_HS_CORE master has */
#define CREG_AXI_M_HS_CORE_BOOT	((void __iomem *)(CREG_BASE + 0x010))

#define CREG_PAE	((void __iomem *)(CREG_BASE + 0x180))
#define CREG_PAE_UPDT	((void __iomem *)(CREG_BASE + 0x194))
472 
473 void init_memory_bridge(void)
474 {
475 	u32 reg;
476 
477 	/*
478 	 * M_HS_CORE has one unic register - BOOT.
479 	 * We need to clean boot mirror (BOOT[1:0]) bits in them.
480 	 */
481 	reg = readl(CREG_AXI_M_HS_CORE_BOOT) & (~0x3);
482 	writel(reg, CREG_AXI_M_HS_CORE_BOOT);
483 	writel(0x11111111, CREG_AXI_M_SLV0(M_HS_CORE));
484 	writel(0x63111111, CREG_AXI_M_SLV1(M_HS_CORE));
485 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HS_CORE));
486 	writel(0x0E543210, CREG_AXI_M_OFT1(M_HS_CORE));
487 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HS_CORE));
488 
489 	writel(0x77777777, CREG_AXI_M_SLV0(M_HS_RTT));
490 	writel(0x77777777, CREG_AXI_M_SLV1(M_HS_RTT));
491 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HS_RTT));
492 	writel(0x76543210, CREG_AXI_M_OFT1(M_HS_RTT));
493 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HS_RTT));
494 
495 	writel(0x88888888, CREG_AXI_M_SLV0(M_AXI_TUN));
496 	writel(0x88888888, CREG_AXI_M_SLV1(M_AXI_TUN));
497 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_AXI_TUN));
498 	writel(0x76543210, CREG_AXI_M_OFT1(M_AXI_TUN));
499 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_AXI_TUN));
500 
501 	writel(0x77777777, CREG_AXI_M_SLV0(M_HDMI_VIDEO));
502 	writel(0x77777777, CREG_AXI_M_SLV1(M_HDMI_VIDEO));
503 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HDMI_VIDEO));
504 	writel(0x76543210, CREG_AXI_M_OFT1(M_HDMI_VIDEO));
505 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HDMI_VIDEO));
506 
507 	writel(0x77777777, CREG_AXI_M_SLV0(M_HDMI_AUDIO));
508 	writel(0x77777777, CREG_AXI_M_SLV1(M_HDMI_AUDIO));
509 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HDMI_AUDIO));
510 	writel(0x76543210, CREG_AXI_M_OFT1(M_HDMI_AUDIO));
511 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HDMI_AUDIO));
512 
513 	writel(0x77777777, CREG_AXI_M_SLV0(M_USB_HOST));
514 	writel(0x77999999, CREG_AXI_M_SLV1(M_USB_HOST));
515 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_USB_HOST));
516 	writel(0x76DCBA98, CREG_AXI_M_OFT1(M_USB_HOST));
517 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_USB_HOST));
518 
519 	writel(0x77777777, CREG_AXI_M_SLV0(M_ETHERNET));
520 	writel(0x77999999, CREG_AXI_M_SLV1(M_ETHERNET));
521 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_ETHERNET));
522 	writel(0x76DCBA98, CREG_AXI_M_OFT1(M_ETHERNET));
523 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_ETHERNET));
524 
525 	writel(0x77777777, CREG_AXI_M_SLV0(M_SDIO));
526 	writel(0x77999999, CREG_AXI_M_SLV1(M_SDIO));
527 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_SDIO));
528 	writel(0x76DCBA98, CREG_AXI_M_OFT1(M_SDIO));
529 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_SDIO));
530 
531 	writel(0x77777777, CREG_AXI_M_SLV0(M_GPU));
532 	writel(0x77777777, CREG_AXI_M_SLV1(M_GPU));
533 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_GPU));
534 	writel(0x76543210, CREG_AXI_M_OFT1(M_GPU));
535 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_GPU));
536 
537 	writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_0));
538 	writel(0x77777777, CREG_AXI_M_SLV1(M_DMAC_0));
539 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_0));
540 	writel(0x76543210, CREG_AXI_M_OFT1(M_DMAC_0));
541 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_0));
542 
543 	writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_1));
544 	writel(0x77777777, CREG_AXI_M_SLV1(M_DMAC_1));
545 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_1));
546 	writel(0x76543210, CREG_AXI_M_OFT1(M_DMAC_1));
547 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_1));
548 
549 	writel(0x00000000, CREG_AXI_M_SLV0(M_DVFS));
550 	writel(0x60000000, CREG_AXI_M_SLV1(M_DVFS));
551 	writel(0x00000000, CREG_AXI_M_OFT0(M_DVFS));
552 	writel(0x00000000, CREG_AXI_M_OFT1(M_DVFS));
553 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DVFS));
554 
555 	writel(0x00000000, CREG_PAE);
556 	writel(UPDATE_VAL, CREG_PAE_UPDT);
557 }
558 
559 static void setup_clocks(void)
560 {
561 	ulong rate;
562 
563 	/* Setup CPU clock */
564 	if (env_common.cpu_freq.set) {
565 		rate = env_common.cpu_freq.val;
566 		soc_clk_ctl("cpu-clk", &rate, CLK_ON | CLK_SET | CLK_MHZ);
567 	}
568 
569 	/* Setup TUN clock */
570 	if (env_common.tun_freq.set) {
571 		rate = env_common.tun_freq.val;
572 		if (rate)
573 			soc_clk_ctl("tun-clk", &rate, CLK_ON | CLK_SET | CLK_MHZ);
574 		else
575 			soc_clk_ctl("tun-clk", NULL, CLK_OFF);
576 	}
577 
578 	if (env_common.axi_freq.set) {
579 		rate = env_common.axi_freq.val;
580 		soc_clk_ctl("axi-clk", &rate, CLK_SET | CLK_ON | CLK_MHZ);
581 	}
582 }
583 
/* Setup that is done once for the whole cluster rather than per core */
static void do_init_cluster(void)
{
	/*
	 * A multi-core ARC HS configuration always includes only one
	 * ARC_AUX_NON_VOLATILE_LIMIT register, which is shared by all the
	 * cores.
	 */
	init_cluster_nvlim();
}
593 
594 static int check_master_cpu_id(void)
595 {
596 	if (CPU_ID_GET() == MASTER_CPU_ID)
597 		return 0;
598 
599 	pr_err("u-boot runs on non-master cpu with id: %lu\n", CPU_ID_GET());
600 
601 	return -ENOENT;
602 }
603 
604 static noinline int prepare_cpus(void)
605 {
606 	int ret;
607 
608 	ret = check_master_cpu_id();
609 	if (ret)
610 		return ret;
611 
612 	ret = envs_process_and_validate(env_map_common, env_map_core, is_cpu_used);
613 	if (ret)
614 		return ret;
615 
616 	printf("CPU start mask is %#x\n", env_common.core_mask.val);
617 
618 	do_init_slave_cpus();
619 	do_init_master_cpu();
620 	do_init_cluster();
621 
622 	return 0;
623 }
624 
/*
 * Leave U-Boot: kick the slave CPUs by writing @cpu_start_reg to
 * CREG_CPU_START, then jump to the master's entry point if the master is
 * selected in core_mask, or halt the master otherwise.
 *
 * @cpu_start_reg: value prepared with prepare_cpu_ctart_reg()
 */
static int hsdk_go_run(u32 cpu_start_reg)
{
	/* Cleanup caches, disable interrupts */
	cleanup_before_go();

	/* "hsdk_go halt": stop here, before anything is started */
	if (env_common.halt_on_boot)
		halt_this_cpu();

	/*
	 * 3 NOPs after FLAG 1 instruction are no longer required for ARCv2
	 * cores but we leave them for debug purposes.
	 */
	__builtin_arc_nop();
	__builtin_arc_nop();
	__builtin_arc_nop();

	/* Kick chosen slave CPUs */
	writel(cpu_start_reg, (void __iomem *)CREG_CPU_START);

	if (is_cpu_used(MASTER_CPU_ID))
		((void (*)(void))(env_core.entry[MASTER_CPU_ID].val))();
	else
		halt_this_cpu();

	pr_err("u-boot still runs on cpu [%ld]\n", CPU_ID_GET());

	/*
	 * We will never return after executing our program if master cpu used
	 * otherwise halt master cpu manually.
	 */
	while (true)
		halt_this_cpu();

	return 0;
}
660 
661 int board_prep_linux(bootm_headers_t *images)
662 {
663 	int ret, ofst;
664 	char mask[15];
665 
666 	ret = envs_read_validate_common(env_map_mask);
667 	if (ret)
668 		return ret;
669 
670 	/* Rollback to default values */
671 	if (!env_common.core_mask.set) {
672 		env_common.core_mask.val = ALL_CPU_MASK;
673 		env_common.core_mask.set = true;
674 	}
675 
676 	printf("CPU start mask is %#x\n", env_common.core_mask.val);
677 
678 	if (!is_cpu_used(MASTER_CPU_ID))
679 		pr_err("ERR: try to launch linux with CPU[0] disabled! It doesn't work for ARC.\n");
680 
681 	/*
682 	 * If we want to launch linux on all CPUs we don't need to patch
683 	 * linux DTB as it is default configuration
684 	 */
685 	if (env_common.core_mask.val == ALL_CPU_MASK)
686 		return 0;
687 
688 	if (!IMAGE_ENABLE_OF_LIBFDT || !images->ft_len) {
689 		pr_err("WARN: core_mask setup will work properly only with external DTB!\n");
690 		return 0;
691 	}
692 
693 	/* patch '/possible-cpus' property according to cpu mask */
694 	ofst = fdt_path_offset(images->ft_addr, "/");
695 	sprintf(mask, "%s%s%s%s",
696 		is_cpu_used(0) ? "0," : "",
697 		is_cpu_used(1) ? "1," : "",
698 		is_cpu_used(2) ? "2," : "",
699 		is_cpu_used(3) ? "3," : "");
700 	ret = fdt_setprop_string(images->ft_addr, ofst, "possible-cpus", mask);
701 	/*
702 	 * If we failed to patch '/possible-cpus' property we don't need break
703 	 * linux loading process: kernel will handle it but linux will print
704 	 * warning like "Timeout: CPU1 FAILED to comeup !!!".
705 	 * So warn here about error, but return 0 like no error had occurred.
706 	 */
707 	if (ret)
708 		pr_err("WARN: failed to patch '/possible-cpus' property, ret=%d\n",
709 		       ret);
710 
711 	return 0;
712 }
713 
714 void board_jump_and_run(ulong entry, int zero, int arch, uint params)
715 {
716 	void (*kernel_entry)(int zero, int arch, uint params);
717 	u32 cpu_start_reg;
718 
719 	kernel_entry = (void (*)(int, int, uint))entry;
720 
721 	/* Prepare CREG_CPU_START for kicking chosen CPUs */
722 	cpu_start_reg = prepare_cpu_ctart_reg();
723 
724 	/* In case of run without hsdk_init */
725 	slave_cpu_set_boot_addr(entry);
726 
727 	/* In case of run with hsdk_init */
728 	for (u32 i = 0; i < NR_CPUS; i++) {
729 		env_core.entry[i].val = entry;
730 		env_core.entry[i].set = true;
731 	}
732 	/* sync cross_cpu struct as we updated core-entry variables */
733 	sync_cross_cpu_data();
734 
735 	/* Kick chosen slave CPUs */
736 	writel(cpu_start_reg, (void __iomem *)CREG_CPU_START);
737 
738 	if (is_cpu_used(0))
739 		kernel_entry(zero, arch, params);
740 }
741 
742 static int hsdk_go_prepare_and_run(void)
743 {
744 	/* Prepare CREG_CPU_START for kicking chosen CPUs */
745 	u32 reg = prepare_cpu_ctart_reg();
746 
747 	if (env_common.halt_on_boot)
748 		printf("CPU will halt before application start, start application with debugger.\n");
749 
750 	return hsdk_go_run(reg);
751 }
752 
753 static int do_hsdk_go(cmd_tbl_t *cmdtp, int flag, int argc, char *const argv[])
754 {
755 	int ret;
756 
757 	/*
758 	 * Check for 'halt' parameter. 'halt' = enter halt-mode just before
759 	 * starting the application; can be used for debug.
760 	 */
761 	if (argc > 1) {
762 		env_common.halt_on_boot = !strcmp(argv[1], "halt");
763 		if (!env_common.halt_on_boot) {
764 			pr_err("Unrecognised parameter: \'%s\'\n", argv[1]);
765 			return CMD_RET_FAILURE;
766 		}
767 	}
768 
769 	ret = check_master_cpu_id();
770 	if (ret)
771 		return ret;
772 
773 	ret = envs_process_and_validate(env_map_mask, env_map_go, is_cpu_used);
774 	if (ret)
775 		return ret;
776 
777 	/* sync cross_cpu struct as we updated core-entry variables */
778 	sync_cross_cpu_data();
779 
780 	ret = hsdk_go_prepare_and_run();
781 
782 	return ret ? CMD_RET_FAILURE : CMD_RET_SUCCESS;
783 }
784 
785 U_BOOT_CMD(
786 	hsdk_go, 3, 0, do_hsdk_go,
787 	"Synopsys HSDK specific command",
788 	"     - Boot stand-alone application on HSDK\n"
789 	"hsdk_go halt - Boot stand-alone application on HSDK, halt CPU just before application run\n"
790 );
791 
792 static int do_hsdk_init(cmd_tbl_t *cmdtp, int flag, int argc, char *const argv[])
793 {
794 	static bool done = false;
795 	int ret;
796 
797 	/* hsdk_init can be run only once */
798 	if (done) {
799 		printf("HSDK HW is already initialized! Please reset the board if you want to change the configuration.\n");
800 		return CMD_RET_FAILURE;
801 	}
802 
803 	ret = prepare_cpus();
804 	if (!ret)
805 		done = true;
806 
807 	return ret ? CMD_RET_FAILURE : CMD_RET_SUCCESS;
808 }
809 
810 U_BOOT_CMD(
811 	hsdk_init, 1, 0, do_hsdk_init,
812 	"Synopsys HSDK specific command",
813 	"- Init HSDK HW\n"
814 );
815 
816 static int do_hsdk_clock_set(cmd_tbl_t *cmdtp, int flag, int argc,
817 			     char *const argv[])
818 {
819 	int ret = 0;
820 
821 	/* Strip off leading subcommand argument */
822 	argc--;
823 	argv++;
824 
825 	envs_cleanup_common(env_map_clock);
826 
827 	if (!argc) {
828 		printf("Set clocks to values specified in environment\n");
829 		ret = envs_read_common(env_map_clock);
830 	} else {
831 		printf("Set clocks to values specified in args\n");
832 		ret = args_envs_enumerate(env_map_clock, 2, argc, argv);
833 	}
834 
835 	if (ret)
836 		return CMD_RET_FAILURE;
837 
838 	ret = envs_validate_common(env_map_clock);
839 	if (ret)
840 		return CMD_RET_FAILURE;
841 
842 	/* Setup clock tree HW */
843 	setup_clocks();
844 
845 	return CMD_RET_SUCCESS;
846 }
847 
848 static int do_hsdk_clock_get(cmd_tbl_t *cmdtp, int flag, int argc,
849 			     char *const argv[])
850 {
851 	ulong rate;
852 
853 	if (soc_clk_ctl("cpu-clk", &rate, CLK_GET | CLK_MHZ))
854 		return CMD_RET_FAILURE;
855 
856 	if (env_set_ulong("cpu_freq", rate))
857 		return CMD_RET_FAILURE;
858 
859 	if (soc_clk_ctl("tun-clk", &rate, CLK_GET | CLK_MHZ))
860 		return CMD_RET_FAILURE;
861 
862 	if (env_set_ulong("tun_freq", rate))
863 		return CMD_RET_FAILURE;
864 
865 	if (soc_clk_ctl("axi-clk", &rate, CLK_GET | CLK_MHZ))
866 		return CMD_RET_FAILURE;
867 
868 	if (env_set_ulong("axi_freq", rate))
869 		return CMD_RET_FAILURE;
870 
871 	printf("Clock values are saved to environment\n");
872 
873 	return CMD_RET_SUCCESS;
874 }
875 
876 static int do_hsdk_clock_print(cmd_tbl_t *cmdtp, int flag, int argc,
877 			       char *const argv[])
878 {
879 	/* Main clocks */
880 	soc_clk_ctl("cpu-clk", NULL, CLK_PRINT | CLK_MHZ);
881 	soc_clk_ctl("tun-clk", NULL, CLK_PRINT | CLK_MHZ);
882 	soc_clk_ctl("axi-clk", NULL, CLK_PRINT | CLK_MHZ);
883 	soc_clk_ctl("ddr-clk", NULL, CLK_PRINT | CLK_MHZ);
884 
885 	return CMD_RET_SUCCESS;
886 }
887 
888 static int do_hsdk_clock_print_all(cmd_tbl_t *cmdtp, int flag, int argc,
889 				   char *const argv[])
890 {
891 	/*
892 	 * NOTE: as of today we don't use some peripherals like HDMI / EBI
893 	 * so we don't want to print their clocks ("hdmi-sys-clk", "hdmi-pll",
894 	 * "hdmi-clk", "ebi-clk"). Nevertheless their clock subsystems is fully
895 	 * functional and we can print their clocks if it is required
896 	 */
897 
898 	/* CPU clock domain */
899 	soc_clk_ctl("cpu-pll", NULL, CLK_PRINT | CLK_MHZ);
900 	soc_clk_ctl("cpu-clk", NULL, CLK_PRINT | CLK_MHZ);
901 	printf("\n");
902 
903 	/* SYS clock domain */
904 	soc_clk_ctl("sys-pll", NULL, CLK_PRINT | CLK_MHZ);
905 	soc_clk_ctl("apb-clk", NULL, CLK_PRINT | CLK_MHZ);
906 	soc_clk_ctl("axi-clk", NULL, CLK_PRINT | CLK_MHZ);
907 	soc_clk_ctl("eth-clk", NULL, CLK_PRINT | CLK_MHZ);
908 	soc_clk_ctl("usb-clk", NULL, CLK_PRINT | CLK_MHZ);
909 	soc_clk_ctl("sdio-clk", NULL, CLK_PRINT | CLK_MHZ);
910 /*	soc_clk_ctl("hdmi-sys-clk", NULL, CLK_PRINT | CLK_MHZ); */
911 	soc_clk_ctl("gfx-core-clk", NULL, CLK_PRINT | CLK_MHZ);
912 	soc_clk_ctl("gfx-dma-clk", NULL, CLK_PRINT | CLK_MHZ);
913 	soc_clk_ctl("gfx-cfg-clk", NULL, CLK_PRINT | CLK_MHZ);
914 	soc_clk_ctl("dmac-core-clk", NULL, CLK_PRINT | CLK_MHZ);
915 	soc_clk_ctl("dmac-cfg-clk", NULL, CLK_PRINT | CLK_MHZ);
916 	soc_clk_ctl("sdio-ref-clk", NULL, CLK_PRINT | CLK_MHZ);
917 	soc_clk_ctl("spi-clk", NULL, CLK_PRINT | CLK_MHZ);
918 	soc_clk_ctl("i2c-clk", NULL, CLK_PRINT | CLK_MHZ);
919 /*	soc_clk_ctl("ebi-clk", NULL, CLK_PRINT | CLK_MHZ); */
920 	soc_clk_ctl("uart-clk", NULL, CLK_PRINT | CLK_MHZ);
921 	printf("\n");
922 
923 	/* DDR clock domain */
924 	soc_clk_ctl("ddr-clk", NULL, CLK_PRINT | CLK_MHZ);
925 	printf("\n");
926 
927 	/* HDMI clock domain */
928 /*	soc_clk_ctl("hdmi-pll", NULL, CLK_PRINT | CLK_MHZ); */
929 /*	soc_clk_ctl("hdmi-clk", NULL, CLK_PRINT | CLK_MHZ); */
930 /*	printf("\n"); */
931 
932 	/* TUN clock domain */
933 	soc_clk_ctl("tun-pll", NULL, CLK_PRINT | CLK_MHZ);
934 	soc_clk_ctl("tun-clk", NULL, CLK_PRINT | CLK_MHZ);
935 	soc_clk_ctl("rom-clk", NULL, CLK_PRINT | CLK_MHZ);
936 	soc_clk_ctl("pwm-clk", NULL, CLK_PRINT | CLK_MHZ);
937 	printf("\n");
938 
939 	return CMD_RET_SUCCESS;
940 }
941 
/* Subcommands of "hsdk_clock"; do_hsdk_clock() looks them up by name */
cmd_tbl_t cmd_hsdk_clock[] = {
	U_BOOT_CMD_MKENT(set, 3, 0, do_hsdk_clock_set, "", ""),
	U_BOOT_CMD_MKENT(get, 3, 0, do_hsdk_clock_get, "", ""),
	U_BOOT_CMD_MKENT(print, 4, 0, do_hsdk_clock_print, "", ""),
	U_BOOT_CMD_MKENT(print_all, 4, 0, do_hsdk_clock_print_all, "", ""),
};
948 
949 static int do_hsdk_clock(cmd_tbl_t *cmdtp, int flag, int argc, char *const argv[])
950 {
951 	cmd_tbl_t *c;
952 
953 	if (argc < 2)
954 		return CMD_RET_USAGE;
955 
956 	/* Strip off leading 'hsdk_clock' command argument */
957 	argc--;
958 	argv++;
959 
960 	c = find_cmd_tbl(argv[0], cmd_hsdk_clock, ARRAY_SIZE(cmd_hsdk_clock));
961 	if (!c)
962 		return CMD_RET_USAGE;
963 
964 	return c->cmd(cmdtp, flag, argc, argv);
965 }
966 
967 U_BOOT_CMD(
968 	hsdk_clock, CONFIG_SYS_MAXARGS, 0, do_hsdk_clock,
969 	"Synopsys HSDK specific clock command",
970 	"set   - Set clock to values specified in environment / command line arguments\n"
971 	"hsdk_clock get   - Save clock values to environment\n"
972 	"hsdk_clock print - Print main clock values to console\n"
973 	"hsdk_clock print_all - Print all clock values to console\n"
974 );
975 
/* init calls */

/* Early board hook: only programs the AXI address decoders. Returns 0. */
int board_early_init_f(void)
{
	/*
	 * Setup AXI apertures unconditionally as we want to have DDR
	 * in 0x00000000 region when we are kicking slave cpus.
	 */
	init_memory_bridge();

	return 0;
}
987 
/*
 * Board hook that flushes the data caches once, for the sake of the uncached
 * cross-CPU area, and prints the relocation offset. Returns 0.
 */
int board_early_init_r(void)
{
	/*
	 * TODO: Init USB here to be able to read the environment from USB MSD.
	 * It can be done with a usb_init() call. We can't do it right now
	 * due to the broken USB IP SW reset and the lack of a USB IP HW reset
	 * in the linux kernel (if we init USB here we will break USB in linux)
	 */

	/*
	 * Flush all d$ as we want to use uncached area with st.di / ld.di
	 * instructions and we don't want to have any dirty line in L1d$ or SL$
	 * in this area. It is enough to flush all d$ once here as we access the
	 * uncached area with regular st (non .di) instructions only when we
	 * copy data during u-boot relocation.
	 */
	flush_dcache_all();

	printf("Relocation Offset is: %08lx\n", gd->reloc_off);

	return 0;
}
1010 
/*
 * Late board hook. Returns 0 regardless of whether the clock values could be
 * stored: the result of do_hsdk_clock_get() is not checked.
 */
int board_late_init(void)
{
	/*
	 * Populate environment with clock frequency values -
	 * run hsdk_clock get callback without uboot command run.
	 */
	do_hsdk_clock_get(NULL, 0, 0, NULL);

	return 0;
}
1021 
1022 int board_mmc_getcd(struct mmc *mmc)
1023 {
1024 	struct dwmci_host *host = mmc->priv;
1025 
1026 	return !(dwmci_readl(host, DWMCI_CDETECT) & 1);
1027 }
1028 
1029 int board_mmc_init(bd_t *bis)
1030 {
1031 	struct dwmci_host *host = NULL;
1032 
1033 	host = malloc(sizeof(struct dwmci_host));
1034 	if (!host) {
1035 		printf("dwmci_host malloc fail!\n");
1036 		return 1;
1037 	}
1038 
1039 	/*
1040 	 * Switch SDIO external ciu clock divider from default div-by-8 to
1041 	 * minimum possible div-by-2.
1042 	 */
1043 	writel(SDIO_UHS_REG_EXT_DIV_2, (void __iomem *)SDIO_UHS_REG_EXT);
1044 
1045 	memset(host, 0, sizeof(struct dwmci_host));
1046 	host->name = "Synopsys Mobile storage";
1047 	host->ioaddr = (void *)ARC_DWMMC_BASE;
1048 	host->buswidth = 4;
1049 	host->dev_index = 0;
1050 	host->bus_hz = 50000000;
1051 
1052 	add_dwmci(host, host->bus_hz / 2, 400000);
1053 
1054 	return 0;
1055 }
1056 
/* Print the board name. Always returns 0. */
int checkboard(void)
{
	puts("Board: Synopsys ARC HS Development Kit\n");
	return 0;
}
1062