xref: /openbmc/u-boot/board/synopsys/hsdk/hsdk.c (revision 85231c08)
1 /*
2  * Copyright (C) 2018 Synopsys, Inc. All rights reserved.
3  * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
4  *
5  * SPDX-License-Identifier:	GPL-2.0+
6  */
7 
8 #include <common.h>
9 #include <config.h>
10 #include <linux/printk.h>
11 #include <linux/kernel.h>
12 #include <linux/io.h>
13 #include <asm/arcregs.h>
14 #include <fdt_support.h>
15 #include <dwmmc.h>
16 #include <malloc.h>
17 #include <usb.h>
18 
19 #include "clk-lib.h"
20 #include "env-lib.h"
21 
22 DECLARE_GLOBAL_DATA_PTR;
23 
/* Mask with one bit set for each of the NR_CPUS cores */
#define ALL_CPU_MASK		GENMASK(NR_CPUS - 1, 0)
/* ID of the core U-Boot itself must run on (see check_master_cpu_id()) */
#define MASTER_CPU_ID		0
/* Aperture numbers are shifted by this amount before going to AUX registers */
#define APERTURE_SHIFT		28
/* core_iccm / core_dccm value which means "do not remap this CCM" */
#define NO_CCM			0x10
/* Value a slave CPU stores into ready_flag when its init is finished */
#define SLAVE_CPU_READY		0x12345678
/* Slave CPU progress markers stored in hsdk_cross_cpu.status[] */
#define BOOTSTAGE_1		1 /* after SP, FP setup, before HW init */
#define BOOTSTAGE_2		2 /* after HW init, before self halt */
#define BOOTSTAGE_3		3 /* after self halt */
#define BOOTSTAGE_4		4 /* before app launch */
#define BOOTSTAGE_5		5 /* after app launch, unreachable */

/* Address the slave boot address is written to */
#define RESET_VECTOR_ADDR	0x0

/* CREG block and the register used to kick CPUs (low 4 bits = CPU bits) */
#define CREG_BASE		(ARC_PERIPHERAL_BASE + 0x1000)
#define CREG_CPU_START		(CREG_BASE + 0x400)
#define CREG_CPU_START_MASK	0xF
40 
/* SDIO controller registers used to program the external ciu clock divider */
#define SDIO_BASE		(ARC_PERIPHERAL_BASE + 0xA000)
#define SDIO_UHS_REG_EXT	(SDIO_BASE + 0x108)
/*
 * Unsigned constant on purpose: "2 << 30" does not fit into a signed int,
 * so shifting the signed literal is undefined behaviour and yields a
 * negative value in practice.
 */
#define SDIO_UHS_REG_EXT_DIV_2	(2U << 30)
44 
/* Uncached access macros */

/*
 * Load one 32-bit word from @ptr with the ld.di instruction and evaluate to
 * the loaded value (GCC statement expression).
 */
#define arc_read_uncached_32(ptr)	\
({					\
	unsigned int __ret;		\
	__asm__ __volatile__(		\
	"	ld.di %0, [%1]	\n"	\
	: "=r"(__ret)			\
	: "r"(ptr));			\
	__ret;				\
})
55 
/*
 * Store the 32-bit value @data to @ptr with the st.di instruction.
 * NOTE(review): the asm statement declares no "memory" clobber, so ordering
 * against ordinary loads/stores relies on it being volatile only - confirm
 * this is sufficient for the cross-CPU flags.
 */
#define arc_write_uncached_32(ptr, data)\
({					\
	__asm__ __volatile__(		\
	"	st.di %0, [%1]	\n"	\
	:				\
	: "r"(data), "r"(ptr));		\
})
63 
/* Per-CPU settings read from the environment, indexed by CPU ID */
struct hsdk_env_core_ctl {
	u32_env entry[NR_CPUS];	/* "core_entry": address jumped to on launch */
	u32_env iccm[NR_CPUS];	/* "core_iccm": ICCM aperture or NO_CCM */
	u32_env dccm[NR_CPUS];	/* "core_dccm": DCCM aperture or NO_CCM */
};
69 
/* Cluster-wide settings read from the environment or command arguments */
struct hsdk_env_common_ctl {
	bool halt_on_boot;	/* set by 'hsdk_go halt' */
	u32_env core_mask;	/* "core_mask": bit N set = CPU N is used */
	u32_env cpu_freq;	/* "cpu_freq", passed to soc_clk_ctl() in MHz */
	u32_env axi_freq;	/* "axi_freq", passed to soc_clk_ctl() in MHz */
	u32_env tun_freq;	/* "tun_freq" in MHz, 0 switches tun-clk off */
	u32_env nvlim;		/* "non_volatile_limit" aperture number */
	u32_env icache;		/* "icache_ena": non-zero enables I$ */
	u32_env dcache;		/* "dcache_ena": non-zero enables D$ */
};
80 
/*
 * Uncached cross-cpu structure. All CPUs must access the fields of this
 * structure only with the arc_read_uncached_32() / arc_write_uncached_32()
 * accessors (which use the ld.di / st.di instructions). Simultaneous cached
 * and uncached access to this area will lead to data loss.
 * We flush all data caches in board_early_init_r() as we don't want to have
 * any dirty line in L1d$ or SL$ in this area.
 */
struct hsdk_cross_cpu {
	/* slave CPU ready flag */
	u32 ready_flag;
	/* address of the area, which can be used for stack by slave CPU */
	u32 stack_ptr;
	/* slave CPU status - bootstage number */
	s32 status[NR_CPUS];

	/*
	 * Slave CPU data - copies of the corresponding fields of the
	 * hsdk_env_core_ctl and hsdk_env_common_ctl structures which are
	 * required for slave CPU initialization.
	 * These fields are populated from hsdk_env_core_ctl and
	 * hsdk_env_common_ctl by the sync_cross_cpu_data() function.
	 */
	u32 entry[NR_CPUS];
	u32 iccm[NR_CPUS];
	u32 dccm[NR_CPUS];

	u32 core_mask;
	u32 icache;
	u32 dcache;

	/* trailing padding of ARCH_DMA_MINALIGN bytes after the last field */
	u8 cache_padding[ARCH_DMA_MINALIGN];
} __aligned(ARCH_DMA_MINALIGN);
115 
/* Place for the temporary stacks of the slave CPUs */
static u32 slave_stack[256 * NR_CPUS] __aligned(ARCH_DMA_MINALIGN);

/* Settings parsed from the environment (master CPU side only) */
static struct hsdk_env_common_ctl env_common = {};
static struct hsdk_env_core_ctl env_core = {};
/* Area shared with slave CPUs; accessed with ld.di / st.di only */
static struct hsdk_cross_cpu cross_cpu_data;
122 
/*
 * Cluster-wide variables processed by 'hsdk_init' (see prepare_cpus()).
 * NOTE(review): the entry layout is declared in env-lib.h; presumably
 * { name, number base, mandatory, min, max, storage } - confirm there.
 */
static const struct env_map_common env_map_common[] = {
	{ "core_mask",	ENV_HEX, true,	0x1, 0xF,	&env_common.core_mask },
	{ "non_volatile_limit", ENV_HEX, true, 0, 0xF,	&env_common.nvlim },
	{ "icache_ena",	ENV_HEX, true,	0, 1,		&env_common.icache },
	{ "dcache_ena",	ENV_HEX, true,	0, 1,		&env_common.dcache },
	{}
};
130 
/*
 * Clock variables used by 'hsdk_clock set'; the stored values are handed to
 * soc_clk_ctl() with CLK_MHZ in setup_clocks().
 */
static const struct env_map_common env_map_clock[] = {
	{ "cpu_freq",	ENV_DEC, false,	100, 1000,	&env_common.cpu_freq },
	{ "axi_freq",	ENV_DEC, false,	200, 800,	&env_common.axi_freq },
	{ "tun_freq",	ENV_DEC, false,	0, 150,		&env_common.tun_freq },
	{}
};
137 
/*
 * Per-CPU CCM variables processed by 'hsdk_init'.
 * NOTE(review): the two brace lists look like per-CPU min and max values
 * (CPUs 0 and 2 pinned to NO_CCM) - confirm against env-lib.h.
 */
static const struct env_map_percpu env_map_core[] = {
	{ "core_iccm", ENV_HEX, true, {NO_CCM, 0, NO_CCM, 0}, {NO_CCM, 0xF, NO_CCM, 0xF}, &env_core.iccm },
	{ "core_dccm", ENV_HEX, true, {NO_CCM, 0, NO_CCM, 0}, {NO_CCM, 0xF, NO_CCM, 0xF}, &env_core.dccm },
	{}
};
143 
/* "core_mask" only; used by 'hsdk_go' and board_prep_linux() */
static const struct env_map_common env_map_mask[] = {
	{ "core_mask",	ENV_HEX, false,	0x1, 0xF,	&env_common.core_mask },
	{}
};
148 
/* Per-CPU entry points processed by 'hsdk_go' */
static const struct env_map_percpu env_map_go[] = {
	{ "core_entry", ENV_HEX, true, {0, 0, 0, 0}, {U32_MAX, U32_MAX, U32_MAX, U32_MAX}, &env_core.entry },
	{}
};
153 
154 static void sync_cross_cpu_data(void)
155 {
156 	u32 value;
157 
158 	for (u32 i = 0; i < NR_CPUS; i++) {
159 		value = env_core.entry[i].val;
160 		arc_write_uncached_32(&cross_cpu_data.entry[i], value);
161 	}
162 
163 	for (u32 i = 0; i < NR_CPUS; i++) {
164 		value = env_core.iccm[i].val;
165 		arc_write_uncached_32(&cross_cpu_data.iccm[i], value);
166 	}
167 
168 	for (u32 i = 0; i < NR_CPUS; i++) {
169 		value = env_core.dccm[i].val;
170 		arc_write_uncached_32(&cross_cpu_data.dccm[i], value);
171 	}
172 
173 	value = env_common.core_mask.val;
174 	arc_write_uncached_32(&cross_cpu_data.core_mask, value);
175 
176 	value = env_common.icache.val;
177 	arc_write_uncached_32(&cross_cpu_data.icache, value);
178 
179 	value = env_common.dcache.val;
180 	arc_write_uncached_32(&cross_cpu_data.dcache, value);
181 }
182 
183 /* Can be used only on master CPU */
184 static bool is_cpu_used(u32 cpu_id)
185 {
186 	return !!(env_common.core_mask.val & BIT(cpu_id));
187 }
188 
189 /* TODO: add ICCM BCR and DCCM BCR runtime check */
190 static void init_slave_cpu_func(u32 core)
191 {
192 	u32 val;
193 
194 	/* Remap ICCM to another memory region if it exists */
195 	val = arc_read_uncached_32(&cross_cpu_data.iccm[core]);
196 	if (val != NO_CCM)
197 		write_aux_reg(ARC_AUX_ICCM_BASE, val << APERTURE_SHIFT);
198 
199 	/* Remap DCCM to another memory region if it exists */
200 	val = arc_read_uncached_32(&cross_cpu_data.dccm[core]);
201 	if (val != NO_CCM)
202 		write_aux_reg(ARC_AUX_DCCM_BASE, val << APERTURE_SHIFT);
203 
204 	if (arc_read_uncached_32(&cross_cpu_data.icache))
205 		icache_enable();
206 	else
207 		icache_disable();
208 
209 	if (arc_read_uncached_32(&cross_cpu_data.dcache))
210 		dcache_enable();
211 	else
212 		dcache_disable();
213 }
214 
215 static void init_cluster_nvlim(void)
216 {
217 	u32 val = env_common.nvlim.val << APERTURE_SHIFT;
218 
219 	flush_dcache_all();
220 	write_aux_reg(ARC_AUX_NON_VOLATILE_LIMIT, val);
221 	write_aux_reg(AUX_AUX_CACHE_LIMIT, val);
222 	flush_n_invalidate_dcache_all();
223 }
224 
225 static void init_master_icache(void)
226 {
227 	if (icache_status()) {
228 		/* I$ is enabled - we need to disable it */
229 		if (!env_common.icache.val)
230 			icache_disable();
231 	} else {
232 		/* I$ is disabled - we need to enable it */
233 		if (env_common.icache.val) {
234 			icache_enable();
235 
236 			/* invalidate I$ right after enable */
237 			invalidate_icache_all();
238 		}
239 	}
240 }
241 
242 static void init_master_dcache(void)
243 {
244 	if (dcache_status()) {
245 		/* D$ is enabled - we need to disable it */
246 		if (!env_common.dcache.val)
247 			dcache_disable();
248 	} else {
249 		/* D$ is disabled - we need to enable it */
250 		if (env_common.dcache.val)
251 			dcache_enable();
252 
253 		/* TODO: probably we need ti invalidate D$ right after enable */
254 	}
255 }
256 
/*
 * Last step before control leaves U-Boot: interrupts are disabled and
 * sync_n_cleanup_cache_all() is called. Always returns 0.
 */
static int cleanup_before_go(void)
{
	disable_interrupts();

	sync_n_cleanup_cache_all();

	return 0;
}
264 
265 void slave_cpu_set_boot_addr(u32 addr)
266 {
267 	/* All cores have reset vector pointing to 0 */
268 	writel(addr, (void __iomem *)RESET_VECTOR_ADDR);
269 
270 	/* Make sure other cores see written value in memory */
271 	sync_n_cleanup_cache_all();
272 }
273 
/* Halt the calling CPU by executing "FLAG 1" */
static inline void halt_this_cpu(void)
{
	__builtin_arc_flag(1);
}
278 
279 static void smp_kick_cpu_x(u32 cpu_id)
280 {
281 	int cmd = readl((void __iomem *)CREG_CPU_START);
282 
283 	if (cpu_id > NR_CPUS)
284 		return;
285 
286 	cmd &= ~CREG_CPU_START_MASK;
287 	cmd |= (1 << cpu_id);
288 	writel(cmd, (void __iomem *)CREG_CPU_START);
289 }
290 
291 static u32 prepare_cpu_ctart_reg(void)
292 {
293 	int cmd = readl((void __iomem *)CREG_CPU_START);
294 
295 	cmd &= ~CREG_CPU_START_MASK;
296 
297 	return cmd | env_common.core_mask.val;
298 }
299 
/*
 * Slave CPU entry for configuration.
 *
 * The function is "naked": it loads SP (and FP) itself from
 * cross_cpu_data.stack_ptr before any C code runs. It then records its
 * progress in cross_cpu_data.status[], applies the per-core settings, raises
 * ready_flag and halts. After being kicked again it jumps to the entry point
 * stored in cross_cpu_data.entry[] for this CPU and never returns.
 */
__attribute__((naked, noreturn, flatten)) noinline void hsdk_core_init_f(void)
{
	/* Set up the stack from the uncached stack_ptr published by master */
	__asm__ __volatile__(
		"ld.di	r8,	[%0]\n"
		"mov	%%sp,	r8\n"
		"mov	%%fp,	%%sp\n"
		: /* no output */
		: "r" (&cross_cpu_data.stack_ptr));

	invalidate_icache_all();

	arc_write_uncached_32(&cross_cpu_data.status[CPU_ID_GET()], BOOTSTAGE_1);
	init_slave_cpu_func(CPU_ID_GET());

	/* Tell the master that init is done; it polls ready_flag */
	arc_write_uncached_32(&cross_cpu_data.ready_flag, SLAVE_CPU_READY);
	arc_write_uncached_32(&cross_cpu_data.status[CPU_ID_GET()], BOOTSTAGE_2);

	/* Halt the processor until the master kicks us again */
	halt_this_cpu();

	/*
	 * 3 NOPs after FLAG 1 instruction are no longer required for ARCv2
	 * cores but we leave them for debug purposes.
	 */
	__builtin_arc_nop();
	__builtin_arc_nop();
	__builtin_arc_nop();

	arc_write_uncached_32(&cross_cpu_data.status[CPU_ID_GET()], BOOTSTAGE_3);

	/* get the updated entry - invalidate i$ */
	invalidate_icache_all();

	arc_write_uncached_32(&cross_cpu_data.status[CPU_ID_GET()], BOOTSTAGE_4);

	/* Run our program */
	((void (*)(void))(arc_read_uncached_32(&cross_cpu_data.entry[CPU_ID_GET()])))();

	/* This bootstage is unreachable as we don't return from app we launch */
	arc_write_uncached_32(&cross_cpu_data.status[CPU_ID_GET()], BOOTSTAGE_5);

	/* Something went terribly wrong */
	while (true)
		halt_this_cpu();
}
346 
347 static void clear_cross_cpu_data(void)
348 {
349 	arc_write_uncached_32(&cross_cpu_data.ready_flag, 0);
350 	arc_write_uncached_32(&cross_cpu_data.stack_ptr, 0);
351 
352 	for (u32 i = 0; i < NR_CPUS; i++)
353 		arc_write_uncached_32(&cross_cpu_data.status[i], 0);
354 }
355 
356 static noinline void do_init_slave_cpu(u32 cpu_id)
357 {
358 	/* attempts number for check clave CPU ready_flag */
359 	u32 attempts = 100;
360 	u32 stack_ptr = (u32)(slave_stack + (64 * cpu_id));
361 
362 	if (cpu_id >= NR_CPUS)
363 		return;
364 
365 	arc_write_uncached_32(&cross_cpu_data.ready_flag, 0);
366 
367 	/* Use global unique place for each slave cpu stack */
368 	arc_write_uncached_32(&cross_cpu_data.stack_ptr, stack_ptr);
369 
370 	debug("CPU %u: stack pool base: %p\n", cpu_id, slave_stack);
371 	debug("CPU %u: current slave stack base: %x\n", cpu_id, stack_ptr);
372 	slave_cpu_set_boot_addr((u32)hsdk_core_init_f);
373 
374 	smp_kick_cpu_x(cpu_id);
375 
376 	debug("CPU %u: cross-cpu flag: %x [before timeout]\n", cpu_id,
377 	      arc_read_uncached_32(&cross_cpu_data.ready_flag));
378 
379 	while (!arc_read_uncached_32(&cross_cpu_data.ready_flag) && attempts--)
380 		mdelay(10);
381 
382 	/* Just to be sure that slave cpu is halted after it set ready_flag */
383 	mdelay(20);
384 
385 	/*
386 	 * Only print error here if we reach timeout as there is no option to
387 	 * halt slave cpu (or check that slave cpu is halted)
388 	 */
389 	if (!attempts)
390 		pr_err("CPU %u is not responding after init!\n", cpu_id);
391 
392 	/* Check current stage of slave cpu */
393 	if (arc_read_uncached_32(&cross_cpu_data.status[cpu_id]) != BOOTSTAGE_2)
394 		pr_err("CPU %u status is unexpected: %d\n", cpu_id,
395 		       arc_read_uncached_32(&cross_cpu_data.status[cpu_id]));
396 
397 	debug("CPU %u: cross-cpu flag: %x [after timeout]\n", cpu_id,
398 	      arc_read_uncached_32(&cross_cpu_data.ready_flag));
399 	debug("CPU %u: status: %d [after timeout]\n", cpu_id,
400 	      arc_read_uncached_32(&cross_cpu_data.status[cpu_id]));
401 }
402 
403 static void do_init_slave_cpus(void)
404 {
405 	clear_cross_cpu_data();
406 	sync_cross_cpu_data();
407 
408 	debug("cross_cpu_data location: %#x\n", (u32)&cross_cpu_data);
409 
410 	for (u32 i = MASTER_CPU_ID + 1; i < NR_CPUS; i++)
411 		if (is_cpu_used(i))
412 			do_init_slave_cpu(i);
413 }
414 
/*
 * Apply the cache settings to the master CPU. This is done even when the
 * master is not selected in "core_mask", so that every running CPU ends up
 * with the same cache configuration.
 */
static void do_init_master_cpu(void)
{
	init_master_icache();
	init_master_dcache();
}
424 
/*
 * AXI master numbers, used as the "m" argument of the CREG_AXI_M_* register
 * macros in init_memory_bridge().
 */
enum hsdk_axi_masters {
	M_HS_CORE = 0,
	M_HS_RTT,
	M_AXI_TUN,
	M_HDMI_VIDEO,
	M_HDMI_AUDIO,
	M_USB_HOST,
	M_ETHERNET,
	M_SDIO,
	M_GPU,
	M_DMAC_0,
	M_DMAC_1,
	M_DVFS
};
439 
/* Value written to the CREG *_UPDT registers after a group of settings */
#define UPDATE_VAL	1
441 
442 /*
443  * m	master		AXI_M_m_SLV0	AXI_M_m_SLV1	AXI_M_m_OFFSET0	AXI_M_m_OFFSET1
444  * 0	HS (CBU)	0x11111111	0x63111111	0xFEDCBA98	0x0E543210
445  * 1	HS (RTT)	0x77777777	0x77777777	0xFEDCBA98	0x76543210
446  * 2	AXI Tunnel	0x88888888	0x88888888	0xFEDCBA98	0x76543210
447  * 3	HDMI-VIDEO	0x77777777	0x77777777	0xFEDCBA98	0x76543210
 * 4	HDMI-AUDIO	0x77777777	0x77777777	0xFEDCBA98	0x76543210
449  * 5	USB-HOST	0x77777777	0x77999999	0xFEDCBA98	0x76DCBA98
450  * 6	ETHERNET	0x77777777	0x77999999	0xFEDCBA98	0x76DCBA98
451  * 7	SDIO		0x77777777	0x77999999	0xFEDCBA98	0x76DCBA98
452  * 8	GPU		0x77777777	0x77777777	0xFEDCBA98	0x76543210
453  * 9	DMAC (port #1)	0x77777777	0x77777777	0xFEDCBA98	0x76543210
454  * 10	DMAC (port #2)	0x77777777	0x77777777	0xFEDCBA98	0x76543210
455  * 11	DVFS		0x00000000	0x60000000	0x00000000	0x00000000
456  *
457  * Please read ARC HS Development IC Specification, section 17.2 for more
458  * information about apertures configuration.
459  * NOTE: we intentionally modify default settings in U-boot. Default settings
460  * are specified in "Table 111 CREG Address Decoder register reset values".
461  */
462 
/* Per-master address decoder registers: 0x20 bytes of CREG space per master */
#define CREG_AXI_M_SLV0(m)  ((void __iomem *)(CREG_BASE + 0x020 * (m)))
#define CREG_AXI_M_SLV1(m)  ((void __iomem *)(CREG_BASE + 0x020 * (m) + 0x004))
#define CREG_AXI_M_OFT0(m)  ((void __iomem *)(CREG_BASE + 0x020 * (m) + 0x008))
#define CREG_AXI_M_OFT1(m)  ((void __iomem *)(CREG_BASE + 0x020 * (m) + 0x00C))
#define CREG_AXI_M_UPDT(m)  ((void __iomem *)(CREG_BASE + 0x020 * (m) + 0x014))

/* BOOT register, present for the M_HS_CORE master only */
#define CREG_AXI_M_HS_CORE_BOOT	((void __iomem *)(CREG_BASE + 0x010))

/* PAE register and its update register */
#define CREG_PAE	((void __iomem *)(CREG_BASE + 0x180))
#define CREG_PAE_UPDT	((void __iomem *)(CREG_BASE + 0x194))
473 
474 void init_memory_bridge(void)
475 {
476 	u32 reg;
477 
478 	/*
479 	 * M_HS_CORE has one unic register - BOOT.
480 	 * We need to clean boot mirror (BOOT[1:0]) bits in them.
481 	 */
482 	reg = readl(CREG_AXI_M_HS_CORE_BOOT) & (~0x3);
483 	writel(reg, CREG_AXI_M_HS_CORE_BOOT);
484 	writel(0x11111111, CREG_AXI_M_SLV0(M_HS_CORE));
485 	writel(0x63111111, CREG_AXI_M_SLV1(M_HS_CORE));
486 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HS_CORE));
487 	writel(0x0E543210, CREG_AXI_M_OFT1(M_HS_CORE));
488 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HS_CORE));
489 
490 	writel(0x77777777, CREG_AXI_M_SLV0(M_HS_RTT));
491 	writel(0x77777777, CREG_AXI_M_SLV1(M_HS_RTT));
492 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HS_RTT));
493 	writel(0x76543210, CREG_AXI_M_OFT1(M_HS_RTT));
494 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HS_RTT));
495 
496 	writel(0x88888888, CREG_AXI_M_SLV0(M_AXI_TUN));
497 	writel(0x88888888, CREG_AXI_M_SLV1(M_AXI_TUN));
498 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_AXI_TUN));
499 	writel(0x76543210, CREG_AXI_M_OFT1(M_AXI_TUN));
500 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_AXI_TUN));
501 
502 	writel(0x77777777, CREG_AXI_M_SLV0(M_HDMI_VIDEO));
503 	writel(0x77777777, CREG_AXI_M_SLV1(M_HDMI_VIDEO));
504 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HDMI_VIDEO));
505 	writel(0x76543210, CREG_AXI_M_OFT1(M_HDMI_VIDEO));
506 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HDMI_VIDEO));
507 
508 	writel(0x77777777, CREG_AXI_M_SLV0(M_HDMI_AUDIO));
509 	writel(0x77777777, CREG_AXI_M_SLV1(M_HDMI_AUDIO));
510 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HDMI_AUDIO));
511 	writel(0x76543210, CREG_AXI_M_OFT1(M_HDMI_AUDIO));
512 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HDMI_AUDIO));
513 
514 	writel(0x77777777, CREG_AXI_M_SLV0(M_USB_HOST));
515 	writel(0x77999999, CREG_AXI_M_SLV1(M_USB_HOST));
516 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_USB_HOST));
517 	writel(0x76DCBA98, CREG_AXI_M_OFT1(M_USB_HOST));
518 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_USB_HOST));
519 
520 	writel(0x77777777, CREG_AXI_M_SLV0(M_ETHERNET));
521 	writel(0x77999999, CREG_AXI_M_SLV1(M_ETHERNET));
522 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_ETHERNET));
523 	writel(0x76DCBA98, CREG_AXI_M_OFT1(M_ETHERNET));
524 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_ETHERNET));
525 
526 	writel(0x77777777, CREG_AXI_M_SLV0(M_SDIO));
527 	writel(0x77999999, CREG_AXI_M_SLV1(M_SDIO));
528 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_SDIO));
529 	writel(0x76DCBA98, CREG_AXI_M_OFT1(M_SDIO));
530 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_SDIO));
531 
532 	writel(0x77777777, CREG_AXI_M_SLV0(M_GPU));
533 	writel(0x77777777, CREG_AXI_M_SLV1(M_GPU));
534 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_GPU));
535 	writel(0x76543210, CREG_AXI_M_OFT1(M_GPU));
536 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_GPU));
537 
538 	writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_0));
539 	writel(0x77777777, CREG_AXI_M_SLV1(M_DMAC_0));
540 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_0));
541 	writel(0x76543210, CREG_AXI_M_OFT1(M_DMAC_0));
542 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_0));
543 
544 	writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_1));
545 	writel(0x77777777, CREG_AXI_M_SLV1(M_DMAC_1));
546 	writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_1));
547 	writel(0x76543210, CREG_AXI_M_OFT1(M_DMAC_1));
548 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_1));
549 
550 	writel(0x00000000, CREG_AXI_M_SLV0(M_DVFS));
551 	writel(0x60000000, CREG_AXI_M_SLV1(M_DVFS));
552 	writel(0x00000000, CREG_AXI_M_OFT0(M_DVFS));
553 	writel(0x00000000, CREG_AXI_M_OFT1(M_DVFS));
554 	writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DVFS));
555 
556 	writel(0x00000000, CREG_PAE);
557 	writel(UPDATE_VAL, CREG_PAE_UPDT);
558 }
559 
560 static void setup_clocks(void)
561 {
562 	ulong rate;
563 
564 	/* Setup CPU clock */
565 	if (env_common.cpu_freq.set) {
566 		rate = env_common.cpu_freq.val;
567 		soc_clk_ctl("cpu-clk", &rate, CLK_ON | CLK_SET | CLK_MHZ);
568 	}
569 
570 	/* Setup TUN clock */
571 	if (env_common.tun_freq.set) {
572 		rate = env_common.tun_freq.val;
573 		if (rate)
574 			soc_clk_ctl("tun-clk", &rate, CLK_ON | CLK_SET | CLK_MHZ);
575 		else
576 			soc_clk_ctl("tun-clk", NULL, CLK_OFF);
577 	}
578 
579 	if (env_common.axi_freq.set) {
580 		rate = env_common.axi_freq.val;
581 		soc_clk_ctl("axi-clk", &rate, CLK_SET | CLK_ON | CLK_MHZ);
582 	}
583 }
584 
/*
 * Cluster-wide initialization. A multi-core ARC HS configuration has a
 * single ARC_AUX_NON_VOLATILE_LIMIT register shared by all cores, so it is
 * programmed once here.
 */
static void do_init_cluster(void)
{
	init_cluster_nvlim();
}
594 
595 static int check_master_cpu_id(void)
596 {
597 	if (CPU_ID_GET() == MASTER_CPU_ID)
598 		return 0;
599 
600 	pr_err("u-boot runs on non-master cpu with id: %lu\n", CPU_ID_GET());
601 
602 	return -ENOENT;
603 }
604 
605 static noinline int prepare_cpus(void)
606 {
607 	int ret;
608 
609 	ret = check_master_cpu_id();
610 	if (ret)
611 		return ret;
612 
613 	ret = envs_process_and_validate(env_map_common, env_map_core, is_cpu_used);
614 	if (ret)
615 		return ret;
616 
617 	printf("CPU start mask is %#x\n", env_common.core_mask.val);
618 
619 	do_init_slave_cpus();
620 	do_init_master_cpu();
621 	do_init_cluster();
622 
623 	return 0;
624 }
625 
626 static int hsdk_go_run(u32 cpu_start_reg)
627 {
628 	/* Cleanup caches, disable interrupts */
629 	cleanup_before_go();
630 
631 	if (env_common.halt_on_boot)
632 		halt_this_cpu();
633 
634 	/*
635 	 * 3 NOPs after FLAG 1 instruction are no longer required for ARCv2
636 	 * cores but we leave them for gebug purposes.
637 	 */
638 	__builtin_arc_nop();
639 	__builtin_arc_nop();
640 	__builtin_arc_nop();
641 
642 	/* Kick chosen slave CPUs */
643 	writel(cpu_start_reg, (void __iomem *)CREG_CPU_START);
644 
645 	if (is_cpu_used(MASTER_CPU_ID))
646 		((void (*)(void))(env_core.entry[MASTER_CPU_ID].val))();
647 	else
648 		halt_this_cpu();
649 
650 	pr_err("u-boot still runs on cpu [%ld]\n", CPU_ID_GET());
651 
652 	/*
653 	 * We will never return after executing our program if master cpu used
654 	 * otherwise halt master cpu manually.
655 	 */
656 	while (true)
657 		halt_this_cpu();
658 
659 	return 0;
660 }
661 
662 int board_prep_linux(bootm_headers_t *images)
663 {
664 	int ret, ofst;
665 	char mask[15];
666 
667 	ret = envs_read_validate_common(env_map_mask);
668 	if (ret)
669 		return ret;
670 
671 	/* Rollback to default values */
672 	if (!env_common.core_mask.set) {
673 		env_common.core_mask.val = ALL_CPU_MASK;
674 		env_common.core_mask.set = true;
675 	}
676 
677 	printf("CPU start mask is %#x\n", env_common.core_mask.val);
678 
679 	if (!is_cpu_used(MASTER_CPU_ID))
680 		pr_err("ERR: try to launch linux with CPU[0] disabled! It doesn't work for ARC.\n");
681 
682 	/*
683 	 * If we want to launch linux on all CPUs we don't need to patch
684 	 * linux DTB as it is default configuration
685 	 */
686 	if (env_common.core_mask.val == ALL_CPU_MASK)
687 		return 0;
688 
689 	if (!IMAGE_ENABLE_OF_LIBFDT || !images->ft_len) {
690 		pr_err("WARN: core_mask setup will work properly only with external DTB!\n");
691 		return 0;
692 	}
693 
694 	/* patch '/possible-cpus' property according to cpu mask */
695 	ofst = fdt_path_offset(images->ft_addr, "/");
696 	sprintf(mask, "%s%s%s%s",
697 		is_cpu_used(0) ? "0," : "",
698 		is_cpu_used(1) ? "1," : "",
699 		is_cpu_used(2) ? "2," : "",
700 		is_cpu_used(3) ? "3," : "");
701 	ret = fdt_setprop_string(images->ft_addr, ofst, "possible-cpus", mask);
702 	/*
703 	 * If we failed to patch '/possible-cpus' property we don't need break
704 	 * linux loading process: kernel will handle it but linux will print
705 	 * warning like "Timeout: CPU1 FAILED to comeup !!!".
706 	 * So warn here about error, but return 0 like no error had occurred.
707 	 */
708 	if (ret)
709 		pr_err("WARN: failed to patch '/possible-cpus' property, ret=%d\n",
710 		       ret);
711 
712 	return 0;
713 }
714 
715 void board_jump_and_run(ulong entry, int zero, int arch, uint params)
716 {
717 	void (*kernel_entry)(int zero, int arch, uint params);
718 	u32 cpu_start_reg;
719 
720 	kernel_entry = (void (*)(int, int, uint))entry;
721 
722 	/* Prepare CREG_CPU_START for kicking chosen CPUs */
723 	cpu_start_reg = prepare_cpu_ctart_reg();
724 
725 	/* In case of run without hsdk_init */
726 	slave_cpu_set_boot_addr(entry);
727 
728 	/* In case of run with hsdk_init */
729 	for (u32 i = 0; i < NR_CPUS; i++) {
730 		env_core.entry[i].val = entry;
731 		env_core.entry[i].set = true;
732 	}
733 	/* sync cross_cpu struct as we updated core-entry variables */
734 	sync_cross_cpu_data();
735 
736 	/* Kick chosen slave CPUs */
737 	writel(cpu_start_reg, (void __iomem *)CREG_CPU_START);
738 
739 	if (is_cpu_used(0))
740 		kernel_entry(zero, arch, params);
741 }
742 
743 static int hsdk_go_prepare_and_run(void)
744 {
745 	/* Prepare CREG_CPU_START for kicking chosen CPUs */
746 	u32 reg = prepare_cpu_ctart_reg();
747 
748 	if (env_common.halt_on_boot)
749 		printf("CPU will halt before application start, start application with debugger.\n");
750 
751 	return hsdk_go_run(reg);
752 }
753 
754 static int do_hsdk_go(cmd_tbl_t *cmdtp, int flag, int argc, char *const argv[])
755 {
756 	int ret;
757 
758 	/*
759 	 * Check for 'halt' parameter. 'halt' = enter halt-mode just before
760 	 * starting the application; can be used for debug.
761 	 */
762 	if (argc > 1) {
763 		env_common.halt_on_boot = !strcmp(argv[1], "halt");
764 		if (!env_common.halt_on_boot) {
765 			pr_err("Unrecognised parameter: \'%s\'\n", argv[1]);
766 			return CMD_RET_FAILURE;
767 		}
768 	}
769 
770 	ret = check_master_cpu_id();
771 	if (ret)
772 		return ret;
773 
774 	ret = envs_process_and_validate(env_map_mask, env_map_go, is_cpu_used);
775 	if (ret)
776 		return ret;
777 
778 	/* sync cross_cpu struct as we updated core-entry variables */
779 	sync_cross_cpu_data();
780 
781 	ret = hsdk_go_prepare_and_run();
782 
783 	return ret ? CMD_RET_FAILURE : CMD_RET_SUCCESS;
784 }
785 
/*
 * Registers the 'hsdk_go' command. Up to 3 arguments are accepted although
 * do_hsdk_go() only looks at argv[1].
 */
U_BOOT_CMD(
	hsdk_go, 3, 0, do_hsdk_go,
	"Synopsys HSDK specific command",
	"     - Boot stand-alone application on HSDK\n"
	"hsdk_go halt - Boot stand-alone application on HSDK, halt CPU just before application run\n"
);
792 
793 static int do_hsdk_init(cmd_tbl_t *cmdtp, int flag, int argc, char *const argv[])
794 {
795 	static bool done = false;
796 	int ret;
797 
798 	/* hsdk_init can be run only once */
799 	if (done) {
800 		printf("HSDK HW is already initialized! Please reset the board if you want to change the configuration.\n");
801 		return CMD_RET_FAILURE;
802 	}
803 
804 	ret = prepare_cpus();
805 	if (!ret)
806 		done = true;
807 
808 	return ret ? CMD_RET_FAILURE : CMD_RET_SUCCESS;
809 }
810 
/* Registers the 'hsdk_init' command; it takes no arguments */
U_BOOT_CMD(
	hsdk_init, 1, 0, do_hsdk_init,
	"Synopsys HSDK specific command",
	"- Init HSDK HW\n"
);
816 
817 static int do_hsdk_clock_set(cmd_tbl_t *cmdtp, int flag, int argc,
818 			     char *const argv[])
819 {
820 	int ret = 0;
821 
822 	/* Strip off leading subcommand argument */
823 	argc--;
824 	argv++;
825 
826 	envs_cleanup_common(env_map_clock);
827 
828 	if (!argc) {
829 		printf("Set clocks to values specified in environment\n");
830 		ret = envs_read_common(env_map_clock);
831 	} else {
832 		printf("Set clocks to values specified in args\n");
833 		ret = args_envs_enumerate(env_map_clock, 2, argc, argv);
834 	}
835 
836 	if (ret)
837 		return CMD_RET_FAILURE;
838 
839 	ret = envs_validate_common(env_map_clock);
840 	if (ret)
841 		return CMD_RET_FAILURE;
842 
843 	/* Setup clock tree HW */
844 	setup_clocks();
845 
846 	return CMD_RET_SUCCESS;
847 }
848 
849 static int do_hsdk_clock_get(cmd_tbl_t *cmdtp, int flag, int argc,
850 			     char *const argv[])
851 {
852 	ulong rate;
853 
854 	if (soc_clk_ctl("cpu-clk", &rate, CLK_GET | CLK_MHZ))
855 		return CMD_RET_FAILURE;
856 
857 	if (env_set_ulong("cpu_freq", rate))
858 		return CMD_RET_FAILURE;
859 
860 	if (soc_clk_ctl("tun-clk", &rate, CLK_GET | CLK_MHZ))
861 		return CMD_RET_FAILURE;
862 
863 	if (env_set_ulong("tun_freq", rate))
864 		return CMD_RET_FAILURE;
865 
866 	if (soc_clk_ctl("axi-clk", &rate, CLK_GET | CLK_MHZ))
867 		return CMD_RET_FAILURE;
868 
869 	if (env_set_ulong("axi_freq", rate))
870 		return CMD_RET_FAILURE;
871 
872 	printf("Clock values are saved to environment\n");
873 
874 	return CMD_RET_SUCCESS;
875 }
876 
877 static int do_hsdk_clock_print(cmd_tbl_t *cmdtp, int flag, int argc,
878 			       char *const argv[])
879 {
880 	/* Main clocks */
881 	soc_clk_ctl("cpu-clk", NULL, CLK_PRINT | CLK_MHZ);
882 	soc_clk_ctl("tun-clk", NULL, CLK_PRINT | CLK_MHZ);
883 	soc_clk_ctl("axi-clk", NULL, CLK_PRINT | CLK_MHZ);
884 	soc_clk_ctl("ddr-clk", NULL, CLK_PRINT | CLK_MHZ);
885 
886 	return CMD_RET_SUCCESS;
887 }
888 
889 static int do_hsdk_clock_print_all(cmd_tbl_t *cmdtp, int flag, int argc,
890 				   char *const argv[])
891 {
892 	/*
893 	 * NOTE: as of today we don't use some peripherals like HDMI / EBI
894 	 * so we don't want to print their clocks ("hdmi-sys-clk", "hdmi-pll",
895 	 * "hdmi-clk", "ebi-clk"). Nevertheless their clock subsystems is fully
896 	 * functional and we can print their clocks if it is required
897 	 */
898 
899 	/* CPU clock domain */
900 	soc_clk_ctl("cpu-pll", NULL, CLK_PRINT | CLK_MHZ);
901 	soc_clk_ctl("cpu-clk", NULL, CLK_PRINT | CLK_MHZ);
902 	printf("\n");
903 
904 	/* SYS clock domain */
905 	soc_clk_ctl("sys-pll", NULL, CLK_PRINT | CLK_MHZ);
906 	soc_clk_ctl("apb-clk", NULL, CLK_PRINT | CLK_MHZ);
907 	soc_clk_ctl("axi-clk", NULL, CLK_PRINT | CLK_MHZ);
908 	soc_clk_ctl("eth-clk", NULL, CLK_PRINT | CLK_MHZ);
909 	soc_clk_ctl("usb-clk", NULL, CLK_PRINT | CLK_MHZ);
910 	soc_clk_ctl("sdio-clk", NULL, CLK_PRINT | CLK_MHZ);
911 /*	soc_clk_ctl("hdmi-sys-clk", NULL, CLK_PRINT | CLK_MHZ); */
912 	soc_clk_ctl("gfx-core-clk", NULL, CLK_PRINT | CLK_MHZ);
913 	soc_clk_ctl("gfx-dma-clk", NULL, CLK_PRINT | CLK_MHZ);
914 	soc_clk_ctl("gfx-cfg-clk", NULL, CLK_PRINT | CLK_MHZ);
915 	soc_clk_ctl("dmac-core-clk", NULL, CLK_PRINT | CLK_MHZ);
916 	soc_clk_ctl("dmac-cfg-clk", NULL, CLK_PRINT | CLK_MHZ);
917 	soc_clk_ctl("sdio-ref-clk", NULL, CLK_PRINT | CLK_MHZ);
918 	soc_clk_ctl("spi-clk", NULL, CLK_PRINT | CLK_MHZ);
919 	soc_clk_ctl("i2c-clk", NULL, CLK_PRINT | CLK_MHZ);
920 /*	soc_clk_ctl("ebi-clk", NULL, CLK_PRINT | CLK_MHZ); */
921 	soc_clk_ctl("uart-clk", NULL, CLK_PRINT | CLK_MHZ);
922 	printf("\n");
923 
924 	/* DDR clock domain */
925 	soc_clk_ctl("ddr-clk", NULL, CLK_PRINT | CLK_MHZ);
926 	printf("\n");
927 
928 	/* HDMI clock domain */
929 /*	soc_clk_ctl("hdmi-pll", NULL, CLK_PRINT | CLK_MHZ); */
930 /*	soc_clk_ctl("hdmi-clk", NULL, CLK_PRINT | CLK_MHZ); */
931 /*	printf("\n"); */
932 
933 	/* TUN clock domain */
934 	soc_clk_ctl("tun-pll", NULL, CLK_PRINT | CLK_MHZ);
935 	soc_clk_ctl("tun-clk", NULL, CLK_PRINT | CLK_MHZ);
936 	soc_clk_ctl("rom-clk", NULL, CLK_PRINT | CLK_MHZ);
937 	soc_clk_ctl("pwm-clk", NULL, CLK_PRINT | CLK_MHZ);
938 	printf("\n");
939 
940 	return CMD_RET_SUCCESS;
941 }
942 
/* Sub-commands of 'hsdk_clock'; searched by name in do_hsdk_clock() */
cmd_tbl_t cmd_hsdk_clock[] = {
	U_BOOT_CMD_MKENT(set, 3, 0, do_hsdk_clock_set, "", ""),
	U_BOOT_CMD_MKENT(get, 3, 0, do_hsdk_clock_get, "", ""),
	U_BOOT_CMD_MKENT(print, 4, 0, do_hsdk_clock_print, "", ""),
	U_BOOT_CMD_MKENT(print_all, 4, 0, do_hsdk_clock_print_all, "", ""),
};
949 
950 static int do_hsdk_clock(cmd_tbl_t *cmdtp, int flag, int argc, char *const argv[])
951 {
952 	cmd_tbl_t *c;
953 
954 	if (argc < 2)
955 		return CMD_RET_USAGE;
956 
957 	/* Strip off leading 'hsdk_clock' command argument */
958 	argc--;
959 	argv++;
960 
961 	c = find_cmd_tbl(argv[0], cmd_hsdk_clock, ARRAY_SIZE(cmd_hsdk_clock));
962 	if (!c)
963 		return CMD_RET_USAGE;
964 
965 	return c->cmd(cmdtp, flag, argc, argv);
966 }
967 
/* Registers the 'hsdk_clock' command; sub-commands are in cmd_hsdk_clock[] */
U_BOOT_CMD(
	hsdk_clock, CONFIG_SYS_MAXARGS, 0, do_hsdk_clock,
	"Synopsys HSDK specific clock command",
	"set   - Set clock to values specified in environment / command line arguments\n"
	"hsdk_clock get   - Save clock values to environment\n"
	"hsdk_clock print - Print main clock values to console\n"
	"hsdk_clock print_all - Print all clock values to console\n"
);
976 
977 /* init calls */
/*
 * Early board init: the AXI apertures are set up unconditionally because
 * DDR has to be visible in the 0x00000000 region by the time the slave CPUs
 * are kicked. Always returns 0.
 */
int board_early_init_f(void)
{
	init_memory_bridge();

	return 0;
}
988 
989 int board_early_init_r(void)
990 {
991 	/*
992 	 * TODO: Init USB here to be able read environment from USB MSD.
993 	 * It can be done with usb_init() call. We can't do it right now
994 	 * due to brocken USB IP SW reset and lack of USB IP HW reset in
995 	 * linux kernel (if we init USB here we will break USB in linux)
996 	 */
997 
998 	/*
999 	 * Flush all d$ as we want to use uncached area with st.di / ld.di
1000 	 * instructions and we don't want to have any dirty line in L1d$ or SL$
1001 	 * in this area. It is enough to flush all d$ once here as we access to
1002 	 * uncached area with regular st (non .di) instruction only when we copy
1003 	 * data during u-boot relocation.
1004 	 */
1005 	flush_dcache_all();
1006 
1007 	printf("Relocation Offset is: %08lx\n", gd->reloc_off);
1008 
1009 	return 0;
1010 }
1011 
1012 int board_late_init(void)
1013 {
1014 	/*
1015 	 * Populate environment with clock frequency values -
1016 	 * run hsdk_clock get callback without uboot command run.
1017 	 */
1018 	do_hsdk_clock_get(NULL, 0, 0, NULL);
1019 
1020 	return 0;
1021 }
1022 
1023 int board_mmc_init(bd_t *bis)
1024 {
1025 	struct dwmci_host *host = NULL;
1026 
1027 	host = malloc(sizeof(struct dwmci_host));
1028 	if (!host) {
1029 		printf("dwmci_host malloc fail!\n");
1030 		return 1;
1031 	}
1032 
1033 	/*
1034 	 * Switch SDIO external ciu clock divider from default div-by-8 to
1035 	 * minimum possible div-by-2.
1036 	 */
1037 	writel(SDIO_UHS_REG_EXT_DIV_2, (void __iomem *)SDIO_UHS_REG_EXT);
1038 
1039 	memset(host, 0, sizeof(struct dwmci_host));
1040 	host->name = "Synopsys Mobile storage";
1041 	host->ioaddr = (void *)ARC_DWMMC_BASE;
1042 	host->buswidth = 4;
1043 	host->dev_index = 0;
1044 	host->bus_hz = 50000000;
1045 
1046 	add_dwmci(host, host->bus_hz / 2, 400000);
1047 
1048 	return 0;
1049 }
1050