1 /*
2  * sun50i H6 platform dram controller init
3  *
4  * (C) Copyright 2017      Icenowy Zheng <icenowy@aosc.io>
5  *
6  * SPDX-License-Identifier:	GPL-2.0+
7  */
8 #include <common.h>
9 #include <asm/io.h>
10 #include <asm/arch/clock.h>
11 #include <asm/arch/dram.h>
12 #include <asm/arch/cpu.h>
13 #include <linux/bitops.h>
14 #include <linux/kconfig.h>
15 
16 /*
 * The DRAM controller structure on H6 is similar to the ones on A23/A80:
 * they all contain 3 parts: COM, CTL and PHY. (As a note, on A33/A83T/H3/A64
 * /H5/R40 CTL and PHY are combined.)
20  *
21  * COM is allwinner-specific. On H6, the address mapping function is moved
22  * from COM to CTL (with the standard ADDRMAP registers on DesignWare memory
23  * controller).
24  *
 * CTL (controller) and PHY are from DesignWare.
26  *
27  * The CTL part is a bit similar to the one on A23/A80 (because they all
28  * originate from DesignWare), but gets more registers added.
29  *
30  * The PHY part is quite new, not seen in any previous Allwinner SoCs, and
31  * not seen on other SoCs in U-Boot. The only SoC that is also known to have
32  * similar PHY is ZynqMP.
33  */
34 
35 /*
 * The delay parameters below allegedly allow specifying delay times (in some
 * unknown unit) for each individual bit trace in each of the four data bytes
 * that a 32-bit wide access consists of. Three control signals can also be
 * adjusted individually.
40  */
/* Number of byte lanes that make up the 32-bit wide data bus */
#define NR_OF_BYTE_LANES	(32 / BITS_PER_BYTE)
/* The eight data lines (DQn) plus DM, DQS, DQS/DM/DQ Output Enable and DQSN */
#define WR_LINES_PER_BYTE_LANE	(BITS_PER_BYTE + 4)
/*
 * The eight data lines (DQn) plus DM, DQS, DQS/DM/DQ Output Enable, DQSN,
 * Termination and Power down
 */
#define RD_LINES_PER_BYTE_LANE	(BITS_PER_BYTE + 6)
/*
 * Parameters describing the DRAM setup that is being initialised.
 * Several fields (cols, rows, ranks) are rewritten while the size and rank
 * count are being auto-detected.
 */
struct dram_para {
	/* DRAM clock, taken from CONFIG_DRAM_CLK (presumably MHz - confirm) */
	u32 clk;
	/* DRAM type; only SUNXI_DRAM_TYPE_LPDDR3 is handled in this file */
	enum sunxi_dram_type type;
	/* number of column address bits */
	u8 cols;
	/* number of row address bits */
	u8 rows;
	/* number of ranks (1 or 2) */
	u8 ranks;
	/* per byte lane read bit delays, added to the PHY DXn BDLR3..BDLR6 */
	const u8 dx_read_delays[NR_OF_BYTE_LANES][RD_LINES_PER_BYTE_LANE];
	/* per byte lane write bit delays, added to the PHY DXn BDLR0..BDLR2 */
	const u8 dx_write_delays[NR_OF_BYTE_LANES][WR_LINES_PER_BYTE_LANE];
};
58 
/*
 * Forward declarations: mctl_core_init() is defined ahead of the helpers it
 * calls, and mctl_channel_init() calls back into mctl_core_init() when it
 * falls back to a single rank.
 */
static void mctl_sys_init(struct dram_para *para);
static void mctl_com_init(struct dram_para *para);
static void mctl_set_timing_lpddr3(struct dram_para *para);
static void mctl_channel_init(struct dram_para *para);
63 
64 static void mctl_core_init(struct dram_para *para)
65 {
66 	mctl_sys_init(para);
67 	mctl_com_init(para);
68 	switch (para->type) {
69 	case SUNXI_DRAM_TYPE_LPDDR3:
70 		mctl_set_timing_lpddr3(para);
71 		break;
72 	default:
73 		panic("Unsupported DRAM type!");
74 	};
75 	mctl_channel_init(para);
76 }
77 
78 static void mctl_phy_pir_init(u32 val)
79 {
80 	struct sunxi_mctl_phy_reg * const mctl_phy =
81 			(struct sunxi_mctl_phy_reg *)SUNXI_DRAM_PHY0_BASE;
82 
83 	writel(val | BIT(0), &mctl_phy->pir);
84 	mctl_await_completion(&mctl_phy->pgsr[0], BIT(0), BIT(0));
85 }
86 
/*
 * MBUS master port numbers. Each value is used as the index of the port's
 * configuration slot in the COM block's master[] array.
 */
enum {
	MBUS_PORT_CPU	= 0,
	MBUS_PORT_GPU	= 1,
	MBUS_PORT_MAHB	= 2,
	MBUS_PORT_DMA	= 3,
	MBUS_PORT_VE	= 4,
	MBUS_PORT_CE	= 5,
	MBUS_PORT_TSC0	= 6,
	MBUS_PORT_NDFC0	= 8,
	MBUS_PORT_CSI0	= 11,
	MBUS_PORT_DI0	= 14,
	MBUS_PORT_DI1	= 15,
	MBUS_PORT_DE300	= 16,
	MBUS_PORT_IOMMU	= 25,
	MBUS_PORT_VE2	= 26,
	MBUS_PORT_USB3	= 37,
	MBUS_PORT_PCIE	= 38,
	MBUS_PORT_VP9	= 39,
	MBUS_PORT_HDCP2	= 40,
};
107 
/* QoS levels of an MBUS port; the value goes into the 2-bit qos field */
enum {
	MBUS_QOS_LOWEST		= 0,
	MBUS_QOS_LOW		= 1,
	MBUS_QOS_HIGH		= 2,
	MBUS_QOS_HIGHEST	= 3
};
114 inline void mbus_configure_port(u8 port,
115 				bool bwlimit,
116 				bool priority,
117 				u8 qos,
118 				u8 waittime,
119 				u8 acs,
120 				u16 bwl0,
121 				u16 bwl1,
122 				u16 bwl2)
123 {
124 	struct sunxi_mctl_com_reg * const mctl_com =
125 			(struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
126 
127 	const u32 cfg0 = ( (bwlimit ? (1 << 0) : 0)
128 			   | (priority ? (1 << 1) : 0)
129 			   | ((qos & 0x3) << 2)
130 			   | ((waittime & 0xf) << 4)
131 			   | ((acs & 0xff) << 8)
132 			   | (bwl0 << 16) );
133 	const u32 cfg1 = ((u32)bwl2 << 16) | (bwl1 & 0xffff);
134 
135 	debug("MBUS port %d cfg0 %08x cfg1 %08x\n", port, cfg0, cfg1);
136 	writel(cfg0, &mctl_com->master[port].cfg0);
137 	writel(cfg1, &mctl_com->master[port].cfg1);
138 }
139 
/*
 * Shorthand for mbus_configure_port(): @port and @qos are given without
 * their MBUS_PORT_ / MBUS_QOS_ prefixes, the priority flag is always false
 * and the wait time is always 0.
 */
#define MBUS_CONF(port, bwlimit, qos, acs, bwl0, bwl1, bwl2)	\
	mbus_configure_port(MBUS_PORT_ ## port, bwlimit, false, \
			    MBUS_QOS_ ## qos, 0, acs, bwl0, bwl1, bwl2)
143 
144 static void mctl_set_master_priority(void)
145 {
146 	struct sunxi_mctl_com_reg * const mctl_com =
147 			(struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
148 
149 	/* enable bandwidth limit windows and set windows size 1us */
150 	writel(399, &mctl_com->tmr);
151 	writel(BIT(16), &mctl_com->bwcr);
152 
153 	MBUS_CONF(  CPU,  true, HIGHEST, 0,  256,  128,  100);
154 	MBUS_CONF(  GPU,  true,    HIGH, 0, 1536, 1400,  256);
155 	MBUS_CONF( MAHB,  true, HIGHEST, 0,  512,  256,   96);
156 	MBUS_CONF(  DMA,  true,    HIGH, 0,  256,  100,   80);
157 	MBUS_CONF(   VE,  true,    HIGH, 2, 8192, 5500, 5000);
158 	MBUS_CONF(   CE,  true,    HIGH, 2,  100,   64,   32);
159 	MBUS_CONF( TSC0,  true,    HIGH, 2,  100,   64,   32);
160 	MBUS_CONF(NDFC0,  true,    HIGH, 0,  256,  128,   64);
161 	MBUS_CONF( CSI0,  true,    HIGH, 0,  256,  128,  100);
162 	MBUS_CONF(  DI0,  true,    HIGH, 0, 1024,  256,   64);
163 	MBUS_CONF(DE300,  true, HIGHEST, 6, 8192, 2800, 2400);
164 	MBUS_CONF(IOMMU,  true, HIGHEST, 0,  100,   64,   32);
165 	MBUS_CONF(  VE2,  true,    HIGH, 2, 8192, 5500, 5000);
166 	MBUS_CONF( USB3,  true,    HIGH, 0,  256,  128,   64);
167 	MBUS_CONF( PCIE,  true,    HIGH, 2,  100,   64,   32);
168 	MBUS_CONF(  VP9,  true,    HIGH, 2, 8192, 5500, 5000);
169 	MBUS_CONF(HDCP2,  true,    HIGH, 2,  100,   64,   32);
170 }
171 
172 static u32 mr_lpddr3[12] = {
173 	0x00000000, 0x00000043, 0x0000001a, 0x00000001,
174 	0x00000000, 0x00000000, 0x00000048, 0x00000000,
175 	0x00000000, 0x00000000, 0x00000000, 0x00000003,
176 };
177 
/* TODO: flexible timing */
/*
 * Program the LPDDR3 timing parameters into the controller (DRAMTMG0..6, 8
 * and 14, DFITMG0/1, RFSHTMG) and into the PHY (mode registers, DTPR0..6,
 * PTR3/4).
 *
 * Note that @para is not referenced: every value is either a constant or
 * derived from CONFIG_DRAM_CLK, directly or through the ns_to_t() helper.
 */
static void mctl_set_timing_lpddr3(struct dram_para *para)
{
	struct sunxi_mctl_ctl_reg * const mctl_ctl =
			(struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
	struct sunxi_mctl_phy_reg * const mctl_phy =
			(struct sunxi_mctl_phy_reg *)SUNXI_DRAM_PHY0_BASE;

	/* Timings converted from nanoseconds, some with a lower bound */
	u8 tccd		= 2;
	u8 tfaw		= max(ns_to_t(50), 4);
	u8 trrd		= max(ns_to_t(10), 2);
	u8 trcd		= max(ns_to_t(24), 2);
	u8 trc		= ns_to_t(70);
	u8 txp		= max(ns_to_t(8), 2);
	u8 twtr		= max(ns_to_t(8), 2);
	u8 trtp		= max(ns_to_t(8), 2);
	u8 twr		= max(ns_to_t(15), 2);
	u8 trp		= ns_to_t(18);
	u8 tras		= ns_to_t(42);
	u8 twtr_sa	= ns_to_t(5);
	u8 tcksrea	= ns_to_t(11);
	u16 trefi	= ns_to_t(3900) / 32;
	u16 trfc	= ns_to_t(210);
	u16 txsr	= ns_to_t(220);

	if (CONFIG_DRAM_CLK % 400 == 0) {
		/* Round up these parameters */
		twtr_sa++;
		tcksrea++;
	}

	/* Fixed values, independent of the clock (except trasmax) */
	u8 tmrw		= 5;
	u8 tmrd		= 5;
	u8 tmod		= 12;
	u8 tcke		= 3;
	u8 tcksrx	= 5;
	u8 tcksre	= 5;
	u8 tckesr	= 5;
	u8 trasmax	= CONFIG_DRAM_CLK / 60;
	u8 txs		= 4;
	u8 txsdll	= 4;
	u8 txsabort	= 4;
	u8 txsfast	= 4;

	u8 tcl		= 5; /* CL 10 */
	u8 tcwl		= 3; /* CWL 6 */
	u8 t_rdata_en	= twtr_sa + 8;

	u32 tdinit0	= (200 * CONFIG_DRAM_CLK) + 1;		/* 200us */
	u32 tdinit1	= (100 * CONFIG_DRAM_CLK) / 1000 + 1;	/* 100ns */
	u32 tdinit2	= (11 * CONFIG_DRAM_CLK) + 1;		/* 11us */
	u32 tdinit3	= (1 * CONFIG_DRAM_CLK) + 1;		/* 1us */

	u8 twtp		= tcwl + 4 + twr + 1;
	/*
	 * The code below for twr2rd and trd2wr follows the IP core's
	 * document from ZynqMP and i.MX7. The BSP has both numbers
	 * subtracted by 2.
	 */
	u8 twr2rd	= tcwl + 4 + 1 + twtr;
	u8 trd2wr	= tcl + 4 + (tcksrea >> 1) - tcwl + 1;

	/* set mode register */
	memcpy(mctl_phy->mr, mr_lpddr3, sizeof(mr_lpddr3));

	/* set DRAM timing */
	writel((twtp << 24) | (tfaw << 16) | (trasmax << 8) | tras,
	       &mctl_ctl->dramtmg[0]);
	writel((txp << 16) | (trtp << 8) | trc, &mctl_ctl->dramtmg[1]);
	writel((tcwl << 24) | (tcl << 16) | (trd2wr << 8) | twr2rd,
	       &mctl_ctl->dramtmg[2]);
	writel((tmrw << 20) | (tmrd << 12) | tmod, &mctl_ctl->dramtmg[3]);
	writel((trcd << 24) | (tccd << 16) | (trrd << 8) | trp,
	       &mctl_ctl->dramtmg[4]);
	writel((tcksrx << 24) | (tcksre << 16) | (tckesr << 8) | tcke,
	       &mctl_ctl->dramtmg[5]);
	/* Value suggested by ZynqMP manual and used by libdram */
	writel((txp + 2) | 0x02020000, &mctl_ctl->dramtmg[6]);
	writel((txsfast << 24) | (txsabort << 16) | (txsdll << 8) | txs,
	       &mctl_ctl->dramtmg[8]);
	writel(txsr, &mctl_ctl->dramtmg[14]);

	clrsetbits_le32(&mctl_ctl->init[0], (3 << 30), (1 << 30));
	writel(0, &mctl_ctl->dfimisc);
	clrsetbits_le32(&mctl_ctl->rankctl, 0xff0, 0x660);

	/*
	 * Set timing registers of the PHY.
	 * Note: the PHY is clocked 2x from the DRAM frequency.
	 */
	writel((trrd << 25) | (tras << 17) | (trp << 9) | (trtp << 1),
	       &mctl_phy->dtpr[0]);
	writel((tfaw << 17) | 0x28000400 | (tmrd << 1), &mctl_phy->dtpr[1]);
	writel(((txs << 6) - 1) | (tcke << 17), &mctl_phy->dtpr[2]);
	writel(((txsdll << 22) - (0x1 << 16)) | twtr_sa | (tcksrea << 8),
	       &mctl_phy->dtpr[3]);
	writel((txp << 1) | (trfc << 17) | 0x800, &mctl_phy->dtpr[4]);
	writel((trc << 17) | (trcd << 9) | (twtr << 1), &mctl_phy->dtpr[5]);
	writel(0x0505, &mctl_phy->dtpr[6]);

	/* Configure DFI timing */
	writel(tcl | 0x2000200 | (t_rdata_en << 16) | 0x808000,
	       &mctl_ctl->dfitmg0);
	writel(0x040201, &mctl_ctl->dfitmg1);

	/* Configure PHY timing */
	writel(tdinit0 | (tdinit1 << 20), &mctl_phy->ptr[3]);
	writel(tdinit2 | (tdinit3 << 18), &mctl_phy->ptr[4]);

	/* set refresh timing */
	writel((trefi << 16) | trfc, &mctl_ctl->rfshtmg);
}
290 
291 static void mctl_sys_init(struct dram_para *para)
292 {
293 	struct sunxi_ccm_reg * const ccm =
294 			(struct sunxi_ccm_reg *)SUNXI_CCM_BASE;
295 	struct sunxi_mctl_com_reg * const mctl_com =
296 			(struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
297 	struct sunxi_mctl_ctl_reg * const mctl_ctl =
298 			(struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
299 
300 	/* Put all DRAM-related blocks to reset state */
301 	clrbits_le32(&ccm->mbus_cfg, MBUS_ENABLE | MBUS_RESET);
302 	writel(0, &ccm->dram_gate_reset);
303 	clrbits_le32(&ccm->pll5_cfg, CCM_PLL5_CTRL_EN);
304 	clrbits_le32(&ccm->dram_clk_cfg, DRAM_MOD_RESET);
305 
306 	udelay(5);
307 
308 	/* Set PLL5 rate to doubled DRAM clock rate */
309 	writel(CCM_PLL5_CTRL_EN | CCM_PLL5_LOCK_EN |
310 	       CCM_PLL5_CTRL_N(para->clk * 2 / 24 - 1), &ccm->pll5_cfg);
311 	mctl_await_completion(&ccm->pll5_cfg, CCM_PLL5_LOCK, CCM_PLL5_LOCK);
312 
313 	/* Configure DRAM mod clock */
314 	writel(DRAM_CLK_SRC_PLL5, &ccm->dram_clk_cfg);
315 	setbits_le32(&ccm->dram_clk_cfg, DRAM_CLK_UPDATE);
316 	writel(BIT(0) | BIT(RESET_SHIFT), &ccm->dram_gate_reset);
317 
318 	/* Disable all channels */
319 	writel(0, &mctl_com->maer0);
320 	writel(0, &mctl_com->maer1);
321 	writel(0, &mctl_com->maer2);
322 
323 	/* Configure MBUS and enable DRAM mod reset */
324 	setbits_le32(&ccm->mbus_cfg, MBUS_RESET);
325 	setbits_le32(&ccm->mbus_cfg, MBUS_ENABLE);
326 	setbits_le32(&ccm->dram_clk_cfg, DRAM_MOD_RESET);
327 	udelay(5);
328 
329 	/* Unknown hack from the BSP, which enables access of mctl_ctl regs */
330 	writel(0x8000, &mctl_ctl->unk_0x00c);
331 }
332 
333 static void mctl_set_addrmap(struct dram_para *para)
334 {
335 	struct sunxi_mctl_ctl_reg * const mctl_ctl =
336 			(struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
337 	u8 cols = para->cols;
338 	u8 rows = para->rows;
339 	u8 ranks = para->ranks;
340 
341 	/* Ranks */
342 	if (ranks == 2)
343 		mctl_ctl->addrmap[0] = rows + cols - 3;
344 	else
345 		mctl_ctl->addrmap[0] = 0x1F;
346 
347 	/* Banks, hardcoded to 8 banks now */
348 	mctl_ctl->addrmap[1] = (cols - 2) | (cols - 2) << 8 | (cols - 2) << 16;
349 
350 	/* Columns */
351 	mctl_ctl->addrmap[2] = 0;
352 	switch (cols) {
353 	case 8:
354 		mctl_ctl->addrmap[3] = 0x1F1F0000;
355 		mctl_ctl->addrmap[4] = 0x1F1F;
356 		break;
357 	case 9:
358 		mctl_ctl->addrmap[3] = 0x1F000000;
359 		mctl_ctl->addrmap[4] = 0x1F1F;
360 		break;
361 	case 10:
362 		mctl_ctl->addrmap[3] = 0;
363 		mctl_ctl->addrmap[4] = 0x1F1F;
364 		break;
365 	case 11:
366 		mctl_ctl->addrmap[3] = 0;
367 		mctl_ctl->addrmap[4] = 0x1F00;
368 		break;
369 	case 12:
370 		mctl_ctl->addrmap[3] = 0;
371 		mctl_ctl->addrmap[4] = 0;
372 		break;
373 	default:
374 		panic("Unsupported DRAM configuration: column number invalid\n");
375 	}
376 
377 	/* Rows */
378 	mctl_ctl->addrmap[5] = (cols - 3) | ((cols - 3) << 8) | ((cols - 3) << 16) | ((cols - 3) << 24);
379 	switch (rows) {
380 	case 13:
381 		mctl_ctl->addrmap[6] = (cols - 3) | 0x0F0F0F00;
382 		mctl_ctl->addrmap[7] = 0x0F0F;
383 		break;
384 	case 14:
385 		mctl_ctl->addrmap[6] = (cols - 3) | ((cols - 3) << 8) | 0x0F0F0000;
386 		mctl_ctl->addrmap[7] = 0x0F0F;
387 		break;
388 	case 15:
389 		mctl_ctl->addrmap[6] = (cols - 3) | ((cols - 3) << 8) | ((cols - 3) << 16) | 0x0F000000;
390 		mctl_ctl->addrmap[7] = 0x0F0F;
391 		break;
392 	case 16:
393 		mctl_ctl->addrmap[6] = (cols - 3) | ((cols - 3) << 8) | ((cols - 3) << 16) | ((cols - 3) << 24);
394 		mctl_ctl->addrmap[7] = 0x0F0F;
395 		break;
396 	case 17:
397 		mctl_ctl->addrmap[6] = (cols - 3) | ((cols - 3) << 8) | ((cols - 3) << 16) | ((cols - 3) << 24);
398 		mctl_ctl->addrmap[7] = (cols - 3) | 0x0F00;
399 		break;
400 	case 18:
401 		mctl_ctl->addrmap[6] = (cols - 3) | ((cols - 3) << 8) | ((cols - 3) << 16) | ((cols - 3) << 24);
402 		mctl_ctl->addrmap[7] = (cols - 3) | ((cols - 3) << 8);
403 		break;
404 	default:
405 		panic("Unsupported DRAM configuration: row number invalid\n");
406 	}
407 
408 	/* Bank groups, DDR4 only */
409 	mctl_ctl->addrmap[8] = 0x3F3F;
410 }
411 
412 static void mctl_com_init(struct dram_para *para)
413 {
414 	struct sunxi_mctl_com_reg * const mctl_com =
415 			(struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
416 	struct sunxi_mctl_ctl_reg * const mctl_ctl =
417 			(struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
418 	struct sunxi_mctl_phy_reg * const mctl_phy =
419 			(struct sunxi_mctl_phy_reg *)SUNXI_DRAM_PHY0_BASE;
420 	u32 reg_val, tmp;
421 
422 	mctl_set_addrmap(para);
423 
424 	setbits_le32(&mctl_com->cr, BIT(31));
425 	/*
426 	 * This address is magic; it's in SID memory area, but there's no
427 	 * known definition of it.
428 	 * On my Pine H64 board it has content 7.
429 	 */
430 	if (readl(0x03006100) == 7)
431 		clrbits_le32(&mctl_com->cr, BIT(27));
432 	else if (readl(0x03006100) == 3)
433 		setbits_le32(&mctl_com->cr, BIT(27));
434 
435 	if (para->clk > 408)
436 		reg_val = 0xf00;
437 	else if (para->clk > 246)
438 		reg_val = 0x1f00;
439 	else
440 		reg_val = 0x3f00;
441 	clrsetbits_le32(&mctl_com->unk_0x008, 0x3f00, reg_val);
442 
443 	/* TODO: half DQ, non-LPDDR3 types */
444 	writel(MSTR_DEVICETYPE_LPDDR3 | MSTR_BUSWIDTH_FULL |
445 	       MSTR_BURST_LENGTH(8) | MSTR_ACTIVE_RANKS(para->ranks) |
446 	       0x80000000, &mctl_ctl->mstr);
447 	writel(DCR_LPDDR3 | DCR_DDR8BANK | 0x400, &mctl_phy->dcr);
448 
449 	if (para->ranks == 2)
450 		writel(0x0303, &mctl_ctl->odtmap);
451 	else
452 		writel(0x0201, &mctl_ctl->odtmap);
453 
454 	/* TODO: non-LPDDR3 types */
455 	tmp = para->clk * 7 / 2000;
456 	reg_val = 0x0400;
457 	reg_val |= (tmp + 7) << 24;
458 	reg_val |= (((para->clk < 400) ? 3 : 4) - tmp) << 16;
459 	writel(reg_val, &mctl_ctl->odtcfg);
460 
461 	/* TODO: half DQ */
462 }
463 
464 static void mctl_bit_delay_set(struct dram_para *para)
465 {
466 	struct sunxi_mctl_phy_reg * const mctl_phy =
467 			(struct sunxi_mctl_phy_reg *)SUNXI_DRAM_PHY0_BASE;
468 	int i, j;
469 	u32 val;
470 
471 	for (i = 0; i < 4; i++) {
472 		val = readl(&mctl_phy->dx[i].bdlr0);
473 		for (j = 0; j < 4; j++)
474 			val += para->dx_write_delays[i][j] << (j * 8);
475 		writel(val, &mctl_phy->dx[i].bdlr0);
476 
477 		val = readl(&mctl_phy->dx[i].bdlr1);
478 		for (j = 0; j < 4; j++)
479 			val += para->dx_write_delays[i][j + 4] << (j * 8);
480 		writel(val, &mctl_phy->dx[i].bdlr1);
481 
482 		val = readl(&mctl_phy->dx[i].bdlr2);
483 		for (j = 0; j < 4; j++)
484 			val += para->dx_write_delays[i][j + 8] << (j * 8);
485 		writel(val, &mctl_phy->dx[i].bdlr2);
486 	}
487 	clrbits_le32(&mctl_phy->pgcr[0], BIT(26));
488 
489 	for (i = 0; i < 4; i++) {
490 		val = readl(&mctl_phy->dx[i].bdlr3);
491 		for (j = 0; j < 4; j++)
492 			val += para->dx_read_delays[i][j] << (j * 8);
493 		writel(val, &mctl_phy->dx[i].bdlr3);
494 
495 		val = readl(&mctl_phy->dx[i].bdlr4);
496 		for (j = 0; j < 4; j++)
497 			val += para->dx_read_delays[i][j + 4] << (j * 8);
498 		writel(val, &mctl_phy->dx[i].bdlr4);
499 
500 		val = readl(&mctl_phy->dx[i].bdlr5);
501 		for (j = 0; j < 4; j++)
502 			val += para->dx_read_delays[i][j + 8] << (j * 8);
503 		writel(val, &mctl_phy->dx[i].bdlr5);
504 
505 		val = readl(&mctl_phy->dx[i].bdlr6);
506 		val += (para->dx_read_delays[i][12] << 8) |
507 		       (para->dx_read_delays[i][13] << 16);
508 		writel(val, &mctl_phy->dx[i].bdlr6);
509 	}
510 	setbits_le32(&mctl_phy->pgcr[0], BIT(26));
511 	udelay(1);
512 
513 	for (i = 1; i < 14; i++) {
514 		val = readl(&mctl_phy->acbdlr[i]);
515 		val += 0x0a0a0a0a;
516 		writel(val, &mctl_phy->acbdlr[i]);
517 	}
518 }
519 
/*
 * Configure controller and PHY for the channel, run the PHY initialisation
 * and training sequence through mctl_phy_pir_init(), apply the per-bit
 * delays and finally open the DRAM to the MBUS masters.
 *
 * After training, PGSR0 is inspected:
 *  - if bit 22 is set and all four byte lanes report 2 in the low two bits
 *    of DXnRSR0, @para->ranks is set to 1 and the whole initialisation is
 *    restarted through mctl_core_init();
 *  - if bit 22 is set with any other lane status, or if any of bits 27:20
 *    is set, the function panics.
 *
 * NOTE(review): the single rank fallback does not check that @para->ranks
 * was 2 before; if the same status showed up again with ranks == 1 this
 * would recurse without bound - confirm that cannot happen.
 */
static void mctl_channel_init(struct dram_para *para)
{
	struct sunxi_mctl_com_reg * const mctl_com =
			(struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
	struct sunxi_mctl_ctl_reg * const mctl_ctl =
			(struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
	struct sunxi_mctl_phy_reg * const mctl_phy =
			(struct sunxi_mctl_phy_reg *)SUNXI_DRAM_PHY0_BASE;
	int i;
	u32 val;

	setbits_le32(&mctl_ctl->dfiupd[0], BIT(31) | BIT(30));
	setbits_le32(&mctl_ctl->zqctl[0], BIT(31) | BIT(30));
	writel(0x2f05, &mctl_ctl->sched[0]);
	/* rfshctl3 bit 0 and dfimisc bit 0 are cleared again at the end */
	setbits_le32(&mctl_ctl->rfshctl3, BIT(0));
	setbits_le32(&mctl_ctl->dfimisc, BIT(0));
	setbits_le32(&mctl_ctl->unk_0x00c, BIT(8));
	clrsetbits_le32(&mctl_phy->pgcr[1], 0x180, 0xc0);
	/* TODO: non-LPDDR3 types */
	clrsetbits_le32(&mctl_phy->pgcr[2], GENMASK(17, 0), ns_to_t(7800));
	clrbits_le32(&mctl_phy->pgcr[6], BIT(0));
	clrsetbits_le32(&mctl_phy->dxccr, 0xee0, 0x220);
	/* TODO: VT compensation */
	clrsetbits_le32(&mctl_phy->dsgcr, BIT(0), 0x440060);
	clrbits_le32(&mctl_phy->vtcr[1], BIT(1));

	/* Per byte lane general configuration used during training */
	for (i = 0; i < 4; i++)
		clrsetbits_le32(&mctl_phy->dx[i].gcr[0], 0xe00, 0x800);
	for (i = 0; i < 4; i++)
		clrsetbits_le32(&mctl_phy->dx[i].gcr[2], 0xffff, 0x5555);
	for (i = 0; i < 4; i++)
		clrsetbits_le32(&mctl_phy->dx[i].gcr[3], 0x3030, 0x1010);

	udelay(100);

	/* Rank dependent setup: DTCR1 bits 17:16, RANKIDR and ODTCR */
	if (para->ranks == 2)
		setbits_le32(&mctl_phy->dtcr[1], 0x30000);
	else
		clrsetbits_le32(&mctl_phy->dtcr[1], 0x30000, 0x10000);

	clrbits_le32(&mctl_phy->dtcr[1], BIT(1));
	if (para->ranks == 2) {
		writel(0x00010001, &mctl_phy->rankidr);
		writel(0x20000, &mctl_phy->odtcr);
	} else {
		writel(0x0, &mctl_phy->rankidr);
		writel(0x10000, &mctl_phy->odtcr);
	}

	/* TODO: non-LPDDR3 types */
	clrsetbits_le32(&mctl_phy->dtcr[0], 0xF0000000, 0x10000040);
	/* Value for bits 10:8 of ZQCR, chosen by DRAM clock range */
	if (para->clk <= 792) {
		if (para->clk <= 672) {
			if (para->clk <= 600)
				val = 0x300;
			else
				val = 0x400;
		} else {
			val = 0x500;
		}
	} else {
		val = 0x600;
	}
	/* FIXME: NOT REVIEWED YET */
	/* The ZQ registers below are filled from bytes of CONFIG_DRAM_ZQ */
	clrsetbits_le32(&mctl_phy->zq[0].zqcr, 0x700, val);
	clrsetbits_le32(&mctl_phy->zq[0].zqpr[0], 0xff,
			CONFIG_DRAM_ZQ & 0xff);
	clrbits_le32(&mctl_phy->zq[0].zqor[0], 0xfffff);
	setbits_le32(&mctl_phy->zq[0].zqor[0], (CONFIG_DRAM_ZQ >> 8) & 0xff);
	setbits_le32(&mctl_phy->zq[0].zqor[0], (CONFIG_DRAM_ZQ & 0xf00) - 0x100);
	setbits_le32(&mctl_phy->zq[0].zqor[0], (CONFIG_DRAM_ZQ & 0xff00) << 4);
	clrbits_le32(&mctl_phy->zq[1].zqpr[0], 0xfffff);
	setbits_le32(&mctl_phy->zq[1].zqpr[0], (CONFIG_DRAM_ZQ >> 16) & 0xff);
	setbits_le32(&mctl_phy->zq[1].zqpr[0], ((CONFIG_DRAM_ZQ >> 8) & 0xf00) - 0x100);
	setbits_le32(&mctl_phy->zq[1].zqpr[0], (CONFIG_DRAM_ZQ & 0xff0000) >> 4);
	/* Base value of ACBDLR1..13; mctl_bit_delay_set() adds to it later */
	if (para->type == SUNXI_DRAM_TYPE_LPDDR3) {
		for (i = 1; i < 14; i++)
			writel(0x06060606, &mctl_phy->acbdlr[i]);
	}

	/* TODO: non-LPDDR3 types */
	/* Run the selected PHY init/training steps and wait for completion */
	mctl_phy_pir_init(PIR_ZCAL | PIR_DCAL | PIR_PHYRST | PIR_DRAMINIT |
			  PIR_QSGATE | PIR_RDDSKW | PIR_WRDSKW | PIR_RDEYE |
			  PIR_WREYE);

	/* TODO: non-LPDDR3 types */
	for (i = 0; i < 4; i++)
		writel(0x00000909, &mctl_phy->dx[i].gcr[5]);

	/* Replace the training-time GCR2/GCR3 values, depending on ODT */
	for (i = 0; i < 4; i++) {
		if (IS_ENABLED(CONFIG_DRAM_ODT_EN))
			val = 0x0;
		else
			val = 0xaaaa;
		clrsetbits_le32(&mctl_phy->dx[i].gcr[2], 0xffff, val);

		if (IS_ENABLED(CONFIG_DRAM_ODT_EN))
			val = 0x0;
		else
			val = 0x2020;
		clrsetbits_le32(&mctl_phy->dx[i].gcr[3], 0x3030, val);
	}

	mctl_bit_delay_set(para);
	udelay(1);

	setbits_le32(&mctl_phy->pgcr[6], BIT(0));
	clrbits_le32(&mctl_phy->pgcr[6], 0xfff8);
	for (i = 0; i < 4; i++)
		clrbits_le32(&mctl_phy->dx[i].gcr[3], ~0x3ffff);
	udelay(10);

	/* PGSR0 bit 22 set: check whether this looks like a single rank */
	if (readl(&mctl_phy->pgsr[0]) & 0x400000)
	{
		/*
		 * Detect single rank.
		 * TODO: also detect half DQ.
		 */
		if ((readl(&mctl_phy->dx[0].rsr[0]) & 0x3) == 2 &&
		    (readl(&mctl_phy->dx[1].rsr[0]) & 0x3) == 2 &&
		    (readl(&mctl_phy->dx[2].rsr[0]) & 0x3) == 2 &&
		    (readl(&mctl_phy->dx[3].rsr[0]) & 0x3) == 2) {
			para->ranks = 1;
			/* Restart DRAM initialization from scratch. */
			mctl_core_init(para);
			return;
		}
		else {
			panic("This DRAM setup is currently not supported.\n");
		}
	}

	/* Any of PGSR0 bits 27:20 set is treated as a fatal PHY error */
	if (readl(&mctl_phy->pgsr[0]) & 0xff00000) {
		/* Oops! There's something wrong! */
		debug("PLL = %x\n", readl(0x3001010));
		debug("DRAM PHY PGSR0 = %x\n", readl(&mctl_phy->pgsr[0]));
		for (i = 0; i < 4; i++)
			debug("DRAM PHY DX%dRSR0 = %x\n", i, readl(&mctl_phy->dx[i].rsr[0]));
		panic("Error while initializing DRAM PHY!\n");
	}

	clrsetbits_le32(&mctl_phy->dsgcr, 0xc0, 0x40);
	clrbits_le32(&mctl_phy->pgcr[1], 0x40);
	clrbits_le32(&mctl_ctl->dfimisc, BIT(0));
	/* Write 1 to SWCTL and wait until SWSTAT bit 0 reads back as set */
	writel(1, &mctl_ctl->swctl);
	mctl_await_completion(&mctl_ctl->swstat, 1, 1);
	clrbits_le32(&mctl_ctl->rfshctl3, BIT(0));

	/* Enable the master access registers cleared in mctl_sys_init() */
	setbits_le32(&mctl_com->unk_0x014, BIT(31));
	writel(0xffffffff, &mctl_com->maer0);
	writel(0x7ff, &mctl_com->maer1);
	writel(0xffff, &mctl_com->maer2);
}
673 
674 static void mctl_auto_detect_dram_size(struct dram_para *para)
675 {
676 	/* TODO: non-LPDDR3, half DQ */
677 	/*
678 	 * Detect rank number by the code in mctl_channel_init. Furtherly
679 	 * when DQ detection is available it will also be executed there.
680 	 */
681 	mctl_core_init(para);
682 
683 	/* detect row address bits */
684 	para->cols = 8;
685 	para->rows = 18;
686 	mctl_core_init(para);
687 
688 	for (para->rows = 13; para->rows < 18; para->rows++) {
689 		/* 8 banks, 8 bit per byte and 32 bit width */
690 		if (mctl_mem_matches((1 << (para->rows + para->cols + 5))))
691 			break;
692 	}
693 
694 	/* detect column address bits */
695 	para->cols = 11;
696 	mctl_core_init(para);
697 
698 	for (para->cols = 8; para->cols < 11; para->cols++) {
699 		/* 8 bits per byte and 32 bit width */
700 		if (mctl_mem_matches(1 << (para->cols + 2)))
701 			break;
702 	}
703 }
704 
705 unsigned long mctl_calc_size(struct dram_para *para)
706 {
707 	/* TODO: non-LPDDR3, half DQ */
708 
709 	/* 8 banks, 32-bit (4 byte) data width */
710 	return (1ULL << (para->cols + para->rows + 3)) * 4 * para->ranks;
711 }
712 
/*
 * Default per-bit delay tables: one row per byte lane, one entry per line
 * of that lane (WR_LINES_PER_BYTE_LANE entries per row for writes,
 * RD_LINES_PER_BYTE_LANE for reads). They initialise dx_write_delays and
 * dx_read_delays in sunxi_dram_init().
 */
#define SUN50I_H6_DX_WRITE_DELAYS				\
	{{  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },	\
	 {  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },	\
	 {  0,  0,  0,  0,  0,  0,  0,  0,  0,  4,  4,  0 },	\
	 {  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 }}
#define SUN50I_H6_DX_READ_DELAYS					\
	{{  4,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0 },	\
	 {  4,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0 },	\
	 {  4,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0 },	\
	 {  4,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0 }}
723 
724 unsigned long sunxi_dram_init(void)
725 {
726 	struct sunxi_mctl_com_reg * const mctl_com =
727 			(struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
728 	struct dram_para para = {
729 		.clk = CONFIG_DRAM_CLK,
730 		.type = SUNXI_DRAM_TYPE_LPDDR3,
731 		.ranks = 2,
732 		.cols = 11,
733 		.rows = 14,
734 		.dx_read_delays  = SUN50I_H6_DX_READ_DELAYS,
735 		.dx_write_delays = SUN50I_H6_DX_WRITE_DELAYS,
736 	};
737 
738 	unsigned long size;
739 
740 	/* RES_CAL_CTRL_REG in BSP U-boot*/
741 	setbits_le32(0x7010310, BIT(8));
742 	clrbits_le32(0x7010318, 0x3f);
743 
744 	mctl_auto_detect_dram_size(&para);
745 
746 	mctl_core_init(&para);
747 
748 	size = mctl_calc_size(&para);
749 
750 	clrsetbits_le32(&mctl_com->cr, 0xf0, (size >> (10 + 10 + 4)) & 0xf0);
751 
752 	mctl_set_master_priority();
753 
754 	return size;
755 };
756