xref: /openbmc/u-boot/arch/mips/mach-mt7620/ddr_calibrate.c (revision 284b27cf81da10d55070a49ee8b739f71377a4fb)
// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2018 Stefan Roese <sr@denx.de>
 *
 * This code is mostly based on the code extracted from this MediaTek
 * GitHub repository:
 *
 * https://github.com/MediaTek-Labs/linkit-smart-uboot.git
 *
 * I was not able to find a specific license or other developers'
 * copyrights here, so I can't add them here.
 *
 * Most functions in this file are copied from the MediaTek U-Boot
 * repository. Without any documentation, it was impossible to really
 * implement this differently. So it's mostly a cleaned-up version of
 * the original code, with only support for the MT7628 / MT7688 SoC.
 */
18*4c835a60SStefan Roese 
19*4c835a60SStefan Roese #include <common.h>
20*4c835a60SStefan Roese #include <linux/io.h>
21*4c835a60SStefan Roese #include <asm/cacheops.h>
22*4c835a60SStefan Roese #include <asm/io.h>
23*4c835a60SStefan Roese #include "mt76xx.h"
24*4c835a60SStefan Roese 
/*
 * Number of cache lines exercised per test location. The test code below
 * touches NUM_OF_CACHELINE * 32 bytes (NUM_OF_CACHELINE * 8 32-bit words)
 * at each location, i.e. it assumes 32 bytes per cache line.
 */
#define NUM_OF_CACHELINE	128

/* Start values (coarse step / fine step) for the downward "min" search */
#define MIN_START		6
#define MIN_FINE_START		0xf
/* Start values (coarse step / fine step) for the upward "max" search */
#define MAX_START		7
#define MAX_FINE_START		0x0

/*
 * Target value for the divider field in bits 11:8 of MT76XX_DYN_CFG0_REG.
 * Values outside 1..10 make ddr_calibrate() take the target from bits 3:0
 * of that register instead.
 */
#define CPU_FRAC_DIV		1

/*
 * Size of the on-board DRAM in bytes (256 Mbit == 0x02000000 bytes, ...).
 * It bounds the address range that is exercised during calibration.
 * Exactly one of the CONFIG_ONBOARD_DDR2_SIZE_* options is expected to be
 * enabled.
 */
#if defined(CONFIG_ONBOARD_DDR2_SIZE_256MBIT)
#define DRAM_BUTTOM 0x02000000
#endif
#if defined(CONFIG_ONBOARD_DDR2_SIZE_512MBIT)
#define DRAM_BUTTOM 0x04000000
#endif
#if defined(CONFIG_ONBOARD_DDR2_SIZE_1024MBIT)
#define DRAM_BUTTOM 0x08000000
#endif
#if defined(CONFIG_ONBOARD_DDR2_SIZE_2048MBIT)
#define DRAM_BUTTOM 0x10000000
#endif
45*4c835a60SStefan Roese 
/**
 * cal_memcpy() - copy a buffer one byte at a time
 * @src:	buffer to read from
 * @dst:	buffer to write to
 * @size:	number of bytes to copy
 *
 * Note that the argument order is (source, destination), i.e. the opposite
 * of the C library memcpy(). Bytes are copied in ascending address order.
 */
static inline void cal_memcpy(void *src, void *dst, u32 size)
{
	const u8 *psrc = src;
	u8 *pdst = dst;
	u32 i;	/* same type as @size: no signed/unsigned comparison */

	for (i = 0; i < size; i++)
		pdst[i] = psrc[i];
}
55*4c835a60SStefan Roese 
/**
 * cal_memset() - fill a buffer with a byte value, one byte at a time
 * @src:	buffer to fill (despite its name this is the destination)
 * @pat:	byte value to store
 * @size:	number of bytes to fill
 */
static inline void cal_memset(void *src, u8 pat, u32 size)
{
	u8 *pdst = src;
	u32 i;	/* same type as @size: no signed/unsigned comparison */

	for (i = 0; i < size; i++)
		pdst[i] = pat;
}
64*4c835a60SStefan Roese 
/*
 * pref_op() - emit one MIPS "pref" instruction
 * @hint: prefetch hint; must be a compile-time constant ("i" constraint)
 * @addr: address to prefetch; passed as a byte-sized memory operand
 *
 * The instruction is wrapped in ".set push / .set noreorder / .set pop" so
 * the assembler emits it as written and its previous settings are restored
 * afterwards.
 */
#define pref_op(hint, addr)						\
	__asm__ __volatile__(						\
		".set	push\n"						\
		".set	noreorder\n"					\
		"pref	%0, %1\n"					\
		".set	pop\n"						\
		:							\
		: "i" (hint), "R" (*(u8 *)(addr)))
73*4c835a60SStefan Roese 
/**
 * cal_patgen() - fill memory with an address-derived test pattern
 * @start_addr:	address of the first 32-bit word to write
 * @size:	number of 32-bit words to write (not bytes)
 * @bias:	constant added to every pattern word
 *
 * Word number i receives the value @start_addr + i + @bias.
 */
static inline void cal_patgen(u32 start_addr, u32 size, u32 bias)
{
	u32 *addr = (u32 *)start_addr;
	u32 i;	/* same type as @size: no signed/unsigned comparison */

	for (i = 0; i < size; i++)
		addr[i] = start_addr + i + bias;
}
82*4c835a60SStefan Roese 
/**
 * test_loop() - write/read-back memory test with one trial DQS setting
 * @k:		0: @test_dqs goes into the coarse field, @val into the fine
 *		field; 1: @coarse_dqs[@dqs] goes into the coarse field and
 *		@test_dqs into the fine field (@val is unused then)
 * @dqs:	0 selects the low byte (bits 7:0) of the DQS register as the
 *		byte under test, any value > 0 selects bits 15:8; the other
 *		byte is held at 0x74
 * @test_dqs:	trial value for the field selected by @k
 * @coarse_dqs:	per-byte coarse values, only read when @k == 1
 * @offs:	extra bytes added to the stride between test locations
 * @pat:	bias of the test pattern, see cal_patgen()
 * @val:	fine field value used while @k == 0
 *
 * For each test location the pattern is written through the 0xa0000000
 * window with the register at MT76XX_MEMCTRL_BASE + 0x64 set to 0x7474.
 * The trial setting is then programmed and the data is read back through
 * the alias with address bit 29 cleared (0x80000000 window). The variable
 * names nc_addr / c_addr suggest these are the non-cached / cached views
 * of the DRAM.
 *
 * On a mismatch the function returns immediately and leaves the trial
 * setting in the register.
 *
 * Return: 0 if every word at every location matched, -1 on first mismatch
 */
static inline int test_loop(int k, int dqs, u32 test_dqs, u32 *coarse_dqs,
			    u32 offs, u32 pat, u32 val)
{
	void *dqs_reg = (void *)MT76XX_MEMCTRL_BASE + 0x64;
	/* Bit position of the byte under test and the fixed other byte */
	const int shift = (dqs > 0) ? 8 : 0;
	const u32 other = (dqs > 0) ? 0x00000074 : 0x00007400;
	u32 coarse, fine;
	u32 nc_addr;
	u32 *c_addr;
	int i;

	for (nc_addr = 0xa0000000;
	     nc_addr < (0xa0000000 + DRAM_BUTTOM - NUM_OF_CACHELINE * 32);
	     nc_addr += (DRAM_BUTTOM >> 6) + offs) {
		/* Write the reference pattern with the 0x7474 setting */
		writel(0x00007474, dqs_reg);
		wmb();		/* Make sure store is finished */

		c_addr = (u32 *)(nc_addr & 0xdfffffff);
		/* Different data via the c_addr view, discarded below */
		cal_memset(((u8 *)c_addr), 0x1F, NUM_OF_CACHELINE * 32);
		cal_patgen(nc_addr, NUM_OF_CACHELINE * 8, pat);

		/* Program the trial setting: coarse nibble above fine nibble */
		coarse = (k == 1) ? coarse_dqs[dqs] : test_dqs;
		fine = (k == 0) ? val : test_dqs;
		writel(other | (coarse << (shift + 4)) | (fine << shift),
		       dqs_reg);
		wmb();		/* Make sure store is finished */

		/* Drop the cached 0x1F data so the reads below hit the DRAM */
		invalidate_dcache_range((u32)c_addr,
					(u32)c_addr +
					NUM_OF_CACHELINE * 32);
		wmb();		/* Make sure store is finished */

		/* One prefetch per 8 words (32 bytes) of the test area */
		for (i = 0; i < NUM_OF_CACHELINE * 8; i += 8)
			pref_op(0, &c_addr[i]);

		/* Read back and compare against the generated pattern */
		for (i = 0; i < NUM_OF_CACHELINE * 8; i++) {
			if (c_addr[i] != nc_addr + i + pat)
				return -1;
		}
	}

	return 0;
}
130*4c835a60SStefan Roese 
ddr_calibrate(void)131*4c835a60SStefan Roese void ddr_calibrate(void)
132*4c835a60SStefan Roese {
133*4c835a60SStefan Roese 	u32 min_coarse_dqs[2];
134*4c835a60SStefan Roese 	u32 max_coarse_dqs[2];
135*4c835a60SStefan Roese 	u32 min_fine_dqs[2];
136*4c835a60SStefan Roese 	u32 max_fine_dqs[2];
137*4c835a60SStefan Roese 	u32 coarse_dqs[2];
138*4c835a60SStefan Roese 	u32 fine_dqs[2];
139*4c835a60SStefan Roese 	int reg = 0, ddr_cfg2_reg;
140*4c835a60SStefan Roese 	int flag;
141*4c835a60SStefan Roese 	int i, k;
142*4c835a60SStefan Roese 	int dqs = 0;
143*4c835a60SStefan Roese 	u32 min_coarse_dqs_bnd, min_fine_dqs_bnd, coarse_dqs_dll, fine_dqs_dll;
144*4c835a60SStefan Roese 	u32 val;
145*4c835a60SStefan Roese 	u32 fdiv = 0, frac = 0;
146*4c835a60SStefan Roese 
147*4c835a60SStefan Roese 	/* Setup clock to run at full speed */
148*4c835a60SStefan Roese 	val = readl((void *)MT76XX_DYN_CFG0_REG);
149*4c835a60SStefan Roese 	fdiv = (u32)((val >> 8) & 0x0F);
150*4c835a60SStefan Roese 	if (CPU_FRAC_DIV < 1 || CPU_FRAC_DIV > 10)
151*4c835a60SStefan Roese 		frac = val & 0x0f;
152*4c835a60SStefan Roese 	else
153*4c835a60SStefan Roese 		frac = CPU_FRAC_DIV;
154*4c835a60SStefan Roese 
155*4c835a60SStefan Roese 	while (frac < fdiv) {
156*4c835a60SStefan Roese 		val = readl((void *)MT76XX_DYN_CFG0_REG);
157*4c835a60SStefan Roese 		fdiv = (val >> 8) & 0x0f;
158*4c835a60SStefan Roese 		fdiv--;
159*4c835a60SStefan Roese 		val &= ~(0x0f << 8);
160*4c835a60SStefan Roese 		val |= (fdiv << 8);
161*4c835a60SStefan Roese 		writel(val, (void *)MT76XX_DYN_CFG0_REG);
162*4c835a60SStefan Roese 		udelay(500);
163*4c835a60SStefan Roese 		val = readl((void *)MT76XX_DYN_CFG0_REG);
164*4c835a60SStefan Roese 		fdiv = (val >> 8) & 0x0f;
165*4c835a60SStefan Roese 	}
166*4c835a60SStefan Roese 
167*4c835a60SStefan Roese 	clrbits_le32((void *)MT76XX_MEMCTRL_BASE + 0x10, BIT(4));
168*4c835a60SStefan Roese 	ddr_cfg2_reg = readl((void *)MT76XX_MEMCTRL_BASE + 0x48);
169*4c835a60SStefan Roese 	clrbits_le32((void *)MT76XX_MEMCTRL_BASE + 0x48,
170*4c835a60SStefan Roese 		     (0x3 << 28) | (0x3 << 26));
171*4c835a60SStefan Roese 
172*4c835a60SStefan Roese 	min_coarse_dqs[0] = MIN_START;
173*4c835a60SStefan Roese 	min_coarse_dqs[1] = MIN_START;
174*4c835a60SStefan Roese 	min_fine_dqs[0] = MIN_FINE_START;
175*4c835a60SStefan Roese 	min_fine_dqs[1] = MIN_FINE_START;
176*4c835a60SStefan Roese 	max_coarse_dqs[0] = MAX_START;
177*4c835a60SStefan Roese 	max_coarse_dqs[1] = MAX_START;
178*4c835a60SStefan Roese 	max_fine_dqs[0] = MAX_FINE_START;
179*4c835a60SStefan Roese 	max_fine_dqs[1] = MAX_FINE_START;
180*4c835a60SStefan Roese 	dqs = 0;
181*4c835a60SStefan Roese 
182*4c835a60SStefan Roese 	/* Add by KP, DQS MIN boundary */
183*4c835a60SStefan Roese 	reg = readl((void *)MT76XX_MEMCTRL_BASE + 0x20);
184*4c835a60SStefan Roese 	coarse_dqs_dll = (reg & 0xf00) >> 8;
185*4c835a60SStefan Roese 	fine_dqs_dll = (reg & 0xf0) >> 4;
186*4c835a60SStefan Roese 	if (coarse_dqs_dll <= 8)
187*4c835a60SStefan Roese 		min_coarse_dqs_bnd = 8 - coarse_dqs_dll;
188*4c835a60SStefan Roese 	else
189*4c835a60SStefan Roese 		min_coarse_dqs_bnd = 0;
190*4c835a60SStefan Roese 
191*4c835a60SStefan Roese 	if (fine_dqs_dll <= 8)
192*4c835a60SStefan Roese 		min_fine_dqs_bnd = 8 - fine_dqs_dll;
193*4c835a60SStefan Roese 	else
194*4c835a60SStefan Roese 		min_fine_dqs_bnd = 0;
195*4c835a60SStefan Roese 	/* DQS MIN boundary */
196*4c835a60SStefan Roese 
197*4c835a60SStefan Roese DQS_CAL:
198*4c835a60SStefan Roese 
199*4c835a60SStefan Roese 	for (k = 0; k < 2; k++) {
200*4c835a60SStefan Roese 		u32 test_dqs;
201*4c835a60SStefan Roese 
202*4c835a60SStefan Roese 		if (k == 0)
203*4c835a60SStefan Roese 			test_dqs = MAX_START;
204*4c835a60SStefan Roese 		else
205*4c835a60SStefan Roese 			test_dqs = MAX_FINE_START;
206*4c835a60SStefan Roese 
207*4c835a60SStefan Roese 		do {
208*4c835a60SStefan Roese 			flag = test_loop(k, dqs, test_dqs, max_coarse_dqs,
209*4c835a60SStefan Roese 					 0x400, 0x3, 0xf);
210*4c835a60SStefan Roese 			if (flag == -1)
211*4c835a60SStefan Roese 				break;
212*4c835a60SStefan Roese 
213*4c835a60SStefan Roese 			test_dqs++;
214*4c835a60SStefan Roese 		} while (test_dqs <= 0xf);
215*4c835a60SStefan Roese 
216*4c835a60SStefan Roese 		if (k == 0) {
217*4c835a60SStefan Roese 			max_coarse_dqs[dqs] = test_dqs;
218*4c835a60SStefan Roese 		} else {
219*4c835a60SStefan Roese 			test_dqs--;
220*4c835a60SStefan Roese 
221*4c835a60SStefan Roese 			if (test_dqs == MAX_FINE_START - 1) {
222*4c835a60SStefan Roese 				max_coarse_dqs[dqs]--;
223*4c835a60SStefan Roese 				max_fine_dqs[dqs] = 0xf;
224*4c835a60SStefan Roese 			} else {
225*4c835a60SStefan Roese 				max_fine_dqs[dqs] = test_dqs;
226*4c835a60SStefan Roese 			}
227*4c835a60SStefan Roese 		}
228*4c835a60SStefan Roese 	}
229*4c835a60SStefan Roese 
230*4c835a60SStefan Roese 	for (k = 0; k < 2; k++) {
231*4c835a60SStefan Roese 		u32 test_dqs;
232*4c835a60SStefan Roese 
233*4c835a60SStefan Roese 		if (k == 0)
234*4c835a60SStefan Roese 			test_dqs = MIN_START;
235*4c835a60SStefan Roese 		else
236*4c835a60SStefan Roese 			test_dqs = MIN_FINE_START;
237*4c835a60SStefan Roese 
238*4c835a60SStefan Roese 		do {
239*4c835a60SStefan Roese 			flag = test_loop(k, dqs, test_dqs, min_coarse_dqs,
240*4c835a60SStefan Roese 					 0x480, 0x1, 0x0);
241*4c835a60SStefan Roese 			if (k == 0) {
242*4c835a60SStefan Roese 				if (flag == -1 ||
243*4c835a60SStefan Roese 				    test_dqs == min_coarse_dqs_bnd)
244*4c835a60SStefan Roese 					break;
245*4c835a60SStefan Roese 
246*4c835a60SStefan Roese 				test_dqs--;
247*4c835a60SStefan Roese 
248*4c835a60SStefan Roese 				if (test_dqs < min_coarse_dqs_bnd)
249*4c835a60SStefan Roese 					break;
250*4c835a60SStefan Roese 			} else {
251*4c835a60SStefan Roese 				if (flag == -1) {
252*4c835a60SStefan Roese 					test_dqs++;
253*4c835a60SStefan Roese 					break;
254*4c835a60SStefan Roese 				} else if (test_dqs == min_fine_dqs_bnd) {
255*4c835a60SStefan Roese 					break;
256*4c835a60SStefan Roese 				}
257*4c835a60SStefan Roese 
258*4c835a60SStefan Roese 				test_dqs--;
259*4c835a60SStefan Roese 
260*4c835a60SStefan Roese 				if (test_dqs < min_fine_dqs_bnd)
261*4c835a60SStefan Roese 					break;
262*4c835a60SStefan Roese 			}
263*4c835a60SStefan Roese 		} while (test_dqs >= 0);
264*4c835a60SStefan Roese 
265*4c835a60SStefan Roese 		if (k == 0) {
266*4c835a60SStefan Roese 			min_coarse_dqs[dqs] = test_dqs;
267*4c835a60SStefan Roese 		} else {
268*4c835a60SStefan Roese 			if (test_dqs == MIN_FINE_START + 1) {
269*4c835a60SStefan Roese 				min_coarse_dqs[dqs]++;
270*4c835a60SStefan Roese 				min_fine_dqs[dqs] = 0x0;
271*4c835a60SStefan Roese 			} else {
272*4c835a60SStefan Roese 				min_fine_dqs[dqs] = test_dqs;
273*4c835a60SStefan Roese 			}
274*4c835a60SStefan Roese 		}
275*4c835a60SStefan Roese 	}
276*4c835a60SStefan Roese 
277*4c835a60SStefan Roese 	if (dqs == 0) {
278*4c835a60SStefan Roese 		dqs = 1;
279*4c835a60SStefan Roese 		goto DQS_CAL;
280*4c835a60SStefan Roese 	}
281*4c835a60SStefan Roese 
282*4c835a60SStefan Roese 	for (i = 0; i < 2; i++) {
283*4c835a60SStefan Roese 		u32 temp;
284*4c835a60SStefan Roese 
285*4c835a60SStefan Roese 		coarse_dqs[i] = (max_coarse_dqs[i] + min_coarse_dqs[i]) >> 1;
286*4c835a60SStefan Roese 		temp =
287*4c835a60SStefan Roese 		    (((max_coarse_dqs[i] + min_coarse_dqs[i]) % 2) * 4) +
288*4c835a60SStefan Roese 		    ((max_fine_dqs[i] + min_fine_dqs[i]) >> 1);
289*4c835a60SStefan Roese 		if (temp >= 0x10) {
290*4c835a60SStefan Roese 			coarse_dqs[i]++;
291*4c835a60SStefan Roese 			fine_dqs[i] = (temp - 0x10) + 0x8;
292*4c835a60SStefan Roese 		} else {
293*4c835a60SStefan Roese 			fine_dqs[i] = temp;
294*4c835a60SStefan Roese 		}
295*4c835a60SStefan Roese 	}
296*4c835a60SStefan Roese 	reg = (coarse_dqs[1] << 12) | (fine_dqs[1] << 8) |
297*4c835a60SStefan Roese 		(coarse_dqs[0] << 4) | fine_dqs[0];
298*4c835a60SStefan Roese 
299*4c835a60SStefan Roese 	clrbits_le32((void *)MT76XX_MEMCTRL_BASE + 0x10, BIT(4));
300*4c835a60SStefan Roese 	writel(reg, (void *)MT76XX_MEMCTRL_BASE + 0x64);
301*4c835a60SStefan Roese 	writel(ddr_cfg2_reg, (void *)MT76XX_MEMCTRL_BASE + 0x48);
302*4c835a60SStefan Roese 	setbits_le32((void *)MT76XX_MEMCTRL_BASE + 0x10, BIT(4));
303*4c835a60SStefan Roese 
304*4c835a60SStefan Roese 	for (i = 0; i < 2; i++)
305*4c835a60SStefan Roese 		debug("[%02X%02X%02X%02X]", min_coarse_dqs[i],
306*4c835a60SStefan Roese 		      min_fine_dqs[i], max_coarse_dqs[i], max_fine_dqs[i]);
307*4c835a60SStefan Roese 	debug("\nDDR Calibration DQS reg = %08X\n", reg);
308*4c835a60SStefan Roese }
309