1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Copyright (C) 2018 Stefan Roese <sr@denx.de>
4  *
5  * This code is mostly based on the code extracted from this MediaTek
6  * github repository:
7  *
8  * https://github.com/MediaTek-Labs/linkit-smart-uboot.git
9  *
10  * I was not able to find a specific license or other developers
11  * copyrights here, so I can't add them here.
12  *
13  * Most functions in this file are copied from the MediaTek U-Boot
14  * repository. Without any documentation, it was impossible to really
 * implement this differently. So it's mostly a cleaned-up version of
16  * the original code, with only support for the MT7628 / MT7688 SoC.
17  */
18 
19 #include <common.h>
20 #include <linux/io.h>
21 #include <asm/cacheops.h>
22 #include <asm/io.h>
23 #include "mt76xx.h"
24 
/*
 * Number of cache lines covered by each test window. test_loop() scales
 * this by 32 to get a byte count and by 8 to get a count of 32-bit words.
 */
#define NUM_OF_CACHELINE	128
/* Start values (coarse, fine) for the search of the minimum DQS setting */
#define MIN_START		6
#define MIN_FINE_START		0xf
/* Start values (coarse, fine) for the search of the maximum DQS setting */
#define MAX_START		7
#define MAX_FINE_START		0x0

/*
 * Value that ddr_calibrate() steps the 4-bit field in bits 11:8 of
 * MT76XX_DYN_CFG0_REG down to. If set outside of 1..10, the low nibble
 * read from that register is used instead.
 */
#define CPU_FRAC_DIV		1

/*
 * DRAM size in bytes for the configured chip density (e.g. 256 Mbit ->
 * 0x02000000 bytes). test_loop() uses it as the upper limit of the tested
 * address range and to derive the step between test windows.
 * NOTE(review): "BUTTOM" is presumably a misspelling of "BOTTOM"; the name
 * is kept because it is referenced elsewhere in this file.
 * NOTE(review): if none of the CONFIG_ONBOARD_DDR2_SIZE_* options is
 * defined, DRAM_BUTTOM stays undefined and test_loop() does not compile.
 */
#if defined(CONFIG_ONBOARD_DDR2_SIZE_256MBIT)
#define DRAM_BUTTOM 0x02000000
#endif
#if defined(CONFIG_ONBOARD_DDR2_SIZE_512MBIT)
#define DRAM_BUTTOM 0x04000000
#endif
#if defined(CONFIG_ONBOARD_DDR2_SIZE_1024MBIT)
#define DRAM_BUTTOM 0x08000000
#endif
#if defined(CONFIG_ONBOARD_DDR2_SIZE_2048MBIT)
#define DRAM_BUTTOM 0x10000000
#endif
45 
/**
 * cal_memcpy() - byte-wise copy helper for the calibration code
 * @src:	start of the source buffer
 * @dst:	start of the destination buffer
 * @size:	number of bytes to copy
 *
 * Note the argument order: source first, destination second, i.e. the
 * opposite of the standard memcpy(). Bytes are copied one at a time in
 * ascending address order.
 */
static inline void cal_memcpy(void *src, void *dst, u32 size)
{
	const u8 *from = src;
	u8 *to = dst;
	u32 i;

	/* The index has the same unsigned type as size, so it cannot overflow */
	for (i = 0; i < size; i++)
		to[i] = from[i];
}
55 
/**
 * cal_memset() - byte-wise fill helper for the calibration code
 * @src:	start of the buffer to fill (despite the name, it is written)
 * @pat:	byte value stored into every position
 * @size:	number of bytes to fill
 *
 * Bytes are written one at a time in ascending address order.
 */
static inline void cal_memset(void *src, u8 pat, u32 size)
{
	u8 *p = src;
	u32 i;

	/* The index has the same unsigned type as size, so it cannot overflow */
	for (i = 0; i < size; i++)
		p[i] = pat;
}
64 
/*
 * pref_op() - emit a single "pref" (prefetch) instruction
 * @hint: prefetch hint; has to be a compile-time constant ("i" constraint)
 * @addr: address to prefetch; handed to the asm as a byte-sized memory
 *	  operand ("R" constraint)
 *
 * The instruction is wrapped in .set push / .set noreorder / .set pop so
 * that the assembler mode changed here is restored afterwards. The asm has
 * no output operands.
 */
#define pref_op(hint, addr)						\
	__asm__ __volatile__(						\
		".set	push\n"						\
		".set	noreorder\n"					\
		"pref	%0, %1\n"					\
		".set	pop\n"						\
		:							\
		: "i" (hint), "R" (*(u8 *)(addr)))
73 
/**
 * cal_patgen() - fill a memory region with an address-derived test pattern
 * @start_addr:	address of the first 32-bit word to write
 * @size:	number of 32-bit words to write (not bytes)
 * @bias:	constant added to every generated value
 *
 * Word number i of the region is set to start_addr + i + bias, which is
 * the value test_loop() expects when reading the region back.
 */
static inline void cal_patgen(u32 start_addr, u32 size, u32 bias)
{
	u32 *word = (u32 *)start_addr;
	u32 i;

	/* The index has the same unsigned type as size, so it cannot overflow */
	for (i = 0; i < size; i++)
		word[i] = start_addr + i + bias;
}
82 
/**
 * test_loop() - write and read back test patterns with one DQS setting
 * @k:		0: @test_dqs is used as the upper nibble and @val as the
 *		lower nibble of the setting under test;
 *		1: @coarse_dqs[@dqs] is the upper nibble and @test_dqs the
 *		lower nibble
 * @dqs:	> 0: the setting is placed in bits 15:8 of the register and
 *		bits 7:0 are fixed to 0x74; otherwise the setting is placed
 *		in bits 7:0 and bits 15:8 are fixed to 0x74
 * @test_dqs:	candidate value being tested
 * @coarse_dqs:	array indexed by @dqs, only read when @k is 1
 * @offs:	extra offset added to the step between two test windows
 * @pat:	bias added to the generated pattern (see cal_patgen())
 * @val:	value for the lower nibble when @k is 0
 *
 * Walks through the range 0xa0000000 .. 0xa0000000 + DRAM_BUTTOM in steps
 * of (DRAM_BUTTOM >> 6) + @offs. For each window of NUM_OF_CACHELINE * 32
 * bytes the pattern is written with the register at MT76XX_MEMCTRL_BASE +
 * 0x64 set to a fixed value, then the register is switched to the setting
 * under test and the window is read back and compared.
 *
 * Return: 0 if all windows read back correctly, -1 on the first mismatch
 */
static inline int test_loop(int k, int dqs, u32 test_dqs, u32 *coarse_dqs,
			    u32 offs, u32 pat, u32 val)
{
	u32 nc_addr;
	u32 *c_addr;
	int i;

	for (nc_addr = 0xa0000000;
	     nc_addr < (0xa0000000 + DRAM_BUTTOM - NUM_OF_CACHELINE * 32);
	     nc_addr += (DRAM_BUTTOM >> 6) + offs) {
		/* Write the pattern with a fixed register value */
		writel(0x00007474, (void *)MT76XX_MEMCTRL_BASE + 0x64);
		wmb();		/* Make sure store is finished */

		/*
		 * Same location with address bit 29 cleared (0xa0000000 ->
		 * 0x80000000). NOTE(review): presumably the cached alias of
		 * the uncached nc_addr - confirm against the memory map.
		 */
		c_addr = (u32 *)(nc_addr & 0xdfffffff);
		cal_memset(((u8 *)c_addr), 0x1F, NUM_OF_CACHELINE * 32);
		cal_patgen(nc_addr, NUM_OF_CACHELINE * 8, pat);

		/* Now switch to the setting that is to be tested */
		if (dqs > 0)
			writel(0x00000074 |
			       (((k == 1) ? coarse_dqs[dqs] : test_dqs) << 12) |
			       (((k == 0) ? val : test_dqs) << 8),
			       (void *)MT76XX_MEMCTRL_BASE + 0x64);
		else
			writel(0x00007400 |
			       (((k == 1) ? coarse_dqs[dqs] : test_dqs) << 4) |
			       (((k == 0) ? val : test_dqs) << 0),
			       (void *)MT76XX_MEMCTRL_BASE + 0x64);
		wmb();		/* Make sure store is finished */

		/* Drop the cache contents of the window before reading it */
		invalidate_dcache_range((u32)c_addr,
					(u32)c_addr +
					NUM_OF_CACHELINE * 32);
		wmb();		/* Make sure store is finished */

		/* Issue one prefetch for every 8th word (every 32 bytes) */
		for (i = 0; i < NUM_OF_CACHELINE * 8; i++) {
			if (i % 8 == 0)
				pref_op(0, &c_addr[i]);
		}

		/* Compare against the values generated by cal_patgen() */
		for (i = 0; i < NUM_OF_CACHELINE * 8; i++) {
			if (c_addr[i] != nc_addr + i + pat)
				return -1;
		}
	}

	return 0;
}
130 
131 void ddr_calibrate(void)
132 {
133 	u32 min_coarse_dqs[2];
134 	u32 max_coarse_dqs[2];
135 	u32 min_fine_dqs[2];
136 	u32 max_fine_dqs[2];
137 	u32 coarse_dqs[2];
138 	u32 fine_dqs[2];
139 	int reg = 0, ddr_cfg2_reg;
140 	int flag;
141 	int i, k;
142 	int dqs = 0;
143 	u32 min_coarse_dqs_bnd, min_fine_dqs_bnd, coarse_dqs_dll, fine_dqs_dll;
144 	u32 val;
145 	u32 fdiv = 0, frac = 0;
146 
147 	/* Setup clock to run at full speed */
148 	val = readl((void *)MT76XX_DYN_CFG0_REG);
149 	fdiv = (u32)((val >> 8) & 0x0F);
150 	if (CPU_FRAC_DIV < 1 || CPU_FRAC_DIV > 10)
151 		frac = val & 0x0f;
152 	else
153 		frac = CPU_FRAC_DIV;
154 
155 	while (frac < fdiv) {
156 		val = readl((void *)MT76XX_DYN_CFG0_REG);
157 		fdiv = (val >> 8) & 0x0f;
158 		fdiv--;
159 		val &= ~(0x0f << 8);
160 		val |= (fdiv << 8);
161 		writel(val, (void *)MT76XX_DYN_CFG0_REG);
162 		udelay(500);
163 		val = readl((void *)MT76XX_DYN_CFG0_REG);
164 		fdiv = (val >> 8) & 0x0f;
165 	}
166 
167 	clrbits_le32((void *)MT76XX_MEMCTRL_BASE + 0x10, BIT(4));
168 	ddr_cfg2_reg = readl((void *)MT76XX_MEMCTRL_BASE + 0x48);
169 	clrbits_le32((void *)MT76XX_MEMCTRL_BASE + 0x48,
170 		     (0x3 << 28) | (0x3 << 26));
171 
172 	min_coarse_dqs[0] = MIN_START;
173 	min_coarse_dqs[1] = MIN_START;
174 	min_fine_dqs[0] = MIN_FINE_START;
175 	min_fine_dqs[1] = MIN_FINE_START;
176 	max_coarse_dqs[0] = MAX_START;
177 	max_coarse_dqs[1] = MAX_START;
178 	max_fine_dqs[0] = MAX_FINE_START;
179 	max_fine_dqs[1] = MAX_FINE_START;
180 	dqs = 0;
181 
182 	/* Add by KP, DQS MIN boundary */
183 	reg = readl((void *)MT76XX_MEMCTRL_BASE + 0x20);
184 	coarse_dqs_dll = (reg & 0xf00) >> 8;
185 	fine_dqs_dll = (reg & 0xf0) >> 4;
186 	if (coarse_dqs_dll <= 8)
187 		min_coarse_dqs_bnd = 8 - coarse_dqs_dll;
188 	else
189 		min_coarse_dqs_bnd = 0;
190 
191 	if (fine_dqs_dll <= 8)
192 		min_fine_dqs_bnd = 8 - fine_dqs_dll;
193 	else
194 		min_fine_dqs_bnd = 0;
195 	/* DQS MIN boundary */
196 
197 DQS_CAL:
198 
199 	for (k = 0; k < 2; k++) {
200 		u32 test_dqs;
201 
202 		if (k == 0)
203 			test_dqs = MAX_START;
204 		else
205 			test_dqs = MAX_FINE_START;
206 
207 		do {
208 			flag = test_loop(k, dqs, test_dqs, max_coarse_dqs,
209 					 0x400, 0x3, 0xf);
210 			if (flag == -1)
211 				break;
212 
213 			test_dqs++;
214 		} while (test_dqs <= 0xf);
215 
216 		if (k == 0) {
217 			max_coarse_dqs[dqs] = test_dqs;
218 		} else {
219 			test_dqs--;
220 
221 			if (test_dqs == MAX_FINE_START - 1) {
222 				max_coarse_dqs[dqs]--;
223 				max_fine_dqs[dqs] = 0xf;
224 			} else {
225 				max_fine_dqs[dqs] = test_dqs;
226 			}
227 		}
228 	}
229 
230 	for (k = 0; k < 2; k++) {
231 		u32 test_dqs;
232 
233 		if (k == 0)
234 			test_dqs = MIN_START;
235 		else
236 			test_dqs = MIN_FINE_START;
237 
238 		do {
239 			flag = test_loop(k, dqs, test_dqs, min_coarse_dqs,
240 					 0x480, 0x1, 0x0);
241 			if (k == 0) {
242 				if (flag == -1 ||
243 				    test_dqs == min_coarse_dqs_bnd)
244 					break;
245 
246 				test_dqs--;
247 
248 				if (test_dqs < min_coarse_dqs_bnd)
249 					break;
250 			} else {
251 				if (flag == -1) {
252 					test_dqs++;
253 					break;
254 				} else if (test_dqs == min_fine_dqs_bnd) {
255 					break;
256 				}
257 
258 				test_dqs--;
259 
260 				if (test_dqs < min_fine_dqs_bnd)
261 					break;
262 			}
263 		} while (test_dqs >= 0);
264 
265 		if (k == 0) {
266 			min_coarse_dqs[dqs] = test_dqs;
267 		} else {
268 			if (test_dqs == MIN_FINE_START + 1) {
269 				min_coarse_dqs[dqs]++;
270 				min_fine_dqs[dqs] = 0x0;
271 			} else {
272 				min_fine_dqs[dqs] = test_dqs;
273 			}
274 		}
275 	}
276 
277 	if (dqs == 0) {
278 		dqs = 1;
279 		goto DQS_CAL;
280 	}
281 
282 	for (i = 0; i < 2; i++) {
283 		u32 temp;
284 
285 		coarse_dqs[i] = (max_coarse_dqs[i] + min_coarse_dqs[i]) >> 1;
286 		temp =
287 		    (((max_coarse_dqs[i] + min_coarse_dqs[i]) % 2) * 4) +
288 		    ((max_fine_dqs[i] + min_fine_dqs[i]) >> 1);
289 		if (temp >= 0x10) {
290 			coarse_dqs[i]++;
291 			fine_dqs[i] = (temp - 0x10) + 0x8;
292 		} else {
293 			fine_dqs[i] = temp;
294 		}
295 	}
296 	reg = (coarse_dqs[1] << 12) | (fine_dqs[1] << 8) |
297 		(coarse_dqs[0] << 4) | fine_dqs[0];
298 
299 	clrbits_le32((void *)MT76XX_MEMCTRL_BASE + 0x10, BIT(4));
300 	writel(reg, (void *)MT76XX_MEMCTRL_BASE + 0x64);
301 	writel(ddr_cfg2_reg, (void *)MT76XX_MEMCTRL_BASE + 0x48);
302 	setbits_le32((void *)MT76XX_MEMCTRL_BASE + 0x10, BIT(4));
303 
304 	for (i = 0; i < 2; i++)
305 		debug("[%02X%02X%02X%02X]", min_coarse_dqs[i],
306 		      min_fine_dqs[i], max_coarse_dqs[i], max_fine_dqs[i]);
307 	debug("\nDDR Calibration DQS reg = %08X\n", reg);
308 }
309