xref: /openbmc/u-boot/arch/x86/cpu/quark/mrc_util.c (revision fea7f3aa)
1 /*
2  * Copyright (C) 2013, Intel Corporation
3  * Copyright (C) 2015, Bin Meng <bmeng.cn@gmail.com>
4  *
5  * Ported from Intel released Quark UEFI BIOS
6  * QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei
7  *
8  * SPDX-License-Identifier:	Intel
9  */
10 
11 #include <common.h>
12 #include <asm/arch/device.h>
13 #include <asm/arch/mrc.h>
14 #include <asm/arch/msg_port.h>
15 #include "mrc_util.h"
16 #include "hte.h"
17 #include "smc.h"
18 
/*
 * Lookup table translating a Vref setting index (0..63) into the 6-bit
 * code that set_vref() programs into B0VREFCTL/B1VREFCTL and that
 * get_vref() searches to map a register value back to its index.
 */
static const uint8_t vref_codes[64] = {
	/* lowest to highest */
	0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38,
	0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30,
	0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28,
	0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20,
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
};
30 
31 void mrc_write_mask(u32 unit, u32 addr, u32 data, u32 mask)
32 {
33 	msg_port_write(unit, addr,
34 		       (msg_port_read(unit, addr) & ~(mask)) |
35 		       ((data) & (mask)));
36 }
37 
38 void mrc_alt_write_mask(u32 unit, u32 addr, u32 data, u32 mask)
39 {
40 	msg_port_alt_write(unit, addr,
41 			   (msg_port_alt_read(unit, addr) & ~(mask)) |
42 			   ((data) & (mask)));
43 }
44 
45 void mrc_post_code(uint8_t major, uint8_t minor)
46 {
47 	/* send message to UART */
48 	DPF(D_INFO, "POST: 0x%01x%02x\n", major, minor);
49 
50 	/* error check */
51 	if (major == 0xee)
52 		hang();
53 }
54 
/*
 * Busy-wait for approximately the given number of nanoseconds.
 *
 * The interval is converted into TSC ticks from the timebase frequency
 * in MHz (ticks = MHz * ns / 1000) and rdtsc() is polled until that many
 * ticks have elapsed.
 *
 * NOTE(review): the product is formed before the division; confirm it
 * cannot overflow for the ns values callers pass in.
 */
void delay_n(uint32_t ns)
{
	uint64_t deadline = rdtsc();

	deadline += ((get_tbclk_mhz() * ns) / 1000);

	for (;;) {
		if (rdtsc() >= deadline)
			break;
	}
}
66 
/*
 * Busy-wait for the given number of microseconds.
 *
 * Despite its name, the ms parameter is a count of microseconds: one
 * delay_n(1000) call, i.e. 1000 ns, is issued per unit.
 */
void delay_u(uint32_t ms)
{
	uint32_t remaining;

	/* 64-bit math is not an option, so wait one microsecond at a time */
	for (remaining = ms; remaining != 0; remaining--)
		delay_n(1000);
}
74 
75 /* Select Memory Manager as the source for PRI interface */
76 void select_mem_mgr(void)
77 {
78 	u32 dco;
79 
80 	ENTERFN();
81 
82 	dco = msg_port_read(MEM_CTLR, DCO);
83 	dco &= ~DCO_PMICTL;
84 	msg_port_write(MEM_CTLR, DCO, dco);
85 
86 	LEAVEFN();
87 }
88 
89 /* Select HTE as the source for PRI interface */
90 void select_hte(void)
91 {
92 	u32 dco;
93 
94 	ENTERFN();
95 
96 	dco = msg_port_read(MEM_CTLR, DCO);
97 	dco |= DCO_PMICTL;
98 	msg_port_write(MEM_CTLR, DCO, dco);
99 
100 	LEAVEFN();
101 }
102 
/*
 * Send DRAM command
 *
 * data should be formatted using DCMD_Xxxx macro or emrsXCommand structure.
 *
 * The command word is written to the message data register, the extended
 * control register is cleared, and the message is then issued to the
 * memory controller with the MSG_OP_DRAM_INIT opcode.
 */
void dram_init_command(uint32_t data)
{
	pci_write_config_dword(QUARK_HOST_BRIDGE, MSG_DATA_REG, data);
	pci_write_config_dword(QUARK_HOST_BRIDGE, MSG_CTRL_EXT_REG, 0);
	msg_port_setup(MSG_OP_DRAM_INIT, MEM_CTLR, 0);

	/* log the command at the D_REGWR debug level */
	DPF(D_REGWR, "WR32 %03X %08X %08X\n", MEM_CTLR, 0, data);
}
115 
/*
 * Send DRAM wake command using special MCU side-band WAKE opcode
 *
 * Issues a MSG_OP_DRAM_WAKE message to the memory controller; no data
 * word accompanies the message.
 */
void dram_wake_command(void)
{
	ENTERFN();

	msg_port_setup(MSG_OP_DRAM_WAKE, MEM_CTLR, 0);

	LEAVEFN();
}
125 
/*
 * Print the channel, rank and byte lane currently being trained; the
 * delay setters call this right before raising an error POST code.
 */
void training_message(uint8_t channel, uint8_t rank, uint8_t byte_lane)
{
	/* send message to UART */
	DPF(D_INFO, "CH%01X RK%01X BL%01X\n", channel, rank, byte_lane);
}
131 
132 /*
133  * This function will program the RCVEN delays
134  *
135  * (currently doesn't comprehend rank)
136  */
137 void set_rcvn(uint8_t channel, uint8_t rank,
138 	      uint8_t byte_lane, uint32_t pi_count)
139 {
140 	uint32_t reg;
141 	uint32_t msk;
142 	uint32_t temp;
143 
144 	ENTERFN();
145 
146 	DPF(D_TRN, "Rcvn ch%d rnk%d ln%d : pi=%03X\n",
147 	    channel, rank, byte_lane, pi_count);
148 
149 	/*
150 	 * RDPTR (1/2 MCLK, 64 PIs)
151 	 * BL0 -> B01PTRCTL0[11:08] (0x0-0xF)
152 	 * BL1 -> B01PTRCTL0[23:20] (0x0-0xF)
153 	 */
154 	reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
155 		channel * DDRIODQ_CH_OFFSET;
156 	msk = (byte_lane & 1) ? 0xf00000 : 0xf00;
157 	temp = (byte_lane & 1) ? (pi_count / HALF_CLK) << 20 :
158 		(pi_count / HALF_CLK) << 8;
159 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
160 
161 	/* Adjust PI_COUNT */
162 	pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
163 
164 	/*
165 	 * PI (1/64 MCLK, 1 PIs)
166 	 * BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)
167 	 * BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)
168 	 */
169 	reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
170 	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
171 		channel * DDRIODQ_CH_OFFSET);
172 	msk = 0x3f000000;
173 	temp = pi_count << 24;
174 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
175 
176 	/*
177 	 * DEADBAND
178 	 * BL0/1 -> B01DBCTL1[08/11] (+1 select)
179 	 * BL0/1 -> B01DBCTL1[02/05] (enable)
180 	 */
181 	reg = B01DBCTL1 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
182 		channel * DDRIODQ_CH_OFFSET;
183 	msk = 0x00;
184 	temp = 0x00;
185 
186 	/* enable */
187 	msk |= (byte_lane & 1) ? (1 << 5) : (1 << 2);
188 	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
189 		temp |= msk;
190 
191 	/* select */
192 	msk |= (byte_lane & 1) ? (1 << 11) : (1 << 8);
193 	if (pi_count < EARLY_DB)
194 		temp |= msk;
195 
196 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
197 
198 	/* error check */
199 	if (pi_count > 0x3f) {
200 		training_message(channel, rank, byte_lane);
201 		mrc_post_code(0xee, 0xe0);
202 	}
203 
204 	LEAVEFN();
205 }
206 
207 /*
208  * This function will return the current RCVEN delay on the given
209  * channel, rank, byte_lane as an absolute PI count.
210  *
211  * (currently doesn't comprehend rank)
212  */
213 uint32_t get_rcvn(uint8_t channel, uint8_t rank, uint8_t byte_lane)
214 {
215 	uint32_t reg;
216 	uint32_t temp;
217 	uint32_t pi_count;
218 
219 	ENTERFN();
220 
221 	/*
222 	 * RDPTR (1/2 MCLK, 64 PIs)
223 	 * BL0 -> B01PTRCTL0[11:08] (0x0-0xF)
224 	 * BL1 -> B01PTRCTL0[23:20] (0x0-0xF)
225 	 */
226 	reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
227 		channel * DDRIODQ_CH_OFFSET;
228 	temp = msg_port_alt_read(DDRPHY, reg);
229 	temp >>= (byte_lane & 1) ? 20 : 8;
230 	temp &= 0xf;
231 
232 	/* Adjust PI_COUNT */
233 	pi_count = temp * HALF_CLK;
234 
235 	/*
236 	 * PI (1/64 MCLK, 1 PIs)
237 	 * BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)
238 	 * BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)
239 	 */
240 	reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
241 	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
242 		channel * DDRIODQ_CH_OFFSET);
243 	temp = msg_port_alt_read(DDRPHY, reg);
244 	temp >>= 24;
245 	temp &= 0x3f;
246 
247 	/* Adjust PI_COUNT */
248 	pi_count += temp;
249 
250 	LEAVEFN();
251 
252 	return pi_count;
253 }
254 
255 /*
256  * This function will program the RDQS delays based on an absolute
257  * amount of PIs.
258  *
259  * (currently doesn't comprehend rank)
260  */
261 void set_rdqs(uint8_t channel, uint8_t rank,
262 	      uint8_t byte_lane, uint32_t pi_count)
263 {
264 	uint32_t reg;
265 	uint32_t msk;
266 	uint32_t temp;
267 
268 	ENTERFN();
269 	DPF(D_TRN, "Rdqs ch%d rnk%d ln%d : pi=%03X\n",
270 	    channel, rank, byte_lane, pi_count);
271 
272 	/*
273 	 * PI (1/128 MCLK)
274 	 * BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)
275 	 * BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)
276 	 */
277 	reg = (byte_lane & 1) ? B1RXDQSPICODE : B0RXDQSPICODE;
278 	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
279 		channel * DDRIODQ_CH_OFFSET);
280 	msk = 0x7f;
281 	temp = pi_count << 0;
282 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
283 
284 	/* error check (shouldn't go above 0x3F) */
285 	if (pi_count > 0x47) {
286 		training_message(channel, rank, byte_lane);
287 		mrc_post_code(0xee, 0xe1);
288 	}
289 
290 	LEAVEFN();
291 }
292 
293 /*
294  * This function will return the current RDQS delay on the given
295  * channel, rank, byte_lane as an absolute PI count.
296  *
297  * (currently doesn't comprehend rank)
298  */
299 uint32_t get_rdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane)
300 {
301 	uint32_t reg;
302 	uint32_t temp;
303 	uint32_t pi_count;
304 
305 	ENTERFN();
306 
307 	/*
308 	 * PI (1/128 MCLK)
309 	 * BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)
310 	 * BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)
311 	 */
312 	reg = (byte_lane & 1) ? B1RXDQSPICODE : B0RXDQSPICODE;
313 	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
314 		channel * DDRIODQ_CH_OFFSET);
315 	temp = msg_port_alt_read(DDRPHY, reg);
316 
317 	/* Adjust PI_COUNT */
318 	pi_count = temp & 0x7f;
319 
320 	LEAVEFN();
321 
322 	return pi_count;
323 }
324 
325 /*
326  * This function will program the WDQS delays based on an absolute
327  * amount of PIs.
328  *
329  * (currently doesn't comprehend rank)
330  */
331 void set_wdqs(uint8_t channel, uint8_t rank,
332 	      uint8_t byte_lane, uint32_t pi_count)
333 {
334 	uint32_t reg;
335 	uint32_t msk;
336 	uint32_t temp;
337 
338 	ENTERFN();
339 
340 	DPF(D_TRN, "Wdqs ch%d rnk%d ln%d : pi=%03X\n",
341 	    channel, rank, byte_lane, pi_count);
342 
343 	/*
344 	 * RDPTR (1/2 MCLK, 64 PIs)
345 	 * BL0 -> B01PTRCTL0[07:04] (0x0-0xF)
346 	 * BL1 -> B01PTRCTL0[19:16] (0x0-0xF)
347 	 */
348 	reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
349 		channel * DDRIODQ_CH_OFFSET;
350 	msk = (byte_lane & 1) ? 0xf0000 : 0xf0;
351 	temp = pi_count / HALF_CLK;
352 	temp <<= (byte_lane & 1) ? 16 : 4;
353 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
354 
355 	/* Adjust PI_COUNT */
356 	pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
357 
358 	/*
359 	 * PI (1/64 MCLK, 1 PIs)
360 	 * BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)
361 	 * BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)
362 	 */
363 	reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
364 	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
365 		channel * DDRIODQ_CH_OFFSET);
366 	msk = 0x3f0000;
367 	temp = pi_count << 16;
368 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
369 
370 	/*
371 	 * DEADBAND
372 	 * BL0/1 -> B01DBCTL1[07/10] (+1 select)
373 	 * BL0/1 -> B01DBCTL1[01/04] (enable)
374 	 */
375 	reg = B01DBCTL1 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
376 		channel * DDRIODQ_CH_OFFSET;
377 	msk = 0x00;
378 	temp = 0x00;
379 
380 	/* enable */
381 	msk |= (byte_lane & 1) ? (1 << 4) : (1 << 1);
382 	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
383 		temp |= msk;
384 
385 	/* select */
386 	msk |= (byte_lane & 1) ? (1 << 10) : (1 << 7);
387 	if (pi_count < EARLY_DB)
388 		temp |= msk;
389 
390 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
391 
392 	/* error check */
393 	if (pi_count > 0x3f) {
394 		training_message(channel, rank, byte_lane);
395 		mrc_post_code(0xee, 0xe2);
396 	}
397 
398 	LEAVEFN();
399 }
400 
401 /*
402  * This function will return the amount of WDQS delay on the given
403  * channel, rank, byte_lane as an absolute PI count.
404  *
405  * (currently doesn't comprehend rank)
406  */
407 uint32_t get_wdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane)
408 {
409 	uint32_t reg;
410 	uint32_t temp;
411 	uint32_t pi_count;
412 
413 	ENTERFN();
414 
415 	/*
416 	 * RDPTR (1/2 MCLK, 64 PIs)
417 	 * BL0 -> B01PTRCTL0[07:04] (0x0-0xF)
418 	 * BL1 -> B01PTRCTL0[19:16] (0x0-0xF)
419 	 */
420 	reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
421 		channel * DDRIODQ_CH_OFFSET;
422 	temp = msg_port_alt_read(DDRPHY, reg);
423 	temp >>= (byte_lane & 1) ? 16 : 4;
424 	temp &= 0xf;
425 
426 	/* Adjust PI_COUNT */
427 	pi_count = (temp * HALF_CLK);
428 
429 	/*
430 	 * PI (1/64 MCLK, 1 PIs)
431 	 * BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)
432 	 * BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)
433 	 */
434 	reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
435 	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
436 		channel * DDRIODQ_CH_OFFSET);
437 	temp = msg_port_alt_read(DDRPHY, reg);
438 	temp >>= 16;
439 	temp &= 0x3f;
440 
441 	/* Adjust PI_COUNT */
442 	pi_count += temp;
443 
444 	LEAVEFN();
445 
446 	return pi_count;
447 }
448 
449 /*
450  * This function will program the WDQ delays based on an absolute
451  * number of PIs.
452  *
453  * (currently doesn't comprehend rank)
454  */
455 void set_wdq(uint8_t channel, uint8_t rank,
456 	     uint8_t byte_lane, uint32_t pi_count)
457 {
458 	uint32_t reg;
459 	uint32_t msk;
460 	uint32_t temp;
461 
462 	ENTERFN();
463 
464 	DPF(D_TRN, "Wdq ch%d rnk%d ln%d : pi=%03X\n",
465 	    channel, rank, byte_lane, pi_count);
466 
467 	/*
468 	 * RDPTR (1/2 MCLK, 64 PIs)
469 	 * BL0 -> B01PTRCTL0[03:00] (0x0-0xF)
470 	 * BL1 -> B01PTRCTL0[15:12] (0x0-0xF)
471 	 */
472 	reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
473 		channel * DDRIODQ_CH_OFFSET;
474 	msk = (byte_lane & 1) ? 0xf000 : 0xf;
475 	temp = pi_count / HALF_CLK;
476 	temp <<= (byte_lane & 1) ? 12 : 0;
477 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
478 
479 	/* Adjust PI_COUNT */
480 	pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
481 
482 	/*
483 	 * PI (1/64 MCLK, 1 PIs)
484 	 * BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)
485 	 * BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)
486 	 */
487 	reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
488 	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
489 		channel * DDRIODQ_CH_OFFSET);
490 	msk = 0x3f00;
491 	temp = pi_count << 8;
492 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
493 
494 	/*
495 	 * DEADBAND
496 	 * BL0/1 -> B01DBCTL1[06/09] (+1 select)
497 	 * BL0/1 -> B01DBCTL1[00/03] (enable)
498 	 */
499 	reg = B01DBCTL1 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
500 		channel * DDRIODQ_CH_OFFSET;
501 	msk = 0x00;
502 	temp = 0x00;
503 
504 	/* enable */
505 	msk |= (byte_lane & 1) ? (1 << 3) : (1 << 0);
506 	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
507 		temp |= msk;
508 
509 	/* select */
510 	msk |= (byte_lane & 1) ? (1 << 9) : (1 << 6);
511 	if (pi_count < EARLY_DB)
512 		temp |= msk;
513 
514 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
515 
516 	/* error check */
517 	if (pi_count > 0x3f) {
518 		training_message(channel, rank, byte_lane);
519 		mrc_post_code(0xee, 0xe3);
520 	}
521 
522 	LEAVEFN();
523 }
524 
525 /*
526  * This function will return the amount of WDQ delay on the given
527  * channel, rank, byte_lane as an absolute PI count.
528  *
529  * (currently doesn't comprehend rank)
530  */
531 uint32_t get_wdq(uint8_t channel, uint8_t rank, uint8_t byte_lane)
532 {
533 	uint32_t reg;
534 	uint32_t temp;
535 	uint32_t pi_count;
536 
537 	ENTERFN();
538 
539 	/*
540 	 * RDPTR (1/2 MCLK, 64 PIs)
541 	 * BL0 -> B01PTRCTL0[03:00] (0x0-0xF)
542 	 * BL1 -> B01PTRCTL0[15:12] (0x0-0xF)
543 	 */
544 	reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
545 		channel * DDRIODQ_CH_OFFSET;
546 	temp = msg_port_alt_read(DDRPHY, reg);
547 	temp >>= (byte_lane & 1) ? 12 : 0;
548 	temp &= 0xf;
549 
550 	/* Adjust PI_COUNT */
551 	pi_count = temp * HALF_CLK;
552 
553 	/*
554 	 * PI (1/64 MCLK, 1 PIs)
555 	 * BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)
556 	 * BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)
557 	 */
558 	reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
559 	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
560 		channel * DDRIODQ_CH_OFFSET);
561 	temp = msg_port_alt_read(DDRPHY, reg);
562 	temp >>= 8;
563 	temp &= 0x3f;
564 
565 	/* Adjust PI_COUNT */
566 	pi_count += temp;
567 
568 	LEAVEFN();
569 
570 	return pi_count;
571 }
572 
573 /*
574  * This function will program the WCMD delays based on an absolute
575  * number of PIs.
576  */
577 void set_wcmd(uint8_t channel, uint32_t pi_count)
578 {
579 	uint32_t reg;
580 	uint32_t msk;
581 	uint32_t temp;
582 
583 	ENTERFN();
584 
585 	/*
586 	 * RDPTR (1/2 MCLK, 64 PIs)
587 	 * CMDPTRREG[11:08] (0x0-0xF)
588 	 */
589 	reg = CMDPTRREG + channel * DDRIOCCC_CH_OFFSET;
590 	msk = 0xf00;
591 	temp = pi_count / HALF_CLK;
592 	temp <<= 8;
593 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
594 
595 	/* Adjust PI_COUNT */
596 	pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
597 
598 	/*
599 	 * PI (1/64 MCLK, 1 PIs)
600 	 * CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)
601 	 * CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)
602 	 * CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)
603 	 * CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)
604 	 * CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)
605 	 * CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)
606 	 * CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)
607 	 * CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)
608 	 */
609 	reg = CMDDLLPICODER1 + channel * DDRIOCCC_CH_OFFSET;
610 	msk = 0x3f3f3f3f;
611 	temp = (pi_count << 24) | (pi_count << 16) |
612 		(pi_count << 8) | (pi_count << 0);
613 
614 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
615 	reg = CMDDLLPICODER0 + channel * DDRIOCCC_CH_OFFSET;	/* PO */
616 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
617 
618 	/*
619 	 * DEADBAND
620 	 * CMDCFGREG0[17] (+1 select)
621 	 * CMDCFGREG0[16] (enable)
622 	 */
623 	reg = CMDCFGREG0 + channel * DDRIOCCC_CH_OFFSET;
624 	msk = 0x00;
625 	temp = 0x00;
626 
627 	/* enable */
628 	msk |= (1 << 16);
629 	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
630 		temp |= msk;
631 
632 	/* select */
633 	msk |= (1 << 17);
634 	if (pi_count < EARLY_DB)
635 		temp |= msk;
636 
637 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
638 
639 	/* error check */
640 	if (pi_count > 0x3f)
641 		mrc_post_code(0xee, 0xe4);
642 
643 	LEAVEFN();
644 }
645 
646 /*
647  * This function will return the amount of WCMD delay on the given
648  * channel as an absolute PI count.
649  */
650 uint32_t get_wcmd(uint8_t channel)
651 {
652 	uint32_t reg;
653 	uint32_t temp;
654 	uint32_t pi_count;
655 
656 	ENTERFN();
657 
658 	/*
659 	 * RDPTR (1/2 MCLK, 64 PIs)
660 	 * CMDPTRREG[11:08] (0x0-0xF)
661 	 */
662 	reg = CMDPTRREG + channel * DDRIOCCC_CH_OFFSET;
663 	temp = msg_port_alt_read(DDRPHY, reg);
664 	temp >>= 8;
665 	temp &= 0xf;
666 
667 	/* Adjust PI_COUNT */
668 	pi_count = temp * HALF_CLK;
669 
670 	/*
671 	 * PI (1/64 MCLK, 1 PIs)
672 	 * CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)
673 	 * CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)
674 	 * CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)
675 	 * CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)
676 	 * CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)
677 	 * CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)
678 	 * CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)
679 	 * CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)
680 	 */
681 	reg = CMDDLLPICODER1 + channel * DDRIOCCC_CH_OFFSET;
682 	temp = msg_port_alt_read(DDRPHY, reg);
683 	temp >>= 16;
684 	temp &= 0x3f;
685 
686 	/* Adjust PI_COUNT */
687 	pi_count += temp;
688 
689 	LEAVEFN();
690 
691 	return pi_count;
692 }
693 
694 /*
695  * This function will program the WCLK delays based on an absolute
696  * number of PIs.
697  */
698 void set_wclk(uint8_t channel, uint8_t rank, uint32_t pi_count)
699 {
700 	uint32_t reg;
701 	uint32_t msk;
702 	uint32_t temp;
703 
704 	ENTERFN();
705 
706 	/*
707 	 * RDPTR (1/2 MCLK, 64 PIs)
708 	 * CCPTRREG[15:12] -> CLK1 (0x0-0xF)
709 	 * CCPTRREG[11:08] -> CLK0 (0x0-0xF)
710 	 */
711 	reg = CCPTRREG + channel * DDRIOCCC_CH_OFFSET;
712 	msk = 0xff00;
713 	temp = ((pi_count / HALF_CLK) << 12) | ((pi_count / HALF_CLK) << 8);
714 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
715 
716 	/* Adjust PI_COUNT */
717 	pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
718 
719 	/*
720 	 * PI (1/64 MCLK, 1 PIs)
721 	 * ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)
722 	 * ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)
723 	 */
724 	reg = rank ? ECCB1DLLPICODER0 : ECCB1DLLPICODER0;
725 	reg += (channel * DDRIOCCC_CH_OFFSET);
726 	msk = 0x3f3f00;
727 	temp = (pi_count << 16) | (pi_count << 8);
728 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
729 
730 	reg = rank ? ECCB1DLLPICODER1 : ECCB1DLLPICODER1;
731 	reg += (channel * DDRIOCCC_CH_OFFSET);
732 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
733 
734 	reg = rank ? ECCB1DLLPICODER2 : ECCB1DLLPICODER2;
735 	reg += (channel * DDRIOCCC_CH_OFFSET);
736 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
737 
738 	reg = rank ? ECCB1DLLPICODER3 : ECCB1DLLPICODER3;
739 	reg += (channel * DDRIOCCC_CH_OFFSET);
740 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
741 
742 	/*
743 	 * DEADBAND
744 	 * CCCFGREG1[11:08] (+1 select)
745 	 * CCCFGREG1[03:00] (enable)
746 	 */
747 	reg = CCCFGREG1 + channel * DDRIOCCC_CH_OFFSET;
748 	msk = 0x00;
749 	temp = 0x00;
750 
751 	/* enable */
752 	msk |= 0xf;
753 	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
754 		temp |= msk;
755 
756 	/* select */
757 	msk |= 0xf00;
758 	if (pi_count < EARLY_DB)
759 		temp |= msk;
760 
761 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
762 
763 	/* error check */
764 	if (pi_count > 0x3f)
765 		mrc_post_code(0xee, 0xe5);
766 
767 	LEAVEFN();
768 }
769 
770 /*
771  * This function will return the amout of WCLK delay on the given
772  * channel, rank as an absolute PI count.
773  */
774 uint32_t get_wclk(uint8_t channel, uint8_t rank)
775 {
776 	uint32_t reg;
777 	uint32_t temp;
778 	uint32_t pi_count;
779 
780 	ENTERFN();
781 
782 	/*
783 	 * RDPTR (1/2 MCLK, 64 PIs)
784 	 * CCPTRREG[15:12] -> CLK1 (0x0-0xF)
785 	 * CCPTRREG[11:08] -> CLK0 (0x0-0xF)
786 	 */
787 	reg = CCPTRREG + channel * DDRIOCCC_CH_OFFSET;
788 	temp = msg_port_alt_read(DDRPHY, reg);
789 	temp >>= rank ? 12 : 8;
790 	temp &= 0xf;
791 
792 	/* Adjust PI_COUNT */
793 	pi_count = temp * HALF_CLK;
794 
795 	/*
796 	 * PI (1/64 MCLK, 1 PIs)
797 	 * ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)
798 	 * ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)
799 	 */
800 	reg = rank ? ECCB1DLLPICODER0 : ECCB1DLLPICODER0;
801 	reg += (channel * DDRIOCCC_CH_OFFSET);
802 	temp = msg_port_alt_read(DDRPHY, reg);
803 	temp >>= rank ? 16 : 8;
804 	temp &= 0x3f;
805 
806 	pi_count += temp;
807 
808 	LEAVEFN();
809 
810 	return pi_count;
811 }
812 
813 /*
814  * This function will program the WCTL delays based on an absolute
815  * number of PIs.
816  *
817  * (currently doesn't comprehend rank)
818  */
819 void set_wctl(uint8_t channel, uint8_t rank, uint32_t pi_count)
820 {
821 	uint32_t reg;
822 	uint32_t msk;
823 	uint32_t temp;
824 
825 	ENTERFN();
826 
827 	/*
828 	 * RDPTR (1/2 MCLK, 64 PIs)
829 	 * CCPTRREG[31:28] (0x0-0xF)
830 	 * CCPTRREG[27:24] (0x0-0xF)
831 	 */
832 	reg = CCPTRREG + channel * DDRIOCCC_CH_OFFSET;
833 	msk = 0xff000000;
834 	temp = ((pi_count / HALF_CLK) << 28) | ((pi_count / HALF_CLK) << 24);
835 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
836 
837 	/* Adjust PI_COUNT */
838 	pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
839 
840 	/*
841 	 * PI (1/64 MCLK, 1 PIs)
842 	 * ECCB1DLLPICODER?[29:24] (0x00-0x3F)
843 	 * ECCB1DLLPICODER?[29:24] (0x00-0x3F)
844 	 */
845 	reg = ECCB1DLLPICODER0 + channel * DDRIOCCC_CH_OFFSET;
846 	msk = 0x3f000000;
847 	temp = (pi_count << 24);
848 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
849 
850 	reg = ECCB1DLLPICODER1 + channel * DDRIOCCC_CH_OFFSET;
851 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
852 
853 	reg = ECCB1DLLPICODER2 + channel * DDRIOCCC_CH_OFFSET;
854 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
855 
856 	reg = ECCB1DLLPICODER3 + channel * DDRIOCCC_CH_OFFSET;
857 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
858 
859 	/*
860 	 * DEADBAND
861 	 * CCCFGREG1[13:12] (+1 select)
862 	 * CCCFGREG1[05:04] (enable)
863 	 */
864 	reg = CCCFGREG1 + channel * DDRIOCCC_CH_OFFSET;
865 	msk = 0x00;
866 	temp = 0x00;
867 
868 	/* enable */
869 	msk |= 0x30;
870 	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
871 		temp |= msk;
872 
873 	/* select */
874 	msk |= 0x3000;
875 	if (pi_count < EARLY_DB)
876 		temp |= msk;
877 
878 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
879 
880 	/* error check */
881 	if (pi_count > 0x3f)
882 		mrc_post_code(0xee, 0xe6);
883 
884 	LEAVEFN();
885 }
886 
887 /*
888  * This function will return the amount of WCTL delay on the given
889  * channel, rank as an absolute PI count.
890  *
891  * (currently doesn't comprehend rank)
892  */
893 uint32_t get_wctl(uint8_t channel, uint8_t rank)
894 {
895 	uint32_t reg;
896 	uint32_t temp;
897 	uint32_t pi_count;
898 
899 	ENTERFN();
900 
901 	/*
902 	 * RDPTR (1/2 MCLK, 64 PIs)
903 	 * CCPTRREG[31:28] (0x0-0xF)
904 	 * CCPTRREG[27:24] (0x0-0xF)
905 	 */
906 	reg = CCPTRREG + channel * DDRIOCCC_CH_OFFSET;
907 	temp = msg_port_alt_read(DDRPHY, reg);
908 	temp >>= 24;
909 	temp &= 0xf;
910 
911 	/* Adjust PI_COUNT */
912 	pi_count = temp * HALF_CLK;
913 
914 	/*
915 	 * PI (1/64 MCLK, 1 PIs)
916 	 * ECCB1DLLPICODER?[29:24] (0x00-0x3F)
917 	 * ECCB1DLLPICODER?[29:24] (0x00-0x3F)
918 	 */
919 	reg = ECCB1DLLPICODER0 + channel * DDRIOCCC_CH_OFFSET;
920 	temp = msg_port_alt_read(DDRPHY, reg);
921 	temp >>= 24;
922 	temp &= 0x3f;
923 
924 	/* Adjust PI_COUNT */
925 	pi_count += temp;
926 
927 	LEAVEFN();
928 
929 	return pi_count;
930 }
931 
932 /*
933  * This function will program the internal Vref setting in a given
934  * byte lane in a given channel.
935  */
936 void set_vref(uint8_t channel, uint8_t byte_lane, uint32_t setting)
937 {
938 	uint32_t reg = (byte_lane & 0x1) ? B1VREFCTL : B0VREFCTL;
939 
940 	ENTERFN();
941 
942 	DPF(D_TRN, "Vref ch%d ln%d : val=%03X\n",
943 	    channel, byte_lane, setting);
944 
945 	mrc_alt_write_mask(DDRPHY, reg + channel * DDRIODQ_CH_OFFSET +
946 		(byte_lane >> 1) * DDRIODQ_BL_OFFSET,
947 		vref_codes[setting] << 2, 0xfc);
948 
949 	/*
950 	 * need to wait ~300ns for Vref to settle
951 	 * (check that this is necessary)
952 	 */
953 	delay_n(300);
954 
955 	/* ??? may need to clear pointers ??? */
956 
957 	LEAVEFN();
958 }
959 
960 /*
961  * This function will return the internal Vref setting for the given
962  * channel, byte_lane.
963  */
964 uint32_t get_vref(uint8_t channel, uint8_t byte_lane)
965 {
966 	uint8_t j;
967 	uint32_t ret_val = sizeof(vref_codes) / 2;
968 	uint32_t reg = (byte_lane & 0x1) ? B1VREFCTL : B0VREFCTL;
969 	uint32_t temp;
970 
971 	ENTERFN();
972 
973 	temp = msg_port_alt_read(DDRPHY, reg + channel * DDRIODQ_CH_OFFSET +
974 		(byte_lane >> 1) * DDRIODQ_BL_OFFSET);
975 	temp >>= 2;
976 	temp &= 0x3f;
977 
978 	for (j = 0; j < sizeof(vref_codes); j++) {
979 		if (vref_codes[j] == temp) {
980 			ret_val = j;
981 			break;
982 		}
983 	}
984 
985 	LEAVEFN();
986 
987 	return ret_val;
988 }
989 
990 /*
991  * This function will return a 32-bit address in the desired
992  * channel and rank.
993  */
994 uint32_t get_addr(uint8_t channel, uint8_t rank)
995 {
996 	uint32_t offset = 32 * 1024 * 1024;	/* 32MB */
997 
998 	/* Begin product specific code */
999 	if (channel > 0) {
1000 		DPF(D_ERROR, "ILLEGAL CHANNEL\n");
1001 		DEAD_LOOP();
1002 	}
1003 
1004 	if (rank > 1) {
1005 		DPF(D_ERROR, "ILLEGAL RANK\n");
1006 		DEAD_LOOP();
1007 	}
1008 
1009 	/* use 256MB lowest density as per DRP == 0x0003 */
1010 	offset += rank * (256 * 1024 * 1024);
1011 
1012 	return offset;
1013 }
1014 
1015 /*
1016  * This function will sample the DQTRAINSTS registers in the given
1017  * channel/rank SAMPLE_SIZE times looking for a valid '0' or '1'.
1018  *
1019  * It will return an encoded 32-bit date in which each bit corresponds to
1020  * the sampled value on the byte lane.
1021  */
1022 uint32_t sample_dqs(struct mrc_params *mrc_params, uint8_t channel,
1023 		    uint8_t rank, bool rcvn)
1024 {
1025 	uint8_t j;	/* just a counter */
1026 	uint8_t bl;	/* which BL in the module (always 2 per module) */
1027 	uint8_t bl_grp;	/* which BL module */
1028 	/* byte lane divisor */
1029 	uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1;
1030 	uint32_t msk[2];	/* BLx in module */
1031 	/* DQTRAINSTS register contents for each sample */
1032 	uint32_t sampled_val[SAMPLE_SIZE];
1033 	uint32_t num_0s;	/* tracks the number of '0' samples */
1034 	uint32_t num_1s;	/* tracks the number of '1' samples */
1035 	uint32_t ret_val = 0x00;	/* assume all '0' samples */
1036 	uint32_t address = get_addr(channel, rank);
1037 
1038 	/* initialise msk[] */
1039 	msk[0] = rcvn ? (1 << 1) : (1 << 9);	/* BL0 */
1040 	msk[1] = rcvn ? (1 << 0) : (1 << 8);	/* BL1 */
1041 
1042 	/* cycle through each byte lane group */
1043 	for (bl_grp = 0; bl_grp < (NUM_BYTE_LANES / bl_divisor) / 2; bl_grp++) {
1044 		/* take SAMPLE_SIZE samples */
1045 		for (j = 0; j < SAMPLE_SIZE; j++) {
1046 			hte_mem_op(address, mrc_params->first_run,
1047 				   rcvn ? 0 : 1);
1048 			mrc_params->first_run = 0;
1049 
1050 			/*
1051 			 * record the contents of the proper
1052 			 * DQTRAINSTS register
1053 			 */
1054 			sampled_val[j] = msg_port_alt_read(DDRPHY,
1055 				DQTRAINSTS +
1056 				bl_grp * DDRIODQ_BL_OFFSET +
1057 				channel * DDRIODQ_CH_OFFSET);
1058 		}
1059 
1060 		/*
1061 		 * look for a majority value (SAMPLE_SIZE / 2) + 1
1062 		 * on the byte lane and set that value in the corresponding
1063 		 * ret_val bit
1064 		 */
1065 		for (bl = 0; bl < 2; bl++) {
1066 			num_0s = 0x00;	/* reset '0' tracker for byte lane */
1067 			num_1s = 0x00;	/* reset '1' tracker for byte lane */
1068 			for (j = 0; j < SAMPLE_SIZE; j++) {
1069 				if (sampled_val[j] & msk[bl])
1070 					num_1s++;
1071 				else
1072 					num_0s++;
1073 			}
1074 		if (num_1s > num_0s)
1075 			ret_val |= (1 << (bl + bl_grp * 2));
1076 		}
1077 	}
1078 
1079 	/*
1080 	 * "ret_val.0" contains the status of BL0
1081 	 * "ret_val.1" contains the status of BL1
1082 	 * "ret_val.2" contains the status of BL2
1083 	 * etc.
1084 	 */
1085 	return ret_val;
1086 }
1087 
1088 /* This function will find the rising edge transition on RCVN or WDQS */
1089 void find_rising_edge(struct mrc_params *mrc_params, uint32_t delay[],
1090 		      uint8_t channel, uint8_t rank, bool rcvn)
1091 {
1092 	bool all_edges_found;	/* determines stop condition */
1093 	bool direction[NUM_BYTE_LANES];	/* direction indicator */
1094 	uint8_t sample;	/* sample counter */
1095 	uint8_t bl;	/* byte lane counter */
1096 	/* byte lane divisor */
1097 	uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1;
1098 	uint32_t sample_result[SAMPLE_CNT];	/* results of sample_dqs() */
1099 	uint32_t temp;
1100 	uint32_t transition_pattern;
1101 
1102 	ENTERFN();
1103 
1104 	/* select hte and request initial configuration */
1105 	select_hte();
1106 	mrc_params->first_run = 1;
1107 
1108 	/* Take 3 sample points (T1,T2,T3) to obtain a transition pattern */
1109 	for (sample = 0; sample < SAMPLE_CNT; sample++) {
1110 		/* program the desired delays for sample */
1111 		for (bl = 0; bl < (NUM_BYTE_LANES / bl_divisor); bl++) {
1112 			/* increase sample delay by 26 PI (0.2 CLK) */
1113 			if (rcvn) {
1114 				set_rcvn(channel, rank, bl,
1115 					 delay[bl] + sample * SAMPLE_DLY);
1116 			} else {
1117 				set_wdqs(channel, rank, bl,
1118 					 delay[bl] + sample * SAMPLE_DLY);
1119 			}
1120 		}
1121 
1122 		/* take samples (Tsample_i) */
1123 		sample_result[sample] = sample_dqs(mrc_params,
1124 			channel, rank, rcvn);
1125 
1126 		DPF(D_TRN,
1127 		    "Find rising edge %s ch%d rnk%d: #%d dly=%d dqs=%02X\n",
1128 		    rcvn ? "RCVN" : "WDQS", channel, rank, sample,
1129 		    sample * SAMPLE_DLY, sample_result[sample]);
1130 	}
1131 
1132 	/*
1133 	 * This pattern will help determine where we landed and ultimately
1134 	 * how to place RCVEN/WDQS.
1135 	 */
1136 	for (bl = 0; bl < NUM_BYTE_LANES / bl_divisor; bl++) {
1137 		/* build transition_pattern (MSB is 1st sample) */
1138 		transition_pattern = 0;
1139 		for (sample = 0; sample < SAMPLE_CNT; sample++) {
1140 			transition_pattern |=
1141 				((sample_result[sample] & (1 << bl)) >> bl) <<
1142 				(SAMPLE_CNT - 1 - sample);
1143 		}
1144 
1145 		DPF(D_TRN, "=== transition pattern %d\n", transition_pattern);
1146 
1147 		/*
1148 		 * set up to look for rising edge based on
1149 		 * transition_pattern
1150 		 */
1151 		switch (transition_pattern) {
1152 		case 0:	/* sampled 0->0->0 */
1153 			/* move forward from T3 looking for 0->1 */
1154 			delay[bl] += 2 * SAMPLE_DLY;
1155 			direction[bl] = FORWARD;
1156 			break;
1157 		case 1:	/* sampled 0->0->1 */
1158 		case 5:	/* sampled 1->0->1 (bad duty cycle) *HSD#237503* */
1159 			/* move forward from T2 looking for 0->1 */
1160 			delay[bl] += 1 * SAMPLE_DLY;
1161 			direction[bl] = FORWARD;
1162 			break;
1163 		case 2:	/* sampled 0->1->0 (bad duty cycle) *HSD#237503* */
1164 		case 3:	/* sampled 0->1->1 */
1165 			/* move forward from T1 looking for 0->1 */
1166 			delay[bl] += 0 * SAMPLE_DLY;
1167 			direction[bl] = FORWARD;
1168 			break;
1169 		case 4:	/* sampled 1->0->0 (assumes BL8, HSD#234975) */
1170 			/* move forward from T3 looking for 0->1 */
1171 			delay[bl] += 2 * SAMPLE_DLY;
1172 			direction[bl] = FORWARD;
1173 			break;
1174 		case 6:	/* sampled 1->1->0 */
1175 		case 7:	/* sampled 1->1->1 */
1176 			/* move backward from T1 looking for 1->0 */
1177 			delay[bl] += 0 * SAMPLE_DLY;
1178 			direction[bl] = BACKWARD;
1179 			break;
1180 		default:
1181 			mrc_post_code(0xee, 0xee);
1182 			break;
1183 		}
1184 
1185 		/* program delays */
1186 		if (rcvn)
1187 			set_rcvn(channel, rank, bl, delay[bl]);
1188 		else
1189 			set_wdqs(channel, rank, bl, delay[bl]);
1190 	}
1191 
1192 	/*
1193 	 * Based on the observed transition pattern on the byte lane,
1194 	 * begin looking for a rising edge with single PI granularity.
1195 	 */
1196 	do {
1197 		all_edges_found = true;	/* assume all byte lanes passed */
1198 		/* take a sample */
1199 		temp = sample_dqs(mrc_params, channel, rank, rcvn);
1200 		/* check all each byte lane for proper edge */
1201 		for (bl = 0; bl < NUM_BYTE_LANES / bl_divisor; bl++) {
1202 			if (temp & (1 << bl)) {
1203 				/* sampled "1" */
1204 				if (direction[bl] == BACKWARD) {
1205 					/*
1206 					 * keep looking for edge
1207 					 * on this byte lane
1208 					 */
1209 					all_edges_found = false;
1210 					delay[bl] -= 1;
1211 					if (rcvn) {
1212 						set_rcvn(channel, rank,
1213 							 bl, delay[bl]);
1214 					} else {
1215 						set_wdqs(channel, rank,
1216 							 bl, delay[bl]);
1217 					}
1218 				}
1219 			} else {
1220 				/* sampled "0" */
1221 				if (direction[bl] == FORWARD) {
1222 					/*
1223 					 * keep looking for edge
1224 					 * on this byte lane
1225 					 */
1226 					all_edges_found = false;
1227 					delay[bl] += 1;
1228 					if (rcvn) {
1229 						set_rcvn(channel, rank,
1230 							 bl, delay[bl]);
1231 					} else {
1232 						set_wdqs(channel, rank,
1233 							 bl, delay[bl]);
1234 					}
1235 				}
1236 			}
1237 		}
1238 	} while (!all_edges_found);
1239 
1240 	/* restore DDR idle state */
1241 	dram_init_command(DCMD_PREA(rank));
1242 
1243 	DPF(D_TRN, "Delay %03X %03X %03X %03X\n",
1244 	    delay[0], delay[1], delay[2], delay[3]);
1245 
1246 	LEAVEFN();
1247 }
1248 
1249 /*
1250  * This function will return a 32 bit mask that will be used to
1251  * check for byte lane failures.
1252  */
1253 uint32_t byte_lane_mask(struct mrc_params *mrc_params)
1254 {
1255 	uint32_t j;
1256 	uint32_t ret_val = 0x00;
1257 
1258 	/*
1259 	 * set ret_val based on NUM_BYTE_LANES such that you will check
1260 	 * only BL0 in result
1261 	 *
1262 	 * (each bit in result represents a byte lane)
1263 	 */
1264 	for (j = 0; j < MAX_BYTE_LANES; j += NUM_BYTE_LANES)
1265 		ret_val |= (1 << ((j / NUM_BYTE_LANES) * NUM_BYTE_LANES));
1266 
1267 	/*
1268 	 * HSD#235037
1269 	 * need to adjust the mask for 16-bit mode
1270 	 */
1271 	if (mrc_params->channel_width == X16)
1272 		ret_val |= (ret_val << 2);
1273 
1274 	return ret_val;
1275 }
1276 
1277 /*
1278  * Check memory executing simple write/read/verify at the specified address.
1279  *
1280  * Bits in the result indicate failure on specific byte lane.
1281  */
1282 uint32_t check_rw_coarse(struct mrc_params *mrc_params, uint32_t address)
1283 {
1284 	uint32_t result = 0;
1285 	uint8_t first_run = 0;
1286 
1287 	if (mrc_params->hte_setup) {
1288 		mrc_params->hte_setup = 0;
1289 		first_run = 1;
1290 		select_hte();
1291 	}
1292 
1293 	result = hte_basic_write_read(mrc_params, address, first_run,
1294 				      WRITE_TRAIN);
1295 
1296 	DPF(D_TRN, "check_rw_coarse result is %x\n", result);
1297 
1298 	return result;
1299 }
1300 
1301 /*
1302  * Check memory executing write/read/verify of many data patterns
1303  * at the specified address. Bits in the result indicate failure
1304  * on specific byte lane.
1305  */
1306 uint32_t check_bls_ex(struct mrc_params *mrc_params, uint32_t address)
1307 {
1308 	uint32_t result;
1309 	uint8_t first_run = 0;
1310 
1311 	if (mrc_params->hte_setup) {
1312 		mrc_params->hte_setup = 0;
1313 		first_run = 1;
1314 		select_hte();
1315 	}
1316 
1317 	result = hte_write_stress_bit_lanes(mrc_params, address, first_run);
1318 
1319 	DPF(D_TRN, "check_bls_ex result is %x\n", result);
1320 
1321 	return result;
1322 }
1323 
/*
 * 32-bit LFSR with characteristic polynomial: X^32 + X^22 + X^2 + X^1
 *
 * Advances the value behind lfsr_ptr by 32 shift steps in place, i.e.
 * every call replaces the previous 32-bit value with the next one.
 */
void lfsr32(uint32_t *lfsr_ptr)
{
	uint32_t state = *lfsr_ptr;
	uint32_t feedback;
	int steps = 32;

	while (steps-- > 0) {
		/* inverted XOR of the taps at bits 0, 1, 2 and 22 */
		feedback = ~(state ^ (state >> 1) ^ (state >> 2) ^
			     (state >> 22)) & 1;

		/* shift right and feed the new bit in at the MSB */
		state = (state >> 1) | (feedback << 31);
	}

	*lfsr_ptr = state;
}
1349 
1350 /* Clear the pointers in a given byte lane in a given channel */
1351 void clear_pointers(void)
1352 {
1353 	uint8_t channel;
1354 	uint8_t bl;
1355 
1356 	ENTERFN();
1357 
1358 	for (channel = 0; channel < NUM_CHANNELS; channel++) {
1359 		for (bl = 0; bl < NUM_BYTE_LANES; bl++) {
1360 			mrc_alt_write_mask(DDRPHY,
1361 					   B01PTRCTL1 +
1362 					   channel * DDRIODQ_CH_OFFSET +
1363 					   (bl >> 1) * DDRIODQ_BL_OFFSET,
1364 					   ~(1 << 8), (1 << 8));
1365 
1366 			mrc_alt_write_mask(DDRPHY,
1367 					   B01PTRCTL1 +
1368 					   channel * DDRIODQ_CH_OFFSET +
1369 					   (bl >> 1) * DDRIODQ_BL_OFFSET,
1370 					   (1 << 8), (1 << 8));
1371 		}
1372 	}
1373 
1374 	LEAVEFN();
1375 }
1376 
1377 static void print_timings_internal(uint8_t algo, uint8_t channel, uint8_t rank,
1378 				   uint8_t bl_divisor)
1379 {
1380 	uint8_t bl;
1381 
1382 	switch (algo) {
1383 	case RCVN:
1384 		DPF(D_INFO, "\nRCVN[%02d:%02d]", channel, rank);
1385 		break;
1386 	case WDQS:
1387 		DPF(D_INFO, "\nWDQS[%02d:%02d]", channel, rank);
1388 		break;
1389 	case WDQX:
1390 		DPF(D_INFO, "\nWDQx[%02d:%02d]", channel, rank);
1391 		break;
1392 	case RDQS:
1393 		DPF(D_INFO, "\nRDQS[%02d:%02d]", channel, rank);
1394 		break;
1395 	case VREF:
1396 		DPF(D_INFO, "\nVREF[%02d:%02d]", channel, rank);
1397 		break;
1398 	case WCMD:
1399 		DPF(D_INFO, "\nWCMD[%02d:%02d]", channel, rank);
1400 		break;
1401 	case WCTL:
1402 		DPF(D_INFO, "\nWCTL[%02d:%02d]", channel, rank);
1403 		break;
1404 	case WCLK:
1405 		DPF(D_INFO, "\nWCLK[%02d:%02d]", channel, rank);
1406 		break;
1407 	default:
1408 		break;
1409 	}
1410 
1411 	for (bl = 0; bl < NUM_BYTE_LANES / bl_divisor; bl++) {
1412 		switch (algo) {
1413 		case RCVN:
1414 			DPF(D_INFO, " %03d", get_rcvn(channel, rank, bl));
1415 			break;
1416 		case WDQS:
1417 			DPF(D_INFO, " %03d", get_wdqs(channel, rank, bl));
1418 			break;
1419 		case WDQX:
1420 			DPF(D_INFO, " %03d", get_wdq(channel, rank, bl));
1421 			break;
1422 		case RDQS:
1423 			DPF(D_INFO, " %03d", get_rdqs(channel, rank, bl));
1424 			break;
1425 		case VREF:
1426 			DPF(D_INFO, " %03d", get_vref(channel, bl));
1427 			break;
1428 		case WCMD:
1429 			DPF(D_INFO, " %03d", get_wcmd(channel));
1430 			break;
1431 		case WCTL:
1432 			DPF(D_INFO, " %03d", get_wctl(channel, rank));
1433 			break;
1434 		case WCLK:
1435 			DPF(D_INFO, " %03d", get_wclk(channel, rank));
1436 			break;
1437 		default:
1438 			break;
1439 		}
1440 	}
1441 }
1442 
1443 void print_timings(struct mrc_params *mrc_params)
1444 {
1445 	uint8_t algo;
1446 	uint8_t channel;
1447 	uint8_t rank;
1448 	uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1;
1449 
1450 	DPF(D_INFO, "\n---------------------------");
1451 	DPF(D_INFO, "\nALGO[CH:RK] BL0 BL1 BL2 BL3");
1452 	DPF(D_INFO, "\n===========================");
1453 
1454 	for (algo = 0; algo < MAX_ALGOS; algo++) {
1455 		for (channel = 0; channel < NUM_CHANNELS; channel++) {
1456 			if (mrc_params->channel_enables & (1 << channel)) {
1457 				for (rank = 0; rank < NUM_RANKS; rank++) {
1458 					if (mrc_params->rank_enables &
1459 						(1 << rank)) {
1460 						print_timings_internal(algo,
1461 							channel, rank,
1462 							bl_divisor);
1463 					}
1464 				}
1465 			}
1466 		}
1467 	}
1468 
1469 	DPF(D_INFO, "\n---------------------------");
1470 	DPF(D_INFO, "\n");
1471 }
1472