xref: /openbmc/u-boot/arch/x86/cpu/quark/mrc_util.c (revision 1cc95f6e1b38e96dfbb5ffb9aec211b1d0a88135)
1 /*
2  * Copyright (C) 2013, Intel Corporation
3  * Copyright (C) 2015, Bin Meng <bmeng.cn@gmail.com>
4  *
5  * Ported from Intel released Quark UEFI BIOS
6  * QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei
7  *
8  * SPDX-License-Identifier:	Intel
9  */
10 
11 #include <common.h>
12 #include <asm/arch/device.h>
13 #include <asm/arch/mrc.h>
14 #include <asm/arch/msg_port.h>
15 #include <asm/arch/quark.h>
16 #include "mrc_util.h"
17 #include "hte.h"
18 #include "smc.h"
19 
/*
 * Vref register codes indexed by setting, ordered lowest to highest.
 * Settings 0..31 map to codes 0x3f down to 0x20, settings 32..63 map to
 * codes 0x00 up to 0x1f.
 */
static const uint8_t vref_codes[64] = {
	/* settings 0..15 */
	0x3f, 0x3e, 0x3d, 0x3c,
	0x3b, 0x3a, 0x39, 0x38,
	0x37, 0x36, 0x35, 0x34,
	0x33, 0x32, 0x31, 0x30,
	/* settings 16..31 */
	0x2f, 0x2e, 0x2d, 0x2c,
	0x2b, 0x2a, 0x29, 0x28,
	0x27, 0x26, 0x25, 0x24,
	0x23, 0x22, 0x21, 0x20,
	/* settings 32..47 */
	0x00, 0x01, 0x02, 0x03,
	0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x0a, 0x0b,
	0x0c, 0x0d, 0x0e, 0x0f,
	/* settings 48..63 */
	0x10, 0x11, 0x12, 0x13,
	0x14, 0x15, 0x16, 0x17,
	0x18, 0x19, 0x1a, 0x1b,
	0x1c, 0x1d, 0x1e, 0x1f
};
31 
32 void mrc_write_mask(u32 unit, u32 addr, u32 data, u32 mask)
33 {
34 	msg_port_write(unit, addr,
35 		       (msg_port_read(unit, addr) & ~(mask)) |
36 		       ((data) & (mask)));
37 }
38 
39 void mrc_alt_write_mask(u32 unit, u32 addr, u32 data, u32 mask)
40 {
41 	msg_port_alt_write(unit, addr,
42 			   (msg_port_alt_read(unit, addr) & ~(mask)) |
43 			   ((data) & (mask)));
44 }
45 
46 void mrc_post_code(uint8_t major, uint8_t minor)
47 {
48 	/* send message to UART */
49 	DPF(D_INFO, "POST: 0x%01x%02x\n", major, minor);
50 
51 	/* error check */
52 	if (major == 0xee)
53 		hang();
54 }
55 
/*
 * Busy-wait for the given number of nanoseconds, timed with the TSC.
 *
 * NOTE(review): the product get_tbclk_mhz() * ns is not widened before
 * the division; confirm it cannot overflow for the delays requested.
 */
void delay_n(uint32_t ns)
{
	uint64_t start = rdtsc();
	/* ticks to wait: (ticks per microsecond * ns) / 1000 */
	uint64_t ticks = (get_tbclk_mhz() * ns) / 1000;
	uint64_t deadline = start + ticks;

	while (rdtsc() < deadline)
		continue;
}
67 
/*
 * Busy-wait for the given number of microseconds (the parameter is
 * named "ms" but each unit is one delay_n(1000) call, i.e. 1000 ns).
 */
void delay_u(uint32_t ms)
{
	uint32_t remaining;

	/* 64-bit math is not an option, so wait one microsecond at a time */
	for (remaining = ms; remaining != 0; remaining--)
		delay_n(1000);
}
75 
76 /* Select Memory Manager as the source for PRI interface */
77 void select_mem_mgr(void)
78 {
79 	u32 dco;
80 
81 	ENTERFN();
82 
83 	dco = msg_port_read(MEM_CTLR, DCO);
84 	dco &= ~DCO_PMICTL;
85 	msg_port_write(MEM_CTLR, DCO, dco);
86 
87 	LEAVEFN();
88 }
89 
90 /* Select HTE as the source for PRI interface */
91 void select_hte(void)
92 {
93 	u32 dco;
94 
95 	ENTERFN();
96 
97 	dco = msg_port_read(MEM_CTLR, DCO);
98 	dco |= DCO_PMICTL;
99 	msg_port_write(MEM_CTLR, DCO, dco);
100 
101 	LEAVEFN();
102 }
103 
104 /*
105  * Send DRAM command
106  * data should be formated using DCMD_Xxxx macro or emrsXCommand structure
107  */
108 void dram_init_command(uint32_t data)
109 {
110 	qrk_pci_write_config_dword(QUARK_HOST_BRIDGE, MSG_DATA_REG, data);
111 	qrk_pci_write_config_dword(QUARK_HOST_BRIDGE, MSG_CTRL_EXT_REG, 0);
112 	msg_port_setup(MSG_OP_DRAM_INIT, MEM_CTLR, 0);
113 
114 	DPF(D_REGWR, "WR32 %03X %08X %08X\n", MEM_CTLR, 0, data);
115 }
116 
117 /* Send DRAM wake command using special MCU side-band WAKE opcode */
118 void dram_wake_command(void)
119 {
120 	ENTERFN();
121 
122 	msg_port_setup(MSG_OP_DRAM_WAKE, MEM_CTLR, 0);
123 
124 	LEAVEFN();
125 }
126 
127 void training_message(uint8_t channel, uint8_t rank, uint8_t byte_lane)
128 {
129 	/* send message to UART */
130 	DPF(D_INFO, "CH%01X RK%01X BL%01X\n", channel, rank, byte_lane);
131 }
132 
133 /*
134  * This function will program the RCVEN delays
135  *
136  * (currently doesn't comprehend rank)
137  */
138 void set_rcvn(uint8_t channel, uint8_t rank,
139 	      uint8_t byte_lane, uint32_t pi_count)
140 {
141 	uint32_t reg;
142 	uint32_t msk;
143 	uint32_t temp;
144 
145 	ENTERFN();
146 
147 	DPF(D_TRN, "Rcvn ch%d rnk%d ln%d : pi=%03X\n",
148 	    channel, rank, byte_lane, pi_count);
149 
150 	/*
151 	 * RDPTR (1/2 MCLK, 64 PIs)
152 	 * BL0 -> B01PTRCTL0[11:08] (0x0-0xF)
153 	 * BL1 -> B01PTRCTL0[23:20] (0x0-0xF)
154 	 */
155 	reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
156 		channel * DDRIODQ_CH_OFFSET;
157 	msk = (byte_lane & 1) ? 0xf00000 : 0xf00;
158 	temp = (byte_lane & 1) ? (pi_count / HALF_CLK) << 20 :
159 		(pi_count / HALF_CLK) << 8;
160 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
161 
162 	/* Adjust PI_COUNT */
163 	pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
164 
165 	/*
166 	 * PI (1/64 MCLK, 1 PIs)
167 	 * BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)
168 	 * BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)
169 	 */
170 	reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
171 	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
172 		channel * DDRIODQ_CH_OFFSET);
173 	msk = 0x3f000000;
174 	temp = pi_count << 24;
175 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
176 
177 	/*
178 	 * DEADBAND
179 	 * BL0/1 -> B01DBCTL1[08/11] (+1 select)
180 	 * BL0/1 -> B01DBCTL1[02/05] (enable)
181 	 */
182 	reg = B01DBCTL1 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
183 		channel * DDRIODQ_CH_OFFSET;
184 	msk = 0x00;
185 	temp = 0x00;
186 
187 	/* enable */
188 	msk |= (byte_lane & 1) ? (1 << 5) : (1 << 2);
189 	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
190 		temp |= msk;
191 
192 	/* select */
193 	msk |= (byte_lane & 1) ? (1 << 11) : (1 << 8);
194 	if (pi_count < EARLY_DB)
195 		temp |= msk;
196 
197 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
198 
199 	/* error check */
200 	if (pi_count > 0x3f) {
201 		training_message(channel, rank, byte_lane);
202 		mrc_post_code(0xee, 0xe0);
203 	}
204 
205 	LEAVEFN();
206 }
207 
208 /*
209  * This function will return the current RCVEN delay on the given
210  * channel, rank, byte_lane as an absolute PI count.
211  *
212  * (currently doesn't comprehend rank)
213  */
214 uint32_t get_rcvn(uint8_t channel, uint8_t rank, uint8_t byte_lane)
215 {
216 	uint32_t reg;
217 	uint32_t temp;
218 	uint32_t pi_count;
219 
220 	ENTERFN();
221 
222 	/*
223 	 * RDPTR (1/2 MCLK, 64 PIs)
224 	 * BL0 -> B01PTRCTL0[11:08] (0x0-0xF)
225 	 * BL1 -> B01PTRCTL0[23:20] (0x0-0xF)
226 	 */
227 	reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
228 		channel * DDRIODQ_CH_OFFSET;
229 	temp = msg_port_alt_read(DDRPHY, reg);
230 	temp >>= (byte_lane & 1) ? 20 : 8;
231 	temp &= 0xf;
232 
233 	/* Adjust PI_COUNT */
234 	pi_count = temp * HALF_CLK;
235 
236 	/*
237 	 * PI (1/64 MCLK, 1 PIs)
238 	 * BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)
239 	 * BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)
240 	 */
241 	reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
242 	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
243 		channel * DDRIODQ_CH_OFFSET);
244 	temp = msg_port_alt_read(DDRPHY, reg);
245 	temp >>= 24;
246 	temp &= 0x3f;
247 
248 	/* Adjust PI_COUNT */
249 	pi_count += temp;
250 
251 	LEAVEFN();
252 
253 	return pi_count;
254 }
255 
256 /*
257  * This function will program the RDQS delays based on an absolute
258  * amount of PIs.
259  *
260  * (currently doesn't comprehend rank)
261  */
262 void set_rdqs(uint8_t channel, uint8_t rank,
263 	      uint8_t byte_lane, uint32_t pi_count)
264 {
265 	uint32_t reg;
266 	uint32_t msk;
267 	uint32_t temp;
268 
269 	ENTERFN();
270 	DPF(D_TRN, "Rdqs ch%d rnk%d ln%d : pi=%03X\n",
271 	    channel, rank, byte_lane, pi_count);
272 
273 	/*
274 	 * PI (1/128 MCLK)
275 	 * BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)
276 	 * BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)
277 	 */
278 	reg = (byte_lane & 1) ? B1RXDQSPICODE : B0RXDQSPICODE;
279 	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
280 		channel * DDRIODQ_CH_OFFSET);
281 	msk = 0x7f;
282 	temp = pi_count << 0;
283 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
284 
285 	/* error check (shouldn't go above 0x3F) */
286 	if (pi_count > 0x47) {
287 		training_message(channel, rank, byte_lane);
288 		mrc_post_code(0xee, 0xe1);
289 	}
290 
291 	LEAVEFN();
292 }
293 
294 /*
295  * This function will return the current RDQS delay on the given
296  * channel, rank, byte_lane as an absolute PI count.
297  *
298  * (currently doesn't comprehend rank)
299  */
300 uint32_t get_rdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane)
301 {
302 	uint32_t reg;
303 	uint32_t temp;
304 	uint32_t pi_count;
305 
306 	ENTERFN();
307 
308 	/*
309 	 * PI (1/128 MCLK)
310 	 * BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)
311 	 * BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)
312 	 */
313 	reg = (byte_lane & 1) ? B1RXDQSPICODE : B0RXDQSPICODE;
314 	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
315 		channel * DDRIODQ_CH_OFFSET);
316 	temp = msg_port_alt_read(DDRPHY, reg);
317 
318 	/* Adjust PI_COUNT */
319 	pi_count = temp & 0x7f;
320 
321 	LEAVEFN();
322 
323 	return pi_count;
324 }
325 
326 /*
327  * This function will program the WDQS delays based on an absolute
328  * amount of PIs.
329  *
330  * (currently doesn't comprehend rank)
331  */
332 void set_wdqs(uint8_t channel, uint8_t rank,
333 	      uint8_t byte_lane, uint32_t pi_count)
334 {
335 	uint32_t reg;
336 	uint32_t msk;
337 	uint32_t temp;
338 
339 	ENTERFN();
340 
341 	DPF(D_TRN, "Wdqs ch%d rnk%d ln%d : pi=%03X\n",
342 	    channel, rank, byte_lane, pi_count);
343 
344 	/*
345 	 * RDPTR (1/2 MCLK, 64 PIs)
346 	 * BL0 -> B01PTRCTL0[07:04] (0x0-0xF)
347 	 * BL1 -> B01PTRCTL0[19:16] (0x0-0xF)
348 	 */
349 	reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
350 		channel * DDRIODQ_CH_OFFSET;
351 	msk = (byte_lane & 1) ? 0xf0000 : 0xf0;
352 	temp = pi_count / HALF_CLK;
353 	temp <<= (byte_lane & 1) ? 16 : 4;
354 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
355 
356 	/* Adjust PI_COUNT */
357 	pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
358 
359 	/*
360 	 * PI (1/64 MCLK, 1 PIs)
361 	 * BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)
362 	 * BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)
363 	 */
364 	reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
365 	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
366 		channel * DDRIODQ_CH_OFFSET);
367 	msk = 0x3f0000;
368 	temp = pi_count << 16;
369 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
370 
371 	/*
372 	 * DEADBAND
373 	 * BL0/1 -> B01DBCTL1[07/10] (+1 select)
374 	 * BL0/1 -> B01DBCTL1[01/04] (enable)
375 	 */
376 	reg = B01DBCTL1 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
377 		channel * DDRIODQ_CH_OFFSET;
378 	msk = 0x00;
379 	temp = 0x00;
380 
381 	/* enable */
382 	msk |= (byte_lane & 1) ? (1 << 4) : (1 << 1);
383 	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
384 		temp |= msk;
385 
386 	/* select */
387 	msk |= (byte_lane & 1) ? (1 << 10) : (1 << 7);
388 	if (pi_count < EARLY_DB)
389 		temp |= msk;
390 
391 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
392 
393 	/* error check */
394 	if (pi_count > 0x3f) {
395 		training_message(channel, rank, byte_lane);
396 		mrc_post_code(0xee, 0xe2);
397 	}
398 
399 	LEAVEFN();
400 }
401 
402 /*
403  * This function will return the amount of WDQS delay on the given
404  * channel, rank, byte_lane as an absolute PI count.
405  *
406  * (currently doesn't comprehend rank)
407  */
408 uint32_t get_wdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane)
409 {
410 	uint32_t reg;
411 	uint32_t temp;
412 	uint32_t pi_count;
413 
414 	ENTERFN();
415 
416 	/*
417 	 * RDPTR (1/2 MCLK, 64 PIs)
418 	 * BL0 -> B01PTRCTL0[07:04] (0x0-0xF)
419 	 * BL1 -> B01PTRCTL0[19:16] (0x0-0xF)
420 	 */
421 	reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
422 		channel * DDRIODQ_CH_OFFSET;
423 	temp = msg_port_alt_read(DDRPHY, reg);
424 	temp >>= (byte_lane & 1) ? 16 : 4;
425 	temp &= 0xf;
426 
427 	/* Adjust PI_COUNT */
428 	pi_count = (temp * HALF_CLK);
429 
430 	/*
431 	 * PI (1/64 MCLK, 1 PIs)
432 	 * BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)
433 	 * BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)
434 	 */
435 	reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
436 	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
437 		channel * DDRIODQ_CH_OFFSET);
438 	temp = msg_port_alt_read(DDRPHY, reg);
439 	temp >>= 16;
440 	temp &= 0x3f;
441 
442 	/* Adjust PI_COUNT */
443 	pi_count += temp;
444 
445 	LEAVEFN();
446 
447 	return pi_count;
448 }
449 
450 /*
451  * This function will program the WDQ delays based on an absolute
452  * number of PIs.
453  *
454  * (currently doesn't comprehend rank)
455  */
456 void set_wdq(uint8_t channel, uint8_t rank,
457 	     uint8_t byte_lane, uint32_t pi_count)
458 {
459 	uint32_t reg;
460 	uint32_t msk;
461 	uint32_t temp;
462 
463 	ENTERFN();
464 
465 	DPF(D_TRN, "Wdq ch%d rnk%d ln%d : pi=%03X\n",
466 	    channel, rank, byte_lane, pi_count);
467 
468 	/*
469 	 * RDPTR (1/2 MCLK, 64 PIs)
470 	 * BL0 -> B01PTRCTL0[03:00] (0x0-0xF)
471 	 * BL1 -> B01PTRCTL0[15:12] (0x0-0xF)
472 	 */
473 	reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
474 		channel * DDRIODQ_CH_OFFSET;
475 	msk = (byte_lane & 1) ? 0xf000 : 0xf;
476 	temp = pi_count / HALF_CLK;
477 	temp <<= (byte_lane & 1) ? 12 : 0;
478 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
479 
480 	/* Adjust PI_COUNT */
481 	pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
482 
483 	/*
484 	 * PI (1/64 MCLK, 1 PIs)
485 	 * BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)
486 	 * BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)
487 	 */
488 	reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
489 	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
490 		channel * DDRIODQ_CH_OFFSET);
491 	msk = 0x3f00;
492 	temp = pi_count << 8;
493 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
494 
495 	/*
496 	 * DEADBAND
497 	 * BL0/1 -> B01DBCTL1[06/09] (+1 select)
498 	 * BL0/1 -> B01DBCTL1[00/03] (enable)
499 	 */
500 	reg = B01DBCTL1 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
501 		channel * DDRIODQ_CH_OFFSET;
502 	msk = 0x00;
503 	temp = 0x00;
504 
505 	/* enable */
506 	msk |= (byte_lane & 1) ? (1 << 3) : (1 << 0);
507 	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
508 		temp |= msk;
509 
510 	/* select */
511 	msk |= (byte_lane & 1) ? (1 << 9) : (1 << 6);
512 	if (pi_count < EARLY_DB)
513 		temp |= msk;
514 
515 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
516 
517 	/* error check */
518 	if (pi_count > 0x3f) {
519 		training_message(channel, rank, byte_lane);
520 		mrc_post_code(0xee, 0xe3);
521 	}
522 
523 	LEAVEFN();
524 }
525 
526 /*
527  * This function will return the amount of WDQ delay on the given
528  * channel, rank, byte_lane as an absolute PI count.
529  *
530  * (currently doesn't comprehend rank)
531  */
532 uint32_t get_wdq(uint8_t channel, uint8_t rank, uint8_t byte_lane)
533 {
534 	uint32_t reg;
535 	uint32_t temp;
536 	uint32_t pi_count;
537 
538 	ENTERFN();
539 
540 	/*
541 	 * RDPTR (1/2 MCLK, 64 PIs)
542 	 * BL0 -> B01PTRCTL0[03:00] (0x0-0xF)
543 	 * BL1 -> B01PTRCTL0[15:12] (0x0-0xF)
544 	 */
545 	reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
546 		channel * DDRIODQ_CH_OFFSET;
547 	temp = msg_port_alt_read(DDRPHY, reg);
548 	temp >>= (byte_lane & 1) ? 12 : 0;
549 	temp &= 0xf;
550 
551 	/* Adjust PI_COUNT */
552 	pi_count = temp * HALF_CLK;
553 
554 	/*
555 	 * PI (1/64 MCLK, 1 PIs)
556 	 * BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)
557 	 * BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)
558 	 */
559 	reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
560 	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
561 		channel * DDRIODQ_CH_OFFSET);
562 	temp = msg_port_alt_read(DDRPHY, reg);
563 	temp >>= 8;
564 	temp &= 0x3f;
565 
566 	/* Adjust PI_COUNT */
567 	pi_count += temp;
568 
569 	LEAVEFN();
570 
571 	return pi_count;
572 }
573 
574 /*
575  * This function will program the WCMD delays based on an absolute
576  * number of PIs.
577  */
578 void set_wcmd(uint8_t channel, uint32_t pi_count)
579 {
580 	uint32_t reg;
581 	uint32_t msk;
582 	uint32_t temp;
583 
584 	ENTERFN();
585 
586 	/*
587 	 * RDPTR (1/2 MCLK, 64 PIs)
588 	 * CMDPTRREG[11:08] (0x0-0xF)
589 	 */
590 	reg = CMDPTRREG + channel * DDRIOCCC_CH_OFFSET;
591 	msk = 0xf00;
592 	temp = pi_count / HALF_CLK;
593 	temp <<= 8;
594 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
595 
596 	/* Adjust PI_COUNT */
597 	pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
598 
599 	/*
600 	 * PI (1/64 MCLK, 1 PIs)
601 	 * CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)
602 	 * CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)
603 	 * CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)
604 	 * CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)
605 	 * CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)
606 	 * CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)
607 	 * CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)
608 	 * CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)
609 	 */
610 	reg = CMDDLLPICODER1 + channel * DDRIOCCC_CH_OFFSET;
611 	msk = 0x3f3f3f3f;
612 	temp = (pi_count << 24) | (pi_count << 16) |
613 		(pi_count << 8) | (pi_count << 0);
614 
615 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
616 	reg = CMDDLLPICODER0 + channel * DDRIOCCC_CH_OFFSET;	/* PO */
617 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
618 
619 	/*
620 	 * DEADBAND
621 	 * CMDCFGREG0[17] (+1 select)
622 	 * CMDCFGREG0[16] (enable)
623 	 */
624 	reg = CMDCFGREG0 + channel * DDRIOCCC_CH_OFFSET;
625 	msk = 0x00;
626 	temp = 0x00;
627 
628 	/* enable */
629 	msk |= (1 << 16);
630 	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
631 		temp |= msk;
632 
633 	/* select */
634 	msk |= (1 << 17);
635 	if (pi_count < EARLY_DB)
636 		temp |= msk;
637 
638 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
639 
640 	/* error check */
641 	if (pi_count > 0x3f)
642 		mrc_post_code(0xee, 0xe4);
643 
644 	LEAVEFN();
645 }
646 
647 /*
648  * This function will return the amount of WCMD delay on the given
649  * channel as an absolute PI count.
650  */
651 uint32_t get_wcmd(uint8_t channel)
652 {
653 	uint32_t reg;
654 	uint32_t temp;
655 	uint32_t pi_count;
656 
657 	ENTERFN();
658 
659 	/*
660 	 * RDPTR (1/2 MCLK, 64 PIs)
661 	 * CMDPTRREG[11:08] (0x0-0xF)
662 	 */
663 	reg = CMDPTRREG + channel * DDRIOCCC_CH_OFFSET;
664 	temp = msg_port_alt_read(DDRPHY, reg);
665 	temp >>= 8;
666 	temp &= 0xf;
667 
668 	/* Adjust PI_COUNT */
669 	pi_count = temp * HALF_CLK;
670 
671 	/*
672 	 * PI (1/64 MCLK, 1 PIs)
673 	 * CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)
674 	 * CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)
675 	 * CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)
676 	 * CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)
677 	 * CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)
678 	 * CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)
679 	 * CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)
680 	 * CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)
681 	 */
682 	reg = CMDDLLPICODER1 + channel * DDRIOCCC_CH_OFFSET;
683 	temp = msg_port_alt_read(DDRPHY, reg);
684 	temp >>= 16;
685 	temp &= 0x3f;
686 
687 	/* Adjust PI_COUNT */
688 	pi_count += temp;
689 
690 	LEAVEFN();
691 
692 	return pi_count;
693 }
694 
695 /*
696  * This function will program the WCLK delays based on an absolute
697  * number of PIs.
698  */
699 void set_wclk(uint8_t channel, uint8_t rank, uint32_t pi_count)
700 {
701 	uint32_t reg;
702 	uint32_t msk;
703 	uint32_t temp;
704 
705 	ENTERFN();
706 
707 	/*
708 	 * RDPTR (1/2 MCLK, 64 PIs)
709 	 * CCPTRREG[15:12] -> CLK1 (0x0-0xF)
710 	 * CCPTRREG[11:08] -> CLK0 (0x0-0xF)
711 	 */
712 	reg = CCPTRREG + channel * DDRIOCCC_CH_OFFSET;
713 	msk = 0xff00;
714 	temp = ((pi_count / HALF_CLK) << 12) | ((pi_count / HALF_CLK) << 8);
715 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
716 
717 	/* Adjust PI_COUNT */
718 	pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
719 
720 	/*
721 	 * PI (1/64 MCLK, 1 PIs)
722 	 * ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)
723 	 * ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)
724 	 */
725 	reg = rank ? ECCB1DLLPICODER0 : ECCB1DLLPICODER0;
726 	reg += (channel * DDRIOCCC_CH_OFFSET);
727 	msk = 0x3f3f00;
728 	temp = (pi_count << 16) | (pi_count << 8);
729 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
730 
731 	reg = rank ? ECCB1DLLPICODER1 : ECCB1DLLPICODER1;
732 	reg += (channel * DDRIOCCC_CH_OFFSET);
733 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
734 
735 	reg = rank ? ECCB1DLLPICODER2 : ECCB1DLLPICODER2;
736 	reg += (channel * DDRIOCCC_CH_OFFSET);
737 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
738 
739 	reg = rank ? ECCB1DLLPICODER3 : ECCB1DLLPICODER3;
740 	reg += (channel * DDRIOCCC_CH_OFFSET);
741 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
742 
743 	/*
744 	 * DEADBAND
745 	 * CCCFGREG1[11:08] (+1 select)
746 	 * CCCFGREG1[03:00] (enable)
747 	 */
748 	reg = CCCFGREG1 + channel * DDRIOCCC_CH_OFFSET;
749 	msk = 0x00;
750 	temp = 0x00;
751 
752 	/* enable */
753 	msk |= 0xf;
754 	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
755 		temp |= msk;
756 
757 	/* select */
758 	msk |= 0xf00;
759 	if (pi_count < EARLY_DB)
760 		temp |= msk;
761 
762 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
763 
764 	/* error check */
765 	if (pi_count > 0x3f)
766 		mrc_post_code(0xee, 0xe5);
767 
768 	LEAVEFN();
769 }
770 
771 /*
772  * This function will return the amout of WCLK delay on the given
773  * channel, rank as an absolute PI count.
774  */
775 uint32_t get_wclk(uint8_t channel, uint8_t rank)
776 {
777 	uint32_t reg;
778 	uint32_t temp;
779 	uint32_t pi_count;
780 
781 	ENTERFN();
782 
783 	/*
784 	 * RDPTR (1/2 MCLK, 64 PIs)
785 	 * CCPTRREG[15:12] -> CLK1 (0x0-0xF)
786 	 * CCPTRREG[11:08] -> CLK0 (0x0-0xF)
787 	 */
788 	reg = CCPTRREG + channel * DDRIOCCC_CH_OFFSET;
789 	temp = msg_port_alt_read(DDRPHY, reg);
790 	temp >>= rank ? 12 : 8;
791 	temp &= 0xf;
792 
793 	/* Adjust PI_COUNT */
794 	pi_count = temp * HALF_CLK;
795 
796 	/*
797 	 * PI (1/64 MCLK, 1 PIs)
798 	 * ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)
799 	 * ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)
800 	 */
801 	reg = rank ? ECCB1DLLPICODER0 : ECCB1DLLPICODER0;
802 	reg += (channel * DDRIOCCC_CH_OFFSET);
803 	temp = msg_port_alt_read(DDRPHY, reg);
804 	temp >>= rank ? 16 : 8;
805 	temp &= 0x3f;
806 
807 	pi_count += temp;
808 
809 	LEAVEFN();
810 
811 	return pi_count;
812 }
813 
814 /*
815  * This function will program the WCTL delays based on an absolute
816  * number of PIs.
817  *
818  * (currently doesn't comprehend rank)
819  */
820 void set_wctl(uint8_t channel, uint8_t rank, uint32_t pi_count)
821 {
822 	uint32_t reg;
823 	uint32_t msk;
824 	uint32_t temp;
825 
826 	ENTERFN();
827 
828 	/*
829 	 * RDPTR (1/2 MCLK, 64 PIs)
830 	 * CCPTRREG[31:28] (0x0-0xF)
831 	 * CCPTRREG[27:24] (0x0-0xF)
832 	 */
833 	reg = CCPTRREG + channel * DDRIOCCC_CH_OFFSET;
834 	msk = 0xff000000;
835 	temp = ((pi_count / HALF_CLK) << 28) | ((pi_count / HALF_CLK) << 24);
836 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
837 
838 	/* Adjust PI_COUNT */
839 	pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
840 
841 	/*
842 	 * PI (1/64 MCLK, 1 PIs)
843 	 * ECCB1DLLPICODER?[29:24] (0x00-0x3F)
844 	 * ECCB1DLLPICODER?[29:24] (0x00-0x3F)
845 	 */
846 	reg = ECCB1DLLPICODER0 + channel * DDRIOCCC_CH_OFFSET;
847 	msk = 0x3f000000;
848 	temp = (pi_count << 24);
849 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
850 
851 	reg = ECCB1DLLPICODER1 + channel * DDRIOCCC_CH_OFFSET;
852 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
853 
854 	reg = ECCB1DLLPICODER2 + channel * DDRIOCCC_CH_OFFSET;
855 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
856 
857 	reg = ECCB1DLLPICODER3 + channel * DDRIOCCC_CH_OFFSET;
858 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
859 
860 	/*
861 	 * DEADBAND
862 	 * CCCFGREG1[13:12] (+1 select)
863 	 * CCCFGREG1[05:04] (enable)
864 	 */
865 	reg = CCCFGREG1 + channel * DDRIOCCC_CH_OFFSET;
866 	msk = 0x00;
867 	temp = 0x00;
868 
869 	/* enable */
870 	msk |= 0x30;
871 	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
872 		temp |= msk;
873 
874 	/* select */
875 	msk |= 0x3000;
876 	if (pi_count < EARLY_DB)
877 		temp |= msk;
878 
879 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
880 
881 	/* error check */
882 	if (pi_count > 0x3f)
883 		mrc_post_code(0xee, 0xe6);
884 
885 	LEAVEFN();
886 }
887 
888 /*
889  * This function will return the amount of WCTL delay on the given
890  * channel, rank as an absolute PI count.
891  *
892  * (currently doesn't comprehend rank)
893  */
894 uint32_t get_wctl(uint8_t channel, uint8_t rank)
895 {
896 	uint32_t reg;
897 	uint32_t temp;
898 	uint32_t pi_count;
899 
900 	ENTERFN();
901 
902 	/*
903 	 * RDPTR (1/2 MCLK, 64 PIs)
904 	 * CCPTRREG[31:28] (0x0-0xF)
905 	 * CCPTRREG[27:24] (0x0-0xF)
906 	 */
907 	reg = CCPTRREG + channel * DDRIOCCC_CH_OFFSET;
908 	temp = msg_port_alt_read(DDRPHY, reg);
909 	temp >>= 24;
910 	temp &= 0xf;
911 
912 	/* Adjust PI_COUNT */
913 	pi_count = temp * HALF_CLK;
914 
915 	/*
916 	 * PI (1/64 MCLK, 1 PIs)
917 	 * ECCB1DLLPICODER?[29:24] (0x00-0x3F)
918 	 * ECCB1DLLPICODER?[29:24] (0x00-0x3F)
919 	 */
920 	reg = ECCB1DLLPICODER0 + channel * DDRIOCCC_CH_OFFSET;
921 	temp = msg_port_alt_read(DDRPHY, reg);
922 	temp >>= 24;
923 	temp &= 0x3f;
924 
925 	/* Adjust PI_COUNT */
926 	pi_count += temp;
927 
928 	LEAVEFN();
929 
930 	return pi_count;
931 }
932 
933 /*
934  * This function will program the internal Vref setting in a given
935  * byte lane in a given channel.
936  */
937 void set_vref(uint8_t channel, uint8_t byte_lane, uint32_t setting)
938 {
939 	uint32_t reg = (byte_lane & 0x1) ? B1VREFCTL : B0VREFCTL;
940 
941 	ENTERFN();
942 
943 	DPF(D_TRN, "Vref ch%d ln%d : val=%03X\n",
944 	    channel, byte_lane, setting);
945 
946 	mrc_alt_write_mask(DDRPHY, reg + channel * DDRIODQ_CH_OFFSET +
947 		(byte_lane >> 1) * DDRIODQ_BL_OFFSET,
948 		vref_codes[setting] << 2, 0xfc);
949 
950 	/*
951 	 * need to wait ~300ns for Vref to settle
952 	 * (check that this is necessary)
953 	 */
954 	delay_n(300);
955 
956 	/* ??? may need to clear pointers ??? */
957 
958 	LEAVEFN();
959 }
960 
961 /*
962  * This function will return the internal Vref setting for the given
963  * channel, byte_lane.
964  */
965 uint32_t get_vref(uint8_t channel, uint8_t byte_lane)
966 {
967 	uint8_t j;
968 	uint32_t ret_val = sizeof(vref_codes) / 2;
969 	uint32_t reg = (byte_lane & 0x1) ? B1VREFCTL : B0VREFCTL;
970 	uint32_t temp;
971 
972 	ENTERFN();
973 
974 	temp = msg_port_alt_read(DDRPHY, reg + channel * DDRIODQ_CH_OFFSET +
975 		(byte_lane >> 1) * DDRIODQ_BL_OFFSET);
976 	temp >>= 2;
977 	temp &= 0x3f;
978 
979 	for (j = 0; j < sizeof(vref_codes); j++) {
980 		if (vref_codes[j] == temp) {
981 			ret_val = j;
982 			break;
983 		}
984 	}
985 
986 	LEAVEFN();
987 
988 	return ret_val;
989 }
990 
991 /*
992  * This function will return a 32-bit address in the desired
993  * channel and rank.
994  */
995 uint32_t get_addr(uint8_t channel, uint8_t rank)
996 {
997 	uint32_t offset = 32 * 1024 * 1024;	/* 32MB */
998 
999 	/* Begin product specific code */
1000 	if (channel > 0) {
1001 		DPF(D_ERROR, "ILLEGAL CHANNEL\n");
1002 		DEAD_LOOP();
1003 	}
1004 
1005 	if (rank > 1) {
1006 		DPF(D_ERROR, "ILLEGAL RANK\n");
1007 		DEAD_LOOP();
1008 	}
1009 
1010 	/* use 256MB lowest density as per DRP == 0x0003 */
1011 	offset += rank * (256 * 1024 * 1024);
1012 
1013 	return offset;
1014 }
1015 
1016 /*
1017  * This function will sample the DQTRAINSTS registers in the given
1018  * channel/rank SAMPLE_SIZE times looking for a valid '0' or '1'.
1019  *
1020  * It will return an encoded 32-bit date in which each bit corresponds to
1021  * the sampled value on the byte lane.
1022  */
1023 uint32_t sample_dqs(struct mrc_params *mrc_params, uint8_t channel,
1024 		    uint8_t rank, bool rcvn)
1025 {
1026 	uint8_t j;	/* just a counter */
1027 	uint8_t bl;	/* which BL in the module (always 2 per module) */
1028 	uint8_t bl_grp;	/* which BL module */
1029 	/* byte lane divisor */
1030 	uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1;
1031 	uint32_t msk[2];	/* BLx in module */
1032 	/* DQTRAINSTS register contents for each sample */
1033 	uint32_t sampled_val[SAMPLE_SIZE];
1034 	uint32_t num_0s;	/* tracks the number of '0' samples */
1035 	uint32_t num_1s;	/* tracks the number of '1' samples */
1036 	uint32_t ret_val = 0x00;	/* assume all '0' samples */
1037 	uint32_t address = get_addr(channel, rank);
1038 
1039 	/* initialise msk[] */
1040 	msk[0] = rcvn ? (1 << 1) : (1 << 9);	/* BL0 */
1041 	msk[1] = rcvn ? (1 << 0) : (1 << 8);	/* BL1 */
1042 
1043 	/* cycle through each byte lane group */
1044 	for (bl_grp = 0; bl_grp < (NUM_BYTE_LANES / bl_divisor) / 2; bl_grp++) {
1045 		/* take SAMPLE_SIZE samples */
1046 		for (j = 0; j < SAMPLE_SIZE; j++) {
1047 			hte_mem_op(address, mrc_params->first_run,
1048 				   rcvn ? 0 : 1);
1049 			mrc_params->first_run = 0;
1050 
1051 			/*
1052 			 * record the contents of the proper
1053 			 * DQTRAINSTS register
1054 			 */
1055 			sampled_val[j] = msg_port_alt_read(DDRPHY,
1056 				DQTRAINSTS +
1057 				bl_grp * DDRIODQ_BL_OFFSET +
1058 				channel * DDRIODQ_CH_OFFSET);
1059 		}
1060 
1061 		/*
1062 		 * look for a majority value (SAMPLE_SIZE / 2) + 1
1063 		 * on the byte lane and set that value in the corresponding
1064 		 * ret_val bit
1065 		 */
1066 		for (bl = 0; bl < 2; bl++) {
1067 			num_0s = 0x00;	/* reset '0' tracker for byte lane */
1068 			num_1s = 0x00;	/* reset '1' tracker for byte lane */
1069 			for (j = 0; j < SAMPLE_SIZE; j++) {
1070 				if (sampled_val[j] & msk[bl])
1071 					num_1s++;
1072 				else
1073 					num_0s++;
1074 			}
1075 		if (num_1s > num_0s)
1076 			ret_val |= (1 << (bl + bl_grp * 2));
1077 		}
1078 	}
1079 
1080 	/*
1081 	 * "ret_val.0" contains the status of BL0
1082 	 * "ret_val.1" contains the status of BL1
1083 	 * "ret_val.2" contains the status of BL2
1084 	 * etc.
1085 	 */
1086 	return ret_val;
1087 }
1088 
1089 /* This function will find the rising edge transition on RCVN or WDQS */
1090 void find_rising_edge(struct mrc_params *mrc_params, uint32_t delay[],
1091 		      uint8_t channel, uint8_t rank, bool rcvn)
1092 {
1093 	bool all_edges_found;	/* determines stop condition */
1094 	bool direction[NUM_BYTE_LANES];	/* direction indicator */
1095 	uint8_t sample;	/* sample counter */
1096 	uint8_t bl;	/* byte lane counter */
1097 	/* byte lane divisor */
1098 	uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1;
1099 	uint32_t sample_result[SAMPLE_CNT];	/* results of sample_dqs() */
1100 	uint32_t temp;
1101 	uint32_t transition_pattern;
1102 
1103 	ENTERFN();
1104 
1105 	/* select hte and request initial configuration */
1106 	select_hte();
1107 	mrc_params->first_run = 1;
1108 
1109 	/* Take 3 sample points (T1,T2,T3) to obtain a transition pattern */
1110 	for (sample = 0; sample < SAMPLE_CNT; sample++) {
1111 		/* program the desired delays for sample */
1112 		for (bl = 0; bl < (NUM_BYTE_LANES / bl_divisor); bl++) {
1113 			/* increase sample delay by 26 PI (0.2 CLK) */
1114 			if (rcvn) {
1115 				set_rcvn(channel, rank, bl,
1116 					 delay[bl] + sample * SAMPLE_DLY);
1117 			} else {
1118 				set_wdqs(channel, rank, bl,
1119 					 delay[bl] + sample * SAMPLE_DLY);
1120 			}
1121 		}
1122 
1123 		/* take samples (Tsample_i) */
1124 		sample_result[sample] = sample_dqs(mrc_params,
1125 			channel, rank, rcvn);
1126 
1127 		DPF(D_TRN,
1128 		    "Find rising edge %s ch%d rnk%d: #%d dly=%d dqs=%02X\n",
1129 		    rcvn ? "RCVN" : "WDQS", channel, rank, sample,
1130 		    sample * SAMPLE_DLY, sample_result[sample]);
1131 	}
1132 
1133 	/*
1134 	 * This pattern will help determine where we landed and ultimately
1135 	 * how to place RCVEN/WDQS.
1136 	 */
1137 	for (bl = 0; bl < NUM_BYTE_LANES / bl_divisor; bl++) {
1138 		/* build transition_pattern (MSB is 1st sample) */
1139 		transition_pattern = 0;
1140 		for (sample = 0; sample < SAMPLE_CNT; sample++) {
1141 			transition_pattern |=
1142 				((sample_result[sample] & (1 << bl)) >> bl) <<
1143 				(SAMPLE_CNT - 1 - sample);
1144 		}
1145 
1146 		DPF(D_TRN, "=== transition pattern %d\n", transition_pattern);
1147 
1148 		/*
1149 		 * set up to look for rising edge based on
1150 		 * transition_pattern
1151 		 */
1152 		switch (transition_pattern) {
1153 		case 0:	/* sampled 0->0->0 */
1154 			/* move forward from T3 looking for 0->1 */
1155 			delay[bl] += 2 * SAMPLE_DLY;
1156 			direction[bl] = FORWARD;
1157 			break;
1158 		case 1:	/* sampled 0->0->1 */
1159 		case 5:	/* sampled 1->0->1 (bad duty cycle) *HSD#237503* */
1160 			/* move forward from T2 looking for 0->1 */
1161 			delay[bl] += 1 * SAMPLE_DLY;
1162 			direction[bl] = FORWARD;
1163 			break;
1164 		case 2:	/* sampled 0->1->0 (bad duty cycle) *HSD#237503* */
1165 		case 3:	/* sampled 0->1->1 */
1166 			/* move forward from T1 looking for 0->1 */
1167 			delay[bl] += 0 * SAMPLE_DLY;
1168 			direction[bl] = FORWARD;
1169 			break;
1170 		case 4:	/* sampled 1->0->0 (assumes BL8, HSD#234975) */
1171 			/* move forward from T3 looking for 0->1 */
1172 			delay[bl] += 2 * SAMPLE_DLY;
1173 			direction[bl] = FORWARD;
1174 			break;
1175 		case 6:	/* sampled 1->1->0 */
1176 		case 7:	/* sampled 1->1->1 */
1177 			/* move backward from T1 looking for 1->0 */
1178 			delay[bl] += 0 * SAMPLE_DLY;
1179 			direction[bl] = BACKWARD;
1180 			break;
1181 		default:
1182 			mrc_post_code(0xee, 0xee);
1183 			break;
1184 		}
1185 
1186 		/* program delays */
1187 		if (rcvn)
1188 			set_rcvn(channel, rank, bl, delay[bl]);
1189 		else
1190 			set_wdqs(channel, rank, bl, delay[bl]);
1191 	}
1192 
1193 	/*
1194 	 * Based on the observed transition pattern on the byte lane,
1195 	 * begin looking for a rising edge with single PI granularity.
1196 	 */
1197 	do {
1198 		all_edges_found = true;	/* assume all byte lanes passed */
1199 		/* take a sample */
1200 		temp = sample_dqs(mrc_params, channel, rank, rcvn);
1201 		/* check all each byte lane for proper edge */
1202 		for (bl = 0; bl < NUM_BYTE_LANES / bl_divisor; bl++) {
1203 			if (temp & (1 << bl)) {
1204 				/* sampled "1" */
1205 				if (direction[bl] == BACKWARD) {
1206 					/*
1207 					 * keep looking for edge
1208 					 * on this byte lane
1209 					 */
1210 					all_edges_found = false;
1211 					delay[bl] -= 1;
1212 					if (rcvn) {
1213 						set_rcvn(channel, rank,
1214 							 bl, delay[bl]);
1215 					} else {
1216 						set_wdqs(channel, rank,
1217 							 bl, delay[bl]);
1218 					}
1219 				}
1220 			} else {
1221 				/* sampled "0" */
1222 				if (direction[bl] == FORWARD) {
1223 					/*
1224 					 * keep looking for edge
1225 					 * on this byte lane
1226 					 */
1227 					all_edges_found = false;
1228 					delay[bl] += 1;
1229 					if (rcvn) {
1230 						set_rcvn(channel, rank,
1231 							 bl, delay[bl]);
1232 					} else {
1233 						set_wdqs(channel, rank,
1234 							 bl, delay[bl]);
1235 					}
1236 				}
1237 			}
1238 		}
1239 	} while (!all_edges_found);
1240 
1241 	/* restore DDR idle state */
1242 	dram_init_command(DCMD_PREA(rank));
1243 
1244 	DPF(D_TRN, "Delay %03X %03X %03X %03X\n",
1245 	    delay[0], delay[1], delay[2], delay[3]);
1246 
1247 	LEAVEFN();
1248 }
1249 
1250 /*
1251  * This function will return a 32 bit mask that will be used to
1252  * check for byte lane failures.
1253  */
1254 uint32_t byte_lane_mask(struct mrc_params *mrc_params)
1255 {
1256 	uint32_t j;
1257 	uint32_t ret_val = 0x00;
1258 
1259 	/*
1260 	 * set ret_val based on NUM_BYTE_LANES such that you will check
1261 	 * only BL0 in result
1262 	 *
1263 	 * (each bit in result represents a byte lane)
1264 	 */
1265 	for (j = 0; j < MAX_BYTE_LANES; j += NUM_BYTE_LANES)
1266 		ret_val |= (1 << ((j / NUM_BYTE_LANES) * NUM_BYTE_LANES));
1267 
1268 	/*
1269 	 * HSD#235037
1270 	 * need to adjust the mask for 16-bit mode
1271 	 */
1272 	if (mrc_params->channel_width == X16)
1273 		ret_val |= (ret_val << 2);
1274 
1275 	return ret_val;
1276 }
1277 
1278 /*
1279  * Check memory executing simple write/read/verify at the specified address.
1280  *
1281  * Bits in the result indicate failure on specific byte lane.
1282  */
1283 uint32_t check_rw_coarse(struct mrc_params *mrc_params, uint32_t address)
1284 {
1285 	uint32_t result = 0;
1286 	uint8_t first_run = 0;
1287 
1288 	if (mrc_params->hte_setup) {
1289 		mrc_params->hte_setup = 0;
1290 		first_run = 1;
1291 		select_hte();
1292 	}
1293 
1294 	result = hte_basic_write_read(mrc_params, address, first_run,
1295 				      WRITE_TRAIN);
1296 
1297 	DPF(D_TRN, "check_rw_coarse result is %x\n", result);
1298 
1299 	return result;
1300 }
1301 
1302 /*
1303  * Check memory executing write/read/verify of many data patterns
1304  * at the specified address. Bits in the result indicate failure
1305  * on specific byte lane.
1306  */
1307 uint32_t check_bls_ex(struct mrc_params *mrc_params, uint32_t address)
1308 {
1309 	uint32_t result;
1310 	uint8_t first_run = 0;
1311 
1312 	if (mrc_params->hte_setup) {
1313 		mrc_params->hte_setup = 0;
1314 		first_run = 1;
1315 		select_hte();
1316 	}
1317 
1318 	result = hte_write_stress_bit_lanes(mrc_params, address, first_run);
1319 
1320 	DPF(D_TRN, "check_bls_ex result is %x\n", result);
1321 
1322 	return result;
1323 }
1324 
/*
 * 32-bit LFSR with characteristic polynomial: X^32 + X^22 + X^2 + X^1
 *
 * The value pointed to by lfsr_ptr is taken as the current state,
 * shifted 32 times and then written back as the next value.
 */
void lfsr32(uint32_t *lfsr_ptr)
{
	uint32_t state = *lfsr_ptr;
	uint32_t feedback;
	int step;

	for (step = 32; step > 0; step--) {
		/* inverted XOR of the taps at bits 0, 1, 2 and 22 */
		feedback = 1 ^ (state & 1) ^ ((state >> 1) & 1) ^
			   ((state >> 2) & 1) ^ ((state >> 22) & 1);

		/* shift right by one, feeding the new bit in at the top */
		state = (state >> 1) | (feedback << 31);
	}

	*lfsr_ptr = state;
}
1350 
1351 /* Clear the pointers in a given byte lane in a given channel */
1352 void clear_pointers(void)
1353 {
1354 	uint8_t channel;
1355 	uint8_t bl;
1356 
1357 	ENTERFN();
1358 
1359 	for (channel = 0; channel < NUM_CHANNELS; channel++) {
1360 		for (bl = 0; bl < NUM_BYTE_LANES; bl++) {
1361 			mrc_alt_write_mask(DDRPHY,
1362 					   B01PTRCTL1 +
1363 					   channel * DDRIODQ_CH_OFFSET +
1364 					   (bl >> 1) * DDRIODQ_BL_OFFSET,
1365 					   ~(1 << 8), (1 << 8));
1366 
1367 			mrc_alt_write_mask(DDRPHY,
1368 					   B01PTRCTL1 +
1369 					   channel * DDRIODQ_CH_OFFSET +
1370 					   (bl >> 1) * DDRIODQ_BL_OFFSET,
1371 					   (1 << 8), (1 << 8));
1372 		}
1373 	}
1374 
1375 	LEAVEFN();
1376 }
1377 
1378 static void print_timings_internal(uint8_t algo, uint8_t channel, uint8_t rank,
1379 				   uint8_t bl_divisor)
1380 {
1381 	uint8_t bl;
1382 
1383 	switch (algo) {
1384 	case RCVN:
1385 		DPF(D_INFO, "\nRCVN[%02d:%02d]", channel, rank);
1386 		break;
1387 	case WDQS:
1388 		DPF(D_INFO, "\nWDQS[%02d:%02d]", channel, rank);
1389 		break;
1390 	case WDQX:
1391 		DPF(D_INFO, "\nWDQx[%02d:%02d]", channel, rank);
1392 		break;
1393 	case RDQS:
1394 		DPF(D_INFO, "\nRDQS[%02d:%02d]", channel, rank);
1395 		break;
1396 	case VREF:
1397 		DPF(D_INFO, "\nVREF[%02d:%02d]", channel, rank);
1398 		break;
1399 	case WCMD:
1400 		DPF(D_INFO, "\nWCMD[%02d:%02d]", channel, rank);
1401 		break;
1402 	case WCTL:
1403 		DPF(D_INFO, "\nWCTL[%02d:%02d]", channel, rank);
1404 		break;
1405 	case WCLK:
1406 		DPF(D_INFO, "\nWCLK[%02d:%02d]", channel, rank);
1407 		break;
1408 	default:
1409 		break;
1410 	}
1411 
1412 	for (bl = 0; bl < NUM_BYTE_LANES / bl_divisor; bl++) {
1413 		switch (algo) {
1414 		case RCVN:
1415 			DPF(D_INFO, " %03d", get_rcvn(channel, rank, bl));
1416 			break;
1417 		case WDQS:
1418 			DPF(D_INFO, " %03d", get_wdqs(channel, rank, bl));
1419 			break;
1420 		case WDQX:
1421 			DPF(D_INFO, " %03d", get_wdq(channel, rank, bl));
1422 			break;
1423 		case RDQS:
1424 			DPF(D_INFO, " %03d", get_rdqs(channel, rank, bl));
1425 			break;
1426 		case VREF:
1427 			DPF(D_INFO, " %03d", get_vref(channel, bl));
1428 			break;
1429 		case WCMD:
1430 			DPF(D_INFO, " %03d", get_wcmd(channel));
1431 			break;
1432 		case WCTL:
1433 			DPF(D_INFO, " %03d", get_wctl(channel, rank));
1434 			break;
1435 		case WCLK:
1436 			DPF(D_INFO, " %03d", get_wclk(channel, rank));
1437 			break;
1438 		default:
1439 			break;
1440 		}
1441 	}
1442 }
1443 
1444 void print_timings(struct mrc_params *mrc_params)
1445 {
1446 	uint8_t algo;
1447 	uint8_t channel;
1448 	uint8_t rank;
1449 	uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1;
1450 
1451 	DPF(D_INFO, "\n---------------------------");
1452 	DPF(D_INFO, "\nALGO[CH:RK] BL0 BL1 BL2 BL3");
1453 	DPF(D_INFO, "\n===========================");
1454 
1455 	for (algo = 0; algo < MAX_ALGOS; algo++) {
1456 		for (channel = 0; channel < NUM_CHANNELS; channel++) {
1457 			if (mrc_params->channel_enables & (1 << channel)) {
1458 				for (rank = 0; rank < NUM_RANKS; rank++) {
1459 					if (mrc_params->rank_enables &
1460 						(1 << rank)) {
1461 						print_timings_internal(algo,
1462 							channel, rank,
1463 							bl_divisor);
1464 					}
1465 				}
1466 			}
1467 		}
1468 	}
1469 
1470 	DPF(D_INFO, "\n---------------------------");
1471 	DPF(D_INFO, "\n");
1472 }
1473