xref: /openbmc/u-boot/arch/x86/cpu/quark/mrc_util.c (revision f13606b7)
1 /*
2  * Copyright (C) 2013, Intel Corporation
3  * Copyright (C) 2015, Bin Meng <bmeng.cn@gmail.com>
4  *
5  * Ported from Intel released Quark UEFI BIOS
6  * QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei
7  *
8  * SPDX-License-Identifier:	Intel
9  */
10 
11 #include <common.h>
12 #include <asm/arch/device.h>
13 #include <asm/arch/mrc.h>
14 #include <asm/arch/msg_port.h>
15 #include "mrc_util.h"
16 #include "hte.h"
17 #include "smc.h"
18 
/*
 * Vref register codes, indexed by logical Vref setting and ordered from
 * the lowest to the highest setting. set_vref() maps a setting to its
 * code through this table and get_vref() performs the reverse lookup.
 */
static const uint8_t vref_codes[64] = {
	/* settings 0-31: codes 0x3f down to 0x20 */
	0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38,
	0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30,
	0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28,
	0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20,
	/* settings 32-63: codes 0x00 up to 0x1f */
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
};
30 
31 void mrc_write_mask(u32 unit, u32 addr, u32 data, u32 mask)
32 {
33 	msg_port_write(unit, addr,
34 		       (msg_port_read(unit, addr) & ~(mask)) |
35 		       ((data) & (mask)));
36 }
37 
38 void mrc_alt_write_mask(u32 unit, u32 addr, u32 data, u32 mask)
39 {
40 	msg_port_alt_write(unit, addr,
41 			   (msg_port_alt_read(unit, addr) & ~(mask)) |
42 			   ((data) & (mask)));
43 }
44 
45 void mrc_post_code(uint8_t major, uint8_t minor)
46 {
47 	/* send message to UART */
48 	DPF(D_INFO, "POST: 0x%01x%02x\n", major, minor);
49 
50 	/* error check */
51 	if (major == 0xee)
52 		hang();
53 }
54 
/*
 * Delay number of nanoseconds
 *
 * Busy-waits on the TSC. The requested time is converted to TSC ticks as
 * (get_tbclk_mhz() * ns) / 1000.
 *
 * NOTE(review): the product is formed before it is added to the 64-bit
 * deadline, so a large 'ns' may wrap if get_tbclk_mhz() returns a 32-bit
 * type - confirm against its declaration.
 */
void delay_n(uint32_t ns)
{
	uint64_t deadline;

	/* start from the current TSC value ... */
	deadline = rdtsc();

	/* ... and add the number of ticks that 'ns' corresponds to */
	deadline += (get_tbclk_mhz() * ns) / 1000;

	while (rdtsc() < deadline)
		;
}
66 
/*
 * Delay number of microseconds
 *
 * Despite its name, 'ms' is a count of microseconds: every loop iteration
 * waits 1000 ns.
 */
void delay_u(uint32_t ms)
{
	/* 64-bit math is not an option, just use loops */
	for (; ms != 0; ms--)
		delay_n(1000);
}
74 
75 /* Select Memory Manager as the source for PRI interface */
76 void select_mem_mgr(void)
77 {
78 	u32 dco;
79 
80 	ENTERFN();
81 
82 	dco = msg_port_read(MEM_CTLR, DCO);
83 	dco &= ~BIT28;
84 	msg_port_write(MEM_CTLR, DCO, dco);
85 
86 	LEAVEFN();
87 }
88 
89 /* Select HTE as the source for PRI interface */
90 void select_hte(void)
91 {
92 	u32 dco;
93 
94 	ENTERFN();
95 
96 	dco = msg_port_read(MEM_CTLR, DCO);
97 	dco |= BIT28;
98 	msg_port_write(MEM_CTLR, DCO, dco);
99 
100 	LEAVEFN();
101 }
102 
103 /*
104  * Send DRAM command
105  * data should be formated using DCMD_Xxxx macro or emrsXCommand structure
106  */
107 void dram_init_command(uint32_t data)
108 {
109 	pci_write_config_dword(QUARK_HOST_BRIDGE, MSG_DATA_REG, data);
110 	pci_write_config_dword(QUARK_HOST_BRIDGE, MSG_CTRL_EXT_REG, 0);
111 	msg_port_setup(MSG_OP_DRAM_INIT, MEM_CTLR, 0);
112 
113 	DPF(D_REGWR, "WR32 %03X %08X %08X\n", MEM_CTLR, 0, data);
114 }
115 
116 /* Send DRAM wake command using special MCU side-band WAKE opcode */
117 void dram_wake_command(void)
118 {
119 	ENTERFN();
120 
121 	msg_port_setup(MSG_OP_DRAM_WAKE, MEM_CTLR, 0);
122 
123 	LEAVEFN();
124 }
125 
126 void training_message(uint8_t channel, uint8_t rank, uint8_t byte_lane)
127 {
128 	/* send message to UART */
129 	DPF(D_INFO, "CH%01X RK%01X BL%01X\n", channel, rank, byte_lane);
130 }
131 
132 /*
133  * This function will program the RCVEN delays
134  *
135  * (currently doesn't comprehend rank)
136  */
137 void set_rcvn(uint8_t channel, uint8_t rank,
138 	      uint8_t byte_lane, uint32_t pi_count)
139 {
140 	uint32_t reg;
141 	uint32_t msk;
142 	uint32_t temp;
143 
144 	ENTERFN();
145 
146 	DPF(D_TRN, "Rcvn ch%d rnk%d ln%d : pi=%03X\n",
147 	    channel, rank, byte_lane, pi_count);
148 
149 	/*
150 	 * RDPTR (1/2 MCLK, 64 PIs)
151 	 * BL0 -> B01PTRCTL0[11:08] (0x0-0xF)
152 	 * BL1 -> B01PTRCTL0[23:20] (0x0-0xF)
153 	 */
154 	reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) +
155 		(channel * DDRIODQ_CH_OFFSET);
156 	msk = (byte_lane & BIT0) ? (BIT23 | BIT22 | BIT21 | BIT20) :
157 		(BIT11 | BIT10 | BIT9 | BIT8);
158 	temp = (byte_lane & BIT0) ? ((pi_count / HALF_CLK) << 20) :
159 		((pi_count / HALF_CLK) << 8);
160 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
161 
162 	/* Adjust PI_COUNT */
163 	pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
164 
165 	/*
166 	 * PI (1/64 MCLK, 1 PIs)
167 	 * BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)
168 	 * BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)
169 	 */
170 	reg = (byte_lane & BIT0) ? B1DLLPICODER0 : B0DLLPICODER0;
171 	reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) +
172 		(channel * DDRIODQ_CH_OFFSET));
173 	msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24);
174 	temp = pi_count << 24;
175 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
176 
177 	/*
178 	 * DEADBAND
179 	 * BL0/1 -> B01DBCTL1[08/11] (+1 select)
180 	 * BL0/1 -> B01DBCTL1[02/05] (enable)
181 	 */
182 	reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) +
183 		(channel * DDRIODQ_CH_OFFSET);
184 	msk = 0x00;
185 	temp = 0x00;
186 
187 	/* enable */
188 	msk |= (byte_lane & BIT0) ? BIT5 : BIT2;
189 	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
190 		temp |= msk;
191 
192 	/* select */
193 	msk |= (byte_lane & BIT0) ? BIT11 : BIT8;
194 	if (pi_count < EARLY_DB)
195 		temp |= msk;
196 
197 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
198 
199 	/* error check */
200 	if (pi_count > 0x3F) {
201 		training_message(channel, rank, byte_lane);
202 		mrc_post_code(0xee, 0xe0);
203 	}
204 
205 	LEAVEFN();
206 }
207 
208 /*
209  * This function will return the current RCVEN delay on the given
210  * channel, rank, byte_lane as an absolute PI count.
211  *
212  * (currently doesn't comprehend rank)
213  */
214 uint32_t get_rcvn(uint8_t channel, uint8_t rank, uint8_t byte_lane)
215 {
216 	uint32_t reg;
217 	uint32_t temp;
218 	uint32_t pi_count;
219 
220 	ENTERFN();
221 
222 	/*
223 	 * RDPTR (1/2 MCLK, 64 PIs)
224 	 * BL0 -> B01PTRCTL0[11:08] (0x0-0xF)
225 	 * BL1 -> B01PTRCTL0[23:20] (0x0-0xF)
226 	 */
227 	reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) +
228 		(channel * DDRIODQ_CH_OFFSET);
229 	temp = msg_port_alt_read(DDRPHY, reg);
230 	temp >>= (byte_lane & BIT0) ? 20 : 8;
231 	temp &= 0xF;
232 
233 	/* Adjust PI_COUNT */
234 	pi_count = temp * HALF_CLK;
235 
236 	/*
237 	 * PI (1/64 MCLK, 1 PIs)
238 	 * BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)
239 	 * BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)
240 	 */
241 	reg = (byte_lane & BIT0) ? B1DLLPICODER0 : B0DLLPICODER0;
242 	reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) +
243 		(channel * DDRIODQ_CH_OFFSET));
244 	temp = msg_port_alt_read(DDRPHY, reg);
245 	temp >>= 24;
246 	temp &= 0x3F;
247 
248 	/* Adjust PI_COUNT */
249 	pi_count += temp;
250 
251 	LEAVEFN();
252 
253 	return pi_count;
254 }
255 
256 /*
257  * This function will program the RDQS delays based on an absolute
258  * amount of PIs.
259  *
260  * (currently doesn't comprehend rank)
261  */
262 void set_rdqs(uint8_t channel, uint8_t rank,
263 	      uint8_t byte_lane, uint32_t pi_count)
264 {
265 	uint32_t reg;
266 	uint32_t msk;
267 	uint32_t temp;
268 
269 	ENTERFN();
270 	DPF(D_TRN, "Rdqs ch%d rnk%d ln%d : pi=%03X\n",
271 	    channel, rank, byte_lane, pi_count);
272 
273 	/*
274 	 * PI (1/128 MCLK)
275 	 * BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)
276 	 * BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)
277 	 */
278 	reg = (byte_lane & BIT0) ? B1RXDQSPICODE : B0RXDQSPICODE;
279 	reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) +
280 		(channel * DDRIODQ_CH_OFFSET));
281 	msk = (BIT6 | BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0);
282 	temp = pi_count << 0;
283 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
284 
285 	/* error check (shouldn't go above 0x3F) */
286 	if (pi_count > 0x47) {
287 		training_message(channel, rank, byte_lane);
288 		mrc_post_code(0xee, 0xe1);
289 	}
290 
291 	LEAVEFN();
292 }
293 
294 /*
295  * This function will return the current RDQS delay on the given
296  * channel, rank, byte_lane as an absolute PI count.
297  *
298  * (currently doesn't comprehend rank)
299  */
300 uint32_t get_rdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane)
301 {
302 	uint32_t reg;
303 	uint32_t temp;
304 	uint32_t pi_count;
305 
306 	ENTERFN();
307 
308 	/*
309 	 * PI (1/128 MCLK)
310 	 * BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)
311 	 * BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)
312 	 */
313 	reg = (byte_lane & BIT0) ? B1RXDQSPICODE : B0RXDQSPICODE;
314 	reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) +
315 		(channel * DDRIODQ_CH_OFFSET));
316 	temp = msg_port_alt_read(DDRPHY, reg);
317 
318 	/* Adjust PI_COUNT */
319 	pi_count = temp & 0x7F;
320 
321 	LEAVEFN();
322 
323 	return pi_count;
324 }
325 
326 /*
327  * This function will program the WDQS delays based on an absolute
328  * amount of PIs.
329  *
330  * (currently doesn't comprehend rank)
331  */
332 void set_wdqs(uint8_t channel, uint8_t rank,
333 	      uint8_t byte_lane, uint32_t pi_count)
334 {
335 	uint32_t reg;
336 	uint32_t msk;
337 	uint32_t temp;
338 
339 	ENTERFN();
340 
341 	DPF(D_TRN, "Wdqs ch%d rnk%d ln%d : pi=%03X\n",
342 	    channel, rank, byte_lane, pi_count);
343 
344 	/*
345 	 * RDPTR (1/2 MCLK, 64 PIs)
346 	 * BL0 -> B01PTRCTL0[07:04] (0x0-0xF)
347 	 * BL1 -> B01PTRCTL0[19:16] (0x0-0xF)
348 	 */
349 	reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) +
350 		(channel * DDRIODQ_CH_OFFSET);
351 	msk = (byte_lane & BIT0) ? (BIT19 | BIT18 | BIT17 | BIT16) :
352 		(BIT7 | BIT6 | BIT5 | BIT4);
353 	temp = pi_count / HALF_CLK;
354 	temp <<= (byte_lane & BIT0) ? 16 : 4;
355 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
356 
357 	/* Adjust PI_COUNT */
358 	pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
359 
360 	/*
361 	 * PI (1/64 MCLK, 1 PIs)
362 	 * BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)
363 	 * BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)
364 	 */
365 	reg = (byte_lane & BIT0) ? B1DLLPICODER0 : B0DLLPICODER0;
366 	reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) +
367 		(channel * DDRIODQ_CH_OFFSET));
368 	msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16);
369 	temp = pi_count << 16;
370 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
371 
372 	/*
373 	 * DEADBAND
374 	 * BL0/1 -> B01DBCTL1[07/10] (+1 select)
375 	 * BL0/1 -> B01DBCTL1[01/04] (enable)
376 	 */
377 	reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) +
378 		(channel * DDRIODQ_CH_OFFSET);
379 	msk = 0x00;
380 	temp = 0x00;
381 
382 	/* enable */
383 	msk |= (byte_lane & BIT0) ? BIT4 : BIT1;
384 	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
385 		temp |= msk;
386 
387 	/* select */
388 	msk |= (byte_lane & BIT0) ? BIT10 : BIT7;
389 	if (pi_count < EARLY_DB)
390 		temp |= msk;
391 
392 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
393 
394 	/* error check */
395 	if (pi_count > 0x3F) {
396 		training_message(channel, rank, byte_lane);
397 		mrc_post_code(0xee, 0xe2);
398 	}
399 
400 	LEAVEFN();
401 }
402 
403 /*
404  * This function will return the amount of WDQS delay on the given
405  * channel, rank, byte_lane as an absolute PI count.
406  *
407  * (currently doesn't comprehend rank)
408  */
409 uint32_t get_wdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane)
410 {
411 	uint32_t reg;
412 	uint32_t temp;
413 	uint32_t pi_count;
414 
415 	ENTERFN();
416 
417 	/*
418 	 * RDPTR (1/2 MCLK, 64 PIs)
419 	 * BL0 -> B01PTRCTL0[07:04] (0x0-0xF)
420 	 * BL1 -> B01PTRCTL0[19:16] (0x0-0xF)
421 	 */
422 	reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) +
423 		(channel * DDRIODQ_CH_OFFSET);
424 	temp = msg_port_alt_read(DDRPHY, reg);
425 	temp >>= (byte_lane & BIT0) ? 16 : 4;
426 	temp &= 0xF;
427 
428 	/* Adjust PI_COUNT */
429 	pi_count = (temp * HALF_CLK);
430 
431 	/*
432 	 * PI (1/64 MCLK, 1 PIs)
433 	 * BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)
434 	 * BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)
435 	 */
436 	reg = (byte_lane & BIT0) ? B1DLLPICODER0 : B0DLLPICODER0;
437 	reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) +
438 		(channel * DDRIODQ_CH_OFFSET));
439 	temp = msg_port_alt_read(DDRPHY, reg);
440 	temp >>= 16;
441 	temp &= 0x3F;
442 
443 	/* Adjust PI_COUNT */
444 	pi_count += temp;
445 
446 	LEAVEFN();
447 
448 	return pi_count;
449 }
450 
451 /*
452  * This function will program the WDQ delays based on an absolute
453  * number of PIs.
454  *
455  * (currently doesn't comprehend rank)
456  */
457 void set_wdq(uint8_t channel, uint8_t rank,
458 	     uint8_t byte_lane, uint32_t pi_count)
459 {
460 	uint32_t reg;
461 	uint32_t msk;
462 	uint32_t temp;
463 
464 	ENTERFN();
465 
466 	DPF(D_TRN, "Wdq ch%d rnk%d ln%d : pi=%03X\n",
467 	    channel, rank, byte_lane, pi_count);
468 
469 	/*
470 	 * RDPTR (1/2 MCLK, 64 PIs)
471 	 * BL0 -> B01PTRCTL0[03:00] (0x0-0xF)
472 	 * BL1 -> B01PTRCTL0[15:12] (0x0-0xF)
473 	 */
474 	reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) +
475 		(channel * DDRIODQ_CH_OFFSET);
476 	msk = (byte_lane & BIT0) ? (BIT15 | BIT14 | BIT13 | BIT12) :
477 		(BIT3 | BIT2 | BIT1 | BIT0);
478 	temp = pi_count / HALF_CLK;
479 	temp <<= (byte_lane & BIT0) ? 12 : 0;
480 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
481 
482 	/* Adjust PI_COUNT */
483 	pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
484 
485 	/*
486 	 * PI (1/64 MCLK, 1 PIs)
487 	 * BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)
488 	 * BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)
489 	 */
490 	reg = (byte_lane & BIT0) ? B1DLLPICODER0 : B0DLLPICODER0;
491 	reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) +
492 		(channel * DDRIODQ_CH_OFFSET));
493 	msk = (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8);
494 	temp = pi_count << 8;
495 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
496 
497 	/*
498 	 * DEADBAND
499 	 * BL0/1 -> B01DBCTL1[06/09] (+1 select)
500 	 * BL0/1 -> B01DBCTL1[00/03] (enable)
501 	 */
502 	reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) +
503 		(channel * DDRIODQ_CH_OFFSET);
504 	msk = 0x00;
505 	temp = 0x00;
506 
507 	/* enable */
508 	msk |= (byte_lane & BIT0) ? BIT3 : BIT0;
509 	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
510 		temp |= msk;
511 
512 	/* select */
513 	msk |= (byte_lane & BIT0) ? BIT9 : BIT6;
514 	if (pi_count < EARLY_DB)
515 		temp |= msk;
516 
517 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
518 
519 	/* error check */
520 	if (pi_count > 0x3F) {
521 		training_message(channel, rank, byte_lane);
522 		mrc_post_code(0xee, 0xe3);
523 	}
524 
525 	LEAVEFN();
526 }
527 
528 /*
529  * This function will return the amount of WDQ delay on the given
530  * channel, rank, byte_lane as an absolute PI count.
531  *
532  * (currently doesn't comprehend rank)
533  */
534 uint32_t get_wdq(uint8_t channel, uint8_t rank, uint8_t byte_lane)
535 {
536 	uint32_t reg;
537 	uint32_t temp;
538 	uint32_t pi_count;
539 
540 	ENTERFN();
541 
542 	/*
543 	 * RDPTR (1/2 MCLK, 64 PIs)
544 	 * BL0 -> B01PTRCTL0[03:00] (0x0-0xF)
545 	 * BL1 -> B01PTRCTL0[15:12] (0x0-0xF)
546 	 */
547 	reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) +
548 		(channel * DDRIODQ_CH_OFFSET);
549 	temp = msg_port_alt_read(DDRPHY, reg);
550 	temp >>= (byte_lane & BIT0) ? (12) : (0);
551 	temp &= 0xF;
552 
553 	/* Adjust PI_COUNT */
554 	pi_count = temp * HALF_CLK;
555 
556 	/*
557 	 * PI (1/64 MCLK, 1 PIs)
558 	 * BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)
559 	 * BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)
560 	 */
561 	reg = (byte_lane & BIT0) ? B1DLLPICODER0 : B0DLLPICODER0;
562 	reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) +
563 		(channel * DDRIODQ_CH_OFFSET));
564 	temp = msg_port_alt_read(DDRPHY, reg);
565 	temp >>= 8;
566 	temp &= 0x3F;
567 
568 	/* Adjust PI_COUNT */
569 	pi_count += temp;
570 
571 	LEAVEFN();
572 
573 	return pi_count;
574 }
575 
576 /*
577  * This function will program the WCMD delays based on an absolute
578  * number of PIs.
579  */
580 void set_wcmd(uint8_t channel, uint32_t pi_count)
581 {
582 	uint32_t reg;
583 	uint32_t msk;
584 	uint32_t temp;
585 
586 	ENTERFN();
587 
588 	/*
589 	 * RDPTR (1/2 MCLK, 64 PIs)
590 	 * CMDPTRREG[11:08] (0x0-0xF)
591 	 */
592 	reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET);
593 	msk = (BIT11 | BIT10 | BIT9 | BIT8);
594 	temp = pi_count / HALF_CLK;
595 	temp <<= 8;
596 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
597 
598 	/* Adjust PI_COUNT */
599 	pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
600 
601 	/*
602 	 * PI (1/64 MCLK, 1 PIs)
603 	 * CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)
604 	 * CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)
605 	 * CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)
606 	 * CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)
607 	 * CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)
608 	 * CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)
609 	 * CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)
610 	 * CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)
611 	 */
612 	reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET);
613 
614 	msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24 |
615 		BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16 |
616 		BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8 |
617 		BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0);
618 
619 	temp = (pi_count << 24) | (pi_count << 16) |
620 		(pi_count << 8) | (pi_count << 0);
621 
622 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
623 	reg = CMDDLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET);	/* PO */
624 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
625 
626 	/*
627 	 * DEADBAND
628 	 * CMDCFGREG0[17] (+1 select)
629 	 * CMDCFGREG0[16] (enable)
630 	 */
631 	reg = CMDCFGREG0 + (channel * DDRIOCCC_CH_OFFSET);
632 	msk = 0x00;
633 	temp = 0x00;
634 
635 	/* enable */
636 	msk |= BIT16;
637 	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
638 		temp |= msk;
639 
640 	/* select */
641 	msk |= BIT17;
642 	if (pi_count < EARLY_DB)
643 		temp |= msk;
644 
645 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
646 
647 	/* error check */
648 	if (pi_count > 0x3F)
649 		mrc_post_code(0xee, 0xe4);
650 
651 	LEAVEFN();
652 }
653 
654 /*
655  * This function will return the amount of WCMD delay on the given
656  * channel as an absolute PI count.
657  */
658 uint32_t get_wcmd(uint8_t channel)
659 {
660 	uint32_t reg;
661 	uint32_t temp;
662 	uint32_t pi_count;
663 
664 	ENTERFN();
665 
666 	/*
667 	 * RDPTR (1/2 MCLK, 64 PIs)
668 	 * CMDPTRREG[11:08] (0x0-0xF)
669 	 */
670 	reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET);
671 	temp = msg_port_alt_read(DDRPHY, reg);
672 	temp >>= 8;
673 	temp &= 0xF;
674 
675 	/* Adjust PI_COUNT */
676 	pi_count = temp * HALF_CLK;
677 
678 	/*
679 	 * PI (1/64 MCLK, 1 PIs)
680 	 * CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)
681 	 * CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)
682 	 * CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)
683 	 * CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)
684 	 * CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)
685 	 * CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)
686 	 * CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)
687 	 * CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)
688 	 */
689 	reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET);
690 	temp = msg_port_alt_read(DDRPHY, reg);
691 	temp >>= 16;
692 	temp &= 0x3F;
693 
694 	/* Adjust PI_COUNT */
695 	pi_count += temp;
696 
697 	LEAVEFN();
698 
699 	return pi_count;
700 }
701 
702 /*
703  * This function will program the WCLK delays based on an absolute
704  * number of PIs.
705  */
706 void set_wclk(uint8_t channel, uint8_t rank, uint32_t pi_count)
707 {
708 	uint32_t reg;
709 	uint32_t msk;
710 	uint32_t temp;
711 
712 	ENTERFN();
713 
714 	/*
715 	 * RDPTR (1/2 MCLK, 64 PIs)
716 	 * CCPTRREG[15:12] -> CLK1 (0x0-0xF)
717 	 * CCPTRREG[11:08] -> CLK0 (0x0-0xF)
718 	 */
719 	reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);
720 	msk = (BIT15 | BIT14 | BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8);
721 	temp = ((pi_count / HALF_CLK) << 12) | ((pi_count / HALF_CLK) << 8);
722 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
723 
724 	/* Adjust PI_COUNT */
725 	pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
726 
727 	/*
728 	 * PI (1/64 MCLK, 1 PIs)
729 	 * ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)
730 	 * ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)
731 	 */
732 	reg = rank ? ECCB1DLLPICODER0 : ECCB1DLLPICODER0;
733 	reg += (channel * DDRIOCCC_CH_OFFSET);
734 	msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16 |
735 		BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8);
736 	temp = (pi_count << 16) | (pi_count << 8);
737 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
738 	reg = rank ? ECCB1DLLPICODER1 : ECCB1DLLPICODER1;
739 	reg += (channel * DDRIOCCC_CH_OFFSET);
740 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
741 	reg = rank ? ECCB1DLLPICODER2 : ECCB1DLLPICODER2;
742 	reg += (channel * DDRIOCCC_CH_OFFSET);
743 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
744 	reg = rank ? ECCB1DLLPICODER3 : ECCB1DLLPICODER3;
745 	reg += (channel * DDRIOCCC_CH_OFFSET);
746 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
747 
748 	/*
749 	 * DEADBAND
750 	 * CCCFGREG1[11:08] (+1 select)
751 	 * CCCFGREG1[03:00] (enable)
752 	 */
753 	reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET);
754 	msk = 0x00;
755 	temp = 0x00;
756 
757 	/* enable */
758 	msk |= (BIT3 | BIT2 | BIT1 | BIT0);
759 	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
760 		temp |= msk;
761 
762 	/* select */
763 	msk |= (BIT11 | BIT10 | BIT9 | BIT8);
764 	if (pi_count < EARLY_DB)
765 		temp |= msk;
766 
767 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
768 
769 	/* error check */
770 	if (pi_count > 0x3F)
771 		mrc_post_code(0xee, 0xe5);
772 
773 	LEAVEFN();
774 }
775 
776 /*
777  * This function will return the amout of WCLK delay on the given
778  * channel, rank as an absolute PI count.
779  */
780 uint32_t get_wclk(uint8_t channel, uint8_t rank)
781 {
782 	uint32_t reg;
783 	uint32_t temp;
784 	uint32_t pi_count;
785 
786 	ENTERFN();
787 
788 	/*
789 	 * RDPTR (1/2 MCLK, 64 PIs)
790 	 * CCPTRREG[15:12] -> CLK1 (0x0-0xF)
791 	 * CCPTRREG[11:08] -> CLK0 (0x0-0xF)
792 	 */
793 	reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);
794 	temp = msg_port_alt_read(DDRPHY, reg);
795 	temp >>= rank ? 12 : 8;
796 	temp &= 0xF;
797 
798 	/* Adjust PI_COUNT */
799 	pi_count = temp * HALF_CLK;
800 
801 	/*
802 	 * PI (1/64 MCLK, 1 PIs)
803 	 * ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)
804 	 * ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)
805 	 */
806 	reg = rank ? ECCB1DLLPICODER0 : ECCB1DLLPICODER0;
807 	reg += (channel * DDRIOCCC_CH_OFFSET);
808 	temp = msg_port_alt_read(DDRPHY, reg);
809 	temp >>= rank ? 16 : 8;
810 	temp &= 0x3F;
811 
812 	pi_count += temp;
813 
814 	LEAVEFN();
815 
816 	return pi_count;
817 }
818 
819 /*
820  * This function will program the WCTL delays based on an absolute
821  * number of PIs.
822  *
823  * (currently doesn't comprehend rank)
824  */
825 void set_wctl(uint8_t channel, uint8_t rank, uint32_t pi_count)
826 {
827 	uint32_t reg;
828 	uint32_t msk;
829 	uint32_t temp;
830 
831 	ENTERFN();
832 
833 	/*
834 	 * RDPTR (1/2 MCLK, 64 PIs)
835 	 * CCPTRREG[31:28] (0x0-0xF)
836 	 * CCPTRREG[27:24] (0x0-0xF)
837 	 */
838 	reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);
839 	msk = (BIT31 | BIT30 | BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24);
840 	temp = ((pi_count / HALF_CLK) << 28) | ((pi_count / HALF_CLK) << 24);
841 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
842 
843 	/* Adjust PI_COUNT */
844 	pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
845 
846 	/*
847 	 * PI (1/64 MCLK, 1 PIs)
848 	 * ECCB1DLLPICODER?[29:24] (0x00-0x3F)
849 	 * ECCB1DLLPICODER?[29:24] (0x00-0x3F)
850 	 */
851 	reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET);
852 	msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24);
853 	temp = (pi_count << 24);
854 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
855 	reg = ECCB1DLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET);
856 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
857 	reg = ECCB1DLLPICODER2 + (channel * DDRIOCCC_CH_OFFSET);
858 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
859 	reg = ECCB1DLLPICODER3 + (channel * DDRIOCCC_CH_OFFSET);
860 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
861 
862 	/*
863 	 * DEADBAND
864 	 * CCCFGREG1[13:12] (+1 select)
865 	 * CCCFGREG1[05:04] (enable)
866 	 */
867 	reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET);
868 	msk = 0x00;
869 	temp = 0x00;
870 
871 	/* enable */
872 	msk |= (BIT5 | BIT4);
873 	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
874 		temp |= msk;
875 
876 	/* select */
877 	msk |= (BIT13 | BIT12);
878 	if (pi_count < EARLY_DB)
879 		temp |= msk;
880 
881 	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
882 
883 	/* error check */
884 	if (pi_count > 0x3F)
885 		mrc_post_code(0xee, 0xe6);
886 
887 	LEAVEFN();
888 }
889 
890 /*
891  * This function will return the amount of WCTL delay on the given
892  * channel, rank as an absolute PI count.
893  *
894  * (currently doesn't comprehend rank)
895  */
896 uint32_t get_wctl(uint8_t channel, uint8_t rank)
897 {
898 	uint32_t reg;
899 	uint32_t temp;
900 	uint32_t pi_count;
901 
902 	ENTERFN();
903 
904 	/*
905 	 * RDPTR (1/2 MCLK, 64 PIs)
906 	 * CCPTRREG[31:28] (0x0-0xF)
907 	 * CCPTRREG[27:24] (0x0-0xF)
908 	 */
909 	reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);
910 	temp = msg_port_alt_read(DDRPHY, reg);
911 	temp >>= 24;
912 	temp &= 0xF;
913 
914 	/* Adjust PI_COUNT */
915 	pi_count = temp * HALF_CLK;
916 
917 	/*
918 	 * PI (1/64 MCLK, 1 PIs)
919 	 * ECCB1DLLPICODER?[29:24] (0x00-0x3F)
920 	 * ECCB1DLLPICODER?[29:24] (0x00-0x3F)
921 	 */
922 	reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET);
923 	temp = msg_port_alt_read(DDRPHY, reg);
924 	temp >>= 24;
925 	temp &= 0x3F;
926 
927 	/* Adjust PI_COUNT */
928 	pi_count += temp;
929 
930 	LEAVEFN();
931 
932 	return pi_count;
933 }
934 
935 /*
936  * This function will program the internal Vref setting in a given
937  * byte lane in a given channel.
938  */
939 void set_vref(uint8_t channel, uint8_t byte_lane, uint32_t setting)
940 {
941 	uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL);
942 
943 	ENTERFN();
944 
945 	DPF(D_TRN, "Vref ch%d ln%d : val=%03X\n",
946 	    channel, byte_lane, setting);
947 
948 	mrc_alt_write_mask(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) +
949 		((byte_lane >> 1) * DDRIODQ_BL_OFFSET)),
950 		(vref_codes[setting] << 2),
951 		(BIT7 | BIT6 | BIT5 | BIT4 | BIT3 | BIT2));
952 
953 	/*
954 	 * need to wait ~300ns for Vref to settle
955 	 * (check that this is necessary)
956 	 */
957 	delay_n(300);
958 
959 	/* ??? may need to clear pointers ??? */
960 
961 	LEAVEFN();
962 }
963 
964 /*
965  * This function will return the internal Vref setting for the given
966  * channel, byte_lane.
967  */
968 uint32_t get_vref(uint8_t channel, uint8_t byte_lane)
969 {
970 	uint8_t j;
971 	uint32_t ret_val = sizeof(vref_codes) / 2;
972 	uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL);
973 	uint32_t temp;
974 
975 	ENTERFN();
976 
977 	temp = msg_port_alt_read(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) +
978 		((byte_lane >> 1) * DDRIODQ_BL_OFFSET)));
979 	temp >>= 2;
980 	temp &= 0x3F;
981 
982 	for (j = 0; j < sizeof(vref_codes); j++) {
983 		if (vref_codes[j] == temp) {
984 			ret_val = j;
985 			break;
986 		}
987 	}
988 
989 	LEAVEFN();
990 
991 	return ret_val;
992 }
993 
994 /*
995  * This function will return a 32-bit address in the desired
996  * channel and rank.
997  */
998 uint32_t get_addr(uint8_t channel, uint8_t rank)
999 {
1000 	uint32_t offset = 0x02000000;	/* 32MB */
1001 
1002 	/* Begin product specific code */
1003 	if (channel > 0) {
1004 		DPF(D_ERROR, "ILLEGAL CHANNEL\n");
1005 		DEAD_LOOP();
1006 	}
1007 
1008 	if (rank > 1) {
1009 		DPF(D_ERROR, "ILLEGAL RANK\n");
1010 		DEAD_LOOP();
1011 	}
1012 
1013 	/* use 256MB lowest density as per DRP == 0x0003 */
1014 	offset += rank * (256 * 1024 * 1024);
1015 
1016 	return offset;
1017 }
1018 
1019 /*
1020  * This function will sample the DQTRAINSTS registers in the given
1021  * channel/rank SAMPLE_SIZE times looking for a valid '0' or '1'.
1022  *
1023  * It will return an encoded 32-bit date in which each bit corresponds to
1024  * the sampled value on the byte lane.
1025  */
1026 uint32_t sample_dqs(struct mrc_params *mrc_params, uint8_t channel,
1027 		    uint8_t rank, bool rcvn)
1028 {
1029 	uint8_t j;	/* just a counter */
1030 	uint8_t bl;	/* which BL in the module (always 2 per module) */
1031 	uint8_t bl_grp;	/* which BL module */
1032 	/* byte lane divisor */
1033 	uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1;
1034 	uint32_t msk[2];	/* BLx in module */
1035 	/* DQTRAINSTS register contents for each sample */
1036 	uint32_t sampled_val[SAMPLE_SIZE];
1037 	uint32_t num_0s;	/* tracks the number of '0' samples */
1038 	uint32_t num_1s;	/* tracks the number of '1' samples */
1039 	uint32_t ret_val = 0x00;	/* assume all '0' samples */
1040 	uint32_t address = get_addr(channel, rank);
1041 
1042 	/* initialise msk[] */
1043 	msk[0] = rcvn ? BIT1 : BIT9;	/* BL0 */
1044 	msk[1] = rcvn ? BIT0 : BIT8;	/* BL1 */
1045 
1046 	/* cycle through each byte lane group */
1047 	for (bl_grp = 0; bl_grp < (NUM_BYTE_LANES / bl_divisor) / 2; bl_grp++) {
1048 		/* take SAMPLE_SIZE samples */
1049 		for (j = 0; j < SAMPLE_SIZE; j++) {
1050 			hte_mem_op(address, mrc_params->first_run,
1051 				   rcvn ? 0 : 1);
1052 			mrc_params->first_run = 0;
1053 
1054 			/*
1055 			 * record the contents of the proper
1056 			 * DQTRAINSTS register
1057 			 */
1058 			sampled_val[j] = msg_port_alt_read(DDRPHY,
1059 				(DQTRAINSTS +
1060 				(bl_grp * DDRIODQ_BL_OFFSET) +
1061 				(channel * DDRIODQ_CH_OFFSET)));
1062 		}
1063 
1064 		/*
1065 		 * look for a majority value (SAMPLE_SIZE / 2) + 1
1066 		 * on the byte lane and set that value in the corresponding
1067 		 * ret_val bit
1068 		 */
1069 		for (bl = 0; bl < 2; bl++) {
1070 			num_0s = 0x00;	/* reset '0' tracker for byte lane */
1071 			num_1s = 0x00;	/* reset '1' tracker for byte lane */
1072 			for (j = 0; j < SAMPLE_SIZE; j++) {
1073 				if (sampled_val[j] & msk[bl])
1074 					num_1s++;
1075 				else
1076 					num_0s++;
1077 			}
1078 		if (num_1s > num_0s)
1079 			ret_val |= (1 << (bl + (bl_grp * 2)));
1080 		}
1081 	}
1082 
1083 	/*
1084 	 * "ret_val.0" contains the status of BL0
1085 	 * "ret_val.1" contains the status of BL1
1086 	 * "ret_val.2" contains the status of BL2
1087 	 * etc.
1088 	 */
1089 	return ret_val;
1090 }
1091 
1092 /* This function will find the rising edge transition on RCVN or WDQS */
1093 void find_rising_edge(struct mrc_params *mrc_params, uint32_t delay[],
1094 		      uint8_t channel, uint8_t rank, bool rcvn)
1095 {
1096 	bool all_edges_found;	/* determines stop condition */
1097 	bool direction[NUM_BYTE_LANES];	/* direction indicator */
1098 	uint8_t sample;	/* sample counter */
1099 	uint8_t bl;	/* byte lane counter */
1100 	/* byte lane divisor */
1101 	uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1;
1102 	uint32_t sample_result[SAMPLE_CNT];	/* results of sample_dqs() */
1103 	uint32_t temp;
1104 	uint32_t transition_pattern;
1105 
1106 	ENTERFN();
1107 
1108 	/* select hte and request initial configuration */
1109 	select_hte();
1110 	mrc_params->first_run = 1;
1111 
1112 	/* Take 3 sample points (T1,T2,T3) to obtain a transition pattern */
1113 	for (sample = 0; sample < SAMPLE_CNT; sample++) {
1114 		/* program the desired delays for sample */
1115 		for (bl = 0; bl < (NUM_BYTE_LANES / bl_divisor); bl++) {
1116 			/* increase sample delay by 26 PI (0.2 CLK) */
1117 			if (rcvn) {
1118 				set_rcvn(channel, rank, bl,
1119 					 delay[bl] + (sample * SAMPLE_DLY));
1120 			} else {
1121 				set_wdqs(channel, rank, bl,
1122 					 delay[bl] + (sample * SAMPLE_DLY));
1123 			}
1124 		}
1125 
1126 		/* take samples (Tsample_i) */
1127 		sample_result[sample] = sample_dqs(mrc_params,
1128 			channel, rank, rcvn);
1129 
1130 		DPF(D_TRN,
1131 		    "Find rising edge %s ch%d rnk%d: #%d dly=%d dqs=%02X\n",
1132 		    (rcvn ? "RCVN" : "WDQS"), channel, rank, sample,
1133 		    sample * SAMPLE_DLY, sample_result[sample]);
1134 	}
1135 
1136 	/*
1137 	 * This pattern will help determine where we landed and ultimately
1138 	 * how to place RCVEN/WDQS.
1139 	 */
1140 	for (bl = 0; bl < (NUM_BYTE_LANES / bl_divisor); bl++) {
1141 		/* build transition_pattern (MSB is 1st sample) */
1142 		transition_pattern = 0;
1143 		for (sample = 0; sample < SAMPLE_CNT; sample++) {
1144 			transition_pattern |=
1145 				((sample_result[sample] & (1 << bl)) >> bl) <<
1146 				(SAMPLE_CNT - 1 - sample);
1147 		}
1148 
1149 		DPF(D_TRN, "=== transition pattern %d\n", transition_pattern);
1150 
1151 		/*
1152 		 * set up to look for rising edge based on
1153 		 * transition_pattern
1154 		 */
1155 		switch (transition_pattern) {
1156 		case 0:	/* sampled 0->0->0 */
1157 			/* move forward from T3 looking for 0->1 */
1158 			delay[bl] += 2 * SAMPLE_DLY;
1159 			direction[bl] = FORWARD;
1160 			break;
1161 		case 1:	/* sampled 0->0->1 */
1162 		case 5:	/* sampled 1->0->1 (bad duty cycle) *HSD#237503* */
1163 			/* move forward from T2 looking for 0->1 */
1164 			delay[bl] += 1 * SAMPLE_DLY;
1165 			direction[bl] = FORWARD;
1166 			break;
1167 		case 2:	/* sampled 0->1->0 (bad duty cycle) *HSD#237503* */
1168 		case 3:	/* sampled 0->1->1 */
1169 			/* move forward from T1 looking for 0->1 */
1170 			delay[bl] += 0 * SAMPLE_DLY;
1171 			direction[bl] = FORWARD;
1172 			break;
1173 		case 4:	/* sampled 1->0->0 (assumes BL8, HSD#234975) */
1174 			/* move forward from T3 looking for 0->1 */
1175 			delay[bl] += 2 * SAMPLE_DLY;
1176 			direction[bl] = FORWARD;
1177 			break;
1178 		case 6:	/* sampled 1->1->0 */
1179 		case 7:	/* sampled 1->1->1 */
1180 			/* move backward from T1 looking for 1->0 */
1181 			delay[bl] += 0 * SAMPLE_DLY;
1182 			direction[bl] = BACKWARD;
1183 			break;
1184 		default:
1185 			mrc_post_code(0xee, 0xee);
1186 			break;
1187 		}
1188 
1189 		/* program delays */
1190 		if (rcvn)
1191 			set_rcvn(channel, rank, bl, delay[bl]);
1192 		else
1193 			set_wdqs(channel, rank, bl, delay[bl]);
1194 	}
1195 
1196 	/*
1197 	 * Based on the observed transition pattern on the byte lane,
1198 	 * begin looking for a rising edge with single PI granularity.
1199 	 */
1200 	do {
1201 		all_edges_found = true;	/* assume all byte lanes passed */
1202 		/* take a sample */
1203 		temp = sample_dqs(mrc_params, channel, rank, rcvn);
1204 		/* check all each byte lane for proper edge */
1205 		for (bl = 0; bl < (NUM_BYTE_LANES / bl_divisor); bl++) {
1206 			if (temp & (1 << bl)) {
1207 				/* sampled "1" */
1208 				if (direction[bl] == BACKWARD) {
1209 					/*
1210 					 * keep looking for edge
1211 					 * on this byte lane
1212 					 */
1213 					all_edges_found = false;
1214 					delay[bl] -= 1;
1215 					if (rcvn) {
1216 						set_rcvn(channel, rank,
1217 							 bl, delay[bl]);
1218 					} else {
1219 						set_wdqs(channel, rank,
1220 							 bl, delay[bl]);
1221 					}
1222 				}
1223 			} else {
1224 				/* sampled "0" */
1225 				if (direction[bl] == FORWARD) {
1226 					/*
1227 					 * keep looking for edge
1228 					 * on this byte lane
1229 					 */
1230 					all_edges_found = false;
1231 					delay[bl] += 1;
1232 					if (rcvn) {
1233 						set_rcvn(channel, rank,
1234 							 bl, delay[bl]);
1235 					} else {
1236 						set_wdqs(channel, rank,
1237 							 bl, delay[bl]);
1238 					}
1239 				}
1240 			}
1241 		}
1242 	} while (!all_edges_found);
1243 
1244 	/* restore DDR idle state */
1245 	dram_init_command(DCMD_PREA(rank));
1246 
1247 	DPF(D_TRN, "Delay %03X %03X %03X %03X\n",
1248 	    delay[0], delay[1], delay[2], delay[3]);
1249 
1250 	LEAVEFN();
1251 }
1252 
1253 /*
1254  * This function will return a 32 bit mask that will be used to
1255  * check for byte lane failures.
1256  */
1257 uint32_t byte_lane_mask(struct mrc_params *mrc_params)
1258 {
1259 	uint32_t j;
1260 	uint32_t ret_val = 0x00;
1261 
1262 	/*
1263 	 * set ret_val based on NUM_BYTE_LANES such that you will check
1264 	 * only BL0 in result
1265 	 *
1266 	 * (each bit in result represents a byte lane)
1267 	 */
1268 	for (j = 0; j < MAX_BYTE_LANES; j += NUM_BYTE_LANES)
1269 		ret_val |= (1 << ((j / NUM_BYTE_LANES) * NUM_BYTE_LANES));
1270 
1271 	/*
1272 	 * HSD#235037
1273 	 * need to adjust the mask for 16-bit mode
1274 	 */
1275 	if (mrc_params->channel_width == X16)
1276 		ret_val |= (ret_val << 2);
1277 
1278 	return ret_val;
1279 }
1280 
1281 /*
1282  * Check memory executing simple write/read/verify at the specified address.
1283  *
1284  * Bits in the result indicate failure on specific byte lane.
1285  */
1286 uint32_t check_rw_coarse(struct mrc_params *mrc_params, uint32_t address)
1287 {
1288 	uint32_t result = 0;
1289 	uint8_t first_run = 0;
1290 
1291 	if (mrc_params->hte_setup) {
1292 		mrc_params->hte_setup = 0;
1293 		first_run = 1;
1294 		select_hte();
1295 	}
1296 
1297 	result = hte_basic_write_read(mrc_params, address, first_run,
1298 				      WRITE_TRAIN);
1299 
1300 	DPF(D_TRN, "check_rw_coarse result is %x\n", result);
1301 
1302 	return result;
1303 }
1304 
1305 /*
1306  * Check memory executing write/read/verify of many data patterns
1307  * at the specified address. Bits in the result indicate failure
1308  * on specific byte lane.
1309  */
1310 uint32_t check_bls_ex(struct mrc_params *mrc_params, uint32_t address)
1311 {
1312 	uint32_t result;
1313 	uint8_t first_run = 0;
1314 
1315 	if (mrc_params->hte_setup) {
1316 		mrc_params->hte_setup = 0;
1317 		first_run = 1;
1318 		select_hte();
1319 	}
1320 
1321 	result = hte_write_stress_bit_lanes(mrc_params, address, first_run);
1322 
1323 	DPF(D_TRN, "check_bls_ex result is %x\n", result);
1324 
1325 	return result;
1326 }
1327 
/*
 * 32-bit LFSR with characteristic polynomial: X^32 + X^22 + X^2 + X^1
 *
 * Replaces the value behind lfsr_ptr with its successor. One call clocks
 * the register 32 times; on every clock the register shifts right by one
 * and the inverted XOR of bits 0, 1, 2 and 22 enters at bit 31.
 */
void lfsr32(uint32_t *lfsr_ptr)
{
	uint32_t state = *lfsr_ptr;
	unsigned int clocks;

	for (clocks = 32; clocks != 0; clocks--) {
		/* the leading 1 inverts the XOR of the four tap bits */
		uint32_t feedback = 1u ^
				    (state & 1u) ^
				    ((state >> 1) & 1u) ^
				    ((state >> 2) & 1u) ^
				    ((state >> 22) & 1u);

		state = (state >> 1) | (feedback << 31);
	}

	*lfsr_ptr = state;
}
1353 
1354 /* Clear the pointers in a given byte lane in a given channel */
1355 void clear_pointers(void)
1356 {
1357 	uint8_t channel;
1358 	uint8_t bl;
1359 
1360 	ENTERFN();
1361 
1362 	for (channel = 0; channel < NUM_CHANNELS; channel++) {
1363 		for (bl = 0; bl < NUM_BYTE_LANES; bl++) {
1364 			mrc_alt_write_mask(DDRPHY,
1365 					   (B01PTRCTL1 +
1366 					   (channel * DDRIODQ_CH_OFFSET) +
1367 					   ((bl >> 1) * DDRIODQ_BL_OFFSET)),
1368 					   ~BIT8, BIT8);
1369 
1370 			mrc_alt_write_mask(DDRPHY,
1371 					   (B01PTRCTL1 +
1372 					   (channel * DDRIODQ_CH_OFFSET) +
1373 					   ((bl >> 1) * DDRIODQ_BL_OFFSET)),
1374 					   BIT8, BIT8);
1375 		}
1376 	}
1377 
1378 	LEAVEFN();
1379 }
1380 
1381 static void print_timings_internal(uint8_t algo, uint8_t channel, uint8_t rank,
1382 				   uint8_t bl_divisor)
1383 {
1384 	uint8_t bl;
1385 
1386 	switch (algo) {
1387 	case RCVN:
1388 		DPF(D_INFO, "\nRCVN[%02d:%02d]", channel, rank);
1389 		break;
1390 	case WDQS:
1391 		DPF(D_INFO, "\nWDQS[%02d:%02d]", channel, rank);
1392 		break;
1393 	case WDQX:
1394 		DPF(D_INFO, "\nWDQx[%02d:%02d]", channel, rank);
1395 		break;
1396 	case RDQS:
1397 		DPF(D_INFO, "\nRDQS[%02d:%02d]", channel, rank);
1398 		break;
1399 	case VREF:
1400 		DPF(D_INFO, "\nVREF[%02d:%02d]", channel, rank);
1401 		break;
1402 	case WCMD:
1403 		DPF(D_INFO, "\nWCMD[%02d:%02d]", channel, rank);
1404 		break;
1405 	case WCTL:
1406 		DPF(D_INFO, "\nWCTL[%02d:%02d]", channel, rank);
1407 		break;
1408 	case WCLK:
1409 		DPF(D_INFO, "\nWCLK[%02d:%02d]", channel, rank);
1410 		break;
1411 	default:
1412 		break;
1413 	}
1414 
1415 	for (bl = 0; bl < (NUM_BYTE_LANES / bl_divisor); bl++) {
1416 		switch (algo) {
1417 		case RCVN:
1418 			DPF(D_INFO, " %03d", get_rcvn(channel, rank, bl));
1419 			break;
1420 		case WDQS:
1421 			DPF(D_INFO, " %03d", get_wdqs(channel, rank, bl));
1422 			break;
1423 		case WDQX:
1424 			DPF(D_INFO, " %03d", get_wdq(channel, rank, bl));
1425 			break;
1426 		case RDQS:
1427 			DPF(D_INFO, " %03d", get_rdqs(channel, rank, bl));
1428 			break;
1429 		case VREF:
1430 			DPF(D_INFO, " %03d", get_vref(channel, bl));
1431 			break;
1432 		case WCMD:
1433 			DPF(D_INFO, " %03d", get_wcmd(channel));
1434 			break;
1435 		case WCTL:
1436 			DPF(D_INFO, " %03d", get_wctl(channel, rank));
1437 			break;
1438 		case WCLK:
1439 			DPF(D_INFO, " %03d", get_wclk(channel, rank));
1440 			break;
1441 		default:
1442 			break;
1443 		}
1444 	}
1445 }
1446 
1447 void print_timings(struct mrc_params *mrc_params)
1448 {
1449 	uint8_t algo;
1450 	uint8_t channel;
1451 	uint8_t rank;
1452 	uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1;
1453 
1454 	DPF(D_INFO, "\n---------------------------");
1455 	DPF(D_INFO, "\nALGO[CH:RK] BL0 BL1 BL2 BL3");
1456 	DPF(D_INFO, "\n===========================");
1457 
1458 	for (algo = 0; algo < MAX_ALGOS; algo++) {
1459 		for (channel = 0; channel < NUM_CHANNELS; channel++) {
1460 			if (mrc_params->channel_enables & (1 << channel)) {
1461 				for (rank = 0; rank < NUM_RANKS; rank++) {
1462 					if (mrc_params->rank_enables &
1463 						(1 << rank)) {
1464 						print_timings_internal(algo,
1465 							channel, rank,
1466 							bl_divisor);
1467 					}
1468 				}
1469 			}
1470 		}
1471 	}
1472 
1473 	DPF(D_INFO, "\n---------------------------");
1474 	DPF(D_INFO, "\n");
1475 }
1476