xref: /openbmc/u-boot/drivers/ddr/marvell/axp/ddr3_pbs.c (revision 9ab403d0dd3c88370612c97f8c4cb88199302833)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) Marvell International Ltd. and its affiliates
4  */
5 
6 #include <common.h>
7 #include <i2c.h>
8 #include <spl.h>
9 #include <asm/io.h>
10 #include <asm/arch/cpu.h>
11 #include <asm/arch/soc.h>
12 
13 #include "ddr3_hw_training.h"
14 
/*
 * Debug helpers
 *
 * DEBUG_PBS_S()/DEBUG_PBS_D() are compiled in when MV_DEBUG_PBS is defined,
 * DEBUG_PBS_FULL_S()/DEBUG_PBS_FULL_D() when MV_DEBUG_FULL_PBS is defined;
 * otherwise they expand to nothing and their arguments are not evaluated.
 * The 'l' argument of the _D macros is accepted but not used.
 *
 * The _C variants print a string, a value in hex and a newline. They are
 * wrapped in do { } while (0) so that they behave as one single statement,
 * which keeps them safe inside an unbraced if/else body.
 */
#define DEBUG_PBS_FULL_C(s, d, l) \
	do { \
		DEBUG_PBS_FULL_S(s); \
		DEBUG_PBS_FULL_D(d, l); \
		DEBUG_PBS_FULL_S("\n"); \
	} while (0)
#define DEBUG_PBS_C(s, d, l) \
	do { \
		DEBUG_PBS_S(s); \
		DEBUG_PBS_D(d, l); \
		DEBUG_PBS_S("\n"); \
	} while (0)

#ifdef MV_DEBUG_PBS
#define DEBUG_PBS_S(s)			puts(s)
#define DEBUG_PBS_D(d, l)		printf("%x", d)
#else
#define DEBUG_PBS_S(s)
#define DEBUG_PBS_D(d, l)
#endif

#ifdef MV_DEBUG_FULL_PBS
#define DEBUG_PBS_FULL_S(s)		puts(s)
#define DEBUG_PBS_FULL_D(d, l)		printf("%x", d)
#else
#define DEBUG_PBS_FULL_S(s)
#define DEBUG_PBS_FULL_D(d, l)
#endif
38 
39 #if defined(MV88F78X60) || defined(MV88F672X)
40 
41 /*
 * Temp array for skew data storage.
 * Flat array; this file indexes it as (pup * DQ_NUM) + dq.
 */
42 static u32 skew_array[(MAX_PUP_NUM) * DQ_NUM] = { 0 };
43 
44 /* PBS locked dq (per pup) - storage is defined outside this file */
45 extern u32 pbs_locked_dq[MAX_PUP_NUM][DQ_NUM];
46 extern u32 pbs_locked_dm[MAX_PUP_NUM];
47 extern u32 pbs_locked_value[MAX_PUP_NUM][DQ_NUM];
48 
/*
 * PBS compare patterns (defined outside this file). The shift helpers below
 * pick one of them according to dram_info->ddr_width and index it with the
 * pattern index.
 */
49 #if defined(MV88F672X)
50 extern u32 pbs_pattern[2][LEN_16BIT_PBS_PATTERN];
51 extern u32 pbs_pattern_32b[2][LEN_PBS_PATTERN];
52 #else
53 extern u32 pbs_pattern_32b[2][LEN_PBS_PATTERN];
54 extern u32 pbs_pattern_64b[2][LEN_PBS_PATTERN];
55 #endif
56 
/*
 * Per-pup DQ mapping, indexed [pup][dq]; the looked-up value is added to
 * PUP_PBS_TX / PUP_PBS_RX when the PBS registers are written.
 */
57 extern u32 pbs_dq_mapping[PUP_NUM_64BIT + 1][DQ_NUM];
58 
/* Forward declarations of the file-local helpers used by the PBS TX/RX flows */
59 static int ddr3_tx_shift_dqs_adll_step_before_fail(MV_DRAM_INFO *dram_info,
60 		u32 cur_pup, u32 pbs_pattern_idx, u32 ecc);
61 static int ddr3_rx_shift_dqs_to_first_fail(MV_DRAM_INFO *dram_info, u32 cur_pup,
62 		u32 pbs_pattern_idx, u32 ecc);
63 static int ddr3_pbs_per_bit(MV_DRAM_INFO *dram_info, int *start_over, int is_tx,
64 		u32 *pcur_pup, u32 pbs_pattern_idx, u32 ecc);
65 static int ddr3_set_pbs_results(MV_DRAM_INFO *dram_info, int is_tx);
66 static void ddr3_pbs_write_pup_dqs_reg(u32 cs, u32 pup, u32 dqs_delay);
67 
68 /*
69  * Name:     ddr3_pbs_tx
70  * Desc:     Execute the PBS TX phase.
 *           For each of the COUNT_PBS_PATTERN patterns the per-bit search is
 *           repeated COUNT_PBS_REPEAT times (and once more per repeat with the
 *           ECC mux switched when dram_info->ecc_ena is set). The skew values
 *           left in skew_array by every trial are summed, averaged per pattern
 *           and then averaged over the patterns; the final averages are stored
 *           back into skew_array before ddr3_set_pbs_results() is called.
71  * Args:     dram_info   ddr3 training information struct
72  * Notes:    The return value of ddr3_set_pbs_results() is not checked.
73  * Returns:  MV_OK if success, other error code if fail.
74  */
75 int ddr3_pbs_tx(MV_DRAM_INFO *dram_info)
76 {
77 	/* Array of Deskew results */
78 
79 	/*
80 	 * Array to hold the total sum of skew from all iterations
81 	 * (for average purpose)
82 	 */
83 	u32 skew_sum_array[MAX_PUP_NUM][DQ_NUM] = { {0} };
84 
85 	/*
86 	 * Array to hold the total average skew from both patterns
87 	 * (for average purpose)
88 	 */
89 	u32 pattern_skew_array[MAX_PUP_NUM][DQ_NUM] = { {0} };
90 
91 	u32 pbs_rep_time = 0;	/* counts number of loop in case of fail */
92 	/* bit array for unlock pups - used to repeat on the RX operation */
93 	u32 cur_pup;
94 	u32 max_pup;
95 	u32 pbs_retry;
96 	u32 pup, dq, pups, cur_max_pup, valid_pup, reg;
97 	u32 pattern_idx;
98 	u32 ecc;
99 	/* indicates whether we need to start the loop again */
100 	int start_over;
101 
102 	DEBUG_PBS_S("DDR3 - PBS TX - Starting PBS TX procedure\n");
103 
	/* pups and max_pup hold the same value: the total number of pups */
104 	pups = dram_info->num_of_total_pups;
105 	max_pup = dram_info->num_of_total_pups;
106 
107 	/* Enable SW override */
108 	reg = reg_read(REG_DRAM_TRAINING_2_ADDR) |
109 		(1 << REG_DRAM_TRAINING_2_SW_OVRD_OFFS);
110 	/* [0] = 1 - Enable SW override  */
111 	/* 0x15B8 - Training SW 2 Register */
112 	reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
	/* NOTE(review): the message below says "RX" although this is the TX flow */
113 	DEBUG_PBS_S("DDR3 - PBS RX - SW Override Enabled\n");
114 
	/* The training register is overwritten (not read-modify-write) */
115 	reg = 1 << REG_DRAM_TRAINING_AUTO_OFFS;
116 	reg_write(REG_DRAM_TRAINING_ADDR, reg);	/* 0x15B0 - Training Register */
117 
118 	/* Running twice for 2 different patterns. each patterns - 3 times */
119 	for (pattern_idx = 0; pattern_idx < COUNT_PBS_PATTERN; pattern_idx++) {
120 		DEBUG_PBS_C("DDR3 - PBS TX - Working with pattern - ",
121 			    pattern_idx, 1);
122 
123 		/* Reset sum array */
124 		for (pup = 0; pup < pups; pup++) {
125 			for (dq = 0; dq < DQ_NUM; dq++)
126 				skew_sum_array[pup][dq] = 0;
127 		}
128 
129 		/*
130 		 * Perform PBS several of times (3 for each pattern).
131 		 * At the end, we'll use the average
132 		 */
133 		/* If there is ECC, do each PBS again with mux change */
134 		for (pbs_retry = 0; pbs_retry < COUNT_PBS_REPEAT; pbs_retry++) {
135 			for (ecc = 0; ecc < (dram_info->ecc_ena + 1); ecc++) {
136 
137 				/*
138 				 * This parameter stores the current PUP
139 				 * num - ecc mode dependent - 4-8 / 1 pups
				 * (num_of_std_pups when ecc == 0, 1 when
				 * ecc == 1)
140 				 */
141 				cur_max_pup = (1 - ecc) *
142 					dram_info->num_of_std_pups + ecc;
143 
144 				if (ecc) {
145 					/* Only 1 pup in this case */
146 					valid_pup = 0x1;
147 				} else if (cur_max_pup > 4) {
148 					/* 64 bit - 8 pups */
149 					valid_pup = 0xFF;
150 				} else if (cur_max_pup == 4) {
151 					/* 32 bit - 4 pups */
152 					valid_pup = 0xF;
153 				} else {
154 					/* 16 bit - 2 pups */
155 					valid_pup = 0x3;
156 				}
157 
158 				/* ECC Support - Switch ECC Mux on ecc=1 */
159 				reg = reg_read(REG_DRAM_TRAINING_2_ADDR) &
160 					~(1 << REG_DRAM_TRAINING_2_ECC_MUX_OFFS);
161 				reg |= (dram_info->ecc_ena * ecc <<
162 					REG_DRAM_TRAINING_2_ECC_MUX_OFFS);
163 				reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
164 
165 				if (ecc)
166 					DEBUG_PBS_S("DDR3 - PBS Tx - ECC Mux Enabled\n");
167 				else
168 					DEBUG_PBS_S("DDR3 - PBS Tx - ECC Mux Disabled\n");
169 
170 				/* Init iteration values */
171 				/*
				 * Clear the locked DQs. In the ECC pass
				 * (ecc == 1) the row used is max_pup - 1.
				 */
172 				for (pup = 0; pup < cur_max_pup; pup++) {
173 					for (dq = 0; dq < DQ_NUM; dq++) {
174 						pbs_locked_dq[
175 							pup + ecc *
176 							(max_pup - 1)][dq] =
177 							0;
178 					}
179 				}
180 
181 				pbs_rep_time = 0;
182 				cur_pup = valid_pup;
183 				start_over = 0;
184 
185 				/*
186 				 * Run loop On current Pattern and current
187 				 * pattern iteration (just to cover the false
188 				 * fail problem)
189 				 */
190 				do {
191 					DEBUG_PBS_S("DDR3 - PBS Tx - Pbs Rep Loop is ");
192 					DEBUG_PBS_D(pbs_rep_time, 1);
193 					DEBUG_PBS_S(", for Retry No.");
194 					DEBUG_PBS_D(pbs_retry, 1);
195 					DEBUG_PBS_S("\n");
196 
197 					/* Set all PBS values to MIN (0) */
198 					DEBUG_PBS_S("DDR3 - PBS Tx - Set all PBS values to MIN\n");
199 
					/*
					 * NOTE(review): 'pup' is not assigned
					 * inside this do-loop; here it still
					 * holds cur_max_pup, the value left by
					 * the "clear the locked DQs" loop
					 * above. For ecc == 0 the mapping row
					 * used is therefore cur_max_pup -
					 * confirm this is the intended row.
					 */
200 					for (dq = 0; dq < DQ_NUM; dq++) {
201 						ddr3_write_pup_reg(
202 							PUP_PBS_TX +
203 							pbs_dq_mapping[pup *
204 								(1 - ecc) +
205 								ecc * ECC_PUP]
206 							[dq], CS0, (1 - ecc) *
207 							PUP_BC + ecc * ECC_PUP, 0,
208 							0);
209 					}
210 
211 					/*
212 					 * Shift DQ ADLL right, One step before
213 					 * fail
214 					 */
215 					DEBUG_PBS_S("DDR3 - PBS Tx - ADLL shift right one phase before fail\n");
216 
217 					if (MV_OK != ddr3_tx_shift_dqs_adll_step_before_fail
218 					    (dram_info, cur_pup, pattern_idx,
219 					     ecc))
220 						return MV_DDR3_TRAINING_ERR_PBS_ADLL_SHR_1PHASE;
221 
222 					/* PBS For each bit */
223 					DEBUG_PBS_S("DDR3 - PBS Tx - perform PBS for each bit\n");
224 
225 					/*
226 					 * In this stage - start_over = 0
227 					 */
228 					if (MV_OK != ddr3_pbs_per_bit(
229 						    dram_info, &start_over, 1,
230 						    &cur_pup, pattern_idx, ecc))
231 						return MV_DDR3_TRAINING_ERR_PBS_TX_PER_BIT;
232 
					/*
					 * Repeat while ddr3_pbs_per_bit() set
					 * start_over, at most
					 * COUNT_PBS_STARTOVER times
					 */
233 				} while ((start_over == 1) &&
234 					 (++pbs_rep_time < COUNT_PBS_STARTOVER));
235 
236 				if (pbs_rep_time == COUNT_PBS_STARTOVER &&
237 				    start_over == 1) {
238 					DEBUG_PBS_S("DDR3 - PBS Tx - FAIL - Adll reach max value\n");
239 					return MV_DDR3_TRAINING_ERR_PBS_TX_MAX_VAL;
240 				}
241 
				/* Debug dump only: this loop changes no state besides 'pup'/'dq' */
242 				DEBUG_PBS_FULL_C("DDR3 - PBS TX - values for iteration - ",
243 						 pbs_retry, 1);
244 				for (pup = 0; pup < cur_max_pup; pup++) {
245 					/*
246 					 * To minimize delay elements, inc
247 					 * from pbs value the min pbs val
248 					 */
249 					DEBUG_PBS_S("DDR3 - PBS - PUP");
250 					DEBUG_PBS_D((pup + (ecc * ECC_PUP)), 1);
251 					DEBUG_PBS_S(": ");
252 
253 					for (dq = 0; dq < DQ_NUM; dq++) {
254 						/* Set skew value for all dq */
255 						/*
256 						 * Bit# Deskew <- Bit# Deskew -
257 						 * last / first  failing bit
258 						 * Deskew For all bits (per PUP)
259 						 * (minimize delay elements)
260 						 */
261 						DEBUG_PBS_S("DQ");
262 						DEBUG_PBS_D(dq, 1);
263 						DEBUG_PBS_S("-");
264 						DEBUG_PBS_D(skew_array
265 							    [((pup) * DQ_NUM) +
266 							     dq], 2);
267 						DEBUG_PBS_S(", ");
268 					}
269 					DEBUG_PBS_S("\n");
270 				}
271 
272 				/*
273 				 * Collect the results we got on this trial
274 				 * of PBS
				 * (the ECC pass accumulates into row
				 * max_pup - 1 of the sum array)
275 				 */
276 				for (pup = 0; pup < cur_max_pup; pup++) {
277 					for (dq = 0; dq < DQ_NUM; dq++) {
278 						skew_sum_array[pup + (ecc * (max_pup - 1))]
279 							[dq] += skew_array
280 							[((pup) * DQ_NUM) + dq];
281 					}
282 				}
283 
284 				/* ECC Support - Disable ECC MUX */
285 				reg = reg_read(REG_DRAM_TRAINING_2_ADDR) &
286 					~(1 << REG_DRAM_TRAINING_2_ECC_MUX_OFFS);
287 				reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
288 			}
289 		}
290 
		/* Debug dump of the per-pattern averages (no state change) */
291 		DEBUG_PBS_C("DDR3 - PBS TX - values for current pattern - ",
292 			    pattern_idx, 1);
293 		for (pup = 0; pup < max_pup; pup++) {
294 			/*
295 			 * To minimize delay elements, inc from pbs value the
296 			 * min pbs val
297 			 */
298 			DEBUG_PBS_S("DDR3 - PBS - PUP");
299 			DEBUG_PBS_D(pup, 1);
300 			DEBUG_PBS_S(": ");
301 
302 			for (dq = 0; dq < DQ_NUM; dq++) {
303 				/* set skew value for all dq */
304 				/* Bit# Deskew <- Bit# Deskew - last / first  failing bit Deskew For all bits (per PUP) (minimize delay elements) */
305 				DEBUG_PBS_S("DQ");
306 				DEBUG_PBS_D(dq, 1);
307 				DEBUG_PBS_S("-");
308 				DEBUG_PBS_D(skew_sum_array[pup][dq] /
309 					    COUNT_PBS_REPEAT, 2);
310 				DEBUG_PBS_S(", ");
311 			}
312 			DEBUG_PBS_S("\n");
313 		}
314 
315 		/*
316 		 * Calculate the average skew for current pattern for each
317 		 * pup and each bit
318 		 */
319 		DEBUG_PBS_C("DDR3 - PBS TX - Average for pattern - ",
320 			    pattern_idx, 1);
321 
322 		for (pup = 0; pup < max_pup; pup++) {
323 			/*
324 			 * FOR ECC only :: found min and max value for current
325 			 * pattern skew array
326 			 */
327 			/* Loop for all dqs */
328 			for (dq = 0; dq < DQ_NUM; dq++) {
329 				pattern_skew_array[pup][dq] +=
330 					(skew_sum_array[pup][dq] /
331 					 COUNT_PBS_REPEAT);
332 			}
333 		}
334 	}
335 
336 	/* Calculate the average skew (over all patterns) into skew_array */
337 	for (pup = 0; pup < max_pup; pup++) {
338 		for (dq = 0; dq < DQ_NUM; dq++)
339 			skew_array[((pup) * DQ_NUM) + dq] =
340 				pattern_skew_array[pup][dq] / COUNT_PBS_PATTERN;
341 	}
342 
343 	DEBUG_PBS_S("DDR3 - PBS TX - Average for all patterns:\n");
344 	for (pup = 0; pup < max_pup; pup++) {
345 		/*
346 		 * To minimize delay elements, inc from pbs value the min
347 		 * pbs val
348 		 */
349 		DEBUG_PBS_S("DDR3 - PBS - PUP");
350 		DEBUG_PBS_D(pup, 1);
351 		DEBUG_PBS_S(": ");
352 
353 		for (dq = 0; dq < DQ_NUM; dq++) {
354 			/* Set skew value for all dq */
355 			/*
356 			 * Bit# Deskew <- Bit# Deskew - last / first
357 			 * failing bit Deskew For all bits (per PUP)
358 			 * (minimize delay elements)
359 			 */
360 			DEBUG_PBS_S("DQ");
361 			DEBUG_PBS_D(dq, 1);
362 			DEBUG_PBS_S("-");
363 			DEBUG_PBS_D(skew_array[(pup * DQ_NUM) + dq], 2);
364 			DEBUG_PBS_S(", ");
365 		}
366 		DEBUG_PBS_S("\n");
367 	}
368 
369 	/*
	 * Return ADLL to default value.
	 * With ECC enabled the last iteration rewrites the loop variable to
	 * ECC_PUP so that pup is written instead of pup max_pup - 1
	 * (presumably ECC_PUP >= max_pup, which also ends the loop - confirm).
	 */
370 	for (pup = 0; pup < max_pup; pup++) {
371 		if (pup == (max_pup - 1) && dram_info->ecc_ena)
372 			pup = ECC_PUP;
373 		ddr3_pbs_write_pup_dqs_reg(CS0, pup, INIT_WL_DELAY);
374 	}
375 
376 	/* Set averaged PBS results (return value is not checked) */
377 	ddr3_set_pbs_results(dram_info, 1);
378 
379 	/* Disable SW override - Must be in a different stage */
380 	/* [0]=0 - Disable SW override  */
381 	reg = reg_read(REG_DRAM_TRAINING_2_ADDR);
382 	reg &= ~(1 << REG_DRAM_TRAINING_2_SW_OVRD_OFFS);
383 	/* 0x15B8 - Training SW 2 Register */
384 	reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
385 
	/* Set the TRNBPOINT bit in the Training 1 register */
386 	reg = reg_read(REG_DRAM_TRAINING_1_ADDR) |
387 		(1 << REG_DRAM_TRAINING_1_TRNBPOINT_OFFS);
388 	reg_write(REG_DRAM_TRAINING_1_ADDR, reg);
389 
390 	DEBUG_PBS_S("DDR3 - PBS Tx - PBS TX ended successfuly\n");
391 
392 	return MV_OK;
393 }
394 
395 /*
396  * Name:     ddr3_tx_shift_dqs_adll_step_before_fail
397  * Desc:     Execute the Tx shift DQ phase.
398  * Args:     dram_info            ddr3 training information struct
399  *           cur_pup              bit array of the function active pups.
400  *           pbs_pattern_idx      Index of PBS pattern
401  * Notes:
402  * Returns:  MV_OK if success, other error code if fail.
403  */
404 static int ddr3_tx_shift_dqs_adll_step_before_fail(MV_DRAM_INFO *dram_info,
405 						   u32 cur_pup,
406 						   u32 pbs_pattern_idx, u32 ecc)
407 {
408 	u32 unlock_pup;		/* bit array of unlock pups  */
409 	u32 new_lockup_pup;	/* bit array of compare failed pups */
410 	u32 adll_val = 4;	/* INIT_WL_DELAY */
411 	u32 cur_max_pup, pup;
412 	u32 dqs_dly_set[MAX_PUP_NUM] = { 0 };
413 	u32 *pattern_ptr;
414 
415 	/* Choose pattern */
416 	switch (dram_info->ddr_width) {
417 #if defined(MV88F672X)
418 	case 16:
419 		pattern_ptr = (u32 *)&pbs_pattern[pbs_pattern_idx];
420 		break;
421 #endif
422 	case 32:
423 		pattern_ptr = (u32 *)&pbs_pattern_32b[pbs_pattern_idx];
424 		break;
425 #if defined(MV88F78X60)
426 	case 64:
427 		pattern_ptr = (u32 *)&pbs_pattern_64b[pbs_pattern_idx];
428 		break;
429 #endif
430 	default:
431 		return MV_FAIL;
432 	}
433 
434 	/* Set current pup number */
435 	if (cur_pup == 0x1)	/* Ecc mode */
436 		cur_max_pup = 1;
437 	else
438 		cur_max_pup = dram_info->num_of_std_pups;
439 
440 	unlock_pup = cur_pup;	/* '1' for each unlocked pup */
441 
442 	/* Loop on all ADLL Vaules */
443 	do {
444 		/* Loop until found first fail */
445 		adll_val++;
446 
447 		/*
448 		 * Increment (Move to right - ADLL) DQ TX delay
449 		 * (broadcast to all Data PUPs)
450 		 */
451 		for (pup = 0; pup < cur_max_pup; pup++)
452 			ddr3_pbs_write_pup_dqs_reg(CS0,
453 						   pup * (1 - ecc) +
454 						   ECC_PUP * ecc, adll_val);
455 
456 		/*
457 		 * Write and Read, compare results (read was already verified)
458 		 */
459 		/* 0 - all locked */
460 		new_lockup_pup = 0;
461 
462 		if (MV_OK != ddr3_sdram_compare(dram_info, unlock_pup,
463 						&new_lockup_pup,
464 						pattern_ptr, LEN_PBS_PATTERN,
465 						SDRAM_PBS_TX_OFFS, 1, 0,
466 						NULL,
467 						0))
468 			return MV_FAIL;
469 
470 		unlock_pup &= ~new_lockup_pup;
471 
472 		DEBUG_PBS_FULL_S("Shift DQS by 2 steps for PUPs: ");
473 		DEBUG_PBS_FULL_D(unlock_pup, 2);
474 		DEBUG_PBS_FULL_C(", Set ADLL value = ", adll_val, 2);
475 
476 		/* If any PUP failed there is '1' to mark the PUP */
477 		if (new_lockup_pup != 0) {
478 			/*
479 			 * Decrement (Move Back to Left two steps - ADLL)
480 			 * DQ TX delay for current failed pups and save
481 			 */
482 			for (pup = 0; pup < cur_max_pup; pup++) {
483 				if (((new_lockup_pup >> pup) & 0x1) &&
484 				    dqs_dly_set[pup] == 0)
485 					dqs_dly_set[pup] = adll_val - 1;
486 			}
487 		}
488 	} while ((unlock_pup != 0) && (adll_val != ADLL_MAX));
489 
490 	if (unlock_pup != 0) {
491 		DEBUG_PBS_FULL_S("DDR3 - PBS Tx - Shift DQ - Adll value reached maximum\n");
492 
493 		for (pup = 0; pup < cur_max_pup; pup++) {
494 			if (((unlock_pup >> pup) & 0x1) &&
495 			    dqs_dly_set[pup] == 0)
496 				dqs_dly_set[pup] = adll_val - 1;
497 		}
498 	}
499 
500 	DEBUG_PBS_FULL_C("PBS TX one step before fail last pups locked Adll ",
501 			 adll_val - 2, 2);
502 
503 	/* Set the PUP DQS DLY Values */
504 	for (pup = 0; pup < cur_max_pup; pup++)
505 		ddr3_pbs_write_pup_dqs_reg(CS0, pup * (1 - ecc) + ECC_PUP * ecc,
506 					   dqs_dly_set[pup]);
507 
508 	/* Found one phase before fail */
509 	return MV_OK;
510 }
511 
512 /*
513  * Name:     ddr3_pbs_rx
514  * Desc:     Execute the PBS RX phase.
 *           Same overall structure as the TX phase: per pattern and per
 *           repeat (plus an ECC pass when dram_info->ecc_ena is set) the
 *           skew values left in skew_array are summed, then averaged per
 *           pattern and over all patterns before ddr3_set_pbs_results() is
 *           called.
515  * Args:     dram_info   ddr3 training information struct
516  * Notes:    If ddr3_rx_shift_dqs_to_first_fail() fails, the read FIFO is
 *           reset, the RX PBS values are set to MIN_PBS and the function
 *           returns MV_OK without applying any PBS result.
 *           The return value of ddr3_set_pbs_results() is not checked.
517  * Returns:  MV_OK if success, other error code if fail.
518  */
519 int ddr3_pbs_rx(MV_DRAM_INFO *dram_info)
520 {
521 	/*
522 	 * Array to hold the total sum of skew from all iterations
523 	 * (for average purpose)
524 	 */
525 	u32 skew_sum_array[MAX_PUP_NUM][DQ_NUM] = { {0} };
526 
527 	/*
528 	 * Array to hold the total average skew from both patterns
529 	 * (for average purpose)
530 	 */
531 	u32 pattern_skew_array[MAX_PUP_NUM][DQ_NUM] = { {0} };
532 
533 	u32 pbs_rep_time = 0;	/* counts number of loop in case of fail */
534 	/* bit array for unlock pups - used to repeat on the RX operation */
535 	u32 cur_pup;
536 	u32 max_pup;
537 	u32 pbs_retry;
538 	u32 pup, dq, pups, cur_max_pup, valid_pup, reg;
539 	u32 pattern_idx;
540 	u32 ecc;
541 	/* indicates whether we need to start the loop again */
542 	int start_over;
543 	int status;
544 
545 	DEBUG_PBS_S("DDR3 - PBS RX - Starting PBS RX procedure\n");
546 
	/* pups and max_pup hold the same value: the total number of pups */
547 	pups = dram_info->num_of_total_pups;
548 	max_pup = dram_info->num_of_total_pups;
549 
550 	/* Enable SW override */
551 	reg = reg_read(REG_DRAM_TRAINING_2_ADDR) |
552 		(1 << REG_DRAM_TRAINING_2_SW_OVRD_OFFS);
553 	/* [0] = 1 - Enable SW override  */
554 	/* 0x15B8 - Training SW 2 Register */
555 	reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
556 	DEBUG_PBS_FULL_S("DDR3 - PBS RX - SW Override Enabled\n");
557 
	/* The training register is overwritten (not read-modify-write) */
558 	reg = 1 << REG_DRAM_TRAINING_AUTO_OFFS;
559 	reg_write(REG_DRAM_TRAINING_ADDR, reg);	/* 0x15B0 - Training Register */
560 
561 	/* Running twice for 2 different patterns. each patterns - 3 times */
562 	for (pattern_idx = 0; pattern_idx < COUNT_PBS_PATTERN; pattern_idx++) {
563 		DEBUG_PBS_FULL_C("DDR3 - PBS RX - Working with pattern - ",
564 				 pattern_idx, 1);
565 
566 		/* Reset sum array */
567 		for (pup = 0; pup < pups; pup++) {
568 			for (dq = 0; dq < DQ_NUM; dq++)
569 				skew_sum_array[pup][dq] = 0;
570 		}
571 
572 		/*
573 		 * Perform PBS several of times (3 for each pattern).
574 		 * At the end, we'll use the average
575 		 */
576 		/* If there is ECC, do each PBS again with mux change */
577 		for (pbs_retry = 0; pbs_retry < COUNT_PBS_REPEAT; pbs_retry++) {
578 			for (ecc = 0; ecc < (dram_info->ecc_ena + 1); ecc++) {
579 				/*
580 				 * This parameter stores the current PUP
581 				 * num - ecc mode dependent - 4-8 / 1 pups
				 * (num_of_std_pups when ecc == 0, 1 when
				 * ecc == 1)
582 				 */
583 				cur_max_pup = (1 - ecc) *
584 					dram_info->num_of_std_pups + ecc;
585 
586 				if (ecc) {
587 					/* Only 1 pup in this case */
588 					valid_pup = 0x1;
589 				} else if (cur_max_pup > 4) {
590 					/* 64 bit - 8 pups */
591 					valid_pup = 0xFF;
592 				} else if (cur_max_pup == 4) {
593 					/* 32 bit - 4 pups */
594 					valid_pup = 0xF;
595 				} else {
596 					/* 16 bit - 2 pups */
597 					valid_pup = 0x3;
598 				}
599 
600 				/* ECC Support - Switch ECC Mux on ecc=1 */
601 				reg = reg_read(REG_DRAM_TRAINING_2_ADDR) &
602 					~(1 << REG_DRAM_TRAINING_2_ECC_MUX_OFFS);
603 				reg |= (dram_info->ecc_ena * ecc <<
604 					REG_DRAM_TRAINING_2_ECC_MUX_OFFS);
605 				reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
606 
607 				if (ecc)
608 					DEBUG_PBS_FULL_S("DDR3 - PBS Rx - ECC Mux Enabled\n");
609 				else
610 					DEBUG_PBS_FULL_S("DDR3 - PBS Rx - ECC Mux Disabled\n");
611 
612 				/* Init iteration values */
613 				/*
				 * Clear the locked DQs. In the ECC pass
				 * (ecc == 1) the row used is max_pup - 1.
				 */
614 				for (pup = 0; pup < cur_max_pup; pup++) {
615 					for (dq = 0; dq < DQ_NUM; dq++) {
616 						pbs_locked_dq[
617 							pup + ecc * (max_pup - 1)][dq] =
618 							0;
619 					}
620 				}
621 
622 				pbs_rep_time = 0;
623 				cur_pup = valid_pup;
624 				start_over = 0;
625 
626 				/*
627 				 * Run loop On current Pattern and current
628 				 * pattern iteration (just to cover the false
629 				 * fail problem)
630 				 */
631 				do {
632 					DEBUG_PBS_FULL_S("DDR3 - PBS Rx - Pbs Rep Loop is ");
633 					DEBUG_PBS_FULL_D(pbs_rep_time, 1);
634 					DEBUG_PBS_FULL_S(", for Retry No.");
635 					DEBUG_PBS_FULL_D(pbs_retry, 1);
636 					DEBUG_PBS_FULL_S("\n");
637 
638 					/* Set all PBS values to MAX (MAX_PBS) */
639 					for (pup = 0; pup < cur_max_pup; pup++) {
640 						for (dq = 0; dq < DQ_NUM; dq++)
641 							ddr3_write_pup_reg(
642 								PUP_PBS_RX +
643 								pbs_dq_mapping[
644 								pup * (1 - ecc)
645 								+ ecc * ECC_PUP]
646 								[dq], CS0,
647 								pup + ecc * ECC_PUP,
648 								0, MAX_PBS);
649 					}
650 
651 					/* Set all DQS PBS values to MIN (0) */
652 					for (pup = 0; pup < cur_max_pup; pup++) {
653 						ddr3_write_pup_reg(PUP_PBS_RX +
654 								   DQ_NUM, CS0,
655 								   pup +
656 								   ecc *
657 								   ECC_PUP, 0,
658 								   0);
659 					}
660 
661 					/* Shift DQS, To first Fail */
662 					DEBUG_PBS_FULL_S("DDR3 - PBS Rx - Shift RX DQS to first fail\n");
663 
664 					status = ddr3_rx_shift_dqs_to_first_fail
665 						(dram_info, cur_pup,
666 						 pattern_idx, ecc);
					/*
					 * On failure RX PBS is skipped: the
					 * read FIFO is reset, defaults are
					 * written and MV_OK is returned.
					 */
667 					if (MV_OK != status) {
668 						DEBUG_PBS_S("DDR3 - PBS Rx - ddr3_rx_shift_dqs_to_first_fail failed.\n");
669 						DEBUG_PBS_D(status, 8);
670 						DEBUG_PBS_S("\nDDR3 - PBS Rx - SKIP.\n");
671 
672 						/* Reset read FIFO */
673 						reg = reg_read(REG_DRAM_TRAINING_ADDR);
674 						/* Start Auto Read Leveling procedure */
675 						reg |= (1 << REG_DRAM_TRAINING_RL_OFFS);
676 						/* 0x15B0 - Training Register */
677 						reg_write(REG_DRAM_TRAINING_ADDR, reg);
678 
679 						reg = reg_read(REG_DRAM_TRAINING_2_ADDR);
680 						reg |= ((1 << REG_DRAM_TRAINING_2_FIFO_RST_OFFS)
681 							+ (1 << REG_DRAM_TRAINING_2_SW_OVRD_OFFS));
682 						/* [0] = 1 - Enable SW override, [4] = 1 - FIFO reset  */
683 						/* 0x15B8 - Training SW 2 Register */
684 						reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
685 
						/*
						 * Poll until the FIFO reset
						 * bit reads back as 0; there
						 * is no timeout on this loop.
						 */
686 						do {
687 							reg = (reg_read(REG_DRAM_TRAINING_2_ADDR))
688 								& (1 <<	REG_DRAM_TRAINING_2_FIFO_RST_OFFS);
689 						} while (reg);	/* Wait for '0' */
690 
691 						reg = reg_read(REG_DRAM_TRAINING_ADDR);
692 						/* Clear Auto Read Leveling procedure */
693 						reg &= ~(1 << REG_DRAM_TRAINING_RL_OFFS);
694 						/* 0x15B0 - Training Register */
695 						reg_write(REG_DRAM_TRAINING_ADDR, reg);
696 
697 						/* Set ADLL to 15 */
						/*
						 * NOTE(review): this loop is
						 * bounded by max_pup while the
						 * other per-pup loops in this
						 * pass use cur_max_pup; with
						 * ecc == 1 it writes pups
						 * ECC_PUP .. ECC_PUP +
						 * max_pup - 1 - confirm intent.
						 */
698 						for (pup = 0; pup < max_pup;
699 						     pup++) {
700 							ddr3_write_pup_reg
701 							    (PUP_DQS_RD, CS0,
702 							     pup +
703 							     (ecc * ECC_PUP), 0,
704 							     15);
705 						}
706 
707 						/* Set all PBS values to MIN (MIN_PBS) */
708 						for (pup = 0; pup < cur_max_pup;
709 						     pup++) {
710 							for (dq = 0;
711 							     dq < DQ_NUM; dq++)
712 								ddr3_write_pup_reg
713 								    (PUP_PBS_RX +
714 								     pbs_dq_mapping
715 								     [pup * (1 - ecc) +
716 								      ecc * ECC_PUP]
717 								     [dq], CS0,
718 								     pup + ecc * ECC_PUP,
719 								     0, MIN_PBS);
720 						}
721 
722 						return MV_OK;
723 					}
724 
725 					/* PBS For each bit */
726 					DEBUG_PBS_FULL_S("DDR3 - PBS Rx - perform PBS for each bit\n");
727 					/* in this stage - start_over = 0; */
728 					if (MV_OK != ddr3_pbs_per_bit(
729 						    dram_info, &start_over,
730 						    0, &cur_pup,
731 						    pattern_idx, ecc)) {
732 						DEBUG_PBS_S("DDR3 - PBS Rx - ddr3_pbs_per_bit failed.");
733 						return MV_DDR3_TRAINING_ERR_PBS_RX_PER_BIT;
734 					}
735 
					/*
					 * Repeat while ddr3_pbs_per_bit() set
					 * start_over, at most
					 * COUNT_PBS_STARTOVER times
					 */
736 				} while ((start_over == 1) &&
737 					 (++pbs_rep_time < COUNT_PBS_STARTOVER));
738 
739 				if (pbs_rep_time == COUNT_PBS_STARTOVER &&
740 				    start_over == 1) {
741 					DEBUG_PBS_FULL_S("DDR3 - PBS Rx - FAIL - Algorithm failed doing RX PBS\n");
742 					return MV_DDR3_TRAINING_ERR_PBS_RX_MAX_VAL;
743 				}
744 
745 				/* Return DQS ADLL to default value (INIT_RL_DELAY) */
746 				/* for every pup handled in this pass */
747 				for (pup = 0; pup < cur_max_pup; pup++)
748 					ddr3_write_pup_reg(PUP_DQS_RD, CS0,
749 							   pup + ecc * ECC_PUP,
750 							   0, INIT_RL_DELAY);
751 
				/* Debug dump only: this loop changes no state besides 'pup'/'dq' */
752 				DEBUG_PBS_FULL_C("DDR3 - PBS RX - values for iteration - ",
753 						 pbs_retry, 1);
754 				for (pup = 0; pup < cur_max_pup; pup++) {
755 					/*
756 					 * To minimize delay elements, inc from
757 					 * pbs value the min pbs val
758 					 */
759 					DEBUG_PBS_FULL_S("DDR3 - PBS - PUP");
760 					DEBUG_PBS_FULL_D((pup +
761 							  (ecc * ECC_PUP)), 1);
762 					DEBUG_PBS_FULL_S(": ");
763 
764 					for (dq = 0; dq < DQ_NUM; dq++) {
765 						/* Set skew value for all dq */
766 						/*
767 						 * Bit# Deskew <- Bit# Deskew -
768 						 * last / first  failing bit
769 						 * Deskew For all bits (per PUP)
770 						 * (minimize delay elements)
771 						 */
772 						DEBUG_PBS_FULL_S("DQ");
773 						DEBUG_PBS_FULL_D(dq, 1);
774 						DEBUG_PBS_FULL_S("-");
775 						DEBUG_PBS_FULL_D(skew_array
776 								 [((pup) *
777 								   DQ_NUM) +
778 								  dq], 2);
779 						DEBUG_PBS_FULL_S(", ");
780 					}
781 					DEBUG_PBS_FULL_S("\n");
782 				}
783 
784 				/*
785 				 * Collect the results we got on this trial
786 				 * of PBS
				 * (the ECC pass accumulates into row
				 * max_pup - 1 of the sum array)
787 				 */
788 				for (pup = 0; pup < cur_max_pup; pup++) {
789 					for (dq = 0; dq < DQ_NUM; dq++) {
790 						skew_sum_array
791 							[pup + (ecc * (max_pup - 1))]
792 							[dq] +=
793 							skew_array[((pup) * DQ_NUM) + dq];
794 					}
795 				}
796 
797 				/* ECC Support - Disable ECC MUX */
798 				reg = reg_read(REG_DRAM_TRAINING_2_ADDR) &
799 					~(1 << REG_DRAM_TRAINING_2_ECC_MUX_OFFS);
800 				reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
801 			}
802 		}
803 
804 		/*
805 		 * Calculate the average skew for current pattern for each
806 		 * pup and each bit
807 		 */
808 		DEBUG_PBS_FULL_C("DDR3 - PBS RX - Average for pattern - ",
809 				 pattern_idx, 1);
810 		for (pup = 0; pup < max_pup; pup++) {
811 			/*
812 			 * FOR ECC only :: found min and max value for
813 			 * current pattern skew array
814 			 */
815 			/* Loop for all dqs */
816 			for (dq = 0; dq < DQ_NUM; dq++) {
817 				pattern_skew_array[pup][dq] +=
818 					(skew_sum_array[pup][dq] /
819 					 COUNT_PBS_REPEAT);
820 			}
821 		}
822 
		/* Debug dump of the per-pattern averages (no state change) */
823 		DEBUG_PBS_C("DDR3 - PBS RX - values for current pattern - ",
824 			    pattern_idx, 1);
825 		for (pup = 0; pup < max_pup; pup++) {
826 			/*
827 			 * To minimize delay elements, inc from pbs value the
828 			 * min pbs val
829 			 */
830 			DEBUG_PBS_S("DDR3 - PBS RX - PUP");
831 			DEBUG_PBS_D(pup, 1);
832 			DEBUG_PBS_S(": ");
833 
834 			for (dq = 0; dq < DQ_NUM; dq++) {
835 				/* Set skew value for all dq */
836 				/*
837 				 * Bit# Deskew <- Bit# Deskew - last / first
838 				 * failing bit Deskew For all bits (per PUP)
839 				 * (minimize delay elements)
840 				 */
841 				DEBUG_PBS_S("DQ");
842 				DEBUG_PBS_D(dq, 1);
843 				DEBUG_PBS_S("-");
844 				DEBUG_PBS_D(skew_sum_array[pup][dq] /
845 					    COUNT_PBS_REPEAT, 2);
846 				DEBUG_PBS_S(", ");
847 			}
848 			DEBUG_PBS_S("\n");
849 		}
850 	}
851 
852 	/* Calculate the average skew (over all patterns) into skew_array */
853 	for (pup = 0; pup < max_pup; pup++) {
854 		for (dq = 0; dq < DQ_NUM; dq++)
855 			skew_array[((pup) * DQ_NUM) + dq] =
856 				pattern_skew_array[pup][dq] / COUNT_PBS_PATTERN;
857 	}
858 
859 	DEBUG_PBS_S("DDR3 - PBS RX - Average for all patterns:\n");
860 	for (pup = 0; pup < max_pup; pup++) {
861 		/*
862 		 * To minimize delay elements, inc from pbs value the
863 		 * min pbs val
864 		 */
865 		DEBUG_PBS_S("DDR3 - PBS - PUP");
866 		DEBUG_PBS_D(pup, 1);
867 		DEBUG_PBS_S(": ");
868 
869 		for (dq = 0; dq < DQ_NUM; dq++) {
870 			/* Set skew value for all dq */
871 			/*
872 			 * Bit# Deskew <- Bit# Deskew - last / first
873 			 * failing bit Deskew For all bits (per PUP)
874 			 * (minimize delay elements)
875 			 */
876 			DEBUG_PBS_S("DQ");
877 			DEBUG_PBS_D(dq, 1);
878 			DEBUG_PBS_S("-");
879 			DEBUG_PBS_D(skew_array[(pup * DQ_NUM) + dq], 2);
880 			DEBUG_PBS_S(", ");
881 		}
882 		DEBUG_PBS_S("\n");
883 	}
884 
885 	/* Return ADLL to default value (single write addressed to PUP_BC) */
886 	ddr3_write_pup_reg(PUP_DQS_RD, CS0, PUP_BC, 0, INIT_RL_DELAY);
887 
888 	/* Set averaged PBS results (return value is not checked) */
889 	ddr3_set_pbs_results(dram_info, 0);
890 
891 	/* Disable SW override - Must be in a different stage */
892 	/* [0]=0 - Disable SW override  */
893 	reg = reg_read(REG_DRAM_TRAINING_2_ADDR);
894 	reg &= ~(1 << REG_DRAM_TRAINING_2_SW_OVRD_OFFS);
895 	/* 0x15B8 - Training SW 2 Register */
896 	reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
897 
	/* Set the TRNBPOINT bit in the Training 1 register */
898 	reg = reg_read(REG_DRAM_TRAINING_1_ADDR) |
899 		(1 << REG_DRAM_TRAINING_1_TRNBPOINT_OFFS);
900 	reg_write(REG_DRAM_TRAINING_1_ADDR, reg);
901 
902 	DEBUG_PBS_FULL_S("DDR3 - PBS RX - ended successfuly\n");
903 
904 	return MV_OK;
905 }
906 
907 /*
908  * Name:     ddr3_rx_shift_dqs_to_first_fail
909  * Desc:     Execute the Rx shift DQ phase.
910  * Args:     dram_info           ddr3 training information struct
911  *           cur_pup             bit array of the function active pups.
912  *           pbs_pattern_idx     Index of PBS pattern
913  * Notes:
914  * Returns:  MV_OK if success, other error code if fail.
915  */
916 static int ddr3_rx_shift_dqs_to_first_fail(MV_DRAM_INFO *dram_info, u32 cur_pup,
917 					   u32 pbs_pattern_idx, u32 ecc)
918 {
919 	u32 unlock_pup;		/* bit array of unlock pups  */
920 	u32 new_lockup_pup;	/* bit array of compare failed pups */
921 	u32 adll_val = MAX_DELAY;
922 	u32 dqs_deskew_val = 0;	/* current value of DQS PBS deskew */
923 	u32 cur_max_pup, pup, pass_pup;
924 	u32 *pattern_ptr;
925 
926 	/* Choose pattern */
927 	switch (dram_info->ddr_width) {
928 #if defined(MV88F672X)
929 	case 16:
930 		pattern_ptr = (u32 *)&pbs_pattern[pbs_pattern_idx];
931 		break;
932 #endif
933 	case 32:
934 		pattern_ptr = (u32 *)&pbs_pattern_32b[pbs_pattern_idx];
935 		break;
936 #if defined(MV88F78X60)
937 	case 64:
938 		pattern_ptr = (u32 *)&pbs_pattern_64b[pbs_pattern_idx];
939 		break;
940 #endif
941 	default:
942 		return MV_FAIL;
943 	}
944 
945 	/* Set current pup number */
946 	if (cur_pup == 0x1)	/* Ecc mode */
947 		cur_max_pup = 1;
948 	else
949 		cur_max_pup = dram_info->num_of_std_pups;
950 
951 	unlock_pup = cur_pup;	/* '1' for each unlocked pup */
952 
953 	DEBUG_PBS_FULL_S("DDR3 - PBS RX - Shift DQS - Starting...\n");
954 
955 	/* Set DQS ADLL to MAX */
956 	DEBUG_PBS_FULL_S("DDR3 - PBS RX - Shift DQS - Set DQS ADLL to Max for all PUPs\n");
957 	for (pup = 0; pup < cur_max_pup; pup++)
958 		ddr3_write_pup_reg(PUP_DQS_RD, CS0, pup + ecc * ECC_PUP, 0,
959 				   MAX_DELAY);
960 
961 	/* Loop on all ADLL Vaules */
962 	do {
963 		/* Loop until found fail for all pups */
964 		new_lockup_pup = 0;
965 		if (MV_OK != ddr3_sdram_compare(dram_info, unlock_pup,
966 						&new_lockup_pup,
967 						pattern_ptr, LEN_PBS_PATTERN,
968 						SDRAM_PBS_I_OFFS +
969 						pbs_pattern_idx * SDRAM_PBS_NEXT_OFFS,
970 						0, 0, NULL, 0)) {
971 			DEBUG_PBS_S("DDR3 - PBS Rx - Shift DQS - MV_DDR3_TRAINING_ERR_PBS_SHIFT_QDS_SRAM_CMP(ddr3_sdram_compare)\n");
972 			return MV_DDR3_TRAINING_ERR_PBS_SHIFT_QDS_SRAM_CMP;
973 		}
974 
975 		if ((new_lockup_pup != 0) && (dqs_deskew_val <= 1)) {
976 			/* Fail on start with first deskew value */
977 			/* Decrement DQS ADLL */
978 			--adll_val;
979 			if (adll_val == ADLL_MIN) {
980 				DEBUG_PBS_S("DDR3 - PBS Rx - Shift DQS - fail on start with first deskew value\n");
981 				return MV_DDR3_TRAINING_ERR_PBS_SHIFT_QDS_SRAM_CMP;
982 			}
983 			ddr3_write_pup_reg(PUP_DQS_RD, CS0, pup + ecc * ECC_PUP,
984 					   0, adll_val);
985 			continue;
986 		}
987 
988 		/* Update all new locked pups */
989 		unlock_pup &= ~new_lockup_pup;
990 
991 		if ((unlock_pup == 0) || (dqs_deskew_val == MAX_PBS)) {
992 			if (dqs_deskew_val == MAX_PBS) {
993 				/*
994 				 * Reach max value of dqs deskew or get fail
995 				 * for all pups
996 				 */
997 				DEBUG_PBS_FULL_S("DDR3 - PBS RX - Shift DQS - DQS deskew reached maximum value\n");
998 			}
999 			break;
1000 		}
1001 
1002 		DEBUG_PBS_FULL_S("DDR3 - PBS RX - Shift DQS - Inc DQS deskew for PUPs: ");
1003 		DEBUG_PBS_FULL_D(unlock_pup, 2);
1004 		DEBUG_PBS_FULL_C(", deskew = ", dqs_deskew_val, 2);
1005 
1006 		/* Increment DQS deskew elements - Only for unlocked pups */
1007 		dqs_deskew_val++;
1008 		for (pup = 0; pup < cur_max_pup; pup++) {
1009 			if (IS_PUP_ACTIVE(unlock_pup, pup) == 1) {
1010 				ddr3_write_pup_reg(PUP_PBS_RX + DQS_DQ_NUM, CS0,
1011 						   pup + ecc * ECC_PUP, 0,
1012 						   dqs_deskew_val);
1013 			}
1014 		}
1015 	} while (1);
1016 
1017 	DEBUG_PBS_FULL_S("DDR3 - PBS RX - Shift DQS - ADLL shift one step before fail\n");
1018 	/* Continue to ADLL shift one step before fail */
1019 	unlock_pup = cur_pup;
1020 	do {
1021 		/* Loop until pass compare for all pups */
1022 		new_lockup_pup = 0;
1023 		/* Read and compare results  */
1024 		if (MV_OK != ddr3_sdram_compare(dram_info, unlock_pup, &new_lockup_pup,
1025 						pattern_ptr, LEN_PBS_PATTERN,
1026 						SDRAM_PBS_I_OFFS +
1027 						pbs_pattern_idx * SDRAM_PBS_NEXT_OFFS,
1028 						1, 0, NULL, 0)) {
1029 			DEBUG_PBS_S("DDR3 - PBS Rx - Shift DQS - MV_DDR3_TRAINING_ERR_PBS_SHIFT_QDS_SRAM_CMP(ddr3_sdram_compare)\n");
1030 			return MV_DDR3_TRAINING_ERR_PBS_SHIFT_QDS_SRAM_CMP;
1031 		}
1032 
1033 		/*
1034 		 * Get mask for pup which passed so their adll will be
1035 		 * changed to 2 steps before fails
1036 		 */
1037 		pass_pup = unlock_pup & ~new_lockup_pup;
1038 
1039 		DEBUG_PBS_FULL_S("Shift DQS by 2 steps for PUPs: ");
1040 		DEBUG_PBS_FULL_D(pass_pup, 2);
1041 		DEBUG_PBS_FULL_C(", Set ADLL value = ", (adll_val - 2), 2);
1042 
1043 		/* Only for pass pups   */
1044 		for (pup = 0; pup < cur_max_pup; pup++) {
1045 			if (IS_PUP_ACTIVE(pass_pup, pup) == 1) {
1046 				ddr3_write_pup_reg(PUP_DQS_RD, CS0,
1047 						   pup + ecc * ECC_PUP, 0,
1048 						   (adll_val - 2));
1049 			}
1050 		}
1051 
1052 		/* Locked pups that compare success  */
1053 		unlock_pup &= new_lockup_pup;
1054 
1055 		if (unlock_pup == 0) {
1056 			/* All pups locked */
1057 			break;
1058 		}
1059 
1060 		/* Found error */
1061 		if (adll_val == 0) {
1062 			DEBUG_PBS_FULL_S("DDR3 - PBS Rx - Shift DQS - Adll reach min value\n");
1063 			return MV_DDR3_TRAINING_ERR_PBS_SHIFT_QDS_MAX_VAL;
1064 		}
1065 
1066 		/*
1067 		 * Decrement (Move Back to Left one phase - ADLL) dqs RX delay
1068 		 */
1069 		adll_val--;
1070 		for (pup = 0; pup < cur_max_pup; pup++) {
1071 			if (IS_PUP_ACTIVE(unlock_pup, pup) == 1) {
1072 				ddr3_write_pup_reg(PUP_DQS_RD, CS0,
1073 						   pup + ecc * ECC_PUP, 0,
1074 						   adll_val);
1075 			}
1076 		}
1077 	} while (1);
1078 
1079 	return MV_OK;
1080 }
1081 
1082 /*
1083  * lock_pups() extracted from ddr3_pbs_per_bit(). This just got too
1084  * much indented making it hard to read / edit.
1085  */
1086 static void lock_pups(u32 pup, u32 *pup_locked, u8 *unlock_pup_dq_array,
1087 		      u32 pbs_curr_val, u32 start_pbs, u32 ecc, int is_tx)
1088 {
1089 	u32 dq;
1090 	int idx;
1091 
1092 	/* Lock PBS value for all remaining PUPs bits */
1093 	DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - Lock PBS value for all remaining PUPs bits, pup ");
1094 	DEBUG_PBS_FULL_D(pup, 1);
1095 	DEBUG_PBS_FULL_C(" pbs value ", pbs_curr_val, 2);
1096 
1097 	idx = pup * (1 - ecc) + ecc * ECC_PUP;
1098 	*pup_locked &= ~(1 << pup);
1099 
1100 	for (dq = 0; dq < DQ_NUM; dq++) {
1101 		if (IS_PUP_ACTIVE(unlock_pup_dq_array[dq], pup) == 1) {
1102 			int offs;
1103 
1104 			/* Lock current dq */
1105 			unlock_pup_dq_array[dq] &= ~(1 << pup);
1106 			skew_array[(pup * DQ_NUM) + dq] = pbs_curr_val;
1107 
1108 			if (is_tx == 1)
1109 				offs = PUP_PBS_TX;
1110 			else
1111 				offs = PUP_PBS_RX;
1112 
1113 			ddr3_write_pup_reg(offs +
1114 					   pbs_dq_mapping[idx][dq], CS0,
1115 					   idx, 0, start_pbs);
1116 		}
1117 	}
1118 }
1119 
1120 /*
1121  * Name:     ddr3_pbs_per_bit
1122  * Desc:     Execute the Per Bit Skew phase.
1123  * Args:     start_over      Return whether need to start over the algorithm
1124  *           is_tx           Indicate whether Rx or Tx
1125  *           pcur_pup        bit array of the function active pups. return the
1126  *                           pups that need to repeat on the PBS
1127  *           pbs_pattern_idx Index of PBS pattern
1128  *
1129  * Notes:    Current implementation supports double activation of this function.
1130  *           i.e. in order to activate this function (using start_over) more than
1131  *           twice, the implementation should change.
1132  *           imlementation limitation are marked using
1133  *           ' CHIP-ONLY! - Implementation Limitation '
1134  * Returns:  MV_OK if success, other error code if fail.
1135  */
1136 static int ddr3_pbs_per_bit(MV_DRAM_INFO *dram_info, int *start_over, int is_tx,
1137 			    u32 *pcur_pup, u32 pbs_pattern_idx, u32 ecc)
1138 {
1139 	/*
1140 	 * Bit array to indicate if we already get fail on bit per pup & dq bit
1141 	 */
1142 	u8 unlock_pup_dq_array[DQ_NUM] = {
1143 		*pcur_pup, *pcur_pup, *pcur_pup, *pcur_pup, *pcur_pup,
1144 		*pcur_pup, *pcur_pup, *pcur_pup
1145 	};
1146 
1147 	u8 cmp_unlock_pup_dq_array[COUNT_PBS_COMP_RETRY_NUM][DQ_NUM];
1148 	u32 pup, dq;
1149 	/* value of pbs is according to RX or TX */
1150 	u32 start_pbs, last_pbs;
1151 	u32 pbs_curr_val;
1152 	/* bit array that indicates all dq of the pup locked */
1153 	u32 pup_locked;
1154 	u32 first_fail[MAX_PUP_NUM] = { 0 };	/* count first fail per pup */
1155 	/* indicates whether we get first fail per pup */
1156 	int first_failed[MAX_PUP_NUM] = { 0 };
1157 	/* bit array that indicates pup already get fail */
1158 	u32 sum_pup_fail;
1159 	/* use to calculate diff between curr pbs to first fail pbs */
1160 	u32 calc_pbs_diff;
1161 	u32 pbs_cmp_retry;
1162 	u32 max_pup;
1163 
1164 	/* Set init values for retry array - 8 retry */
1165 	for (pbs_cmp_retry = 0; pbs_cmp_retry < COUNT_PBS_COMP_RETRY_NUM;
1166 	     pbs_cmp_retry++) {
1167 		for (dq = 0; dq < DQ_NUM; dq++)
1168 			cmp_unlock_pup_dq_array[pbs_cmp_retry][dq] = *pcur_pup;
1169 	}
1170 
1171 	memset(&skew_array, 0, MAX_PUP_NUM * DQ_NUM * sizeof(u32));
1172 
1173 	DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - Started\n");
1174 
1175 	/* The pbs value depends if rx or tx */
1176 	if (is_tx == 1) {
1177 		start_pbs = MIN_PBS;
1178 		last_pbs = MAX_PBS;
1179 	} else {
1180 		start_pbs = MAX_PBS;
1181 		last_pbs = MIN_PBS;
1182 	}
1183 
1184 	pbs_curr_val = start_pbs;
1185 	pup_locked = *pcur_pup;
1186 
1187 	/* Set current pup number */
1188 	if (pup_locked == 0x1)	/* Ecc mode */
1189 		max_pup = 1;
1190 	else
1191 		max_pup = dram_info->num_of_std_pups;
1192 
1193 	do {
1194 		/* Increment/ decrement PBS for un-lock bits only */
1195 		if (is_tx == 1)
1196 			pbs_curr_val++;
1197 		else
1198 			pbs_curr_val--;
1199 
1200 		/* Set Current PBS delay  */
1201 		for (dq = 0; dq < DQ_NUM; dq++) {
1202 			/* Check DQ bits to see if locked in all pups */
1203 			if (unlock_pup_dq_array[dq] == 0) {
1204 				DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - All pups are locked for DQ ");
1205 				DEBUG_PBS_FULL_D(dq, 1);
1206 				DEBUG_PBS_FULL_S("\n");
1207 				continue;
1208 			}
1209 
1210 			for (pup = 0; pup < max_pup; pup++) {
1211 				int idx;
1212 
1213 				idx = pup * (1 - ecc) + ecc * ECC_PUP;
1214 
1215 				if (IS_PUP_ACTIVE(unlock_pup_dq_array[dq], pup)
1216 				    == 0)
1217 					continue;
1218 
1219 				if (is_tx == 1)
1220 					ddr3_write_pup_reg(
1221 						PUP_PBS_TX + pbs_dq_mapping[idx][dq],
1222 						CS0, idx, 0, pbs_curr_val);
1223 				else
1224 					ddr3_write_pup_reg(
1225 						PUP_PBS_RX + pbs_dq_mapping[idx][dq],
1226 						CS0, idx, 0, pbs_curr_val);
1227 			}
1228 		}
1229 
1230 		/*
1231 		 * Write Read and compare results - run the test
1232 		 * DDR_PBS_COMP_RETRY_NUM times
1233 		 */
1234 		/* Run number of read and write to verify */
1235 		for (pbs_cmp_retry = 0;
1236 		     pbs_cmp_retry < COUNT_PBS_COMP_RETRY_NUM;
1237 		     pbs_cmp_retry++) {
1238 
1239 			if (MV_OK !=
1240 			    ddr3_sdram_pbs_compare(dram_info, pup_locked, is_tx,
1241 						   pbs_pattern_idx,
1242 						   pbs_curr_val, start_pbs,
1243 						   skew_array,
1244 						   cmp_unlock_pup_dq_array
1245 						   [pbs_cmp_retry], ecc))
1246 				return MV_FAIL;
1247 
1248 			for (pup = 0; pup < max_pup; pup++) {
1249 				for (dq = 0; dq < DQ_NUM; dq++) {
1250 					if ((IS_PUP_ACTIVE(unlock_pup_dq_array[dq],
1251 							   pup) == 1)
1252 					    && (IS_PUP_ACTIVE(cmp_unlock_pup_dq_array
1253 					      [pbs_cmp_retry][dq],
1254 					      pup) == 0)) {
1255 						DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - PbsCurrVal: ");
1256 						DEBUG_PBS_FULL_D(pbs_curr_val, 2);
1257 						DEBUG_PBS_FULL_S(" PUP: ");
1258 						DEBUG_PBS_FULL_D(pup, 1);
1259 						DEBUG_PBS_FULL_S(" DQ: ");
1260 						DEBUG_PBS_FULL_D(dq, 1);
1261 						DEBUG_PBS_FULL_S(" - failed\n");
1262 					}
1263 				}
1264 			}
1265 
1266 			for (dq = 0; dq < DQ_NUM; dq++) {
1267 				unlock_pup_dq_array[dq] &=
1268 				    cmp_unlock_pup_dq_array[pbs_cmp_retry][dq];
1269 			}
1270 		}
1271 
1272 		pup_locked = 0;
1273 		sum_pup_fail = *pcur_pup;
1274 
1275 		/* Check which DQ is failed */
1276 		for (dq = 0; dq < DQ_NUM; dq++) {
1277 			/* Summarize the locked pup */
1278 			pup_locked |= unlock_pup_dq_array[dq];
1279 
1280 			/* Check if get fail */
1281 			sum_pup_fail &= unlock_pup_dq_array[dq];
1282 		}
1283 
1284 		/* If all PUPS are locked in all DQ - Break */
1285 		if (pup_locked == 0) {
1286 			/* All pups are locked */
1287 			*start_over = 0;
1288 			DEBUG_PBS_FULL_S("DDR3 - PBS Per bit -  All bit in all pups are successfully locked\n");
1289 			break;
1290 		}
1291 
1292 		/* PBS deskew elements reach max ? */
1293 		if (pbs_curr_val == last_pbs) {
1294 			DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - PBS deskew elements reach max\n");
1295 			/* CHIP-ONLY! - Implementation Limitation */
1296 			*start_over = (sum_pup_fail != 0) && (!(*start_over));
1297 			*pcur_pup = pup_locked;
1298 
1299 			DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - StartOver: ");
1300 			DEBUG_PBS_FULL_D(*start_over, 1);
1301 			DEBUG_PBS_FULL_S("  pup_locked: ");
1302 			DEBUG_PBS_FULL_D(pup_locked, 2);
1303 			DEBUG_PBS_FULL_S("  sum_pup_fail: ");
1304 			DEBUG_PBS_FULL_D(sum_pup_fail, 2);
1305 			DEBUG_PBS_FULL_S("\n");
1306 
1307 			/* Lock PBS value for all remaining  bits */
1308 			for (pup = 0; pup < max_pup; pup++) {
1309 				/* Check if current pup already received error */
1310 				if (IS_PUP_ACTIVE(pup_locked, pup) == 1) {
1311 					/* Valid pup for current function */
1312 					if (IS_PUP_ACTIVE(sum_pup_fail, pup) ==
1313 					    1 && (*start_over == 1)) {
1314 						DEBUG_PBS_FULL_C("DDR3 - PBS Per bit - skipping lock of pup (first loop of pbs)",
1315 								 pup, 1);
1316 						continue;
1317 					} else
1318 					    if (IS_PUP_ACTIVE(sum_pup_fail, pup)
1319 						== 1) {
1320 						DEBUG_PBS_FULL_C("DDR3 - PBS Per bit - Locking pup %d (even though it wasn't supposed to be locked)",
1321 								 pup, 1);
1322 					}
1323 
1324 					/* Already got fail on the PUP */
1325 					/* Lock PBS value for all remaining bits */
1326 					DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - Locking remaning DQs for pup - ");
1327 					DEBUG_PBS_FULL_D(pup, 1);
1328 					DEBUG_PBS_FULL_S(": ");
1329 
1330 					for (dq = 0; dq < DQ_NUM; dq++) {
1331 						if (IS_PUP_ACTIVE
1332 						    (unlock_pup_dq_array[dq],
1333 						     pup) == 1) {
1334 							DEBUG_PBS_FULL_D(dq, 1);
1335 							DEBUG_PBS_FULL_S(",");
1336 							/* set current PBS */
1337 							skew_array[((pup) *
1338 								    DQ_NUM) +
1339 								   dq] =
1340 							    pbs_curr_val;
1341 						}
1342 					}
1343 
1344 					if (*start_over == 1) {
1345 						/*
1346 						 * Reset this pup bit - when
1347 						 * restart the PBS, ignore this
1348 						 * pup
1349 						 */
1350 						*pcur_pup &= ~(1 << pup);
1351 					}
1352 					DEBUG_PBS_FULL_S("\n");
1353 				} else {
1354 					DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - Pup ");
1355 					DEBUG_PBS_FULL_D(pup, 1);
1356 					DEBUG_PBS_FULL_C(" is not set in puplocked - ",
1357 							 pup_locked, 1);
1358 				}
1359 			}
1360 
1361 			/* Need to start the PBS again */
1362 			if (*start_over == 1) {
1363 				DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - false fail - returning to start\n");
1364 				return MV_OK;
1365 			}
1366 			break;
1367 		}
1368 
1369 		/* Diff Check */
1370 		for (pup = 0; pup < max_pup; pup++) {
1371 			if (IS_PUP_ACTIVE(pup_locked, pup) == 1) {
1372 				/* pup is not locked */
1373 				if (first_failed[pup] == 0) {
1374 					/* No first fail until now */
1375 					if (IS_PUP_ACTIVE(sum_pup_fail, pup) ==
1376 					    0) {
1377 						/* Get first fail */
1378 						DEBUG_PBS_FULL_C("DDR3 - PBS Per bit - First fail in pup ",
1379 								 pup, 1);
1380 						first_failed[pup] = 1;
1381 						first_fail[pup] = pbs_curr_val;
1382 					}
1383 				} else {
1384 					/* Already got first fail */
1385 					if (is_tx == 1) {
1386 						/* TX - inc pbs */
1387 						calc_pbs_diff =	pbs_curr_val -
1388 							first_fail[pup];
1389 					} else {
1390 						/* RX - dec pbs */
1391 						calc_pbs_diff = first_fail[pup] -
1392 							pbs_curr_val;
1393 					}
1394 
1395 					if (calc_pbs_diff >= PBS_DIFF_LIMIT) {
1396 						lock_pups(pup, &pup_locked,
1397 							  unlock_pup_dq_array,
1398 							  pbs_curr_val,
1399 							  start_pbs, ecc, is_tx);
1400 					}
1401 				}
1402 			}
1403 		}
1404 	} while (1);
1405 
1406 	return MV_OK;
1407 }
1408 
1409 /*
1410  * Name:         ddr3_set_pbs_results
1411  * Desc:         Set to HW the PBS phase results.
1412  * Args:         is_tx       Indicates whether to set Tx or RX results
1413  * Notes:
1414  * Returns:      MV_OK if success, other error code if fail.
1415  */
1416 static int ddr3_set_pbs_results(MV_DRAM_INFO *dram_info, int is_tx)
1417 {
1418 	u32 pup, phys_pup, dq;
1419 	u32 max_pup;		/* number of valid pups */
1420 	u32 pbs_min;		/* minimal pbs val per pup */
1421 	u32 pbs_max;		/* maximum pbs val per pup */
1422 	u32 val[9];
1423 
1424 	max_pup = dram_info->num_of_total_pups;
1425 	DEBUG_PBS_FULL_S("DDR3 - PBS - ddr3_set_pbs_results:\n");
1426 
1427 	/* Loop for all dqs & pups */
1428 	for (pup = 0; pup < max_pup; pup++) {
1429 		if (pup == (max_pup - 1) && dram_info->ecc_ena)
1430 			phys_pup = ECC_PUP;
1431 		else
1432 			phys_pup = pup;
1433 
1434 		/*
1435 		 * To minimize delay elements, inc from pbs value the min
1436 		 * pbs val
1437 		 */
1438 		pbs_min = MAX_PBS;
1439 		pbs_max = 0;
1440 		for (dq = 0; dq < DQ_NUM; dq++) {
1441 			if (pbs_min > skew_array[(pup * DQ_NUM) + dq])
1442 				pbs_min = skew_array[(pup * DQ_NUM) + dq];
1443 
1444 			if (pbs_max < skew_array[(pup * DQ_NUM) + dq])
1445 				pbs_max = skew_array[(pup * DQ_NUM) + dq];
1446 		}
1447 
1448 		pbs_max -= pbs_min;
1449 
1450 		DEBUG_PBS_FULL_S("DDR3 - PBS - PUP");
1451 		DEBUG_PBS_FULL_D(phys_pup, 1);
1452 		DEBUG_PBS_FULL_S(": Min Val = ");
1453 		DEBUG_PBS_FULL_D(pbs_min, 2);
1454 		DEBUG_PBS_FULL_C(", Max Val = ", pbs_max, 2);
1455 
1456 		val[pup] = 0;
1457 
1458 		for (dq = 0; dq < DQ_NUM; dq++) {
1459 			int idx;
1460 			int offs;
1461 
1462 			/* Set skew value for all dq */
1463 			/*
1464 			 * Bit# Deskew <- Bit# Deskew - last / first
1465 			 * failing bit Deskew For all bits (per PUP)
1466 			 * (minimize delay elements)
1467 			 */
1468 
1469 			DEBUG_PBS_FULL_S("DQ");
1470 			DEBUG_PBS_FULL_D(dq, 1);
1471 			DEBUG_PBS_FULL_S("-");
1472 			DEBUG_PBS_FULL_D((skew_array[(pup * DQ_NUM) + dq] -
1473 					  pbs_min), 2);
1474 			DEBUG_PBS_FULL_S(", ");
1475 
1476 			idx = (pup * DQ_NUM) + dq;
1477 
1478 			if (is_tx == 1)
1479 				offs = PUP_PBS_TX;
1480 			else
1481 				offs = PUP_PBS_RX;
1482 
1483 			ddr3_write_pup_reg(offs + pbs_dq_mapping[phys_pup][dq],
1484 					   CS0, phys_pup, 0,
1485 					   skew_array[idx] - pbs_min);
1486 
1487 			if (is_tx == 1)
1488 				val[pup] += skew_array[idx] - pbs_min;
1489 		}
1490 
1491 		DEBUG_PBS_FULL_S("\n");
1492 
1493 		/* Set the DQS the half of the Max PBS of the DQs  */
1494 		if (is_tx == 1) {
1495 			ddr3_write_pup_reg(PUP_PBS_TX + 8, CS0, phys_pup, 0,
1496 					   pbs_max / 2);
1497 			ddr3_write_pup_reg(PUP_PBS_TX + 0xa, CS0, phys_pup, 0,
1498 					   val[pup] / 8);
1499 		} else
1500 			ddr3_write_pup_reg(PUP_PBS_RX + 8, CS0, phys_pup, 0,
1501 					   pbs_max / 2);
1502 	}
1503 
1504 	return MV_OK;
1505 }
1506 
1507 static void ddr3_pbs_write_pup_dqs_reg(u32 cs, u32 pup, u32 dqs_delay)
1508 {
1509 	u32 reg, delay;
1510 
1511 	reg = (ddr3_read_pup_reg(PUP_WL_MODE, cs, pup) & 0x3FF);
1512 	delay = reg & PUP_DELAY_MASK;
1513 	reg |= ((dqs_delay + delay) << REG_PHY_DQS_REF_DLY_OFFS);
1514 	reg |= REG_PHY_REGISTRY_FILE_ACCESS_OP_WR;
1515 	reg |= (pup << REG_PHY_PUP_OFFS);
1516 	reg |= ((0x4 * cs + PUP_WL_MODE) << REG_PHY_CS_OFFS);
1517 
1518 	reg_write(REG_PHY_REGISTRY_FILE_ACCESS_ADDR, reg);	/* 0x16A0 */
1519 	do {
1520 		reg = reg_read(REG_PHY_REGISTRY_FILE_ACCESS_ADDR) &
1521 			REG_PHY_REGISTRY_FILE_ACCESS_OP_DONE;
1522 	} while (reg);	/* Wait for '0' to mark the end of the transaction */
1523 
1524 	udelay(10);
1525 }
1526 
1527 /*
1528  * Set training patterns
1529  */
1530 int ddr3_load_pbs_patterns(MV_DRAM_INFO *dram_info)
1531 {
1532 	u32 cs, cs_count, cs_tmp;
1533 	u32 sdram_addr;
1534 	u32 *pattern_ptr0, *pattern_ptr1;
1535 
1536 	/* Choose pattern */
1537 	switch (dram_info->ddr_width) {
1538 #if defined(MV88F672X)
1539 	case 16:
1540 		pattern_ptr0 = (u32 *)&pbs_pattern[0];
1541 		pattern_ptr1 = (u32 *)&pbs_pattern[1];
1542 		break;
1543 #endif
1544 	case 32:
1545 		pattern_ptr0 = (u32 *)&pbs_pattern_32b[0];
1546 		pattern_ptr1 = (u32 *)&pbs_pattern_32b[1];
1547 		break;
1548 #if defined(MV88F78X60)
1549 	case 64:
1550 		pattern_ptr0 = (u32 *)&pbs_pattern_64b[0];
1551 		pattern_ptr1 = (u32 *)&pbs_pattern_64b[1];
1552 		break;
1553 #endif
1554 	default:
1555 		return MV_FAIL;
1556 	}
1557 
1558 	/* Loop for each CS */
1559 	for (cs = 0; cs < MAX_CS; cs++) {
1560 		if (dram_info->cs_ena & (1 << cs)) {
1561 			cs_count = 0;
1562 			for (cs_tmp = 0; cs_tmp < cs; cs_tmp++) {
1563 				if (dram_info->cs_ena & (1 << cs_tmp))
1564 					cs_count++;
1565 			}
1566 
1567 			/* Init PBS I pattern */
1568 			sdram_addr = (cs_count * (SDRAM_CS_SIZE + 1) +
1569 				      SDRAM_PBS_I_OFFS);
1570 			if (MV_OK !=
1571 			    ddr3_sdram_compare(dram_info, (u32) NULL, NULL,
1572 					       pattern_ptr0, LEN_STD_PATTERN,
1573 					       sdram_addr, 1, 0, NULL,
1574 					       0))
1575 				return MV_FAIL;
1576 
1577 			/* Init PBS II pattern */
1578 			sdram_addr = (cs_count * (SDRAM_CS_SIZE + 1) +
1579 				      SDRAM_PBS_II_OFFS);
1580 			if (MV_OK !=
1581 			    ddr3_sdram_compare(dram_info, (u32) NULL, NULL,
1582 					       pattern_ptr1, LEN_STD_PATTERN,
1583 					       sdram_addr, 1, 0, NULL,
1584 					       0))
1585 				return MV_FAIL;
1586 		}
1587 	}
1588 
1589 	return MV_OK;
1590 }
1591 #endif
1592