xref: /openbmc/u-boot/drivers/ddr/marvell/axp/ddr3_pbs.c (revision c9e40e65e1dcf19f518fa3811bb6de2bf407360f)
1 /*
2  * Copyright (C) Marvell International Ltd. and its affiliates
3  *
4  * SPDX-License-Identifier:	GPL-2.0
5  */
6 
7 #include <common.h>
8 #include <i2c.h>
9 #include <spl.h>
10 #include <asm/io.h>
11 #include <asm/arch/cpu.h>
12 #include <asm/arch/soc.h>
13 
14 #include "ddr3_hw_training.h"
15 
/*
 * Debug
 *
 * DEBUG_PBS_S(s)        - print the string s
 * DEBUG_PBS_D(d, l)     - print d in hex ('l' is accepted but not used)
 * DEBUG_PBS_C(s, d, l)  - print s, then d in hex, then a newline
 *
 * The DEBUG_PBS_FULL_* variants behave the same way but are controlled by
 * MV_DEBUG_FULL_PBS instead of MV_DEBUG_PBS. When the controlling symbol is
 * not defined the _S/_D macros expand to nothing.
 *
 * The composite _C macros are wrapped in do { } while (0) so that they act
 * as a single statement and stay safe inside an unbraced if/else or loop.
 */
#define DEBUG_PBS_FULL_C(s, d, l) \
	do { \
		DEBUG_PBS_FULL_S(s); \
		DEBUG_PBS_FULL_D(d, l); \
		DEBUG_PBS_FULL_S("\n"); \
	} while (0)
#define DEBUG_PBS_C(s, d, l) \
	do { \
		DEBUG_PBS_S(s); \
		DEBUG_PBS_D(d, l); \
		DEBUG_PBS_S("\n"); \
	} while (0)

#ifdef MV_DEBUG_PBS
#define DEBUG_PBS_S(s)			puts(s)
#define DEBUG_PBS_D(d, l)		printf("%x", d)
#else
#define DEBUG_PBS_S(s)
#define DEBUG_PBS_D(d, l)
#endif

#ifdef MV_DEBUG_FULL_PBS
#define DEBUG_PBS_FULL_S(s)		puts(s)
#define DEBUG_PBS_FULL_D(d, l)		printf("%x", d)
#else
#define DEBUG_PBS_FULL_S(s)
#define DEBUG_PBS_FULL_D(d, l)
#endif
39 
40 #if defined(MV88F78X60) || defined(MV88F672X)
41 
42 /* Temp array for skew data storage */
43 static u32 skew_array[(MAX_PUP_NUM) * DQ_NUM] = { 0 };
44 
45 /* PBS locked dq (per pup) */
46 extern u32 pbs_locked_dq[MAX_PUP_NUM][DQ_NUM];
47 extern u32 pbs_locked_dm[MAX_PUP_NUM];
48 extern u32 pbs_locked_value[MAX_PUP_NUM][DQ_NUM];
49 
50 #if defined(MV88F672X)
51 extern u32 pbs_pattern[2][LEN_16BIT_PBS_PATTERN];
52 extern u32 pbs_pattern_32b[2][LEN_PBS_PATTERN];
53 #else
54 extern u32 pbs_pattern_32b[2][LEN_PBS_PATTERN];
55 extern u32 pbs_pattern_64b[2][LEN_PBS_PATTERN];
56 #endif
57 
58 extern u32 pbs_dq_mapping[PUP_NUM_64BIT + 1][DQ_NUM];
59 
60 static int ddr3_tx_shift_dqs_adll_step_before_fail(MV_DRAM_INFO *dram_info,
61 		u32 cur_pup, u32 pbs_pattern_idx, u32 ecc);
62 static int ddr3_rx_shift_dqs_to_first_fail(MV_DRAM_INFO *dram_info, u32 cur_pup,
63 		u32 pbs_pattern_idx, u32 ecc);
64 static int ddr3_pbs_per_bit(MV_DRAM_INFO *dram_info, int *start_over, int is_tx,
65 		u32 *pcur_pup, u32 pbs_pattern_idx, u32 ecc);
66 static int ddr3_set_pbs_results(MV_DRAM_INFO *dram_info, int is_tx);
67 static void ddr3_pbs_write_pup_dqs_reg(u32 cs, u32 pup, u32 dqs_delay);
68 
69 /*
70  * Name:     ddr3_pbs_tx
71  * Desc:     Execute the PBS TX phase.
72  * Args:     dram_info   ddr3 training information struct
73  * Notes:
74  * Returns:  MV_OK if success, other error code if fail.
75  */
76 int ddr3_pbs_tx(MV_DRAM_INFO *dram_info)
77 {
78 	/* Array of Deskew results */
79 
80 	/*
81 	 * Array to hold the total sum of skew from all iterations
82 	 * (for average purpose)
83 	 */
84 	u32 skew_sum_array[MAX_PUP_NUM][DQ_NUM] = { {0} };
85 
86 	/*
87 	 * Array to hold the total average skew from both patterns
88 	 * (for average purpose)
89 	 */
90 	u32 pattern_skew_array[MAX_PUP_NUM][DQ_NUM] = { {0} };
91 
92 	u32 pbs_rep_time = 0;	/* counts number of loop in case of fail */
93 	/* bit array for unlock pups - used to repeat on the RX operation */
94 	u32 cur_pup;
95 	u32 max_pup;
96 	u32 pbs_retry;
97 	u32 pup, dq, pups, cur_max_pup, valid_pup, reg;
98 	u32 pattern_idx;
99 	u32 ecc;
100 	/* indicates whether we need to start the loop again */
101 	int start_over;
102 
103 	DEBUG_PBS_S("DDR3 - PBS TX - Starting PBS TX procedure\n");
104 
105 	pups = dram_info->num_of_total_pups;
106 	max_pup = dram_info->num_of_total_pups;
107 
108 	/* Enable SW override */
109 	reg = reg_read(REG_DRAM_TRAINING_2_ADDR) |
110 		(1 << REG_DRAM_TRAINING_2_SW_OVRD_OFFS);
111 	/* [0] = 1 - Enable SW override  */
112 	/* 0x15B8 - Training SW 2 Register */
113 	reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
114 	DEBUG_PBS_S("DDR3 - PBS RX - SW Override Enabled\n");
115 
116 	reg = 1 << REG_DRAM_TRAINING_AUTO_OFFS;
117 	reg_write(REG_DRAM_TRAINING_ADDR, reg);	/* 0x15B0 - Training Register */
118 
119 	/* Running twice for 2 different patterns. each patterns - 3 times */
120 	for (pattern_idx = 0; pattern_idx < COUNT_PBS_PATTERN; pattern_idx++) {
121 		DEBUG_PBS_C("DDR3 - PBS TX - Working with pattern - ",
122 			    pattern_idx, 1);
123 
124 		/* Reset sum array */
125 		for (pup = 0; pup < pups; pup++) {
126 			for (dq = 0; dq < DQ_NUM; dq++)
127 				skew_sum_array[pup][dq] = 0;
128 		}
129 
130 		/*
131 		 * Perform PBS several of times (3 for each pattern).
132 		 * At the end, we'll use the average
133 		 */
134 		/* If there is ECC, do each PBS again with mux change */
135 		for (pbs_retry = 0; pbs_retry < COUNT_PBS_REPEAT; pbs_retry++) {
136 			for (ecc = 0; ecc < (dram_info->ecc_ena + 1); ecc++) {
137 
138 				/*
139 				 * This parameter stores the current PUP
140 				 * num - ecc mode dependent - 4-8 / 1 pups
141 				 */
142 				cur_max_pup = (1 - ecc) *
143 					dram_info->num_of_std_pups + ecc;
144 
145 				if (ecc) {
146 					/* Only 1 pup in this case */
147 					valid_pup = 0x1;
148 				} else if (cur_max_pup > 4) {
149 					/* 64 bit - 8 pups */
150 					valid_pup = 0xFF;
151 				} else if (cur_max_pup == 4) {
152 					/* 32 bit - 4 pups */
153 					valid_pup = 0xF;
154 				} else {
155 					/* 16 bit - 2 pups */
156 					valid_pup = 0x3;
157 				}
158 
159 				/* ECC Support - Switch ECC Mux on ecc=1 */
160 				reg = reg_read(REG_DRAM_TRAINING_2_ADDR) &
161 					~(1 << REG_DRAM_TRAINING_2_ECC_MUX_OFFS);
162 				reg |= (dram_info->ecc_ena * ecc <<
163 					REG_DRAM_TRAINING_2_ECC_MUX_OFFS);
164 				reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
165 
166 				if (ecc)
167 					DEBUG_PBS_S("DDR3 - PBS Tx - ECC Mux Enabled\n");
168 				else
169 					DEBUG_PBS_S("DDR3 - PBS Tx - ECC Mux Disabled\n");
170 
171 				/* Init iteration values */
172 				/* Clear the locked DQs */
173 				for (pup = 0; pup < cur_max_pup; pup++) {
174 					for (dq = 0; dq < DQ_NUM; dq++) {
175 						pbs_locked_dq[
176 							pup + ecc *
177 							(max_pup - 1)][dq] =
178 							0;
179 					}
180 				}
181 
182 				pbs_rep_time = 0;
183 				cur_pup = valid_pup;
184 				start_over = 0;
185 
186 				/*
187 				 * Run loop On current Pattern and current
188 				 * pattern iteration (just to cover the false
189 				 * fail problem)
190 				 */
191 				do {
192 					DEBUG_PBS_S("DDR3 - PBS Tx - Pbs Rep Loop is ");
193 					DEBUG_PBS_D(pbs_rep_time, 1);
194 					DEBUG_PBS_S(", for Retry No.");
195 					DEBUG_PBS_D(pbs_retry, 1);
196 					DEBUG_PBS_S("\n");
197 
198 					/* Set all PBS values to MIN (0) */
199 					DEBUG_PBS_S("DDR3 - PBS Tx - Set all PBS values to MIN\n");
200 
201 					for (dq = 0; dq < DQ_NUM; dq++) {
202 						ddr3_write_pup_reg(
203 							PUP_PBS_TX +
204 							pbs_dq_mapping[pup *
205 								(1 - ecc) +
206 								ecc * ECC_PUP]
207 							[dq], CS0, (1 - ecc) *
208 							PUP_BC + ecc * ECC_PUP, 0,
209 							0);
210 					}
211 
212 					/*
213 					 * Shift DQ ADLL right, One step before
214 					 * fail
215 					 */
216 					DEBUG_PBS_S("DDR3 - PBS Tx - ADLL shift right one phase before fail\n");
217 
218 					if (MV_OK != ddr3_tx_shift_dqs_adll_step_before_fail
219 					    (dram_info, cur_pup, pattern_idx,
220 					     ecc))
221 						return MV_DDR3_TRAINING_ERR_PBS_ADLL_SHR_1PHASE;
222 
223 					/* PBS For each bit */
224 					DEBUG_PBS_S("DDR3 - PBS Tx - perform PBS for each bit\n");
225 
226 					/*
227 					 * In this stage - start_over = 0
228 					 */
229 					if (MV_OK != ddr3_pbs_per_bit(
230 						    dram_info, &start_over, 1,
231 						    &cur_pup, pattern_idx, ecc))
232 						return MV_DDR3_TRAINING_ERR_PBS_TX_PER_BIT;
233 
234 				} while ((start_over == 1) &&
235 					 (++pbs_rep_time < COUNT_PBS_STARTOVER));
236 
237 				if (pbs_rep_time == COUNT_PBS_STARTOVER &&
238 				    start_over == 1) {
239 					DEBUG_PBS_S("DDR3 - PBS Tx - FAIL - Adll reach max value\n");
240 					return MV_DDR3_TRAINING_ERR_PBS_TX_MAX_VAL;
241 				}
242 
243 				DEBUG_PBS_FULL_C("DDR3 - PBS TX - values for iteration - ",
244 						 pbs_retry, 1);
245 				for (pup = 0; pup < cur_max_pup; pup++) {
246 					/*
247 					 * To minimize delay elements, inc
248 					 * from pbs value the min pbs val
249 					 */
250 					DEBUG_PBS_S("DDR3 - PBS - PUP");
251 					DEBUG_PBS_D((pup + (ecc * ECC_PUP)), 1);
252 					DEBUG_PBS_S(": ");
253 
254 					for (dq = 0; dq < DQ_NUM; dq++) {
255 						/* Set skew value for all dq */
256 						/*
257 						 * Bit# Deskew <- Bit# Deskew -
258 						 * last / first  failing bit
259 						 * Deskew For all bits (per PUP)
260 						 * (minimize delay elements)
261 						 */
262 						DEBUG_PBS_S("DQ");
263 						DEBUG_PBS_D(dq, 1);
264 						DEBUG_PBS_S("-");
265 						DEBUG_PBS_D(skew_array
266 							    [((pup) * DQ_NUM) +
267 							     dq], 2);
268 						DEBUG_PBS_S(", ");
269 					}
270 					DEBUG_PBS_S("\n");
271 				}
272 
273 				/*
274 				 * Collect the results we got on this trial
275 				 * of PBS
276 				 */
277 				for (pup = 0; pup < cur_max_pup; pup++) {
278 					for (dq = 0; dq < DQ_NUM; dq++) {
279 						skew_sum_array[pup + (ecc * (max_pup - 1))]
280 							[dq] += skew_array
281 							[((pup) * DQ_NUM) + dq];
282 					}
283 				}
284 
285 				/* ECC Support - Disable ECC MUX */
286 				reg = reg_read(REG_DRAM_TRAINING_2_ADDR) &
287 					~(1 << REG_DRAM_TRAINING_2_ECC_MUX_OFFS);
288 				reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
289 			}
290 		}
291 
292 		DEBUG_PBS_C("DDR3 - PBS TX - values for current pattern - ",
293 			    pattern_idx, 1);
294 		for (pup = 0; pup < max_pup; pup++) {
295 			/*
296 			 * To minimize delay elements, inc from pbs value the
297 			 * min pbs val
298 			 */
299 			DEBUG_PBS_S("DDR3 - PBS - PUP");
300 			DEBUG_PBS_D(pup, 1);
301 			DEBUG_PBS_S(": ");
302 
303 			for (dq = 0; dq < DQ_NUM; dq++) {
304 				/* set skew value for all dq */
305 				/* Bit# Deskew <- Bit# Deskew - last / first  failing bit Deskew For all bits (per PUP) (minimize delay elements) */
306 				DEBUG_PBS_S("DQ");
307 				DEBUG_PBS_D(dq, 1);
308 				DEBUG_PBS_S("-");
309 				DEBUG_PBS_D(skew_sum_array[pup][dq] /
310 					    COUNT_PBS_REPEAT, 2);
311 				DEBUG_PBS_S(", ");
312 			}
313 			DEBUG_PBS_S("\n");
314 		}
315 
316 		/*
317 		 * Calculate the average skew for current pattern for each
318 		 * pup and each bit
319 		 */
320 		DEBUG_PBS_C("DDR3 - PBS TX - Average for pattern - ",
321 			    pattern_idx, 1);
322 
323 		for (pup = 0; pup < max_pup; pup++) {
324 			/*
325 			 * FOR ECC only :: found min and max value for current
326 			 * pattern skew array
327 			 */
328 			/* Loop for all dqs */
329 			for (dq = 0; dq < DQ_NUM; dq++) {
330 				pattern_skew_array[pup][dq] +=
331 					(skew_sum_array[pup][dq] /
332 					 COUNT_PBS_REPEAT);
333 			}
334 		}
335 	}
336 
337 	/* Calculate the average skew */
338 	for (pup = 0; pup < max_pup; pup++) {
339 		for (dq = 0; dq < DQ_NUM; dq++)
340 			skew_array[((pup) * DQ_NUM) + dq] =
341 				pattern_skew_array[pup][dq] / COUNT_PBS_PATTERN;
342 	}
343 
344 	DEBUG_PBS_S("DDR3 - PBS TX - Average for all patterns:\n");
345 	for (pup = 0; pup < max_pup; pup++) {
346 		/*
347 		 * To minimize delay elements, inc from pbs value the min
348 		 * pbs val
349 		 */
350 		DEBUG_PBS_S("DDR3 - PBS - PUP");
351 		DEBUG_PBS_D(pup, 1);
352 		DEBUG_PBS_S(": ");
353 
354 		for (dq = 0; dq < DQ_NUM; dq++) {
355 			/* Set skew value for all dq */
356 			/*
357 			 * Bit# Deskew <- Bit# Deskew - last / first
358 			 * failing bit Deskew For all bits (per PUP)
359 			 * (minimize delay elements)
360 			 */
361 			DEBUG_PBS_S("DQ");
362 			DEBUG_PBS_D(dq, 1);
363 			DEBUG_PBS_S("-");
364 			DEBUG_PBS_D(skew_array[(pup * DQ_NUM) + dq], 2);
365 			DEBUG_PBS_S(", ");
366 		}
367 		DEBUG_PBS_S("\n");
368 	}
369 
370 	/* Return ADLL to default value */
371 	for (pup = 0; pup < max_pup; pup++) {
372 		if (pup == (max_pup - 1) && dram_info->ecc_ena)
373 			pup = ECC_PUP;
374 		ddr3_pbs_write_pup_dqs_reg(CS0, pup, INIT_WL_DELAY);
375 	}
376 
377 	/* Set averaged PBS results */
378 	ddr3_set_pbs_results(dram_info, 1);
379 
380 	/* Disable SW override - Must be in a different stage */
381 	/* [0]=0 - Enable SW override  */
382 	reg = reg_read(REG_DRAM_TRAINING_2_ADDR);
383 	reg &= ~(1 << REG_DRAM_TRAINING_2_SW_OVRD_OFFS);
384 	/* 0x15B8 - Training SW 2 Register */
385 	reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
386 
387 	reg = reg_read(REG_DRAM_TRAINING_1_ADDR) |
388 		(1 << REG_DRAM_TRAINING_1_TRNBPOINT_OFFS);
389 	reg_write(REG_DRAM_TRAINING_1_ADDR, reg);
390 
391 	DEBUG_PBS_S("DDR3 - PBS Tx - PBS TX ended successfuly\n");
392 
393 	return MV_OK;
394 }
395 
396 /*
397  * Name:     ddr3_tx_shift_dqs_adll_step_before_fail
398  * Desc:     Execute the Tx shift DQ phase.
399  * Args:     dram_info            ddr3 training information struct
400  *           cur_pup              bit array of the function active pups.
401  *           pbs_pattern_idx      Index of PBS pattern
402  * Notes:
403  * Returns:  MV_OK if success, other error code if fail.
404  */
405 static int ddr3_tx_shift_dqs_adll_step_before_fail(MV_DRAM_INFO *dram_info,
406 						   u32 cur_pup,
407 						   u32 pbs_pattern_idx, u32 ecc)
408 {
409 	u32 unlock_pup;		/* bit array of unlock pups  */
410 	u32 new_lockup_pup;	/* bit array of compare failed pups */
411 	u32 adll_val = 4;	/* INIT_WL_DELAY */
412 	u32 cur_max_pup, pup;
413 	u32 dqs_dly_set[MAX_PUP_NUM] = { 0 };
414 	u32 *pattern_ptr;
415 
416 	/* Choose pattern */
417 	switch (dram_info->ddr_width) {
418 #if defined(MV88F672X)
419 	case 16:
420 		pattern_ptr = (u32 *)&pbs_pattern[pbs_pattern_idx];
421 		break;
422 #endif
423 	case 32:
424 		pattern_ptr = (u32 *)&pbs_pattern_32b[pbs_pattern_idx];
425 		break;
426 #if defined(MV88F78X60)
427 	case 64:
428 		pattern_ptr = (u32 *)&pbs_pattern_64b[pbs_pattern_idx];
429 		break;
430 #endif
431 	default:
432 		return MV_FAIL;
433 	}
434 
435 	/* Set current pup number */
436 	if (cur_pup == 0x1)	/* Ecc mode */
437 		cur_max_pup = 1;
438 	else
439 		cur_max_pup = dram_info->num_of_std_pups;
440 
441 	unlock_pup = cur_pup;	/* '1' for each unlocked pup */
442 
443 	/* Loop on all ADLL Vaules */
444 	do {
445 		/* Loop until found first fail */
446 		adll_val++;
447 
448 		/*
449 		 * Increment (Move to right - ADLL) DQ TX delay
450 		 * (broadcast to all Data PUPs)
451 		 */
452 		for (pup = 0; pup < cur_max_pup; pup++)
453 			ddr3_pbs_write_pup_dqs_reg(CS0,
454 						   pup * (1 - ecc) +
455 						   ECC_PUP * ecc, adll_val);
456 
457 		/*
458 		 * Write and Read, compare results (read was already verified)
459 		 */
460 		/* 0 - all locked */
461 		new_lockup_pup = 0;
462 
463 		if (MV_OK != ddr3_sdram_compare(dram_info, unlock_pup,
464 						&new_lockup_pup,
465 						pattern_ptr, LEN_PBS_PATTERN,
466 						SDRAM_PBS_TX_OFFS, 1, 0,
467 						NULL,
468 						0))
469 			return MV_FAIL;
470 
471 		unlock_pup &= ~new_lockup_pup;
472 
473 		DEBUG_PBS_FULL_S("Shift DQS by 2 steps for PUPs: ");
474 		DEBUG_PBS_FULL_D(unlock_pup, 2);
475 		DEBUG_PBS_FULL_C(", Set ADLL value = ", adll_val, 2);
476 
477 		/* If any PUP failed there is '1' to mark the PUP */
478 		if (new_lockup_pup != 0) {
479 			/*
480 			 * Decrement (Move Back to Left two steps - ADLL)
481 			 * DQ TX delay for current failed pups and save
482 			 */
483 			for (pup = 0; pup < cur_max_pup; pup++) {
484 				if (((new_lockup_pup >> pup) & 0x1) &&
485 				    dqs_dly_set[pup] == 0)
486 					dqs_dly_set[pup] = adll_val - 1;
487 			}
488 		}
489 	} while ((unlock_pup != 0) && (adll_val != ADLL_MAX));
490 
491 	if (unlock_pup != 0) {
492 		DEBUG_PBS_FULL_S("DDR3 - PBS Tx - Shift DQ - Adll value reached maximum\n");
493 
494 		for (pup = 0; pup < cur_max_pup; pup++) {
495 			if (((unlock_pup >> pup) & 0x1) &&
496 			    dqs_dly_set[pup] == 0)
497 				dqs_dly_set[pup] = adll_val - 1;
498 		}
499 	}
500 
501 	DEBUG_PBS_FULL_C("PBS TX one step before fail last pups locked Adll ",
502 			 adll_val - 2, 2);
503 
504 	/* Set the PUP DQS DLY Values */
505 	for (pup = 0; pup < cur_max_pup; pup++)
506 		ddr3_pbs_write_pup_dqs_reg(CS0, pup * (1 - ecc) + ECC_PUP * ecc,
507 					   dqs_dly_set[pup]);
508 
509 	/* Found one phase before fail */
510 	return MV_OK;
511 }
512 
513 /*
514  * Name:     ddr3_pbs_rx
515  * Desc:     Execute the PBS RX phase.
516  * Args:     dram_info   ddr3 training information struct
517  * Notes:
518  * Returns:  MV_OK if success, other error code if fail.
519  */
520 int ddr3_pbs_rx(MV_DRAM_INFO *dram_info)
521 {
522 	/*
523 	 * Array to hold the total sum of skew from all iterations
524 	 * (for average purpose)
525 	 */
526 	u32 skew_sum_array[MAX_PUP_NUM][DQ_NUM] = { {0} };
527 
528 	/*
529 	 * Array to hold the total average skew from both patterns
530 	 * (for average purpose)
531 	 */
532 	u32 pattern_skew_array[MAX_PUP_NUM][DQ_NUM] = { {0} };
533 
534 	u32 pbs_rep_time = 0;	/* counts number of loop in case of fail */
535 	/* bit array for unlock pups - used to repeat on the RX operation */
536 	u32 cur_pup;
537 	u32 max_pup;
538 	u32 pbs_retry;
539 	u32 pup, dq, pups, cur_max_pup, valid_pup, reg;
540 	u32 pattern_idx;
541 	u32 ecc;
542 	/* indicates whether we need to start the loop again */
543 	int start_over;
544 	int status;
545 
546 	DEBUG_PBS_S("DDR3 - PBS RX - Starting PBS RX procedure\n");
547 
548 	pups = dram_info->num_of_total_pups;
549 	max_pup = dram_info->num_of_total_pups;
550 
551 	/* Enable SW override */
552 	reg = reg_read(REG_DRAM_TRAINING_2_ADDR) |
553 		(1 << REG_DRAM_TRAINING_2_SW_OVRD_OFFS);
554 	/* [0] = 1 - Enable SW override  */
555 	/* 0x15B8 - Training SW 2 Register */
556 	reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
557 	DEBUG_PBS_FULL_S("DDR3 - PBS RX - SW Override Enabled\n");
558 
559 	reg = 1 << REG_DRAM_TRAINING_AUTO_OFFS;
560 	reg_write(REG_DRAM_TRAINING_ADDR, reg);	/* 0x15B0 - Training Register */
561 
562 	/* Running twice for 2 different patterns. each patterns - 3 times */
563 	for (pattern_idx = 0; pattern_idx < COUNT_PBS_PATTERN; pattern_idx++) {
564 		DEBUG_PBS_FULL_C("DDR3 - PBS RX - Working with pattern - ",
565 				 pattern_idx, 1);
566 
567 		/* Reset sum array */
568 		for (pup = 0; pup < pups; pup++) {
569 			for (dq = 0; dq < DQ_NUM; dq++)
570 				skew_sum_array[pup][dq] = 0;
571 		}
572 
573 		/*
574 		 * Perform PBS several of times (3 for each pattern).
575 		 * At the end, we'll use the average
576 		 */
577 		/* If there is ECC, do each PBS again with mux change */
578 		for (pbs_retry = 0; pbs_retry < COUNT_PBS_REPEAT; pbs_retry++) {
579 			for (ecc = 0; ecc < (dram_info->ecc_ena + 1); ecc++) {
580 				/*
581 				 * This parameter stores the current PUP
582 				 * num - ecc mode dependent - 4-8 / 1 pups
583 				 */
584 				cur_max_pup = (1 - ecc) *
585 					dram_info->num_of_std_pups + ecc;
586 
587 				if (ecc) {
588 					/* Only 1 pup in this case */
589 					valid_pup = 0x1;
590 				} else if (cur_max_pup > 4) {
591 					/* 64 bit - 8 pups */
592 					valid_pup = 0xFF;
593 				} else if (cur_max_pup == 4) {
594 					/* 32 bit - 4 pups */
595 					valid_pup = 0xF;
596 				} else {
597 					/* 16 bit - 2 pups */
598 					valid_pup = 0x3;
599 				}
600 
601 				/* ECC Support - Switch ECC Mux on ecc=1 */
602 				reg = reg_read(REG_DRAM_TRAINING_2_ADDR) &
603 					~(1 << REG_DRAM_TRAINING_2_ECC_MUX_OFFS);
604 				reg |= (dram_info->ecc_ena * ecc <<
605 					REG_DRAM_TRAINING_2_ECC_MUX_OFFS);
606 				reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
607 
608 				if (ecc)
609 					DEBUG_PBS_FULL_S("DDR3 - PBS Rx - ECC Mux Enabled\n");
610 				else
611 					DEBUG_PBS_FULL_S("DDR3 - PBS Rx - ECC Mux Disabled\n");
612 
613 				/* Init iteration values */
614 				/* Clear the locked DQs */
615 				for (pup = 0; pup < cur_max_pup; pup++) {
616 					for (dq = 0; dq < DQ_NUM; dq++) {
617 						pbs_locked_dq[
618 							pup + ecc * (max_pup - 1)][dq] =
619 							0;
620 					}
621 				}
622 
623 				pbs_rep_time = 0;
624 				cur_pup = valid_pup;
625 				start_over = 0;
626 
627 				/*
628 				 * Run loop On current Pattern and current
629 				 * pattern iteration (just to cover the false
630 				 * fail problem
631 				 */
632 				do {
633 					DEBUG_PBS_FULL_S("DDR3 - PBS Rx - Pbs Rep Loop is ");
634 					DEBUG_PBS_FULL_D(pbs_rep_time, 1);
635 					DEBUG_PBS_FULL_S(", for Retry No.");
636 					DEBUG_PBS_FULL_D(pbs_retry, 1);
637 					DEBUG_PBS_FULL_S("\n");
638 
639 					/* Set all PBS values to MAX (31) */
640 					for (pup = 0; pup < cur_max_pup; pup++) {
641 						for (dq = 0; dq < DQ_NUM; dq++)
642 							ddr3_write_pup_reg(
643 								PUP_PBS_RX +
644 								pbs_dq_mapping[
645 								pup * (1 - ecc)
646 								+ ecc * ECC_PUP]
647 								[dq], CS0,
648 								pup + ecc * ECC_PUP,
649 								0, MAX_PBS);
650 					}
651 
652 					/* Set all DQS PBS values to MIN (0) */
653 					for (pup = 0; pup < cur_max_pup; pup++) {
654 						ddr3_write_pup_reg(PUP_PBS_RX +
655 								   DQ_NUM, CS0,
656 								   pup +
657 								   ecc *
658 								   ECC_PUP, 0,
659 								   0);
660 					}
661 
662 					/* Shift DQS, To first Fail */
663 					DEBUG_PBS_FULL_S("DDR3 - PBS Rx - Shift RX DQS to first fail\n");
664 
665 					status = ddr3_rx_shift_dqs_to_first_fail
666 						(dram_info, cur_pup,
667 						 pattern_idx, ecc);
668 					if (MV_OK != status) {
669 						DEBUG_PBS_S("DDR3 - PBS Rx - ddr3_rx_shift_dqs_to_first_fail failed.\n");
670 						DEBUG_PBS_D(status, 8);
671 						DEBUG_PBS_S("\nDDR3 - PBS Rx - SKIP.\n");
672 
673 						/* Reset read FIFO */
674 						reg = reg_read(REG_DRAM_TRAINING_ADDR);
675 						/* Start Auto Read Leveling procedure */
676 						reg |= (1 << REG_DRAM_TRAINING_RL_OFFS);
677 						/* 0x15B0 - Training Register */
678 						reg_write(REG_DRAM_TRAINING_ADDR, reg);
679 
680 						reg = reg_read(REG_DRAM_TRAINING_2_ADDR);
681 						reg |= ((1 << REG_DRAM_TRAINING_2_FIFO_RST_OFFS)
682 							+ (1 << REG_DRAM_TRAINING_2_SW_OVRD_OFFS));
683 						/* [0] = 1 - Enable SW override, [4] = 1 - FIFO reset  */
684 						/* 0x15B8 - Training SW 2 Register */
685 						reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
686 
687 						do {
688 							reg = (reg_read(REG_DRAM_TRAINING_2_ADDR))
689 								& (1 <<	REG_DRAM_TRAINING_2_FIFO_RST_OFFS);
690 						} while (reg);	/* Wait for '0' */
691 
692 						reg = reg_read(REG_DRAM_TRAINING_ADDR);
693 						/* Clear Auto Read Leveling procedure */
694 						reg &= ~(1 << REG_DRAM_TRAINING_RL_OFFS);
695 						/* 0x15B0 - Training Register */
696 						reg_write(REG_DRAM_TRAINING_ADDR, reg);
697 
698 						/* Set ADLL to 15 */
699 						for (pup = 0; pup < max_pup;
700 						     pup++) {
701 							ddr3_write_pup_reg
702 							    (PUP_DQS_RD, CS0,
703 							     pup +
704 							     (ecc * ECC_PUP), 0,
705 							     15);
706 						}
707 
708 						/* Set all PBS values to MIN (0) */
709 						for (pup = 0; pup < cur_max_pup;
710 						     pup++) {
711 							for (dq = 0;
712 							     dq < DQ_NUM; dq++)
713 								ddr3_write_pup_reg
714 								    (PUP_PBS_RX +
715 								     pbs_dq_mapping
716 								     [pup * (1 - ecc) +
717 								      ecc * ECC_PUP]
718 								     [dq], CS0,
719 								     pup + ecc * ECC_PUP,
720 								     0, MIN_PBS);
721 						}
722 
723 						return MV_OK;
724 					}
725 
726 					/* PBS For each bit */
727 					DEBUG_PBS_FULL_S("DDR3 - PBS Rx - perform PBS for each bit\n");
728 					/* in this stage - start_over = 0; */
729 					if (MV_OK != ddr3_pbs_per_bit(
730 						    dram_info, &start_over,
731 						    0, &cur_pup,
732 						    pattern_idx, ecc)) {
733 						DEBUG_PBS_S("DDR3 - PBS Rx - ddr3_pbs_per_bit failed.");
734 						return MV_DDR3_TRAINING_ERR_PBS_RX_PER_BIT;
735 					}
736 
737 				} while ((start_over == 1) &&
738 					 (++pbs_rep_time < COUNT_PBS_STARTOVER));
739 
740 				if (pbs_rep_time == COUNT_PBS_STARTOVER &&
741 				    start_over == 1) {
742 					DEBUG_PBS_FULL_S("DDR3 - PBS Rx - FAIL - Algorithm failed doing RX PBS\n");
743 					return MV_DDR3_TRAINING_ERR_PBS_RX_MAX_VAL;
744 				}
745 
746 				/* Return DQS ADLL to default value - 15 */
747 				/* Set all DQS PBS values to MIN (0) */
748 				for (pup = 0; pup < cur_max_pup; pup++)
749 					ddr3_write_pup_reg(PUP_DQS_RD, CS0,
750 							   pup + ecc * ECC_PUP,
751 							   0, INIT_RL_DELAY);
752 
753 				DEBUG_PBS_FULL_C("DDR3 - PBS RX - values for iteration - ",
754 						 pbs_retry, 1);
755 				for (pup = 0; pup < cur_max_pup; pup++) {
756 					/*
757 					 * To minimize delay elements, inc from
758 					 * pbs value the min pbs val
759 					 */
760 					DEBUG_PBS_FULL_S("DDR3 - PBS - PUP");
761 					DEBUG_PBS_FULL_D((pup +
762 							  (ecc * ECC_PUP)), 1);
763 					DEBUG_PBS_FULL_S(": ");
764 
765 					for (dq = 0; dq < DQ_NUM; dq++) {
766 						/* Set skew value for all dq */
767 						/*
768 						 * Bit# Deskew <- Bit# Deskew -
769 						 * last / first  failing bit
770 						 * Deskew For all bits (per PUP)
771 						 * (minimize delay elements)
772 						 */
773 						DEBUG_PBS_FULL_S("DQ");
774 						DEBUG_PBS_FULL_D(dq, 1);
775 						DEBUG_PBS_FULL_S("-");
776 						DEBUG_PBS_FULL_D(skew_array
777 								 [((pup) *
778 								   DQ_NUM) +
779 								  dq], 2);
780 						DEBUG_PBS_FULL_S(", ");
781 					}
782 					DEBUG_PBS_FULL_S("\n");
783 				}
784 
785 				/*
786 				 * Collect the results we got on this trial
787 				 * of PBS
788 				 */
789 				for (pup = 0; pup < cur_max_pup; pup++) {
790 					for (dq = 0; dq < DQ_NUM; dq++) {
791 						skew_sum_array
792 							[pup + (ecc * (max_pup - 1))]
793 							[dq] +=
794 							skew_array[((pup) * DQ_NUM) + dq];
795 					}
796 				}
797 
798 				/* ECC Support - Disable ECC MUX */
799 				reg = reg_read(REG_DRAM_TRAINING_2_ADDR) &
800 					~(1 << REG_DRAM_TRAINING_2_ECC_MUX_OFFS);
801 				reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
802 			}
803 		}
804 
805 		/*
806 		 * Calculate the average skew for current pattern for each
807 		 * pup and each bit
808 		 */
809 		DEBUG_PBS_FULL_C("DDR3 - PBS RX - Average for pattern - ",
810 				 pattern_idx, 1);
811 		for (pup = 0; pup < max_pup; pup++) {
812 			/*
813 			 * FOR ECC only :: found min and max value for
814 			 * current pattern skew array
815 			 */
816 			/* Loop for all dqs */
817 			for (dq = 0; dq < DQ_NUM; dq++) {
818 				pattern_skew_array[pup][dq] +=
819 					(skew_sum_array[pup][dq] /
820 					 COUNT_PBS_REPEAT);
821 			}
822 		}
823 
824 		DEBUG_PBS_C("DDR3 - PBS RX - values for current pattern - ",
825 			    pattern_idx, 1);
826 		for (pup = 0; pup < max_pup; pup++) {
827 			/*
828 			 * To minimize delay elements, inc from pbs value the
829 			 * min pbs val
830 			 */
831 			DEBUG_PBS_S("DDR3 - PBS RX - PUP");
832 			DEBUG_PBS_D(pup, 1);
833 			DEBUG_PBS_S(": ");
834 
835 			for (dq = 0; dq < DQ_NUM; dq++) {
836 				/* Set skew value for all dq */
837 				/*
838 				 * Bit# Deskew <- Bit# Deskew - last / first
839 				 * failing bit Deskew For all bits (per PUP)
840 				 * (minimize delay elements)
841 				 */
842 				DEBUG_PBS_S("DQ");
843 				DEBUG_PBS_D(dq, 1);
844 				DEBUG_PBS_S("-");
845 				DEBUG_PBS_D(skew_sum_array[pup][dq] /
846 					    COUNT_PBS_REPEAT, 2);
847 				DEBUG_PBS_S(", ");
848 			}
849 			DEBUG_PBS_S("\n");
850 		}
851 	}
852 
853 	/* Calculate the average skew */
854 	for (pup = 0; pup < max_pup; pup++) {
855 		for (dq = 0; dq < DQ_NUM; dq++)
856 			skew_array[((pup) * DQ_NUM) + dq] =
857 				pattern_skew_array[pup][dq] / COUNT_PBS_PATTERN;
858 	}
859 
860 	DEBUG_PBS_S("DDR3 - PBS RX - Average for all patterns:\n");
861 	for (pup = 0; pup < max_pup; pup++) {
862 		/*
863 		 * To minimize delay elements, inc from pbs value the
864 		 * min pbs val
865 		 */
866 		DEBUG_PBS_S("DDR3 - PBS - PUP");
867 		DEBUG_PBS_D(pup, 1);
868 		DEBUG_PBS_S(": ");
869 
870 		for (dq = 0; dq < DQ_NUM; dq++) {
871 			/* Set skew value for all dq */
872 			/*
873 			 * Bit# Deskew <- Bit# Deskew - last / first
874 			 * failing bit Deskew For all bits (per PUP)
875 			 * (minimize delay elements)
876 			 */
877 			DEBUG_PBS_S("DQ");
878 			DEBUG_PBS_D(dq, 1);
879 			DEBUG_PBS_S("-");
880 			DEBUG_PBS_D(skew_array[(pup * DQ_NUM) + dq], 2);
881 			DEBUG_PBS_S(", ");
882 		}
883 		DEBUG_PBS_S("\n");
884 	}
885 
886 	/* Return ADLL to default value */
887 	ddr3_write_pup_reg(PUP_DQS_RD, CS0, PUP_BC, 0, INIT_RL_DELAY);
888 
889 	/* Set averaged PBS results */
890 	ddr3_set_pbs_results(dram_info, 0);
891 
892 	/* Disable SW override - Must be in a different stage */
893 	/* [0]=0 - Enable SW override  */
894 	reg = reg_read(REG_DRAM_TRAINING_2_ADDR);
895 	reg &= ~(1 << REG_DRAM_TRAINING_2_SW_OVRD_OFFS);
896 	/* 0x15B8 - Training SW 2 Register */
897 	reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
898 
899 	reg = reg_read(REG_DRAM_TRAINING_1_ADDR) |
900 		(1 << REG_DRAM_TRAINING_1_TRNBPOINT_OFFS);
901 	reg_write(REG_DRAM_TRAINING_1_ADDR, reg);
902 
903 	DEBUG_PBS_FULL_S("DDR3 - PBS RX - ended successfuly\n");
904 
905 	return MV_OK;
906 }
907 
908 /*
909  * Name:     ddr3_rx_shift_dqs_to_first_fail
910  * Desc:     Execute the Rx shift DQ phase.
911  * Args:     dram_info           ddr3 training information struct
912  *           cur_pup             bit array of the function active pups.
913  *           pbs_pattern_idx     Index of PBS pattern
914  * Notes:
915  * Returns:  MV_OK if success, other error code if fail.
916  */
917 static int ddr3_rx_shift_dqs_to_first_fail(MV_DRAM_INFO *dram_info, u32 cur_pup,
918 					   u32 pbs_pattern_idx, u32 ecc)
919 {
920 	u32 unlock_pup;		/* bit array of unlock pups  */
921 	u32 new_lockup_pup;	/* bit array of compare failed pups */
922 	u32 adll_val = MAX_DELAY;
923 	u32 dqs_deskew_val = 0;	/* current value of DQS PBS deskew */
924 	u32 cur_max_pup, pup, pass_pup;
925 	u32 *pattern_ptr;
926 
927 	/* Choose pattern */
928 	switch (dram_info->ddr_width) {
929 #if defined(MV88F672X)
930 	case 16:
931 		pattern_ptr = (u32 *)&pbs_pattern[pbs_pattern_idx];
932 		break;
933 #endif
934 	case 32:
935 		pattern_ptr = (u32 *)&pbs_pattern_32b[pbs_pattern_idx];
936 		break;
937 #if defined(MV88F78X60)
938 	case 64:
939 		pattern_ptr = (u32 *)&pbs_pattern_64b[pbs_pattern_idx];
940 		break;
941 #endif
942 	default:
943 		return MV_FAIL;
944 	}
945 
946 	/* Set current pup number */
947 	if (cur_pup == 0x1)	/* Ecc mode */
948 		cur_max_pup = 1;
949 	else
950 		cur_max_pup = dram_info->num_of_std_pups;
951 
952 	unlock_pup = cur_pup;	/* '1' for each unlocked pup */
953 
954 	DEBUG_PBS_FULL_S("DDR3 - PBS RX - Shift DQS - Starting...\n");
955 
956 	/* Set DQS ADLL to MAX */
957 	DEBUG_PBS_FULL_S("DDR3 - PBS RX - Shift DQS - Set DQS ADLL to Max for all PUPs\n");
958 	for (pup = 0; pup < cur_max_pup; pup++)
959 		ddr3_write_pup_reg(PUP_DQS_RD, CS0, pup + ecc * ECC_PUP, 0,
960 				   MAX_DELAY);
961 
962 	/* Loop on all ADLL Vaules */
963 	do {
964 		/* Loop until found fail for all pups */
965 		new_lockup_pup = 0;
966 		if (MV_OK != ddr3_sdram_compare(dram_info, unlock_pup,
967 						&new_lockup_pup,
968 						pattern_ptr, LEN_PBS_PATTERN,
969 						SDRAM_PBS_I_OFFS +
970 						pbs_pattern_idx * SDRAM_PBS_NEXT_OFFS,
971 						0, 0, NULL, 0)) {
972 			DEBUG_PBS_S("DDR3 - PBS Rx - Shift DQS - MV_DDR3_TRAINING_ERR_PBS_SHIFT_QDS_SRAM_CMP(ddr3_sdram_compare)\n");
973 			return MV_DDR3_TRAINING_ERR_PBS_SHIFT_QDS_SRAM_CMP;
974 		}
975 
976 		if ((new_lockup_pup != 0) && (dqs_deskew_val <= 1)) {
977 			/* Fail on start with first deskew value */
978 			/* Decrement DQS ADLL */
979 			--adll_val;
980 			if (adll_val == ADLL_MIN) {
981 				DEBUG_PBS_S("DDR3 - PBS Rx - Shift DQS - fail on start with first deskew value\n");
982 				return MV_DDR3_TRAINING_ERR_PBS_SHIFT_QDS_SRAM_CMP;
983 			}
984 			ddr3_write_pup_reg(PUP_DQS_RD, CS0, pup + ecc * ECC_PUP,
985 					   0, adll_val);
986 			continue;
987 		}
988 
989 		/* Update all new locked pups */
990 		unlock_pup &= ~new_lockup_pup;
991 
992 		if ((unlock_pup == 0) || (dqs_deskew_val == MAX_PBS)) {
993 			if (dqs_deskew_val == MAX_PBS) {
994 				/*
995 				 * Reach max value of dqs deskew or get fail
996 				 * for all pups
997 				 */
998 				DEBUG_PBS_FULL_S("DDR3 - PBS RX - Shift DQS - DQS deskew reached maximum value\n");
999 			}
1000 			break;
1001 		}
1002 
1003 		DEBUG_PBS_FULL_S("DDR3 - PBS RX - Shift DQS - Inc DQS deskew for PUPs: ");
1004 		DEBUG_PBS_FULL_D(unlock_pup, 2);
1005 		DEBUG_PBS_FULL_C(", deskew = ", dqs_deskew_val, 2);
1006 
1007 		/* Increment DQS deskew elements - Only for unlocked pups */
1008 		dqs_deskew_val++;
1009 		for (pup = 0; pup < cur_max_pup; pup++) {
1010 			if (IS_PUP_ACTIVE(unlock_pup, pup) == 1) {
1011 				ddr3_write_pup_reg(PUP_PBS_RX + DQS_DQ_NUM, CS0,
1012 						   pup + ecc * ECC_PUP, 0,
1013 						   dqs_deskew_val);
1014 			}
1015 		}
1016 	} while (1);
1017 
1018 	DEBUG_PBS_FULL_S("DDR3 - PBS RX - Shift DQS - ADLL shift one step before fail\n");
1019 	/* Continue to ADLL shift one step before fail */
1020 	unlock_pup = cur_pup;
1021 	do {
1022 		/* Loop until pass compare for all pups */
1023 		new_lockup_pup = 0;
1024 		/* Read and compare results  */
1025 		if (MV_OK != ddr3_sdram_compare(dram_info, unlock_pup, &new_lockup_pup,
1026 						pattern_ptr, LEN_PBS_PATTERN,
1027 						SDRAM_PBS_I_OFFS +
1028 						pbs_pattern_idx * SDRAM_PBS_NEXT_OFFS,
1029 						1, 0, NULL, 0)) {
1030 			DEBUG_PBS_S("DDR3 - PBS Rx - Shift DQS - MV_DDR3_TRAINING_ERR_PBS_SHIFT_QDS_SRAM_CMP(ddr3_sdram_compare)\n");
1031 			return MV_DDR3_TRAINING_ERR_PBS_SHIFT_QDS_SRAM_CMP;
1032 		}
1033 
1034 		/*
1035 		 * Get mask for pup which passed so their adll will be
1036 		 * changed to 2 steps before fails
1037 		 */
1038 		pass_pup = unlock_pup & ~new_lockup_pup;
1039 
1040 		DEBUG_PBS_FULL_S("Shift DQS by 2 steps for PUPs: ");
1041 		DEBUG_PBS_FULL_D(pass_pup, 2);
1042 		DEBUG_PBS_FULL_C(", Set ADLL value = ", (adll_val - 2), 2);
1043 
1044 		/* Only for pass pups   */
1045 		for (pup = 0; pup < cur_max_pup; pup++) {
1046 			if (IS_PUP_ACTIVE(pass_pup, pup) == 1) {
1047 				ddr3_write_pup_reg(PUP_DQS_RD, CS0,
1048 						   pup + ecc * ECC_PUP, 0,
1049 						   (adll_val - 2));
1050 			}
1051 		}
1052 
1053 		/* Locked pups that compare success  */
1054 		unlock_pup &= new_lockup_pup;
1055 
1056 		if (unlock_pup == 0) {
1057 			/* All pups locked */
1058 			break;
1059 		}
1060 
1061 		/* Found error */
1062 		if (adll_val == 0) {
1063 			DEBUG_PBS_FULL_S("DDR3 - PBS Rx - Shift DQS - Adll reach min value\n");
1064 			return MV_DDR3_TRAINING_ERR_PBS_SHIFT_QDS_MAX_VAL;
1065 		}
1066 
1067 		/*
1068 		 * Decrement (Move Back to Left one phase - ADLL) dqs RX delay
1069 		 */
1070 		adll_val--;
1071 		for (pup = 0; pup < cur_max_pup; pup++) {
1072 			if (IS_PUP_ACTIVE(unlock_pup, pup) == 1) {
1073 				ddr3_write_pup_reg(PUP_DQS_RD, CS0,
1074 						   pup + ecc * ECC_PUP, 0,
1075 						   adll_val);
1076 			}
1077 		}
1078 	} while (1);
1079 
1080 	return MV_OK;
1081 }
1082 
1083 /*
1084  * lock_pups() extracted from ddr3_pbs_per_bit(). This just got too
1085  * much indented making it hard to read / edit.
1086  */
1087 static void lock_pups(u32 pup, u32 *pup_locked, u8 *unlock_pup_dq_array,
1088 		      u32 pbs_curr_val, u32 start_pbs, u32 ecc, int is_tx)
1089 {
1090 	u32 dq;
1091 	int idx;
1092 
1093 	/* Lock PBS value for all remaining PUPs bits */
1094 	DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - Lock PBS value for all remaining PUPs bits, pup ");
1095 	DEBUG_PBS_FULL_D(pup, 1);
1096 	DEBUG_PBS_FULL_C(" pbs value ", pbs_curr_val, 2);
1097 
1098 	idx = pup * (1 - ecc) + ecc * ECC_PUP;
1099 	*pup_locked &= ~(1 << pup);
1100 
1101 	for (dq = 0; dq < DQ_NUM; dq++) {
1102 		if (IS_PUP_ACTIVE(unlock_pup_dq_array[dq], pup) == 1) {
1103 			int offs;
1104 
1105 			/* Lock current dq */
1106 			unlock_pup_dq_array[dq] &= ~(1 << pup);
1107 			skew_array[(pup * DQ_NUM) + dq] = pbs_curr_val;
1108 
1109 			if (is_tx == 1)
1110 				offs = PUP_PBS_TX;
1111 			else
1112 				offs = PUP_PBS_RX;
1113 
1114 			ddr3_write_pup_reg(offs +
1115 					   pbs_dq_mapping[idx][dq], CS0,
1116 					   idx, 0, start_pbs);
1117 		}
1118 	}
1119 }
1120 
1121 /*
1122  * Name:     ddr3_pbs_per_bit
1123  * Desc:     Execute the Per Bit Skew phase.
1124  * Args:     start_over      Return whether need to start over the algorithm
1125  *           is_tx           Indicate whether Rx or Tx
1126  *           pcur_pup        bit array of the function active pups. return the
1127  *                           pups that need to repeat on the PBS
1128  *           pbs_pattern_idx Index of PBS pattern
1129  *
1130  * Notes:    Current implementation supports double activation of this function.
1131  *           i.e. in order to activate this function (using start_over) more than
1132  *           twice, the implementation should change.
1133  *           imlementation limitation are marked using
1134  *           ' CHIP-ONLY! - Implementation Limitation '
1135  * Returns:  MV_OK if success, other error code if fail.
1136  */
1137 static int ddr3_pbs_per_bit(MV_DRAM_INFO *dram_info, int *start_over, int is_tx,
1138 			    u32 *pcur_pup, u32 pbs_pattern_idx, u32 ecc)
1139 {
1140 	/*
1141 	 * Bit array to indicate if we already get fail on bit per pup & dq bit
1142 	 */
1143 	u8 unlock_pup_dq_array[DQ_NUM] = {
1144 		*pcur_pup, *pcur_pup, *pcur_pup, *pcur_pup, *pcur_pup,
1145 		*pcur_pup, *pcur_pup, *pcur_pup
1146 	};
1147 
1148 	u8 cmp_unlock_pup_dq_array[COUNT_PBS_COMP_RETRY_NUM][DQ_NUM];
1149 	u32 pup, dq;
1150 	/* value of pbs is according to RX or TX */
1151 	u32 start_pbs, last_pbs;
1152 	u32 pbs_curr_val;
1153 	/* bit array that indicates all dq of the pup locked */
1154 	u32 pup_locked;
1155 	u32 first_fail[MAX_PUP_NUM] = { 0 };	/* count first fail per pup */
1156 	/* indicates whether we get first fail per pup */
1157 	int first_failed[MAX_PUP_NUM] = { 0 };
1158 	/* bit array that indicates pup already get fail */
1159 	u32 sum_pup_fail;
1160 	/* use to calculate diff between curr pbs to first fail pbs */
1161 	u32 calc_pbs_diff;
1162 	u32 pbs_cmp_retry;
1163 	u32 max_pup;
1164 
1165 	/* Set init values for retry array - 8 retry */
1166 	for (pbs_cmp_retry = 0; pbs_cmp_retry < COUNT_PBS_COMP_RETRY_NUM;
1167 	     pbs_cmp_retry++) {
1168 		for (dq = 0; dq < DQ_NUM; dq++)
1169 			cmp_unlock_pup_dq_array[pbs_cmp_retry][dq] = *pcur_pup;
1170 	}
1171 
1172 	memset(&skew_array, 0, MAX_PUP_NUM * DQ_NUM * sizeof(u32));
1173 
1174 	DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - Started\n");
1175 
1176 	/* The pbs value depends if rx or tx */
1177 	if (is_tx == 1) {
1178 		start_pbs = MIN_PBS;
1179 		last_pbs = MAX_PBS;
1180 	} else {
1181 		start_pbs = MAX_PBS;
1182 		last_pbs = MIN_PBS;
1183 	}
1184 
1185 	pbs_curr_val = start_pbs;
1186 	pup_locked = *pcur_pup;
1187 
1188 	/* Set current pup number */
1189 	if (pup_locked == 0x1)	/* Ecc mode */
1190 		max_pup = 1;
1191 	else
1192 		max_pup = dram_info->num_of_std_pups;
1193 
1194 	do {
1195 		/* Increment/ decrement PBS for un-lock bits only */
1196 		if (is_tx == 1)
1197 			pbs_curr_val++;
1198 		else
1199 			pbs_curr_val--;
1200 
1201 		/* Set Current PBS delay  */
1202 		for (dq = 0; dq < DQ_NUM; dq++) {
1203 			/* Check DQ bits to see if locked in all pups */
1204 			if (unlock_pup_dq_array[dq] == 0) {
1205 				DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - All pups are locked for DQ ");
1206 				DEBUG_PBS_FULL_D(dq, 1);
1207 				DEBUG_PBS_FULL_S("\n");
1208 				continue;
1209 			}
1210 
1211 			for (pup = 0; pup < max_pup; pup++) {
1212 				int idx;
1213 
1214 				idx = pup * (1 - ecc) + ecc * ECC_PUP;
1215 
1216 				if (IS_PUP_ACTIVE(unlock_pup_dq_array[dq], pup)
1217 				    == 0)
1218 					continue;
1219 
1220 				if (is_tx == 1)
1221 					ddr3_write_pup_reg(
1222 						PUP_PBS_TX + pbs_dq_mapping[idx][dq],
1223 						CS0, idx, 0, pbs_curr_val);
1224 				else
1225 					ddr3_write_pup_reg(
1226 						PUP_PBS_RX + pbs_dq_mapping[idx][dq],
1227 						CS0, idx, 0, pbs_curr_val);
1228 			}
1229 		}
1230 
1231 		/*
1232 		 * Write Read and compare results - run the test
1233 		 * DDR_PBS_COMP_RETRY_NUM times
1234 		 */
1235 		/* Run number of read and write to verify */
1236 		for (pbs_cmp_retry = 0;
1237 		     pbs_cmp_retry < COUNT_PBS_COMP_RETRY_NUM;
1238 		     pbs_cmp_retry++) {
1239 
1240 			if (MV_OK !=
1241 			    ddr3_sdram_pbs_compare(dram_info, pup_locked, is_tx,
1242 						   pbs_pattern_idx,
1243 						   pbs_curr_val, start_pbs,
1244 						   skew_array,
1245 						   cmp_unlock_pup_dq_array
1246 						   [pbs_cmp_retry], ecc))
1247 				return MV_FAIL;
1248 
1249 			for (pup = 0; pup < max_pup; pup++) {
1250 				for (dq = 0; dq < DQ_NUM; dq++) {
1251 					if ((IS_PUP_ACTIVE(unlock_pup_dq_array[dq],
1252 							   pup) == 1)
1253 					    && (IS_PUP_ACTIVE(cmp_unlock_pup_dq_array
1254 					      [pbs_cmp_retry][dq],
1255 					      pup) == 0)) {
1256 						DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - PbsCurrVal: ");
1257 						DEBUG_PBS_FULL_D(pbs_curr_val, 2);
1258 						DEBUG_PBS_FULL_S(" PUP: ");
1259 						DEBUG_PBS_FULL_D(pup, 1);
1260 						DEBUG_PBS_FULL_S(" DQ: ");
1261 						DEBUG_PBS_FULL_D(dq, 1);
1262 						DEBUG_PBS_FULL_S(" - failed\n");
1263 					}
1264 				}
1265 			}
1266 
1267 			for (dq = 0; dq < DQ_NUM; dq++) {
1268 				unlock_pup_dq_array[dq] &=
1269 				    cmp_unlock_pup_dq_array[pbs_cmp_retry][dq];
1270 			}
1271 		}
1272 
1273 		pup_locked = 0;
1274 		sum_pup_fail = *pcur_pup;
1275 
1276 		/* Check which DQ is failed */
1277 		for (dq = 0; dq < DQ_NUM; dq++) {
1278 			/* Summarize the locked pup */
1279 			pup_locked |= unlock_pup_dq_array[dq];
1280 
1281 			/* Check if get fail */
1282 			sum_pup_fail &= unlock_pup_dq_array[dq];
1283 		}
1284 
1285 		/* If all PUPS are locked in all DQ - Break */
1286 		if (pup_locked == 0) {
1287 			/* All pups are locked */
1288 			*start_over = 0;
1289 			DEBUG_PBS_FULL_S("DDR3 - PBS Per bit -  All bit in all pups are successfully locked\n");
1290 			break;
1291 		}
1292 
1293 		/* PBS deskew elements reach max ? */
1294 		if (pbs_curr_val == last_pbs) {
1295 			DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - PBS deskew elements reach max\n");
1296 			/* CHIP-ONLY! - Implementation Limitation */
1297 			*start_over = (sum_pup_fail != 0) && (!(*start_over));
1298 			*pcur_pup = pup_locked;
1299 
1300 			DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - StartOver: ");
1301 			DEBUG_PBS_FULL_D(*start_over, 1);
1302 			DEBUG_PBS_FULL_S("  pup_locked: ");
1303 			DEBUG_PBS_FULL_D(pup_locked, 2);
1304 			DEBUG_PBS_FULL_S("  sum_pup_fail: ");
1305 			DEBUG_PBS_FULL_D(sum_pup_fail, 2);
1306 			DEBUG_PBS_FULL_S("\n");
1307 
1308 			/* Lock PBS value for all remaining  bits */
1309 			for (pup = 0; pup < max_pup; pup++) {
1310 				/* Check if current pup already received error */
1311 				if (IS_PUP_ACTIVE(pup_locked, pup) == 1) {
1312 					/* Valid pup for current function */
1313 					if (IS_PUP_ACTIVE(sum_pup_fail, pup) ==
1314 					    1 && (*start_over == 1)) {
1315 						DEBUG_PBS_FULL_C("DDR3 - PBS Per bit - skipping lock of pup (first loop of pbs)",
1316 								 pup, 1);
1317 						continue;
1318 					} else
1319 					    if (IS_PUP_ACTIVE(sum_pup_fail, pup)
1320 						== 1) {
1321 						DEBUG_PBS_FULL_C("DDR3 - PBS Per bit - Locking pup %d (even though it wasn't supposed to be locked)",
1322 								 pup, 1);
1323 					}
1324 
1325 					/* Already got fail on the PUP */
1326 					/* Lock PBS value for all remaining bits */
1327 					DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - Locking remaning DQs for pup - ");
1328 					DEBUG_PBS_FULL_D(pup, 1);
1329 					DEBUG_PBS_FULL_S(": ");
1330 
1331 					for (dq = 0; dq < DQ_NUM; dq++) {
1332 						if (IS_PUP_ACTIVE
1333 						    (unlock_pup_dq_array[dq],
1334 						     pup) == 1) {
1335 							DEBUG_PBS_FULL_D(dq, 1);
1336 							DEBUG_PBS_FULL_S(",");
1337 							/* set current PBS */
1338 							skew_array[((pup) *
1339 								    DQ_NUM) +
1340 								   dq] =
1341 							    pbs_curr_val;
1342 						}
1343 					}
1344 
1345 					if (*start_over == 1) {
1346 						/*
1347 						 * Reset this pup bit - when
1348 						 * restart the PBS, ignore this
1349 						 * pup
1350 						 */
1351 						*pcur_pup &= ~(1 << pup);
1352 					}
1353 					DEBUG_PBS_FULL_S("\n");
1354 				} else {
1355 					DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - Pup ");
1356 					DEBUG_PBS_FULL_D(pup, 1);
1357 					DEBUG_PBS_FULL_C(" is not set in puplocked - ",
1358 							 pup_locked, 1);
1359 				}
1360 			}
1361 
1362 			/* Need to start the PBS again */
1363 			if (*start_over == 1) {
1364 				DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - false fail - returning to start\n");
1365 				return MV_OK;
1366 			}
1367 			break;
1368 		}
1369 
1370 		/* Diff Check */
1371 		for (pup = 0; pup < max_pup; pup++) {
1372 			if (IS_PUP_ACTIVE(pup_locked, pup) == 1) {
1373 				/* pup is not locked */
1374 				if (first_failed[pup] == 0) {
1375 					/* No first fail until now */
1376 					if (IS_PUP_ACTIVE(sum_pup_fail, pup) ==
1377 					    0) {
1378 						/* Get first fail */
1379 						DEBUG_PBS_FULL_C("DDR3 - PBS Per bit - First fail in pup ",
1380 								 pup, 1);
1381 						first_failed[pup] = 1;
1382 						first_fail[pup] = pbs_curr_val;
1383 					}
1384 				} else {
1385 					/* Already got first fail */
1386 					if (is_tx == 1) {
1387 						/* TX - inc pbs */
1388 						calc_pbs_diff =	pbs_curr_val -
1389 							first_fail[pup];
1390 					} else {
1391 						/* RX - dec pbs */
1392 						calc_pbs_diff = first_fail[pup] -
1393 							pbs_curr_val;
1394 					}
1395 
1396 					if (calc_pbs_diff >= PBS_DIFF_LIMIT) {
1397 						lock_pups(pup, &pup_locked,
1398 							  unlock_pup_dq_array,
1399 							  pbs_curr_val,
1400 							  start_pbs, ecc, is_tx);
1401 					}
1402 				}
1403 			}
1404 		}
1405 	} while (1);
1406 
1407 	return MV_OK;
1408 }
1409 
1410 /*
1411  * Name:         ddr3_set_pbs_results
1412  * Desc:         Set to HW the PBS phase results.
1413  * Args:         is_tx       Indicates whether to set Tx or RX results
1414  * Notes:
1415  * Returns:      MV_OK if success, other error code if fail.
1416  */
1417 static int ddr3_set_pbs_results(MV_DRAM_INFO *dram_info, int is_tx)
1418 {
1419 	u32 pup, phys_pup, dq;
1420 	u32 max_pup;		/* number of valid pups */
1421 	u32 pbs_min;		/* minimal pbs val per pup */
1422 	u32 pbs_max;		/* maximum pbs val per pup */
1423 	u32 val[9];
1424 
1425 	max_pup = dram_info->num_of_total_pups;
1426 	DEBUG_PBS_FULL_S("DDR3 - PBS - ddr3_set_pbs_results:\n");
1427 
1428 	/* Loop for all dqs & pups */
1429 	for (pup = 0; pup < max_pup; pup++) {
1430 		if (pup == (max_pup - 1) && dram_info->ecc_ena)
1431 			phys_pup = ECC_PUP;
1432 		else
1433 			phys_pup = pup;
1434 
1435 		/*
1436 		 * To minimize delay elements, inc from pbs value the min
1437 		 * pbs val
1438 		 */
1439 		pbs_min = MAX_PBS;
1440 		pbs_max = 0;
1441 		for (dq = 0; dq < DQ_NUM; dq++) {
1442 			if (pbs_min > skew_array[(pup * DQ_NUM) + dq])
1443 				pbs_min = skew_array[(pup * DQ_NUM) + dq];
1444 
1445 			if (pbs_max < skew_array[(pup * DQ_NUM) + dq])
1446 				pbs_max = skew_array[(pup * DQ_NUM) + dq];
1447 		}
1448 
1449 		pbs_max -= pbs_min;
1450 
1451 		DEBUG_PBS_FULL_S("DDR3 - PBS - PUP");
1452 		DEBUG_PBS_FULL_D(phys_pup, 1);
1453 		DEBUG_PBS_FULL_S(": Min Val = ");
1454 		DEBUG_PBS_FULL_D(pbs_min, 2);
1455 		DEBUG_PBS_FULL_C(", Max Val = ", pbs_max, 2);
1456 
1457 		val[pup] = 0;
1458 
1459 		for (dq = 0; dq < DQ_NUM; dq++) {
1460 			int idx;
1461 			int offs;
1462 
1463 			/* Set skew value for all dq */
1464 			/*
1465 			 * Bit# Deskew <- Bit# Deskew - last / first
1466 			 * failing bit Deskew For all bits (per PUP)
1467 			 * (minimize delay elements)
1468 			 */
1469 
1470 			DEBUG_PBS_FULL_S("DQ");
1471 			DEBUG_PBS_FULL_D(dq, 1);
1472 			DEBUG_PBS_FULL_S("-");
1473 			DEBUG_PBS_FULL_D((skew_array[(pup * DQ_NUM) + dq] -
1474 					  pbs_min), 2);
1475 			DEBUG_PBS_FULL_S(", ");
1476 
1477 			idx = (pup * DQ_NUM) + dq;
1478 
1479 			if (is_tx == 1)
1480 				offs = PUP_PBS_TX;
1481 			else
1482 				offs = PUP_PBS_RX;
1483 
1484 			ddr3_write_pup_reg(offs + pbs_dq_mapping[phys_pup][dq],
1485 					   CS0, phys_pup, 0,
1486 					   skew_array[idx] - pbs_min);
1487 
1488 			if (is_tx == 1)
1489 				val[pup] += skew_array[idx] - pbs_min;
1490 		}
1491 
1492 		DEBUG_PBS_FULL_S("\n");
1493 
1494 		/* Set the DQS the half of the Max PBS of the DQs  */
1495 		if (is_tx == 1) {
1496 			ddr3_write_pup_reg(PUP_PBS_TX + 8, CS0, phys_pup, 0,
1497 					   pbs_max / 2);
1498 			ddr3_write_pup_reg(PUP_PBS_TX + 0xa, CS0, phys_pup, 0,
1499 					   val[pup] / 8);
1500 		} else
1501 			ddr3_write_pup_reg(PUP_PBS_RX + 8, CS0, phys_pup, 0,
1502 					   pbs_max / 2);
1503 	}
1504 
1505 	return MV_OK;
1506 }
1507 
1508 static void ddr3_pbs_write_pup_dqs_reg(u32 cs, u32 pup, u32 dqs_delay)
1509 {
1510 	u32 reg, delay;
1511 
1512 	reg = (ddr3_read_pup_reg(PUP_WL_MODE, cs, pup) & 0x3FF);
1513 	delay = reg & PUP_DELAY_MASK;
1514 	reg |= ((dqs_delay + delay) << REG_PHY_DQS_REF_DLY_OFFS);
1515 	reg |= REG_PHY_REGISTRY_FILE_ACCESS_OP_WR;
1516 	reg |= (pup << REG_PHY_PUP_OFFS);
1517 	reg |= ((0x4 * cs + PUP_WL_MODE) << REG_PHY_CS_OFFS);
1518 
1519 	reg_write(REG_PHY_REGISTRY_FILE_ACCESS_ADDR, reg);	/* 0x16A0 */
1520 	do {
1521 		reg = reg_read(REG_PHY_REGISTRY_FILE_ACCESS_ADDR) &
1522 			REG_PHY_REGISTRY_FILE_ACCESS_OP_DONE;
1523 	} while (reg);	/* Wait for '0' to mark the end of the transaction */
1524 
1525 	udelay(10);
1526 }
1527 
1528 /*
1529  * Set training patterns
1530  */
1531 int ddr3_load_pbs_patterns(MV_DRAM_INFO *dram_info)
1532 {
1533 	u32 cs, cs_count, cs_tmp;
1534 	u32 sdram_addr;
1535 	u32 *pattern_ptr0, *pattern_ptr1;
1536 
1537 	/* Choose pattern */
1538 	switch (dram_info->ddr_width) {
1539 #if defined(MV88F672X)
1540 	case 16:
1541 		pattern_ptr0 = (u32 *)&pbs_pattern[0];
1542 		pattern_ptr1 = (u32 *)&pbs_pattern[1];
1543 		break;
1544 #endif
1545 	case 32:
1546 		pattern_ptr0 = (u32 *)&pbs_pattern_32b[0];
1547 		pattern_ptr1 = (u32 *)&pbs_pattern_32b[1];
1548 		break;
1549 #if defined(MV88F78X60)
1550 	case 64:
1551 		pattern_ptr0 = (u32 *)&pbs_pattern_64b[0];
1552 		pattern_ptr1 = (u32 *)&pbs_pattern_64b[1];
1553 		break;
1554 #endif
1555 	default:
1556 		return MV_FAIL;
1557 	}
1558 
1559 	/* Loop for each CS */
1560 	for (cs = 0; cs < MAX_CS; cs++) {
1561 		if (dram_info->cs_ena & (1 << cs)) {
1562 			cs_count = 0;
1563 			for (cs_tmp = 0; cs_tmp < cs; cs_tmp++) {
1564 				if (dram_info->cs_ena & (1 << cs_tmp))
1565 					cs_count++;
1566 			}
1567 
1568 			/* Init PBS I pattern */
1569 			sdram_addr = (cs_count * (SDRAM_CS_SIZE + 1) +
1570 				      SDRAM_PBS_I_OFFS);
1571 			if (MV_OK !=
1572 			    ddr3_sdram_compare(dram_info, (u32) NULL, NULL,
1573 					       pattern_ptr0, LEN_STD_PATTERN,
1574 					       sdram_addr, 1, 0, NULL,
1575 					       0))
1576 				return MV_FAIL;
1577 
1578 			/* Init PBS II pattern */
1579 			sdram_addr = (cs_count * (SDRAM_CS_SIZE + 1) +
1580 				      SDRAM_PBS_II_OFFS);
1581 			if (MV_OK !=
1582 			    ddr3_sdram_compare(dram_info, (u32) NULL, NULL,
1583 					       pattern_ptr1, LEN_STD_PATTERN,
1584 					       sdram_addr, 1, 0, NULL,
1585 					       0))
1586 				return MV_FAIL;
1587 		}
1588 	}
1589 
1590 	return MV_OK;
1591 }
1592 #endif
1593