1 /* 2 * Copyright Altera Corporation (C) 2012-2015 3 * 4 * SPDX-License-Identifier: BSD-3-Clause 5 */ 6 7 #include <common.h> 8 #include <asm/io.h> 9 #include <asm/arch/sdram.h> 10 #include "sequencer.h" 11 #include "sequencer_auto.h" 12 #include "sequencer_auto_ac_init.h" 13 #include "sequencer_auto_inst_init.h" 14 #include "sequencer_defines.h" 15 16 static struct socfpga_sdr_rw_load_manager *sdr_rw_load_mgr_regs = 17 (struct socfpga_sdr_rw_load_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0x800); 18 19 static struct socfpga_sdr_rw_load_jump_manager *sdr_rw_load_jump_mgr_regs = 20 (struct socfpga_sdr_rw_load_jump_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0xC00); 21 22 static struct socfpga_sdr_reg_file *sdr_reg_file = 23 (struct socfpga_sdr_reg_file *)SDR_PHYGRP_REGFILEGRP_ADDRESS; 24 25 static struct socfpga_sdr_scc_mgr *sdr_scc_mgr = 26 (struct socfpga_sdr_scc_mgr *)(SDR_PHYGRP_SCCGRP_ADDRESS | 0xe00); 27 28 static struct socfpga_phy_mgr_cmd *phy_mgr_cmd = 29 (struct socfpga_phy_mgr_cmd *)SDR_PHYGRP_PHYMGRGRP_ADDRESS; 30 31 static struct socfpga_phy_mgr_cfg *phy_mgr_cfg = 32 (struct socfpga_phy_mgr_cfg *)(SDR_PHYGRP_PHYMGRGRP_ADDRESS | 0x40); 33 34 static struct socfpga_data_mgr *data_mgr = 35 (struct socfpga_data_mgr *)SDR_PHYGRP_DATAMGRGRP_ADDRESS; 36 37 static struct socfpga_sdr_ctrl *sdr_ctrl = 38 (struct socfpga_sdr_ctrl *)SDR_CTRLGRP_ADDRESS; 39 40 #define DELTA_D 1 41 42 /* 43 * In order to reduce ROM size, most of the selectable calibration steps are 44 * decided at compile time based on the user's calibration mode selection, 45 * as captured by the STATIC_CALIB_STEPS selection below. 46 * 47 * However, to support simulation-time selection of fast simulation mode, where 48 * we skip everything except the bare minimum, we need a few of the steps to 49 * be dynamic. 
In those cases, we either use the DYNAMIC_CALIB_STEPS for the 50 * check, which is based on the rtl-supplied value, or we dynamically compute 51 * the value to use based on the dynamically-chosen calibration mode 52 */ 53 54 #define DLEVEL 0 55 #define STATIC_IN_RTL_SIM 0 56 #define STATIC_SKIP_DELAY_LOOPS 0 57 58 #define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | \ 59 STATIC_SKIP_DELAY_LOOPS) 60 61 /* calibration steps requested by the rtl */ 62 uint16_t dyn_calib_steps; 63 64 /* 65 * To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option 66 * instead of static, we use boolean logic to select between 67 * non-skip and skip values 68 * 69 * The mask is set to include all bits when not-skipping, but is 70 * zero when skipping 71 */ 72 73 uint16_t skip_delay_mask; /* mask off bits when skipping/not-skipping */ 74 75 #define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \ 76 ((non_skip_value) & skip_delay_mask) 77 78 struct gbl_type *gbl; 79 struct param_type *param; 80 uint32_t curr_shadow_reg; 81 82 static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn, 83 uint32_t write_group, uint32_t use_dm, 84 uint32_t all_correct, uint32_t *bit_chk, uint32_t all_ranks); 85 86 static void set_failing_group_stage(uint32_t group, uint32_t stage, 87 uint32_t substage) 88 { 89 /* 90 * Only set the global stage if there was not been any other 91 * failing group 92 */ 93 if (gbl->error_stage == CAL_STAGE_NIL) { 94 gbl->error_substage = substage; 95 gbl->error_stage = stage; 96 gbl->error_group = group; 97 } 98 } 99 100 static void reg_file_set_group(u16 set_group) 101 { 102 clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff0000, set_group << 16); 103 } 104 105 static void reg_file_set_stage(u8 set_stage) 106 { 107 clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff, set_stage & 0xff); 108 } 109 110 static void reg_file_set_sub_stage(u8 set_sub_stage) 111 { 112 set_sub_stage &= 0xff; 113 clrsetbits_le32(&sdr_reg_file->cur_stage, 0xff00, set_sub_stage << 8); 114 } 115 116 /** 117 * phy_mgr_initialize() - Initialize PHY Manager 118 * 119 * Initialize PHY Manager. 120 */ 121 static void phy_mgr_initialize(void) 122 { 123 u32 ratio; 124 125 debug("%s:%d\n", __func__, __LINE__); 126 /* Calibration has control over path to memory */ 127 /* 128 * In Hard PHY this is a 2-bit control: 129 * 0: AFI Mux Select 130 * 1: DDIO Mux Select 131 */ 132 writel(0x3, &phy_mgr_cfg->mux_sel); 133 134 /* USER memory clock is not stable we begin initialization */ 135 writel(0, &phy_mgr_cfg->reset_mem_stbl); 136 137 /* USER calibration status all set to zero */ 138 writel(0, &phy_mgr_cfg->cal_status); 139 140 writel(0, &phy_mgr_cfg->cal_debug_info); 141 142 /* Init params only if we do NOT skip calibration. 
*/ 143 if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) 144 return; 145 146 ratio = RW_MGR_MEM_DQ_PER_READ_DQS / 147 RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS; 148 param->read_correct_mask_vg = (1 << ratio) - 1; 149 param->write_correct_mask_vg = (1 << ratio) - 1; 150 param->read_correct_mask = (1 << RW_MGR_MEM_DQ_PER_READ_DQS) - 1; 151 param->write_correct_mask = (1 << RW_MGR_MEM_DQ_PER_WRITE_DQS) - 1; 152 ratio = RW_MGR_MEM_DATA_WIDTH / 153 RW_MGR_MEM_DATA_MASK_WIDTH; 154 param->dm_correct_mask = (1 << ratio) - 1; 155 } 156 157 static void set_rank_and_odt_mask(const u32 rank, const u32 odt_mode) 158 { 159 u32 odt_mask_0 = 0; 160 u32 odt_mask_1 = 0; 161 u32 cs_and_odt_mask; 162 163 if (odt_mode == RW_MGR_ODT_MODE_OFF) { 164 odt_mask_0 = 0x0; 165 odt_mask_1 = 0x0; 166 } else { /* RW_MGR_ODT_MODE_READ_WRITE */ 167 switch (RW_MGR_MEM_NUMBER_OF_RANKS) { 168 case 1: /* 1 Rank */ 169 /* Read: ODT = 0 ; Write: ODT = 1 */ 170 odt_mask_0 = 0x0; 171 odt_mask_1 = 0x1; 172 break; 173 case 2: /* 2 Ranks */ 174 if (RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM == 1) { 175 /* - Dual-Slot , Single-Rank 176 * (1 chip-select per DIMM) 177 * OR 178 * - RDIMM, 4 total CS (2 CS per DIMM) 179 * means 2 DIMM 180 * Since MEM_NUMBER_OF_RANKS is 2 they are 181 * both single rank 182 * with 2 CS each (special for RDIMM) 183 * Read: Turn on ODT on the opposite rank 184 * Write: Turn on ODT on all ranks 185 */ 186 odt_mask_0 = 0x3 & ~(1 << rank); 187 odt_mask_1 = 0x3; 188 } else { 189 /* 190 * USER - Single-Slot , Dual-rank DIMMs 191 * (2 chip-selects per DIMM) 192 * USER Read: Turn on ODT off on all ranks 193 * USER Write: Turn on ODT on active rank 194 */ 195 odt_mask_0 = 0x0; 196 odt_mask_1 = 0x3 & (1 << rank); 197 } 198 break; 199 case 4: /* 4 Ranks */ 200 /* Read: 201 * ----------+-----------------------+ 202 * | | 203 * | ODT | 204 * Read From +-----------------------+ 205 * Rank | 3 | 2 | 1 | 0 | 206 * ----------+-----+-----+-----+-----+ 207 * 0 | 0 | 1 | 0 | 0 | 208 * 1 | 1 | 0 | 0 | 0 | 209 * 2 | 0 | 0 | 0 | 1 | 210 * 3 | 0 | 0 | 1 | 0 | 211 * ----------+-----+-----+-----+-----+ 212 * 213 * Write: 214 * ----------+-----------------------+ 215 * | | 216 * | ODT | 217 * Write To +-----------------------+ 218 * Rank | 3 | 2 | 1 | 0 | 219 * ----------+-----+-----+-----+-----+ 220 * 0 | 0 | 1 | 0 | 1 | 221 * 1 | 1 | 0 | 1 | 0 | 222 * 2 | 0 | 1 | 0 | 1 | 223 * 3 | 1 | 0 | 1 | 0 | 224 * ----------+-----+-----+-----+-----+ 225 */ 226 switch (rank) { 227 case 0: 228 odt_mask_0 = 0x4; 229 odt_mask_1 = 0x5; 230 break; 231 case 1: 232 odt_mask_0 = 0x8; 233 odt_mask_1 = 0xA; 234 break; 235 case 2: 236 odt_mask_0 = 0x1; 237 odt_mask_1 = 0x5; 238 break; 239 case 3: 240 odt_mask_0 = 0x2; 241 odt_mask_1 = 0xA; 242 break; 243 } 244 break; 245 } 246 } 247 248 cs_and_odt_mask = (0xFF & ~(1 << rank)) | 249 ((0xFF & odt_mask_0) << 8) | 250 ((0xFF & odt_mask_1) << 16); 251 writel(cs_and_odt_mask, SDR_PHYGRP_RWMGRGRP_ADDRESS | 252 RW_MGR_SET_CS_AND_ODT_MASK_OFFSET); 253 } 254 255 /** 256 * scc_mgr_set() - Set SCC Manager register 257 * @off: Base offset in SCC Manager space 258 * @grp: Read/Write group 259 * @val: Value to be set 260 * 261 * This function sets the SCC Manager (Scan Chain Control Manager) register. 262 */ 263 static void scc_mgr_set(u32 off, u32 grp, u32 val) 264 { 265 writel(val, SDR_PHYGRP_SCCGRP_ADDRESS | off | (grp << 2)); 266 } 267 268 /** 269 * scc_mgr_initialize() - Initialize SCC Manager registers 270 * 271 * Initialize SCC Manager registers. 
272 */ 273 static void scc_mgr_initialize(void) 274 { 275 /* 276 * Clear register file for HPS. 16 (2^4) is the size of the 277 * full register file in the scc mgr: 278 * RFILE_DEPTH = 1 + log2(MEM_DQ_PER_DQS + 1 + MEM_DM_PER_DQS + 279 * MEM_IF_READ_DQS_WIDTH - 1); 280 */ 281 int i; 282 283 for (i = 0; i < 16; i++) { 284 debug_cond(DLEVEL == 1, "%s:%d: Clearing SCC RFILE index %u\n", 285 __func__, __LINE__, i); 286 scc_mgr_set(SCC_MGR_HHP_RFILE_OFFSET, 0, i); 287 } 288 } 289 290 static void scc_mgr_set_dqdqs_output_phase(uint32_t write_group, uint32_t phase) 291 { 292 scc_mgr_set(SCC_MGR_DQDQS_OUT_PHASE_OFFSET, write_group, phase); 293 } 294 295 static void scc_mgr_set_dqs_bus_in_delay(uint32_t read_group, uint32_t delay) 296 { 297 scc_mgr_set(SCC_MGR_DQS_IN_DELAY_OFFSET, read_group, delay); 298 } 299 300 static void scc_mgr_set_dqs_en_phase(uint32_t read_group, uint32_t phase) 301 { 302 scc_mgr_set(SCC_MGR_DQS_EN_PHASE_OFFSET, read_group, phase); 303 } 304 305 static void scc_mgr_set_dqs_en_delay(uint32_t read_group, uint32_t delay) 306 { 307 scc_mgr_set(SCC_MGR_DQS_EN_DELAY_OFFSET, read_group, delay); 308 } 309 310 static void scc_mgr_set_dqs_io_in_delay(uint32_t delay) 311 { 312 scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS, 313 delay); 314 } 315 316 static void scc_mgr_set_dq_in_delay(uint32_t dq_in_group, uint32_t delay) 317 { 318 scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, dq_in_group, delay); 319 } 320 321 static void scc_mgr_set_dq_out1_delay(uint32_t dq_in_group, uint32_t delay) 322 { 323 scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, dq_in_group, delay); 324 } 325 326 static void scc_mgr_set_dqs_out1_delay(uint32_t delay) 327 { 328 scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS, 329 delay); 330 } 331 332 static void scc_mgr_set_dm_out1_delay(uint32_t dm, uint32_t delay) 333 { 334 scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, 335 RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + dm, 336 delay); 337 } 338 339 /* load up dqs config settings */ 340 static void scc_mgr_load_dqs(uint32_t dqs) 341 { 342 writel(dqs, &sdr_scc_mgr->dqs_ena); 343 } 344 345 /* load up dqs io config settings */ 346 static void scc_mgr_load_dqs_io(void) 347 { 348 writel(0, &sdr_scc_mgr->dqs_io_ena); 349 } 350 351 /* load up dq config settings */ 352 static void scc_mgr_load_dq(uint32_t dq_in_group) 353 { 354 writel(dq_in_group, &sdr_scc_mgr->dq_ena); 355 } 356 357 /* load up dm config settings */ 358 static void scc_mgr_load_dm(uint32_t dm) 359 { 360 writel(dm, &sdr_scc_mgr->dm_ena); 361 } 362 363 /** 364 * scc_mgr_set_all_ranks() - Set SCC Manager register for all ranks 365 * @off: Base offset in SCC Manager space 366 * @grp: Read/Write group 367 * @val: Value to be set 368 * @update: If non-zero, trigger SCC Manager update for all ranks 369 * 370 * This function sets the SCC Manager (Scan Chain Control Manager) register 371 * and optionally triggers the SCC update for all ranks. 
372 */ 373 static void scc_mgr_set_all_ranks(const u32 off, const u32 grp, const u32 val, 374 const int update) 375 { 376 u32 r; 377 378 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 379 r += NUM_RANKS_PER_SHADOW_REG) { 380 scc_mgr_set(off, grp, val); 381 382 if (update || (r == 0)) { 383 writel(grp, &sdr_scc_mgr->dqs_ena); 384 writel(0, &sdr_scc_mgr->update); 385 } 386 } 387 } 388 389 static void scc_mgr_set_dqs_en_phase_all_ranks(u32 read_group, u32 phase) 390 { 391 /* 392 * USER although the h/w doesn't support different phases per 393 * shadow register, for simplicity our scc manager modeling 394 * keeps different phase settings per shadow reg, and it's 395 * important for us to keep them in sync to match h/w. 396 * for efficiency, the scan chain update should occur only 397 * once to sr0. 398 */ 399 scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_PHASE_OFFSET, 400 read_group, phase, 0); 401 } 402 403 static void scc_mgr_set_dqdqs_output_phase_all_ranks(uint32_t write_group, 404 uint32_t phase) 405 { 406 /* 407 * USER although the h/w doesn't support different phases per 408 * shadow register, for simplicity our scc manager modeling 409 * keeps different phase settings per shadow reg, and it's 410 * important for us to keep them in sync to match h/w. 411 * for efficiency, the scan chain update should occur only 412 * once to sr0. 413 */ 414 scc_mgr_set_all_ranks(SCC_MGR_DQDQS_OUT_PHASE_OFFSET, 415 write_group, phase, 0); 416 } 417 418 static void scc_mgr_set_dqs_en_delay_all_ranks(uint32_t read_group, 419 uint32_t delay) 420 { 421 /* 422 * In shadow register mode, the T11 settings are stored in 423 * registers in the core, which are updated by the DQS_ENA 424 * signals. Not issuing the SCC_MGR_UPD command allows us to 425 * save lots of rank switching overhead, by calling 426 * select_shadow_regs_for_update with update_scan_chains 427 * set to 0. 428 */ 429 scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_DELAY_OFFSET, 430 read_group, delay, 1); 431 writel(0, &sdr_scc_mgr->update); 432 } 433 434 /** 435 * scc_mgr_set_oct_out1_delay() - Set OCT output delay 436 * @write_group: Write group 437 * @delay: Delay value 438 * 439 * This function sets the OCT output delay in SCC manager. 440 */ 441 static void scc_mgr_set_oct_out1_delay(const u32 write_group, const u32 delay) 442 { 443 const int ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH / 444 RW_MGR_MEM_IF_WRITE_DQS_WIDTH; 445 const int base = write_group * ratio; 446 int i; 447 /* 448 * Load the setting in the SCC manager 449 * Although OCT affects only write data, the OCT delay is controlled 450 * by the DQS logic block which is instantiated once per read group. 451 * For protocols where a write group consists of multiple read groups, 452 * the setting must be set multiple times. 453 */ 454 for (i = 0; i < ratio; i++) 455 scc_mgr_set(SCC_MGR_OCT_OUT1_DELAY_OFFSET, base + i, delay); 456 } 457 458 /** 459 * scc_mgr_set_hhp_extras() - Set HHP extras. 460 * 461 * Load the fixed setting in the SCC manager HHP extras. 
462 */ 463 static void scc_mgr_set_hhp_extras(void) 464 { 465 /* 466 * Load the fixed setting in the SCC manager 467 * bits: 0:0 = 1'b1 - DQS bypass 468 * bits: 1:1 = 1'b1 - DQ bypass 469 * bits: 4:2 = 3'b001 - rfifo_mode 470 * bits: 6:5 = 2'b01 - rfifo clock_select 471 * bits: 7:7 = 1'b0 - separate gating from ungating setting 472 * bits: 8:8 = 1'b0 - separate OE from Output delay setting 473 */ 474 const u32 value = (0 << 8) | (0 << 7) | (1 << 5) | 475 (1 << 2) | (1 << 1) | (1 << 0); 476 const u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | 477 SCC_MGR_HHP_GLOBALS_OFFSET | 478 SCC_MGR_HHP_EXTRAS_OFFSET; 479 480 debug_cond(DLEVEL == 1, "%s:%d Setting HHP Extras\n", 481 __func__, __LINE__); 482 writel(value, addr); 483 debug_cond(DLEVEL == 1, "%s:%d Done Setting HHP Extras\n", 484 __func__, __LINE__); 485 } 486 487 /** 488 * scc_mgr_zero_all() - Zero all DQS config 489 * 490 * Zero all DQS config. 491 */ 492 static void scc_mgr_zero_all(void) 493 { 494 int i, r; 495 496 /* 497 * USER Zero all DQS config settings, across all groups and all 498 * shadow registers 499 */ 500 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 501 r += NUM_RANKS_PER_SHADOW_REG) { 502 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { 503 /* 504 * The phases actually don't exist on a per-rank basis, 505 * but there's no harm updating them several times, so 506 * let's keep the code simple. 507 */ 508 scc_mgr_set_dqs_bus_in_delay(i, IO_DQS_IN_RESERVE); 509 scc_mgr_set_dqs_en_phase(i, 0); 510 scc_mgr_set_dqs_en_delay(i, 0); 511 } 512 513 for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) { 514 scc_mgr_set_dqdqs_output_phase(i, 0); 515 /* Arria V/Cyclone V don't have out2. */ 516 scc_mgr_set_oct_out1_delay(i, IO_DQS_OUT_RESERVE); 517 } 518 } 519 520 /* Multicast to all DQS group enables. */ 521 writel(0xff, &sdr_scc_mgr->dqs_ena); 522 writel(0, &sdr_scc_mgr->update); 523 } 524 525 /** 526 * scc_set_bypass_mode() - Set bypass mode and trigger SCC update 527 * @write_group: Write group 528 * 529 * Set bypass mode and trigger SCC update. 530 */ 531 static void scc_set_bypass_mode(const u32 write_group) 532 { 533 /* Multicast to all DQ enables. */ 534 writel(0xff, &sdr_scc_mgr->dq_ena); 535 writel(0xff, &sdr_scc_mgr->dm_ena); 536 537 /* Update current DQS IO enable. */ 538 writel(0, &sdr_scc_mgr->dqs_io_ena); 539 540 /* Update the DQS logic. */ 541 writel(write_group, &sdr_scc_mgr->dqs_ena); 542 543 /* Hit update. */ 544 writel(0, &sdr_scc_mgr->update); 545 } 546 547 /** 548 * scc_mgr_load_dqs_for_write_group() - Load DQS settings for Write Group 549 * @write_group: Write group 550 * 551 * Load DQS settings for Write Group, do not trigger SCC update. 552 */ 553 static void scc_mgr_load_dqs_for_write_group(const u32 write_group) 554 { 555 const int ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH / 556 RW_MGR_MEM_IF_WRITE_DQS_WIDTH; 557 const int base = write_group * ratio; 558 int i; 559 /* 560 * Load the setting in the SCC manager 561 * Although OCT affects only write data, the OCT delay is controlled 562 * by the DQS logic block which is instantiated once per read group. 563 * For protocols where a write group consists of multiple read groups, 564 * the setting must be set multiple times. 565 */ 566 for (i = 0; i < ratio; i++) 567 writel(base + i, &sdr_scc_mgr->dqs_ena); 568 } 569 570 /** 571 * scc_mgr_zero_group() - Zero all configs for a group 572 * 573 * Zero DQ, DM, DQS and OCT configs for a group. 
574 */ 575 static void scc_mgr_zero_group(const u32 write_group, const int out_only) 576 { 577 int i, r; 578 579 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 580 r += NUM_RANKS_PER_SHADOW_REG) { 581 /* Zero all DQ config settings. */ 582 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 583 scc_mgr_set_dq_out1_delay(i, 0); 584 if (!out_only) 585 scc_mgr_set_dq_in_delay(i, 0); 586 } 587 588 /* Multicast to all DQ enables. */ 589 writel(0xff, &sdr_scc_mgr->dq_ena); 590 591 /* Zero all DM config settings. */ 592 for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) 593 scc_mgr_set_dm_out1_delay(i, 0); 594 595 /* Multicast to all DM enables. */ 596 writel(0xff, &sdr_scc_mgr->dm_ena); 597 598 /* Zero all DQS IO settings. */ 599 if (!out_only) 600 scc_mgr_set_dqs_io_in_delay(0); 601 602 /* Arria V/Cyclone V don't have out2. */ 603 scc_mgr_set_dqs_out1_delay(IO_DQS_OUT_RESERVE); 604 scc_mgr_set_oct_out1_delay(write_group, IO_DQS_OUT_RESERVE); 605 scc_mgr_load_dqs_for_write_group(write_group); 606 607 /* Multicast to all DQS IO enables (only 1 in total). */ 608 writel(0, &sdr_scc_mgr->dqs_io_ena); 609 610 /* Hit update to zero everything. */ 611 writel(0, &sdr_scc_mgr->update); 612 } 613 } 614 615 /* 616 * apply and load a particular input delay for the DQ pins in a group 617 * group_bgn is the index of the first dq pin (in the write group) 618 */ 619 static void scc_mgr_apply_group_dq_in_delay(uint32_t group_bgn, uint32_t delay) 620 { 621 uint32_t i, p; 622 623 for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) { 624 scc_mgr_set_dq_in_delay(p, delay); 625 scc_mgr_load_dq(p); 626 } 627 } 628 629 /** 630 * scc_mgr_apply_group_dq_out1_delay() - Apply and load an output delay for the DQ pins in a group 631 * @delay: Delay value 632 * 633 * Apply and load a particular output delay for the DQ pins in a group. 634 */ 635 static void scc_mgr_apply_group_dq_out1_delay(const u32 delay) 636 { 637 int i; 638 639 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 640 scc_mgr_set_dq_out1_delay(i, delay); 641 scc_mgr_load_dq(i); 642 } 643 } 644 645 /* apply and load a particular output delay for the DM pins in a group */ 646 static void scc_mgr_apply_group_dm_out1_delay(uint32_t delay1) 647 { 648 uint32_t i; 649 650 for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) { 651 scc_mgr_set_dm_out1_delay(i, delay1); 652 scc_mgr_load_dm(i); 653 } 654 } 655 656 657 /* apply and load delay on both DQS and OCT out1 */ 658 static void scc_mgr_apply_group_dqs_io_and_oct_out1(uint32_t write_group, 659 uint32_t delay) 660 { 661 scc_mgr_set_dqs_out1_delay(delay); 662 scc_mgr_load_dqs_io(); 663 664 scc_mgr_set_oct_out1_delay(write_group, delay); 665 scc_mgr_load_dqs_for_write_group(write_group); 666 } 667 668 /** 669 * scc_mgr_apply_group_all_out_delay_add() - Apply a delay to the entire output side: DQ, DM, DQS, OCT 670 * @write_group: Write group 671 * @delay: Delay value 672 * 673 * Apply a delay to the entire output side: DQ, DM, DQS, OCT. 
674 */ 675 static void scc_mgr_apply_group_all_out_delay_add(const u32 write_group, 676 const u32 delay) 677 { 678 u32 i, new_delay; 679 680 /* DQ shift */ 681 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) 682 scc_mgr_load_dq(i); 683 684 /* DM shift */ 685 for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) 686 scc_mgr_load_dm(i); 687 688 /* DQS shift */ 689 new_delay = READ_SCC_DQS_IO_OUT2_DELAY + delay; 690 if (new_delay > IO_IO_OUT2_DELAY_MAX) { 691 debug_cond(DLEVEL == 1, 692 "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n", 693 __func__, __LINE__, write_group, delay, new_delay, 694 IO_IO_OUT2_DELAY_MAX, 695 new_delay - IO_IO_OUT2_DELAY_MAX); 696 new_delay -= IO_IO_OUT2_DELAY_MAX; 697 scc_mgr_set_dqs_out1_delay(new_delay); 698 } 699 700 scc_mgr_load_dqs_io(); 701 702 /* OCT shift */ 703 new_delay = READ_SCC_OCT_OUT2_DELAY + delay; 704 if (new_delay > IO_IO_OUT2_DELAY_MAX) { 705 debug_cond(DLEVEL == 1, 706 "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n", 707 __func__, __LINE__, write_group, delay, 708 new_delay, IO_IO_OUT2_DELAY_MAX, 709 new_delay - IO_IO_OUT2_DELAY_MAX); 710 new_delay -= IO_IO_OUT2_DELAY_MAX; 711 scc_mgr_set_oct_out1_delay(write_group, new_delay); 712 } 713 714 scc_mgr_load_dqs_for_write_group(write_group); 715 } 716 717 /** 718 * scc_mgr_apply_group_all_out_delay_add() - Apply a delay to the entire output side to all ranks 719 * @write_group: Write group 720 * @delay: Delay value 721 * 722 * Apply a delay to the entire output side (DQ, DM, DQS, OCT) to all ranks. 723 */ 724 static void 725 scc_mgr_apply_group_all_out_delay_add_all_ranks(const u32 write_group, 726 const u32 delay) 727 { 728 int r; 729 730 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 731 r += NUM_RANKS_PER_SHADOW_REG) { 732 scc_mgr_apply_group_all_out_delay_add(write_group, delay); 733 writel(0, &sdr_scc_mgr->update); 734 } 735 } 736 737 /** 738 * set_jump_as_return() - Return instruction optimization 739 * 740 * Optimization used to recover some slots in ddr3 inst_rom could be 741 * applied to other protocols if we wanted to 742 */ 743 static void set_jump_as_return(void) 744 { 745 /* 746 * To save space, we replace return with jump to special shared 747 * RETURN instruction so we set the counter to large value so that 748 * we always jump. 749 */ 750 writel(0xff, &sdr_rw_load_mgr_regs->load_cntr0); 751 writel(RW_MGR_RETURN, &sdr_rw_load_jump_mgr_regs->load_jump_add0); 752 } 753 754 /* 755 * should always use constants as argument to ensure all computations are 756 * performed at compile time 757 */ 758 static void delay_for_n_mem_clocks(const uint32_t clocks) 759 { 760 uint32_t afi_clocks; 761 uint8_t inner = 0; 762 uint8_t outer = 0; 763 uint16_t c_loop = 0; 764 765 debug("%s:%d: clocks=%u ... 
start\n", __func__, __LINE__, clocks); 766 767 768 afi_clocks = (clocks + AFI_RATE_RATIO-1) / AFI_RATE_RATIO; 769 /* scale (rounding up) to get afi clocks */ 770 771 /* 772 * Note, we don't bother accounting for being off a little bit 773 * because of a few extra instructions in outer loops 774 * Note, the loops have a test at the end, and do the test before 775 * the decrement, and so always perform the loop 776 * 1 time more than the counter value 777 */ 778 if (afi_clocks == 0) { 779 ; 780 } else if (afi_clocks <= 0x100) { 781 inner = afi_clocks-1; 782 outer = 0; 783 c_loop = 0; 784 } else if (afi_clocks <= 0x10000) { 785 inner = 0xff; 786 outer = (afi_clocks-1) >> 8; 787 c_loop = 0; 788 } else { 789 inner = 0xff; 790 outer = 0xff; 791 c_loop = (afi_clocks-1) >> 16; 792 } 793 794 /* 795 * rom instructions are structured as follows: 796 * 797 * IDLE_LOOP2: jnz cntr0, TARGET_A 798 * IDLE_LOOP1: jnz cntr1, TARGET_B 799 * return 800 * 801 * so, when doing nested loops, TARGET_A is set to IDLE_LOOP2, and 802 * TARGET_B is set to IDLE_LOOP2 as well 803 * 804 * if we have no outer loop, though, then we can use IDLE_LOOP1 only, 805 * and set TARGET_B to IDLE_LOOP1 and we skip IDLE_LOOP2 entirely 806 * 807 * a little confusing, but it helps save precious space in the inst_rom 808 * and sequencer rom and keeps the delays more accurate and reduces 809 * overhead 810 */ 811 if (afi_clocks <= 0x100) { 812 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner), 813 &sdr_rw_load_mgr_regs->load_cntr1); 814 815 writel(RW_MGR_IDLE_LOOP1, 816 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 817 818 writel(RW_MGR_IDLE_LOOP1, SDR_PHYGRP_RWMGRGRP_ADDRESS | 819 RW_MGR_RUN_SINGLE_GROUP_OFFSET); 820 } else { 821 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner), 822 &sdr_rw_load_mgr_regs->load_cntr0); 823 824 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer), 825 &sdr_rw_load_mgr_regs->load_cntr1); 826 827 writel(RW_MGR_IDLE_LOOP2, 828 &sdr_rw_load_jump_mgr_regs->load_jump_add0); 829 830 writel(RW_MGR_IDLE_LOOP2, 831 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 832 833 /* hack to get around compiler not being smart enough */ 834 if (afi_clocks <= 0x10000) { 835 /* only need to run once */ 836 writel(RW_MGR_IDLE_LOOP2, SDR_PHYGRP_RWMGRGRP_ADDRESS | 837 RW_MGR_RUN_SINGLE_GROUP_OFFSET); 838 } else { 839 do { 840 writel(RW_MGR_IDLE_LOOP2, 841 SDR_PHYGRP_RWMGRGRP_ADDRESS | 842 RW_MGR_RUN_SINGLE_GROUP_OFFSET); 843 } while (c_loop-- != 0); 844 } 845 } 846 debug("%s:%d clocks=%u ... end\n", __func__, __LINE__, clocks); 847 } 848 849 /** 850 * rw_mgr_mem_init_load_regs() - Load instruction registers 851 * @cntr0: Counter 0 value 852 * @cntr1: Counter 1 value 853 * @cntr2: Counter 2 value 854 * @jump: Jump instruction value 855 * 856 * Load instruction registers. 
857 */ 858 static void rw_mgr_mem_init_load_regs(u32 cntr0, u32 cntr1, u32 cntr2, u32 jump) 859 { 860 uint32_t grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS | 861 RW_MGR_RUN_SINGLE_GROUP_OFFSET; 862 863 /* Load counters */ 864 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr0), 865 &sdr_rw_load_mgr_regs->load_cntr0); 866 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr1), 867 &sdr_rw_load_mgr_regs->load_cntr1); 868 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr2), 869 &sdr_rw_load_mgr_regs->load_cntr2); 870 871 /* Load jump address */ 872 writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add0); 873 writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add1); 874 writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add2); 875 876 /* Execute count instruction */ 877 writel(jump, grpaddr); 878 } 879 880 /** 881 * rw_mgr_mem_load_user() - Load user calibration values 882 * @fin1: Final instruction 1 883 * @fin2: Final instruction 2 884 * @precharge: If 1, precharge the banks at the end 885 * 886 * Load user calibration values and optionally precharge the banks. 887 */ 888 static void rw_mgr_mem_load_user(const u32 fin1, const u32 fin2, 889 const int precharge) 890 { 891 u32 grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS | 892 RW_MGR_RUN_SINGLE_GROUP_OFFSET; 893 u32 r; 894 895 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { 896 if (param->skip_ranks[r]) { 897 /* request to skip the rank */ 898 continue; 899 } 900 901 /* set rank */ 902 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); 903 904 /* precharge all banks ... */ 905 if (precharge) 906 writel(RW_MGR_PRECHARGE_ALL, grpaddr); 907 908 /* 909 * USER Use Mirror-ed commands for odd ranks if address 910 * mirrorring is on 911 */ 912 if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) { 913 set_jump_as_return(); 914 writel(RW_MGR_MRS2_MIRR, grpaddr); 915 delay_for_n_mem_clocks(4); 916 set_jump_as_return(); 917 writel(RW_MGR_MRS3_MIRR, grpaddr); 918 delay_for_n_mem_clocks(4); 919 set_jump_as_return(); 920 writel(RW_MGR_MRS1_MIRR, grpaddr); 921 delay_for_n_mem_clocks(4); 922 set_jump_as_return(); 923 writel(fin1, grpaddr); 924 } else { 925 set_jump_as_return(); 926 writel(RW_MGR_MRS2, grpaddr); 927 delay_for_n_mem_clocks(4); 928 set_jump_as_return(); 929 writel(RW_MGR_MRS3, grpaddr); 930 delay_for_n_mem_clocks(4); 931 set_jump_as_return(); 932 writel(RW_MGR_MRS1, grpaddr); 933 set_jump_as_return(); 934 writel(fin2, grpaddr); 935 } 936 937 if (precharge) 938 continue; 939 940 set_jump_as_return(); 941 writel(RW_MGR_ZQCL, grpaddr); 942 943 /* tZQinit = tDLLK = 512 ck cycles */ 944 delay_for_n_mem_clocks(512); 945 } 946 } 947 948 static void rw_mgr_mem_initialize(void) 949 { 950 debug("%s:%d\n", __func__, __LINE__); 951 952 /* The reset / cke part of initialization is broadcasted to all ranks */ 953 writel(RW_MGR_RANK_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS | 954 RW_MGR_SET_CS_AND_ODT_MASK_OFFSET); 955 956 /* 957 * Here's how you load register for a loop 958 * Counters are located @ 0x800 959 * Jump address are located @ 0xC00 960 * For both, registers 0 to 3 are selected using bits 3 and 2, like 961 * in 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C 962 * I know this ain't pretty, but Avalon bus throws away the 2 least 963 * significant bits 964 */ 965 966 /* start with memory RESET activated */ 967 968 /* tINIT = 200us */ 969 970 /* 971 * 200us @ 266MHz (3.75 ns) ~ 54000 clock cycles 972 * If a and b are the number of iteration in 2 nested loops 973 * it takes the following number of cycles to complete the operation: 974 * number_of_cycles = ((2 + n) * a + 2) * b 975 * where n is the 
number of instruction in the inner loop 976 * One possible solution is n = 0 , a = 256 , b = 106 => a = FF, 977 * b = 6A 978 */ 979 rw_mgr_mem_init_load_regs(SEQ_TINIT_CNTR0_VAL, SEQ_TINIT_CNTR1_VAL, 980 SEQ_TINIT_CNTR2_VAL, 981 RW_MGR_INIT_RESET_0_CKE_0); 982 983 /* indicate that memory is stable */ 984 writel(1, &phy_mgr_cfg->reset_mem_stbl); 985 986 /* 987 * transition the RESET to high 988 * Wait for 500us 989 */ 990 991 /* 992 * 500us @ 266MHz (3.75 ns) ~ 134000 clock cycles 993 * If a and b are the number of iteration in 2 nested loops 994 * it takes the following number of cycles to complete the operation 995 * number_of_cycles = ((2 + n) * a + 2) * b 996 * where n is the number of instruction in the inner loop 997 * One possible solution is n = 2 , a = 131 , b = 256 => a = 83, 998 * b = FF 999 */ 1000 rw_mgr_mem_init_load_regs(SEQ_TRESET_CNTR0_VAL, SEQ_TRESET_CNTR1_VAL, 1001 SEQ_TRESET_CNTR2_VAL, 1002 RW_MGR_INIT_RESET_1_CKE_0); 1003 1004 /* bring up clock enable */ 1005 1006 /* tXRP < 250 ck cycles */ 1007 delay_for_n_mem_clocks(250); 1008 1009 rw_mgr_mem_load_user(RW_MGR_MRS0_DLL_RESET_MIRR, RW_MGR_MRS0_DLL_RESET, 1010 0); 1011 } 1012 1013 /* 1014 * At the end of calibration we have to program the user settings in, and 1015 * USER hand off the memory to the user. 1016 */ 1017 static void rw_mgr_mem_handoff(void) 1018 { 1019 rw_mgr_mem_load_user(RW_MGR_MRS0_USER_MIRR, RW_MGR_MRS0_USER, 1); 1020 /* 1021 * USER need to wait tMOD (12CK or 15ns) time before issuing 1022 * other commands, but we will have plenty of NIOS cycles before 1023 * actual handoff so its okay. 1024 */ 1025 } 1026 1027 /* 1028 * performs a guaranteed read on the patterns we are going to use during a 1029 * read test to ensure memory works 1030 */ 1031 static uint32_t rw_mgr_mem_calibrate_read_test_patterns(uint32_t rank_bgn, 1032 uint32_t group, uint32_t num_tries, uint32_t *bit_chk, 1033 uint32_t all_ranks) 1034 { 1035 uint32_t r, vg; 1036 uint32_t correct_mask_vg; 1037 uint32_t tmp_bit_chk; 1038 uint32_t rank_end = all_ranks ? 
RW_MGR_MEM_NUMBER_OF_RANKS : 1039 (rank_bgn + NUM_RANKS_PER_SHADOW_REG); 1040 uint32_t addr; 1041 uint32_t base_rw_mgr; 1042 1043 *bit_chk = param->read_correct_mask; 1044 correct_mask_vg = param->read_correct_mask_vg; 1045 1046 for (r = rank_bgn; r < rank_end; r++) { 1047 if (param->skip_ranks[r]) 1048 /* request to skip the rank */ 1049 continue; 1050 1051 /* set rank */ 1052 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); 1053 1054 /* Load up a constant bursts of read commands */ 1055 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0); 1056 writel(RW_MGR_GUARANTEED_READ, 1057 &sdr_rw_load_jump_mgr_regs->load_jump_add0); 1058 1059 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1); 1060 writel(RW_MGR_GUARANTEED_READ_CONT, 1061 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 1062 1063 tmp_bit_chk = 0; 1064 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS-1; ; vg--) { 1065 /* reset the fifos to get pointers to known state */ 1066 1067 writel(0, &phy_mgr_cmd->fifo_reset); 1068 writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS | 1069 RW_MGR_RESET_READ_DATAPATH_OFFSET); 1070 1071 tmp_bit_chk = tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS 1072 / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS); 1073 1074 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1075 writel(RW_MGR_GUARANTEED_READ, addr + 1076 ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS + 1077 vg) << 2)); 1078 1079 base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS); 1080 tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & (~base_rw_mgr)); 1081 1082 if (vg == 0) 1083 break; 1084 } 1085 *bit_chk &= tmp_bit_chk; 1086 } 1087 1088 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1089 writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2)); 1090 1091 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 1092 debug_cond(DLEVEL == 1, "%s:%d test_load_patterns(%u,ALL) => (%u == %u) =>\ 1093 %lu\n", __func__, __LINE__, group, *bit_chk, param->read_correct_mask, 1094 (long unsigned int)(*bit_chk == param->read_correct_mask)); 1095 return *bit_chk == param->read_correct_mask; 1096 } 1097 1098 static uint32_t rw_mgr_mem_calibrate_read_test_patterns_all_ranks 1099 (uint32_t group, uint32_t num_tries, uint32_t *bit_chk) 1100 { 1101 return rw_mgr_mem_calibrate_read_test_patterns(0, group, 1102 num_tries, bit_chk, 1); 1103 } 1104 1105 /* load up the patterns we are going to use during a read test */ 1106 static void rw_mgr_mem_calibrate_read_load_patterns(uint32_t rank_bgn, 1107 uint32_t all_ranks) 1108 { 1109 uint32_t r; 1110 uint32_t rank_end = all_ranks ? 
RW_MGR_MEM_NUMBER_OF_RANKS : 1111 (rank_bgn + NUM_RANKS_PER_SHADOW_REG); 1112 1113 debug("%s:%d\n", __func__, __LINE__); 1114 for (r = rank_bgn; r < rank_end; r++) { 1115 if (param->skip_ranks[r]) 1116 /* request to skip the rank */ 1117 continue; 1118 1119 /* set rank */ 1120 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); 1121 1122 /* Load up a constant bursts */ 1123 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0); 1124 1125 writel(RW_MGR_GUARANTEED_WRITE_WAIT0, 1126 &sdr_rw_load_jump_mgr_regs->load_jump_add0); 1127 1128 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1); 1129 1130 writel(RW_MGR_GUARANTEED_WRITE_WAIT1, 1131 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 1132 1133 writel(0x04, &sdr_rw_load_mgr_regs->load_cntr2); 1134 1135 writel(RW_MGR_GUARANTEED_WRITE_WAIT2, 1136 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 1137 1138 writel(0x04, &sdr_rw_load_mgr_regs->load_cntr3); 1139 1140 writel(RW_MGR_GUARANTEED_WRITE_WAIT3, 1141 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 1142 1143 writel(RW_MGR_GUARANTEED_WRITE, SDR_PHYGRP_RWMGRGRP_ADDRESS | 1144 RW_MGR_RUN_SINGLE_GROUP_OFFSET); 1145 } 1146 1147 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 1148 } 1149 1150 /* 1151 * try a read and see if it returns correct data back. has dummy reads 1152 * inserted into the mix used to align dqs enable. has more thorough checks 1153 * than the regular read test. 1154 */ 1155 static uint32_t rw_mgr_mem_calibrate_read_test(uint32_t rank_bgn, uint32_t group, 1156 uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk, 1157 uint32_t all_groups, uint32_t all_ranks) 1158 { 1159 uint32_t r, vg; 1160 uint32_t correct_mask_vg; 1161 uint32_t tmp_bit_chk; 1162 uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS : 1163 (rank_bgn + NUM_RANKS_PER_SHADOW_REG); 1164 uint32_t addr; 1165 uint32_t base_rw_mgr; 1166 1167 *bit_chk = param->read_correct_mask; 1168 correct_mask_vg = param->read_correct_mask_vg; 1169 1170 uint32_t quick_read_mode = (((STATIC_CALIB_STEPS) & 1171 CALIB_SKIP_DELAY_SWEEPS) && ENABLE_SUPER_QUICK_CALIBRATION); 1172 1173 for (r = rank_bgn; r < rank_end; r++) { 1174 if (param->skip_ranks[r]) 1175 /* request to skip the rank */ 1176 continue; 1177 1178 /* set rank */ 1179 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); 1180 1181 writel(0x10, &sdr_rw_load_mgr_regs->load_cntr1); 1182 1183 writel(RW_MGR_READ_B2B_WAIT1, 1184 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 1185 1186 writel(0x10, &sdr_rw_load_mgr_regs->load_cntr2); 1187 writel(RW_MGR_READ_B2B_WAIT2, 1188 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 1189 1190 if (quick_read_mode) 1191 writel(0x1, &sdr_rw_load_mgr_regs->load_cntr0); 1192 /* need at least two (1+1) reads to capture failures */ 1193 else if (all_groups) 1194 writel(0x06, &sdr_rw_load_mgr_regs->load_cntr0); 1195 else 1196 writel(0x32, &sdr_rw_load_mgr_regs->load_cntr0); 1197 1198 writel(RW_MGR_READ_B2B, 1199 &sdr_rw_load_jump_mgr_regs->load_jump_add0); 1200 if (all_groups) 1201 writel(RW_MGR_MEM_IF_READ_DQS_WIDTH * 1202 RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1, 1203 &sdr_rw_load_mgr_regs->load_cntr3); 1204 else 1205 writel(0x0, &sdr_rw_load_mgr_regs->load_cntr3); 1206 1207 writel(RW_MGR_READ_B2B, 1208 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 1209 1210 tmp_bit_chk = 0; 1211 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS-1; ; vg--) { 1212 /* reset the fifos to get pointers to known state */ 1213 writel(0, &phy_mgr_cmd->fifo_reset); 1214 writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS | 1215 RW_MGR_RESET_READ_DATAPATH_OFFSET); 1216 1217 tmp_bit_chk = 
tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS 1218 / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS); 1219 1220 if (all_groups) 1221 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_ALL_GROUPS_OFFSET; 1222 else 1223 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1224 1225 writel(RW_MGR_READ_B2B, addr + 1226 ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS + 1227 vg) << 2)); 1228 1229 base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS); 1230 tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~(base_rw_mgr)); 1231 1232 if (vg == 0) 1233 break; 1234 } 1235 *bit_chk &= tmp_bit_chk; 1236 } 1237 1238 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1239 writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2)); 1240 1241 if (all_correct) { 1242 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 1243 debug_cond(DLEVEL == 2, "%s:%d read_test(%u,ALL,%u) =>\ 1244 (%u == %u) => %lu", __func__, __LINE__, group, 1245 all_groups, *bit_chk, param->read_correct_mask, 1246 (long unsigned int)(*bit_chk == 1247 param->read_correct_mask)); 1248 return *bit_chk == param->read_correct_mask; 1249 } else { 1250 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 1251 debug_cond(DLEVEL == 2, "%s:%d read_test(%u,ONE,%u) =>\ 1252 (%u != %lu) => %lu\n", __func__, __LINE__, 1253 group, all_groups, *bit_chk, (long unsigned int)0, 1254 (long unsigned int)(*bit_chk != 0x00)); 1255 return *bit_chk != 0x00; 1256 } 1257 } 1258 1259 static uint32_t rw_mgr_mem_calibrate_read_test_all_ranks(uint32_t group, 1260 uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk, 1261 uint32_t all_groups) 1262 { 1263 return rw_mgr_mem_calibrate_read_test(0, group, num_tries, all_correct, 1264 bit_chk, all_groups, 1); 1265 } 1266 1267 static void rw_mgr_incr_vfifo(uint32_t grp, uint32_t *v) 1268 { 1269 writel(grp, &phy_mgr_cmd->inc_vfifo_hard_phy); 1270 (*v)++; 1271 } 1272 1273 static void rw_mgr_decr_vfifo(uint32_t grp, uint32_t *v) 1274 { 1275 uint32_t i; 1276 1277 for (i = 0; i < VFIFO_SIZE-1; i++) 1278 rw_mgr_incr_vfifo(grp, v); 1279 } 1280 1281 static int find_vfifo_read(uint32_t grp, uint32_t *bit_chk) 1282 { 1283 uint32_t v; 1284 uint32_t fail_cnt = 0; 1285 uint32_t test_status; 1286 1287 for (v = 0; v < VFIFO_SIZE; ) { 1288 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo %u\n", 1289 __func__, __LINE__, v); 1290 test_status = rw_mgr_mem_calibrate_read_test_all_ranks 1291 (grp, 1, PASS_ONE_BIT, bit_chk, 0); 1292 if (!test_status) { 1293 fail_cnt++; 1294 1295 if (fail_cnt == 2) 1296 break; 1297 } 1298 1299 /* fiddle with FIFO */ 1300 rw_mgr_incr_vfifo(grp, &v); 1301 } 1302 1303 if (v >= VFIFO_SIZE) { 1304 /* no failing read found!! 
Something must have gone wrong */ 1305 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo failed\n", 1306 __func__, __LINE__); 1307 return 0; 1308 } else { 1309 return v; 1310 } 1311 } 1312 1313 static int find_working_phase(uint32_t *grp, uint32_t *bit_chk, 1314 uint32_t dtaps_per_ptap, uint32_t *work_bgn, 1315 uint32_t *v, uint32_t *d, uint32_t *p, 1316 uint32_t *i, uint32_t *max_working_cnt) 1317 { 1318 uint32_t found_begin = 0; 1319 uint32_t tmp_delay = 0; 1320 uint32_t test_status; 1321 1322 for (*d = 0; *d <= dtaps_per_ptap; (*d)++, tmp_delay += 1323 IO_DELAY_PER_DQS_EN_DCHAIN_TAP) { 1324 *work_bgn = tmp_delay; 1325 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d); 1326 1327 for (*i = 0; *i < VFIFO_SIZE; (*i)++) { 1328 for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_bgn += 1329 IO_DELAY_PER_OPA_TAP) { 1330 scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p); 1331 1332 test_status = 1333 rw_mgr_mem_calibrate_read_test_all_ranks 1334 (*grp, 1, PASS_ONE_BIT, bit_chk, 0); 1335 1336 if (test_status) { 1337 *max_working_cnt = 1; 1338 found_begin = 1; 1339 break; 1340 } 1341 } 1342 1343 if (found_begin) 1344 break; 1345 1346 if (*p > IO_DQS_EN_PHASE_MAX) 1347 /* fiddle with FIFO */ 1348 rw_mgr_incr_vfifo(*grp, v); 1349 } 1350 1351 if (found_begin) 1352 break; 1353 } 1354 1355 if (*i >= VFIFO_SIZE) { 1356 /* cannot find working solution */ 1357 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: no vfifo/\ 1358 ptap/dtap\n", __func__, __LINE__); 1359 return 0; 1360 } else { 1361 return 1; 1362 } 1363 } 1364 1365 static void sdr_backup_phase(uint32_t *grp, uint32_t *bit_chk, 1366 uint32_t *work_bgn, uint32_t *v, uint32_t *d, 1367 uint32_t *p, uint32_t *max_working_cnt) 1368 { 1369 uint32_t found_begin = 0; 1370 uint32_t tmp_delay; 1371 1372 /* Special case code for backing up a phase */ 1373 if (*p == 0) { 1374 *p = IO_DQS_EN_PHASE_MAX; 1375 rw_mgr_decr_vfifo(*grp, v); 1376 } else { 1377 (*p)--; 1378 } 1379 tmp_delay = *work_bgn - IO_DELAY_PER_OPA_TAP; 1380 scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p); 1381 1382 for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_bgn; 1383 (*d)++, tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) { 1384 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d); 1385 1386 if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1, 1387 PASS_ONE_BIT, 1388 bit_chk, 0)) { 1389 found_begin = 1; 1390 *work_bgn = tmp_delay; 1391 break; 1392 } 1393 } 1394 1395 /* We have found a working dtap before the ptap found above */ 1396 if (found_begin == 1) 1397 (*max_working_cnt)++; 1398 1399 /* 1400 * Restore VFIFO to old state before we decremented it 1401 * (if needed). 
1402 */ 1403 (*p)++; 1404 if (*p > IO_DQS_EN_PHASE_MAX) { 1405 *p = 0; 1406 rw_mgr_incr_vfifo(*grp, v); 1407 } 1408 1409 scc_mgr_set_dqs_en_delay_all_ranks(*grp, 0); 1410 } 1411 1412 static int sdr_nonworking_phase(uint32_t *grp, uint32_t *bit_chk, 1413 uint32_t *work_bgn, uint32_t *v, uint32_t *d, 1414 uint32_t *p, uint32_t *i, uint32_t *max_working_cnt, 1415 uint32_t *work_end) 1416 { 1417 uint32_t found_end = 0; 1418 1419 (*p)++; 1420 *work_end += IO_DELAY_PER_OPA_TAP; 1421 if (*p > IO_DQS_EN_PHASE_MAX) { 1422 /* fiddle with FIFO */ 1423 *p = 0; 1424 rw_mgr_incr_vfifo(*grp, v); 1425 } 1426 1427 for (; *i < VFIFO_SIZE + 1; (*i)++) { 1428 for (; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_end 1429 += IO_DELAY_PER_OPA_TAP) { 1430 scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p); 1431 1432 if (!rw_mgr_mem_calibrate_read_test_all_ranks 1433 (*grp, 1, PASS_ONE_BIT, bit_chk, 0)) { 1434 found_end = 1; 1435 break; 1436 } else { 1437 (*max_working_cnt)++; 1438 } 1439 } 1440 1441 if (found_end) 1442 break; 1443 1444 if (*p > IO_DQS_EN_PHASE_MAX) { 1445 /* fiddle with FIFO */ 1446 rw_mgr_incr_vfifo(*grp, v); 1447 *p = 0; 1448 } 1449 } 1450 1451 if (*i >= VFIFO_SIZE + 1) { 1452 /* cannot see edge of failing read */ 1453 debug_cond(DLEVEL == 2, "%s:%d sdr_nonworking_phase: end:\ 1454 failed\n", __func__, __LINE__); 1455 return 0; 1456 } else { 1457 return 1; 1458 } 1459 } 1460 1461 static int sdr_find_window_centre(uint32_t *grp, uint32_t *bit_chk, 1462 uint32_t *work_bgn, uint32_t *v, uint32_t *d, 1463 uint32_t *p, uint32_t *work_mid, 1464 uint32_t *work_end) 1465 { 1466 int i; 1467 int tmp_delay = 0; 1468 1469 *work_mid = (*work_bgn + *work_end) / 2; 1470 1471 debug_cond(DLEVEL == 2, "work_bgn=%d work_end=%d work_mid=%d\n", 1472 *work_bgn, *work_end, *work_mid); 1473 /* Get the middle delay to be less than a VFIFO delay */ 1474 for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX; 1475 (*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP) 1476 ; 1477 debug_cond(DLEVEL == 2, "vfifo ptap delay %d\n", tmp_delay); 1478 while (*work_mid > tmp_delay) 1479 *work_mid -= tmp_delay; 1480 debug_cond(DLEVEL == 2, "new work_mid %d\n", *work_mid); 1481 1482 tmp_delay = 0; 1483 for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX && tmp_delay < *work_mid; 1484 (*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP) 1485 ; 1486 tmp_delay -= IO_DELAY_PER_OPA_TAP; 1487 debug_cond(DLEVEL == 2, "new p %d, tmp_delay=%d\n", (*p) - 1, tmp_delay); 1488 for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_mid; (*d)++, 1489 tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) 1490 ; 1491 debug_cond(DLEVEL == 2, "new d %d, tmp_delay=%d\n", *d, tmp_delay); 1492 1493 scc_mgr_set_dqs_en_phase_all_ranks(*grp, (*p) - 1); 1494 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d); 1495 1496 /* 1497 * push vfifo until we can successfully calibrate. We can do this 1498 * because the largest possible margin in 1 VFIFO cycle. 
1499 */ 1500 for (i = 0; i < VFIFO_SIZE; i++) { 1501 debug_cond(DLEVEL == 2, "find_dqs_en_phase: center: vfifo=%u\n", 1502 *v); 1503 if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1, 1504 PASS_ONE_BIT, 1505 bit_chk, 0)) { 1506 break; 1507 } 1508 1509 /* fiddle with FIFO */ 1510 rw_mgr_incr_vfifo(*grp, v); 1511 } 1512 1513 if (i >= VFIFO_SIZE) { 1514 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: center: \ 1515 failed\n", __func__, __LINE__); 1516 return 0; 1517 } else { 1518 return 1; 1519 } 1520 } 1521 1522 /* find a good dqs enable to use */ 1523 static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(uint32_t grp) 1524 { 1525 uint32_t v, d, p, i; 1526 uint32_t max_working_cnt; 1527 uint32_t bit_chk; 1528 uint32_t dtaps_per_ptap; 1529 uint32_t work_bgn, work_mid, work_end; 1530 uint32_t found_passing_read, found_failing_read, initial_failing_dtap; 1531 1532 debug("%s:%d %u\n", __func__, __LINE__, grp); 1533 1534 reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER); 1535 1536 scc_mgr_set_dqs_en_delay_all_ranks(grp, 0); 1537 scc_mgr_set_dqs_en_phase_all_ranks(grp, 0); 1538 1539 /* ************************************************************** */ 1540 /* * Step 0 : Determine number of delay taps for each phase tap * */ 1541 dtaps_per_ptap = IO_DELAY_PER_OPA_TAP/IO_DELAY_PER_DQS_EN_DCHAIN_TAP; 1542 1543 /* ********************************************************* */ 1544 /* * Step 1 : First push vfifo until we get a failing read * */ 1545 v = find_vfifo_read(grp, &bit_chk); 1546 1547 max_working_cnt = 0; 1548 1549 /* ******************************************************** */ 1550 /* * step 2: find first working phase, increment in ptaps * */ 1551 work_bgn = 0; 1552 if (find_working_phase(&grp, &bit_chk, dtaps_per_ptap, &work_bgn, &v, &d, 1553 &p, &i, &max_working_cnt) == 0) 1554 return 0; 1555 1556 work_end = work_bgn; 1557 1558 /* 1559 * If d is 0 then the working window covers a phase tap and 1560 * we can follow the old procedure otherwise, we've found the beginning, 1561 * and we need to increment the dtaps until we find the end. 
1562 */ 1563 if (d == 0) { 1564 /* ********************************************************* */ 1565 /* * step 3a: if we have room, back off by one and 1566 increment in dtaps * */ 1567 1568 sdr_backup_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p, 1569 &max_working_cnt); 1570 1571 /* ********************************************************* */ 1572 /* * step 4a: go forward from working phase to non working 1573 phase, increment in ptaps * */ 1574 if (sdr_nonworking_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p, 1575 &i, &max_working_cnt, &work_end) == 0) 1576 return 0; 1577 1578 /* ********************************************************* */ 1579 /* * step 5a: back off one from last, increment in dtaps * */ 1580 1581 /* Special case code for backing up a phase */ 1582 if (p == 0) { 1583 p = IO_DQS_EN_PHASE_MAX; 1584 rw_mgr_decr_vfifo(grp, &v); 1585 } else { 1586 p = p - 1; 1587 } 1588 1589 work_end -= IO_DELAY_PER_OPA_TAP; 1590 scc_mgr_set_dqs_en_phase_all_ranks(grp, p); 1591 1592 /* * The actual increment of dtaps is done outside of 1593 the if/else loop to share code */ 1594 d = 0; 1595 1596 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: v/p: \ 1597 vfifo=%u ptap=%u\n", __func__, __LINE__, 1598 v, p); 1599 } else { 1600 /* ******************************************************* */ 1601 /* * step 3-5b: Find the right edge of the window using 1602 delay taps * */ 1603 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase:vfifo=%u \ 1604 ptap=%u dtap=%u bgn=%u\n", __func__, __LINE__, 1605 v, p, d, work_bgn); 1606 1607 work_end = work_bgn; 1608 1609 /* * The actual increment of dtaps is done outside of the 1610 if/else loop to share code */ 1611 1612 /* Only here to counterbalance a subtract later on which is 1613 not needed if this branch of the algorithm is taken */ 1614 max_working_cnt++; 1615 } 1616 1617 /* The dtap increment to find the failing edge is done here */ 1618 for (; d <= IO_DQS_EN_DELAY_MAX; d++, work_end += 1619 IO_DELAY_PER_DQS_EN_DCHAIN_TAP) { 1620 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: \ 1621 end-2: dtap=%u\n", __func__, __LINE__, d); 1622 scc_mgr_set_dqs_en_delay_all_ranks(grp, d); 1623 1624 if (!rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, 1625 PASS_ONE_BIT, 1626 &bit_chk, 0)) { 1627 break; 1628 } 1629 } 1630 1631 /* Go back to working dtap */ 1632 if (d != 0) 1633 work_end -= IO_DELAY_PER_DQS_EN_DCHAIN_TAP; 1634 1635 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: v/p/d: vfifo=%u \ 1636 ptap=%u dtap=%u end=%u\n", __func__, __LINE__, 1637 v, p, d-1, work_end); 1638 1639 if (work_end < work_bgn) { 1640 /* nil range */ 1641 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: end-2: \ 1642 failed\n", __func__, __LINE__); 1643 return 0; 1644 } 1645 1646 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: found range [%u,%u]\n", 1647 __func__, __LINE__, work_bgn, work_end); 1648 1649 /* *************************************************************** */ 1650 /* 1651 * * We need to calculate the number of dtaps that equal a ptap 1652 * * To do that we'll back up a ptap and re-find the edge of the 1653 * * window using dtaps 1654 */ 1655 1656 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: calculate dtaps_per_ptap \ 1657 for tracking\n", __func__, __LINE__); 1658 1659 /* Special case code for backing up a phase */ 1660 if (p == 0) { 1661 p = IO_DQS_EN_PHASE_MAX; 1662 rw_mgr_decr_vfifo(grp, &v); 1663 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: backedup \ 1664 cycle/phase: v=%u p=%u\n", __func__, __LINE__, 1665 v, p); 1666 } else { 1667 p = p - 1; 1668 
debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: backedup \ 1669 phase only: v=%u p=%u", __func__, __LINE__, 1670 v, p); 1671 } 1672 1673 scc_mgr_set_dqs_en_phase_all_ranks(grp, p); 1674 1675 /* 1676 * Increase dtap until we first see a passing read (in case the 1677 * window is smaller than a ptap), 1678 * and then a failing read to mark the edge of the window again 1679 */ 1680 1681 /* Find a passing read */ 1682 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find passing read\n", 1683 __func__, __LINE__); 1684 found_passing_read = 0; 1685 found_failing_read = 0; 1686 initial_failing_dtap = d; 1687 for (; d <= IO_DQS_EN_DELAY_MAX; d++) { 1688 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: testing \ 1689 read d=%u\n", __func__, __LINE__, d); 1690 scc_mgr_set_dqs_en_delay_all_ranks(grp, d); 1691 1692 if (rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, 1693 PASS_ONE_BIT, 1694 &bit_chk, 0)) { 1695 found_passing_read = 1; 1696 break; 1697 } 1698 } 1699 1700 if (found_passing_read) { 1701 /* Find a failing read */ 1702 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find failing \ 1703 read\n", __func__, __LINE__); 1704 for (d = d + 1; d <= IO_DQS_EN_DELAY_MAX; d++) { 1705 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: \ 1706 testing read d=%u\n", __func__, __LINE__, d); 1707 scc_mgr_set_dqs_en_delay_all_ranks(grp, d); 1708 1709 if (!rw_mgr_mem_calibrate_read_test_all_ranks 1710 (grp, 1, PASS_ONE_BIT, &bit_chk, 0)) { 1711 found_failing_read = 1; 1712 break; 1713 } 1714 } 1715 } else { 1716 debug_cond(DLEVEL == 1, "%s:%d find_dqs_en_phase: failed to \ 1717 calculate dtaps", __func__, __LINE__); 1718 debug_cond(DLEVEL == 1, "per ptap. Fall back on static value\n"); 1719 } 1720 1721 /* 1722 * The dynamically calculated dtaps_per_ptap is only valid if we 1723 * found a passing/failing read. If we didn't, it means d hit the max 1724 * (IO_DQS_EN_DELAY_MAX). Otherwise, dtaps_per_ptap retains its 1725 * statically calculated value. 
1726 */ 1727 if (found_passing_read && found_failing_read) 1728 dtaps_per_ptap = d - initial_failing_dtap; 1729 1730 writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap); 1731 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: dtaps_per_ptap=%u \ 1732 - %u = %u", __func__, __LINE__, d, 1733 initial_failing_dtap, dtaps_per_ptap); 1734 1735 /* ******************************************** */ 1736 /* * step 6: Find the centre of the window * */ 1737 if (sdr_find_window_centre(&grp, &bit_chk, &work_bgn, &v, &d, &p, 1738 &work_mid, &work_end) == 0) 1739 return 0; 1740 1741 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: center found: \ 1742 vfifo=%u ptap=%u dtap=%u\n", __func__, __LINE__, 1743 v, p-1, d); 1744 return 1; 1745 } 1746 1747 /* 1748 * Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different 1749 * dq_in_delay values 1750 */ 1751 static uint32_t 1752 rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay 1753 (uint32_t write_group, uint32_t read_group, uint32_t test_bgn) 1754 { 1755 uint32_t found; 1756 uint32_t i; 1757 uint32_t p; 1758 uint32_t d; 1759 uint32_t r; 1760 1761 const uint32_t delay_step = IO_IO_IN_DELAY_MAX / 1762 (RW_MGR_MEM_DQ_PER_READ_DQS-1); 1763 /* we start at zero, so have one less dq to devide among */ 1764 1765 debug("%s:%d (%u,%u,%u)", __func__, __LINE__, write_group, read_group, 1766 test_bgn); 1767 1768 /* try different dq_in_delays since the dq path is shorter than dqs */ 1769 1770 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 1771 r += NUM_RANKS_PER_SHADOW_REG) { 1772 for (i = 0, p = test_bgn, d = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++, d += delay_step) { 1773 debug_cond(DLEVEL == 1, "%s:%d rw_mgr_mem_calibrate_\ 1774 vfifo_find_dqs_", __func__, __LINE__); 1775 debug_cond(DLEVEL == 1, "en_phase_sweep_dq_in_delay: g=%u/%u ", 1776 write_group, read_group); 1777 debug_cond(DLEVEL == 1, "r=%u, i=%u p=%u d=%u\n", r, i , p, d); 1778 scc_mgr_set_dq_in_delay(p, d); 1779 scc_mgr_load_dq(p); 1780 } 1781 writel(0, &sdr_scc_mgr->update); 1782 } 1783 1784 found = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(read_group); 1785 1786 debug_cond(DLEVEL == 1, "%s:%d rw_mgr_mem_calibrate_vfifo_find_dqs_\ 1787 en_phase_sweep_dq", __func__, __LINE__); 1788 debug_cond(DLEVEL == 1, "_in_delay: g=%u/%u found=%u; Reseting delay \ 1789 chain to zero\n", write_group, read_group, found); 1790 1791 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 1792 r += NUM_RANKS_PER_SHADOW_REG) { 1793 for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; 1794 i++, p++) { 1795 scc_mgr_set_dq_in_delay(p, 0); 1796 scc_mgr_load_dq(p); 1797 } 1798 writel(0, &sdr_scc_mgr->update); 1799 } 1800 1801 return found; 1802 } 1803 1804 /* per-bit deskew DQ and center */ 1805 static uint32_t rw_mgr_mem_calibrate_vfifo_center(uint32_t rank_bgn, 1806 uint32_t write_group, uint32_t read_group, uint32_t test_bgn, 1807 uint32_t use_read_test, uint32_t update_fom) 1808 { 1809 uint32_t i, p, d, min_index; 1810 /* 1811 * Store these as signed since there are comparisons with 1812 * signed numbers. 
1813 */ 1814 uint32_t bit_chk; 1815 uint32_t sticky_bit_chk; 1816 int32_t left_edge[RW_MGR_MEM_DQ_PER_READ_DQS]; 1817 int32_t right_edge[RW_MGR_MEM_DQ_PER_READ_DQS]; 1818 int32_t final_dq[RW_MGR_MEM_DQ_PER_READ_DQS]; 1819 int32_t mid; 1820 int32_t orig_mid_min, mid_min; 1821 int32_t new_dqs, start_dqs, start_dqs_en, shift_dq, final_dqs, 1822 final_dqs_en; 1823 int32_t dq_margin, dqs_margin; 1824 uint32_t stop; 1825 uint32_t temp_dq_in_delay1, temp_dq_in_delay2; 1826 uint32_t addr; 1827 1828 debug("%s:%d: %u %u", __func__, __LINE__, read_group, test_bgn); 1829 1830 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_DQS_IN_DELAY_OFFSET; 1831 start_dqs = readl(addr + (read_group << 2)); 1832 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) 1833 start_dqs_en = readl(addr + ((read_group << 2) 1834 - IO_DQS_EN_DELAY_OFFSET)); 1835 1836 /* set the left and right edge of each bit to an illegal value */ 1837 /* use (IO_IO_IN_DELAY_MAX + 1) as an illegal value */ 1838 sticky_bit_chk = 0; 1839 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 1840 left_edge[i] = IO_IO_IN_DELAY_MAX + 1; 1841 right_edge[i] = IO_IO_IN_DELAY_MAX + 1; 1842 } 1843 1844 /* Search for the left edge of the window for each bit */ 1845 for (d = 0; d <= IO_IO_IN_DELAY_MAX; d++) { 1846 scc_mgr_apply_group_dq_in_delay(write_group, test_bgn, d); 1847 1848 writel(0, &sdr_scc_mgr->update); 1849 1850 /* 1851 * Stop searching when the read test doesn't pass AND when 1852 * we've seen a passing read on every bit. 1853 */ 1854 if (use_read_test) { 1855 stop = !rw_mgr_mem_calibrate_read_test(rank_bgn, 1856 read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT, 1857 &bit_chk, 0, 0); 1858 } else { 1859 rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1860 0, PASS_ONE_BIT, 1861 &bit_chk, 0); 1862 bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS * 1863 (read_group - (write_group * 1864 RW_MGR_MEM_IF_READ_DQS_WIDTH / 1865 RW_MGR_MEM_IF_WRITE_DQS_WIDTH))); 1866 stop = (bit_chk == 0); 1867 } 1868 sticky_bit_chk = sticky_bit_chk | bit_chk; 1869 stop = stop && (sticky_bit_chk == param->read_correct_mask); 1870 debug_cond(DLEVEL == 2, "%s:%d vfifo_center(left): dtap=%u => %u == %u \ 1871 && %u", __func__, __LINE__, d, 1872 sticky_bit_chk, 1873 param->read_correct_mask, stop); 1874 1875 if (stop == 1) { 1876 break; 1877 } else { 1878 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 1879 if (bit_chk & 1) { 1880 /* Remember a passing test as the 1881 left_edge */ 1882 left_edge[i] = d; 1883 } else { 1884 /* If a left edge has not been seen yet, 1885 then a future passing test will mark 1886 this edge as the right edge */ 1887 if (left_edge[i] == 1888 IO_IO_IN_DELAY_MAX + 1) { 1889 right_edge[i] = -(d + 1); 1890 } 1891 } 1892 bit_chk = bit_chk >> 1; 1893 } 1894 } 1895 } 1896 1897 /* Reset DQ delay chains to 0 */ 1898 scc_mgr_apply_group_dq_in_delay(test_bgn, 0); 1899 sticky_bit_chk = 0; 1900 for (i = RW_MGR_MEM_DQ_PER_READ_DQS - 1;; i--) { 1901 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: \ 1902 %d right_edge[%u]: %d\n", __func__, __LINE__, 1903 i, left_edge[i], i, right_edge[i]); 1904 1905 /* 1906 * Check for cases where we haven't found the left edge, 1907 * which makes our assignment of the the right edge invalid. 1908 * Reset it to the illegal value. 
1909 */ 1910 if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) && ( 1911 right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) { 1912 right_edge[i] = IO_IO_IN_DELAY_MAX + 1; 1913 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: reset \ 1914 right_edge[%u]: %d\n", __func__, __LINE__, 1915 i, right_edge[i]); 1916 } 1917 1918 /* 1919 * Reset sticky bit (except for bits where we have seen 1920 * both the left and right edge). 1921 */ 1922 sticky_bit_chk = sticky_bit_chk << 1; 1923 if ((left_edge[i] != IO_IO_IN_DELAY_MAX + 1) && 1924 (right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) { 1925 sticky_bit_chk = sticky_bit_chk | 1; 1926 } 1927 1928 if (i == 0) 1929 break; 1930 } 1931 1932 /* Search for the right edge of the window for each bit */ 1933 for (d = 0; d <= IO_DQS_IN_DELAY_MAX - start_dqs; d++) { 1934 scc_mgr_set_dqs_bus_in_delay(read_group, d + start_dqs); 1935 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { 1936 uint32_t delay = d + start_dqs_en; 1937 if (delay > IO_DQS_EN_DELAY_MAX) 1938 delay = IO_DQS_EN_DELAY_MAX; 1939 scc_mgr_set_dqs_en_delay(read_group, delay); 1940 } 1941 scc_mgr_load_dqs(read_group); 1942 1943 writel(0, &sdr_scc_mgr->update); 1944 1945 /* 1946 * Stop searching when the read test doesn't pass AND when 1947 * we've seen a passing read on every bit. 1948 */ 1949 if (use_read_test) { 1950 stop = !rw_mgr_mem_calibrate_read_test(rank_bgn, 1951 read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT, 1952 &bit_chk, 0, 0); 1953 } else { 1954 rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1955 0, PASS_ONE_BIT, 1956 &bit_chk, 0); 1957 bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS * 1958 (read_group - (write_group * 1959 RW_MGR_MEM_IF_READ_DQS_WIDTH / 1960 RW_MGR_MEM_IF_WRITE_DQS_WIDTH))); 1961 stop = (bit_chk == 0); 1962 } 1963 sticky_bit_chk = sticky_bit_chk | bit_chk; 1964 stop = stop && (sticky_bit_chk == param->read_correct_mask); 1965 1966 debug_cond(DLEVEL == 2, "%s:%d vfifo_center(right): dtap=%u => %u == \ 1967 %u && %u", __func__, __LINE__, d, 1968 sticky_bit_chk, param->read_correct_mask, stop); 1969 1970 if (stop == 1) { 1971 break; 1972 } else { 1973 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 1974 if (bit_chk & 1) { 1975 /* Remember a passing test as 1976 the right_edge */ 1977 right_edge[i] = d; 1978 } else { 1979 if (d != 0) { 1980 /* If a right edge has not been 1981 seen yet, then a future passing 1982 test will mark this edge as the 1983 left edge */ 1984 if (right_edge[i] == 1985 IO_IO_IN_DELAY_MAX + 1) { 1986 left_edge[i] = -(d + 1); 1987 } 1988 } else { 1989 /* d = 0 failed, but it passed 1990 when testing the left edge, 1991 so it must be marginal, 1992 set it to -1 */ 1993 if (right_edge[i] == 1994 IO_IO_IN_DELAY_MAX + 1 && 1995 left_edge[i] != 1996 IO_IO_IN_DELAY_MAX 1997 + 1) { 1998 right_edge[i] = -1; 1999 } 2000 /* If a right edge has not been 2001 seen yet, then a future passing 2002 test will mark this edge as the 2003 left edge */ 2004 else if (right_edge[i] == 2005 IO_IO_IN_DELAY_MAX + 2006 1) { 2007 left_edge[i] = -(d + 1); 2008 } 2009 } 2010 } 2011 2012 debug_cond(DLEVEL == 2, "%s:%d vfifo_center[r,\ 2013 d=%u]: ", __func__, __LINE__, d); 2014 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d ", 2015 (int)(bit_chk & 1), i, left_edge[i]); 2016 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i, 2017 right_edge[i]); 2018 bit_chk = bit_chk >> 1; 2019 } 2020 } 2021 } 2022 2023 /* Check that all bits have a window */ 2024 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 2025 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: \ 2026 %d right_edge[%u]: %d", 
__func__, __LINE__, 2027 i, left_edge[i], i, right_edge[i]); 2028 if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) || (right_edge[i] 2029 == IO_IO_IN_DELAY_MAX + 1)) { 2030 /* 2031 * Restore delay chain settings before letting the loop 2032 * in rw_mgr_mem_calibrate_vfifo to retry different 2033 * dqs/ck relationships. 2034 */ 2035 scc_mgr_set_dqs_bus_in_delay(read_group, start_dqs); 2036 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { 2037 scc_mgr_set_dqs_en_delay(read_group, 2038 start_dqs_en); 2039 } 2040 scc_mgr_load_dqs(read_group); 2041 writel(0, &sdr_scc_mgr->update); 2042 2043 debug_cond(DLEVEL == 1, "%s:%d vfifo_center: failed to \ 2044 find edge [%u]: %d %d", __func__, __LINE__, 2045 i, left_edge[i], right_edge[i]); 2046 if (use_read_test) { 2047 set_failing_group_stage(read_group * 2048 RW_MGR_MEM_DQ_PER_READ_DQS + i, 2049 CAL_STAGE_VFIFO, 2050 CAL_SUBSTAGE_VFIFO_CENTER); 2051 } else { 2052 set_failing_group_stage(read_group * 2053 RW_MGR_MEM_DQ_PER_READ_DQS + i, 2054 CAL_STAGE_VFIFO_AFTER_WRITES, 2055 CAL_SUBSTAGE_VFIFO_CENTER); 2056 } 2057 return 0; 2058 } 2059 } 2060 2061 /* Find middle of window for each DQ bit */ 2062 mid_min = left_edge[0] - right_edge[0]; 2063 min_index = 0; 2064 for (i = 1; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 2065 mid = left_edge[i] - right_edge[i]; 2066 if (mid < mid_min) { 2067 mid_min = mid; 2068 min_index = i; 2069 } 2070 } 2071 2072 /* 2073 * -mid_min/2 represents the amount that we need to move DQS. 2074 * If mid_min is odd and positive we'll need to add one to 2075 * make sure the rounding in further calculations is correct 2076 * (always bias to the right), so just add 1 for all positive values. 2077 */ 2078 if (mid_min > 0) 2079 mid_min++; 2080 2081 mid_min = mid_min / 2; 2082 2083 debug_cond(DLEVEL == 1, "%s:%d vfifo_center: mid_min=%d (index=%u)\n", 2084 __func__, __LINE__, mid_min, min_index); 2085 2086 /* Determine the amount we can change DQS (which is -mid_min) */ 2087 orig_mid_min = mid_min; 2088 new_dqs = start_dqs - mid_min; 2089 if (new_dqs > IO_DQS_IN_DELAY_MAX) 2090 new_dqs = IO_DQS_IN_DELAY_MAX; 2091 else if (new_dqs < 0) 2092 new_dqs = 0; 2093 2094 mid_min = start_dqs - new_dqs; 2095 debug_cond(DLEVEL == 1, "vfifo_center: new mid_min=%d new_dqs=%d\n", 2096 mid_min, new_dqs); 2097 2098 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { 2099 if (start_dqs_en - mid_min > IO_DQS_EN_DELAY_MAX) 2100 mid_min += start_dqs_en - mid_min - IO_DQS_EN_DELAY_MAX; 2101 else if (start_dqs_en - mid_min < 0) 2102 mid_min += start_dqs_en - mid_min; 2103 } 2104 new_dqs = start_dqs - mid_min; 2105 2106 debug_cond(DLEVEL == 1, "vfifo_center: start_dqs=%d start_dqs_en=%d \ 2107 new_dqs=%d mid_min=%d\n", start_dqs, 2108 IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS ? 
start_dqs_en : -1, 2109 new_dqs, mid_min); 2110 2111 /* Initialize data for export structures */ 2112 dqs_margin = IO_IO_IN_DELAY_MAX + 1; 2113 dq_margin = IO_IO_IN_DELAY_MAX + 1; 2114 2115 /* add delay to bring centre of all DQ windows to the same "level" */ 2116 for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) { 2117 /* Use values before divide by 2 to reduce round off error */ 2118 shift_dq = (left_edge[i] - right_edge[i] - 2119 (left_edge[min_index] - right_edge[min_index]))/2 + 2120 (orig_mid_min - mid_min); 2121 2122 debug_cond(DLEVEL == 2, "vfifo_center: before: \ 2123 shift_dq[%u]=%d\n", i, shift_dq); 2124 2125 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_IN_DELAY_OFFSET; 2126 temp_dq_in_delay1 = readl(addr + (p << 2)); 2127 temp_dq_in_delay2 = readl(addr + (i << 2)); 2128 2129 if (shift_dq + (int32_t)temp_dq_in_delay1 > 2130 (int32_t)IO_IO_IN_DELAY_MAX) { 2131 shift_dq = (int32_t)IO_IO_IN_DELAY_MAX - temp_dq_in_delay2; 2132 } else if (shift_dq + (int32_t)temp_dq_in_delay1 < 0) { 2133 shift_dq = -(int32_t)temp_dq_in_delay1; 2134 } 2135 debug_cond(DLEVEL == 2, "vfifo_center: after: \ 2136 shift_dq[%u]=%d\n", i, shift_dq); 2137 final_dq[i] = temp_dq_in_delay1 + shift_dq; 2138 scc_mgr_set_dq_in_delay(p, final_dq[i]); 2139 scc_mgr_load_dq(p); 2140 2141 debug_cond(DLEVEL == 2, "vfifo_center: margin[%u]=[%d,%d]\n", i, 2142 left_edge[i] - shift_dq + (-mid_min), 2143 right_edge[i] + shift_dq - (-mid_min)); 2144 /* To determine values for export structures */ 2145 if (left_edge[i] - shift_dq + (-mid_min) < dq_margin) 2146 dq_margin = left_edge[i] - shift_dq + (-mid_min); 2147 2148 if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin) 2149 dqs_margin = right_edge[i] + shift_dq - (-mid_min); 2150 } 2151 2152 final_dqs = new_dqs; 2153 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) 2154 final_dqs_en = start_dqs_en - mid_min; 2155 2156 /* Move DQS-en */ 2157 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { 2158 scc_mgr_set_dqs_en_delay(read_group, final_dqs_en); 2159 scc_mgr_load_dqs(read_group); 2160 } 2161 2162 /* Move DQS */ 2163 scc_mgr_set_dqs_bus_in_delay(read_group, final_dqs); 2164 scc_mgr_load_dqs(read_group); 2165 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: dq_margin=%d \ 2166 dqs_margin=%d", __func__, __LINE__, 2167 dq_margin, dqs_margin); 2168 2169 /* 2170 * Do not remove this line as it makes sure all of our decisions 2171 * have been applied. Apply the update bit. 2172 */ 2173 writel(0, &sdr_scc_mgr->update); 2174 2175 return (dq_margin >= 0) && (dqs_margin >= 0); 2176 } 2177 2178 /* 2179 * calibrate the read valid prediction FIFO. 2180 * 2181 * - read valid prediction will consist of finding a good DQS enable phase, 2182 * DQS enable delay, DQS input phase, and DQS input delay. 2183 * - we also do a per-bit deskew on the DQ lines. 
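 * - the loop below tries every DQDQS output phase (p) at increasing
 *   amounts of added output-side delay (d) and, for each candidate,
 *   runs the guaranteed read test, the DQS enable phase/delay search
 *   and per-bit read centering until the group calibrates or the
 *   settings are exhausted.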
2184 */ 2185 static uint32_t rw_mgr_mem_calibrate_vfifo(uint32_t read_group, 2186 uint32_t test_bgn) 2187 { 2188 uint32_t p, d, rank_bgn, sr; 2189 uint32_t dtaps_per_ptap; 2190 uint32_t bit_chk; 2191 uint32_t grp_calibrated; 2192 uint32_t write_group, write_test_bgn; 2193 uint32_t failed_substage; 2194 2195 debug("%s:%d: %u %u\n", __func__, __LINE__, read_group, test_bgn); 2196 2197 /* update info for sims */ 2198 reg_file_set_stage(CAL_STAGE_VFIFO); 2199 2200 write_group = read_group; 2201 write_test_bgn = test_bgn; 2202 2203 /* USER Determine number of delay taps for each phase tap */ 2204 dtaps_per_ptap = DIV_ROUND_UP(IO_DELAY_PER_OPA_TAP, 2205 IO_DELAY_PER_DQS_EN_DCHAIN_TAP) - 1; 2206 2207 /* update info for sims */ 2208 reg_file_set_group(read_group); 2209 2210 grp_calibrated = 0; 2211 2212 reg_file_set_sub_stage(CAL_SUBSTAGE_GUARANTEED_READ); 2213 failed_substage = CAL_SUBSTAGE_GUARANTEED_READ; 2214 2215 for (d = 0; d <= dtaps_per_ptap && grp_calibrated == 0; d += 2) { 2216 /* 2217 * In RLDRAMX we may be messing the delay of pins in 2218 * the same write group but outside of the current read 2219 * the group, but that's ok because we haven't 2220 * calibrated output side yet. 2221 */ 2222 if (d > 0) { 2223 scc_mgr_apply_group_all_out_delay_add_all_ranks( 2224 write_group, d); 2225 } 2226 2227 for (p = 0; p <= IO_DQDQS_OUT_PHASE_MAX && grp_calibrated == 0; 2228 p++) { 2229 /* set a particular dqdqs phase */ 2230 scc_mgr_set_dqdqs_output_phase_all_ranks(read_group, p); 2231 2232 debug_cond(DLEVEL == 1, "%s:%d calibrate_vfifo: g=%u \ 2233 p=%u d=%u\n", __func__, __LINE__, 2234 read_group, p, d); 2235 2236 /* 2237 * Load up the patterns used by read calibration 2238 * using current DQDQS phase. 2239 */ 2240 rw_mgr_mem_calibrate_read_load_patterns(0, 1); 2241 if (!(gbl->phy_debug_mode_flags & 2242 PHY_DEBUG_DISABLE_GUARANTEED_READ)) { 2243 if (!rw_mgr_mem_calibrate_read_test_patterns_all_ranks 2244 (read_group, 1, &bit_chk)) { 2245 debug_cond(DLEVEL == 1, "%s:%d Guaranteed read test failed:", 2246 __func__, __LINE__); 2247 debug_cond(DLEVEL == 1, " g=%u p=%u d=%u\n", 2248 read_group, p, d); 2249 break; 2250 } 2251 } 2252 2253 /* case:56390 */ 2254 grp_calibrated = 1; 2255 if (rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay 2256 (write_group, read_group, test_bgn)) { 2257 /* 2258 * USER Read per-bit deskew can be done on a 2259 * per shadow register basis. 2260 */ 2261 for (rank_bgn = 0, sr = 0; 2262 rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS; 2263 rank_bgn += NUM_RANKS_PER_SHADOW_REG, 2264 ++sr) { 2265 /* 2266 * Determine if this set of ranks 2267 * should be skipped entirely. 2268 */ 2269 if (!param->skip_shadow_regs[sr]) { 2270 /* 2271 * If doing read after write 2272 * calibration, do not update 2273 * FOM, now - do it then. 2274 */ 2275 if (!rw_mgr_mem_calibrate_vfifo_center 2276 (rank_bgn, write_group, 2277 read_group, test_bgn, 1, 0)) { 2278 grp_calibrated = 0; 2279 failed_substage = 2280 CAL_SUBSTAGE_VFIFO_CENTER; 2281 } 2282 } 2283 } 2284 } else { 2285 grp_calibrated = 0; 2286 failed_substage = CAL_SUBSTAGE_DQS_EN_PHASE; 2287 } 2288 } 2289 } 2290 2291 if (grp_calibrated == 0) { 2292 set_failing_group_stage(write_group, CAL_STAGE_VFIFO, 2293 failed_substage); 2294 return 0; 2295 } 2296 2297 /* 2298 * Reset the delay chains back to zero if they have moved > 1 2299 * (check for > 1 because loop will increase d even when pass in 2300 * first case). 
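 * (The calibration loop above advances d by 2 at the end of every
 * iteration, so even a pass on the very first try exits with d == 2;
 * only d > 2 means extra output-side delay was actually applied and
 * needs to be cleared.)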
2301 */ 2302 if (d > 2) 2303 scc_mgr_zero_group(write_group, 1); 2304 2305 return 1; 2306 } 2307 2308 /* VFIFO Calibration -- Read Deskew Calibration after write deskew */ 2309 static uint32_t rw_mgr_mem_calibrate_vfifo_end(uint32_t read_group, 2310 uint32_t test_bgn) 2311 { 2312 uint32_t rank_bgn, sr; 2313 uint32_t grp_calibrated; 2314 uint32_t write_group; 2315 2316 debug("%s:%d %u %u", __func__, __LINE__, read_group, test_bgn); 2317 2318 /* update info for sims */ 2319 2320 reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES); 2321 reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER); 2322 2323 write_group = read_group; 2324 2325 /* update info for sims */ 2326 reg_file_set_group(read_group); 2327 2328 grp_calibrated = 1; 2329 /* Read per-bit deskew can be done on a per shadow register basis */ 2330 for (rank_bgn = 0, sr = 0; rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS; 2331 rank_bgn += NUM_RANKS_PER_SHADOW_REG, ++sr) { 2332 /* Determine if this set of ranks should be skipped entirely */ 2333 if (!param->skip_shadow_regs[sr]) { 2334 /* This is the last calibration round, update FOM here */ 2335 if (!rw_mgr_mem_calibrate_vfifo_center(rank_bgn, 2336 write_group, 2337 read_group, 2338 test_bgn, 0, 2339 1)) { 2340 grp_calibrated = 0; 2341 } 2342 } 2343 } 2344 2345 2346 if (grp_calibrated == 0) { 2347 set_failing_group_stage(write_group, 2348 CAL_STAGE_VFIFO_AFTER_WRITES, 2349 CAL_SUBSTAGE_VFIFO_CENTER); 2350 return 0; 2351 } 2352 2353 return 1; 2354 } 2355 2356 /* Calibrate LFIFO to find smallest read latency */ 2357 static uint32_t rw_mgr_mem_calibrate_lfifo(void) 2358 { 2359 uint32_t found_one; 2360 uint32_t bit_chk; 2361 2362 debug("%s:%d\n", __func__, __LINE__); 2363 2364 /* update info for sims */ 2365 reg_file_set_stage(CAL_STAGE_LFIFO); 2366 reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY); 2367 2368 /* Load up the patterns used by read calibration for all ranks */ 2369 rw_mgr_mem_calibrate_read_load_patterns(0, 1); 2370 found_one = 0; 2371 2372 do { 2373 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); 2374 debug_cond(DLEVEL == 2, "%s:%d lfifo: read_lat=%u", 2375 __func__, __LINE__, gbl->curr_read_lat); 2376 2377 if (!rw_mgr_mem_calibrate_read_test_all_ranks(0, 2378 NUM_READ_TESTS, 2379 PASS_ALL_BITS, 2380 &bit_chk, 1)) { 2381 break; 2382 } 2383 2384 found_one = 1; 2385 /* reduce read latency and see if things are working */ 2386 /* correctly */ 2387 gbl->curr_read_lat--; 2388 } while (gbl->curr_read_lat > 0); 2389 2390 /* reset the fifos to get pointers to known state */ 2391 2392 writel(0, &phy_mgr_cmd->fifo_reset); 2393 2394 if (found_one) { 2395 /* add a fudge factor to the read latency that was determined */ 2396 gbl->curr_read_lat += 2; 2397 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); 2398 debug_cond(DLEVEL == 2, "%s:%d lfifo: success: using \ 2399 read_lat=%u\n", __func__, __LINE__, 2400 gbl->curr_read_lat); 2401 return 1; 2402 } else { 2403 set_failing_group_stage(0xff, CAL_STAGE_LFIFO, 2404 CAL_SUBSTAGE_READ_LATENCY); 2405 2406 debug_cond(DLEVEL == 2, "%s:%d lfifo: failed at initial \ 2407 read_lat=%u\n", __func__, __LINE__, 2408 gbl->curr_read_lat); 2409 return 0; 2410 } 2411 } 2412 2413 /* 2414 * issue write test command. 2415 * two variants are provided. one that just tests a write pattern and 2416 * another that tests datamask functionality. 
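 * the test_dm argument selects between them: when set, the *_DM_*
 * RW manager sequences are used so that the data-mask pins are also
 * exercised; otherwise only the plain write-data path is checked.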
2417 */ 2418 static void rw_mgr_mem_calibrate_write_test_issue(uint32_t group, 2419 uint32_t test_dm) 2420 { 2421 uint32_t mcc_instruction; 2422 uint32_t quick_write_mode = (((STATIC_CALIB_STEPS) & CALIB_SKIP_WRITES) && 2423 ENABLE_SUPER_QUICK_CALIBRATION); 2424 uint32_t rw_wl_nop_cycles; 2425 uint32_t addr; 2426 2427 /* 2428 * Set counter and jump addresses for the right 2429 * number of NOP cycles. 2430 * The number of supported NOP cycles can range from -1 to infinity 2431 * Three different cases are handled: 2432 * 2433 * 1. For a number of NOP cycles greater than 0, the RW Mgr looping 2434 * mechanism will be used to insert the right number of NOPs 2435 * 2436 * 2. For a number of NOP cycles equals to 0, the micro-instruction 2437 * issuing the write command will jump straight to the 2438 * micro-instruction that turns on DQS (for DDRx), or outputs write 2439 * data (for RLD), skipping 2440 * the NOP micro-instruction all together 2441 * 2442 * 3. A number of NOP cycles equal to -1 indicates that DQS must be 2443 * turned on in the same micro-instruction that issues the write 2444 * command. Then we need 2445 * to directly jump to the micro-instruction that sends out the data 2446 * 2447 * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters 2448 * (2 and 3). One jump-counter (0) is used to perform multiple 2449 * write-read operations. 2450 * one counter left to issue this command in "multiple-group" mode 2451 */ 2452 2453 rw_wl_nop_cycles = gbl->rw_wl_nop_cycles; 2454 2455 if (rw_wl_nop_cycles == -1) { 2456 /* 2457 * CNTR 2 - We want to execute the special write operation that 2458 * turns on DQS right away and then skip directly to the 2459 * instruction that sends out the data. We set the counter to a 2460 * large number so that the jump is always taken. 2461 */ 2462 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2); 2463 2464 /* CNTR 3 - Not used */ 2465 if (test_dm) { 2466 mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0_WL_1; 2467 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DATA, 2468 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 2469 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP, 2470 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 2471 } else { 2472 mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0_WL_1; 2473 writel(RW_MGR_LFSR_WR_RD_BANK_0_DATA, 2474 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 2475 writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP, 2476 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 2477 } 2478 } else if (rw_wl_nop_cycles == 0) { 2479 /* 2480 * CNTR 2 - We want to skip the NOP operation and go straight 2481 * to the DQS enable instruction. We set the counter to a large 2482 * number so that the jump is always taken. 2483 */ 2484 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2); 2485 2486 /* CNTR 3 - Not used */ 2487 if (test_dm) { 2488 mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0; 2489 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DQS, 2490 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 2491 } else { 2492 mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0; 2493 writel(RW_MGR_LFSR_WR_RD_BANK_0_DQS, 2494 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 2495 } 2496 } else { 2497 /* 2498 * CNTR 2 - In this case we want to execute the next instruction 2499 * and NOT take the jump. So we set the counter to 0. The jump 2500 * address doesn't count. 2501 */ 2502 writel(0x0, &sdr_rw_load_mgr_regs->load_cntr2); 2503 writel(0x0, &sdr_rw_load_jump_mgr_regs->load_jump_add2); 2504 2505 /* 2506 * CNTR 3 - Set the nop counter to the number of cycles we 2507 * need to loop for, minus 1. 
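 * (For example, with rw_wl_nop_cycles == 4 the counter below is loaded
 * with 3, making the RW manager loop the NOP micro-instruction at
 * load_jump_add3 for the required four cycles before the write command
 * proceeds.)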
2508 */ 2509 writel(rw_wl_nop_cycles - 1, &sdr_rw_load_mgr_regs->load_cntr3); 2510 if (test_dm) { 2511 mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0; 2512 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP, 2513 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 2514 } else { 2515 mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0; 2516 writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP, 2517 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 2518 } 2519 } 2520 2521 writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS | 2522 RW_MGR_RESET_READ_DATAPATH_OFFSET); 2523 2524 if (quick_write_mode) 2525 writel(0x08, &sdr_rw_load_mgr_regs->load_cntr0); 2526 else 2527 writel(0x40, &sdr_rw_load_mgr_regs->load_cntr0); 2528 2529 writel(mcc_instruction, &sdr_rw_load_jump_mgr_regs->load_jump_add0); 2530 2531 /* 2532 * CNTR 1 - This is used to ensure enough time elapses 2533 * for read data to come back. 2534 */ 2535 writel(0x30, &sdr_rw_load_mgr_regs->load_cntr1); 2536 2537 if (test_dm) { 2538 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_WAIT, 2539 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 2540 } else { 2541 writel(RW_MGR_LFSR_WR_RD_BANK_0_WAIT, 2542 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 2543 } 2544 2545 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 2546 writel(mcc_instruction, addr + (group << 2)); 2547 } 2548 2549 /* Test writes, can check for a single bit pass or multiple bit pass */ 2550 static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn, 2551 uint32_t write_group, uint32_t use_dm, uint32_t all_correct, 2552 uint32_t *bit_chk, uint32_t all_ranks) 2553 { 2554 uint32_t r; 2555 uint32_t correct_mask_vg; 2556 uint32_t tmp_bit_chk; 2557 uint32_t vg; 2558 uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS : 2559 (rank_bgn + NUM_RANKS_PER_SHADOW_REG); 2560 uint32_t addr_rw_mgr; 2561 uint32_t base_rw_mgr; 2562 2563 *bit_chk = param->write_correct_mask; 2564 correct_mask_vg = param->write_correct_mask_vg; 2565 2566 for (r = rank_bgn; r < rank_end; r++) { 2567 if (param->skip_ranks[r]) { 2568 /* request to skip the rank */ 2569 continue; 2570 } 2571 2572 /* set rank */ 2573 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); 2574 2575 tmp_bit_chk = 0; 2576 addr_rw_mgr = SDR_PHYGRP_RWMGRGRP_ADDRESS; 2577 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS-1; ; vg--) { 2578 /* reset the fifos to get pointers to known state */ 2579 writel(0, &phy_mgr_cmd->fifo_reset); 2580 2581 tmp_bit_chk = tmp_bit_chk << 2582 (RW_MGR_MEM_DQ_PER_WRITE_DQS / 2583 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS); 2584 rw_mgr_mem_calibrate_write_test_issue(write_group * 2585 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS+vg, 2586 use_dm); 2587 2588 base_rw_mgr = readl(addr_rw_mgr); 2589 tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~(base_rw_mgr)); 2590 if (vg == 0) 2591 break; 2592 } 2593 *bit_chk &= tmp_bit_chk; 2594 } 2595 2596 if (all_correct) { 2597 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 2598 debug_cond(DLEVEL == 2, "write_test(%u,%u,ALL) : %u == \ 2599 %u => %lu", write_group, use_dm, 2600 *bit_chk, param->write_correct_mask, 2601 (long unsigned int)(*bit_chk == 2602 param->write_correct_mask)); 2603 return *bit_chk == param->write_correct_mask; 2604 } else { 2605 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 2606 debug_cond(DLEVEL == 2, "write_test(%u,%u,ONE) : %u != ", 2607 write_group, use_dm, *bit_chk); 2608 debug_cond(DLEVEL == 2, "%lu" " => %lu", (long unsigned int)0, 2609 (long unsigned int)(*bit_chk != 0)); 2610 return *bit_chk != 0x00; 2611 } 2612 } 2613 2614 /* 2615 * center all windows. 
do per-bit-deskew to possibly increase size of 2616 * certain windows. 2617 */ 2618 static uint32_t rw_mgr_mem_calibrate_writes_center(uint32_t rank_bgn, 2619 uint32_t write_group, uint32_t test_bgn) 2620 { 2621 uint32_t i, p, min_index; 2622 int32_t d; 2623 /* 2624 * Store these as signed since there are comparisons with 2625 * signed numbers. 2626 */ 2627 uint32_t bit_chk; 2628 uint32_t sticky_bit_chk; 2629 int32_t left_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS]; 2630 int32_t right_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS]; 2631 int32_t mid; 2632 int32_t mid_min, orig_mid_min; 2633 int32_t new_dqs, start_dqs, shift_dq; 2634 int32_t dq_margin, dqs_margin, dm_margin; 2635 uint32_t stop; 2636 uint32_t temp_dq_out1_delay; 2637 uint32_t addr; 2638 2639 debug("%s:%d %u %u", __func__, __LINE__, write_group, test_bgn); 2640 2641 dm_margin = 0; 2642 2643 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET; 2644 start_dqs = readl(addr + 2645 (RW_MGR_MEM_DQ_PER_WRITE_DQS << 2)); 2646 2647 /* per-bit deskew */ 2648 2649 /* 2650 * set the left and right edge of each bit to an illegal value 2651 * use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value. 2652 */ 2653 sticky_bit_chk = 0; 2654 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2655 left_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; 2656 right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; 2657 } 2658 2659 /* Search for the left edge of the window for each bit */ 2660 for (d = 0; d <= IO_IO_OUT1_DELAY_MAX; d++) { 2661 scc_mgr_apply_group_dq_out1_delay(write_group, d); 2662 2663 writel(0, &sdr_scc_mgr->update); 2664 2665 /* 2666 * Stop searching when the read test doesn't pass AND when 2667 * we've seen a passing read on every bit. 2668 */ 2669 stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 2670 0, PASS_ONE_BIT, &bit_chk, 0); 2671 sticky_bit_chk = sticky_bit_chk | bit_chk; 2672 stop = stop && (sticky_bit_chk == param->write_correct_mask); 2673 debug_cond(DLEVEL == 2, "write_center(left): dtap=%d => %u \ 2674 == %u && %u [bit_chk= %u ]\n", 2675 d, sticky_bit_chk, param->write_correct_mask, 2676 stop, bit_chk); 2677 2678 if (stop == 1) { 2679 break; 2680 } else { 2681 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2682 if (bit_chk & 1) { 2683 /* 2684 * Remember a passing test as the 2685 * left_edge. 2686 */ 2687 left_edge[i] = d; 2688 } else { 2689 /* 2690 * If a left edge has not been seen 2691 * yet, then a future passing test will 2692 * mark this edge as the right edge. 2693 */ 2694 if (left_edge[i] == 2695 IO_IO_OUT1_DELAY_MAX + 1) { 2696 right_edge[i] = -(d + 1); 2697 } 2698 } 2699 debug_cond(DLEVEL == 2, "write_center[l,d=%d):", d); 2700 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d", 2701 (int)(bit_chk & 1), i, left_edge[i]); 2702 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i, 2703 right_edge[i]); 2704 bit_chk = bit_chk >> 1; 2705 } 2706 } 2707 } 2708 2709 /* Reset DQ delay chains to 0 */ 2710 scc_mgr_apply_group_dq_out1_delay(0); 2711 sticky_bit_chk = 0; 2712 for (i = RW_MGR_MEM_DQ_PER_WRITE_DQS - 1;; i--) { 2713 debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: \ 2714 %d right_edge[%u]: %d\n", __func__, __LINE__, 2715 i, left_edge[i], i, right_edge[i]); 2716 2717 /* 2718 * Check for cases where we haven't found the left edge, 2719 * which makes our assignment of the the right edge invalid. 2720 * Reset it to the illegal value. 
2721 */ 2722 if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) && 2723 (right_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) { 2724 right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; 2725 debug_cond(DLEVEL == 2, "%s:%d write_center: reset \ 2726 right_edge[%u]: %d\n", __func__, __LINE__, 2727 i, right_edge[i]); 2728 } 2729 2730 /* 2731 * Reset sticky bit (except for bits where we have 2732 * seen the left edge). 2733 */ 2734 sticky_bit_chk = sticky_bit_chk << 1; 2735 if ((left_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) 2736 sticky_bit_chk = sticky_bit_chk | 1; 2737 2738 if (i == 0) 2739 break; 2740 } 2741 2742 /* Search for the right edge of the window for each bit */ 2743 for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - start_dqs; d++) { 2744 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, 2745 d + start_dqs); 2746 2747 writel(0, &sdr_scc_mgr->update); 2748 2749 /* 2750 * Stop searching when the read test doesn't pass AND when 2751 * we've seen a passing read on every bit. 2752 */ 2753 stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 2754 0, PASS_ONE_BIT, &bit_chk, 0); 2755 2756 sticky_bit_chk = sticky_bit_chk | bit_chk; 2757 stop = stop && (sticky_bit_chk == param->write_correct_mask); 2758 2759 debug_cond(DLEVEL == 2, "write_center (right): dtap=%u => %u == \ 2760 %u && %u\n", d, sticky_bit_chk, 2761 param->write_correct_mask, stop); 2762 2763 if (stop == 1) { 2764 if (d == 0) { 2765 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; 2766 i++) { 2767 /* d = 0 failed, but it passed when 2768 testing the left edge, so it must be 2769 marginal, set it to -1 */ 2770 if (right_edge[i] == 2771 IO_IO_OUT1_DELAY_MAX + 1 && 2772 left_edge[i] != 2773 IO_IO_OUT1_DELAY_MAX + 1) { 2774 right_edge[i] = -1; 2775 } 2776 } 2777 } 2778 break; 2779 } else { 2780 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2781 if (bit_chk & 1) { 2782 /* 2783 * Remember a passing test as 2784 * the right_edge. 2785 */ 2786 right_edge[i] = d; 2787 } else { 2788 if (d != 0) { 2789 /* 2790 * If a right edge has not 2791 * been seen yet, then a future 2792 * passing test will mark this 2793 * edge as the left edge. 2794 */ 2795 if (right_edge[i] == 2796 IO_IO_OUT1_DELAY_MAX + 1) 2797 left_edge[i] = -(d + 1); 2798 } else { 2799 /* 2800 * d = 0 failed, but it passed 2801 * when testing the left edge, 2802 * so it must be marginal, set 2803 * it to -1. 2804 */ 2805 if (right_edge[i] == 2806 IO_IO_OUT1_DELAY_MAX + 1 && 2807 left_edge[i] != 2808 IO_IO_OUT1_DELAY_MAX + 1) 2809 right_edge[i] = -1; 2810 /* 2811 * If a right edge has not been 2812 * seen yet, then a future 2813 * passing test will mark this 2814 * edge as the left edge. 
2815 */ 2816 else if (right_edge[i] == 2817 IO_IO_OUT1_DELAY_MAX + 2818 1) 2819 left_edge[i] = -(d + 1); 2820 } 2821 } 2822 debug_cond(DLEVEL == 2, "write_center[r,d=%d):", d); 2823 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d", 2824 (int)(bit_chk & 1), i, left_edge[i]); 2825 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i, 2826 right_edge[i]); 2827 bit_chk = bit_chk >> 1; 2828 } 2829 } 2830 } 2831 2832 /* Check that all bits have a window */ 2833 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2834 debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: \ 2835 %d right_edge[%u]: %d", __func__, __LINE__, 2836 i, left_edge[i], i, right_edge[i]); 2837 if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) || 2838 (right_edge[i] == IO_IO_OUT1_DELAY_MAX + 1)) { 2839 set_failing_group_stage(test_bgn + i, 2840 CAL_STAGE_WRITES, 2841 CAL_SUBSTAGE_WRITES_CENTER); 2842 return 0; 2843 } 2844 } 2845 2846 /* Find middle of window for each DQ bit */ 2847 mid_min = left_edge[0] - right_edge[0]; 2848 min_index = 0; 2849 for (i = 1; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2850 mid = left_edge[i] - right_edge[i]; 2851 if (mid < mid_min) { 2852 mid_min = mid; 2853 min_index = i; 2854 } 2855 } 2856 2857 /* 2858 * -mid_min/2 represents the amount that we need to move DQS. 2859 * If mid_min is odd and positive we'll need to add one to 2860 * make sure the rounding in further calculations is correct 2861 * (always bias to the right), so just add 1 for all positive values. 2862 */ 2863 if (mid_min > 0) 2864 mid_min++; 2865 mid_min = mid_min / 2; 2866 debug_cond(DLEVEL == 1, "%s:%d write_center: mid_min=%d\n", __func__, 2867 __LINE__, mid_min); 2868 2869 /* Determine the amount we can change DQS (which is -mid_min) */ 2870 orig_mid_min = mid_min; 2871 new_dqs = start_dqs; 2872 mid_min = 0; 2873 debug_cond(DLEVEL == 1, "%s:%d write_center: start_dqs=%d new_dqs=%d \ 2874 mid_min=%d\n", __func__, __LINE__, start_dqs, new_dqs, mid_min); 2875 /* Initialize data for export structures */ 2876 dqs_margin = IO_IO_OUT1_DELAY_MAX + 1; 2877 dq_margin = IO_IO_OUT1_DELAY_MAX + 1; 2878 2879 /* add delay to bring centre of all DQ windows to the same "level" */ 2880 for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) { 2881 /* Use values before divide by 2 to reduce round off error */ 2882 shift_dq = (left_edge[i] - right_edge[i] - 2883 (left_edge[min_index] - right_edge[min_index]))/2 + 2884 (orig_mid_min - mid_min); 2885 2886 debug_cond(DLEVEL == 2, "%s:%d write_center: before: shift_dq \ 2887 [%u]=%d\n", __func__, __LINE__, i, shift_dq); 2888 2889 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET; 2890 temp_dq_out1_delay = readl(addr + (i << 2)); 2891 if (shift_dq + (int32_t)temp_dq_out1_delay > 2892 (int32_t)IO_IO_OUT1_DELAY_MAX) { 2893 shift_dq = (int32_t)IO_IO_OUT1_DELAY_MAX - temp_dq_out1_delay; 2894 } else if (shift_dq + (int32_t)temp_dq_out1_delay < 0) { 2895 shift_dq = -(int32_t)temp_dq_out1_delay; 2896 } 2897 debug_cond(DLEVEL == 2, "write_center: after: shift_dq[%u]=%d\n", 2898 i, shift_dq); 2899 scc_mgr_set_dq_out1_delay(i, temp_dq_out1_delay + shift_dq); 2900 scc_mgr_load_dq(i); 2901 2902 debug_cond(DLEVEL == 2, "write_center: margin[%u]=[%d,%d]\n", i, 2903 left_edge[i] - shift_dq + (-mid_min), 2904 right_edge[i] + shift_dq - (-mid_min)); 2905 /* To determine values for export structures */ 2906 if (left_edge[i] - shift_dq + (-mid_min) < dq_margin) 2907 dq_margin = left_edge[i] - shift_dq + (-mid_min); 2908 2909 if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin) 
2910 dqs_margin = right_edge[i] + shift_dq - (-mid_min); 2911 } 2912 2913 /* Move DQS */ 2914 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs); 2915 writel(0, &sdr_scc_mgr->update); 2916 2917 /* Centre DM */ 2918 debug_cond(DLEVEL == 2, "%s:%d write_center: DM\n", __func__, __LINE__); 2919 2920 /* 2921 * set the left and right edge of each bit to an illegal value, 2922 * use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value, 2923 */ 2924 left_edge[0] = IO_IO_OUT1_DELAY_MAX + 1; 2925 right_edge[0] = IO_IO_OUT1_DELAY_MAX + 1; 2926 int32_t bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; 2927 int32_t end_curr = IO_IO_OUT1_DELAY_MAX + 1; 2928 int32_t bgn_best = IO_IO_OUT1_DELAY_MAX + 1; 2929 int32_t end_best = IO_IO_OUT1_DELAY_MAX + 1; 2930 int32_t win_best = 0; 2931 2932 /* Search for the/part of the window with DM shift */ 2933 for (d = IO_IO_OUT1_DELAY_MAX; d >= 0; d -= DELTA_D) { 2934 scc_mgr_apply_group_dm_out1_delay(d); 2935 writel(0, &sdr_scc_mgr->update); 2936 2937 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1, 2938 PASS_ALL_BITS, &bit_chk, 2939 0)) { 2940 /* USE Set current end of the window */ 2941 end_curr = -d; 2942 /* 2943 * If a starting edge of our window has not been seen 2944 * this is our current start of the DM window. 2945 */ 2946 if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1) 2947 bgn_curr = -d; 2948 2949 /* 2950 * If current window is bigger than best seen. 2951 * Set best seen to be current window. 2952 */ 2953 if ((end_curr-bgn_curr+1) > win_best) { 2954 win_best = end_curr-bgn_curr+1; 2955 bgn_best = bgn_curr; 2956 end_best = end_curr; 2957 } 2958 } else { 2959 /* We just saw a failing test. Reset temp edge */ 2960 bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; 2961 end_curr = IO_IO_OUT1_DELAY_MAX + 1; 2962 } 2963 } 2964 2965 2966 /* Reset DM delay chains to 0 */ 2967 scc_mgr_apply_group_dm_out1_delay(0); 2968 2969 /* 2970 * Check to see if the current window nudges up aganist 0 delay. 2971 * If so we need to continue the search by shifting DQS otherwise DQS 2972 * search begins as a new search. */ 2973 if (end_curr != 0) { 2974 bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; 2975 end_curr = IO_IO_OUT1_DELAY_MAX + 1; 2976 } 2977 2978 /* Search for the/part of the window with DQS shifts */ 2979 for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - new_dqs; d += DELTA_D) { 2980 /* 2981 * Note: This only shifts DQS, so are we limiting ourselve to 2982 * width of DQ unnecessarily. 2983 */ 2984 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, 2985 d + new_dqs); 2986 2987 writel(0, &sdr_scc_mgr->update); 2988 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1, 2989 PASS_ALL_BITS, &bit_chk, 2990 0)) { 2991 /* USE Set current end of the window */ 2992 end_curr = d; 2993 /* 2994 * If a beginning edge of our window has not been seen 2995 * this is our current begin of the DM window. 2996 */ 2997 if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1) 2998 bgn_curr = d; 2999 3000 /* 3001 * If current window is bigger than best seen. Set best 3002 * seen to be current window. 3003 */ 3004 if ((end_curr-bgn_curr+1) > win_best) { 3005 win_best = end_curr-bgn_curr+1; 3006 bgn_best = bgn_curr; 3007 end_best = end_curr; 3008 } 3009 } else { 3010 /* We just saw a failing test. 
Reset temp edge */ 3011 bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; 3012 end_curr = IO_IO_OUT1_DELAY_MAX + 1; 3013 3014 /* Early exit optimization: if ther remaining delay 3015 chain space is less than already seen largest window 3016 we can exit */ 3017 if ((win_best-1) > 3018 (IO_IO_OUT1_DELAY_MAX - new_dqs - d)) { 3019 break; 3020 } 3021 } 3022 } 3023 3024 /* assign left and right edge for cal and reporting; */ 3025 left_edge[0] = -1*bgn_best; 3026 right_edge[0] = end_best; 3027 3028 debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d\n", __func__, 3029 __LINE__, left_edge[0], right_edge[0]); 3030 3031 /* Move DQS (back to orig) */ 3032 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs); 3033 3034 /* Move DM */ 3035 3036 /* Find middle of window for the DM bit */ 3037 mid = (left_edge[0] - right_edge[0]) / 2; 3038 3039 /* only move right, since we are not moving DQS/DQ */ 3040 if (mid < 0) 3041 mid = 0; 3042 3043 /* dm_marign should fail if we never find a window */ 3044 if (win_best == 0) 3045 dm_margin = -1; 3046 else 3047 dm_margin = left_edge[0] - mid; 3048 3049 scc_mgr_apply_group_dm_out1_delay(mid); 3050 writel(0, &sdr_scc_mgr->update); 3051 3052 debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d mid=%d \ 3053 dm_margin=%d\n", __func__, __LINE__, left_edge[0], 3054 right_edge[0], mid, dm_margin); 3055 /* Export values */ 3056 gbl->fom_out += dq_margin + dqs_margin; 3057 3058 debug_cond(DLEVEL == 2, "%s:%d write_center: dq_margin=%d \ 3059 dqs_margin=%d dm_margin=%d\n", __func__, __LINE__, 3060 dq_margin, dqs_margin, dm_margin); 3061 3062 /* 3063 * Do not remove this line as it makes sure all of our 3064 * decisions have been applied. 3065 */ 3066 writel(0, &sdr_scc_mgr->update); 3067 return (dq_margin >= 0) && (dqs_margin >= 0) && (dm_margin >= 0); 3068 } 3069 3070 /* calibrate the write operations */ 3071 static uint32_t rw_mgr_mem_calibrate_writes(uint32_t rank_bgn, uint32_t g, 3072 uint32_t test_bgn) 3073 { 3074 /* update info for sims */ 3075 debug("%s:%d %u %u\n", __func__, __LINE__, g, test_bgn); 3076 3077 reg_file_set_stage(CAL_STAGE_WRITES); 3078 reg_file_set_sub_stage(CAL_SUBSTAGE_WRITES_CENTER); 3079 3080 reg_file_set_group(g); 3081 3082 if (!rw_mgr_mem_calibrate_writes_center(rank_bgn, g, test_bgn)) { 3083 set_failing_group_stage(g, CAL_STAGE_WRITES, 3084 CAL_SUBSTAGE_WRITES_CENTER); 3085 return 0; 3086 } 3087 3088 return 1; 3089 } 3090 3091 /** 3092 * mem_precharge_and_activate() - Precharge all banks and activate 3093 * 3094 * Precharge all banks and activate row 0 in bank "000..." and bank "111...". 3095 */ 3096 static void mem_precharge_and_activate(void) 3097 { 3098 int r; 3099 3100 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { 3101 /* Test if the rank should be skipped. */ 3102 if (param->skip_ranks[r]) 3103 continue; 3104 3105 /* Set rank. */ 3106 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); 3107 3108 /* Precharge all banks. */ 3109 writel(RW_MGR_PRECHARGE_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS | 3110 RW_MGR_RUN_SINGLE_GROUP_OFFSET); 3111 3112 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr0); 3113 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT1, 3114 &sdr_rw_load_jump_mgr_regs->load_jump_add0); 3115 3116 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr1); 3117 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT2, 3118 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 3119 3120 /* Activate rows. 
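 * RW_MGR_ACTIVATE_0_AND_1 below issues the row-0 activates described
 * in the function comment; the WAIT1/WAIT2 counters loaded above (0x0F
 * each) give the RW manager the wait cycles it needs between those
 * commands.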
*/ 3121 writel(RW_MGR_ACTIVATE_0_AND_1, SDR_PHYGRP_RWMGRGRP_ADDRESS | 3122 RW_MGR_RUN_SINGLE_GROUP_OFFSET); 3123 } 3124 } 3125 3126 /** 3127 * mem_init_latency() - Configure memory RLAT and WLAT settings 3128 * 3129 * Configure memory RLAT and WLAT parameters. 3130 */ 3131 static void mem_init_latency(void) 3132 { 3133 /* 3134 * For AV/CV, LFIFO is hardened and always runs at full rate 3135 * so max latency in AFI clocks, used here, is correspondingly 3136 * smaller. 3137 */ 3138 const u32 max_latency = (1 << MAX_LATENCY_COUNT_WIDTH) - 1; 3139 u32 rlat, wlat; 3140 3141 debug("%s:%d\n", __func__, __LINE__); 3142 3143 /* 3144 * Read in write latency. 3145 * WL for Hard PHY does not include additive latency. 3146 */ 3147 wlat = readl(&data_mgr->t_wl_add); 3148 wlat += readl(&data_mgr->mem_t_add); 3149 3150 gbl->rw_wl_nop_cycles = wlat - 1; 3151 3152 /* Read in read latency. */ 3153 rlat = readl(&data_mgr->t_rl_add); 3154 3155 /* Set a pretty high read latency initially. */ 3156 gbl->curr_read_lat = rlat + 16; 3157 if (gbl->curr_read_lat > max_latency) 3158 gbl->curr_read_lat = max_latency; 3159 3160 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); 3161 3162 /* Advertise write latency. */ 3163 writel(wlat, &phy_mgr_cfg->afi_wlat); 3164 } 3165 3166 /* Set VFIFO and LFIFO to instant-on settings in skip calibration mode */ 3167 static void mem_skip_calibrate(void) 3168 { 3169 uint32_t vfifo_offset; 3170 uint32_t i, j, r; 3171 3172 debug("%s:%d\n", __func__, __LINE__); 3173 /* Need to update every shadow register set used by the interface */ 3174 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 3175 r += NUM_RANKS_PER_SHADOW_REG) { 3176 /* 3177 * Set output phase alignment settings appropriate for 3178 * skip calibration. 3179 */ 3180 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { 3181 scc_mgr_set_dqs_en_phase(i, 0); 3182 #if IO_DLL_CHAIN_LENGTH == 6 3183 scc_mgr_set_dqdqs_output_phase(i, 6); 3184 #else 3185 scc_mgr_set_dqdqs_output_phase(i, 7); 3186 #endif 3187 /* 3188 * Case:33398 3189 * 3190 * Write data arrives at the I/O two cycles before write 3191 * latency is reached (720 deg). 3192 * -> due to bit-slip in a/c bus 3193 * -> to allow board skew where dqs is longer than ck 3194 * -> how often can this happen!? 3195 * -> can claim back some ptaps for high freq 3196 * support if we can relax this, but I digress...
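 * (The derivation below reduces the required shift to
 * (1.25 * IO_DLL_CHAIN_LENGTH - 2) ptaps; with an illustrative
 * IO_DLL_CHAIN_LENGTH of 8 that works out to 1.25 * 8 - 2 = 8 ptaps.)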
3197 * 3198 * The write_clk leads mem_ck by 90 deg 3199 * The minimum ptap of the OPA is 180 deg 3200 * Each ptap has (360 / IO_DLL_CHAIN_LENGH) deg of delay 3201 * The write_clk is always delayed by 2 ptaps 3202 * 3203 * Hence, to make DQS aligned to CK, we need to delay 3204 * DQS by: 3205 * (720 - 90 - 180 - 2 * (360 / IO_DLL_CHAIN_LENGTH)) 3206 * 3207 * Dividing the above by (360 / IO_DLL_CHAIN_LENGTH) 3208 * gives us the number of ptaps, which simplies to: 3209 * 3210 * (1.25 * IO_DLL_CHAIN_LENGTH - 2) 3211 */ 3212 scc_mgr_set_dqdqs_output_phase(i, (1.25 * 3213 IO_DLL_CHAIN_LENGTH - 2)); 3214 } 3215 writel(0xff, &sdr_scc_mgr->dqs_ena); 3216 writel(0xff, &sdr_scc_mgr->dqs_io_ena); 3217 3218 for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) { 3219 writel(i, SDR_PHYGRP_SCCGRP_ADDRESS | 3220 SCC_MGR_GROUP_COUNTER_OFFSET); 3221 } 3222 writel(0xff, &sdr_scc_mgr->dq_ena); 3223 writel(0xff, &sdr_scc_mgr->dm_ena); 3224 writel(0, &sdr_scc_mgr->update); 3225 } 3226 3227 /* Compensate for simulation model behaviour */ 3228 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { 3229 scc_mgr_set_dqs_bus_in_delay(i, 10); 3230 scc_mgr_load_dqs(i); 3231 } 3232 writel(0, &sdr_scc_mgr->update); 3233 3234 /* 3235 * ArriaV has hard FIFOs that can only be initialized by incrementing 3236 * in sequencer. 3237 */ 3238 vfifo_offset = CALIB_VFIFO_OFFSET; 3239 for (j = 0; j < vfifo_offset; j++) { 3240 writel(0xff, &phy_mgr_cmd->inc_vfifo_hard_phy); 3241 } 3242 writel(0, &phy_mgr_cmd->fifo_reset); 3243 3244 /* 3245 * For ACV with hard lfifo, we get the skip-cal setting from 3246 * generation-time constant. 3247 */ 3248 gbl->curr_read_lat = CALIB_LFIFO_OFFSET; 3249 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); 3250 } 3251 3252 /* Memory calibration entry point */ 3253 static uint32_t mem_calibrate(void) 3254 { 3255 uint32_t i; 3256 uint32_t rank_bgn, sr; 3257 uint32_t write_group, write_test_bgn; 3258 uint32_t read_group, read_test_bgn; 3259 uint32_t run_groups, current_run; 3260 uint32_t failing_groups = 0; 3261 uint32_t group_failed = 0; 3262 uint32_t sr_failed = 0; 3263 3264 debug("%s:%d\n", __func__, __LINE__); 3265 3266 /* Initialize the data settings */ 3267 gbl->error_substage = CAL_SUBSTAGE_NIL; 3268 gbl->error_stage = CAL_STAGE_NIL; 3269 gbl->error_group = 0xff; 3270 gbl->fom_in = 0; 3271 gbl->fom_out = 0; 3272 3273 /* Initialize WLAT and RLAT. */ 3274 mem_init_latency(); 3275 3276 /* Initialize bit slips. */ 3277 mem_precharge_and_activate(); 3278 3279 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { 3280 writel(i, SDR_PHYGRP_SCCGRP_ADDRESS | 3281 SCC_MGR_GROUP_COUNTER_OFFSET); 3282 /* Only needed once to set all groups, pins, DQ, DQS, DM. */ 3283 if (i == 0) 3284 scc_mgr_set_hhp_extras(); 3285 3286 scc_set_bypass_mode(i); 3287 } 3288 3289 if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) { 3290 /* 3291 * Set VFIFO and LFIFO to instant-on settings in skip 3292 * calibration mode. 3293 */ 3294 mem_skip_calibrate(); 3295 } else { 3296 for (i = 0; i < NUM_CALIB_REPEAT; i++) { 3297 /* 3298 * Zero all delay chain/phase settings for all 3299 * groups and all shadow register sets. 
3300 */ 3301 scc_mgr_zero_all(); 3302 3303 run_groups = ~param->skip_groups; 3304 3305 for (write_group = 0, write_test_bgn = 0; write_group 3306 < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; write_group++, 3307 write_test_bgn += RW_MGR_MEM_DQ_PER_WRITE_DQS) { 3308 /* Initialized the group failure */ 3309 group_failed = 0; 3310 3311 current_run = run_groups & ((1 << 3312 RW_MGR_NUM_DQS_PER_WRITE_GROUP) - 1); 3313 run_groups = run_groups >> 3314 RW_MGR_NUM_DQS_PER_WRITE_GROUP; 3315 3316 if (current_run == 0) 3317 continue; 3318 3319 writel(write_group, SDR_PHYGRP_SCCGRP_ADDRESS | 3320 SCC_MGR_GROUP_COUNTER_OFFSET); 3321 scc_mgr_zero_group(write_group, 0); 3322 3323 for (read_group = write_group * 3324 RW_MGR_MEM_IF_READ_DQS_WIDTH / 3325 RW_MGR_MEM_IF_WRITE_DQS_WIDTH, 3326 read_test_bgn = 0; 3327 read_group < (write_group + 1) * 3328 RW_MGR_MEM_IF_READ_DQS_WIDTH / 3329 RW_MGR_MEM_IF_WRITE_DQS_WIDTH && 3330 group_failed == 0; 3331 read_group++, read_test_bgn += 3332 RW_MGR_MEM_DQ_PER_READ_DQS) { 3333 /* Calibrate the VFIFO */ 3334 if (!((STATIC_CALIB_STEPS) & 3335 CALIB_SKIP_VFIFO)) { 3336 if (!rw_mgr_mem_calibrate_vfifo 3337 (read_group, 3338 read_test_bgn)) { 3339 group_failed = 1; 3340 3341 if (!(gbl-> 3342 phy_debug_mode_flags & 3343 PHY_DEBUG_SWEEP_ALL_GROUPS)) { 3344 return 0; 3345 } 3346 } 3347 } 3348 } 3349 3350 /* Calibrate the output side */ 3351 if (group_failed == 0) { 3352 for (rank_bgn = 0, sr = 0; rank_bgn 3353 < RW_MGR_MEM_NUMBER_OF_RANKS; 3354 rank_bgn += 3355 NUM_RANKS_PER_SHADOW_REG, 3356 ++sr) { 3357 sr_failed = 0; 3358 if (!((STATIC_CALIB_STEPS) & 3359 CALIB_SKIP_WRITES)) { 3360 if ((STATIC_CALIB_STEPS) 3361 & CALIB_SKIP_DELAY_SWEEPS) { 3362 /* not needed in quick mode! */ 3363 } else { 3364 /* 3365 * Determine if this set of 3366 * ranks should be skipped 3367 * entirely. 3368 */ 3369 if (!param->skip_shadow_regs[sr]) { 3370 if (!rw_mgr_mem_calibrate_writes 3371 (rank_bgn, write_group, 3372 write_test_bgn)) { 3373 sr_failed = 1; 3374 if (!(gbl-> 3375 phy_debug_mode_flags & 3376 PHY_DEBUG_SWEEP_ALL_GROUPS)) { 3377 return 0; 3378 } 3379 } 3380 } 3381 } 3382 } 3383 if (sr_failed != 0) 3384 group_failed = 1; 3385 } 3386 } 3387 3388 if (group_failed == 0) { 3389 for (read_group = write_group * 3390 RW_MGR_MEM_IF_READ_DQS_WIDTH / 3391 RW_MGR_MEM_IF_WRITE_DQS_WIDTH, 3392 read_test_bgn = 0; 3393 read_group < (write_group + 1) 3394 * RW_MGR_MEM_IF_READ_DQS_WIDTH 3395 / RW_MGR_MEM_IF_WRITE_DQS_WIDTH && 3396 group_failed == 0; 3397 read_group++, read_test_bgn += 3398 RW_MGR_MEM_DQ_PER_READ_DQS) { 3399 if (!((STATIC_CALIB_STEPS) & 3400 CALIB_SKIP_WRITES)) { 3401 if (!rw_mgr_mem_calibrate_vfifo_end 3402 (read_group, read_test_bgn)) { 3403 group_failed = 1; 3404 3405 if (!(gbl->phy_debug_mode_flags 3406 & PHY_DEBUG_SWEEP_ALL_GROUPS)) { 3407 return 0; 3408 } 3409 } 3410 } 3411 } 3412 } 3413 3414 if (group_failed != 0) 3415 failing_groups++; 3416 } 3417 3418 /* 3419 * USER If there are any failing groups then report 3420 * the failure. 3421 */ 3422 if (failing_groups != 0) 3423 return 0; 3424 3425 /* Calibrate the LFIFO */ 3426 if (!((STATIC_CALIB_STEPS) & CALIB_SKIP_LFIFO)) { 3427 /* 3428 * If we're skipping groups as part of debug, 3429 * don't calibrate LFIFO. 3430 */ 3431 if (param->skip_groups == 0) { 3432 if (!rw_mgr_mem_calibrate_lfifo()) 3433 return 0; 3434 } 3435 } 3436 } 3437 } 3438 3439 /* 3440 * Do not remove this line as it makes sure all of our decisions 3441 * have been applied. 
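 * (This is the same "update" write that follows every batch of
 * scc_mgr_* changes elsewhere in this file; it applies any settings
 * that are still pending in the SCC manager.)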
3442 */ 3443 writel(0, &sdr_scc_mgr->update); 3444 return 1; 3445 } 3446 3447 /** 3448 * run_mem_calibrate() - Perform memory calibration 3449 * 3450 * This function triggers the entire memory calibration procedure. 3451 */ 3452 static int run_mem_calibrate(void) 3453 { 3454 int pass; 3455 3456 debug("%s:%d\n", __func__, __LINE__); 3457 3458 /* Reset pass/fail status shown on afi_cal_success/fail */ 3459 writel(PHY_MGR_CAL_RESET, &phy_mgr_cfg->cal_status); 3460 3461 /* Stop tracking manager. */ 3462 clrbits_le32(&sdr_ctrl->ctrl_cfg, 1 << 22); 3463 3464 phy_mgr_initialize(); 3465 rw_mgr_mem_initialize(); 3466 3467 /* Perform the actual memory calibration. */ 3468 pass = mem_calibrate(); 3469 3470 mem_precharge_and_activate(); 3471 writel(0, &phy_mgr_cmd->fifo_reset); 3472 3473 /* Handoff. */ 3474 rw_mgr_mem_handoff(); 3475 /* 3476 * In Hard PHY this is a 2-bit control: 3477 * 0: AFI Mux Select 3478 * 1: DDIO Mux Select 3479 */ 3480 writel(0x2, &phy_mgr_cfg->mux_sel); 3481 3482 /* Start tracking manager. */ 3483 setbits_le32(&sdr_ctrl->ctrl_cfg, 1 << 22); 3484 3485 return pass; 3486 } 3487 3488 /** 3489 * debug_mem_calibrate() - Report result of memory calibration 3490 * @pass: Value indicating whether calibration passed or failed 3491 * 3492 * This function reports the results of the memory calibration 3493 * and writes debug information into the register file. 3494 */ 3495 static void debug_mem_calibrate(int pass) 3496 { 3497 uint32_t debug_info; 3498 3499 if (pass) { 3500 printf("%s: CALIBRATION PASSED\n", __FILE__); 3501 3502 gbl->fom_in /= 2; 3503 gbl->fom_out /= 2; 3504 3505 if (gbl->fom_in > 0xff) 3506 gbl->fom_in = 0xff; 3507 3508 if (gbl->fom_out > 0xff) 3509 gbl->fom_out = 0xff; 3510 3511 /* Update the FOM in the register file */ 3512 debug_info = gbl->fom_in; 3513 debug_info |= gbl->fom_out << 8; 3514 writel(debug_info, &sdr_reg_file->fom); 3515 3516 writel(debug_info, &phy_mgr_cfg->cal_debug_info); 3517 writel(PHY_MGR_CAL_SUCCESS, &phy_mgr_cfg->cal_status); 3518 } else { 3519 printf("%s: CALIBRATION FAILED\n", __FILE__); 3520 3521 debug_info = gbl->error_stage; 3522 debug_info |= gbl->error_substage << 8; 3523 debug_info |= gbl->error_group << 16; 3524 3525 writel(debug_info, &sdr_reg_file->failing_stage); 3526 writel(debug_info, &phy_mgr_cfg->cal_debug_info); 3527 writel(PHY_MGR_CAL_FAIL, &phy_mgr_cfg->cal_status); 3528 3529 /* Update the failing group/stage in the register file */ 3530 debug_info = gbl->error_stage; 3531 debug_info |= gbl->error_substage << 8; 3532 debug_info |= gbl->error_group << 16; 3533 writel(debug_info, &sdr_reg_file->failing_stage); 3534 } 3535 3536 printf("%s: Calibration complete\n", __FILE__); 3537 } 3538 3539 /** 3540 * hc_initialize_rom_data() - Initialize ROM data 3541 * 3542 * Initialize ROM data. 3543 */ 3544 static void hc_initialize_rom_data(void) 3545 { 3546 u32 i, addr; 3547 3548 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_INST_ROM_WRITE_OFFSET; 3549 for (i = 0; i < ARRAY_SIZE(inst_rom_init); i++) 3550 writel(inst_rom_init[i], addr + (i << 2)); 3551 3552 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_AC_ROM_WRITE_OFFSET; 3553 for (i = 0; i < ARRAY_SIZE(ac_rom_init); i++) 3554 writel(ac_rom_init[i], addr + (i << 2)); 3555 } 3556 3557 /** 3558 * initialize_reg_file() - Initialize SDR register file 3559 * 3560 * Initialize SDR register file. 
3561 */ 3562 static void initialize_reg_file(void) 3563 { 3564 /* Initialize the register file with the correct data */ 3565 writel(REG_FILE_INIT_SEQ_SIGNATURE, &sdr_reg_file->signature); 3566 writel(0, &sdr_reg_file->debug_data_addr); 3567 writel(0, &sdr_reg_file->cur_stage); 3568 writel(0, &sdr_reg_file->fom); 3569 writel(0, &sdr_reg_file->failing_stage); 3570 writel(0, &sdr_reg_file->debug1); 3571 writel(0, &sdr_reg_file->debug2); 3572 } 3573 3574 /** 3575 * initialize_hps_phy() - Initialize HPS PHY 3576 * 3577 * Initialize HPS PHY. 3578 */ 3579 static void initialize_hps_phy(void) 3580 { 3581 uint32_t reg; 3582 /* 3583 * Tracking also gets configured here because it's in the 3584 * same register. 3585 */ 3586 uint32_t trk_sample_count = 7500; 3587 uint32_t trk_long_idle_sample_count = (10 << 16) | 100; 3588 /* 3589 * Format is number of outer loops in the 16 MSB, sample 3590 * count in 16 LSB. 3591 */ 3592 3593 reg = 0; 3594 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ACDELAYEN_SET(2); 3595 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQDELAYEN_SET(1); 3596 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSDELAYEN_SET(1); 3597 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSLOGICDELAYEN_SET(1); 3598 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_RESETDELAYEN_SET(0); 3599 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_LPDDRDIS_SET(1); 3600 /* 3601 * This field selects the intrinsic latency to RDATA_EN/FULL path. 3602 * 00-bypass, 01- add 5 cycles, 10- add 10 cycles, 11- add 15 cycles. 3603 */ 3604 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ADDLATSEL_SET(0); 3605 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_SET( 3606 trk_sample_count); 3607 writel(reg, &sdr_ctrl->phy_ctrl0); 3608 3609 reg = 0; 3610 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_SAMPLECOUNT_31_20_SET( 3611 trk_sample_count >> 3612 SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_WIDTH); 3613 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_SET( 3614 trk_long_idle_sample_count); 3615 writel(reg, &sdr_ctrl->phy_ctrl1); 3616 3617 reg = 0; 3618 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_LONGIDLESAMPLECOUNT_31_20_SET( 3619 trk_long_idle_sample_count >> 3620 SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_WIDTH); 3621 writel(reg, &sdr_ctrl->phy_ctrl2); 3622 } 3623 3624 /** 3625 * initialize_tracking() - Initialize tracking 3626 * 3627 * Initialize the register file with usable initial data. 3628 */ 3629 static void initialize_tracking(void) 3630 { 3631 /* 3632 * Initialize the register file with the correct data. 3633 * Compute usable version of value in case we skip full 3634 * computation later. 
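 * (Purely for illustration: if IO_DELAY_PER_OPA_TAP were 416 ps and
 * IO_DELAY_PER_DCHAIN_TAP were 25 ps, the value written below would be
 * DIV_ROUND_UP(416, 25) - 1 = 16 dtaps per ptap.)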
3635 */ 3636 writel(DIV_ROUND_UP(IO_DELAY_PER_OPA_TAP, IO_DELAY_PER_DCHAIN_TAP) - 1, 3637 &sdr_reg_file->dtaps_per_ptap); 3638 3639 /* trk_sample_count */ 3640 writel(7500, &sdr_reg_file->trk_sample_count); 3641 3642 /* longidle outer loop [15:0] */ 3643 writel((10 << 16) | (100 << 0), &sdr_reg_file->trk_longidle); 3644 3645 /* 3646 * longidle sample count [31:24] 3647 * trfc, worst case of 933Mhz 4Gb [23:16] 3648 * trcd, worst case [15:8] 3649 * vfifo wait [7:0] 3650 */ 3651 writel((243 << 24) | (14 << 16) | (10 << 8) | (4 << 0), 3652 &sdr_reg_file->delays); 3653 3654 /* mux delay */ 3655 writel((RW_MGR_IDLE << 24) | (RW_MGR_ACTIVATE_1 << 16) | 3656 (RW_MGR_SGLE_READ << 8) | (RW_MGR_PRECHARGE_ALL << 0), 3657 &sdr_reg_file->trk_rw_mgr_addr); 3658 3659 writel(RW_MGR_MEM_IF_READ_DQS_WIDTH, 3660 &sdr_reg_file->trk_read_dqs_width); 3661 3662 /* trefi [7:0] */ 3663 writel((RW_MGR_REFRESH_ALL << 24) | (1000 << 0), 3664 &sdr_reg_file->trk_rfsh); 3665 } 3666 3667 int sdram_calibration_full(void) 3668 { 3669 struct param_type my_param; 3670 struct gbl_type my_gbl; 3671 uint32_t pass; 3672 3673 memset(&my_param, 0, sizeof(my_param)); 3674 memset(&my_gbl, 0, sizeof(my_gbl)); 3675 3676 param = &my_param; 3677 gbl = &my_gbl; 3678 3679 /* Set the calibration enabled by default */ 3680 gbl->phy_debug_mode_flags |= PHY_DEBUG_ENABLE_CAL_RPT; 3681 /* 3682 * Only sweep all groups (regardless of fail state) by default 3683 * Set enabled read test by default. 3684 */ 3685 #if DISABLE_GUARANTEED_READ 3686 gbl->phy_debug_mode_flags |= PHY_DEBUG_DISABLE_GUARANTEED_READ; 3687 #endif 3688 /* Initialize the register file */ 3689 initialize_reg_file(); 3690 3691 /* Initialize any PHY CSR */ 3692 initialize_hps_phy(); 3693 3694 scc_mgr_initialize(); 3695 3696 initialize_tracking(); 3697 3698 printf("%s: Preparing to start memory calibration\n", __FILE__); 3699 3700 debug("%s:%d\n", __func__, __LINE__); 3701 debug_cond(DLEVEL == 1, 3702 "DDR3 FULL_RATE ranks=%u cs/dimm=%u dq/dqs=%u,%u vg/dqs=%u,%u ", 3703 RW_MGR_MEM_NUMBER_OF_RANKS, RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM, 3704 RW_MGR_MEM_DQ_PER_READ_DQS, RW_MGR_MEM_DQ_PER_WRITE_DQS, 3705 RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS, 3706 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS); 3707 debug_cond(DLEVEL == 1, 3708 "dqs=%u,%u dq=%u dm=%u ptap_delay=%u dtap_delay=%u ", 3709 RW_MGR_MEM_IF_READ_DQS_WIDTH, RW_MGR_MEM_IF_WRITE_DQS_WIDTH, 3710 RW_MGR_MEM_DATA_WIDTH, RW_MGR_MEM_DATA_MASK_WIDTH, 3711 IO_DELAY_PER_OPA_TAP, IO_DELAY_PER_DCHAIN_TAP); 3712 debug_cond(DLEVEL == 1, "dtap_dqsen_delay=%u, dll=%u", 3713 IO_DELAY_PER_DQS_EN_DCHAIN_TAP, IO_DLL_CHAIN_LENGTH); 3714 debug_cond(DLEVEL == 1, "max values: en_p=%u dqdqs_p=%u en_d=%u dqs_in_d=%u ", 3715 IO_DQS_EN_PHASE_MAX, IO_DQDQS_OUT_PHASE_MAX, 3716 IO_DQS_EN_DELAY_MAX, IO_DQS_IN_DELAY_MAX); 3717 debug_cond(DLEVEL == 1, "io_in_d=%u io_out1_d=%u io_out2_d=%u ", 3718 IO_IO_IN_DELAY_MAX, IO_IO_OUT1_DELAY_MAX, 3719 IO_IO_OUT2_DELAY_MAX); 3720 debug_cond(DLEVEL == 1, "dqs_in_reserve=%u dqs_out_reserve=%u\n", 3721 IO_DQS_IN_RESERVE, IO_DQS_OUT_RESERVE); 3722 3723 hc_initialize_rom_data(); 3724 3725 /* update info for sims */ 3726 reg_file_set_stage(CAL_STAGE_NIL); 3727 reg_file_set_group(0); 3728 3729 /* 3730 * Load global needed for those actions that require 3731 * some dynamic calibration support. 3732 */ 3733 dyn_calib_steps = STATIC_CALIB_STEPS; 3734 /* 3735 * Load global to allow dynamic selection of delay loop settings 3736 * based on calibration mode. 
3737 */ 3738 if (!(dyn_calib_steps & CALIB_SKIP_DELAY_LOOPS)) 3739 skip_delay_mask = 0xff; 3740 else 3741 skip_delay_mask = 0x0; 3742 3743 pass = run_mem_calibrate(); 3744 debug_mem_calibrate(pass); 3745 return pass; 3746 } 3747