/*
 * Copyright Altera Corporation (C) 2012-2015
 *
 * SPDX-License-Identifier:	BSD-3-Clause
 */

#include <common.h>
#include <asm/io.h>
#include <asm/arch/sdram.h>
#include "sequencer.h"
#include "sequencer_auto.h"
#include "sequencer_auto_ac_init.h"
#include "sequencer_auto_inst_init.h"
#include "sequencer_defines.h"

static struct socfpga_sdr_rw_load_manager *sdr_rw_load_mgr_regs =
	(struct socfpga_sdr_rw_load_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0x800);

static struct socfpga_sdr_rw_load_jump_manager *sdr_rw_load_jump_mgr_regs =
	(struct socfpga_sdr_rw_load_jump_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0xC00);

static struct socfpga_sdr_reg_file *sdr_reg_file =
	(struct socfpga_sdr_reg_file *)SDR_PHYGRP_REGFILEGRP_ADDRESS;

static struct socfpga_sdr_scc_mgr *sdr_scc_mgr =
	(struct socfpga_sdr_scc_mgr *)(SDR_PHYGRP_SCCGRP_ADDRESS | 0xe00);

static struct socfpga_phy_mgr_cmd *phy_mgr_cmd =
	(struct socfpga_phy_mgr_cmd *)SDR_PHYGRP_PHYMGRGRP_ADDRESS;

static struct socfpga_phy_mgr_cfg *phy_mgr_cfg =
	(struct socfpga_phy_mgr_cfg *)(SDR_PHYGRP_PHYMGRGRP_ADDRESS | 0x40);

static struct socfpga_data_mgr *data_mgr =
	(struct socfpga_data_mgr *)SDR_PHYGRP_DATAMGRGRP_ADDRESS;

static struct socfpga_sdr_ctrl *sdr_ctrl =
	(struct socfpga_sdr_ctrl *)SDR_CTRLGRP_ADDRESS;

#define DELTA_D 1

/*
 * In order to reduce ROM size, most of the selectable calibration steps are
 * decided at compile time based on the user's calibration mode selection,
 * as captured by the STATIC_CALIB_STEPS selection below.
 *
 * However, to support simulation-time selection of fast simulation mode, where
 * we skip everything except the bare minimum, we need a few of the steps to
 * be dynamic. In those cases, we either use the DYNAMIC_CALIB_STEPS for the
 * check, which is based on the rtl-supplied value, or we dynamically compute
 * the value to use based on the dynamically-chosen calibration mode.
 */

#define DLEVEL 0
#define STATIC_IN_RTL_SIM 0
#define STATIC_SKIP_DELAY_LOOPS 0

#define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | \
	STATIC_SKIP_DELAY_LOOPS)

/* calibration steps requested by the rtl */
uint16_t dyn_calib_steps;

/*
 * To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option
 * instead of static, we use boolean logic to select between
 * non-skip and skip values
 *
 * The mask is set to include all bits when not-skipping, but is
 * zero when skipping
 */

uint16_t skip_delay_mask;	/* mask off bits when skipping/not-skipping */

#define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \
	((non_skip_value) & skip_delay_mask)

struct gbl_type *gbl;
struct param_type *param;
uint32_t curr_shadow_reg;

static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn,
	uint32_t write_group, uint32_t use_dm,
	uint32_t all_correct, uint32_t *bit_chk, uint32_t all_ranks);

static void set_failing_group_stage(uint32_t group, uint32_t stage,
	uint32_t substage)
{
	/*
	 * Only set the global stage if there has not been any other
	 * failing group
	 */
	if (gbl->error_stage == CAL_STAGE_NIL) {
		gbl->error_substage = substage;
		gbl->error_stage = stage;
		gbl->error_group = group;
	}
}

static void reg_file_set_group(u16 set_group)
{
	clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff0000, set_group << 16);
}

static void reg_file_set_stage(u8 set_stage)
{
	clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff, set_stage & 0xff);
}

static void reg_file_set_sub_stage(u8 set_sub_stage)
{
	set_sub_stage &= 0xff;
	clrsetbits_le32(&sdr_reg_file->cur_stage, 0xff00, set_sub_stage << 8);
}

static void initialize(void)
{
	debug("%s:%d\n", __func__, __LINE__);
	/* USER calibration has control over path to memory */
	/*
	 * In Hard PHY this is a 2-bit control:
	 * 0: AFI Mux Select
	 * 1: DDIO Mux Select
	 */
	writel(0x3, &phy_mgr_cfg->mux_sel);

	/* USER memory clock is not stable; we begin initialization */
	writel(0, &phy_mgr_cfg->reset_mem_stbl);

	/* USER calibration status all set to zero */
	writel(0, &phy_mgr_cfg->cal_status);

	writel(0, &phy_mgr_cfg->cal_debug_info);

	if ((dyn_calib_steps & CALIB_SKIP_ALL) != CALIB_SKIP_ALL) {
		param->read_correct_mask_vg = ((uint32_t)1 <<
			(RW_MGR_MEM_DQ_PER_READ_DQS /
			RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS)) - 1;
		param->write_correct_mask_vg = ((uint32_t)1 <<
			(RW_MGR_MEM_DQ_PER_READ_DQS /
			RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS)) - 1;
		param->read_correct_mask = ((uint32_t)1 <<
			RW_MGR_MEM_DQ_PER_READ_DQS) - 1;
		param->write_correct_mask = ((uint32_t)1 <<
			RW_MGR_MEM_DQ_PER_WRITE_DQS) - 1;
		param->dm_correct_mask = ((uint32_t)1 <<
			(RW_MGR_MEM_DATA_WIDTH / RW_MGR_MEM_DATA_MASK_WIDTH))
			- 1;
	}
}
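/*
 * Illustration of the masks computed above (example widths only, not taken
 * from any particular sequencer_defines.h): with 8 DQ per read/write DQS,
 * one virtual group per read DQS and a 32-bit data bus with 4 DM pins, every
 * one of the *_correct_mask_vg, read/write_correct_mask and dm_correct_mask
 * values evaluates to (1 << 8) - 1 = 0xff, i.e. one "pass" bit per pin
 * covered by the corresponding test.
 */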
static void set_rank_and_odt_mask(uint32_t rank, uint32_t odt_mode)
{
	uint32_t odt_mask_0 = 0;
	uint32_t odt_mask_1 = 0;
	uint32_t cs_and_odt_mask;

	if (odt_mode == RW_MGR_ODT_MODE_READ_WRITE) {
		if (RW_MGR_MEM_NUMBER_OF_RANKS == 1) {
			/*
			 * 1 Rank
			 * Read: ODT = 0
			 * Write: ODT = 1
			 */
			odt_mask_0 = 0x0;
			odt_mask_1 = 0x1;
		} else if (RW_MGR_MEM_NUMBER_OF_RANKS == 2) {
			/* 2 Ranks */
			if (RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM == 1) {
				/* - Dual-Slot, Single-Rank
				 *   (1 chip-select per DIMM)
				 *   OR
				 * - RDIMM, 4 total CS (2 CS per DIMM)
				 *   means 2 DIMMs
				 * Since MEM_NUMBER_OF_RANKS is 2, they are
				 * both single rank with 2 CS each
				 * (special for RDIMM)
				 * Read: Turn on ODT on the opposite rank
				 * Write: Turn on ODT on all ranks
				 */
				odt_mask_0 = 0x3 & ~(1 << rank);
				odt_mask_1 = 0x3;
			} else {
				/*
				 * USER - Single-Slot, Dual-Rank DIMMs
				 *   (2 chip-selects per DIMM)
				 * USER Read: Turn off ODT on all ranks
				 * USER Write: Turn on ODT on active rank
				 */
				odt_mask_0 = 0x0;
				odt_mask_1 = 0x3 & (1 << rank);
			}
		} else {
			/* 4 Ranks
			 * Read:
			 * ----------+-----------------------+
			 *           |          ODT          |
			 * Read From +-----------------------+
			 *   Rank    |  3  |  2  |  1  |  0  |
			 * ----------+-----+-----+-----+-----+
			 *     0     |  0  |  1  |  0  |  0  |
			 *     1     |  1  |  0  |  0  |  0  |
			 *     2     |  0  |  0  |  0  |  1  |
			 *     3     |  0  |  0  |  1  |  0  |
			 * ----------+-----+-----+-----+-----+
			 *
			 * Write:
			 * ----------+-----------------------+
			 *           |          ODT          |
			 * Write To  +-----------------------+
			 *   Rank    |  3  |  2  |  1  |  0  |
			 * ----------+-----+-----+-----+-----+
			 *     0     |  0  |  1  |  0  |  1  |
			 *     1     |  1  |  0  |  1  |  0  |
			 *     2     |  0  |  1  |  0  |  1  |
			 *     3     |  1  |  0  |  1  |  0  |
			 * ----------+-----+-----+-----+-----+
			 */
			switch (rank) {
			case 0:
				odt_mask_0 = 0x4;
				odt_mask_1 = 0x5;
				break;
			case 1:
				odt_mask_0 = 0x8;
				odt_mask_1 = 0xA;
				break;
			case 2:
				odt_mask_0 = 0x1;
				odt_mask_1 = 0x5;
				break;
			case 3:
				odt_mask_0 = 0x2;
				odt_mask_1 = 0xA;
				break;
			}
		}
	} else {
		odt_mask_0 = 0x0;
		odt_mask_1 = 0x0;
	}

	cs_and_odt_mask =
		(0xFF & ~(1 << rank)) |
		((0xFF & odt_mask_0) << 8) |
		((0xFF & odt_mask_1) << 16);
	writel(cs_and_odt_mask, SDR_PHYGRP_RWMGRGRP_ADDRESS |
				RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);
}
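/*
 * Worked example of the packing above: selecting rank 1 of a 4-rank
 * interface in read/write mode gives a CS field of 0xFF & ~(1 << 1) = 0xFD
 * with odt_mask_0 = 0x8 and odt_mask_1 = 0xA, so the register value is
 * 0xFD | (0x8 << 8) | (0xA << 16) = 0x000A08FD.
 */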
/**
 * scc_mgr_set() - Set SCC Manager register
 * @off:	Base offset in SCC Manager space
 * @grp:	Read/Write group
 * @val:	Value to be set
 *
 * This function sets the SCC Manager (Scan Chain Control Manager) register.
 */
static void scc_mgr_set(u32 off, u32 grp, u32 val)
{
	writel(val, SDR_PHYGRP_SCCGRP_ADDRESS | off | (grp << 2));
}

/**
 * scc_mgr_initialize() - Initialize SCC Manager registers
 *
 * Initialize SCC Manager registers.
 */
static void scc_mgr_initialize(void)
{
	/*
	 * Clear register file for HPS. 16 (2^4) is the size of the
	 * full register file in the scc mgr:
	 *	RFILE_DEPTH = 1 + log2(MEM_DQ_PER_DQS + 1 + MEM_DM_PER_DQS +
	 *			       MEM_IF_READ_DQS_WIDTH - 1);
	 */
	int i;

	for (i = 0; i < 16; i++) {
		debug_cond(DLEVEL == 1, "%s:%d: Clearing SCC RFILE index %u\n",
			   __func__, __LINE__, i);
		scc_mgr_set(SCC_MGR_HHP_RFILE_OFFSET, 0, i);
	}
}

static void scc_mgr_set_dqdqs_output_phase(uint32_t write_group, uint32_t phase)
{
	scc_mgr_set(SCC_MGR_DQDQS_OUT_PHASE_OFFSET, write_group, phase);
}

static void scc_mgr_set_dqs_bus_in_delay(uint32_t read_group, uint32_t delay)
{
	scc_mgr_set(SCC_MGR_DQS_IN_DELAY_OFFSET, read_group, delay);
}

static void scc_mgr_set_dqs_en_phase(uint32_t read_group, uint32_t phase)
{
	scc_mgr_set(SCC_MGR_DQS_EN_PHASE_OFFSET, read_group, phase);
}

static void scc_mgr_set_dqs_en_delay(uint32_t read_group, uint32_t delay)
{
	scc_mgr_set(SCC_MGR_DQS_EN_DELAY_OFFSET, read_group, delay);
}

static void scc_mgr_set_dqs_io_in_delay(uint32_t delay)
{
	scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS,
		    delay);
}

static void scc_mgr_set_dq_in_delay(uint32_t dq_in_group, uint32_t delay)
{
	scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, dq_in_group, delay);
}

static void scc_mgr_set_dq_out1_delay(uint32_t dq_in_group, uint32_t delay)
{
	scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, dq_in_group, delay);
}

static void scc_mgr_set_dqs_out1_delay(uint32_t delay)
{
	scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS,
		    delay);
}

static void scc_mgr_set_dm_out1_delay(uint32_t dm, uint32_t delay)
{
	scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET,
		    RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + dm,
		    delay);
}

/* load up dqs config settings */
static void scc_mgr_load_dqs(uint32_t dqs)
{
	writel(dqs, &sdr_scc_mgr->dqs_ena);
}

/* load up dqs io config settings */
static void scc_mgr_load_dqs_io(void)
{
	writel(0, &sdr_scc_mgr->dqs_io_ena);
}

/* load up dq config settings */
static void scc_mgr_load_dq(uint32_t dq_in_group)
{
	writel(dq_in_group, &sdr_scc_mgr->dq_ena);
}

/* load up dm config settings */
static void scc_mgr_load_dm(uint32_t dm)
{
	writel(dm, &sdr_scc_mgr->dm_ena);
}

/**
 * scc_mgr_set_all_ranks() - Set SCC Manager register for all ranks
 * @off:	Base offset in SCC Manager space
 * @grp:	Read/Write group
 * @val:	Value to be set
 * @update:	If non-zero, trigger SCC Manager update for all ranks
 *
 * This function sets the SCC Manager (Scan Chain Control Manager) register
 * and optionally triggers the SCC update for all ranks.
 */
static void scc_mgr_set_all_ranks(const u32 off, const u32 grp, const u32 val,
				  const int update)
{
	u32 r;

	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		scc_mgr_set(off, grp, val);

		if (update || (r == 0)) {
			writel(grp, &sdr_scc_mgr->dqs_ena);
			writel(0, &sdr_scc_mgr->update);
		}
	}
}

static void scc_mgr_set_dqs_en_phase_all_ranks(u32 read_group, u32 phase)
{
	/*
	 * USER although the h/w doesn't support different phases per
	 * shadow register, for simplicity our scc manager modeling
	 * keeps different phase settings per shadow reg, and it's
	 * important for us to keep them in sync to match h/w.
	 * For efficiency, the scan chain update should occur only
	 * once to sr0.
	 */
	scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_PHASE_OFFSET,
			      read_group, phase, 0);
}

static void scc_mgr_set_dqdqs_output_phase_all_ranks(uint32_t write_group,
						     uint32_t phase)
{
	/*
	 * USER although the h/w doesn't support different phases per
	 * shadow register, for simplicity our scc manager modeling
	 * keeps different phase settings per shadow reg, and it's
	 * important for us to keep them in sync to match h/w.
	 * For efficiency, the scan chain update should occur only
	 * once to sr0.
	 */
	scc_mgr_set_all_ranks(SCC_MGR_DQDQS_OUT_PHASE_OFFSET,
			      write_group, phase, 0);
}

static void scc_mgr_set_dqs_en_delay_all_ranks(uint32_t read_group,
					       uint32_t delay)
{
	/*
	 * In shadow register mode, the T11 settings are stored in
	 * registers in the core, which are updated by the DQS_ENA
	 * signals. Not issuing the SCC_MGR_UPD command allows us to
	 * save lots of rank switching overhead, by calling
	 * select_shadow_regs_for_update with update_scan_chains
	 * set to 0.
	 */
	scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_DELAY_OFFSET,
			      read_group, delay, 1);
	writel(0, &sdr_scc_mgr->update);
}

/**
 * scc_mgr_set_oct_out1_delay() - Set OCT output delay
 * @write_group:	Write group
 * @delay:		Delay value
 *
 * This function sets the OCT output delay in SCC manager.
 */
static void scc_mgr_set_oct_out1_delay(const u32 write_group, const u32 delay)
{
	const int ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH /
			  RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
	const int base = write_group * ratio;
	int i;
	/*
	 * Load the setting in the SCC manager
	 * Although OCT affects only write data, the OCT delay is controlled
	 * by the DQS logic block which is instantiated once per read group.
	 * For protocols where a write group consists of multiple read groups,
	 * the setting must be set multiple times.
	 */
	for (i = 0; i < ratio; i++)
		scc_mgr_set(SCC_MGR_OCT_OUT1_DELAY_OFFSET, base + i, delay);
}

/**
 * scc_mgr_set_hhp_extras() - Set HHP extras.
 *
 * Load the fixed setting in the SCC manager HHP extras.
 */
static void scc_mgr_set_hhp_extras(void)
{
	/*
	 * Load the fixed setting in the SCC manager
	 * bits: 0:0 = 1'b1	- DQS bypass
	 * bits: 1:1 = 1'b1	- DQ bypass
	 * bits: 4:2 = 3'b001	- rfifo_mode
	 * bits: 6:5 = 2'b01	- rfifo clock_select
	 * bits: 7:7 = 1'b0	- separate gating from ungating setting
	 * bits: 8:8 = 1'b0	- separate OE from Output delay setting
	 */
	const u32 value = (0 << 8) | (0 << 7) | (1 << 5) |
			  (1 << 2) | (1 << 1) | (1 << 0);
	const u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS |
			 SCC_MGR_HHP_GLOBALS_OFFSET |
			 SCC_MGR_HHP_EXTRAS_OFFSET;

	debug_cond(DLEVEL == 1, "%s:%d Setting HHP Extras\n",
		   __func__, __LINE__);
	writel(value, addr);
	debug_cond(DLEVEL == 1, "%s:%d Done Setting HHP Extras\n",
		   __func__, __LINE__);
}
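/*
 * The bit fields listed above combine to a constant register value:
 * (1 << 5) | (1 << 2) | (1 << 1) | (1 << 0) = 0x27, i.e. rfifo clock_select
 * 2'b01, rfifo_mode 3'b001, and both bypass bits set.
 */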
/**
 * scc_mgr_zero_all() - Zero all DQS config
 *
 * Zero all DQS config.
 */
static void scc_mgr_zero_all(void)
{
	int i, r;

	/*
	 * USER Zero all DQS config settings, across all groups and all
	 * shadow registers
	 */
	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
			/*
			 * The phases actually don't exist on a per-rank basis,
			 * but there's no harm updating them several times, so
			 * let's keep the code simple.
			 */
			scc_mgr_set_dqs_bus_in_delay(i, IO_DQS_IN_RESERVE);
			scc_mgr_set_dqs_en_phase(i, 0);
			scc_mgr_set_dqs_en_delay(i, 0);
		}

		for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) {
			scc_mgr_set_dqdqs_output_phase(i, 0);
			/* Arria V/Cyclone V don't have out2. */
			scc_mgr_set_oct_out1_delay(i, IO_DQS_OUT_RESERVE);
		}
	}

	/* Multicast to all DQS group enables. */
	writel(0xff, &sdr_scc_mgr->dqs_ena);
	writel(0, &sdr_scc_mgr->update);
}

/**
 * scc_set_bypass_mode() - Set bypass mode and trigger SCC update
 * @write_group:	Write group
 *
 * Set bypass mode and trigger SCC update.
 */
static void scc_set_bypass_mode(const u32 write_group)
{
	/* Multicast to all DQ enables. */
	writel(0xff, &sdr_scc_mgr->dq_ena);
	writel(0xff, &sdr_scc_mgr->dm_ena);

	/* Update current DQS IO enable. */
	writel(0, &sdr_scc_mgr->dqs_io_ena);

	/* Update the DQS logic. */
	writel(write_group, &sdr_scc_mgr->dqs_ena);

	/* Hit update. */
	writel(0, &sdr_scc_mgr->update);
}

/**
 * scc_mgr_load_dqs_for_write_group() - Load DQS settings for Write Group
 * @write_group:	Write group
 *
 * Load DQS settings for Write Group, do not trigger SCC update.
 */
static void scc_mgr_load_dqs_for_write_group(const u32 write_group)
{
	const int ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH /
			  RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
	const int base = write_group * ratio;
	int i;
	/*
	 * Load the setting in the SCC manager
	 * Although OCT affects only write data, the OCT delay is controlled
	 * by the DQS logic block which is instantiated once per read group.
	 * For protocols where a write group consists of multiple read groups,
	 * the setting must be set multiple times.
	 */
	for (i = 0; i < ratio; i++)
		writel(base + i, &sdr_scc_mgr->dqs_ena);
}

/**
 * scc_mgr_zero_group() - Zero all configs for a group
 *
 * Zero DQ, DM, DQS and OCT configs for a group.
 */
static void scc_mgr_zero_group(const u32 write_group, const int out_only)
{
	int i, r;

	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		/* Zero all DQ config settings. */
		for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
			scc_mgr_set_dq_out1_delay(i, 0);
			if (!out_only)
				scc_mgr_set_dq_in_delay(i, 0);
		}

		/* Multicast to all DQ enables. */
		writel(0xff, &sdr_scc_mgr->dq_ena);

		/* Zero all DM config settings. */
		for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
			scc_mgr_set_dm_out1_delay(i, 0);

		/* Multicast to all DM enables. */
		writel(0xff, &sdr_scc_mgr->dm_ena);

		/* Zero all DQS IO settings. */
		if (!out_only)
			scc_mgr_set_dqs_io_in_delay(0);

		/* Arria V/Cyclone V don't have out2. */
		scc_mgr_set_dqs_out1_delay(IO_DQS_OUT_RESERVE);
		scc_mgr_set_oct_out1_delay(write_group, IO_DQS_OUT_RESERVE);
		scc_mgr_load_dqs_for_write_group(write_group);

		/* Multicast to all DQS IO enables (only 1 in total). */
		writel(0, &sdr_scc_mgr->dqs_io_ena);

		/* Hit update to zero everything. */
		writel(0, &sdr_scc_mgr->update);
	}
}

/*
 * apply and load a particular input delay for the DQ pins in a group
 * group_bgn is the index of the first dq pin (in the write group)
 */
static void scc_mgr_apply_group_dq_in_delay(uint32_t group_bgn, uint32_t delay)
{
	uint32_t i, p;

	for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) {
		scc_mgr_set_dq_in_delay(p, delay);
		scc_mgr_load_dq(p);
	}
}

/**
 * scc_mgr_apply_group_dq_out1_delay() - Apply and load an output delay for the DQ pins in a group
 * @delay:		Delay value
 *
 * Apply and load a particular output delay for the DQ pins in a group.
 */
static void scc_mgr_apply_group_dq_out1_delay(const u32 delay)
{
	int i;

	for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
		scc_mgr_set_dq_out1_delay(i, delay);
		scc_mgr_load_dq(i);
	}
}

/* apply and load a particular output delay for the DM pins in a group */
static void scc_mgr_apply_group_dm_out1_delay(uint32_t delay1)
{
	uint32_t i;

	for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
		scc_mgr_set_dm_out1_delay(i, delay1);
		scc_mgr_load_dm(i);
	}
}


/* apply and load delay on both DQS and OCT out1 */
static void scc_mgr_apply_group_dqs_io_and_oct_out1(uint32_t write_group,
						    uint32_t delay)
{
	scc_mgr_set_dqs_out1_delay(delay);
	scc_mgr_load_dqs_io();

	scc_mgr_set_oct_out1_delay(write_group, delay);
	scc_mgr_load_dqs_for_write_group(write_group);
}
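/*
 * Illustration of the read/write group ratio used by the OCT/DQS helpers
 * above (example widths, not from any particular configuration): with
 * RW_MGR_MEM_IF_READ_DQS_WIDTH = 4 and RW_MGR_MEM_IF_WRITE_DQS_WIDTH = 2,
 * ratio = 2, so write group 1 maps to read groups 2 and 3 and the OCT and
 * DQS settings are loaded for both of them.
 */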
/**
 * scc_mgr_apply_group_all_out_delay_add() - Apply a delay to the entire output side: DQ, DM, DQS, OCT
 * @write_group:	Write group
 * @delay:		Delay value
 *
 * Apply a delay to the entire output side: DQ, DM, DQS, OCT.
 */
static void scc_mgr_apply_group_all_out_delay_add(const u32 write_group,
						  const u32 delay)
{
	u32 i, new_delay;

	/* DQ shift */
	for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++)
		scc_mgr_load_dq(i);

	/* DM shift */
	for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
		scc_mgr_load_dm(i);

	/* DQS shift */
	new_delay = READ_SCC_DQS_IO_OUT2_DELAY + delay;
	if (new_delay > IO_IO_OUT2_DELAY_MAX) {
		debug_cond(DLEVEL == 1,
			   "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n",
			   __func__, __LINE__, write_group, delay, new_delay,
			   IO_IO_OUT2_DELAY_MAX,
			   new_delay - IO_IO_OUT2_DELAY_MAX);
		new_delay -= IO_IO_OUT2_DELAY_MAX;
		scc_mgr_set_dqs_out1_delay(new_delay);
	}

	scc_mgr_load_dqs_io();

	/* OCT shift */
	new_delay = READ_SCC_OCT_OUT2_DELAY + delay;
	if (new_delay > IO_IO_OUT2_DELAY_MAX) {
		debug_cond(DLEVEL == 1,
			   "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n",
			   __func__, __LINE__, write_group, delay,
			   new_delay, IO_IO_OUT2_DELAY_MAX,
			   new_delay - IO_IO_OUT2_DELAY_MAX);
		new_delay -= IO_IO_OUT2_DELAY_MAX;
		scc_mgr_set_oct_out1_delay(write_group, new_delay);
	}

	scc_mgr_load_dqs_for_write_group(write_group);
}

/**
 * scc_mgr_apply_group_all_out_delay_add_all_ranks() - Apply a delay to the entire output side to all ranks
 * @write_group:	Write group
 * @delay:		Delay value
 *
 * Apply a delay to the entire output side (DQ, DM, DQS, OCT) to all ranks.
 */
static void
scc_mgr_apply_group_all_out_delay_add_all_ranks(const u32 write_group,
						const u32 delay)
{
	int r;

	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		scc_mgr_apply_group_all_out_delay_add(write_group, delay);
		writel(0, &sdr_scc_mgr->update);
	}
}

/* optimization used to recover some slots in ddr3 inst_rom */
/* could be applied to other protocols if we wanted to */
static void set_jump_as_return(void)
{
	/*
	 * to save space, we replace return with jump to special shared
	 * RETURN instruction so we set the counter to a large value so that
	 * we always jump
	 */
	writel(0xff, &sdr_rw_load_mgr_regs->load_cntr0);
	writel(RW_MGR_RETURN, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
}

/*
 * should always use constants as argument to ensure all computations are
 * performed at compile time
 */
static void delay_for_n_mem_clocks(const uint32_t clocks)
{
	uint32_t afi_clocks;
	uint8_t inner = 0;
	uint8_t outer = 0;
	uint16_t c_loop = 0;

	debug("%s:%d: clocks=%u ... start\n", __func__, __LINE__, clocks);


	afi_clocks = (clocks + AFI_RATE_RATIO - 1) / AFI_RATE_RATIO;
	/* scale (rounding up) to get afi clocks */

	/*
	 * Note, we don't bother accounting for being off a little bit
	 * because of a few extra instructions in outer loops
	 * Note, the loops have a test at the end, and do the test before
	 * the decrement, and so always perform the loop
	 * 1 time more than the counter value
	 */
	if (afi_clocks == 0) {
		;
	} else if (afi_clocks <= 0x100) {
		inner = afi_clocks - 1;
		outer = 0;
		c_loop = 0;
	} else if (afi_clocks <= 0x10000) {
		inner = 0xff;
		outer = (afi_clocks - 1) >> 8;
		c_loop = 0;
	} else {
		inner = 0xff;
		outer = 0xff;
		c_loop = (afi_clocks - 1) >> 16;
	}

	/*
	 * rom instructions are structured as follows:
	 *
	 *    IDLE_LOOP2: jnz cntr0, TARGET_A
	 *    IDLE_LOOP1: jnz cntr1, TARGET_B
	 *                return
	 *
	 * so, when doing nested loops, TARGET_A is set to IDLE_LOOP2, and
	 * TARGET_B is set to IDLE_LOOP2 as well
	 *
	 * if we have no outer loop, though, then we can use IDLE_LOOP1 only,
	 * and set TARGET_B to IDLE_LOOP1 and we skip IDLE_LOOP2 entirely
	 *
	 * a little confusing, but it helps save precious space in the inst_rom
	 * and sequencer rom and keeps the delays more accurate and reduces
	 * overhead
	 */
	if (afi_clocks <= 0x100) {
		writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
		       &sdr_rw_load_mgr_regs->load_cntr1);

		writel(RW_MGR_IDLE_LOOP1,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add1);

		writel(RW_MGR_IDLE_LOOP1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
					  RW_MGR_RUN_SINGLE_GROUP_OFFSET);
	} else {
		writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
		       &sdr_rw_load_mgr_regs->load_cntr0);

		writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer),
		       &sdr_rw_load_mgr_regs->load_cntr1);

		writel(RW_MGR_IDLE_LOOP2,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add0);

		writel(RW_MGR_IDLE_LOOP2,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add1);

		/* hack to get around compiler not being smart enough */
		if (afi_clocks <= 0x10000) {
			/* only need to run once */
			writel(RW_MGR_IDLE_LOOP2, SDR_PHYGRP_RWMGRGRP_ADDRESS |
						  RW_MGR_RUN_SINGLE_GROUP_OFFSET);
		} else {
			do {
				writel(RW_MGR_IDLE_LOOP2,
				       SDR_PHYGRP_RWMGRGRP_ADDRESS |
				       RW_MGR_RUN_SINGLE_GROUP_OFFSET);
			} while (c_loop-- != 0);
		}
	}
	debug("%s:%d clocks=%u ... end\n", __func__, __LINE__, clocks);
}
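/*
 * Worked example for the counter selection above (afi_clocks values are
 * illustrative): afi_clocks = 0x60 falls into the first case, so inner = 0x5f
 * and only IDLE_LOOP1 runs; afi_clocks = 0x25000 falls into the last case,
 * so inner = outer = 0xff and c_loop = (0x25000 - 1) >> 16 = 2, i.e. the
 * IDLE_LOOP2 sequence is launched three times by the do/while loop.
 */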
static void rw_mgr_mem_initialize(void)
{
	uint32_t r;
	uint32_t grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
			   RW_MGR_RUN_SINGLE_GROUP_OFFSET;

	debug("%s:%d\n", __func__, __LINE__);

	/* The reset / cke part of initialization is broadcasted to all ranks */
	writel(RW_MGR_RANK_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS |
				RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);

	/*
	 * Here's how you load register for a loop
	 * Counters are located @ 0x800
	 * Jump addresses are located @ 0xC00
	 * For both, registers 0 to 3 are selected using bits 3 and 2, like
	 * in 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C
	 * I know this ain't pretty, but Avalon bus throws away the 2 least
	 * significant bits
	 */

	/* start with memory RESET activated */

	/* tINIT = 200us */

	/*
	 * 200us @ 266MHz (3.75 ns) ~ 54000 clock cycles
	 * If a and b are the number of iterations in 2 nested loops
	 * it takes the following number of cycles to complete the operation:
	 * number_of_cycles = ((2 + n) * a + 2) * b
	 * where n is the number of instructions in the inner loop
	 * One possible solution is n = 0 , a = 256 , b = 106 => a = FF,
	 * b = 6A
	 */

	/* Load counters */
	writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR0_VAL),
	       &sdr_rw_load_mgr_regs->load_cntr0);
	writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR1_VAL),
	       &sdr_rw_load_mgr_regs->load_cntr1);
	writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR2_VAL),
	       &sdr_rw_load_mgr_regs->load_cntr2);

	/* Load jump address */
	writel(RW_MGR_INIT_RESET_0_CKE_0,
	       &sdr_rw_load_jump_mgr_regs->load_jump_add0);
	writel(RW_MGR_INIT_RESET_0_CKE_0,
	       &sdr_rw_load_jump_mgr_regs->load_jump_add1);
	writel(RW_MGR_INIT_RESET_0_CKE_0,
	       &sdr_rw_load_jump_mgr_regs->load_jump_add2);

	/* Execute count instruction */
	writel(RW_MGR_INIT_RESET_0_CKE_0, grpaddr);

	/* indicate that memory is stable */
	writel(1, &phy_mgr_cfg->reset_mem_stbl);

	/*
	 * transition the RESET to high
	 * Wait for 500us
	 */

	/*
	 * 500us @ 266MHz (3.75 ns) ~ 134000 clock cycles
	 * If a and b are the number of iterations in 2 nested loops
	 * it takes the following number of cycles to complete the operation
	 * number_of_cycles = ((2 + n) * a + 2) * b
	 * where n is the number of instructions in the inner loop
	 * One possible solution is n = 2 , a = 131 , b = 256 => a = 83,
	 * b = FF
	 */

	/* Load counters */
	writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR0_VAL),
	       &sdr_rw_load_mgr_regs->load_cntr0);
	writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR1_VAL),
	       &sdr_rw_load_mgr_regs->load_cntr1);
	writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR2_VAL),
	       &sdr_rw_load_mgr_regs->load_cntr2);

	/* Load jump address */
	writel(RW_MGR_INIT_RESET_1_CKE_0,
	       &sdr_rw_load_jump_mgr_regs->load_jump_add0);
	writel(RW_MGR_INIT_RESET_1_CKE_0,
	       &sdr_rw_load_jump_mgr_regs->load_jump_add1);
	writel(RW_MGR_INIT_RESET_1_CKE_0,
	       &sdr_rw_load_jump_mgr_regs->load_jump_add2);

	writel(RW_MGR_INIT_RESET_1_CKE_0, grpaddr);
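	/*
	 * Sanity check of the cycle-count formula above: 200us / 3.75ns is
	 * about 53,334 cycles and ((2 + 0) * 256 + 2) * 106 = 54,484, so the
	 * tINIT counters give a small margin; likewise 500us / 3.75ns is
	 * about 133,334 cycles and ((2 + 2) * 131 + 2) * 256 = 134,656
	 * covers the post-RESET wait.
	 */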

	/* bring up clock enable */

	/* tXRP < 250 ck cycles */
	delay_for_n_mem_clocks(250);

	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
		if (param->skip_ranks[r]) {
			/* request to skip the rank */
			continue;
		}

		/* set rank */
		set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);

		/*
		 * USER Use Mirror-ed commands for odd ranks if address
		 * mirroring is on
		 */
		if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) {
			set_jump_as_return();
			writel(RW_MGR_MRS2_MIRR, grpaddr);
			delay_for_n_mem_clocks(4);
			set_jump_as_return();
			writel(RW_MGR_MRS3_MIRR, grpaddr);
			delay_for_n_mem_clocks(4);
			set_jump_as_return();
			writel(RW_MGR_MRS1_MIRR, grpaddr);
			delay_for_n_mem_clocks(4);
			set_jump_as_return();
			writel(RW_MGR_MRS0_DLL_RESET_MIRR, grpaddr);
		} else {
			set_jump_as_return();
			writel(RW_MGR_MRS2, grpaddr);
			delay_for_n_mem_clocks(4);
			set_jump_as_return();
			writel(RW_MGR_MRS3, grpaddr);
			delay_for_n_mem_clocks(4);
			set_jump_as_return();
			writel(RW_MGR_MRS1, grpaddr);
			set_jump_as_return();
			writel(RW_MGR_MRS0_DLL_RESET, grpaddr);
		}
		set_jump_as_return();
		writel(RW_MGR_ZQCL, grpaddr);

		/* tZQinit = tDLLK = 512 ck cycles */
		delay_for_n_mem_clocks(512);
	}
}

/*
 * At the end of calibration we have to program the user settings in, and
 * USER hand off the memory to the user.
 */
static void rw_mgr_mem_handoff(void)
{
	uint32_t r;
	uint32_t grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
			   RW_MGR_RUN_SINGLE_GROUP_OFFSET;

	debug("%s:%d\n", __func__, __LINE__);
	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
		if (param->skip_ranks[r])
			/* request to skip the rank */
			continue;
		/* set rank */
		set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);

		/* precharge all banks ... */
		writel(RW_MGR_PRECHARGE_ALL, grpaddr);

		/* load up MR settings specified by user */

		/*
		 * Use Mirror-ed commands for odd ranks if address
		 * mirroring is on
		 */
		if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) {
			set_jump_as_return();
			writel(RW_MGR_MRS2_MIRR, grpaddr);
			delay_for_n_mem_clocks(4);
			set_jump_as_return();
			writel(RW_MGR_MRS3_MIRR, grpaddr);
			delay_for_n_mem_clocks(4);
			set_jump_as_return();
			writel(RW_MGR_MRS1_MIRR, grpaddr);
			delay_for_n_mem_clocks(4);
			set_jump_as_return();
			writel(RW_MGR_MRS0_USER_MIRR, grpaddr);
		} else {
			set_jump_as_return();
			writel(RW_MGR_MRS2, grpaddr);
			delay_for_n_mem_clocks(4);
			set_jump_as_return();
			writel(RW_MGR_MRS3, grpaddr);
			delay_for_n_mem_clocks(4);
			set_jump_as_return();
			writel(RW_MGR_MRS1, grpaddr);
			delay_for_n_mem_clocks(4);
			set_jump_as_return();
			writel(RW_MGR_MRS0_USER, grpaddr);
		}
		/*
		 * USER need to wait tMOD (12CK or 15ns) time before issuing
		 * other commands, but we will have plenty of NIOS cycles before
		 * actual handoff so it's okay.
		 */
	}
}

/*
 * performs a guaranteed read on the patterns we are going to use during a
 * read test to ensure memory works
 */
static uint32_t rw_mgr_mem_calibrate_read_test_patterns(uint32_t rank_bgn,
	uint32_t group, uint32_t num_tries, uint32_t *bit_chk,
	uint32_t all_ranks)
{
	uint32_t r, vg;
	uint32_t correct_mask_vg;
	uint32_t tmp_bit_chk;
	uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
		(rank_bgn + NUM_RANKS_PER_SHADOW_REG);
	uint32_t addr;
	uint32_t base_rw_mgr;

	*bit_chk = param->read_correct_mask;
	correct_mask_vg = param->read_correct_mask_vg;

	for (r = rank_bgn; r < rank_end; r++) {
		if (param->skip_ranks[r])
			/* request to skip the rank */
			continue;

		/* set rank */
		set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

		/* Load up a constant burst of read commands */
		writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);
		writel(RW_MGR_GUARANTEED_READ,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add0);

		writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);
		writel(RW_MGR_GUARANTEED_READ_CONT,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add1);

		tmp_bit_chk = 0;
		for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1; ; vg--) {
			/* reset the fifos to get pointers to known state */

			writel(0, &phy_mgr_cmd->fifo_reset);
			writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
				  RW_MGR_RESET_READ_DATAPATH_OFFSET);

			tmp_bit_chk = tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS
				/ RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS);

			addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
			writel(RW_MGR_GUARANTEED_READ, addr +
			       ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS +
				vg) << 2));

			base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
			tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & (~base_rw_mgr));

			if (vg == 0)
				break;
		}
		*bit_chk &= tmp_bit_chk;
	}

	addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
	writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2));

	set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
	debug_cond(DLEVEL == 1, "%s:%d test_load_patterns(%u,ALL) => (%u == %u) =>\
		   %lu\n", __func__, __LINE__, group, *bit_chk, param->read_correct_mask,
		   (long unsigned int)(*bit_chk == param->read_correct_mask));
	return *bit_chk == param->read_correct_mask;
}

static uint32_t rw_mgr_mem_calibrate_read_test_patterns_all_ranks
	(uint32_t group, uint32_t num_tries, uint32_t *bit_chk)
{
	return rw_mgr_mem_calibrate_read_test_patterns(0, group,
		num_tries, bit_chk, 1);
}
/* load up the patterns we are going to use during a read test */
static void rw_mgr_mem_calibrate_read_load_patterns(uint32_t rank_bgn,
	uint32_t all_ranks)
{
	uint32_t r;
	uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
		(rank_bgn + NUM_RANKS_PER_SHADOW_REG);

	debug("%s:%d\n", __func__, __LINE__);
	for (r = rank_bgn; r < rank_end; r++) {
		if (param->skip_ranks[r])
			/* request to skip the rank */
			continue;

		/* set rank */
		set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

		/* Load up a constant burst */
		writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);

		writel(RW_MGR_GUARANTEED_WRITE_WAIT0,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add0);

		writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);

		writel(RW_MGR_GUARANTEED_WRITE_WAIT1,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add1);

		writel(0x04, &sdr_rw_load_mgr_regs->load_cntr2);

		writel(RW_MGR_GUARANTEED_WRITE_WAIT2,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add2);

		writel(0x04, &sdr_rw_load_mgr_regs->load_cntr3);

		writel(RW_MGR_GUARANTEED_WRITE_WAIT3,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add3);

		writel(RW_MGR_GUARANTEED_WRITE, SDR_PHYGRP_RWMGRGRP_ADDRESS |
						RW_MGR_RUN_SINGLE_GROUP_OFFSET);
	}

	set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
}

/*
 * try a read and see if it returns correct data back. has dummy reads
 * inserted into the mix used to align dqs enable. has more thorough checks
 * than the regular read test.
 */
static uint32_t rw_mgr_mem_calibrate_read_test(uint32_t rank_bgn, uint32_t group,
	uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk,
	uint32_t all_groups, uint32_t all_ranks)
{
	uint32_t r, vg;
	uint32_t correct_mask_vg;
	uint32_t tmp_bit_chk;
	uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
		(rank_bgn + NUM_RANKS_PER_SHADOW_REG);
	uint32_t addr;
	uint32_t base_rw_mgr;

	*bit_chk = param->read_correct_mask;
	correct_mask_vg = param->read_correct_mask_vg;

	uint32_t quick_read_mode = (((STATIC_CALIB_STEPS) &
		CALIB_SKIP_DELAY_SWEEPS) && ENABLE_SUPER_QUICK_CALIBRATION);

	for (r = rank_bgn; r < rank_end; r++) {
		if (param->skip_ranks[r])
			/* request to skip the rank */
			continue;

		/* set rank */
		set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

		writel(0x10, &sdr_rw_load_mgr_regs->load_cntr1);

		writel(RW_MGR_READ_B2B_WAIT1,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add1);

		writel(0x10, &sdr_rw_load_mgr_regs->load_cntr2);
		writel(RW_MGR_READ_B2B_WAIT2,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add2);

		if (quick_read_mode)
			writel(0x1, &sdr_rw_load_mgr_regs->load_cntr0);
			/* need at least two (1+1) reads to capture failures */
		else if (all_groups)
			writel(0x06, &sdr_rw_load_mgr_regs->load_cntr0);
		else
			writel(0x32, &sdr_rw_load_mgr_regs->load_cntr0);

		writel(RW_MGR_READ_B2B,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add0);
		if (all_groups)
			writel(RW_MGR_MEM_IF_READ_DQS_WIDTH *
			       RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1,
			       &sdr_rw_load_mgr_regs->load_cntr3);
		else
			writel(0x0, &sdr_rw_load_mgr_regs->load_cntr3);

		writel(RW_MGR_READ_B2B,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add3);

		tmp_bit_chk = 0;
		for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1; ; vg--) {
			/* reset the fifos to get pointers to known state */
			writel(0, &phy_mgr_cmd->fifo_reset);
			writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
				  RW_MGR_RESET_READ_DATAPATH_OFFSET);

			tmp_bit_chk = tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS
				/ RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS);

			if (all_groups)
				addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_ALL_GROUPS_OFFSET;
			else
				addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;

			writel(RW_MGR_READ_B2B, addr +
			       ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS +
			       vg) << 2));

			base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
			tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~(base_rw_mgr));

			if (vg == 0)
				break;
		}
		*bit_chk &= tmp_bit_chk;
	}

	addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
	writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2));

	if (all_correct) {
		set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
		debug_cond(DLEVEL == 2, "%s:%d read_test(%u,ALL,%u) =>\
			   (%u == %u) => %lu", __func__, __LINE__, group,
			   all_groups, *bit_chk, param->read_correct_mask,
			   (long unsigned int)(*bit_chk ==
			   param->read_correct_mask));
		return *bit_chk == param->read_correct_mask;
	} else {
		set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
		debug_cond(DLEVEL == 2, "%s:%d read_test(%u,ONE,%u) =>\
			   (%u != %lu) => %lu\n", __func__, __LINE__,
			   group, all_groups, *bit_chk, (long unsigned int)0,
			   (long unsigned int)(*bit_chk != 0x00));
		return *bit_chk != 0x00;
	}
}

static uint32_t rw_mgr_mem_calibrate_read_test_all_ranks(uint32_t group,
	uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk,
	uint32_t all_groups)
{
	return rw_mgr_mem_calibrate_read_test(0, group, num_tries, all_correct,
					      bit_chk, all_groups, 1);
}

static void rw_mgr_incr_vfifo(uint32_t grp, uint32_t *v)
{
	writel(grp, &phy_mgr_cmd->inc_vfifo_hard_phy);
	(*v)++;
}

static void rw_mgr_decr_vfifo(uint32_t grp, uint32_t *v)
{
	uint32_t i;

	for (i = 0; i < VFIFO_SIZE - 1; i++)
		rw_mgr_incr_vfifo(grp, v);
}
static int find_vfifo_read(uint32_t grp, uint32_t *bit_chk)
{
	uint32_t v;
	uint32_t fail_cnt = 0;
	uint32_t test_status;

	for (v = 0; v < VFIFO_SIZE; ) {
		debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo %u\n",
			   __func__, __LINE__, v);
		test_status = rw_mgr_mem_calibrate_read_test_all_ranks
			(grp, 1, PASS_ONE_BIT, bit_chk, 0);
		if (!test_status) {
			fail_cnt++;

			if (fail_cnt == 2)
				break;
		}

		/* fiddle with FIFO */
		rw_mgr_incr_vfifo(grp, &v);
	}

	if (v >= VFIFO_SIZE) {
		/* no failing read found!! Something must have gone wrong */
		debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo failed\n",
			   __func__, __LINE__);
		return 0;
	} else {
		return v;
	}
}

static int find_working_phase(uint32_t *grp, uint32_t *bit_chk,
			      uint32_t dtaps_per_ptap, uint32_t *work_bgn,
			      uint32_t *v, uint32_t *d, uint32_t *p,
			      uint32_t *i, uint32_t *max_working_cnt)
{
	uint32_t found_begin = 0;
	uint32_t tmp_delay = 0;
	uint32_t test_status;

	for (*d = 0; *d <= dtaps_per_ptap; (*d)++, tmp_delay +=
		IO_DELAY_PER_DQS_EN_DCHAIN_TAP) {
		*work_bgn = tmp_delay;
		scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d);

		for (*i = 0; *i < VFIFO_SIZE; (*i)++) {
			for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_bgn +=
				IO_DELAY_PER_OPA_TAP) {
				scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p);

				test_status =
				rw_mgr_mem_calibrate_read_test_all_ranks
				(*grp, 1, PASS_ONE_BIT, bit_chk, 0);

				if (test_status) {
					*max_working_cnt = 1;
					found_begin = 1;
					break;
				}
			}

			if (found_begin)
				break;

			if (*p > IO_DQS_EN_PHASE_MAX)
				/* fiddle with FIFO */
				rw_mgr_incr_vfifo(*grp, v);
		}

		if (found_begin)
			break;
	}

	if (*i >= VFIFO_SIZE) {
		/* cannot find working solution */
		debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: no vfifo/\
			   ptap/dtap\n", __func__, __LINE__);
		return 0;
	} else {
		return 1;
	}
}

static void sdr_backup_phase(uint32_t *grp, uint32_t *bit_chk,
			     uint32_t *work_bgn, uint32_t *v, uint32_t *d,
			     uint32_t *p, uint32_t *max_working_cnt)
{
	uint32_t found_begin = 0;
	uint32_t tmp_delay;

	/* Special case code for backing up a phase */
	if (*p == 0) {
		*p = IO_DQS_EN_PHASE_MAX;
		rw_mgr_decr_vfifo(*grp, v);
	} else {
		(*p)--;
	}
	tmp_delay = *work_bgn - IO_DELAY_PER_OPA_TAP;
	scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p);

	for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_bgn;
		(*d)++, tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) {
		scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d);

		if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1,
							     PASS_ONE_BIT,
							     bit_chk, 0)) {
			found_begin = 1;
			*work_bgn = tmp_delay;
			break;
		}
	}

	/* We have found a working dtap before the ptap found above */
	if (found_begin == 1)
		(*max_working_cnt)++;

	/*
	 * Restore VFIFO to old state before we decremented it
	 * (if needed).
	 */
	(*p)++;
	if (*p > IO_DQS_EN_PHASE_MAX) {
		*p = 0;
		rw_mgr_incr_vfifo(*grp, v);
	}

	scc_mgr_set_dqs_en_delay_all_ranks(*grp, 0);
}

static int sdr_nonworking_phase(uint32_t *grp, uint32_t *bit_chk,
				uint32_t *work_bgn, uint32_t *v, uint32_t *d,
				uint32_t *p, uint32_t *i, uint32_t *max_working_cnt,
				uint32_t *work_end)
{
	uint32_t found_end = 0;

	(*p)++;
	*work_end += IO_DELAY_PER_OPA_TAP;
	if (*p > IO_DQS_EN_PHASE_MAX) {
		/* fiddle with FIFO */
		*p = 0;
		rw_mgr_incr_vfifo(*grp, v);
	}

	for (; *i < VFIFO_SIZE + 1; (*i)++) {
		for (; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_end
			+= IO_DELAY_PER_OPA_TAP) {
			scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p);

			if (!rw_mgr_mem_calibrate_read_test_all_ranks
				(*grp, 1, PASS_ONE_BIT, bit_chk, 0)) {
				found_end = 1;
				break;
			} else {
				(*max_working_cnt)++;
			}
		}

		if (found_end)
			break;

		if (*p > IO_DQS_EN_PHASE_MAX) {
			/* fiddle with FIFO */
			rw_mgr_incr_vfifo(*grp, v);
			*p = 0;
		}
	}

	if (*i >= VFIFO_SIZE + 1) {
		/* cannot see edge of failing read */
		debug_cond(DLEVEL == 2, "%s:%d sdr_nonworking_phase: end:\
			   failed\n", __func__, __LINE__);
		return 0;
	} else {
		return 1;
	}
}
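/*
 * Note on the bookkeeping used by the helpers above and below: work_bgn and
 * work_end are running delay totals in which each DQS-enable phase tap
 * contributes IO_DELAY_PER_OPA_TAP and each delay-chain tap contributes
 * IO_DELAY_PER_DQS_EN_DCHAIN_TAP, so the midpoint computed in
 * sdr_find_window_centre() can be converted back into a (vfifo, ptap, dtap)
 * position.
 */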
static int sdr_find_window_centre(uint32_t *grp, uint32_t *bit_chk,
				  uint32_t *work_bgn, uint32_t *v, uint32_t *d,
				  uint32_t *p, uint32_t *work_mid,
				  uint32_t *work_end)
{
	int i;
	int tmp_delay = 0;

	*work_mid = (*work_bgn + *work_end) / 2;

	debug_cond(DLEVEL == 2, "work_bgn=%d work_end=%d work_mid=%d\n",
		   *work_bgn, *work_end, *work_mid);
	/* Get the middle delay to be less than a VFIFO delay */
	for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX;
		(*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP)
		;
	debug_cond(DLEVEL == 2, "vfifo ptap delay %d\n", tmp_delay);
	while (*work_mid > tmp_delay)
		*work_mid -= tmp_delay;
	debug_cond(DLEVEL == 2, "new work_mid %d\n", *work_mid);

	tmp_delay = 0;
	for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX && tmp_delay < *work_mid;
		(*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP)
		;
	tmp_delay -= IO_DELAY_PER_OPA_TAP;
	debug_cond(DLEVEL == 2, "new p %d, tmp_delay=%d\n", (*p) - 1, tmp_delay);
	for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_mid; (*d)++,
		tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP)
		;
	debug_cond(DLEVEL == 2, "new d %d, tmp_delay=%d\n", *d, tmp_delay);

	scc_mgr_set_dqs_en_phase_all_ranks(*grp, (*p) - 1);
	scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d);

	/*
	 * push vfifo until we can successfully calibrate. We can do this
	 * because the largest possible margin is 1 VFIFO cycle.
	 */
	for (i = 0; i < VFIFO_SIZE; i++) {
		debug_cond(DLEVEL == 2, "find_dqs_en_phase: center: vfifo=%u\n",
			   *v);
		if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1,
							     PASS_ONE_BIT,
							     bit_chk, 0)) {
			break;
		}

		/* fiddle with FIFO */
		rw_mgr_incr_vfifo(*grp, v);
	}

	if (i >= VFIFO_SIZE) {
		debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: center: \
			   failed\n", __func__, __LINE__);
		return 0;
	} else {
		return 1;
	}
}

/* find a good dqs enable to use */
static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(uint32_t grp)
{
	uint32_t v, d, p, i;
	uint32_t max_working_cnt;
	uint32_t bit_chk;
	uint32_t dtaps_per_ptap;
	uint32_t work_bgn, work_mid, work_end;
	uint32_t found_passing_read, found_failing_read, initial_failing_dtap;

	debug("%s:%d %u\n", __func__, __LINE__, grp);

	reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);

	scc_mgr_set_dqs_en_delay_all_ranks(grp, 0);
	scc_mgr_set_dqs_en_phase_all_ranks(grp, 0);

	/* ************************************************************** */
	/* * Step 0 : Determine number of delay taps for each phase tap * */
	dtaps_per_ptap = IO_DELAY_PER_OPA_TAP/IO_DELAY_PER_DQS_EN_DCHAIN_TAP;

	/* ********************************************************* */
	/* * Step 1 : First push vfifo until we get a failing read * */
	v = find_vfifo_read(grp, &bit_chk);

	max_working_cnt = 0;

	/* ******************************************************** */
	/* * step 2: find first working phase, increment in ptaps * */
	work_bgn = 0;
	if (find_working_phase(&grp, &bit_chk, dtaps_per_ptap, &work_bgn, &v, &d,
			       &p, &i, &max_working_cnt) == 0)
		return 0;

	work_end = work_bgn;

	/*
	 * If d is 0 then the working window covers a phase tap and we can
	 * follow the old procedure; otherwise, we've found the beginning
	 * and we need to increment the dtaps until we find the end.
	 */
	if (d == 0) {
		/* ********************************************************* */
		/* * step 3a: if we have room, back off by one and
		     increment in dtaps * */

		sdr_backup_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p,
				 &max_working_cnt);

		/* ********************************************************* */
		/* * step 4a: go forward from working phase to non working
		     phase, increment in ptaps * */
		if (sdr_nonworking_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p,
					 &i, &max_working_cnt, &work_end) == 0)
			return 0;

		/* ********************************************************* */
		/* * step 5a: back off one from last, increment in dtaps * */

		/* Special case code for backing up a phase */
		if (p == 0) {
			p = IO_DQS_EN_PHASE_MAX;
			rw_mgr_decr_vfifo(grp, &v);
		} else {
			p = p - 1;
		}

		work_end -= IO_DELAY_PER_OPA_TAP;
		scc_mgr_set_dqs_en_phase_all_ranks(grp, p);

		/* * The actual increment of dtaps is done outside of
		     the if/else loop to share code */
		d = 0;

		debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: v/p: \
			   vfifo=%u ptap=%u\n", __func__, __LINE__,
			   v, p);
	} else {
		/* ******************************************************* */
		/* * step 3-5b: Find the right edge of the window using
		     delay taps * */
		debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase:vfifo=%u \
			   ptap=%u dtap=%u bgn=%u\n", __func__, __LINE__,
			   v, p, d, work_bgn);

		work_end = work_bgn;

		/* * The actual increment of dtaps is done outside of the
		     if/else loop to share code */

		/* Only here to counterbalance a subtract later on which is
		   not needed if this branch of the algorithm is taken */
		max_working_cnt++;
	}

	/* The dtap increment to find the failing edge is done here */
	for (; d <= IO_DQS_EN_DELAY_MAX; d++, work_end +=
		IO_DELAY_PER_DQS_EN_DCHAIN_TAP) {
		debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: \
			   end-2: dtap=%u\n", __func__, __LINE__, d);
		scc_mgr_set_dqs_en_delay_all_ranks(grp, d);

		if (!rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
							      PASS_ONE_BIT,
							      &bit_chk, 0)) {
			break;
		}
	}

	/* Go back to working dtap */
	if (d != 0)
		work_end -= IO_DELAY_PER_DQS_EN_DCHAIN_TAP;

	debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: v/p/d: vfifo=%u \
		   ptap=%u dtap=%u end=%u\n", __func__, __LINE__,
		   v, p, d-1, work_end);

	if (work_end < work_bgn) {
		/* nil range */
		debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: end-2: \
			   failed\n", __func__, __LINE__);
		return 0;
	}

	debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: found range [%u,%u]\n",
		   __func__, __LINE__, work_bgn, work_end);

	/* *************************************************************** */
	/*
	 * * We need to calculate the number of dtaps that equal a ptap
	 * * To do that we'll back up a ptap and re-find the edge of the
	 * * window using dtaps
	 */

	debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: calculate dtaps_per_ptap \
		   for tracking\n", __func__, __LINE__);

	/* Special case code for backing up a phase */
	if (p == 0) {
		p = IO_DQS_EN_PHASE_MAX;
		rw_mgr_decr_vfifo(grp, &v);
		debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: backedup \
			   cycle/phase: v=%u p=%u\n", __func__, __LINE__,
			   v, p);
	} else {
		p = p - 1;
		debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: backedup \
			   phase only: v=%u p=%u", __func__, __LINE__,
			   v, p);
	}

	scc_mgr_set_dqs_en_phase_all_ranks(grp, p);

	/*
	 * Increase dtap until we first see a passing read (in case the
	 * window is smaller than a ptap),
	 * and then a failing read to mark the edge of the window again
	 */

	/* Find a passing read */
	debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find passing read\n",
		   __func__, __LINE__);
	found_passing_read = 0;
	found_failing_read = 0;
	initial_failing_dtap = d;
	for (; d <= IO_DQS_EN_DELAY_MAX; d++) {
		debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: testing \
			   read d=%u\n", __func__, __LINE__, d);
		scc_mgr_set_dqs_en_delay_all_ranks(grp, d);

		if (rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
							     PASS_ONE_BIT,
							     &bit_chk, 0)) {
			found_passing_read = 1;
			break;
		}
	}

	if (found_passing_read) {
		/* Find a failing read */
		debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find failing \
			   read\n", __func__, __LINE__);
		for (d = d + 1; d <= IO_DQS_EN_DELAY_MAX; d++) {
			debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: \
				   testing read d=%u\n", __func__, __LINE__, d);
			scc_mgr_set_dqs_en_delay_all_ranks(grp, d);

			if (!rw_mgr_mem_calibrate_read_test_all_ranks
				(grp, 1, PASS_ONE_BIT, &bit_chk, 0)) {
				found_failing_read = 1;
				break;
			}
		}
	} else {
		debug_cond(DLEVEL == 1, "%s:%d find_dqs_en_phase: failed to \
			   calculate dtaps", __func__, __LINE__);
		debug_cond(DLEVEL == 1, "per ptap. Fall back on static value\n");
	}

	/*
	 * The dynamically calculated dtaps_per_ptap is only valid if we
	 * found a passing/failing read. If we didn't, it means d hit the max
	 * (IO_DQS_EN_DELAY_MAX) and dtaps_per_ptap retains its statically
	 * calculated value.
	 */
	if (found_passing_read && found_failing_read)
		dtaps_per_ptap = d - initial_failing_dtap;

	writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap);
	debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: dtaps_per_ptap=%u \
		   - %u = %u", __func__, __LINE__, d,
		   initial_failing_dtap, dtaps_per_ptap);

	/* ******************************************** */
	/* * step 6: Find the centre of the window * */
	if (sdr_find_window_centre(&grp, &bit_chk, &work_bgn, &v, &d, &p,
				   &work_mid, &work_end) == 0)
		return 0;

	debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: center found: \
		   vfifo=%u ptap=%u dtap=%u\n", __func__, __LINE__,
		   v, p-1, d);
	return 1;
}

/*
 * Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different
 * dq_in_delay values
 */
static uint32_t
rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay
(uint32_t write_group, uint32_t read_group, uint32_t test_bgn)
{
	uint32_t found;
	uint32_t i;
	uint32_t p;
	uint32_t d;
	uint32_t r;

	const uint32_t delay_step = IO_IO_IN_DELAY_MAX /
				    (RW_MGR_MEM_DQ_PER_READ_DQS - 1);
	/* we start at zero, so have one less dq to divide among */

	debug("%s:%d (%u,%u,%u)", __func__, __LINE__, write_group, read_group,
	      test_bgn);

	/* try different dq_in_delays since the dq path is shorter than dqs */

	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		for (i = 0, p = test_bgn, d = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS;
		     i++, p++, d += delay_step) {
			debug_cond(DLEVEL == 1, "%s:%d rw_mgr_mem_calibrate_\
				   vfifo_find_dqs_", __func__, __LINE__);
			debug_cond(DLEVEL == 1, "en_phase_sweep_dq_in_delay: g=%u/%u ",
				   write_group, read_group);
			debug_cond(DLEVEL == 1, "r=%u, i=%u p=%u d=%u\n", r, i, p, d);
			scc_mgr_set_dq_in_delay(p, d);
			scc_mgr_load_dq(p);
		}
		writel(0, &sdr_scc_mgr->update);
	}

	found = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(read_group);

	debug_cond(DLEVEL == 1, "%s:%d rw_mgr_mem_calibrate_vfifo_find_dqs_\
		   en_phase_sweep_dq", __func__, __LINE__);
	debug_cond(DLEVEL == 1, "_in_delay: g=%u/%u found=%u; Resetting delay \
		   chain to zero\n", write_group, read_group, found);

	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS;
		     i++, p++) {
			scc_mgr_set_dq_in_delay(p, 0);
			scc_mgr_load_dq(p);
		}
		writel(0, &sdr_scc_mgr->update);
	}

	return found;
}
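/*
 * Example of the sweep above (IO_IO_IN_DELAY_MAX = 31 and 8 DQ per read DQS
 * are illustrative values only): delay_step = 31 / 7 = 4, so the DQ pins of
 * the group are staggered at input delays 0, 4, 8, ... 28 before the
 * DQS-enable phase search runs, and all delays are reset to zero afterwards.
 */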
1830 */ 1831 uint32_t bit_chk; 1832 uint32_t sticky_bit_chk; 1833 int32_t left_edge[RW_MGR_MEM_DQ_PER_READ_DQS]; 1834 int32_t right_edge[RW_MGR_MEM_DQ_PER_READ_DQS]; 1835 int32_t final_dq[RW_MGR_MEM_DQ_PER_READ_DQS]; 1836 int32_t mid; 1837 int32_t orig_mid_min, mid_min; 1838 int32_t new_dqs, start_dqs, start_dqs_en, shift_dq, final_dqs, 1839 final_dqs_en; 1840 int32_t dq_margin, dqs_margin; 1841 uint32_t stop; 1842 uint32_t temp_dq_in_delay1, temp_dq_in_delay2; 1843 uint32_t addr; 1844 1845 debug("%s:%d: %u %u", __func__, __LINE__, read_group, test_bgn); 1846 1847 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_DQS_IN_DELAY_OFFSET; 1848 start_dqs = readl(addr + (read_group << 2)); 1849 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) 1850 start_dqs_en = readl(addr + ((read_group << 2) 1851 - IO_DQS_EN_DELAY_OFFSET)); 1852 1853 /* set the left and right edge of each bit to an illegal value */ 1854 /* use (IO_IO_IN_DELAY_MAX + 1) as an illegal value */ 1855 sticky_bit_chk = 0; 1856 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 1857 left_edge[i] = IO_IO_IN_DELAY_MAX + 1; 1858 right_edge[i] = IO_IO_IN_DELAY_MAX + 1; 1859 } 1860 1861 /* Search for the left edge of the window for each bit */ 1862 for (d = 0; d <= IO_IO_IN_DELAY_MAX; d++) { 1863 scc_mgr_apply_group_dq_in_delay(write_group, test_bgn, d); 1864 1865 writel(0, &sdr_scc_mgr->update); 1866 1867 /* 1868 * Stop searching when the read test doesn't pass AND when 1869 * we've seen a passing read on every bit. 1870 */ 1871 if (use_read_test) { 1872 stop = !rw_mgr_mem_calibrate_read_test(rank_bgn, 1873 read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT, 1874 &bit_chk, 0, 0); 1875 } else { 1876 rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1877 0, PASS_ONE_BIT, 1878 &bit_chk, 0); 1879 bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS * 1880 (read_group - (write_group * 1881 RW_MGR_MEM_IF_READ_DQS_WIDTH / 1882 RW_MGR_MEM_IF_WRITE_DQS_WIDTH))); 1883 stop = (bit_chk == 0); 1884 } 1885 sticky_bit_chk = sticky_bit_chk | bit_chk; 1886 stop = stop && (sticky_bit_chk == param->read_correct_mask); 1887 debug_cond(DLEVEL == 2, "%s:%d vfifo_center(left): dtap=%u => %u == %u \ 1888 && %u", __func__, __LINE__, d, 1889 sticky_bit_chk, 1890 param->read_correct_mask, stop); 1891 1892 if (stop == 1) { 1893 break; 1894 } else { 1895 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 1896 if (bit_chk & 1) { 1897 /* Remember a passing test as the 1898 left_edge */ 1899 left_edge[i] = d; 1900 } else { 1901 /* If a left edge has not been seen yet, 1902 then a future passing test will mark 1903 this edge as the right edge */ 1904 if (left_edge[i] == 1905 IO_IO_IN_DELAY_MAX + 1) { 1906 right_edge[i] = -(d + 1); 1907 } 1908 } 1909 bit_chk = bit_chk >> 1; 1910 } 1911 } 1912 } 1913 1914 /* Reset DQ delay chains to 0 */ 1915 scc_mgr_apply_group_dq_in_delay(test_bgn, 0); 1916 sticky_bit_chk = 0; 1917 for (i = RW_MGR_MEM_DQ_PER_READ_DQS - 1;; i--) { 1918 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: \ 1919 %d right_edge[%u]: %d\n", __func__, __LINE__, 1920 i, left_edge[i], i, right_edge[i]); 1921 1922 /* 1923 * Check for cases where we haven't found the left edge, 1924 * which makes our assignment of the the right edge invalid. 1925 * Reset it to the illegal value. 
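		 * (A negative right_edge recorded during the left-edge search
		 * is only meaningful once the left edge of that bit has
		 * actually been seen.)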
1926 */ 1927 if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) && ( 1928 right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) { 1929 right_edge[i] = IO_IO_IN_DELAY_MAX + 1; 1930 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: reset \ 1931 right_edge[%u]: %d\n", __func__, __LINE__, 1932 i, right_edge[i]); 1933 } 1934 1935 /* 1936 * Reset sticky bit (except for bits where we have seen 1937 * both the left and right edge). 1938 */ 1939 sticky_bit_chk = sticky_bit_chk << 1; 1940 if ((left_edge[i] != IO_IO_IN_DELAY_MAX + 1) && 1941 (right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) { 1942 sticky_bit_chk = sticky_bit_chk | 1; 1943 } 1944 1945 if (i == 0) 1946 break; 1947 } 1948 1949 /* Search for the right edge of the window for each bit */ 1950 for (d = 0; d <= IO_DQS_IN_DELAY_MAX - start_dqs; d++) { 1951 scc_mgr_set_dqs_bus_in_delay(read_group, d + start_dqs); 1952 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { 1953 uint32_t delay = d + start_dqs_en; 1954 if (delay > IO_DQS_EN_DELAY_MAX) 1955 delay = IO_DQS_EN_DELAY_MAX; 1956 scc_mgr_set_dqs_en_delay(read_group, delay); 1957 } 1958 scc_mgr_load_dqs(read_group); 1959 1960 writel(0, &sdr_scc_mgr->update); 1961 1962 /* 1963 * Stop searching when the read test doesn't pass AND when 1964 * we've seen a passing read on every bit. 1965 */ 1966 if (use_read_test) { 1967 stop = !rw_mgr_mem_calibrate_read_test(rank_bgn, 1968 read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT, 1969 &bit_chk, 0, 0); 1970 } else { 1971 rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1972 0, PASS_ONE_BIT, 1973 &bit_chk, 0); 1974 bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS * 1975 (read_group - (write_group * 1976 RW_MGR_MEM_IF_READ_DQS_WIDTH / 1977 RW_MGR_MEM_IF_WRITE_DQS_WIDTH))); 1978 stop = (bit_chk == 0); 1979 } 1980 sticky_bit_chk = sticky_bit_chk | bit_chk; 1981 stop = stop && (sticky_bit_chk == param->read_correct_mask); 1982 1983 debug_cond(DLEVEL == 2, "%s:%d vfifo_center(right): dtap=%u => %u == \ 1984 %u && %u", __func__, __LINE__, d, 1985 sticky_bit_chk, param->read_correct_mask, stop); 1986 1987 if (stop == 1) { 1988 break; 1989 } else { 1990 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 1991 if (bit_chk & 1) { 1992 /* Remember a passing test as 1993 the right_edge */ 1994 right_edge[i] = d; 1995 } else { 1996 if (d != 0) { 1997 /* If a right edge has not been 1998 seen yet, then a future passing 1999 test will mark this edge as the 2000 left edge */ 2001 if (right_edge[i] == 2002 IO_IO_IN_DELAY_MAX + 1) { 2003 left_edge[i] = -(d + 1); 2004 } 2005 } else { 2006 /* d = 0 failed, but it passed 2007 when testing the left edge, 2008 so it must be marginal, 2009 set it to -1 */ 2010 if (right_edge[i] == 2011 IO_IO_IN_DELAY_MAX + 1 && 2012 left_edge[i] != 2013 IO_IO_IN_DELAY_MAX 2014 + 1) { 2015 right_edge[i] = -1; 2016 } 2017 /* If a right edge has not been 2018 seen yet, then a future passing 2019 test will mark this edge as the 2020 left edge */ 2021 else if (right_edge[i] == 2022 IO_IO_IN_DELAY_MAX + 2023 1) { 2024 left_edge[i] = -(d + 1); 2025 } 2026 } 2027 } 2028 2029 debug_cond(DLEVEL == 2, "%s:%d vfifo_center[r,\ 2030 d=%u]: ", __func__, __LINE__, d); 2031 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d ", 2032 (int)(bit_chk & 1), i, left_edge[i]); 2033 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i, 2034 right_edge[i]); 2035 bit_chk = bit_chk >> 1; 2036 } 2037 } 2038 } 2039 2040 /* Check that all bits have a window */ 2041 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 2042 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: \ 2043 %d right_edge[%u]: %d", 
__func__, __LINE__, 2044 i, left_edge[i], i, right_edge[i]); 2045 if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) || (right_edge[i] 2046 == IO_IO_IN_DELAY_MAX + 1)) { 2047 /* 2048 * Restore delay chain settings before letting the loop 2049 * in rw_mgr_mem_calibrate_vfifo to retry different 2050 * dqs/ck relationships. 2051 */ 2052 scc_mgr_set_dqs_bus_in_delay(read_group, start_dqs); 2053 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { 2054 scc_mgr_set_dqs_en_delay(read_group, 2055 start_dqs_en); 2056 } 2057 scc_mgr_load_dqs(read_group); 2058 writel(0, &sdr_scc_mgr->update); 2059 2060 debug_cond(DLEVEL == 1, "%s:%d vfifo_center: failed to \ 2061 find edge [%u]: %d %d", __func__, __LINE__, 2062 i, left_edge[i], right_edge[i]); 2063 if (use_read_test) { 2064 set_failing_group_stage(read_group * 2065 RW_MGR_MEM_DQ_PER_READ_DQS + i, 2066 CAL_STAGE_VFIFO, 2067 CAL_SUBSTAGE_VFIFO_CENTER); 2068 } else { 2069 set_failing_group_stage(read_group * 2070 RW_MGR_MEM_DQ_PER_READ_DQS + i, 2071 CAL_STAGE_VFIFO_AFTER_WRITES, 2072 CAL_SUBSTAGE_VFIFO_CENTER); 2073 } 2074 return 0; 2075 } 2076 } 2077 2078 /* Find middle of window for each DQ bit */ 2079 mid_min = left_edge[0] - right_edge[0]; 2080 min_index = 0; 2081 for (i = 1; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 2082 mid = left_edge[i] - right_edge[i]; 2083 if (mid < mid_min) { 2084 mid_min = mid; 2085 min_index = i; 2086 } 2087 } 2088 2089 /* 2090 * -mid_min/2 represents the amount that we need to move DQS. 2091 * If mid_min is odd and positive we'll need to add one to 2092 * make sure the rounding in further calculations is correct 2093 * (always bias to the right), so just add 1 for all positive values. 2094 */ 2095 if (mid_min > 0) 2096 mid_min++; 2097 2098 mid_min = mid_min / 2; 2099 2100 debug_cond(DLEVEL == 1, "%s:%d vfifo_center: mid_min=%d (index=%u)\n", 2101 __func__, __LINE__, mid_min, min_index); 2102 2103 /* Determine the amount we can change DQS (which is -mid_min) */ 2104 orig_mid_min = mid_min; 2105 new_dqs = start_dqs - mid_min; 2106 if (new_dqs > IO_DQS_IN_DELAY_MAX) 2107 new_dqs = IO_DQS_IN_DELAY_MAX; 2108 else if (new_dqs < 0) 2109 new_dqs = 0; 2110 2111 mid_min = start_dqs - new_dqs; 2112 debug_cond(DLEVEL == 1, "vfifo_center: new mid_min=%d new_dqs=%d\n", 2113 mid_min, new_dqs); 2114 2115 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { 2116 if (start_dqs_en - mid_min > IO_DQS_EN_DELAY_MAX) 2117 mid_min += start_dqs_en - mid_min - IO_DQS_EN_DELAY_MAX; 2118 else if (start_dqs_en - mid_min < 0) 2119 mid_min += start_dqs_en - mid_min; 2120 } 2121 new_dqs = start_dqs - mid_min; 2122 2123 debug_cond(DLEVEL == 1, "vfifo_center: start_dqs=%d start_dqs_en=%d \ 2124 new_dqs=%d mid_min=%d\n", start_dqs, 2125 IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS ? 
start_dqs_en : -1, 2126 new_dqs, mid_min); 2127 2128 /* Initialize data for export structures */ 2129 dqs_margin = IO_IO_IN_DELAY_MAX + 1; 2130 dq_margin = IO_IO_IN_DELAY_MAX + 1; 2131 2132 /* add delay to bring centre of all DQ windows to the same "level" */ 2133 for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) { 2134 /* Use values before divide by 2 to reduce round off error */ 2135 shift_dq = (left_edge[i] - right_edge[i] - 2136 (left_edge[min_index] - right_edge[min_index]))/2 + 2137 (orig_mid_min - mid_min); 2138 2139 debug_cond(DLEVEL == 2, "vfifo_center: before: \ 2140 shift_dq[%u]=%d\n", i, shift_dq); 2141 2142 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_IN_DELAY_OFFSET; 2143 temp_dq_in_delay1 = readl(addr + (p << 2)); 2144 temp_dq_in_delay2 = readl(addr + (i << 2)); 2145 2146 if (shift_dq + (int32_t)temp_dq_in_delay1 > 2147 (int32_t)IO_IO_IN_DELAY_MAX) { 2148 shift_dq = (int32_t)IO_IO_IN_DELAY_MAX - temp_dq_in_delay2; 2149 } else if (shift_dq + (int32_t)temp_dq_in_delay1 < 0) { 2150 shift_dq = -(int32_t)temp_dq_in_delay1; 2151 } 2152 debug_cond(DLEVEL == 2, "vfifo_center: after: \ 2153 shift_dq[%u]=%d\n", i, shift_dq); 2154 final_dq[i] = temp_dq_in_delay1 + shift_dq; 2155 scc_mgr_set_dq_in_delay(p, final_dq[i]); 2156 scc_mgr_load_dq(p); 2157 2158 debug_cond(DLEVEL == 2, "vfifo_center: margin[%u]=[%d,%d]\n", i, 2159 left_edge[i] - shift_dq + (-mid_min), 2160 right_edge[i] + shift_dq - (-mid_min)); 2161 /* To determine values for export structures */ 2162 if (left_edge[i] - shift_dq + (-mid_min) < dq_margin) 2163 dq_margin = left_edge[i] - shift_dq + (-mid_min); 2164 2165 if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin) 2166 dqs_margin = right_edge[i] + shift_dq - (-mid_min); 2167 } 2168 2169 final_dqs = new_dqs; 2170 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) 2171 final_dqs_en = start_dqs_en - mid_min; 2172 2173 /* Move DQS-en */ 2174 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { 2175 scc_mgr_set_dqs_en_delay(read_group, final_dqs_en); 2176 scc_mgr_load_dqs(read_group); 2177 } 2178 2179 /* Move DQS */ 2180 scc_mgr_set_dqs_bus_in_delay(read_group, final_dqs); 2181 scc_mgr_load_dqs(read_group); 2182 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: dq_margin=%d \ 2183 dqs_margin=%d", __func__, __LINE__, 2184 dq_margin, dqs_margin); 2185 2186 /* 2187 * Do not remove this line as it makes sure all of our decisions 2188 * have been applied. Apply the update bit. 2189 */ 2190 writel(0, &sdr_scc_mgr->update); 2191 2192 return (dq_margin >= 0) && (dqs_margin >= 0); 2193 } 2194 2195 /* 2196 * calibrate the read valid prediction FIFO. 2197 * 2198 * - read valid prediction will consist of finding a good DQS enable phase, 2199 * DQS enable delay, DQS input phase, and DQS input delay. 2200 * - we also do a per-bit deskew on the DQ lines. 
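 * The implementation below sweeps output-side delay (d) and DQDQS output
 * phase (p), loads the calibration read patterns, optionally runs the
 * guaranteed read test, then searches for the DQS enable phase/delay and
 * finally performs the per-bit deskew via rw_mgr_mem_calibrate_vfifo_center().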
2201 */ 2202 static uint32_t rw_mgr_mem_calibrate_vfifo(uint32_t read_group, 2203 uint32_t test_bgn) 2204 { 2205 uint32_t p, d, rank_bgn, sr; 2206 uint32_t dtaps_per_ptap; 2207 uint32_t tmp_delay; 2208 uint32_t bit_chk; 2209 uint32_t grp_calibrated; 2210 uint32_t write_group, write_test_bgn; 2211 uint32_t failed_substage; 2212 2213 debug("%s:%d: %u %u\n", __func__, __LINE__, read_group, test_bgn); 2214 2215 /* update info for sims */ 2216 reg_file_set_stage(CAL_STAGE_VFIFO); 2217 2218 write_group = read_group; 2219 write_test_bgn = test_bgn; 2220 2221 /* USER Determine number of delay taps for each phase tap */ 2222 dtaps_per_ptap = 0; 2223 tmp_delay = 0; 2224 while (tmp_delay < IO_DELAY_PER_OPA_TAP) { 2225 dtaps_per_ptap++; 2226 tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP; 2227 } 2228 dtaps_per_ptap--; 2229 tmp_delay = 0; 2230 2231 /* update info for sims */ 2232 reg_file_set_group(read_group); 2233 2234 grp_calibrated = 0; 2235 2236 reg_file_set_sub_stage(CAL_SUBSTAGE_GUARANTEED_READ); 2237 failed_substage = CAL_SUBSTAGE_GUARANTEED_READ; 2238 2239 for (d = 0; d <= dtaps_per_ptap && grp_calibrated == 0; d += 2) { 2240 /* 2241 * In RLDRAMX we may be messing the delay of pins in 2242 * the same write group but outside of the current read 2243 * the group, but that's ok because we haven't 2244 * calibrated output side yet. 2245 */ 2246 if (d > 0) { 2247 scc_mgr_apply_group_all_out_delay_add_all_ranks( 2248 write_group, d); 2249 } 2250 2251 for (p = 0; p <= IO_DQDQS_OUT_PHASE_MAX && grp_calibrated == 0; 2252 p++) { 2253 /* set a particular dqdqs phase */ 2254 scc_mgr_set_dqdqs_output_phase_all_ranks(read_group, p); 2255 2256 debug_cond(DLEVEL == 1, "%s:%d calibrate_vfifo: g=%u \ 2257 p=%u d=%u\n", __func__, __LINE__, 2258 read_group, p, d); 2259 2260 /* 2261 * Load up the patterns used by read calibration 2262 * using current DQDQS phase. 2263 */ 2264 rw_mgr_mem_calibrate_read_load_patterns(0, 1); 2265 if (!(gbl->phy_debug_mode_flags & 2266 PHY_DEBUG_DISABLE_GUARANTEED_READ)) { 2267 if (!rw_mgr_mem_calibrate_read_test_patterns_all_ranks 2268 (read_group, 1, &bit_chk)) { 2269 debug_cond(DLEVEL == 1, "%s:%d Guaranteed read test failed:", 2270 __func__, __LINE__); 2271 debug_cond(DLEVEL == 1, " g=%u p=%u d=%u\n", 2272 read_group, p, d); 2273 break; 2274 } 2275 } 2276 2277 /* case:56390 */ 2278 grp_calibrated = 1; 2279 if (rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay 2280 (write_group, read_group, test_bgn)) { 2281 /* 2282 * USER Read per-bit deskew can be done on a 2283 * per shadow register basis. 2284 */ 2285 for (rank_bgn = 0, sr = 0; 2286 rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS; 2287 rank_bgn += NUM_RANKS_PER_SHADOW_REG, 2288 ++sr) { 2289 /* 2290 * Determine if this set of ranks 2291 * should be skipped entirely. 2292 */ 2293 if (!param->skip_shadow_regs[sr]) { 2294 /* 2295 * If doing read after write 2296 * calibration, do not update 2297 * FOM, now - do it then. 
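							 * (Hence the call
							 * below passes
							 * use_read_test = 1
							 * and update_fom = 0.)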
2298 */ 2299 if (!rw_mgr_mem_calibrate_vfifo_center 2300 (rank_bgn, write_group, 2301 read_group, test_bgn, 1, 0)) { 2302 grp_calibrated = 0; 2303 failed_substage = 2304 CAL_SUBSTAGE_VFIFO_CENTER; 2305 } 2306 } 2307 } 2308 } else { 2309 grp_calibrated = 0; 2310 failed_substage = CAL_SUBSTAGE_DQS_EN_PHASE; 2311 } 2312 } 2313 } 2314 2315 if (grp_calibrated == 0) { 2316 set_failing_group_stage(write_group, CAL_STAGE_VFIFO, 2317 failed_substage); 2318 return 0; 2319 } 2320 2321 /* 2322 * Reset the delay chains back to zero if they have moved > 1 2323 * (check for > 1 because loop will increase d even when pass in 2324 * first case). 2325 */ 2326 if (d > 2) 2327 scc_mgr_zero_group(write_group, 1); 2328 2329 return 1; 2330 } 2331 2332 /* VFIFO Calibration -- Read Deskew Calibration after write deskew */ 2333 static uint32_t rw_mgr_mem_calibrate_vfifo_end(uint32_t read_group, 2334 uint32_t test_bgn) 2335 { 2336 uint32_t rank_bgn, sr; 2337 uint32_t grp_calibrated; 2338 uint32_t write_group; 2339 2340 debug("%s:%d %u %u", __func__, __LINE__, read_group, test_bgn); 2341 2342 /* update info for sims */ 2343 2344 reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES); 2345 reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER); 2346 2347 write_group = read_group; 2348 2349 /* update info for sims */ 2350 reg_file_set_group(read_group); 2351 2352 grp_calibrated = 1; 2353 /* Read per-bit deskew can be done on a per shadow register basis */ 2354 for (rank_bgn = 0, sr = 0; rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS; 2355 rank_bgn += NUM_RANKS_PER_SHADOW_REG, ++sr) { 2356 /* Determine if this set of ranks should be skipped entirely */ 2357 if (!param->skip_shadow_regs[sr]) { 2358 /* This is the last calibration round, update FOM here */ 2359 if (!rw_mgr_mem_calibrate_vfifo_center(rank_bgn, 2360 write_group, 2361 read_group, 2362 test_bgn, 0, 2363 1)) { 2364 grp_calibrated = 0; 2365 } 2366 } 2367 } 2368 2369 2370 if (grp_calibrated == 0) { 2371 set_failing_group_stage(write_group, 2372 CAL_STAGE_VFIFO_AFTER_WRITES, 2373 CAL_SUBSTAGE_VFIFO_CENTER); 2374 return 0; 2375 } 2376 2377 return 1; 2378 } 2379 2380 /* Calibrate LFIFO to find smallest read latency */ 2381 static uint32_t rw_mgr_mem_calibrate_lfifo(void) 2382 { 2383 uint32_t found_one; 2384 uint32_t bit_chk; 2385 2386 debug("%s:%d\n", __func__, __LINE__); 2387 2388 /* update info for sims */ 2389 reg_file_set_stage(CAL_STAGE_LFIFO); 2390 reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY); 2391 2392 /* Load up the patterns used by read calibration for all ranks */ 2393 rw_mgr_mem_calibrate_read_load_patterns(0, 1); 2394 found_one = 0; 2395 2396 do { 2397 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); 2398 debug_cond(DLEVEL == 2, "%s:%d lfifo: read_lat=%u", 2399 __func__, __LINE__, gbl->curr_read_lat); 2400 2401 if (!rw_mgr_mem_calibrate_read_test_all_ranks(0, 2402 NUM_READ_TESTS, 2403 PASS_ALL_BITS, 2404 &bit_chk, 1)) { 2405 break; 2406 } 2407 2408 found_one = 1; 2409 /* reduce read latency and see if things are working */ 2410 /* correctly */ 2411 gbl->curr_read_lat--; 2412 } while (gbl->curr_read_lat > 0); 2413 2414 /* reset the fifos to get pointers to known state */ 2415 2416 writel(0, &phy_mgr_cmd->fifo_reset); 2417 2418 if (found_one) { 2419 /* add a fudge factor to the read latency that was determined */ 2420 gbl->curr_read_lat += 2; 2421 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); 2422 debug_cond(DLEVEL == 2, "%s:%d lfifo: success: using \ 2423 read_lat=%u\n", __func__, __LINE__, 2424 gbl->curr_read_lat); 2425 return 1; 2426 } else { 2427 
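		/*
		 * Not even the initial (largest) read latency produced a
		 * passing read; record the LFIFO stage as the failing stage.
		 */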
set_failing_group_stage(0xff, CAL_STAGE_LFIFO, 2428 CAL_SUBSTAGE_READ_LATENCY); 2429 2430 debug_cond(DLEVEL == 2, "%s:%d lfifo: failed at initial \ 2431 read_lat=%u\n", __func__, __LINE__, 2432 gbl->curr_read_lat); 2433 return 0; 2434 } 2435 } 2436 2437 /* 2438 * issue write test command. 2439 * two variants are provided. one that just tests a write pattern and 2440 * another that tests datamask functionality. 2441 */ 2442 static void rw_mgr_mem_calibrate_write_test_issue(uint32_t group, 2443 uint32_t test_dm) 2444 { 2445 uint32_t mcc_instruction; 2446 uint32_t quick_write_mode = (((STATIC_CALIB_STEPS) & CALIB_SKIP_WRITES) && 2447 ENABLE_SUPER_QUICK_CALIBRATION); 2448 uint32_t rw_wl_nop_cycles; 2449 uint32_t addr; 2450 2451 /* 2452 * Set counter and jump addresses for the right 2453 * number of NOP cycles. 2454 * The number of supported NOP cycles can range from -1 to infinity 2455 * Three different cases are handled: 2456 * 2457 * 1. For a number of NOP cycles greater than 0, the RW Mgr looping 2458 * mechanism will be used to insert the right number of NOPs 2459 * 2460 * 2. For a number of NOP cycles equals to 0, the micro-instruction 2461 * issuing the write command will jump straight to the 2462 * micro-instruction that turns on DQS (for DDRx), or outputs write 2463 * data (for RLD), skipping 2464 * the NOP micro-instruction all together 2465 * 2466 * 3. A number of NOP cycles equal to -1 indicates that DQS must be 2467 * turned on in the same micro-instruction that issues the write 2468 * command. Then we need 2469 * to directly jump to the micro-instruction that sends out the data 2470 * 2471 * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters 2472 * (2 and 3). One jump-counter (0) is used to perform multiple 2473 * write-read operations. 2474 * one counter left to issue this command in "multiple-group" mode 2475 */ 2476 2477 rw_wl_nop_cycles = gbl->rw_wl_nop_cycles; 2478 2479 if (rw_wl_nop_cycles == -1) { 2480 /* 2481 * CNTR 2 - We want to execute the special write operation that 2482 * turns on DQS right away and then skip directly to the 2483 * instruction that sends out the data. We set the counter to a 2484 * large number so that the jump is always taken. 2485 */ 2486 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2); 2487 2488 /* CNTR 3 - Not used */ 2489 if (test_dm) { 2490 mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0_WL_1; 2491 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DATA, 2492 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 2493 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP, 2494 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 2495 } else { 2496 mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0_WL_1; 2497 writel(RW_MGR_LFSR_WR_RD_BANK_0_DATA, 2498 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 2499 writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP, 2500 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 2501 } 2502 } else if (rw_wl_nop_cycles == 0) { 2503 /* 2504 * CNTR 2 - We want to skip the NOP operation and go straight 2505 * to the DQS enable instruction. We set the counter to a large 2506 * number so that the jump is always taken. 
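		 * (This is case 2 from the overview above: rw_wl_nop_cycles
		 * is 0, so the sequence jumps from the write command straight
		 * to the DQS enable micro-instruction.)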
2507 */ 2508 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2); 2509 2510 /* CNTR 3 - Not used */ 2511 if (test_dm) { 2512 mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0; 2513 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DQS, 2514 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 2515 } else { 2516 mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0; 2517 writel(RW_MGR_LFSR_WR_RD_BANK_0_DQS, 2518 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 2519 } 2520 } else { 2521 /* 2522 * CNTR 2 - In this case we want to execute the next instruction 2523 * and NOT take the jump. So we set the counter to 0. The jump 2524 * address doesn't count. 2525 */ 2526 writel(0x0, &sdr_rw_load_mgr_regs->load_cntr2); 2527 writel(0x0, &sdr_rw_load_jump_mgr_regs->load_jump_add2); 2528 2529 /* 2530 * CNTR 3 - Set the nop counter to the number of cycles we 2531 * need to loop for, minus 1. 2532 */ 2533 writel(rw_wl_nop_cycles - 1, &sdr_rw_load_mgr_regs->load_cntr3); 2534 if (test_dm) { 2535 mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0; 2536 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP, 2537 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 2538 } else { 2539 mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0; 2540 writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP, 2541 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 2542 } 2543 } 2544 2545 writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS | 2546 RW_MGR_RESET_READ_DATAPATH_OFFSET); 2547 2548 if (quick_write_mode) 2549 writel(0x08, &sdr_rw_load_mgr_regs->load_cntr0); 2550 else 2551 writel(0x40, &sdr_rw_load_mgr_regs->load_cntr0); 2552 2553 writel(mcc_instruction, &sdr_rw_load_jump_mgr_regs->load_jump_add0); 2554 2555 /* 2556 * CNTR 1 - This is used to ensure enough time elapses 2557 * for read data to come back. 2558 */ 2559 writel(0x30, &sdr_rw_load_mgr_regs->load_cntr1); 2560 2561 if (test_dm) { 2562 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_WAIT, 2563 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 2564 } else { 2565 writel(RW_MGR_LFSR_WR_RD_BANK_0_WAIT, 2566 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 2567 } 2568 2569 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 2570 writel(mcc_instruction, addr + (group << 2)); 2571 } 2572 2573 /* Test writes, can check for a single bit pass or multiple bit pass */ 2574 static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn, 2575 uint32_t write_group, uint32_t use_dm, uint32_t all_correct, 2576 uint32_t *bit_chk, uint32_t all_ranks) 2577 { 2578 uint32_t r; 2579 uint32_t correct_mask_vg; 2580 uint32_t tmp_bit_chk; 2581 uint32_t vg; 2582 uint32_t rank_end = all_ranks ? 
RW_MGR_MEM_NUMBER_OF_RANKS : 2583 (rank_bgn + NUM_RANKS_PER_SHADOW_REG); 2584 uint32_t addr_rw_mgr; 2585 uint32_t base_rw_mgr; 2586 2587 *bit_chk = param->write_correct_mask; 2588 correct_mask_vg = param->write_correct_mask_vg; 2589 2590 for (r = rank_bgn; r < rank_end; r++) { 2591 if (param->skip_ranks[r]) { 2592 /* request to skip the rank */ 2593 continue; 2594 } 2595 2596 /* set rank */ 2597 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); 2598 2599 tmp_bit_chk = 0; 2600 addr_rw_mgr = SDR_PHYGRP_RWMGRGRP_ADDRESS; 2601 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS-1; ; vg--) { 2602 /* reset the fifos to get pointers to known state */ 2603 writel(0, &phy_mgr_cmd->fifo_reset); 2604 2605 tmp_bit_chk = tmp_bit_chk << 2606 (RW_MGR_MEM_DQ_PER_WRITE_DQS / 2607 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS); 2608 rw_mgr_mem_calibrate_write_test_issue(write_group * 2609 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS+vg, 2610 use_dm); 2611 2612 base_rw_mgr = readl(addr_rw_mgr); 2613 tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~(base_rw_mgr)); 2614 if (vg == 0) 2615 break; 2616 } 2617 *bit_chk &= tmp_bit_chk; 2618 } 2619 2620 if (all_correct) { 2621 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 2622 debug_cond(DLEVEL == 2, "write_test(%u,%u,ALL) : %u == \ 2623 %u => %lu", write_group, use_dm, 2624 *bit_chk, param->write_correct_mask, 2625 (long unsigned int)(*bit_chk == 2626 param->write_correct_mask)); 2627 return *bit_chk == param->write_correct_mask; 2628 } else { 2629 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 2630 debug_cond(DLEVEL == 2, "write_test(%u,%u,ONE) : %u != ", 2631 write_group, use_dm, *bit_chk); 2632 debug_cond(DLEVEL == 2, "%lu" " => %lu", (long unsigned int)0, 2633 (long unsigned int)(*bit_chk != 0)); 2634 return *bit_chk != 0x00; 2635 } 2636 } 2637 2638 /* 2639 * center all windows. do per-bit-deskew to possibly increase size of 2640 * certain windows. 2641 */ 2642 static uint32_t rw_mgr_mem_calibrate_writes_center(uint32_t rank_bgn, 2643 uint32_t write_group, uint32_t test_bgn) 2644 { 2645 uint32_t i, p, min_index; 2646 int32_t d; 2647 /* 2648 * Store these as signed since there are comparisons with 2649 * signed numbers. 2650 */ 2651 uint32_t bit_chk; 2652 uint32_t sticky_bit_chk; 2653 int32_t left_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS]; 2654 int32_t right_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS]; 2655 int32_t mid; 2656 int32_t mid_min, orig_mid_min; 2657 int32_t new_dqs, start_dqs, shift_dq; 2658 int32_t dq_margin, dqs_margin, dm_margin; 2659 uint32_t stop; 2660 uint32_t temp_dq_out1_delay; 2661 uint32_t addr; 2662 2663 debug("%s:%d %u %u", __func__, __LINE__, write_group, test_bgn); 2664 2665 dm_margin = 0; 2666 2667 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET; 2668 start_dqs = readl(addr + 2669 (RW_MGR_MEM_DQ_PER_WRITE_DQS << 2)); 2670 2671 /* per-bit deskew */ 2672 2673 /* 2674 * set the left and right edge of each bit to an illegal value 2675 * use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value. 2676 */ 2677 sticky_bit_chk = 0; 2678 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2679 left_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; 2680 right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; 2681 } 2682 2683 /* Search for the left edge of the window for each bit */ 2684 for (d = 0; d <= IO_IO_OUT1_DELAY_MAX; d++) { 2685 scc_mgr_apply_group_dq_out1_delay(write_group, d); 2686 2687 writel(0, &sdr_scc_mgr->update); 2688 2689 /* 2690 * Stop searching when the read test doesn't pass AND when 2691 * we've seen a passing read on every bit. 
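		 * Here the per-bit test is the write test: the loop below
		 * ends only when the current delay fails and sticky_bit_chk
		 * has accumulated param->write_correct_mask, i.e. every bit
		 * has passed at least once.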
2692 */ 2693 stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 2694 0, PASS_ONE_BIT, &bit_chk, 0); 2695 sticky_bit_chk = sticky_bit_chk | bit_chk; 2696 stop = stop && (sticky_bit_chk == param->write_correct_mask); 2697 debug_cond(DLEVEL == 2, "write_center(left): dtap=%d => %u \ 2698 == %u && %u [bit_chk= %u ]\n", 2699 d, sticky_bit_chk, param->write_correct_mask, 2700 stop, bit_chk); 2701 2702 if (stop == 1) { 2703 break; 2704 } else { 2705 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2706 if (bit_chk & 1) { 2707 /* 2708 * Remember a passing test as the 2709 * left_edge. 2710 */ 2711 left_edge[i] = d; 2712 } else { 2713 /* 2714 * If a left edge has not been seen 2715 * yet, then a future passing test will 2716 * mark this edge as the right edge. 2717 */ 2718 if (left_edge[i] == 2719 IO_IO_OUT1_DELAY_MAX + 1) { 2720 right_edge[i] = -(d + 1); 2721 } 2722 } 2723 debug_cond(DLEVEL == 2, "write_center[l,d=%d):", d); 2724 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d", 2725 (int)(bit_chk & 1), i, left_edge[i]); 2726 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i, 2727 right_edge[i]); 2728 bit_chk = bit_chk >> 1; 2729 } 2730 } 2731 } 2732 2733 /* Reset DQ delay chains to 0 */ 2734 scc_mgr_apply_group_dq_out1_delay(0); 2735 sticky_bit_chk = 0; 2736 for (i = RW_MGR_MEM_DQ_PER_WRITE_DQS - 1;; i--) { 2737 debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: \ 2738 %d right_edge[%u]: %d\n", __func__, __LINE__, 2739 i, left_edge[i], i, right_edge[i]); 2740 2741 /* 2742 * Check for cases where we haven't found the left edge, 2743 * which makes our assignment of the the right edge invalid. 2744 * Reset it to the illegal value. 2745 */ 2746 if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) && 2747 (right_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) { 2748 right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; 2749 debug_cond(DLEVEL == 2, "%s:%d write_center: reset \ 2750 right_edge[%u]: %d\n", __func__, __LINE__, 2751 i, right_edge[i]); 2752 } 2753 2754 /* 2755 * Reset sticky bit (except for bits where we have 2756 * seen the left edge). 2757 */ 2758 sticky_bit_chk = sticky_bit_chk << 1; 2759 if ((left_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) 2760 sticky_bit_chk = sticky_bit_chk | 1; 2761 2762 if (i == 0) 2763 break; 2764 } 2765 2766 /* Search for the right edge of the window for each bit */ 2767 for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - start_dqs; d++) { 2768 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, 2769 d + start_dqs); 2770 2771 writel(0, &sdr_scc_mgr->update); 2772 2773 /* 2774 * Stop searching when the read test doesn't pass AND when 2775 * we've seen a passing read on every bit. 
2776 */ 2777 stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 2778 0, PASS_ONE_BIT, &bit_chk, 0); 2779 2780 sticky_bit_chk = sticky_bit_chk | bit_chk; 2781 stop = stop && (sticky_bit_chk == param->write_correct_mask); 2782 2783 debug_cond(DLEVEL == 2, "write_center (right): dtap=%u => %u == \ 2784 %u && %u\n", d, sticky_bit_chk, 2785 param->write_correct_mask, stop); 2786 2787 if (stop == 1) { 2788 if (d == 0) { 2789 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; 2790 i++) { 2791 /* d = 0 failed, but it passed when 2792 testing the left edge, so it must be 2793 marginal, set it to -1 */ 2794 if (right_edge[i] == 2795 IO_IO_OUT1_DELAY_MAX + 1 && 2796 left_edge[i] != 2797 IO_IO_OUT1_DELAY_MAX + 1) { 2798 right_edge[i] = -1; 2799 } 2800 } 2801 } 2802 break; 2803 } else { 2804 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2805 if (bit_chk & 1) { 2806 /* 2807 * Remember a passing test as 2808 * the right_edge. 2809 */ 2810 right_edge[i] = d; 2811 } else { 2812 if (d != 0) { 2813 /* 2814 * If a right edge has not 2815 * been seen yet, then a future 2816 * passing test will mark this 2817 * edge as the left edge. 2818 */ 2819 if (right_edge[i] == 2820 IO_IO_OUT1_DELAY_MAX + 1) 2821 left_edge[i] = -(d + 1); 2822 } else { 2823 /* 2824 * d = 0 failed, but it passed 2825 * when testing the left edge, 2826 * so it must be marginal, set 2827 * it to -1. 2828 */ 2829 if (right_edge[i] == 2830 IO_IO_OUT1_DELAY_MAX + 1 && 2831 left_edge[i] != 2832 IO_IO_OUT1_DELAY_MAX + 1) 2833 right_edge[i] = -1; 2834 /* 2835 * If a right edge has not been 2836 * seen yet, then a future 2837 * passing test will mark this 2838 * edge as the left edge. 2839 */ 2840 else if (right_edge[i] == 2841 IO_IO_OUT1_DELAY_MAX + 2842 1) 2843 left_edge[i] = -(d + 1); 2844 } 2845 } 2846 debug_cond(DLEVEL == 2, "write_center[r,d=%d):", d); 2847 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d", 2848 (int)(bit_chk & 1), i, left_edge[i]); 2849 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i, 2850 right_edge[i]); 2851 bit_chk = bit_chk >> 1; 2852 } 2853 } 2854 } 2855 2856 /* Check that all bits have a window */ 2857 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2858 debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: \ 2859 %d right_edge[%u]: %d", __func__, __LINE__, 2860 i, left_edge[i], i, right_edge[i]); 2861 if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) || 2862 (right_edge[i] == IO_IO_OUT1_DELAY_MAX + 1)) { 2863 set_failing_group_stage(test_bgn + i, 2864 CAL_STAGE_WRITES, 2865 CAL_SUBSTAGE_WRITES_CENTER); 2866 return 0; 2867 } 2868 } 2869 2870 /* Find middle of window for each DQ bit */ 2871 mid_min = left_edge[0] - right_edge[0]; 2872 min_index = 0; 2873 for (i = 1; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2874 mid = left_edge[i] - right_edge[i]; 2875 if (mid < mid_min) { 2876 mid_min = mid; 2877 min_index = i; 2878 } 2879 } 2880 2881 /* 2882 * -mid_min/2 represents the amount that we need to move DQS. 2883 * If mid_min is odd and positive we'll need to add one to 2884 * make sure the rounding in further calculations is correct 2885 * (always bias to the right), so just add 1 for all positive values. 
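	 * For example (illustrative value), a mid_min of 5 becomes
	 * (5 + 1) / 2 = 3 instead of 5 / 2 = 2, biasing the centre one tap
	 * to the right.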
2886 */ 2887 if (mid_min > 0) 2888 mid_min++; 2889 mid_min = mid_min / 2; 2890 debug_cond(DLEVEL == 1, "%s:%d write_center: mid_min=%d\n", __func__, 2891 __LINE__, mid_min); 2892 2893 /* Determine the amount we can change DQS (which is -mid_min) */ 2894 orig_mid_min = mid_min; 2895 new_dqs = start_dqs; 2896 mid_min = 0; 2897 debug_cond(DLEVEL == 1, "%s:%d write_center: start_dqs=%d new_dqs=%d \ 2898 mid_min=%d\n", __func__, __LINE__, start_dqs, new_dqs, mid_min); 2899 /* Initialize data for export structures */ 2900 dqs_margin = IO_IO_OUT1_DELAY_MAX + 1; 2901 dq_margin = IO_IO_OUT1_DELAY_MAX + 1; 2902 2903 /* add delay to bring centre of all DQ windows to the same "level" */ 2904 for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) { 2905 /* Use values before divide by 2 to reduce round off error */ 2906 shift_dq = (left_edge[i] - right_edge[i] - 2907 (left_edge[min_index] - right_edge[min_index]))/2 + 2908 (orig_mid_min - mid_min); 2909 2910 debug_cond(DLEVEL == 2, "%s:%d write_center: before: shift_dq \ 2911 [%u]=%d\n", __func__, __LINE__, i, shift_dq); 2912 2913 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET; 2914 temp_dq_out1_delay = readl(addr + (i << 2)); 2915 if (shift_dq + (int32_t)temp_dq_out1_delay > 2916 (int32_t)IO_IO_OUT1_DELAY_MAX) { 2917 shift_dq = (int32_t)IO_IO_OUT1_DELAY_MAX - temp_dq_out1_delay; 2918 } else if (shift_dq + (int32_t)temp_dq_out1_delay < 0) { 2919 shift_dq = -(int32_t)temp_dq_out1_delay; 2920 } 2921 debug_cond(DLEVEL == 2, "write_center: after: shift_dq[%u]=%d\n", 2922 i, shift_dq); 2923 scc_mgr_set_dq_out1_delay(i, temp_dq_out1_delay + shift_dq); 2924 scc_mgr_load_dq(i); 2925 2926 debug_cond(DLEVEL == 2, "write_center: margin[%u]=[%d,%d]\n", i, 2927 left_edge[i] - shift_dq + (-mid_min), 2928 right_edge[i] + shift_dq - (-mid_min)); 2929 /* To determine values for export structures */ 2930 if (left_edge[i] - shift_dq + (-mid_min) < dq_margin) 2931 dq_margin = left_edge[i] - shift_dq + (-mid_min); 2932 2933 if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin) 2934 dqs_margin = right_edge[i] + shift_dq - (-mid_min); 2935 } 2936 2937 /* Move DQS */ 2938 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs); 2939 writel(0, &sdr_scc_mgr->update); 2940 2941 /* Centre DM */ 2942 debug_cond(DLEVEL == 2, "%s:%d write_center: DM\n", __func__, __LINE__); 2943 2944 /* 2945 * set the left and right edge of each bit to an illegal value, 2946 * use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value, 2947 */ 2948 left_edge[0] = IO_IO_OUT1_DELAY_MAX + 1; 2949 right_edge[0] = IO_IO_OUT1_DELAY_MAX + 1; 2950 int32_t bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; 2951 int32_t end_curr = IO_IO_OUT1_DELAY_MAX + 1; 2952 int32_t bgn_best = IO_IO_OUT1_DELAY_MAX + 1; 2953 int32_t end_best = IO_IO_OUT1_DELAY_MAX + 1; 2954 int32_t win_best = 0; 2955 2956 /* Search for the/part of the window with DM shift */ 2957 for (d = IO_IO_OUT1_DELAY_MAX; d >= 0; d -= DELTA_D) { 2958 scc_mgr_apply_group_dm_out1_delay(d); 2959 writel(0, &sdr_scc_mgr->update); 2960 2961 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1, 2962 PASS_ALL_BITS, &bit_chk, 2963 0)) { 2964 /* USE Set current end of the window */ 2965 end_curr = -d; 2966 /* 2967 * If a starting edge of our window has not been seen 2968 * this is our current start of the DM window. 2969 */ 2970 if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1) 2971 bgn_curr = -d; 2972 2973 /* 2974 * If current window is bigger than best seen. 2975 * Set best seen to be current window. 
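			 * (The window size here is end_curr - bgn_curr + 1
			 * delay taps.)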
2976			 */
2977			if ((end_curr-bgn_curr+1) > win_best) {
2978				win_best = end_curr-bgn_curr+1;
2979				bgn_best = bgn_curr;
2980				end_best = end_curr;
2981			}
2982		} else {
2983			/* We just saw a failing test. Reset temp edge */
2984			bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
2985			end_curr = IO_IO_OUT1_DELAY_MAX + 1;
2986		}
2987	}
2988
2989
2990	/* Reset DM delay chains to 0 */
2991	scc_mgr_apply_group_dm_out1_delay(0);
2992
2993	/*
2994	 * Check to see if the current window nudges up against 0 delay.
2995	 * If so, we need to continue the search by shifting DQS; otherwise
2996	 * the DQS search begins as a new search. */
2997	if (end_curr != 0) {
2998		bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
2999		end_curr = IO_IO_OUT1_DELAY_MAX + 1;
3000	}
3001
3002	/* Search for the window (or part of it) with DQS shifts */
3003	for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - new_dqs; d += DELTA_D) {
3004		/*
3005		 * Note: This only shifts DQS, so we may be limiting ourselves
3006		 * to the width of DQ unnecessarily.
3007		 */
3008		scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
3009							d + new_dqs);
3010
3011		writel(0, &sdr_scc_mgr->update);
3012		if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1,
3013						    PASS_ALL_BITS, &bit_chk,
3014						    0)) {
3015			/* USE Set current end of the window */
3016			end_curr = d;
3017			/*
3018			 * If a beginning edge of our window has not been
3019			 * seen, this is our current begin of the DM window.
3020			 */
3021			if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1)
3022				bgn_curr = d;
3023
3024			/*
3025			 * If the current window is bigger than the best seen,
3026			 * set best seen to be the current window.
3027			 */
3028			if ((end_curr-bgn_curr+1) > win_best) {
3029				win_best = end_curr-bgn_curr+1;
3030				bgn_best = bgn_curr;
3031				end_best = end_curr;
3032			}
3033		} else {
3034			/* We just saw a failing test. Reset temp edge */
3035			bgn_curr = IO_IO_OUT1_DELAY_MAX + 1;
3036			end_curr = IO_IO_OUT1_DELAY_MAX + 1;
3037
3038			/* Early exit optimization: if the remaining delay
3039			   chain space is less than the largest window
3040			   already seen, we can exit. */
3041			if ((win_best-1) >
3042				(IO_IO_OUT1_DELAY_MAX - new_dqs - d)) {
3043					break;
3044				}
3045		}
3046	}
3047
3048	/* assign left and right edge for cal and reporting */
3049	left_edge[0] = -1*bgn_best;
3050	right_edge[0] = end_best;
3051
3052	debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d\n", __func__,
3053		   __LINE__, left_edge[0], right_edge[0]);
3054
3055	/* Move DQS (back to orig) */
3056	scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);
3057
3058	/* Move DM */
3059
3060	/* Find middle of window for the DM bit */
3061	mid = (left_edge[0] - right_edge[0]) / 2;
3062
3063	/* only move right, since we are not moving DQS/DQ */
3064	if (mid < 0)
3065		mid = 0;
3066
3067	/* dm_margin should fail if we never find a window */
3068	if (win_best == 0)
3069		dm_margin = -1;
3070	else
3071		dm_margin = left_edge[0] - mid;
3072
3073	scc_mgr_apply_group_dm_out1_delay(mid);
3074	writel(0, &sdr_scc_mgr->update);
3075
3076	debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d mid=%d \
3077		   dm_margin=%d\n", __func__, __LINE__, left_edge[0],
3078		   right_edge[0], mid, dm_margin);
3079	/* Export values */
3080	gbl->fom_out += dq_margin + dqs_margin;
3081
3082	debug_cond(DLEVEL == 2, "%s:%d write_center: dq_margin=%d \
3083		   dqs_margin=%d dm_margin=%d\n", __func__, __LINE__,
3084		   dq_margin, dqs_margin, dm_margin);
3085
3086	/*
3087	 * Do not remove this line as it makes sure all of our
3088	 * decisions have been applied.
3089 */ 3090 writel(0, &sdr_scc_mgr->update); 3091 return (dq_margin >= 0) && (dqs_margin >= 0) && (dm_margin >= 0); 3092 } 3093 3094 /* calibrate the write operations */ 3095 static uint32_t rw_mgr_mem_calibrate_writes(uint32_t rank_bgn, uint32_t g, 3096 uint32_t test_bgn) 3097 { 3098 /* update info for sims */ 3099 debug("%s:%d %u %u\n", __func__, __LINE__, g, test_bgn); 3100 3101 reg_file_set_stage(CAL_STAGE_WRITES); 3102 reg_file_set_sub_stage(CAL_SUBSTAGE_WRITES_CENTER); 3103 3104 reg_file_set_group(g); 3105 3106 if (!rw_mgr_mem_calibrate_writes_center(rank_bgn, g, test_bgn)) { 3107 set_failing_group_stage(g, CAL_STAGE_WRITES, 3108 CAL_SUBSTAGE_WRITES_CENTER); 3109 return 0; 3110 } 3111 3112 return 1; 3113 } 3114 3115 /* precharge all banks and activate row 0 in bank "000..." and bank "111..." */ 3116 static void mem_precharge_and_activate(void) 3117 { 3118 uint32_t r; 3119 3120 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { 3121 if (param->skip_ranks[r]) { 3122 /* request to skip the rank */ 3123 continue; 3124 } 3125 3126 /* set rank */ 3127 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); 3128 3129 /* precharge all banks ... */ 3130 writel(RW_MGR_PRECHARGE_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS | 3131 RW_MGR_RUN_SINGLE_GROUP_OFFSET); 3132 3133 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr0); 3134 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT1, 3135 &sdr_rw_load_jump_mgr_regs->load_jump_add0); 3136 3137 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr1); 3138 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT2, 3139 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 3140 3141 /* activate rows */ 3142 writel(RW_MGR_ACTIVATE_0_AND_1, SDR_PHYGRP_RWMGRGRP_ADDRESS | 3143 RW_MGR_RUN_SINGLE_GROUP_OFFSET); 3144 } 3145 } 3146 3147 /* Configure various memory related parameters. */ 3148 static void mem_config(void) 3149 { 3150 uint32_t rlat, wlat; 3151 uint32_t rw_wl_nop_cycles; 3152 uint32_t max_latency; 3153 3154 debug("%s:%d\n", __func__, __LINE__); 3155 /* read in write and read latency */ 3156 wlat = readl(&data_mgr->t_wl_add); 3157 wlat += readl(&data_mgr->mem_t_add); 3158 3159 /* WL for hard phy does not include additive latency */ 3160 3161 /* 3162 * add addtional write latency to offset the address/command extra 3163 * clock cycle. We change the AC mux setting causing AC to be delayed 3164 * by one mem clock cycle. Only do this for DDR3 3165 */ 3166 wlat = wlat + 1; 3167 3168 rlat = readl(&data_mgr->t_rl_add); 3169 3170 rw_wl_nop_cycles = wlat - 2; 3171 gbl->rw_wl_nop_cycles = rw_wl_nop_cycles; 3172 3173 /* 3174 * For AV/CV, lfifo is hardened and always runs at full rate so 3175 * max latency in AFI clocks, used here, is correspondingly smaller. 
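	 * For example, an illustrative MAX_LATENCY_COUNT_WIDTH of 5 would
	 * give (1 << 5) / 1 - 1 = 31 AFI clocks in the expression below.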
3176 */ 3177 max_latency = (1<<MAX_LATENCY_COUNT_WIDTH)/1 - 1; 3178 /* configure for a burst length of 8 */ 3179 3180 /* write latency */ 3181 /* Adjust Write Latency for Hard PHY */ 3182 wlat = wlat + 1; 3183 3184 /* set a pretty high read latency initially */ 3185 gbl->curr_read_lat = rlat + 16; 3186 3187 if (gbl->curr_read_lat > max_latency) 3188 gbl->curr_read_lat = max_latency; 3189 3190 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); 3191 3192 /* advertise write latency */ 3193 gbl->curr_write_lat = wlat; 3194 writel(wlat - 2, &phy_mgr_cfg->afi_wlat); 3195 3196 /* initialize bit slips */ 3197 mem_precharge_and_activate(); 3198 } 3199 3200 /* Set VFIFO and LFIFO to instant-on settings in skip calibration mode */ 3201 static void mem_skip_calibrate(void) 3202 { 3203 uint32_t vfifo_offset; 3204 uint32_t i, j, r; 3205 3206 debug("%s:%d\n", __func__, __LINE__); 3207 /* Need to update every shadow register set used by the interface */ 3208 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 3209 r += NUM_RANKS_PER_SHADOW_REG) { 3210 /* 3211 * Set output phase alignment settings appropriate for 3212 * skip calibration. 3213 */ 3214 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { 3215 scc_mgr_set_dqs_en_phase(i, 0); 3216 #if IO_DLL_CHAIN_LENGTH == 6 3217 scc_mgr_set_dqdqs_output_phase(i, 6); 3218 #else 3219 scc_mgr_set_dqdqs_output_phase(i, 7); 3220 #endif 3221 /* 3222 * Case:33398 3223 * 3224 * Write data arrives to the I/O two cycles before write 3225 * latency is reached (720 deg). 3226 * -> due to bit-slip in a/c bus 3227 * -> to allow board skew where dqs is longer than ck 3228 * -> how often can this happen!? 3229 * -> can claim back some ptaps for high freq 3230 * support if we can relax this, but i digress... 3231 * 3232 * The write_clk leads mem_ck by 90 deg 3233 * The minimum ptap of the OPA is 180 deg 3234 * Each ptap has (360 / IO_DLL_CHAIN_LENGH) deg of delay 3235 * The write_clk is always delayed by 2 ptaps 3236 * 3237 * Hence, to make DQS aligned to CK, we need to delay 3238 * DQS by: 3239 * (720 - 90 - 180 - 2 * (360 / IO_DLL_CHAIN_LENGTH)) 3240 * 3241 * Dividing the above by (360 / IO_DLL_CHAIN_LENGTH) 3242 * gives us the number of ptaps, which simplies to: 3243 * 3244 * (1.25 * IO_DLL_CHAIN_LENGTH - 2) 3245 */ 3246 scc_mgr_set_dqdqs_output_phase(i, (1.25 * 3247 IO_DLL_CHAIN_LENGTH - 2)); 3248 } 3249 writel(0xff, &sdr_scc_mgr->dqs_ena); 3250 writel(0xff, &sdr_scc_mgr->dqs_io_ena); 3251 3252 for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) { 3253 writel(i, SDR_PHYGRP_SCCGRP_ADDRESS | 3254 SCC_MGR_GROUP_COUNTER_OFFSET); 3255 } 3256 writel(0xff, &sdr_scc_mgr->dq_ena); 3257 writel(0xff, &sdr_scc_mgr->dm_ena); 3258 writel(0, &sdr_scc_mgr->update); 3259 } 3260 3261 /* Compensate for simulation model behaviour */ 3262 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { 3263 scc_mgr_set_dqs_bus_in_delay(i, 10); 3264 scc_mgr_load_dqs(i); 3265 } 3266 writel(0, &sdr_scc_mgr->update); 3267 3268 /* 3269 * ArriaV has hard FIFOs that can only be initialized by incrementing 3270 * in sequencer. 3271 */ 3272 vfifo_offset = CALIB_VFIFO_OFFSET; 3273 for (j = 0; j < vfifo_offset; j++) { 3274 writel(0xff, &phy_mgr_cmd->inc_vfifo_hard_phy); 3275 } 3276 writel(0, &phy_mgr_cmd->fifo_reset); 3277 3278 /* 3279 * For ACV with hard lfifo, we get the skip-cal setting from 3280 * generation-time constant. 
3281 */ 3282 gbl->curr_read_lat = CALIB_LFIFO_OFFSET; 3283 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); 3284 } 3285 3286 /* Memory calibration entry point */ 3287 static uint32_t mem_calibrate(void) 3288 { 3289 uint32_t i; 3290 uint32_t rank_bgn, sr; 3291 uint32_t write_group, write_test_bgn; 3292 uint32_t read_group, read_test_bgn; 3293 uint32_t run_groups, current_run; 3294 uint32_t failing_groups = 0; 3295 uint32_t group_failed = 0; 3296 uint32_t sr_failed = 0; 3297 3298 debug("%s:%d\n", __func__, __LINE__); 3299 /* Initialize the data settings */ 3300 3301 gbl->error_substage = CAL_SUBSTAGE_NIL; 3302 gbl->error_stage = CAL_STAGE_NIL; 3303 gbl->error_group = 0xff; 3304 gbl->fom_in = 0; 3305 gbl->fom_out = 0; 3306 3307 mem_config(); 3308 3309 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { 3310 writel(i, SDR_PHYGRP_SCCGRP_ADDRESS | 3311 SCC_MGR_GROUP_COUNTER_OFFSET); 3312 /* Only needed once to set all groups, pins, DQ, DQS, DM. */ 3313 if (i == 0) 3314 scc_mgr_set_hhp_extras(); 3315 3316 scc_set_bypass_mode(i); 3317 } 3318 3319 if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) { 3320 /* 3321 * Set VFIFO and LFIFO to instant-on settings in skip 3322 * calibration mode. 3323 */ 3324 mem_skip_calibrate(); 3325 } else { 3326 for (i = 0; i < NUM_CALIB_REPEAT; i++) { 3327 /* 3328 * Zero all delay chain/phase settings for all 3329 * groups and all shadow register sets. 3330 */ 3331 scc_mgr_zero_all(); 3332 3333 run_groups = ~param->skip_groups; 3334 3335 for (write_group = 0, write_test_bgn = 0; write_group 3336 < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; write_group++, 3337 write_test_bgn += RW_MGR_MEM_DQ_PER_WRITE_DQS) { 3338 /* Initialized the group failure */ 3339 group_failed = 0; 3340 3341 current_run = run_groups & ((1 << 3342 RW_MGR_NUM_DQS_PER_WRITE_GROUP) - 1); 3343 run_groups = run_groups >> 3344 RW_MGR_NUM_DQS_PER_WRITE_GROUP; 3345 3346 if (current_run == 0) 3347 continue; 3348 3349 writel(write_group, SDR_PHYGRP_SCCGRP_ADDRESS | 3350 SCC_MGR_GROUP_COUNTER_OFFSET); 3351 scc_mgr_zero_group(write_group, 0); 3352 3353 for (read_group = write_group * 3354 RW_MGR_MEM_IF_READ_DQS_WIDTH / 3355 RW_MGR_MEM_IF_WRITE_DQS_WIDTH, 3356 read_test_bgn = 0; 3357 read_group < (write_group + 1) * 3358 RW_MGR_MEM_IF_READ_DQS_WIDTH / 3359 RW_MGR_MEM_IF_WRITE_DQS_WIDTH && 3360 group_failed == 0; 3361 read_group++, read_test_bgn += 3362 RW_MGR_MEM_DQ_PER_READ_DQS) { 3363 /* Calibrate the VFIFO */ 3364 if (!((STATIC_CALIB_STEPS) & 3365 CALIB_SKIP_VFIFO)) { 3366 if (!rw_mgr_mem_calibrate_vfifo 3367 (read_group, 3368 read_test_bgn)) { 3369 group_failed = 1; 3370 3371 if (!(gbl-> 3372 phy_debug_mode_flags & 3373 PHY_DEBUG_SWEEP_ALL_GROUPS)) { 3374 return 0; 3375 } 3376 } 3377 } 3378 } 3379 3380 /* Calibrate the output side */ 3381 if (group_failed == 0) { 3382 for (rank_bgn = 0, sr = 0; rank_bgn 3383 < RW_MGR_MEM_NUMBER_OF_RANKS; 3384 rank_bgn += 3385 NUM_RANKS_PER_SHADOW_REG, 3386 ++sr) { 3387 sr_failed = 0; 3388 if (!((STATIC_CALIB_STEPS) & 3389 CALIB_SKIP_WRITES)) { 3390 if ((STATIC_CALIB_STEPS) 3391 & CALIB_SKIP_DELAY_SWEEPS) { 3392 /* not needed in quick mode! */ 3393 } else { 3394 /* 3395 * Determine if this set of 3396 * ranks should be skipped 3397 * entirely. 
3398 */ 3399 if (!param->skip_shadow_regs[sr]) { 3400 if (!rw_mgr_mem_calibrate_writes 3401 (rank_bgn, write_group, 3402 write_test_bgn)) { 3403 sr_failed = 1; 3404 if (!(gbl-> 3405 phy_debug_mode_flags & 3406 PHY_DEBUG_SWEEP_ALL_GROUPS)) { 3407 return 0; 3408 } 3409 } 3410 } 3411 } 3412 } 3413 if (sr_failed != 0) 3414 group_failed = 1; 3415 } 3416 } 3417 3418 if (group_failed == 0) { 3419 for (read_group = write_group * 3420 RW_MGR_MEM_IF_READ_DQS_WIDTH / 3421 RW_MGR_MEM_IF_WRITE_DQS_WIDTH, 3422 read_test_bgn = 0; 3423 read_group < (write_group + 1) 3424 * RW_MGR_MEM_IF_READ_DQS_WIDTH 3425 / RW_MGR_MEM_IF_WRITE_DQS_WIDTH && 3426 group_failed == 0; 3427 read_group++, read_test_bgn += 3428 RW_MGR_MEM_DQ_PER_READ_DQS) { 3429 if (!((STATIC_CALIB_STEPS) & 3430 CALIB_SKIP_WRITES)) { 3431 if (!rw_mgr_mem_calibrate_vfifo_end 3432 (read_group, read_test_bgn)) { 3433 group_failed = 1; 3434 3435 if (!(gbl->phy_debug_mode_flags 3436 & PHY_DEBUG_SWEEP_ALL_GROUPS)) { 3437 return 0; 3438 } 3439 } 3440 } 3441 } 3442 } 3443 3444 if (group_failed != 0) 3445 failing_groups++; 3446 } 3447 3448 /* 3449 * USER If there are any failing groups then report 3450 * the failure. 3451 */ 3452 if (failing_groups != 0) 3453 return 0; 3454 3455 /* Calibrate the LFIFO */ 3456 if (!((STATIC_CALIB_STEPS) & CALIB_SKIP_LFIFO)) { 3457 /* 3458 * If we're skipping groups as part of debug, 3459 * don't calibrate LFIFO. 3460 */ 3461 if (param->skip_groups == 0) { 3462 if (!rw_mgr_mem_calibrate_lfifo()) 3463 return 0; 3464 } 3465 } 3466 } 3467 } 3468 3469 /* 3470 * Do not remove this line as it makes sure all of our decisions 3471 * have been applied. 3472 */ 3473 writel(0, &sdr_scc_mgr->update); 3474 return 1; 3475 } 3476 3477 static uint32_t run_mem_calibrate(void) 3478 { 3479 uint32_t pass; 3480 uint32_t debug_info; 3481 3482 debug("%s:%d\n", __func__, __LINE__); 3483 3484 /* Reset pass/fail status shown on afi_cal_success/fail */ 3485 writel(PHY_MGR_CAL_RESET, &phy_mgr_cfg->cal_status); 3486 3487 /* stop tracking manger */ 3488 uint32_t ctrlcfg = readl(&sdr_ctrl->ctrl_cfg); 3489 3490 writel(ctrlcfg & 0xFFBFFFFF, &sdr_ctrl->ctrl_cfg); 3491 3492 initialize(); 3493 rw_mgr_mem_initialize(); 3494 3495 pass = mem_calibrate(); 3496 3497 mem_precharge_and_activate(); 3498 writel(0, &phy_mgr_cmd->fifo_reset); 3499 3500 /* 3501 * Handoff: 3502 * Don't return control of the PHY back to AFI when in debug mode. 
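	 * (When PHY_DEBUG_IN_DEBUG_MODE is set, rw_mgr_mem_handoff() is
	 * skipped and mux_sel keeps the calibration setting programmed in
	 * initialize().)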
3503 */ 3504 if ((gbl->phy_debug_mode_flags & PHY_DEBUG_IN_DEBUG_MODE) == 0) { 3505 rw_mgr_mem_handoff(); 3506 /* 3507 * In Hard PHY this is a 2-bit control: 3508 * 0: AFI Mux Select 3509 * 1: DDIO Mux Select 3510 */ 3511 writel(0x2, &phy_mgr_cfg->mux_sel); 3512 } 3513 3514 writel(ctrlcfg, &sdr_ctrl->ctrl_cfg); 3515 3516 if (pass) { 3517 printf("%s: CALIBRATION PASSED\n", __FILE__); 3518 3519 gbl->fom_in /= 2; 3520 gbl->fom_out /= 2; 3521 3522 if (gbl->fom_in > 0xff) 3523 gbl->fom_in = 0xff; 3524 3525 if (gbl->fom_out > 0xff) 3526 gbl->fom_out = 0xff; 3527 3528 /* Update the FOM in the register file */ 3529 debug_info = gbl->fom_in; 3530 debug_info |= gbl->fom_out << 8; 3531 writel(debug_info, &sdr_reg_file->fom); 3532 3533 writel(debug_info, &phy_mgr_cfg->cal_debug_info); 3534 writel(PHY_MGR_CAL_SUCCESS, &phy_mgr_cfg->cal_status); 3535 } else { 3536 printf("%s: CALIBRATION FAILED\n", __FILE__); 3537 3538 debug_info = gbl->error_stage; 3539 debug_info |= gbl->error_substage << 8; 3540 debug_info |= gbl->error_group << 16; 3541 3542 writel(debug_info, &sdr_reg_file->failing_stage); 3543 writel(debug_info, &phy_mgr_cfg->cal_debug_info); 3544 writel(PHY_MGR_CAL_FAIL, &phy_mgr_cfg->cal_status); 3545 3546 /* Update the failing group/stage in the register file */ 3547 debug_info = gbl->error_stage; 3548 debug_info |= gbl->error_substage << 8; 3549 debug_info |= gbl->error_group << 16; 3550 writel(debug_info, &sdr_reg_file->failing_stage); 3551 } 3552 3553 return pass; 3554 } 3555 3556 /** 3557 * hc_initialize_rom_data() - Initialize ROM data 3558 * 3559 * Initialize ROM data. 3560 */ 3561 static void hc_initialize_rom_data(void) 3562 { 3563 u32 i, addr; 3564 3565 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_INST_ROM_WRITE_OFFSET; 3566 for (i = 0; i < ARRAY_SIZE(inst_rom_init); i++) 3567 writel(inst_rom_init[i], addr + (i << 2)); 3568 3569 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_AC_ROM_WRITE_OFFSET; 3570 for (i = 0; i < ARRAY_SIZE(ac_rom_init); i++) 3571 writel(ac_rom_init[i], addr + (i << 2)); 3572 } 3573 3574 /** 3575 * initialize_reg_file() - Initialize SDR register file 3576 * 3577 * Initialize SDR register file. 3578 */ 3579 static void initialize_reg_file(void) 3580 { 3581 /* Initialize the register file with the correct data */ 3582 writel(REG_FILE_INIT_SEQ_SIGNATURE, &sdr_reg_file->signature); 3583 writel(0, &sdr_reg_file->debug_data_addr); 3584 writel(0, &sdr_reg_file->cur_stage); 3585 writel(0, &sdr_reg_file->fom); 3586 writel(0, &sdr_reg_file->failing_stage); 3587 writel(0, &sdr_reg_file->debug1); 3588 writel(0, &sdr_reg_file->debug2); 3589 } 3590 3591 /** 3592 * initialize_hps_phy() - Initialize HPS PHY 3593 * 3594 * Initialize HPS PHY. 3595 */ 3596 static void initialize_hps_phy(void) 3597 { 3598 uint32_t reg; 3599 /* 3600 * Tracking also gets configured here because it's in the 3601 * same register. 3602 */ 3603 uint32_t trk_sample_count = 7500; 3604 uint32_t trk_long_idle_sample_count = (10 << 16) | 100; 3605 /* 3606 * Format is number of outer loops in the 16 MSB, sample 3607 * count in 16 LSB. 3608 */ 3609 3610 reg = 0; 3611 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ACDELAYEN_SET(2); 3612 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQDELAYEN_SET(1); 3613 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSDELAYEN_SET(1); 3614 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSLOGICDELAYEN_SET(1); 3615 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_RESETDELAYEN_SET(0); 3616 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_LPDDRDIS_SET(1); 3617 /* 3618 * This field selects the intrinsic latency to RDATA_EN/FULL path. 
3619 * 00-bypass, 01- add 5 cycles, 10- add 10 cycles, 11- add 15 cycles. 3620 */ 3621 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ADDLATSEL_SET(0); 3622 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_SET( 3623 trk_sample_count); 3624 writel(reg, &sdr_ctrl->phy_ctrl0); 3625 3626 reg = 0; 3627 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_SAMPLECOUNT_31_20_SET( 3628 trk_sample_count >> 3629 SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_WIDTH); 3630 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_SET( 3631 trk_long_idle_sample_count); 3632 writel(reg, &sdr_ctrl->phy_ctrl1); 3633 3634 reg = 0; 3635 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_LONGIDLESAMPLECOUNT_31_20_SET( 3636 trk_long_idle_sample_count >> 3637 SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_WIDTH); 3638 writel(reg, &sdr_ctrl->phy_ctrl2); 3639 } 3640 3641 static void initialize_tracking(void) 3642 { 3643 uint32_t concatenated_longidle = 0x0; 3644 uint32_t concatenated_delays = 0x0; 3645 uint32_t concatenated_rw_addr = 0x0; 3646 uint32_t concatenated_refresh = 0x0; 3647 uint32_t trk_sample_count = 7500; 3648 uint32_t dtaps_per_ptap; 3649 uint32_t tmp_delay; 3650 3651 /* 3652 * compute usable version of value in case we skip full 3653 * computation later 3654 */ 3655 dtaps_per_ptap = 0; 3656 tmp_delay = 0; 3657 while (tmp_delay < IO_DELAY_PER_OPA_TAP) { 3658 dtaps_per_ptap++; 3659 tmp_delay += IO_DELAY_PER_DCHAIN_TAP; 3660 } 3661 dtaps_per_ptap--; 3662 3663 concatenated_longidle = concatenated_longidle ^ 10; 3664 /*longidle outer loop */ 3665 concatenated_longidle = concatenated_longidle << 16; 3666 concatenated_longidle = concatenated_longidle ^ 100; 3667 /*longidle sample count */ 3668 concatenated_delays = concatenated_delays ^ 243; 3669 /* trfc, worst case of 933Mhz 4Gb */ 3670 concatenated_delays = concatenated_delays << 8; 3671 concatenated_delays = concatenated_delays ^ 14; 3672 /* trcd, worst case */ 3673 concatenated_delays = concatenated_delays << 8; 3674 concatenated_delays = concatenated_delays ^ 10; 3675 /* vfifo wait */ 3676 concatenated_delays = concatenated_delays << 8; 3677 concatenated_delays = concatenated_delays ^ 4; 3678 /* mux delay */ 3679 3680 concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_IDLE; 3681 concatenated_rw_addr = concatenated_rw_addr << 8; 3682 concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_ACTIVATE_1; 3683 concatenated_rw_addr = concatenated_rw_addr << 8; 3684 concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_SGLE_READ; 3685 concatenated_rw_addr = concatenated_rw_addr << 8; 3686 concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_PRECHARGE_ALL; 3687 3688 concatenated_refresh = concatenated_refresh ^ RW_MGR_REFRESH_ALL; 3689 concatenated_refresh = concatenated_refresh << 24; 3690 concatenated_refresh = concatenated_refresh ^ 1000; /* trefi */ 3691 3692 /* Initialize the register file with the correct data */ 3693 writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap); 3694 writel(trk_sample_count, &sdr_reg_file->trk_sample_count); 3695 writel(concatenated_longidle, &sdr_reg_file->trk_longidle); 3696 writel(concatenated_delays, &sdr_reg_file->delays); 3697 writel(concatenated_rw_addr, &sdr_reg_file->trk_rw_mgr_addr); 3698 writel(RW_MGR_MEM_IF_READ_DQS_WIDTH, &sdr_reg_file->trk_read_dqs_width); 3699 writel(concatenated_refresh, &sdr_reg_file->trk_rfsh); 3700 } 3701 3702 int sdram_calibration_full(void) 3703 { 3704 struct param_type my_param; 3705 struct gbl_type my_gbl; 3706 uint32_t pass; 3707 uint32_t i; 3708 3709 param = &my_param; 3710 gbl = &my_gbl; 3711 3712 /* 
Initialize the debug mode flags */ 3713 gbl->phy_debug_mode_flags = 0; 3714 /* Set the calibration enabled by default */ 3715 gbl->phy_debug_mode_flags |= PHY_DEBUG_ENABLE_CAL_RPT; 3716 /* 3717 * Only sweep all groups (regardless of fail state) by default 3718 * Set enabled read test by default. 3719 */ 3720 #if DISABLE_GUARANTEED_READ 3721 gbl->phy_debug_mode_flags |= PHY_DEBUG_DISABLE_GUARANTEED_READ; 3722 #endif 3723 /* Initialize the register file */ 3724 initialize_reg_file(); 3725 3726 /* Initialize any PHY CSR */ 3727 initialize_hps_phy(); 3728 3729 scc_mgr_initialize(); 3730 3731 initialize_tracking(); 3732 3733 /* USER Enable all ranks, groups */ 3734 for (i = 0; i < RW_MGR_MEM_NUMBER_OF_RANKS; i++) 3735 param->skip_ranks[i] = 0; 3736 for (i = 0; i < NUM_SHADOW_REGS; ++i) 3737 param->skip_shadow_regs[i] = 0; 3738 param->skip_groups = 0; 3739 3740 printf("%s: Preparing to start memory calibration\n", __FILE__); 3741 3742 debug("%s:%d\n", __func__, __LINE__); 3743 debug_cond(DLEVEL == 1, 3744 "DDR3 FULL_RATE ranks=%u cs/dimm=%u dq/dqs=%u,%u vg/dqs=%u,%u ", 3745 RW_MGR_MEM_NUMBER_OF_RANKS, RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM, 3746 RW_MGR_MEM_DQ_PER_READ_DQS, RW_MGR_MEM_DQ_PER_WRITE_DQS, 3747 RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS, 3748 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS); 3749 debug_cond(DLEVEL == 1, 3750 "dqs=%u,%u dq=%u dm=%u ptap_delay=%u dtap_delay=%u ", 3751 RW_MGR_MEM_IF_READ_DQS_WIDTH, RW_MGR_MEM_IF_WRITE_DQS_WIDTH, 3752 RW_MGR_MEM_DATA_WIDTH, RW_MGR_MEM_DATA_MASK_WIDTH, 3753 IO_DELAY_PER_OPA_TAP, IO_DELAY_PER_DCHAIN_TAP); 3754 debug_cond(DLEVEL == 1, "dtap_dqsen_delay=%u, dll=%u", 3755 IO_DELAY_PER_DQS_EN_DCHAIN_TAP, IO_DLL_CHAIN_LENGTH); 3756 debug_cond(DLEVEL == 1, "max values: en_p=%u dqdqs_p=%u en_d=%u dqs_in_d=%u ", 3757 IO_DQS_EN_PHASE_MAX, IO_DQDQS_OUT_PHASE_MAX, 3758 IO_DQS_EN_DELAY_MAX, IO_DQS_IN_DELAY_MAX); 3759 debug_cond(DLEVEL == 1, "io_in_d=%u io_out1_d=%u io_out2_d=%u ", 3760 IO_IO_IN_DELAY_MAX, IO_IO_OUT1_DELAY_MAX, 3761 IO_IO_OUT2_DELAY_MAX); 3762 debug_cond(DLEVEL == 1, "dqs_in_reserve=%u dqs_out_reserve=%u\n", 3763 IO_DQS_IN_RESERVE, IO_DQS_OUT_RESERVE); 3764 3765 hc_initialize_rom_data(); 3766 3767 /* update info for sims */ 3768 reg_file_set_stage(CAL_STAGE_NIL); 3769 reg_file_set_group(0); 3770 3771 /* 3772 * Load global needed for those actions that require 3773 * some dynamic calibration support. 3774 */ 3775 dyn_calib_steps = STATIC_CALIB_STEPS; 3776 /* 3777 * Load global to allow dynamic selection of delay loop settings 3778 * based on calibration mode. 3779 */ 3780 if (!(dyn_calib_steps & CALIB_SKIP_DELAY_LOOPS)) 3781 skip_delay_mask = 0xff; 3782 else 3783 skip_delay_mask = 0x0; 3784 3785 pass = run_mem_calibrate(); 3786 3787 printf("%s: Calibration complete\n", __FILE__); 3788 return pass; 3789 } 3790
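/*
 * A minimal usage sketch, assuming a U-Boot SPL context in which the SDRAM
 * controller and PHY registers have already been programmed by the platform
 * code:
 *
 *	if (!sdram_calibration_full())
 *		hang();
 *
 * The call site shown is illustrative only; the actual caller lives in the
 * platform's SDRAM initialization code.
 */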