1 /* 2 * Copyright Altera Corporation (C) 2012-2015 3 * 4 * SPDX-License-Identifier: BSD-3-Clause 5 */ 6 7 #include <common.h> 8 #include <asm/io.h> 9 #include <asm/arch/sdram.h> 10 #include "sequencer.h" 11 #include "sequencer_auto.h" 12 #include "sequencer_auto_ac_init.h" 13 #include "sequencer_auto_inst_init.h" 14 #include "sequencer_defines.h" 15 16 static struct socfpga_sdr_rw_load_manager *sdr_rw_load_mgr_regs = 17 (struct socfpga_sdr_rw_load_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0x800); 18 19 static struct socfpga_sdr_rw_load_jump_manager *sdr_rw_load_jump_mgr_regs = 20 (struct socfpga_sdr_rw_load_jump_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0xC00); 21 22 static struct socfpga_sdr_reg_file *sdr_reg_file = 23 (struct socfpga_sdr_reg_file *)SDR_PHYGRP_REGFILEGRP_ADDRESS; 24 25 static struct socfpga_sdr_scc_mgr *sdr_scc_mgr = 26 (struct socfpga_sdr_scc_mgr *)(SDR_PHYGRP_SCCGRP_ADDRESS | 0xe00); 27 28 static struct socfpga_phy_mgr_cmd *phy_mgr_cmd = 29 (struct socfpga_phy_mgr_cmd *)SDR_PHYGRP_PHYMGRGRP_ADDRESS; 30 31 static struct socfpga_phy_mgr_cfg *phy_mgr_cfg = 32 (struct socfpga_phy_mgr_cfg *)(SDR_PHYGRP_PHYMGRGRP_ADDRESS | 0x40); 33 34 static struct socfpga_data_mgr *data_mgr = 35 (struct socfpga_data_mgr *)SDR_PHYGRP_DATAMGRGRP_ADDRESS; 36 37 static struct socfpga_sdr_ctrl *sdr_ctrl = 38 (struct socfpga_sdr_ctrl *)SDR_CTRLGRP_ADDRESS; 39 40 #define DELTA_D 1 41 42 /* 43 * In order to reduce ROM size, most of the selectable calibration steps are 44 * decided at compile time based on the user's calibration mode selection, 45 * as captured by the STATIC_CALIB_STEPS selection below. 46 * 47 * However, to support simulation-time selection of fast simulation mode, where 48 * we skip everything except the bare minimum, we need a few of the steps to 49 * be dynamic. 
In those cases, we either use the DYNAMIC_CALIB_STEPS for the 50 * check, which is based on the rtl-supplied value, or we dynamically compute 51 * the value to use based on the dynamically-chosen calibration mode 52 */ 53 54 #define DLEVEL 0 55 #define STATIC_IN_RTL_SIM 0 56 #define STATIC_SKIP_DELAY_LOOPS 0 57 58 #define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | \ 59 STATIC_SKIP_DELAY_LOOPS) 60 61 /* calibration steps requested by the rtl */ 62 uint16_t dyn_calib_steps; 63 64 /* 65 * To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option 66 * instead of static, we use boolean logic to select between 67 * non-skip and skip values 68 * 69 * The mask is set to include all bits when not-skipping, but is 70 * zero when skipping 71 */ 72 73 uint16_t skip_delay_mask; /* mask off bits when skipping/not-skipping */ 74 75 #define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \ 76 ((non_skip_value) & skip_delay_mask) 77 78 struct gbl_type *gbl; 79 struct param_type *param; 80 uint32_t curr_shadow_reg; 81 82 static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn, 83 uint32_t write_group, uint32_t use_dm, 84 uint32_t all_correct, uint32_t *bit_chk, uint32_t all_ranks); 85 86 static void set_failing_group_stage(uint32_t group, uint32_t stage, 87 uint32_t substage) 88 { 89 /* 90 * Only set the global stage if there was not been any other 91 * failing group 92 */ 93 if (gbl->error_stage == CAL_STAGE_NIL) { 94 gbl->error_substage = substage; 95 gbl->error_stage = stage; 96 gbl->error_group = group; 97 } 98 } 99 100 static void reg_file_set_group(u16 set_group) 101 { 102 clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff0000, set_group << 16); 103 } 104 105 static void reg_file_set_stage(u8 set_stage) 106 { 107 clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff, set_stage & 0xff); 108 } 109 110 static void reg_file_set_sub_stage(u8 set_sub_stage) 111 { 112 set_sub_stage &= 0xff; 113 clrsetbits_le32(&sdr_reg_file->cur_stage, 0xff00, set_sub_stage << 8); 114 } 115 116 static void initialize(void) 117 { 118 debug("%s:%d\n", __func__, __LINE__); 119 /* USER calibration has control over path to memory */ 120 /* 121 * In Hard PHY this is a 2-bit control: 122 * 0: AFI Mux Select 123 * 1: DDIO Mux Select 124 */ 125 writel(0x3, &phy_mgr_cfg->mux_sel); 126 127 /* USER memory clock is not stable we begin initialization */ 128 writel(0, &phy_mgr_cfg->reset_mem_stbl); 129 130 /* USER calibration status all set to zero */ 131 writel(0, &phy_mgr_cfg->cal_status); 132 133 writel(0, &phy_mgr_cfg->cal_debug_info); 134 135 if ((dyn_calib_steps & CALIB_SKIP_ALL) != CALIB_SKIP_ALL) { 136 param->read_correct_mask_vg = ((uint32_t)1 << 137 (RW_MGR_MEM_DQ_PER_READ_DQS / 138 RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS)) - 1; 139 param->write_correct_mask_vg = ((uint32_t)1 << 140 (RW_MGR_MEM_DQ_PER_READ_DQS / 141 RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS)) - 1; 142 param->read_correct_mask = ((uint32_t)1 << 143 RW_MGR_MEM_DQ_PER_READ_DQS) - 1; 144 param->write_correct_mask = ((uint32_t)1 << 145 RW_MGR_MEM_DQ_PER_WRITE_DQS) - 1; 146 param->dm_correct_mask = ((uint32_t)1 << 147 (RW_MGR_MEM_DATA_WIDTH / RW_MGR_MEM_DATA_MASK_WIDTH)) 148 - 1; 149 } 150 } 151 152 static void set_rank_and_odt_mask(uint32_t rank, uint32_t odt_mode) 153 { 154 uint32_t odt_mask_0 = 0; 155 uint32_t odt_mask_1 = 0; 156 uint32_t cs_and_odt_mask; 157 158 if (odt_mode == RW_MGR_ODT_MODE_READ_WRITE) { 159 if (RW_MGR_MEM_NUMBER_OF_RANKS == 1) { 160 /* 161 * 1 Rank 162 * Read: ODT = 0 163 * Write: ODT = 1 164 */ 165 odt_mask_0 = 0x0; 166 
odt_mask_1 = 0x1; 167 } else if (RW_MGR_MEM_NUMBER_OF_RANKS == 2) { 168 /* 2 Ranks */ 169 if (RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM == 1) { 170 /* - Dual-Slot , Single-Rank 171 * (1 chip-select per DIMM) 172 * OR 173 * - RDIMM, 4 total CS (2 CS per DIMM) 174 * means 2 DIMM 175 * Since MEM_NUMBER_OF_RANKS is 2 they are 176 * both single rank 177 * with 2 CS each (special for RDIMM) 178 * Read: Turn on ODT on the opposite rank 179 * Write: Turn on ODT on all ranks 180 */ 181 odt_mask_0 = 0x3 & ~(1 << rank); 182 odt_mask_1 = 0x3; 183 } else { 184 /* 185 * USER - Single-Slot , Dual-rank DIMMs 186 * (2 chip-selects per DIMM) 187 * USER Read: Turn on ODT off on all ranks 188 * USER Write: Turn on ODT on active rank 189 */ 190 odt_mask_0 = 0x0; 191 odt_mask_1 = 0x3 & (1 << rank); 192 } 193 } else { 194 /* 4 Ranks 195 * Read: 196 * ----------+-----------------------+ 197 * | | 198 * | ODT | 199 * Read From +-----------------------+ 200 * Rank | 3 | 2 | 1 | 0 | 201 * ----------+-----+-----+-----+-----+ 202 * 0 | 0 | 1 | 0 | 0 | 203 * 1 | 1 | 0 | 0 | 0 | 204 * 2 | 0 | 0 | 0 | 1 | 205 * 3 | 0 | 0 | 1 | 0 | 206 * ----------+-----+-----+-----+-----+ 207 * 208 * Write: 209 * ----------+-----------------------+ 210 * | | 211 * | ODT | 212 * Write To +-----------------------+ 213 * Rank | 3 | 2 | 1 | 0 | 214 * ----------+-----+-----+-----+-----+ 215 * 0 | 0 | 1 | 0 | 1 | 216 * 1 | 1 | 0 | 1 | 0 | 217 * 2 | 0 | 1 | 0 | 1 | 218 * 3 | 1 | 0 | 1 | 0 | 219 * ----------+-----+-----+-----+-----+ 220 */ 221 switch (rank) { 222 case 0: 223 odt_mask_0 = 0x4; 224 odt_mask_1 = 0x5; 225 break; 226 case 1: 227 odt_mask_0 = 0x8; 228 odt_mask_1 = 0xA; 229 break; 230 case 2: 231 odt_mask_0 = 0x1; 232 odt_mask_1 = 0x5; 233 break; 234 case 3: 235 odt_mask_0 = 0x2; 236 odt_mask_1 = 0xA; 237 break; 238 } 239 } 240 } else { 241 odt_mask_0 = 0x0; 242 odt_mask_1 = 0x0; 243 } 244 245 cs_and_odt_mask = 246 (0xFF & ~(1 << rank)) | 247 ((0xFF & odt_mask_0) << 8) | 248 ((0xFF & odt_mask_1) << 16); 249 writel(cs_and_odt_mask, SDR_PHYGRP_RWMGRGRP_ADDRESS | 250 RW_MGR_SET_CS_AND_ODT_MASK_OFFSET); 251 } 252 253 /** 254 * scc_mgr_set() - Set SCC Manager register 255 * @off: Base offset in SCC Manager space 256 * @grp: Read/Write group 257 * @val: Value to be set 258 * 259 * This function sets the SCC Manager (Scan Chain Control Manager) register. 260 */ 261 static void scc_mgr_set(u32 off, u32 grp, u32 val) 262 { 263 writel(val, SDR_PHYGRP_SCCGRP_ADDRESS | off | (grp << 2)); 264 } 265 266 /** 267 * scc_mgr_initialize() - Initialize SCC Manager registers 268 * 269 * Initialize SCC Manager registers. 270 */ 271 static void scc_mgr_initialize(void) 272 { 273 /* 274 * Clear register file for HPS. 
16 (2^4) is the size of the 275 * full register file in the scc mgr: 276 * RFILE_DEPTH = 1 + log2(MEM_DQ_PER_DQS + 1 + MEM_DM_PER_DQS + 277 * MEM_IF_READ_DQS_WIDTH - 1); 278 */ 279 int i; 280 281 for (i = 0; i < 16; i++) { 282 debug_cond(DLEVEL == 1, "%s:%d: Clearing SCC RFILE index %u\n", 283 __func__, __LINE__, i); 284 scc_mgr_set(SCC_MGR_HHP_RFILE_OFFSET, 0, i); 285 } 286 } 287 288 static void scc_mgr_set_dqdqs_output_phase(uint32_t write_group, uint32_t phase) 289 { 290 scc_mgr_set(SCC_MGR_DQDQS_OUT_PHASE_OFFSET, write_group, phase); 291 } 292 293 static void scc_mgr_set_dqs_bus_in_delay(uint32_t read_group, uint32_t delay) 294 { 295 scc_mgr_set(SCC_MGR_DQS_IN_DELAY_OFFSET, read_group, delay); 296 } 297 298 static void scc_mgr_set_dqs_en_phase(uint32_t read_group, uint32_t phase) 299 { 300 scc_mgr_set(SCC_MGR_DQS_EN_PHASE_OFFSET, read_group, phase); 301 } 302 303 static void scc_mgr_set_dqs_en_delay(uint32_t read_group, uint32_t delay) 304 { 305 scc_mgr_set(SCC_MGR_DQS_EN_DELAY_OFFSET, read_group, delay); 306 } 307 308 static void scc_mgr_set_dqs_io_in_delay(uint32_t delay) 309 { 310 scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS, 311 delay); 312 } 313 314 static void scc_mgr_set_dq_in_delay(uint32_t dq_in_group, uint32_t delay) 315 { 316 scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, dq_in_group, delay); 317 } 318 319 static void scc_mgr_set_dq_out1_delay(uint32_t dq_in_group, uint32_t delay) 320 { 321 scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, dq_in_group, delay); 322 } 323 324 static void scc_mgr_set_dqs_out1_delay(uint32_t delay) 325 { 326 scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS, 327 delay); 328 } 329 330 static void scc_mgr_set_dm_out1_delay(uint32_t dm, uint32_t delay) 331 { 332 scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, 333 RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + dm, 334 delay); 335 } 336 337 /* load up dqs config settings */ 338 static void scc_mgr_load_dqs(uint32_t dqs) 339 { 340 writel(dqs, &sdr_scc_mgr->dqs_ena); 341 } 342 343 /* load up dqs io config settings */ 344 static void scc_mgr_load_dqs_io(void) 345 { 346 writel(0, &sdr_scc_mgr->dqs_io_ena); 347 } 348 349 /* load up dq config settings */ 350 static void scc_mgr_load_dq(uint32_t dq_in_group) 351 { 352 writel(dq_in_group, &sdr_scc_mgr->dq_ena); 353 } 354 355 /* load up dm config settings */ 356 static void scc_mgr_load_dm(uint32_t dm) 357 { 358 writel(dm, &sdr_scc_mgr->dm_ena); 359 } 360 361 /** 362 * scc_mgr_set_all_ranks() - Set SCC Manager register for all ranks 363 * @off: Base offset in SCC Manager space 364 * @grp: Read/Write group 365 * @val: Value to be set 366 * @update: If non-zero, trigger SCC Manager update for all ranks 367 * 368 * This function sets the SCC Manager (Scan Chain Control Manager) register 369 * and optionally triggers the SCC update for all ranks. 370 */ 371 static void scc_mgr_set_all_ranks(const u32 off, const u32 grp, const u32 val, 372 const int update) 373 { 374 u32 r; 375 376 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 377 r += NUM_RANKS_PER_SHADOW_REG) { 378 scc_mgr_set(off, grp, val); 379 380 if (update || (r == 0)) { 381 writel(grp, &sdr_scc_mgr->dqs_ena); 382 writel(0, &sdr_scc_mgr->update); 383 } 384 } 385 } 386 387 static void scc_mgr_set_dqs_en_phase_all_ranks(u32 read_group, u32 phase) 388 { 389 /* 390 * USER although the h/w doesn't support different phases per 391 * shadow register, for simplicity our scc manager modeling 392 * keeps different phase settings per shadow reg, and it's 393 * important for us to keep them in sync to match h/w. 
394 * for efficiency, the scan chain update should occur only 395 * once to sr0. 396 */ 397 scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_PHASE_OFFSET, 398 read_group, phase, 0); 399 } 400 401 static void scc_mgr_set_dqdqs_output_phase_all_ranks(uint32_t write_group, 402 uint32_t phase) 403 { 404 /* 405 * USER although the h/w doesn't support different phases per 406 * shadow register, for simplicity our scc manager modeling 407 * keeps different phase settings per shadow reg, and it's 408 * important for us to keep them in sync to match h/w. 409 * for efficiency, the scan chain update should occur only 410 * once to sr0. 411 */ 412 scc_mgr_set_all_ranks(SCC_MGR_DQDQS_OUT_PHASE_OFFSET, 413 write_group, phase, 0); 414 } 415 416 static void scc_mgr_set_dqs_en_delay_all_ranks(uint32_t read_group, 417 uint32_t delay) 418 { 419 /* 420 * In shadow register mode, the T11 settings are stored in 421 * registers in the core, which are updated by the DQS_ENA 422 * signals. Not issuing the SCC_MGR_UPD command allows us to 423 * save lots of rank switching overhead, by calling 424 * select_shadow_regs_for_update with update_scan_chains 425 * set to 0. 426 */ 427 scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_DELAY_OFFSET, 428 read_group, delay, 1); 429 writel(0, &sdr_scc_mgr->update); 430 } 431 432 /** 433 * scc_mgr_set_oct_out1_delay() - Set OCT output delay 434 * @write_group: Write group 435 * @delay: Delay value 436 * 437 * This function sets the OCT output delay in SCC manager. 438 */ 439 static void scc_mgr_set_oct_out1_delay(const u32 write_group, const u32 delay) 440 { 441 const int ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH / 442 RW_MGR_MEM_IF_WRITE_DQS_WIDTH; 443 const int base = write_group * ratio; 444 int i; 445 /* 446 * Load the setting in the SCC manager 447 * Although OCT affects only write data, the OCT delay is controlled 448 * by the DQS logic block which is instantiated once per read group. 449 * For protocols where a write group consists of multiple read groups, 450 * the setting must be set multiple times. 451 */ 452 for (i = 0; i < ratio; i++) 453 scc_mgr_set(SCC_MGR_OCT_OUT1_DELAY_OFFSET, base + i, delay); 454 } 455 456 /** 457 * scc_mgr_set_hhp_extras() - Set HHP extras. 458 * 459 * Load the fixed setting in the SCC manager HHP extras. 460 */ 461 static void scc_mgr_set_hhp_extras(void) 462 { 463 /* 464 * Load the fixed setting in the SCC manager 465 * bits: 0:0 = 1'b1 - DQS bypass 466 * bits: 1:1 = 1'b1 - DQ bypass 467 * bits: 4:2 = 3'b001 - rfifo_mode 468 * bits: 6:5 = 2'b01 - rfifo clock_select 469 * bits: 7:7 = 1'b0 - separate gating from ungating setting 470 * bits: 8:8 = 1'b0 - separate OE from Output delay setting 471 */ 472 const u32 value = (0 << 8) | (0 << 7) | (1 << 5) | 473 (1 << 2) | (1 << 1) | (1 << 0); 474 const u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | 475 SCC_MGR_HHP_GLOBALS_OFFSET | 476 SCC_MGR_HHP_EXTRAS_OFFSET; 477 478 debug_cond(DLEVEL == 1, "%s:%d Setting HHP Extras\n", 479 __func__, __LINE__); 480 writel(value, addr); 481 debug_cond(DLEVEL == 1, "%s:%d Done Setting HHP Extras\n", 482 __func__, __LINE__); 483 } 484 485 /** 486 * scc_mgr_zero_all() - Zero all DQS config 487 * 488 * Zero all DQS config. 
489 */ 490 static void scc_mgr_zero_all(void) 491 { 492 int i, r; 493 494 /* 495 * USER Zero all DQS config settings, across all groups and all 496 * shadow registers 497 */ 498 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 499 r += NUM_RANKS_PER_SHADOW_REG) { 500 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { 501 /* 502 * The phases actually don't exist on a per-rank basis, 503 * but there's no harm updating them several times, so 504 * let's keep the code simple. 505 */ 506 scc_mgr_set_dqs_bus_in_delay(i, IO_DQS_IN_RESERVE); 507 scc_mgr_set_dqs_en_phase(i, 0); 508 scc_mgr_set_dqs_en_delay(i, 0); 509 } 510 511 for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) { 512 scc_mgr_set_dqdqs_output_phase(i, 0); 513 /* Arria V/Cyclone V don't have out2. */ 514 scc_mgr_set_oct_out1_delay(i, IO_DQS_OUT_RESERVE); 515 } 516 } 517 518 /* Multicast to all DQS group enables. */ 519 writel(0xff, &sdr_scc_mgr->dqs_ena); 520 writel(0, &sdr_scc_mgr->update); 521 } 522 523 /** 524 * scc_set_bypass_mode() - Set bypass mode and trigger SCC update 525 * @write_group: Write group 526 * 527 * Set bypass mode and trigger SCC update. 528 */ 529 static void scc_set_bypass_mode(const u32 write_group) 530 { 531 /* Multicast to all DQ enables. */ 532 writel(0xff, &sdr_scc_mgr->dq_ena); 533 writel(0xff, &sdr_scc_mgr->dm_ena); 534 535 /* Update current DQS IO enable. */ 536 writel(0, &sdr_scc_mgr->dqs_io_ena); 537 538 /* Update the DQS logic. */ 539 writel(write_group, &sdr_scc_mgr->dqs_ena); 540 541 /* Hit update. */ 542 writel(0, &sdr_scc_mgr->update); 543 } 544 545 /** 546 * scc_mgr_load_dqs_for_write_group() - Load DQS settings for Write Group 547 * @write_group: Write group 548 * 549 * Load DQS settings for Write Group, do not trigger SCC update. 550 */ 551 static void scc_mgr_load_dqs_for_write_group(const u32 write_group) 552 { 553 const int ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH / 554 RW_MGR_MEM_IF_WRITE_DQS_WIDTH; 555 const int base = write_group * ratio; 556 int i; 557 /* 558 * Load the setting in the SCC manager 559 * Although OCT affects only write data, the OCT delay is controlled 560 * by the DQS logic block which is instantiated once per read group. 561 * For protocols where a write group consists of multiple read groups, 562 * the setting must be set multiple times. 563 */ 564 for (i = 0; i < ratio; i++) 565 writel(base + i, &sdr_scc_mgr->dqs_ena); 566 } 567 568 /** 569 * scc_mgr_zero_group() - Zero all configs for a group 570 * 571 * Zero DQ, DM, DQS and OCT configs for a group. 572 */ 573 static void scc_mgr_zero_group(const u32 write_group, const int out_only) 574 { 575 int i, r; 576 577 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 578 r += NUM_RANKS_PER_SHADOW_REG) { 579 /* Zero all DQ config settings. */ 580 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 581 scc_mgr_set_dq_out1_delay(i, 0); 582 if (!out_only) 583 scc_mgr_set_dq_in_delay(i, 0); 584 } 585 586 /* Multicast to all DQ enables. */ 587 writel(0xff, &sdr_scc_mgr->dq_ena); 588 589 /* Zero all DM config settings. */ 590 for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) 591 scc_mgr_set_dm_out1_delay(i, 0); 592 593 /* Multicast to all DM enables. */ 594 writel(0xff, &sdr_scc_mgr->dm_ena); 595 596 /* Zero all DQS IO settings. */ 597 if (!out_only) 598 scc_mgr_set_dqs_io_in_delay(0); 599 600 /* Arria V/Cyclone V don't have out2. 
*/ 601 scc_mgr_set_dqs_out1_delay(IO_DQS_OUT_RESERVE); 602 scc_mgr_set_oct_out1_delay(write_group, IO_DQS_OUT_RESERVE); 603 scc_mgr_load_dqs_for_write_group(write_group); 604 605 /* Multicast to all DQS IO enables (only 1 in total). */ 606 writel(0, &sdr_scc_mgr->dqs_io_ena); 607 608 /* Hit update to zero everything. */ 609 writel(0, &sdr_scc_mgr->update); 610 } 611 } 612 613 /* 614 * apply and load a particular input delay for the DQ pins in a group 615 * group_bgn is the index of the first dq pin (in the write group) 616 */ 617 static void scc_mgr_apply_group_dq_in_delay(uint32_t group_bgn, uint32_t delay) 618 { 619 uint32_t i, p; 620 621 for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) { 622 scc_mgr_set_dq_in_delay(p, delay); 623 scc_mgr_load_dq(p); 624 } 625 } 626 627 /** 628 * scc_mgr_apply_group_dq_out1_delay() - Apply and load an output delay for the DQ pins in a group 629 * @delay: Delay value 630 * 631 * Apply and load a particular output delay for the DQ pins in a group. 632 */ 633 static void scc_mgr_apply_group_dq_out1_delay(const u32 delay) 634 { 635 int i; 636 637 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 638 scc_mgr_set_dq_out1_delay(i, delay); 639 scc_mgr_load_dq(i); 640 } 641 } 642 643 /* apply and load a particular output delay for the DM pins in a group */ 644 static void scc_mgr_apply_group_dm_out1_delay(uint32_t delay1) 645 { 646 uint32_t i; 647 648 for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) { 649 scc_mgr_set_dm_out1_delay(i, delay1); 650 scc_mgr_load_dm(i); 651 } 652 } 653 654 655 /* apply and load delay on both DQS and OCT out1 */ 656 static void scc_mgr_apply_group_dqs_io_and_oct_out1(uint32_t write_group, 657 uint32_t delay) 658 { 659 scc_mgr_set_dqs_out1_delay(delay); 660 scc_mgr_load_dqs_io(); 661 662 scc_mgr_set_oct_out1_delay(write_group, delay); 663 scc_mgr_load_dqs_for_write_group(write_group); 664 } 665 666 /* apply a delay to the entire output side: DQ, DM, DQS, OCT */ 667 static void scc_mgr_apply_group_all_out_delay_add(uint32_t write_group, 668 uint32_t group_bgn, 669 uint32_t delay) 670 { 671 uint32_t i, p, new_delay; 672 673 /* dq shift */ 674 for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) { 675 new_delay = READ_SCC_DQ_OUT2_DELAY; 676 new_delay += delay; 677 678 if (new_delay > IO_IO_OUT2_DELAY_MAX) { 679 debug_cond(DLEVEL == 1, "%s:%d (%u, %u, %u) DQ[%u,%u]:\ 680 %u > %lu => %lu", __func__, __LINE__, 681 write_group, group_bgn, delay, i, p, new_delay, 682 (long unsigned int)IO_IO_OUT2_DELAY_MAX, 683 (long unsigned int)IO_IO_OUT2_DELAY_MAX); 684 new_delay = IO_IO_OUT2_DELAY_MAX; 685 } 686 687 scc_mgr_load_dq(i); 688 } 689 690 /* dm shift */ 691 for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) { 692 new_delay = READ_SCC_DM_IO_OUT2_DELAY; 693 new_delay += delay; 694 695 if (new_delay > IO_IO_OUT2_DELAY_MAX) { 696 debug_cond(DLEVEL == 1, "%s:%d (%u, %u, %u) DM[%u]:\ 697 %u > %lu => %lu\n", __func__, __LINE__, 698 write_group, group_bgn, delay, i, new_delay, 699 (long unsigned int)IO_IO_OUT2_DELAY_MAX, 700 (long unsigned int)IO_IO_OUT2_DELAY_MAX); 701 new_delay = IO_IO_OUT2_DELAY_MAX; 702 } 703 704 scc_mgr_load_dm(i); 705 } 706 707 /* dqs shift */ 708 new_delay = READ_SCC_DQS_IO_OUT2_DELAY; 709 new_delay += delay; 710 711 if (new_delay > IO_IO_OUT2_DELAY_MAX) { 712 debug_cond(DLEVEL == 1, "%s:%d (%u, %u, %u) DQS: %u > %d => %d;" 713 " adding %u to OUT1\n", __func__, __LINE__, 714 write_group, group_bgn, delay, new_delay, 715 IO_IO_OUT2_DELAY_MAX, IO_IO_OUT2_DELAY_MAX, 716 new_delay - 
IO_IO_OUT2_DELAY_MAX); 717 scc_mgr_set_dqs_out1_delay(new_delay - 718 IO_IO_OUT2_DELAY_MAX); 719 new_delay = IO_IO_OUT2_DELAY_MAX; 720 } 721 722 scc_mgr_load_dqs_io(); 723 724 /* oct shift */ 725 new_delay = READ_SCC_OCT_OUT2_DELAY; 726 new_delay += delay; 727 728 if (new_delay > IO_IO_OUT2_DELAY_MAX) { 729 debug_cond(DLEVEL == 1, "%s:%d (%u, %u, %u) DQS: %u > %d => %d;" 730 " adding %u to OUT1\n", __func__, __LINE__, 731 write_group, group_bgn, delay, new_delay, 732 IO_IO_OUT2_DELAY_MAX, IO_IO_OUT2_DELAY_MAX, 733 new_delay - IO_IO_OUT2_DELAY_MAX); 734 scc_mgr_set_oct_out1_delay(write_group, new_delay - 735 IO_IO_OUT2_DELAY_MAX); 736 new_delay = IO_IO_OUT2_DELAY_MAX; 737 } 738 739 scc_mgr_load_dqs_for_write_group(write_group); 740 } 741 742 /* 743 * USER apply a delay to the entire output side (DQ, DM, DQS, OCT) 744 * and to all ranks 745 */ 746 static void scc_mgr_apply_group_all_out_delay_add_all_ranks( 747 uint32_t write_group, uint32_t group_bgn, uint32_t delay) 748 { 749 uint32_t r; 750 751 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 752 r += NUM_RANKS_PER_SHADOW_REG) { 753 scc_mgr_apply_group_all_out_delay_add(write_group, 754 group_bgn, delay); 755 writel(0, &sdr_scc_mgr->update); 756 } 757 } 758 759 /* optimization used to recover some slots in ddr3 inst_rom */ 760 /* could be applied to other protocols if we wanted to */ 761 static void set_jump_as_return(void) 762 { 763 /* 764 * to save space, we replace return with jump to special shared 765 * RETURN instruction so we set the counter to large value so that 766 * we always jump 767 */ 768 writel(0xff, &sdr_rw_load_mgr_regs->load_cntr0); 769 writel(RW_MGR_RETURN, &sdr_rw_load_jump_mgr_regs->load_jump_add0); 770 } 771 772 /* 773 * should always use constants as argument to ensure all computations are 774 * performed at compile time 775 */ 776 static void delay_for_n_mem_clocks(const uint32_t clocks) 777 { 778 uint32_t afi_clocks; 779 uint8_t inner = 0; 780 uint8_t outer = 0; 781 uint16_t c_loop = 0; 782 783 debug("%s:%d: clocks=%u ... 
start\n", __func__, __LINE__, clocks); 784 785 786 afi_clocks = (clocks + AFI_RATE_RATIO-1) / AFI_RATE_RATIO; 787 /* scale (rounding up) to get afi clocks */ 788 789 /* 790 * Note, we don't bother accounting for being off a little bit 791 * because of a few extra instructions in outer loops 792 * Note, the loops have a test at the end, and do the test before 793 * the decrement, and so always perform the loop 794 * 1 time more than the counter value 795 */ 796 if (afi_clocks == 0) { 797 ; 798 } else if (afi_clocks <= 0x100) { 799 inner = afi_clocks-1; 800 outer = 0; 801 c_loop = 0; 802 } else if (afi_clocks <= 0x10000) { 803 inner = 0xff; 804 outer = (afi_clocks-1) >> 8; 805 c_loop = 0; 806 } else { 807 inner = 0xff; 808 outer = 0xff; 809 c_loop = (afi_clocks-1) >> 16; 810 } 811 812 /* 813 * rom instructions are structured as follows: 814 * 815 * IDLE_LOOP2: jnz cntr0, TARGET_A 816 * IDLE_LOOP1: jnz cntr1, TARGET_B 817 * return 818 * 819 * so, when doing nested loops, TARGET_A is set to IDLE_LOOP2, and 820 * TARGET_B is set to IDLE_LOOP2 as well 821 * 822 * if we have no outer loop, though, then we can use IDLE_LOOP1 only, 823 * and set TARGET_B to IDLE_LOOP1 and we skip IDLE_LOOP2 entirely 824 * 825 * a little confusing, but it helps save precious space in the inst_rom 826 * and sequencer rom and keeps the delays more accurate and reduces 827 * overhead 828 */ 829 if (afi_clocks <= 0x100) { 830 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner), 831 &sdr_rw_load_mgr_regs->load_cntr1); 832 833 writel(RW_MGR_IDLE_LOOP1, 834 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 835 836 writel(RW_MGR_IDLE_LOOP1, SDR_PHYGRP_RWMGRGRP_ADDRESS | 837 RW_MGR_RUN_SINGLE_GROUP_OFFSET); 838 } else { 839 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner), 840 &sdr_rw_load_mgr_regs->load_cntr0); 841 842 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer), 843 &sdr_rw_load_mgr_regs->load_cntr1); 844 845 writel(RW_MGR_IDLE_LOOP2, 846 &sdr_rw_load_jump_mgr_regs->load_jump_add0); 847 848 writel(RW_MGR_IDLE_LOOP2, 849 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 850 851 /* hack to get around compiler not being smart enough */ 852 if (afi_clocks <= 0x10000) { 853 /* only need to run once */ 854 writel(RW_MGR_IDLE_LOOP2, SDR_PHYGRP_RWMGRGRP_ADDRESS | 855 RW_MGR_RUN_SINGLE_GROUP_OFFSET); 856 } else { 857 do { 858 writel(RW_MGR_IDLE_LOOP2, 859 SDR_PHYGRP_RWMGRGRP_ADDRESS | 860 RW_MGR_RUN_SINGLE_GROUP_OFFSET); 861 } while (c_loop-- != 0); 862 } 863 } 864 debug("%s:%d clocks=%u ... 
end\n", __func__, __LINE__, clocks); 865 } 866 867 static void rw_mgr_mem_initialize(void) 868 { 869 uint32_t r; 870 uint32_t grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS | 871 RW_MGR_RUN_SINGLE_GROUP_OFFSET; 872 873 debug("%s:%d\n", __func__, __LINE__); 874 875 /* The reset / cke part of initialization is broadcasted to all ranks */ 876 writel(RW_MGR_RANK_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS | 877 RW_MGR_SET_CS_AND_ODT_MASK_OFFSET); 878 879 /* 880 * Here's how you load register for a loop 881 * Counters are located @ 0x800 882 * Jump address are located @ 0xC00 883 * For both, registers 0 to 3 are selected using bits 3 and 2, like 884 * in 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C 885 * I know this ain't pretty, but Avalon bus throws away the 2 least 886 * significant bits 887 */ 888 889 /* start with memory RESET activated */ 890 891 /* tINIT = 200us */ 892 893 /* 894 * 200us @ 266MHz (3.75 ns) ~ 54000 clock cycles 895 * If a and b are the number of iteration in 2 nested loops 896 * it takes the following number of cycles to complete the operation: 897 * number_of_cycles = ((2 + n) * a + 2) * b 898 * where n is the number of instruction in the inner loop 899 * One possible solution is n = 0 , a = 256 , b = 106 => a = FF, 900 * b = 6A 901 */ 902 903 /* Load counters */ 904 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR0_VAL), 905 &sdr_rw_load_mgr_regs->load_cntr0); 906 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR1_VAL), 907 &sdr_rw_load_mgr_regs->load_cntr1); 908 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR2_VAL), 909 &sdr_rw_load_mgr_regs->load_cntr2); 910 911 /* Load jump address */ 912 writel(RW_MGR_INIT_RESET_0_CKE_0, 913 &sdr_rw_load_jump_mgr_regs->load_jump_add0); 914 writel(RW_MGR_INIT_RESET_0_CKE_0, 915 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 916 writel(RW_MGR_INIT_RESET_0_CKE_0, 917 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 918 919 /* Execute count instruction */ 920 writel(RW_MGR_INIT_RESET_0_CKE_0, grpaddr); 921 922 /* indicate that memory is stable */ 923 writel(1, &phy_mgr_cfg->reset_mem_stbl); 924 925 /* 926 * transition the RESET to high 927 * Wait for 500us 928 */ 929 930 /* 931 * 500us @ 266MHz (3.75 ns) ~ 134000 clock cycles 932 * If a and b are the number of iteration in 2 nested loops 933 * it takes the following number of cycles to complete the operation 934 * number_of_cycles = ((2 + n) * a + 2) * b 935 * where n is the number of instruction in the inner loop 936 * One possible solution is n = 2 , a = 131 , b = 256 => a = 83, 937 * b = FF 938 */ 939 940 /* Load counters */ 941 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR0_VAL), 942 &sdr_rw_load_mgr_regs->load_cntr0); 943 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR1_VAL), 944 &sdr_rw_load_mgr_regs->load_cntr1); 945 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR2_VAL), 946 &sdr_rw_load_mgr_regs->load_cntr2); 947 948 /* Load jump address */ 949 writel(RW_MGR_INIT_RESET_1_CKE_0, 950 &sdr_rw_load_jump_mgr_regs->load_jump_add0); 951 writel(RW_MGR_INIT_RESET_1_CKE_0, 952 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 953 writel(RW_MGR_INIT_RESET_1_CKE_0, 954 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 955 956 writel(RW_MGR_INIT_RESET_1_CKE_0, grpaddr); 957 958 /* bring up clock enable */ 959 960 /* tXRP < 250 ck cycles */ 961 delay_for_n_mem_clocks(250); 962 963 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { 964 if (param->skip_ranks[r]) { 965 /* request to skip the rank */ 966 continue; 967 } 968 969 /* set rank */ 970 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); 
971 972 /* 973 * USER Use Mirror-ed commands for odd ranks if address 974 * mirrorring is on 975 */ 976 if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) { 977 set_jump_as_return(); 978 writel(RW_MGR_MRS2_MIRR, grpaddr); 979 delay_for_n_mem_clocks(4); 980 set_jump_as_return(); 981 writel(RW_MGR_MRS3_MIRR, grpaddr); 982 delay_for_n_mem_clocks(4); 983 set_jump_as_return(); 984 writel(RW_MGR_MRS1_MIRR, grpaddr); 985 delay_for_n_mem_clocks(4); 986 set_jump_as_return(); 987 writel(RW_MGR_MRS0_DLL_RESET_MIRR, grpaddr); 988 } else { 989 set_jump_as_return(); 990 writel(RW_MGR_MRS2, grpaddr); 991 delay_for_n_mem_clocks(4); 992 set_jump_as_return(); 993 writel(RW_MGR_MRS3, grpaddr); 994 delay_for_n_mem_clocks(4); 995 set_jump_as_return(); 996 writel(RW_MGR_MRS1, grpaddr); 997 set_jump_as_return(); 998 writel(RW_MGR_MRS0_DLL_RESET, grpaddr); 999 } 1000 set_jump_as_return(); 1001 writel(RW_MGR_ZQCL, grpaddr); 1002 1003 /* tZQinit = tDLLK = 512 ck cycles */ 1004 delay_for_n_mem_clocks(512); 1005 } 1006 } 1007 1008 /* 1009 * At the end of calibration we have to program the user settings in, and 1010 * USER hand off the memory to the user. 1011 */ 1012 static void rw_mgr_mem_handoff(void) 1013 { 1014 uint32_t r; 1015 uint32_t grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS | 1016 RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1017 1018 debug("%s:%d\n", __func__, __LINE__); 1019 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { 1020 if (param->skip_ranks[r]) 1021 /* request to skip the rank */ 1022 continue; 1023 /* set rank */ 1024 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); 1025 1026 /* precharge all banks ... */ 1027 writel(RW_MGR_PRECHARGE_ALL, grpaddr); 1028 1029 /* load up MR settings specified by user */ 1030 1031 /* 1032 * Use Mirror-ed commands for odd ranks if address 1033 * mirrorring is on 1034 */ 1035 if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) { 1036 set_jump_as_return(); 1037 writel(RW_MGR_MRS2_MIRR, grpaddr); 1038 delay_for_n_mem_clocks(4); 1039 set_jump_as_return(); 1040 writel(RW_MGR_MRS3_MIRR, grpaddr); 1041 delay_for_n_mem_clocks(4); 1042 set_jump_as_return(); 1043 writel(RW_MGR_MRS1_MIRR, grpaddr); 1044 delay_for_n_mem_clocks(4); 1045 set_jump_as_return(); 1046 writel(RW_MGR_MRS0_USER_MIRR, grpaddr); 1047 } else { 1048 set_jump_as_return(); 1049 writel(RW_MGR_MRS2, grpaddr); 1050 delay_for_n_mem_clocks(4); 1051 set_jump_as_return(); 1052 writel(RW_MGR_MRS3, grpaddr); 1053 delay_for_n_mem_clocks(4); 1054 set_jump_as_return(); 1055 writel(RW_MGR_MRS1, grpaddr); 1056 delay_for_n_mem_clocks(4); 1057 set_jump_as_return(); 1058 writel(RW_MGR_MRS0_USER, grpaddr); 1059 } 1060 /* 1061 * USER need to wait tMOD (12CK or 15ns) time before issuing 1062 * other commands, but we will have plenty of NIOS cycles before 1063 * actual handoff so its okay. 1064 */ 1065 } 1066 } 1067 1068 /* 1069 * performs a guaranteed read on the patterns we are going to use during a 1070 * read test to ensure memory works 1071 */ 1072 static uint32_t rw_mgr_mem_calibrate_read_test_patterns(uint32_t rank_bgn, 1073 uint32_t group, uint32_t num_tries, uint32_t *bit_chk, 1074 uint32_t all_ranks) 1075 { 1076 uint32_t r, vg; 1077 uint32_t correct_mask_vg; 1078 uint32_t tmp_bit_chk; 1079 uint32_t rank_end = all_ranks ? 
RW_MGR_MEM_NUMBER_OF_RANKS : 1080 (rank_bgn + NUM_RANKS_PER_SHADOW_REG); 1081 uint32_t addr; 1082 uint32_t base_rw_mgr; 1083 1084 *bit_chk = param->read_correct_mask; 1085 correct_mask_vg = param->read_correct_mask_vg; 1086 1087 for (r = rank_bgn; r < rank_end; r++) { 1088 if (param->skip_ranks[r]) 1089 /* request to skip the rank */ 1090 continue; 1091 1092 /* set rank */ 1093 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); 1094 1095 /* Load up a constant bursts of read commands */ 1096 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0); 1097 writel(RW_MGR_GUARANTEED_READ, 1098 &sdr_rw_load_jump_mgr_regs->load_jump_add0); 1099 1100 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1); 1101 writel(RW_MGR_GUARANTEED_READ_CONT, 1102 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 1103 1104 tmp_bit_chk = 0; 1105 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS-1; ; vg--) { 1106 /* reset the fifos to get pointers to known state */ 1107 1108 writel(0, &phy_mgr_cmd->fifo_reset); 1109 writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS | 1110 RW_MGR_RESET_READ_DATAPATH_OFFSET); 1111 1112 tmp_bit_chk = tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS 1113 / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS); 1114 1115 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1116 writel(RW_MGR_GUARANTEED_READ, addr + 1117 ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS + 1118 vg) << 2)); 1119 1120 base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS); 1121 tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & (~base_rw_mgr)); 1122 1123 if (vg == 0) 1124 break; 1125 } 1126 *bit_chk &= tmp_bit_chk; 1127 } 1128 1129 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1130 writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2)); 1131 1132 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 1133 debug_cond(DLEVEL == 1, "%s:%d test_load_patterns(%u,ALL) => (%u == %u) =>\ 1134 %lu\n", __func__, __LINE__, group, *bit_chk, param->read_correct_mask, 1135 (long unsigned int)(*bit_chk == param->read_correct_mask)); 1136 return *bit_chk == param->read_correct_mask; 1137 } 1138 1139 static uint32_t rw_mgr_mem_calibrate_read_test_patterns_all_ranks 1140 (uint32_t group, uint32_t num_tries, uint32_t *bit_chk) 1141 { 1142 return rw_mgr_mem_calibrate_read_test_patterns(0, group, 1143 num_tries, bit_chk, 1); 1144 } 1145 1146 /* load up the patterns we are going to use during a read test */ 1147 static void rw_mgr_mem_calibrate_read_load_patterns(uint32_t rank_bgn, 1148 uint32_t all_ranks) 1149 { 1150 uint32_t r; 1151 uint32_t rank_end = all_ranks ? 
RW_MGR_MEM_NUMBER_OF_RANKS : 1152 (rank_bgn + NUM_RANKS_PER_SHADOW_REG); 1153 1154 debug("%s:%d\n", __func__, __LINE__); 1155 for (r = rank_bgn; r < rank_end; r++) { 1156 if (param->skip_ranks[r]) 1157 /* request to skip the rank */ 1158 continue; 1159 1160 /* set rank */ 1161 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); 1162 1163 /* Load up a constant bursts */ 1164 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0); 1165 1166 writel(RW_MGR_GUARANTEED_WRITE_WAIT0, 1167 &sdr_rw_load_jump_mgr_regs->load_jump_add0); 1168 1169 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1); 1170 1171 writel(RW_MGR_GUARANTEED_WRITE_WAIT1, 1172 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 1173 1174 writel(0x04, &sdr_rw_load_mgr_regs->load_cntr2); 1175 1176 writel(RW_MGR_GUARANTEED_WRITE_WAIT2, 1177 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 1178 1179 writel(0x04, &sdr_rw_load_mgr_regs->load_cntr3); 1180 1181 writel(RW_MGR_GUARANTEED_WRITE_WAIT3, 1182 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 1183 1184 writel(RW_MGR_GUARANTEED_WRITE, SDR_PHYGRP_RWMGRGRP_ADDRESS | 1185 RW_MGR_RUN_SINGLE_GROUP_OFFSET); 1186 } 1187 1188 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 1189 } 1190 1191 /* 1192 * try a read and see if it returns correct data back. has dummy reads 1193 * inserted into the mix used to align dqs enable. has more thorough checks 1194 * than the regular read test. 1195 */ 1196 static uint32_t rw_mgr_mem_calibrate_read_test(uint32_t rank_bgn, uint32_t group, 1197 uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk, 1198 uint32_t all_groups, uint32_t all_ranks) 1199 { 1200 uint32_t r, vg; 1201 uint32_t correct_mask_vg; 1202 uint32_t tmp_bit_chk; 1203 uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS : 1204 (rank_bgn + NUM_RANKS_PER_SHADOW_REG); 1205 uint32_t addr; 1206 uint32_t base_rw_mgr; 1207 1208 *bit_chk = param->read_correct_mask; 1209 correct_mask_vg = param->read_correct_mask_vg; 1210 1211 uint32_t quick_read_mode = (((STATIC_CALIB_STEPS) & 1212 CALIB_SKIP_DELAY_SWEEPS) && ENABLE_SUPER_QUICK_CALIBRATION); 1213 1214 for (r = rank_bgn; r < rank_end; r++) { 1215 if (param->skip_ranks[r]) 1216 /* request to skip the rank */ 1217 continue; 1218 1219 /* set rank */ 1220 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); 1221 1222 writel(0x10, &sdr_rw_load_mgr_regs->load_cntr1); 1223 1224 writel(RW_MGR_READ_B2B_WAIT1, 1225 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 1226 1227 writel(0x10, &sdr_rw_load_mgr_regs->load_cntr2); 1228 writel(RW_MGR_READ_B2B_WAIT2, 1229 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 1230 1231 if (quick_read_mode) 1232 writel(0x1, &sdr_rw_load_mgr_regs->load_cntr0); 1233 /* need at least two (1+1) reads to capture failures */ 1234 else if (all_groups) 1235 writel(0x06, &sdr_rw_load_mgr_regs->load_cntr0); 1236 else 1237 writel(0x32, &sdr_rw_load_mgr_regs->load_cntr0); 1238 1239 writel(RW_MGR_READ_B2B, 1240 &sdr_rw_load_jump_mgr_regs->load_jump_add0); 1241 if (all_groups) 1242 writel(RW_MGR_MEM_IF_READ_DQS_WIDTH * 1243 RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1, 1244 &sdr_rw_load_mgr_regs->load_cntr3); 1245 else 1246 writel(0x0, &sdr_rw_load_mgr_regs->load_cntr3); 1247 1248 writel(RW_MGR_READ_B2B, 1249 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 1250 1251 tmp_bit_chk = 0; 1252 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS-1; ; vg--) { 1253 /* reset the fifos to get pointers to known state */ 1254 writel(0, &phy_mgr_cmd->fifo_reset); 1255 writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS | 1256 RW_MGR_RESET_READ_DATAPATH_OFFSET); 1257 1258 tmp_bit_chk = 
tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS 1259 / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS); 1260 1261 if (all_groups) 1262 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_ALL_GROUPS_OFFSET; 1263 else 1264 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1265 1266 writel(RW_MGR_READ_B2B, addr + 1267 ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS + 1268 vg) << 2)); 1269 1270 base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS); 1271 tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~(base_rw_mgr)); 1272 1273 if (vg == 0) 1274 break; 1275 } 1276 *bit_chk &= tmp_bit_chk; 1277 } 1278 1279 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1280 writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2)); 1281 1282 if (all_correct) { 1283 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 1284 debug_cond(DLEVEL == 2, "%s:%d read_test(%u,ALL,%u) =>\ 1285 (%u == %u) => %lu", __func__, __LINE__, group, 1286 all_groups, *bit_chk, param->read_correct_mask, 1287 (long unsigned int)(*bit_chk == 1288 param->read_correct_mask)); 1289 return *bit_chk == param->read_correct_mask; 1290 } else { 1291 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 1292 debug_cond(DLEVEL == 2, "%s:%d read_test(%u,ONE,%u) =>\ 1293 (%u != %lu) => %lu\n", __func__, __LINE__, 1294 group, all_groups, *bit_chk, (long unsigned int)0, 1295 (long unsigned int)(*bit_chk != 0x00)); 1296 return *bit_chk != 0x00; 1297 } 1298 } 1299 1300 static uint32_t rw_mgr_mem_calibrate_read_test_all_ranks(uint32_t group, 1301 uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk, 1302 uint32_t all_groups) 1303 { 1304 return rw_mgr_mem_calibrate_read_test(0, group, num_tries, all_correct, 1305 bit_chk, all_groups, 1); 1306 } 1307 1308 static void rw_mgr_incr_vfifo(uint32_t grp, uint32_t *v) 1309 { 1310 writel(grp, &phy_mgr_cmd->inc_vfifo_hard_phy); 1311 (*v)++; 1312 } 1313 1314 static void rw_mgr_decr_vfifo(uint32_t grp, uint32_t *v) 1315 { 1316 uint32_t i; 1317 1318 for (i = 0; i < VFIFO_SIZE-1; i++) 1319 rw_mgr_incr_vfifo(grp, v); 1320 } 1321 1322 static int find_vfifo_read(uint32_t grp, uint32_t *bit_chk) 1323 { 1324 uint32_t v; 1325 uint32_t fail_cnt = 0; 1326 uint32_t test_status; 1327 1328 for (v = 0; v < VFIFO_SIZE; ) { 1329 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo %u\n", 1330 __func__, __LINE__, v); 1331 test_status = rw_mgr_mem_calibrate_read_test_all_ranks 1332 (grp, 1, PASS_ONE_BIT, bit_chk, 0); 1333 if (!test_status) { 1334 fail_cnt++; 1335 1336 if (fail_cnt == 2) 1337 break; 1338 } 1339 1340 /* fiddle with FIFO */ 1341 rw_mgr_incr_vfifo(grp, &v); 1342 } 1343 1344 if (v >= VFIFO_SIZE) { 1345 /* no failing read found!! 
Something must have gone wrong */ 1346 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo failed\n", 1347 __func__, __LINE__); 1348 return 0; 1349 } else { 1350 return v; 1351 } 1352 } 1353 1354 static int find_working_phase(uint32_t *grp, uint32_t *bit_chk, 1355 uint32_t dtaps_per_ptap, uint32_t *work_bgn, 1356 uint32_t *v, uint32_t *d, uint32_t *p, 1357 uint32_t *i, uint32_t *max_working_cnt) 1358 { 1359 uint32_t found_begin = 0; 1360 uint32_t tmp_delay = 0; 1361 uint32_t test_status; 1362 1363 for (*d = 0; *d <= dtaps_per_ptap; (*d)++, tmp_delay += 1364 IO_DELAY_PER_DQS_EN_DCHAIN_TAP) { 1365 *work_bgn = tmp_delay; 1366 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d); 1367 1368 for (*i = 0; *i < VFIFO_SIZE; (*i)++) { 1369 for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_bgn += 1370 IO_DELAY_PER_OPA_TAP) { 1371 scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p); 1372 1373 test_status = 1374 rw_mgr_mem_calibrate_read_test_all_ranks 1375 (*grp, 1, PASS_ONE_BIT, bit_chk, 0); 1376 1377 if (test_status) { 1378 *max_working_cnt = 1; 1379 found_begin = 1; 1380 break; 1381 } 1382 } 1383 1384 if (found_begin) 1385 break; 1386 1387 if (*p > IO_DQS_EN_PHASE_MAX) 1388 /* fiddle with FIFO */ 1389 rw_mgr_incr_vfifo(*grp, v); 1390 } 1391 1392 if (found_begin) 1393 break; 1394 } 1395 1396 if (*i >= VFIFO_SIZE) { 1397 /* cannot find working solution */ 1398 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: no vfifo/\ 1399 ptap/dtap\n", __func__, __LINE__); 1400 return 0; 1401 } else { 1402 return 1; 1403 } 1404 } 1405 1406 static void sdr_backup_phase(uint32_t *grp, uint32_t *bit_chk, 1407 uint32_t *work_bgn, uint32_t *v, uint32_t *d, 1408 uint32_t *p, uint32_t *max_working_cnt) 1409 { 1410 uint32_t found_begin = 0; 1411 uint32_t tmp_delay; 1412 1413 /* Special case code for backing up a phase */ 1414 if (*p == 0) { 1415 *p = IO_DQS_EN_PHASE_MAX; 1416 rw_mgr_decr_vfifo(*grp, v); 1417 } else { 1418 (*p)--; 1419 } 1420 tmp_delay = *work_bgn - IO_DELAY_PER_OPA_TAP; 1421 scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p); 1422 1423 for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_bgn; 1424 (*d)++, tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) { 1425 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d); 1426 1427 if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1, 1428 PASS_ONE_BIT, 1429 bit_chk, 0)) { 1430 found_begin = 1; 1431 *work_bgn = tmp_delay; 1432 break; 1433 } 1434 } 1435 1436 /* We have found a working dtap before the ptap found above */ 1437 if (found_begin == 1) 1438 (*max_working_cnt)++; 1439 1440 /* 1441 * Restore VFIFO to old state before we decremented it 1442 * (if needed). 
1443 */ 1444 (*p)++; 1445 if (*p > IO_DQS_EN_PHASE_MAX) { 1446 *p = 0; 1447 rw_mgr_incr_vfifo(*grp, v); 1448 } 1449 1450 scc_mgr_set_dqs_en_delay_all_ranks(*grp, 0); 1451 } 1452 1453 static int sdr_nonworking_phase(uint32_t *grp, uint32_t *bit_chk, 1454 uint32_t *work_bgn, uint32_t *v, uint32_t *d, 1455 uint32_t *p, uint32_t *i, uint32_t *max_working_cnt, 1456 uint32_t *work_end) 1457 { 1458 uint32_t found_end = 0; 1459 1460 (*p)++; 1461 *work_end += IO_DELAY_PER_OPA_TAP; 1462 if (*p > IO_DQS_EN_PHASE_MAX) { 1463 /* fiddle with FIFO */ 1464 *p = 0; 1465 rw_mgr_incr_vfifo(*grp, v); 1466 } 1467 1468 for (; *i < VFIFO_SIZE + 1; (*i)++) { 1469 for (; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_end 1470 += IO_DELAY_PER_OPA_TAP) { 1471 scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p); 1472 1473 if (!rw_mgr_mem_calibrate_read_test_all_ranks 1474 (*grp, 1, PASS_ONE_BIT, bit_chk, 0)) { 1475 found_end = 1; 1476 break; 1477 } else { 1478 (*max_working_cnt)++; 1479 } 1480 } 1481 1482 if (found_end) 1483 break; 1484 1485 if (*p > IO_DQS_EN_PHASE_MAX) { 1486 /* fiddle with FIFO */ 1487 rw_mgr_incr_vfifo(*grp, v); 1488 *p = 0; 1489 } 1490 } 1491 1492 if (*i >= VFIFO_SIZE + 1) { 1493 /* cannot see edge of failing read */ 1494 debug_cond(DLEVEL == 2, "%s:%d sdr_nonworking_phase: end:\ 1495 failed\n", __func__, __LINE__); 1496 return 0; 1497 } else { 1498 return 1; 1499 } 1500 } 1501 1502 static int sdr_find_window_centre(uint32_t *grp, uint32_t *bit_chk, 1503 uint32_t *work_bgn, uint32_t *v, uint32_t *d, 1504 uint32_t *p, uint32_t *work_mid, 1505 uint32_t *work_end) 1506 { 1507 int i; 1508 int tmp_delay = 0; 1509 1510 *work_mid = (*work_bgn + *work_end) / 2; 1511 1512 debug_cond(DLEVEL == 2, "work_bgn=%d work_end=%d work_mid=%d\n", 1513 *work_bgn, *work_end, *work_mid); 1514 /* Get the middle delay to be less than a VFIFO delay */ 1515 for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX; 1516 (*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP) 1517 ; 1518 debug_cond(DLEVEL == 2, "vfifo ptap delay %d\n", tmp_delay); 1519 while (*work_mid > tmp_delay) 1520 *work_mid -= tmp_delay; 1521 debug_cond(DLEVEL == 2, "new work_mid %d\n", *work_mid); 1522 1523 tmp_delay = 0; 1524 for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX && tmp_delay < *work_mid; 1525 (*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP) 1526 ; 1527 tmp_delay -= IO_DELAY_PER_OPA_TAP; 1528 debug_cond(DLEVEL == 2, "new p %d, tmp_delay=%d\n", (*p) - 1, tmp_delay); 1529 for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_mid; (*d)++, 1530 tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) 1531 ; 1532 debug_cond(DLEVEL == 2, "new d %d, tmp_delay=%d\n", *d, tmp_delay); 1533 1534 scc_mgr_set_dqs_en_phase_all_ranks(*grp, (*p) - 1); 1535 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d); 1536 1537 /* 1538 * push vfifo until we can successfully calibrate. We can do this 1539 * because the largest possible margin in 1 VFIFO cycle. 
1540 */ 1541 for (i = 0; i < VFIFO_SIZE; i++) { 1542 debug_cond(DLEVEL == 2, "find_dqs_en_phase: center: vfifo=%u\n", 1543 *v); 1544 if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1, 1545 PASS_ONE_BIT, 1546 bit_chk, 0)) { 1547 break; 1548 } 1549 1550 /* fiddle with FIFO */ 1551 rw_mgr_incr_vfifo(*grp, v); 1552 } 1553 1554 if (i >= VFIFO_SIZE) { 1555 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: center: \ 1556 failed\n", __func__, __LINE__); 1557 return 0; 1558 } else { 1559 return 1; 1560 } 1561 } 1562 1563 /* find a good dqs enable to use */ 1564 static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(uint32_t grp) 1565 { 1566 uint32_t v, d, p, i; 1567 uint32_t max_working_cnt; 1568 uint32_t bit_chk; 1569 uint32_t dtaps_per_ptap; 1570 uint32_t work_bgn, work_mid, work_end; 1571 uint32_t found_passing_read, found_failing_read, initial_failing_dtap; 1572 1573 debug("%s:%d %u\n", __func__, __LINE__, grp); 1574 1575 reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER); 1576 1577 scc_mgr_set_dqs_en_delay_all_ranks(grp, 0); 1578 scc_mgr_set_dqs_en_phase_all_ranks(grp, 0); 1579 1580 /* ************************************************************** */ 1581 /* * Step 0 : Determine number of delay taps for each phase tap * */ 1582 dtaps_per_ptap = IO_DELAY_PER_OPA_TAP/IO_DELAY_PER_DQS_EN_DCHAIN_TAP; 1583 1584 /* ********************************************************* */ 1585 /* * Step 1 : First push vfifo until we get a failing read * */ 1586 v = find_vfifo_read(grp, &bit_chk); 1587 1588 max_working_cnt = 0; 1589 1590 /* ******************************************************** */ 1591 /* * step 2: find first working phase, increment in ptaps * */ 1592 work_bgn = 0; 1593 if (find_working_phase(&grp, &bit_chk, dtaps_per_ptap, &work_bgn, &v, &d, 1594 &p, &i, &max_working_cnt) == 0) 1595 return 0; 1596 1597 work_end = work_bgn; 1598 1599 /* 1600 * If d is 0 then the working window covers a phase tap and 1601 * we can follow the old procedure otherwise, we've found the beginning, 1602 * and we need to increment the dtaps until we find the end. 
1603 */ 1604 if (d == 0) { 1605 /* ********************************************************* */ 1606 /* * step 3a: if we have room, back off by one and 1607 increment in dtaps * */ 1608 1609 sdr_backup_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p, 1610 &max_working_cnt); 1611 1612 /* ********************************************************* */ 1613 /* * step 4a: go forward from working phase to non working 1614 phase, increment in ptaps * */ 1615 if (sdr_nonworking_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p, 1616 &i, &max_working_cnt, &work_end) == 0) 1617 return 0; 1618 1619 /* ********************************************************* */ 1620 /* * step 5a: back off one from last, increment in dtaps * */ 1621 1622 /* Special case code for backing up a phase */ 1623 if (p == 0) { 1624 p = IO_DQS_EN_PHASE_MAX; 1625 rw_mgr_decr_vfifo(grp, &v); 1626 } else { 1627 p = p - 1; 1628 } 1629 1630 work_end -= IO_DELAY_PER_OPA_TAP; 1631 scc_mgr_set_dqs_en_phase_all_ranks(grp, p); 1632 1633 /* * The actual increment of dtaps is done outside of 1634 the if/else loop to share code */ 1635 d = 0; 1636 1637 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: v/p: \ 1638 vfifo=%u ptap=%u\n", __func__, __LINE__, 1639 v, p); 1640 } else { 1641 /* ******************************************************* */ 1642 /* * step 3-5b: Find the right edge of the window using 1643 delay taps * */ 1644 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase:vfifo=%u \ 1645 ptap=%u dtap=%u bgn=%u\n", __func__, __LINE__, 1646 v, p, d, work_bgn); 1647 1648 work_end = work_bgn; 1649 1650 /* * The actual increment of dtaps is done outside of the 1651 if/else loop to share code */ 1652 1653 /* Only here to counterbalance a subtract later on which is 1654 not needed if this branch of the algorithm is taken */ 1655 max_working_cnt++; 1656 } 1657 1658 /* The dtap increment to find the failing edge is done here */ 1659 for (; d <= IO_DQS_EN_DELAY_MAX; d++, work_end += 1660 IO_DELAY_PER_DQS_EN_DCHAIN_TAP) { 1661 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: \ 1662 end-2: dtap=%u\n", __func__, __LINE__, d); 1663 scc_mgr_set_dqs_en_delay_all_ranks(grp, d); 1664 1665 if (!rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, 1666 PASS_ONE_BIT, 1667 &bit_chk, 0)) { 1668 break; 1669 } 1670 } 1671 1672 /* Go back to working dtap */ 1673 if (d != 0) 1674 work_end -= IO_DELAY_PER_DQS_EN_DCHAIN_TAP; 1675 1676 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: v/p/d: vfifo=%u \ 1677 ptap=%u dtap=%u end=%u\n", __func__, __LINE__, 1678 v, p, d-1, work_end); 1679 1680 if (work_end < work_bgn) { 1681 /* nil range */ 1682 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: end-2: \ 1683 failed\n", __func__, __LINE__); 1684 return 0; 1685 } 1686 1687 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: found range [%u,%u]\n", 1688 __func__, __LINE__, work_bgn, work_end); 1689 1690 /* *************************************************************** */ 1691 /* 1692 * * We need to calculate the number of dtaps that equal a ptap 1693 * * To do that we'll back up a ptap and re-find the edge of the 1694 * * window using dtaps 1695 */ 1696 1697 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: calculate dtaps_per_ptap \ 1698 for tracking\n", __func__, __LINE__); 1699 1700 /* Special case code for backing up a phase */ 1701 if (p == 0) { 1702 p = IO_DQS_EN_PHASE_MAX; 1703 rw_mgr_decr_vfifo(grp, &v); 1704 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: backedup \ 1705 cycle/phase: v=%u p=%u\n", __func__, __LINE__, 1706 v, p); 1707 } else { 1708 p = p - 1; 1709 
debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: backedup \ 1710 phase only: v=%u p=%u", __func__, __LINE__, 1711 v, p); 1712 } 1713 1714 scc_mgr_set_dqs_en_phase_all_ranks(grp, p); 1715 1716 /* 1717 * Increase dtap until we first see a passing read (in case the 1718 * window is smaller than a ptap), 1719 * and then a failing read to mark the edge of the window again 1720 */ 1721 1722 /* Find a passing read */ 1723 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find passing read\n", 1724 __func__, __LINE__); 1725 found_passing_read = 0; 1726 found_failing_read = 0; 1727 initial_failing_dtap = d; 1728 for (; d <= IO_DQS_EN_DELAY_MAX; d++) { 1729 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: testing \ 1730 read d=%u\n", __func__, __LINE__, d); 1731 scc_mgr_set_dqs_en_delay_all_ranks(grp, d); 1732 1733 if (rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, 1734 PASS_ONE_BIT, 1735 &bit_chk, 0)) { 1736 found_passing_read = 1; 1737 break; 1738 } 1739 } 1740 1741 if (found_passing_read) { 1742 /* Find a failing read */ 1743 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find failing \ 1744 read\n", __func__, __LINE__); 1745 for (d = d + 1; d <= IO_DQS_EN_DELAY_MAX; d++) { 1746 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: \ 1747 testing read d=%u\n", __func__, __LINE__, d); 1748 scc_mgr_set_dqs_en_delay_all_ranks(grp, d); 1749 1750 if (!rw_mgr_mem_calibrate_read_test_all_ranks 1751 (grp, 1, PASS_ONE_BIT, &bit_chk, 0)) { 1752 found_failing_read = 1; 1753 break; 1754 } 1755 } 1756 } else { 1757 debug_cond(DLEVEL == 1, "%s:%d find_dqs_en_phase: failed to \ 1758 calculate dtaps", __func__, __LINE__); 1759 debug_cond(DLEVEL == 1, "per ptap. Fall back on static value\n"); 1760 } 1761 1762 /* 1763 * The dynamically calculated dtaps_per_ptap is only valid if we 1764 * found a passing/failing read. If we didn't, it means d hit the max 1765 * (IO_DQS_EN_DELAY_MAX). Otherwise, dtaps_per_ptap retains its 1766 * statically calculated value. 
1767 */ 1768 if (found_passing_read && found_failing_read) 1769 dtaps_per_ptap = d - initial_failing_dtap; 1770 1771 writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap); 1772 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: dtaps_per_ptap=%u \ 1773 - %u = %u", __func__, __LINE__, d, 1774 initial_failing_dtap, dtaps_per_ptap); 1775 1776 /* ******************************************** */ 1777 /* * step 6: Find the centre of the window * */ 1778 if (sdr_find_window_centre(&grp, &bit_chk, &work_bgn, &v, &d, &p, 1779 &work_mid, &work_end) == 0) 1780 return 0; 1781 1782 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: center found: \ 1783 vfifo=%u ptap=%u dtap=%u\n", __func__, __LINE__, 1784 v, p-1, d); 1785 return 1; 1786 } 1787 1788 /* 1789 * Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different 1790 * dq_in_delay values 1791 */ 1792 static uint32_t 1793 rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay 1794 (uint32_t write_group, uint32_t read_group, uint32_t test_bgn) 1795 { 1796 uint32_t found; 1797 uint32_t i; 1798 uint32_t p; 1799 uint32_t d; 1800 uint32_t r; 1801 1802 const uint32_t delay_step = IO_IO_IN_DELAY_MAX / 1803 (RW_MGR_MEM_DQ_PER_READ_DQS-1); 1804 /* we start at zero, so have one less dq to devide among */ 1805 1806 debug("%s:%d (%u,%u,%u)", __func__, __LINE__, write_group, read_group, 1807 test_bgn); 1808 1809 /* try different dq_in_delays since the dq path is shorter than dqs */ 1810 1811 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 1812 r += NUM_RANKS_PER_SHADOW_REG) { 1813 for (i = 0, p = test_bgn, d = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++, d += delay_step) { 1814 debug_cond(DLEVEL == 1, "%s:%d rw_mgr_mem_calibrate_\ 1815 vfifo_find_dqs_", __func__, __LINE__); 1816 debug_cond(DLEVEL == 1, "en_phase_sweep_dq_in_delay: g=%u/%u ", 1817 write_group, read_group); 1818 debug_cond(DLEVEL == 1, "r=%u, i=%u p=%u d=%u\n", r, i , p, d); 1819 scc_mgr_set_dq_in_delay(p, d); 1820 scc_mgr_load_dq(p); 1821 } 1822 writel(0, &sdr_scc_mgr->update); 1823 } 1824 1825 found = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(read_group); 1826 1827 debug_cond(DLEVEL == 1, "%s:%d rw_mgr_mem_calibrate_vfifo_find_dqs_\ 1828 en_phase_sweep_dq", __func__, __LINE__); 1829 debug_cond(DLEVEL == 1, "_in_delay: g=%u/%u found=%u; Reseting delay \ 1830 chain to zero\n", write_group, read_group, found); 1831 1832 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 1833 r += NUM_RANKS_PER_SHADOW_REG) { 1834 for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; 1835 i++, p++) { 1836 scc_mgr_set_dq_in_delay(p, 0); 1837 scc_mgr_load_dq(p); 1838 } 1839 writel(0, &sdr_scc_mgr->update); 1840 } 1841 1842 return found; 1843 } 1844 1845 /* per-bit deskew DQ and center */ 1846 static uint32_t rw_mgr_mem_calibrate_vfifo_center(uint32_t rank_bgn, 1847 uint32_t write_group, uint32_t read_group, uint32_t test_bgn, 1848 uint32_t use_read_test, uint32_t update_fom) 1849 { 1850 uint32_t i, p, d, min_index; 1851 /* 1852 * Store these as signed since there are comparisons with 1853 * signed numbers. 
1854 */ 1855 uint32_t bit_chk; 1856 uint32_t sticky_bit_chk; 1857 int32_t left_edge[RW_MGR_MEM_DQ_PER_READ_DQS]; 1858 int32_t right_edge[RW_MGR_MEM_DQ_PER_READ_DQS]; 1859 int32_t final_dq[RW_MGR_MEM_DQ_PER_READ_DQS]; 1860 int32_t mid; 1861 int32_t orig_mid_min, mid_min; 1862 int32_t new_dqs, start_dqs, start_dqs_en, shift_dq, final_dqs, 1863 final_dqs_en; 1864 int32_t dq_margin, dqs_margin; 1865 uint32_t stop; 1866 uint32_t temp_dq_in_delay1, temp_dq_in_delay2; 1867 uint32_t addr; 1868 1869 debug("%s:%d: %u %u", __func__, __LINE__, read_group, test_bgn); 1870 1871 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_DQS_IN_DELAY_OFFSET; 1872 start_dqs = readl(addr + (read_group << 2)); 1873 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) 1874 start_dqs_en = readl(addr + ((read_group << 2) 1875 - IO_DQS_EN_DELAY_OFFSET)); 1876 1877 /* set the left and right edge of each bit to an illegal value */ 1878 /* use (IO_IO_IN_DELAY_MAX + 1) as an illegal value */ 1879 sticky_bit_chk = 0; 1880 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 1881 left_edge[i] = IO_IO_IN_DELAY_MAX + 1; 1882 right_edge[i] = IO_IO_IN_DELAY_MAX + 1; 1883 } 1884 1885 /* Search for the left edge of the window for each bit */ 1886 for (d = 0; d <= IO_IO_IN_DELAY_MAX; d++) { 1887 scc_mgr_apply_group_dq_in_delay(write_group, test_bgn, d); 1888 1889 writel(0, &sdr_scc_mgr->update); 1890 1891 /* 1892 * Stop searching when the read test doesn't pass AND when 1893 * we've seen a passing read on every bit. 1894 */ 1895 if (use_read_test) { 1896 stop = !rw_mgr_mem_calibrate_read_test(rank_bgn, 1897 read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT, 1898 &bit_chk, 0, 0); 1899 } else { 1900 rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1901 0, PASS_ONE_BIT, 1902 &bit_chk, 0); 1903 bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS * 1904 (read_group - (write_group * 1905 RW_MGR_MEM_IF_READ_DQS_WIDTH / 1906 RW_MGR_MEM_IF_WRITE_DQS_WIDTH))); 1907 stop = (bit_chk == 0); 1908 } 1909 sticky_bit_chk = sticky_bit_chk | bit_chk; 1910 stop = stop && (sticky_bit_chk == param->read_correct_mask); 1911 debug_cond(DLEVEL == 2, "%s:%d vfifo_center(left): dtap=%u => %u == %u \ 1912 && %u", __func__, __LINE__, d, 1913 sticky_bit_chk, 1914 param->read_correct_mask, stop); 1915 1916 if (stop == 1) { 1917 break; 1918 } else { 1919 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 1920 if (bit_chk & 1) { 1921 /* Remember a passing test as the 1922 left_edge */ 1923 left_edge[i] = d; 1924 } else { 1925 /* If a left edge has not been seen yet, 1926 then a future passing test will mark 1927 this edge as the right edge */ 1928 if (left_edge[i] == 1929 IO_IO_IN_DELAY_MAX + 1) { 1930 right_edge[i] = -(d + 1); 1931 } 1932 } 1933 bit_chk = bit_chk >> 1; 1934 } 1935 } 1936 } 1937 1938 /* Reset DQ delay chains to 0 */ 1939 scc_mgr_apply_group_dq_in_delay(test_bgn, 0); 1940 sticky_bit_chk = 0; 1941 for (i = RW_MGR_MEM_DQ_PER_READ_DQS - 1;; i--) { 1942 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: \ 1943 %d right_edge[%u]: %d\n", __func__, __LINE__, 1944 i, left_edge[i], i, right_edge[i]); 1945 1946 /* 1947 * Check for cases where we haven't found the left edge, 1948 * which makes our assignment of the the right edge invalid. 1949 * Reset it to the illegal value. 
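 * (A bit that never passed during the left-edge sweep still carries the
 * provisional -(d + 1) marker in right_edge[] from that sweep, so it is
 * pushed back to the illegal value here before the right-edge search
 * below reuses it.)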
1950 */ 1951 if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) && ( 1952 right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) { 1953 right_edge[i] = IO_IO_IN_DELAY_MAX + 1; 1954 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: reset \ 1955 right_edge[%u]: %d\n", __func__, __LINE__, 1956 i, right_edge[i]); 1957 } 1958 1959 /* 1960 * Reset sticky bit (except for bits where we have seen 1961 * both the left and right edge). 1962 */ 1963 sticky_bit_chk = sticky_bit_chk << 1; 1964 if ((left_edge[i] != IO_IO_IN_DELAY_MAX + 1) && 1965 (right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) { 1966 sticky_bit_chk = sticky_bit_chk | 1; 1967 } 1968 1969 if (i == 0) 1970 break; 1971 } 1972 1973 /* Search for the right edge of the window for each bit */ 1974 for (d = 0; d <= IO_DQS_IN_DELAY_MAX - start_dqs; d++) { 1975 scc_mgr_set_dqs_bus_in_delay(read_group, d + start_dqs); 1976 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { 1977 uint32_t delay = d + start_dqs_en; 1978 if (delay > IO_DQS_EN_DELAY_MAX) 1979 delay = IO_DQS_EN_DELAY_MAX; 1980 scc_mgr_set_dqs_en_delay(read_group, delay); 1981 } 1982 scc_mgr_load_dqs(read_group); 1983 1984 writel(0, &sdr_scc_mgr->update); 1985 1986 /* 1987 * Stop searching when the read test doesn't pass AND when 1988 * we've seen a passing read on every bit. 1989 */ 1990 if (use_read_test) { 1991 stop = !rw_mgr_mem_calibrate_read_test(rank_bgn, 1992 read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT, 1993 &bit_chk, 0, 0); 1994 } else { 1995 rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1996 0, PASS_ONE_BIT, 1997 &bit_chk, 0); 1998 bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS * 1999 (read_group - (write_group * 2000 RW_MGR_MEM_IF_READ_DQS_WIDTH / 2001 RW_MGR_MEM_IF_WRITE_DQS_WIDTH))); 2002 stop = (bit_chk == 0); 2003 } 2004 sticky_bit_chk = sticky_bit_chk | bit_chk; 2005 stop = stop && (sticky_bit_chk == param->read_correct_mask); 2006 2007 debug_cond(DLEVEL == 2, "%s:%d vfifo_center(right): dtap=%u => %u == \ 2008 %u && %u", __func__, __LINE__, d, 2009 sticky_bit_chk, param->read_correct_mask, stop); 2010 2011 if (stop == 1) { 2012 break; 2013 } else { 2014 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 2015 if (bit_chk & 1) { 2016 /* Remember a passing test as 2017 the right_edge */ 2018 right_edge[i] = d; 2019 } else { 2020 if (d != 0) { 2021 /* If a right edge has not been 2022 seen yet, then a future passing 2023 test will mark this edge as the 2024 left edge */ 2025 if (right_edge[i] == 2026 IO_IO_IN_DELAY_MAX + 1) { 2027 left_edge[i] = -(d + 1); 2028 } 2029 } else { 2030 /* d = 0 failed, but it passed 2031 when testing the left edge, 2032 so it must be marginal, 2033 set it to -1 */ 2034 if (right_edge[i] == 2035 IO_IO_IN_DELAY_MAX + 1 && 2036 left_edge[i] != 2037 IO_IO_IN_DELAY_MAX 2038 + 1) { 2039 right_edge[i] = -1; 2040 } 2041 /* If a right edge has not been 2042 seen yet, then a future passing 2043 test will mark this edge as the 2044 left edge */ 2045 else if (right_edge[i] == 2046 IO_IO_IN_DELAY_MAX + 2047 1) { 2048 left_edge[i] = -(d + 1); 2049 } 2050 } 2051 } 2052 2053 debug_cond(DLEVEL == 2, "%s:%d vfifo_center[r,\ 2054 d=%u]: ", __func__, __LINE__, d); 2055 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d ", 2056 (int)(bit_chk & 1), i, left_edge[i]); 2057 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i, 2058 right_edge[i]); 2059 bit_chk = bit_chk >> 1; 2060 } 2061 } 2062 } 2063 2064 /* Check that all bits have a window */ 2065 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 2066 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: \ 2067 %d right_edge[%u]: %d", 
__func__, __LINE__, 2068 i, left_edge[i], i, right_edge[i]); 2069 if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) || (right_edge[i] 2070 == IO_IO_IN_DELAY_MAX + 1)) { 2071 /* 2072 * Restore delay chain settings before letting the loop 2073 * in rw_mgr_mem_calibrate_vfifo to retry different 2074 * dqs/ck relationships. 2075 */ 2076 scc_mgr_set_dqs_bus_in_delay(read_group, start_dqs); 2077 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { 2078 scc_mgr_set_dqs_en_delay(read_group, 2079 start_dqs_en); 2080 } 2081 scc_mgr_load_dqs(read_group); 2082 writel(0, &sdr_scc_mgr->update); 2083 2084 debug_cond(DLEVEL == 1, "%s:%d vfifo_center: failed to \ 2085 find edge [%u]: %d %d", __func__, __LINE__, 2086 i, left_edge[i], right_edge[i]); 2087 if (use_read_test) { 2088 set_failing_group_stage(read_group * 2089 RW_MGR_MEM_DQ_PER_READ_DQS + i, 2090 CAL_STAGE_VFIFO, 2091 CAL_SUBSTAGE_VFIFO_CENTER); 2092 } else { 2093 set_failing_group_stage(read_group * 2094 RW_MGR_MEM_DQ_PER_READ_DQS + i, 2095 CAL_STAGE_VFIFO_AFTER_WRITES, 2096 CAL_SUBSTAGE_VFIFO_CENTER); 2097 } 2098 return 0; 2099 } 2100 } 2101 2102 /* Find middle of window for each DQ bit */ 2103 mid_min = left_edge[0] - right_edge[0]; 2104 min_index = 0; 2105 for (i = 1; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 2106 mid = left_edge[i] - right_edge[i]; 2107 if (mid < mid_min) { 2108 mid_min = mid; 2109 min_index = i; 2110 } 2111 } 2112 2113 /* 2114 * -mid_min/2 represents the amount that we need to move DQS. 2115 * If mid_min is odd and positive we'll need to add one to 2116 * make sure the rounding in further calculations is correct 2117 * (always bias to the right), so just add 1 for all positive values. 2118 */ 2119 if (mid_min > 0) 2120 mid_min++; 2121 2122 mid_min = mid_min / 2; 2123 2124 debug_cond(DLEVEL == 1, "%s:%d vfifo_center: mid_min=%d (index=%u)\n", 2125 __func__, __LINE__, mid_min, min_index); 2126 2127 /* Determine the amount we can change DQS (which is -mid_min) */ 2128 orig_mid_min = mid_min; 2129 new_dqs = start_dqs - mid_min; 2130 if (new_dqs > IO_DQS_IN_DELAY_MAX) 2131 new_dqs = IO_DQS_IN_DELAY_MAX; 2132 else if (new_dqs < 0) 2133 new_dqs = 0; 2134 2135 mid_min = start_dqs - new_dqs; 2136 debug_cond(DLEVEL == 1, "vfifo_center: new mid_min=%d new_dqs=%d\n", 2137 mid_min, new_dqs); 2138 2139 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { 2140 if (start_dqs_en - mid_min > IO_DQS_EN_DELAY_MAX) 2141 mid_min += start_dqs_en - mid_min - IO_DQS_EN_DELAY_MAX; 2142 else if (start_dqs_en - mid_min < 0) 2143 mid_min += start_dqs_en - mid_min; 2144 } 2145 new_dqs = start_dqs - mid_min; 2146 2147 debug_cond(DLEVEL == 1, "vfifo_center: start_dqs=%d start_dqs_en=%d \ 2148 new_dqs=%d mid_min=%d\n", start_dqs, 2149 IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS ? 
start_dqs_en : -1, 2150 new_dqs, mid_min); 2151 2152 /* Initialize data for export structures */ 2153 dqs_margin = IO_IO_IN_DELAY_MAX + 1; 2154 dq_margin = IO_IO_IN_DELAY_MAX + 1; 2155 2156 /* add delay to bring centre of all DQ windows to the same "level" */ 2157 for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) { 2158 /* Use values before divide by 2 to reduce round off error */ 2159 shift_dq = (left_edge[i] - right_edge[i] - 2160 (left_edge[min_index] - right_edge[min_index]))/2 + 2161 (orig_mid_min - mid_min); 2162 2163 debug_cond(DLEVEL == 2, "vfifo_center: before: \ 2164 shift_dq[%u]=%d\n", i, shift_dq); 2165 2166 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_IN_DELAY_OFFSET; 2167 temp_dq_in_delay1 = readl(addr + (p << 2)); 2168 temp_dq_in_delay2 = readl(addr + (i << 2)); 2169 2170 if (shift_dq + (int32_t)temp_dq_in_delay1 > 2171 (int32_t)IO_IO_IN_DELAY_MAX) { 2172 shift_dq = (int32_t)IO_IO_IN_DELAY_MAX - temp_dq_in_delay2; 2173 } else if (shift_dq + (int32_t)temp_dq_in_delay1 < 0) { 2174 shift_dq = -(int32_t)temp_dq_in_delay1; 2175 } 2176 debug_cond(DLEVEL == 2, "vfifo_center: after: \ 2177 shift_dq[%u]=%d\n", i, shift_dq); 2178 final_dq[i] = temp_dq_in_delay1 + shift_dq; 2179 scc_mgr_set_dq_in_delay(p, final_dq[i]); 2180 scc_mgr_load_dq(p); 2181 2182 debug_cond(DLEVEL == 2, "vfifo_center: margin[%u]=[%d,%d]\n", i, 2183 left_edge[i] - shift_dq + (-mid_min), 2184 right_edge[i] + shift_dq - (-mid_min)); 2185 /* To determine values for export structures */ 2186 if (left_edge[i] - shift_dq + (-mid_min) < dq_margin) 2187 dq_margin = left_edge[i] - shift_dq + (-mid_min); 2188 2189 if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin) 2190 dqs_margin = right_edge[i] + shift_dq - (-mid_min); 2191 } 2192 2193 final_dqs = new_dqs; 2194 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) 2195 final_dqs_en = start_dqs_en - mid_min; 2196 2197 /* Move DQS-en */ 2198 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { 2199 scc_mgr_set_dqs_en_delay(read_group, final_dqs_en); 2200 scc_mgr_load_dqs(read_group); 2201 } 2202 2203 /* Move DQS */ 2204 scc_mgr_set_dqs_bus_in_delay(read_group, final_dqs); 2205 scc_mgr_load_dqs(read_group); 2206 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: dq_margin=%d \ 2207 dqs_margin=%d", __func__, __LINE__, 2208 dq_margin, dqs_margin); 2209 2210 /* 2211 * Do not remove this line as it makes sure all of our decisions 2212 * have been applied. Apply the update bit. 2213 */ 2214 writel(0, &sdr_scc_mgr->update); 2215 2216 return (dq_margin >= 0) && (dqs_margin >= 0); 2217 } 2218 2219 /* 2220 * calibrate the read valid prediction FIFO. 2221 * 2222 * - read valid prediction will consist of finding a good DQS enable phase, 2223 * DQS enable delay, DQS input phase, and DQS input delay. 2224 * - we also do a per-bit deskew on the DQ lines. 
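 *
 * The loop in this routine sweeps extra output-side delay (d) and the
 * DQ/DQS output phase (p); for each candidate it reloads the read
 * patterns, runs the guaranteed read check, searches for a working DQS
 * enable phase (sweeping dq_in_delay), and then performs per-bit read
 * centering for every shadow register set.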
2225 */ 2226 static uint32_t rw_mgr_mem_calibrate_vfifo(uint32_t read_group, 2227 uint32_t test_bgn) 2228 { 2229 uint32_t p, d, rank_bgn, sr; 2230 uint32_t dtaps_per_ptap; 2231 uint32_t tmp_delay; 2232 uint32_t bit_chk; 2233 uint32_t grp_calibrated; 2234 uint32_t write_group, write_test_bgn; 2235 uint32_t failed_substage; 2236 2237 debug("%s:%d: %u %u\n", __func__, __LINE__, read_group, test_bgn); 2238 2239 /* update info for sims */ 2240 reg_file_set_stage(CAL_STAGE_VFIFO); 2241 2242 write_group = read_group; 2243 write_test_bgn = test_bgn; 2244 2245 /* USER Determine number of delay taps for each phase tap */ 2246 dtaps_per_ptap = 0; 2247 tmp_delay = 0; 2248 while (tmp_delay < IO_DELAY_PER_OPA_TAP) { 2249 dtaps_per_ptap++; 2250 tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP; 2251 } 2252 dtaps_per_ptap--; 2253 tmp_delay = 0; 2254 2255 /* update info for sims */ 2256 reg_file_set_group(read_group); 2257 2258 grp_calibrated = 0; 2259 2260 reg_file_set_sub_stage(CAL_SUBSTAGE_GUARANTEED_READ); 2261 failed_substage = CAL_SUBSTAGE_GUARANTEED_READ; 2262 2263 for (d = 0; d <= dtaps_per_ptap && grp_calibrated == 0; d += 2) { 2264 /* 2265 * In RLDRAMX we may be messing with the delay of pins in 2266 * the same write group but outside of the current read 2267 * group, but that's ok because we haven't calibrated 2268 * the output side yet. 2269 */ 2270 if (d > 0) { 2271 scc_mgr_apply_group_all_out_delay_add_all_ranks 2272 (write_group, write_test_bgn, d); 2273 } 2274 2275 for (p = 0; p <= IO_DQDQS_OUT_PHASE_MAX && grp_calibrated == 0; 2276 p++) { 2277 /* set a particular dqdqs phase */ 2278 scc_mgr_set_dqdqs_output_phase_all_ranks(read_group, p); 2279 2280 debug_cond(DLEVEL == 1, "%s:%d calibrate_vfifo: g=%u \ 2281 p=%u d=%u\n", __func__, __LINE__, 2282 read_group, p, d); 2283 2284 /* 2285 * Load up the patterns used by read calibration 2286 * using current DQDQS phase. 2287 */ 2288 rw_mgr_mem_calibrate_read_load_patterns(0, 1); 2289 if (!(gbl->phy_debug_mode_flags & 2290 PHY_DEBUG_DISABLE_GUARANTEED_READ)) { 2291 if (!rw_mgr_mem_calibrate_read_test_patterns_all_ranks 2292 (read_group, 1, &bit_chk)) { 2293 debug_cond(DLEVEL == 1, "%s:%d Guaranteed read test failed:", 2294 __func__, __LINE__); 2295 debug_cond(DLEVEL == 1, " g=%u p=%u d=%u\n", 2296 read_group, p, d); 2297 break; 2298 } 2299 } 2300 2301 /* case:56390 */ 2302 grp_calibrated = 1; 2303 if (rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay 2304 (write_group, read_group, test_bgn)) { 2305 /* 2306 * USER Read per-bit deskew can be done on a 2307 * per shadow register basis. 2308 */ 2309 for (rank_bgn = 0, sr = 0; 2310 rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS; 2311 rank_bgn += NUM_RANKS_PER_SHADOW_REG, 2312 ++sr) { 2313 /* 2314 * Determine if this set of ranks 2315 * should be skipped entirely. 2316 */ 2317 if (!param->skip_shadow_regs[sr]) { 2318 /* 2319 * If doing read after write 2320 * calibration, do not update 2321 * the FOM now - do it then.
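 * (Hence the call below passes update_fom == 0; the final pass in
 * rw_mgr_mem_calibrate_vfifo_end() passes 1 instead.)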
2322 */ 2323 if (!rw_mgr_mem_calibrate_vfifo_center 2324 (rank_bgn, write_group, 2325 read_group, test_bgn, 1, 0)) { 2326 grp_calibrated = 0; 2327 failed_substage = 2328 CAL_SUBSTAGE_VFIFO_CENTER; 2329 } 2330 } 2331 } 2332 } else { 2333 grp_calibrated = 0; 2334 failed_substage = CAL_SUBSTAGE_DQS_EN_PHASE; 2335 } 2336 } 2337 } 2338 2339 if (grp_calibrated == 0) { 2340 set_failing_group_stage(write_group, CAL_STAGE_VFIFO, 2341 failed_substage); 2342 return 0; 2343 } 2344 2345 /* 2346 * Reset the delay chains back to zero if they have moved > 1 2347 * (check for > 1 because loop will increase d even when pass in 2348 * first case). 2349 */ 2350 if (d > 2) 2351 scc_mgr_zero_group(write_group, 1); 2352 2353 return 1; 2354 } 2355 2356 /* VFIFO Calibration -- Read Deskew Calibration after write deskew */ 2357 static uint32_t rw_mgr_mem_calibrate_vfifo_end(uint32_t read_group, 2358 uint32_t test_bgn) 2359 { 2360 uint32_t rank_bgn, sr; 2361 uint32_t grp_calibrated; 2362 uint32_t write_group; 2363 2364 debug("%s:%d %u %u", __func__, __LINE__, read_group, test_bgn); 2365 2366 /* update info for sims */ 2367 2368 reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES); 2369 reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER); 2370 2371 write_group = read_group; 2372 2373 /* update info for sims */ 2374 reg_file_set_group(read_group); 2375 2376 grp_calibrated = 1; 2377 /* Read per-bit deskew can be done on a per shadow register basis */ 2378 for (rank_bgn = 0, sr = 0; rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS; 2379 rank_bgn += NUM_RANKS_PER_SHADOW_REG, ++sr) { 2380 /* Determine if this set of ranks should be skipped entirely */ 2381 if (!param->skip_shadow_regs[sr]) { 2382 /* This is the last calibration round, update FOM here */ 2383 if (!rw_mgr_mem_calibrate_vfifo_center(rank_bgn, 2384 write_group, 2385 read_group, 2386 test_bgn, 0, 2387 1)) { 2388 grp_calibrated = 0; 2389 } 2390 } 2391 } 2392 2393 2394 if (grp_calibrated == 0) { 2395 set_failing_group_stage(write_group, 2396 CAL_STAGE_VFIFO_AFTER_WRITES, 2397 CAL_SUBSTAGE_VFIFO_CENTER); 2398 return 0; 2399 } 2400 2401 return 1; 2402 } 2403 2404 /* Calibrate LFIFO to find smallest read latency */ 2405 static uint32_t rw_mgr_mem_calibrate_lfifo(void) 2406 { 2407 uint32_t found_one; 2408 uint32_t bit_chk; 2409 2410 debug("%s:%d\n", __func__, __LINE__); 2411 2412 /* update info for sims */ 2413 reg_file_set_stage(CAL_STAGE_LFIFO); 2414 reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY); 2415 2416 /* Load up the patterns used by read calibration for all ranks */ 2417 rw_mgr_mem_calibrate_read_load_patterns(0, 1); 2418 found_one = 0; 2419 2420 do { 2421 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); 2422 debug_cond(DLEVEL == 2, "%s:%d lfifo: read_lat=%u", 2423 __func__, __LINE__, gbl->curr_read_lat); 2424 2425 if (!rw_mgr_mem_calibrate_read_test_all_ranks(0, 2426 NUM_READ_TESTS, 2427 PASS_ALL_BITS, 2428 &bit_chk, 1)) { 2429 break; 2430 } 2431 2432 found_one = 1; 2433 /* reduce read latency and see if things are working */ 2434 /* correctly */ 2435 gbl->curr_read_lat--; 2436 } while (gbl->curr_read_lat > 0); 2437 2438 /* reset the fifos to get pointers to known state */ 2439 2440 writel(0, &phy_mgr_cmd->fifo_reset); 2441 2442 if (found_one) { 2443 /* add a fudge factor to the read latency that was determined */ 2444 gbl->curr_read_lat += 2; 2445 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); 2446 debug_cond(DLEVEL == 2, "%s:%d lfifo: success: using \ 2447 read_lat=%u\n", __func__, __LINE__, 2448 gbl->curr_read_lat); 2449 return 1; 2450 } else { 2451 
set_failing_group_stage(0xff, CAL_STAGE_LFIFO, 2452 CAL_SUBSTAGE_READ_LATENCY); 2453 2454 debug_cond(DLEVEL == 2, "%s:%d lfifo: failed at initial \ 2455 read_lat=%u\n", __func__, __LINE__, 2456 gbl->curr_read_lat); 2457 return 0; 2458 } 2459 } 2460 2461 /* 2462 * Issue a write test command. 2463 * Two variants are provided: one that just tests a write pattern and 2464 * another that tests datamask functionality. 2465 */ 2466 static void rw_mgr_mem_calibrate_write_test_issue(uint32_t group, 2467 uint32_t test_dm) 2468 { 2469 uint32_t mcc_instruction; 2470 uint32_t quick_write_mode = (((STATIC_CALIB_STEPS) & CALIB_SKIP_WRITES) && 2471 ENABLE_SUPER_QUICK_CALIBRATION); 2472 uint32_t rw_wl_nop_cycles; 2473 uint32_t addr; 2474 2475 /* 2476 * Set counter and jump addresses for the right 2477 * number of NOP cycles. 2478 * The number of supported NOP cycles can range from -1 to infinity. 2479 * Three different cases are handled: 2480 * 2481 * 1. For a number of NOP cycles greater than 0, the RW Mgr looping 2482 * mechanism will be used to insert the right number of NOPs 2483 * 2484 * 2. For a number of NOP cycles equal to 0, the micro-instruction 2485 * issuing the write command will jump straight to the 2486 * micro-instruction that turns on DQS (for DDRx), or outputs write 2487 * data (for RLD), skipping 2488 * the NOP micro-instruction altogether 2489 * 2490 * 3. A number of NOP cycles equal to -1 indicates that DQS must be 2491 * turned on in the same micro-instruction that issues the write 2492 * command. Then we need 2493 * to directly jump to the micro-instruction that sends out the data 2494 * 2495 * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters 2496 * (2 and 3). One jump-counter (0) is used to perform multiple 2497 * write-read operations. 2498 * One counter is left to issue this command in "multiple-group" mode 2499 */ 2500 2501 rw_wl_nop_cycles = gbl->rw_wl_nop_cycles; 2502 2503 if (rw_wl_nop_cycles == -1) { 2504 /* 2505 * CNTR 2 - We want to execute the special write operation that 2506 * turns on DQS right away and then skip directly to the 2507 * instruction that sends out the data. We set the counter to a 2508 * large number so that the jump is always taken. 2509 */ 2510 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2); 2511 2512 /* CNTR 3 - Not used */ 2513 if (test_dm) { 2514 mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0_WL_1; 2515 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DATA, 2516 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 2517 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP, 2518 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 2519 } else { 2520 mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0_WL_1; 2521 writel(RW_MGR_LFSR_WR_RD_BANK_0_DATA, 2522 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 2523 writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP, 2524 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 2525 } 2526 } else if (rw_wl_nop_cycles == 0) { 2527 /* 2528 * CNTR 2 - We want to skip the NOP operation and go straight 2529 * to the DQS enable instruction. We set the counter to a large 2530 * number so that the jump is always taken.
2531 */ 2532 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2); 2533 2534 /* CNTR 3 - Not used */ 2535 if (test_dm) { 2536 mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0; 2537 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DQS, 2538 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 2539 } else { 2540 mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0; 2541 writel(RW_MGR_LFSR_WR_RD_BANK_0_DQS, 2542 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 2543 } 2544 } else { 2545 /* 2546 * CNTR 2 - In this case we want to execute the next instruction 2547 * and NOT take the jump. So we set the counter to 0. The jump 2548 * address doesn't count. 2549 */ 2550 writel(0x0, &sdr_rw_load_mgr_regs->load_cntr2); 2551 writel(0x0, &sdr_rw_load_jump_mgr_regs->load_jump_add2); 2552 2553 /* 2554 * CNTR 3 - Set the nop counter to the number of cycles we 2555 * need to loop for, minus 1. 2556 */ 2557 writel(rw_wl_nop_cycles - 1, &sdr_rw_load_mgr_regs->load_cntr3); 2558 if (test_dm) { 2559 mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0; 2560 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP, 2561 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 2562 } else { 2563 mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0; 2564 writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP, 2565 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 2566 } 2567 } 2568 2569 writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS | 2570 RW_MGR_RESET_READ_DATAPATH_OFFSET); 2571 2572 if (quick_write_mode) 2573 writel(0x08, &sdr_rw_load_mgr_regs->load_cntr0); 2574 else 2575 writel(0x40, &sdr_rw_load_mgr_regs->load_cntr0); 2576 2577 writel(mcc_instruction, &sdr_rw_load_jump_mgr_regs->load_jump_add0); 2578 2579 /* 2580 * CNTR 1 - This is used to ensure enough time elapses 2581 * for read data to come back. 2582 */ 2583 writel(0x30, &sdr_rw_load_mgr_regs->load_cntr1); 2584 2585 if (test_dm) { 2586 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_WAIT, 2587 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 2588 } else { 2589 writel(RW_MGR_LFSR_WR_RD_BANK_0_WAIT, 2590 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 2591 } 2592 2593 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 2594 writel(mcc_instruction, addr + (group << 2)); 2595 } 2596 2597 /* Test writes, can check for a single bit pass or multiple bit pass */ 2598 static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn, 2599 uint32_t write_group, uint32_t use_dm, uint32_t all_correct, 2600 uint32_t *bit_chk, uint32_t all_ranks) 2601 { 2602 uint32_t r; 2603 uint32_t correct_mask_vg; 2604 uint32_t tmp_bit_chk; 2605 uint32_t vg; 2606 uint32_t rank_end = all_ranks ? 
RW_MGR_MEM_NUMBER_OF_RANKS : 2607 (rank_bgn + NUM_RANKS_PER_SHADOW_REG); 2608 uint32_t addr_rw_mgr; 2609 uint32_t base_rw_mgr; 2610 2611 *bit_chk = param->write_correct_mask; 2612 correct_mask_vg = param->write_correct_mask_vg; 2613 2614 for (r = rank_bgn; r < rank_end; r++) { 2615 if (param->skip_ranks[r]) { 2616 /* request to skip the rank */ 2617 continue; 2618 } 2619 2620 /* set rank */ 2621 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); 2622 2623 tmp_bit_chk = 0; 2624 addr_rw_mgr = SDR_PHYGRP_RWMGRGRP_ADDRESS; 2625 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS-1; ; vg--) { 2626 /* reset the fifos to get pointers to known state */ 2627 writel(0, &phy_mgr_cmd->fifo_reset); 2628 2629 tmp_bit_chk = tmp_bit_chk << 2630 (RW_MGR_MEM_DQ_PER_WRITE_DQS / 2631 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS); 2632 rw_mgr_mem_calibrate_write_test_issue(write_group * 2633 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS+vg, 2634 use_dm); 2635 2636 base_rw_mgr = readl(addr_rw_mgr); 2637 tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~(base_rw_mgr)); 2638 if (vg == 0) 2639 break; 2640 } 2641 *bit_chk &= tmp_bit_chk; 2642 } 2643 2644 if (all_correct) { 2645 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 2646 debug_cond(DLEVEL == 2, "write_test(%u,%u,ALL) : %u == \ 2647 %u => %lu", write_group, use_dm, 2648 *bit_chk, param->write_correct_mask, 2649 (long unsigned int)(*bit_chk == 2650 param->write_correct_mask)); 2651 return *bit_chk == param->write_correct_mask; 2652 } else { 2653 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 2654 debug_cond(DLEVEL == 2, "write_test(%u,%u,ONE) : %u != ", 2655 write_group, use_dm, *bit_chk); 2656 debug_cond(DLEVEL == 2, "%lu" " => %lu", (long unsigned int)0, 2657 (long unsigned int)(*bit_chk != 0)); 2658 return *bit_chk != 0x00; 2659 } 2660 } 2661 2662 /* 2663 * center all windows. do per-bit-deskew to possibly increase size of 2664 * certain windows. 2665 */ 2666 static uint32_t rw_mgr_mem_calibrate_writes_center(uint32_t rank_bgn, 2667 uint32_t write_group, uint32_t test_bgn) 2668 { 2669 uint32_t i, p, min_index; 2670 int32_t d; 2671 /* 2672 * Store these as signed since there are comparisons with 2673 * signed numbers. 2674 */ 2675 uint32_t bit_chk; 2676 uint32_t sticky_bit_chk; 2677 int32_t left_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS]; 2678 int32_t right_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS]; 2679 int32_t mid; 2680 int32_t mid_min, orig_mid_min; 2681 int32_t new_dqs, start_dqs, shift_dq; 2682 int32_t dq_margin, dqs_margin, dm_margin; 2683 uint32_t stop; 2684 uint32_t temp_dq_out1_delay; 2685 uint32_t addr; 2686 2687 debug("%s:%d %u %u", __func__, __LINE__, write_group, test_bgn); 2688 2689 dm_margin = 0; 2690 2691 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET; 2692 start_dqs = readl(addr + 2693 (RW_MGR_MEM_DQ_PER_WRITE_DQS << 2)); 2694 2695 /* per-bit deskew */ 2696 2697 /* 2698 * set the left and right edge of each bit to an illegal value 2699 * use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value. 2700 */ 2701 sticky_bit_chk = 0; 2702 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2703 left_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; 2704 right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; 2705 } 2706 2707 /* Search for the left edge of the window for each bit */ 2708 for (d = 0; d <= IO_IO_OUT1_DELAY_MAX; d++) { 2709 scc_mgr_apply_group_dq_out1_delay(write_group, d); 2710 2711 writel(0, &sdr_scc_mgr->update); 2712 2713 /* 2714 * Stop searching when the read test doesn't pass AND when 2715 * we've seen a passing read on every bit. 
2716 */ 2717 stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 2718 0, PASS_ONE_BIT, &bit_chk, 0); 2719 sticky_bit_chk = sticky_bit_chk | bit_chk; 2720 stop = stop && (sticky_bit_chk == param->write_correct_mask); 2721 debug_cond(DLEVEL == 2, "write_center(left): dtap=%d => %u \ 2722 == %u && %u [bit_chk= %u ]\n", 2723 d, sticky_bit_chk, param->write_correct_mask, 2724 stop, bit_chk); 2725 2726 if (stop == 1) { 2727 break; 2728 } else { 2729 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2730 if (bit_chk & 1) { 2731 /* 2732 * Remember a passing test as the 2733 * left_edge. 2734 */ 2735 left_edge[i] = d; 2736 } else { 2737 /* 2738 * If a left edge has not been seen 2739 * yet, then a future passing test will 2740 * mark this edge as the right edge. 2741 */ 2742 if (left_edge[i] == 2743 IO_IO_OUT1_DELAY_MAX + 1) { 2744 right_edge[i] = -(d + 1); 2745 } 2746 } 2747 debug_cond(DLEVEL == 2, "write_center[l,d=%d):", d); 2748 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d", 2749 (int)(bit_chk & 1), i, left_edge[i]); 2750 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i, 2751 right_edge[i]); 2752 bit_chk = bit_chk >> 1; 2753 } 2754 } 2755 } 2756 2757 /* Reset DQ delay chains to 0 */ 2758 scc_mgr_apply_group_dq_out1_delay(0); 2759 sticky_bit_chk = 0; 2760 for (i = RW_MGR_MEM_DQ_PER_WRITE_DQS - 1;; i--) { 2761 debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: \ 2762 %d right_edge[%u]: %d\n", __func__, __LINE__, 2763 i, left_edge[i], i, right_edge[i]); 2764 2765 /* 2766 * Check for cases where we haven't found the left edge, 2767 * which makes our assignment of the the right edge invalid. 2768 * Reset it to the illegal value. 2769 */ 2770 if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) && 2771 (right_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) { 2772 right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; 2773 debug_cond(DLEVEL == 2, "%s:%d write_center: reset \ 2774 right_edge[%u]: %d\n", __func__, __LINE__, 2775 i, right_edge[i]); 2776 } 2777 2778 /* 2779 * Reset sticky bit (except for bits where we have 2780 * seen the left edge). 2781 */ 2782 sticky_bit_chk = sticky_bit_chk << 1; 2783 if ((left_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) 2784 sticky_bit_chk = sticky_bit_chk | 1; 2785 2786 if (i == 0) 2787 break; 2788 } 2789 2790 /* Search for the right edge of the window for each bit */ 2791 for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - start_dqs; d++) { 2792 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, 2793 d + start_dqs); 2794 2795 writel(0, &sdr_scc_mgr->update); 2796 2797 /* 2798 * Stop searching when the read test doesn't pass AND when 2799 * we've seen a passing read on every bit. 
2800 */ 2801 stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 2802 0, PASS_ONE_BIT, &bit_chk, 0); 2803 2804 sticky_bit_chk = sticky_bit_chk | bit_chk; 2805 stop = stop && (sticky_bit_chk == param->write_correct_mask); 2806 2807 debug_cond(DLEVEL == 2, "write_center (right): dtap=%u => %u == \ 2808 %u && %u\n", d, sticky_bit_chk, 2809 param->write_correct_mask, stop); 2810 2811 if (stop == 1) { 2812 if (d == 0) { 2813 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; 2814 i++) { 2815 /* d = 0 failed, but it passed when 2816 testing the left edge, so it must be 2817 marginal, set it to -1 */ 2818 if (right_edge[i] == 2819 IO_IO_OUT1_DELAY_MAX + 1 && 2820 left_edge[i] != 2821 IO_IO_OUT1_DELAY_MAX + 1) { 2822 right_edge[i] = -1; 2823 } 2824 } 2825 } 2826 break; 2827 } else { 2828 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2829 if (bit_chk & 1) { 2830 /* 2831 * Remember a passing test as 2832 * the right_edge. 2833 */ 2834 right_edge[i] = d; 2835 } else { 2836 if (d != 0) { 2837 /* 2838 * If a right edge has not 2839 * been seen yet, then a future 2840 * passing test will mark this 2841 * edge as the left edge. 2842 */ 2843 if (right_edge[i] == 2844 IO_IO_OUT1_DELAY_MAX + 1) 2845 left_edge[i] = -(d + 1); 2846 } else { 2847 /* 2848 * d = 0 failed, but it passed 2849 * when testing the left edge, 2850 * so it must be marginal, set 2851 * it to -1. 2852 */ 2853 if (right_edge[i] == 2854 IO_IO_OUT1_DELAY_MAX + 1 && 2855 left_edge[i] != 2856 IO_IO_OUT1_DELAY_MAX + 1) 2857 right_edge[i] = -1; 2858 /* 2859 * If a right edge has not been 2860 * seen yet, then a future 2861 * passing test will mark this 2862 * edge as the left edge. 2863 */ 2864 else if (right_edge[i] == 2865 IO_IO_OUT1_DELAY_MAX + 2866 1) 2867 left_edge[i] = -(d + 1); 2868 } 2869 } 2870 debug_cond(DLEVEL == 2, "write_center[r,d=%d):", d); 2871 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d", 2872 (int)(bit_chk & 1), i, left_edge[i]); 2873 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i, 2874 right_edge[i]); 2875 bit_chk = bit_chk >> 1; 2876 } 2877 } 2878 } 2879 2880 /* Check that all bits have a window */ 2881 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2882 debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: \ 2883 %d right_edge[%u]: %d", __func__, __LINE__, 2884 i, left_edge[i], i, right_edge[i]); 2885 if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) || 2886 (right_edge[i] == IO_IO_OUT1_DELAY_MAX + 1)) { 2887 set_failing_group_stage(test_bgn + i, 2888 CAL_STAGE_WRITES, 2889 CAL_SUBSTAGE_WRITES_CENTER); 2890 return 0; 2891 } 2892 } 2893 2894 /* Find middle of window for each DQ bit */ 2895 mid_min = left_edge[0] - right_edge[0]; 2896 min_index = 0; 2897 for (i = 1; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2898 mid = left_edge[i] - right_edge[i]; 2899 if (mid < mid_min) { 2900 mid_min = mid; 2901 min_index = i; 2902 } 2903 } 2904 2905 /* 2906 * -mid_min/2 represents the amount that we need to move DQS. 2907 * If mid_min is odd and positive we'll need to add one to 2908 * make sure the rounding in further calculations is correct 2909 * (always bias to the right), so just add 1 for all positive values. 
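 *
 * For example (illustrative numbers): left_edge = 7 and right_edge = 2
 * give mid_min = 5; the increment below makes it 6, so the divide by
 * two yields 3 rather than 2, keeping the bias to the right.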
2910 */ 2911 if (mid_min > 0) 2912 mid_min++; 2913 mid_min = mid_min / 2; 2914 debug_cond(DLEVEL == 1, "%s:%d write_center: mid_min=%d\n", __func__, 2915 __LINE__, mid_min); 2916 2917 /* Determine the amount we can change DQS (which is -mid_min) */ 2918 orig_mid_min = mid_min; 2919 new_dqs = start_dqs; 2920 mid_min = 0; 2921 debug_cond(DLEVEL == 1, "%s:%d write_center: start_dqs=%d new_dqs=%d \ 2922 mid_min=%d\n", __func__, __LINE__, start_dqs, new_dqs, mid_min); 2923 /* Initialize data for export structures */ 2924 dqs_margin = IO_IO_OUT1_DELAY_MAX + 1; 2925 dq_margin = IO_IO_OUT1_DELAY_MAX + 1; 2926 2927 /* add delay to bring centre of all DQ windows to the same "level" */ 2928 for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) { 2929 /* Use values before divide by 2 to reduce round off error */ 2930 shift_dq = (left_edge[i] - right_edge[i] - 2931 (left_edge[min_index] - right_edge[min_index]))/2 + 2932 (orig_mid_min - mid_min); 2933 2934 debug_cond(DLEVEL == 2, "%s:%d write_center: before: shift_dq \ 2935 [%u]=%d\n", __func__, __LINE__, i, shift_dq); 2936 2937 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET; 2938 temp_dq_out1_delay = readl(addr + (i << 2)); 2939 if (shift_dq + (int32_t)temp_dq_out1_delay > 2940 (int32_t)IO_IO_OUT1_DELAY_MAX) { 2941 shift_dq = (int32_t)IO_IO_OUT1_DELAY_MAX - temp_dq_out1_delay; 2942 } else if (shift_dq + (int32_t)temp_dq_out1_delay < 0) { 2943 shift_dq = -(int32_t)temp_dq_out1_delay; 2944 } 2945 debug_cond(DLEVEL == 2, "write_center: after: shift_dq[%u]=%d\n", 2946 i, shift_dq); 2947 scc_mgr_set_dq_out1_delay(i, temp_dq_out1_delay + shift_dq); 2948 scc_mgr_load_dq(i); 2949 2950 debug_cond(DLEVEL == 2, "write_center: margin[%u]=[%d,%d]\n", i, 2951 left_edge[i] - shift_dq + (-mid_min), 2952 right_edge[i] + shift_dq - (-mid_min)); 2953 /* To determine values for export structures */ 2954 if (left_edge[i] - shift_dq + (-mid_min) < dq_margin) 2955 dq_margin = left_edge[i] - shift_dq + (-mid_min); 2956 2957 if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin) 2958 dqs_margin = right_edge[i] + shift_dq - (-mid_min); 2959 } 2960 2961 /* Move DQS */ 2962 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs); 2963 writel(0, &sdr_scc_mgr->update); 2964 2965 /* Centre DM */ 2966 debug_cond(DLEVEL == 2, "%s:%d write_center: DM\n", __func__, __LINE__); 2967 2968 /* 2969 * set the left and right edge of each bit to an illegal value, 2970 * use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value, 2971 */ 2972 left_edge[0] = IO_IO_OUT1_DELAY_MAX + 1; 2973 right_edge[0] = IO_IO_OUT1_DELAY_MAX + 1; 2974 int32_t bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; 2975 int32_t end_curr = IO_IO_OUT1_DELAY_MAX + 1; 2976 int32_t bgn_best = IO_IO_OUT1_DELAY_MAX + 1; 2977 int32_t end_best = IO_IO_OUT1_DELAY_MAX + 1; 2978 int32_t win_best = 0; 2979 2980 /* Search for the/part of the window with DM shift */ 2981 for (d = IO_IO_OUT1_DELAY_MAX; d >= 0; d -= DELTA_D) { 2982 scc_mgr_apply_group_dm_out1_delay(d); 2983 writel(0, &sdr_scc_mgr->update); 2984 2985 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1, 2986 PASS_ALL_BITS, &bit_chk, 2987 0)) { 2988 /* USE Set current end of the window */ 2989 end_curr = -d; 2990 /* 2991 * If a starting edge of our window has not been seen 2992 * this is our current start of the DM window. 2993 */ 2994 if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1) 2995 bgn_curr = -d; 2996 2997 /* 2998 * If current window is bigger than best seen. 2999 * Set best seen to be current window. 
3000 */ 3001 if ((end_curr-bgn_curr+1) > win_best) { 3002 win_best = end_curr-bgn_curr+1; 3003 bgn_best = bgn_curr; 3004 end_best = end_curr; 3005 } 3006 } else { 3007 /* We just saw a failing test. Reset temp edge */ 3008 bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; 3009 end_curr = IO_IO_OUT1_DELAY_MAX + 1; 3010 } 3011 } 3012 3013 3014 /* Reset DM delay chains to 0 */ 3015 scc_mgr_apply_group_dm_out1_delay(0); 3016 3017 /* 3018 * Check to see if the current window nudges up against 0 delay. 3019 * If so we need to continue the search by shifting DQS, otherwise the 3020 * DQS search begins as a new search. */ 3021 if (end_curr != 0) { 3022 bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; 3023 end_curr = IO_IO_OUT1_DELAY_MAX + 1; 3024 } 3025 3026 /* Search for the (or a part of the) window with DQS shifts */ 3027 for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - new_dqs; d += DELTA_D) { 3028 /* 3029 * Note: This only shifts DQS, so we may be limiting ourselves 3030 * to the width of DQ unnecessarily. 3031 */ 3032 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, 3033 d + new_dqs); 3034 3035 writel(0, &sdr_scc_mgr->update); 3036 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1, 3037 PASS_ALL_BITS, &bit_chk, 3038 0)) { 3039 /* USER Set current end of the window */ 3040 end_curr = d; 3041 /* 3042 * If a beginning edge of our window has not been seen, 3043 * this is our current beginning of the DM window. 3044 */ 3045 if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1) 3046 bgn_curr = d; 3047 3048 /* 3049 * If the current window is bigger than the best seen, 3050 * set best seen to be the current window. 3051 */ 3052 if ((end_curr-bgn_curr+1) > win_best) { 3053 win_best = end_curr-bgn_curr+1; 3054 bgn_best = bgn_curr; 3055 end_best = end_curr; 3056 } 3057 } else { 3058 /* We just saw a failing test. Reset temp edge */ 3059 bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; 3060 end_curr = IO_IO_OUT1_DELAY_MAX + 1; 3061 3062 /* Early exit optimization: if the remaining delay 3063 chain space is less than the largest window already 3064 seen, we can exit */ 3065 if ((win_best-1) > 3066 (IO_IO_OUT1_DELAY_MAX - new_dqs - d)) { 3067 break; 3068 } 3069 } 3070 } 3071 3072 /* Assign left and right edge for cal and reporting */ 3073 left_edge[0] = -1*bgn_best; 3074 right_edge[0] = end_best; 3075 3076 debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d\n", __func__, 3077 __LINE__, left_edge[0], right_edge[0]); 3078 3079 /* Move DQS (back to orig) */ 3080 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs); 3081 3082 /* Move DM */ 3083 3084 /* Find middle of window for the DM bit */ 3085 mid = (left_edge[0] - right_edge[0]) / 2; 3086 3087 /* only move right, since we are not moving DQS/DQ */ 3088 if (mid < 0) 3089 mid = 0; 3090 3091 /* dm_margin should fail if we never find a window */ 3092 if (win_best == 0) 3093 dm_margin = -1; 3094 else 3095 dm_margin = left_edge[0] - mid; 3096 3097 scc_mgr_apply_group_dm_out1_delay(mid); 3098 writel(0, &sdr_scc_mgr->update); 3099 3100 debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d mid=%d \ 3101 dm_margin=%d\n", __func__, __LINE__, left_edge[0], 3102 right_edge[0], mid, dm_margin); 3103 /* Export values */ 3104 gbl->fom_out += dq_margin + dqs_margin; 3105 3106 debug_cond(DLEVEL == 2, "%s:%d write_center: dq_margin=%d \ 3107 dqs_margin=%d dm_margin=%d\n", __func__, __LINE__, 3108 dq_margin, dqs_margin, dm_margin); 3109 3110 /* 3111 * Do not remove this line as it makes sure all of our 3112 * decisions have been applied.
3113 */ 3114 writel(0, &sdr_scc_mgr->update); 3115 return (dq_margin >= 0) && (dqs_margin >= 0) && (dm_margin >= 0); 3116 } 3117 3118 /* calibrate the write operations */ 3119 static uint32_t rw_mgr_mem_calibrate_writes(uint32_t rank_bgn, uint32_t g, 3120 uint32_t test_bgn) 3121 { 3122 /* update info for sims */ 3123 debug("%s:%d %u %u\n", __func__, __LINE__, g, test_bgn); 3124 3125 reg_file_set_stage(CAL_STAGE_WRITES); 3126 reg_file_set_sub_stage(CAL_SUBSTAGE_WRITES_CENTER); 3127 3128 reg_file_set_group(g); 3129 3130 if (!rw_mgr_mem_calibrate_writes_center(rank_bgn, g, test_bgn)) { 3131 set_failing_group_stage(g, CAL_STAGE_WRITES, 3132 CAL_SUBSTAGE_WRITES_CENTER); 3133 return 0; 3134 } 3135 3136 return 1; 3137 } 3138 3139 /* precharge all banks and activate row 0 in bank "000..." and bank "111..." */ 3140 static void mem_precharge_and_activate(void) 3141 { 3142 uint32_t r; 3143 3144 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { 3145 if (param->skip_ranks[r]) { 3146 /* request to skip the rank */ 3147 continue; 3148 } 3149 3150 /* set rank */ 3151 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); 3152 3153 /* precharge all banks ... */ 3154 writel(RW_MGR_PRECHARGE_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS | 3155 RW_MGR_RUN_SINGLE_GROUP_OFFSET); 3156 3157 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr0); 3158 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT1, 3159 &sdr_rw_load_jump_mgr_regs->load_jump_add0); 3160 3161 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr1); 3162 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT2, 3163 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 3164 3165 /* activate rows */ 3166 writel(RW_MGR_ACTIVATE_0_AND_1, SDR_PHYGRP_RWMGRGRP_ADDRESS | 3167 RW_MGR_RUN_SINGLE_GROUP_OFFSET); 3168 } 3169 } 3170 3171 /* Configure various memory related parameters. */ 3172 static void mem_config(void) 3173 { 3174 uint32_t rlat, wlat; 3175 uint32_t rw_wl_nop_cycles; 3176 uint32_t max_latency; 3177 3178 debug("%s:%d\n", __func__, __LINE__); 3179 /* read in write and read latency */ 3180 wlat = readl(&data_mgr->t_wl_add); 3181 wlat += readl(&data_mgr->mem_t_add); 3182 3183 /* WL for hard phy does not include additive latency */ 3184 3185 /* 3186 * add addtional write latency to offset the address/command extra 3187 * clock cycle. We change the AC mux setting causing AC to be delayed 3188 * by one mem clock cycle. Only do this for DDR3 3189 */ 3190 wlat = wlat + 1; 3191 3192 rlat = readl(&data_mgr->t_rl_add); 3193 3194 rw_wl_nop_cycles = wlat - 2; 3195 gbl->rw_wl_nop_cycles = rw_wl_nop_cycles; 3196 3197 /* 3198 * For AV/CV, lfifo is hardened and always runs at full rate so 3199 * max latency in AFI clocks, used here, is correspondingly smaller. 
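 *
 * Illustrative sketch only (the actual value of MAX_LATENCY_COUNT_WIDTH
 * is a build-time define): with a width of 5 the expression below
 * evaluates to (1 << 5) / 1 - 1 = 31, and gbl->curr_read_lat is
 * clamped to that many AFI clocks.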
3200 */ 3201 max_latency = (1<<MAX_LATENCY_COUNT_WIDTH)/1 - 1; 3202 /* configure for a burst length of 8 */ 3203 3204 /* write latency */ 3205 /* Adjust Write Latency for Hard PHY */ 3206 wlat = wlat + 1; 3207 3208 /* set a pretty high read latency initially */ 3209 gbl->curr_read_lat = rlat + 16; 3210 3211 if (gbl->curr_read_lat > max_latency) 3212 gbl->curr_read_lat = max_latency; 3213 3214 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); 3215 3216 /* advertise write latency */ 3217 gbl->curr_write_lat = wlat; 3218 writel(wlat - 2, &phy_mgr_cfg->afi_wlat); 3219 3220 /* initialize bit slips */ 3221 mem_precharge_and_activate(); 3222 } 3223 3224 /* Set VFIFO and LFIFO to instant-on settings in skip calibration mode */ 3225 static void mem_skip_calibrate(void) 3226 { 3227 uint32_t vfifo_offset; 3228 uint32_t i, j, r; 3229 3230 debug("%s:%d\n", __func__, __LINE__); 3231 /* Need to update every shadow register set used by the interface */ 3232 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 3233 r += NUM_RANKS_PER_SHADOW_REG) { 3234 /* 3235 * Set output phase alignment settings appropriate for 3236 * skip calibration. 3237 */ 3238 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { 3239 scc_mgr_set_dqs_en_phase(i, 0); 3240 #if IO_DLL_CHAIN_LENGTH == 6 3241 scc_mgr_set_dqdqs_output_phase(i, 6); 3242 #else 3243 scc_mgr_set_dqdqs_output_phase(i, 7); 3244 #endif 3245 /* 3246 * Case:33398 3247 * 3248 * Write data arrives to the I/O two cycles before write 3249 * latency is reached (720 deg). 3250 * -> due to bit-slip in a/c bus 3251 * -> to allow board skew where dqs is longer than ck 3252 * -> how often can this happen!? 3253 * -> can claim back some ptaps for high freq 3254 * support if we can relax this, but i digress... 3255 * 3256 * The write_clk leads mem_ck by 90 deg 3257 * The minimum ptap of the OPA is 180 deg 3258 * Each ptap has (360 / IO_DLL_CHAIN_LENGH) deg of delay 3259 * The write_clk is always delayed by 2 ptaps 3260 * 3261 * Hence, to make DQS aligned to CK, we need to delay 3262 * DQS by: 3263 * (720 - 90 - 180 - 2 * (360 / IO_DLL_CHAIN_LENGTH)) 3264 * 3265 * Dividing the above by (360 / IO_DLL_CHAIN_LENGTH) 3266 * gives us the number of ptaps, which simplies to: 3267 * 3268 * (1.25 * IO_DLL_CHAIN_LENGTH - 2) 3269 */ 3270 scc_mgr_set_dqdqs_output_phase(i, (1.25 * 3271 IO_DLL_CHAIN_LENGTH - 2)); 3272 } 3273 writel(0xff, &sdr_scc_mgr->dqs_ena); 3274 writel(0xff, &sdr_scc_mgr->dqs_io_ena); 3275 3276 for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) { 3277 writel(i, SDR_PHYGRP_SCCGRP_ADDRESS | 3278 SCC_MGR_GROUP_COUNTER_OFFSET); 3279 } 3280 writel(0xff, &sdr_scc_mgr->dq_ena); 3281 writel(0xff, &sdr_scc_mgr->dm_ena); 3282 writel(0, &sdr_scc_mgr->update); 3283 } 3284 3285 /* Compensate for simulation model behaviour */ 3286 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { 3287 scc_mgr_set_dqs_bus_in_delay(i, 10); 3288 scc_mgr_load_dqs(i); 3289 } 3290 writel(0, &sdr_scc_mgr->update); 3291 3292 /* 3293 * ArriaV has hard FIFOs that can only be initialized by incrementing 3294 * in sequencer. 3295 */ 3296 vfifo_offset = CALIB_VFIFO_OFFSET; 3297 for (j = 0; j < vfifo_offset; j++) { 3298 writel(0xff, &phy_mgr_cmd->inc_vfifo_hard_phy); 3299 } 3300 writel(0, &phy_mgr_cmd->fifo_reset); 3301 3302 /* 3303 * For ACV with hard lfifo, we get the skip-cal setting from 3304 * generation-time constant. 
3305 */ 3306 gbl->curr_read_lat = CALIB_LFIFO_OFFSET; 3307 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); 3308 } 3309 3310 /* Memory calibration entry point */ 3311 static uint32_t mem_calibrate(void) 3312 { 3313 uint32_t i; 3314 uint32_t rank_bgn, sr; 3315 uint32_t write_group, write_test_bgn; 3316 uint32_t read_group, read_test_bgn; 3317 uint32_t run_groups, current_run; 3318 uint32_t failing_groups = 0; 3319 uint32_t group_failed = 0; 3320 uint32_t sr_failed = 0; 3321 3322 debug("%s:%d\n", __func__, __LINE__); 3323 /* Initialize the data settings */ 3324 3325 gbl->error_substage = CAL_SUBSTAGE_NIL; 3326 gbl->error_stage = CAL_STAGE_NIL; 3327 gbl->error_group = 0xff; 3328 gbl->fom_in = 0; 3329 gbl->fom_out = 0; 3330 3331 mem_config(); 3332 3333 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { 3334 writel(i, SDR_PHYGRP_SCCGRP_ADDRESS | 3335 SCC_MGR_GROUP_COUNTER_OFFSET); 3336 /* Only needed once to set all groups, pins, DQ, DQS, DM. */ 3337 if (i == 0) 3338 scc_mgr_set_hhp_extras(); 3339 3340 scc_set_bypass_mode(i); 3341 } 3342 3343 if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) { 3344 /* 3345 * Set VFIFO and LFIFO to instant-on settings in skip 3346 * calibration mode. 3347 */ 3348 mem_skip_calibrate(); 3349 } else { 3350 for (i = 0; i < NUM_CALIB_REPEAT; i++) { 3351 /* 3352 * Zero all delay chain/phase settings for all 3353 * groups and all shadow register sets. 3354 */ 3355 scc_mgr_zero_all(); 3356 3357 run_groups = ~param->skip_groups; 3358 3359 for (write_group = 0, write_test_bgn = 0; write_group 3360 < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; write_group++, 3361 write_test_bgn += RW_MGR_MEM_DQ_PER_WRITE_DQS) { 3362 /* Initialized the group failure */ 3363 group_failed = 0; 3364 3365 current_run = run_groups & ((1 << 3366 RW_MGR_NUM_DQS_PER_WRITE_GROUP) - 1); 3367 run_groups = run_groups >> 3368 RW_MGR_NUM_DQS_PER_WRITE_GROUP; 3369 3370 if (current_run == 0) 3371 continue; 3372 3373 writel(write_group, SDR_PHYGRP_SCCGRP_ADDRESS | 3374 SCC_MGR_GROUP_COUNTER_OFFSET); 3375 scc_mgr_zero_group(write_group, 0); 3376 3377 for (read_group = write_group * 3378 RW_MGR_MEM_IF_READ_DQS_WIDTH / 3379 RW_MGR_MEM_IF_WRITE_DQS_WIDTH, 3380 read_test_bgn = 0; 3381 read_group < (write_group + 1) * 3382 RW_MGR_MEM_IF_READ_DQS_WIDTH / 3383 RW_MGR_MEM_IF_WRITE_DQS_WIDTH && 3384 group_failed == 0; 3385 read_group++, read_test_bgn += 3386 RW_MGR_MEM_DQ_PER_READ_DQS) { 3387 /* Calibrate the VFIFO */ 3388 if (!((STATIC_CALIB_STEPS) & 3389 CALIB_SKIP_VFIFO)) { 3390 if (!rw_mgr_mem_calibrate_vfifo 3391 (read_group, 3392 read_test_bgn)) { 3393 group_failed = 1; 3394 3395 if (!(gbl-> 3396 phy_debug_mode_flags & 3397 PHY_DEBUG_SWEEP_ALL_GROUPS)) { 3398 return 0; 3399 } 3400 } 3401 } 3402 } 3403 3404 /* Calibrate the output side */ 3405 if (group_failed == 0) { 3406 for (rank_bgn = 0, sr = 0; rank_bgn 3407 < RW_MGR_MEM_NUMBER_OF_RANKS; 3408 rank_bgn += 3409 NUM_RANKS_PER_SHADOW_REG, 3410 ++sr) { 3411 sr_failed = 0; 3412 if (!((STATIC_CALIB_STEPS) & 3413 CALIB_SKIP_WRITES)) { 3414 if ((STATIC_CALIB_STEPS) 3415 & CALIB_SKIP_DELAY_SWEEPS) { 3416 /* not needed in quick mode! */ 3417 } else { 3418 /* 3419 * Determine if this set of 3420 * ranks should be skipped 3421 * entirely. 
3422 */ 3423 if (!param->skip_shadow_regs[sr]) { 3424 if (!rw_mgr_mem_calibrate_writes 3425 (rank_bgn, write_group, 3426 write_test_bgn)) { 3427 sr_failed = 1; 3428 if (!(gbl-> 3429 phy_debug_mode_flags & 3430 PHY_DEBUG_SWEEP_ALL_GROUPS)) { 3431 return 0; 3432 } 3433 } 3434 } 3435 } 3436 } 3437 if (sr_failed != 0) 3438 group_failed = 1; 3439 } 3440 } 3441 3442 if (group_failed == 0) { 3443 for (read_group = write_group * 3444 RW_MGR_MEM_IF_READ_DQS_WIDTH / 3445 RW_MGR_MEM_IF_WRITE_DQS_WIDTH, 3446 read_test_bgn = 0; 3447 read_group < (write_group + 1) 3448 * RW_MGR_MEM_IF_READ_DQS_WIDTH 3449 / RW_MGR_MEM_IF_WRITE_DQS_WIDTH && 3450 group_failed == 0; 3451 read_group++, read_test_bgn += 3452 RW_MGR_MEM_DQ_PER_READ_DQS) { 3453 if (!((STATIC_CALIB_STEPS) & 3454 CALIB_SKIP_WRITES)) { 3455 if (!rw_mgr_mem_calibrate_vfifo_end 3456 (read_group, read_test_bgn)) { 3457 group_failed = 1; 3458 3459 if (!(gbl->phy_debug_mode_flags 3460 & PHY_DEBUG_SWEEP_ALL_GROUPS)) { 3461 return 0; 3462 } 3463 } 3464 } 3465 } 3466 } 3467 3468 if (group_failed != 0) 3469 failing_groups++; 3470 } 3471 3472 /* 3473 * USER If there are any failing groups then report 3474 * the failure. 3475 */ 3476 if (failing_groups != 0) 3477 return 0; 3478 3479 /* Calibrate the LFIFO */ 3480 if (!((STATIC_CALIB_STEPS) & CALIB_SKIP_LFIFO)) { 3481 /* 3482 * If we're skipping groups as part of debug, 3483 * don't calibrate LFIFO. 3484 */ 3485 if (param->skip_groups == 0) { 3486 if (!rw_mgr_mem_calibrate_lfifo()) 3487 return 0; 3488 } 3489 } 3490 } 3491 } 3492 3493 /* 3494 * Do not remove this line as it makes sure all of our decisions 3495 * have been applied. 3496 */ 3497 writel(0, &sdr_scc_mgr->update); 3498 return 1; 3499 } 3500 3501 static uint32_t run_mem_calibrate(void) 3502 { 3503 uint32_t pass; 3504 uint32_t debug_info; 3505 3506 debug("%s:%d\n", __func__, __LINE__); 3507 3508 /* Reset pass/fail status shown on afi_cal_success/fail */ 3509 writel(PHY_MGR_CAL_RESET, &phy_mgr_cfg->cal_status); 3510 3511 /* stop tracking manger */ 3512 uint32_t ctrlcfg = readl(&sdr_ctrl->ctrl_cfg); 3513 3514 writel(ctrlcfg & 0xFFBFFFFF, &sdr_ctrl->ctrl_cfg); 3515 3516 initialize(); 3517 rw_mgr_mem_initialize(); 3518 3519 pass = mem_calibrate(); 3520 3521 mem_precharge_and_activate(); 3522 writel(0, &phy_mgr_cmd->fifo_reset); 3523 3524 /* 3525 * Handoff: 3526 * Don't return control of the PHY back to AFI when in debug mode. 
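 * When not in debug mode the RW manager handoff sequence is issued and
 * mux_sel is switched from the 0x3 programmed in initialize() to 0x2,
 * returning control of the interface to the controller (AFI).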
3527 */ 3528 if ((gbl->phy_debug_mode_flags & PHY_DEBUG_IN_DEBUG_MODE) == 0) { 3529 rw_mgr_mem_handoff(); 3530 /* 3531 * In Hard PHY this is a 2-bit control: 3532 * 0: AFI Mux Select 3533 * 1: DDIO Mux Select 3534 */ 3535 writel(0x2, &phy_mgr_cfg->mux_sel); 3536 } 3537 3538 writel(ctrlcfg, &sdr_ctrl->ctrl_cfg); 3539 3540 if (pass) { 3541 printf("%s: CALIBRATION PASSED\n", __FILE__); 3542 3543 gbl->fom_in /= 2; 3544 gbl->fom_out /= 2; 3545 3546 if (gbl->fom_in > 0xff) 3547 gbl->fom_in = 0xff; 3548 3549 if (gbl->fom_out > 0xff) 3550 gbl->fom_out = 0xff; 3551 3552 /* Update the FOM in the register file */ 3553 debug_info = gbl->fom_in; 3554 debug_info |= gbl->fom_out << 8; 3555 writel(debug_info, &sdr_reg_file->fom); 3556 3557 writel(debug_info, &phy_mgr_cfg->cal_debug_info); 3558 writel(PHY_MGR_CAL_SUCCESS, &phy_mgr_cfg->cal_status); 3559 } else { 3560 printf("%s: CALIBRATION FAILED\n", __FILE__); 3561 3562 debug_info = gbl->error_stage; 3563 debug_info |= gbl->error_substage << 8; 3564 debug_info |= gbl->error_group << 16; 3565 3566 writel(debug_info, &sdr_reg_file->failing_stage); 3567 writel(debug_info, &phy_mgr_cfg->cal_debug_info); 3568 writel(PHY_MGR_CAL_FAIL, &phy_mgr_cfg->cal_status); 3569 3570 /* Update the failing group/stage in the register file */ 3571 debug_info = gbl->error_stage; 3572 debug_info |= gbl->error_substage << 8; 3573 debug_info |= gbl->error_group << 16; 3574 writel(debug_info, &sdr_reg_file->failing_stage); 3575 } 3576 3577 return pass; 3578 } 3579 3580 /** 3581 * hc_initialize_rom_data() - Initialize ROM data 3582 * 3583 * Initialize ROM data. 3584 */ 3585 static void hc_initialize_rom_data(void) 3586 { 3587 u32 i, addr; 3588 3589 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_INST_ROM_WRITE_OFFSET; 3590 for (i = 0; i < ARRAY_SIZE(inst_rom_init); i++) 3591 writel(inst_rom_init[i], addr + (i << 2)); 3592 3593 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_AC_ROM_WRITE_OFFSET; 3594 for (i = 0; i < ARRAY_SIZE(ac_rom_init); i++) 3595 writel(ac_rom_init[i], addr + (i << 2)); 3596 } 3597 3598 /** 3599 * initialize_reg_file() - Initialize SDR register file 3600 * 3601 * Initialize SDR register file. 3602 */ 3603 static void initialize_reg_file(void) 3604 { 3605 /* Initialize the register file with the correct data */ 3606 writel(REG_FILE_INIT_SEQ_SIGNATURE, &sdr_reg_file->signature); 3607 writel(0, &sdr_reg_file->debug_data_addr); 3608 writel(0, &sdr_reg_file->cur_stage); 3609 writel(0, &sdr_reg_file->fom); 3610 writel(0, &sdr_reg_file->failing_stage); 3611 writel(0, &sdr_reg_file->debug1); 3612 writel(0, &sdr_reg_file->debug2); 3613 } 3614 3615 /** 3616 * initialize_hps_phy() - Initialize HPS PHY 3617 * 3618 * Initialize HPS PHY. 3619 */ 3620 static void initialize_hps_phy(void) 3621 { 3622 uint32_t reg; 3623 /* 3624 * Tracking also gets configured here because it's in the 3625 * same register. 3626 */ 3627 uint32_t trk_sample_count = 7500; 3628 uint32_t trk_long_idle_sample_count = (10 << 16) | 100; 3629 /* 3630 * Format is number of outer loops in the 16 MSB, sample 3631 * count in 16 LSB. 3632 */ 3633 3634 reg = 0; 3635 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ACDELAYEN_SET(2); 3636 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQDELAYEN_SET(1); 3637 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSDELAYEN_SET(1); 3638 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSLOGICDELAYEN_SET(1); 3639 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_RESETDELAYEN_SET(0); 3640 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_LPDDRDIS_SET(1); 3641 /* 3642 * This field selects the intrinsic latency to RDATA_EN/FULL path. 
3643 * 00-bypass, 01- add 5 cycles, 10- add 10 cycles, 11- add 15 cycles. 3644 */ 3645 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ADDLATSEL_SET(0); 3646 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_SET( 3647 trk_sample_count); 3648 writel(reg, &sdr_ctrl->phy_ctrl0); 3649 3650 reg = 0; 3651 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_SAMPLECOUNT_31_20_SET( 3652 trk_sample_count >> 3653 SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_WIDTH); 3654 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_SET( 3655 trk_long_idle_sample_count); 3656 writel(reg, &sdr_ctrl->phy_ctrl1); 3657 3658 reg = 0; 3659 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_LONGIDLESAMPLECOUNT_31_20_SET( 3660 trk_long_idle_sample_count >> 3661 SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_WIDTH); 3662 writel(reg, &sdr_ctrl->phy_ctrl2); 3663 } 3664 3665 static void initialize_tracking(void) 3666 { 3667 uint32_t concatenated_longidle = 0x0; 3668 uint32_t concatenated_delays = 0x0; 3669 uint32_t concatenated_rw_addr = 0x0; 3670 uint32_t concatenated_refresh = 0x0; 3671 uint32_t trk_sample_count = 7500; 3672 uint32_t dtaps_per_ptap; 3673 uint32_t tmp_delay; 3674 3675 /* 3676 * compute usable version of value in case we skip full 3677 * computation later 3678 */ 3679 dtaps_per_ptap = 0; 3680 tmp_delay = 0; 3681 while (tmp_delay < IO_DELAY_PER_OPA_TAP) { 3682 dtaps_per_ptap++; 3683 tmp_delay += IO_DELAY_PER_DCHAIN_TAP; 3684 } 3685 dtaps_per_ptap--; 3686 3687 concatenated_longidle = concatenated_longidle ^ 10; 3688 /*longidle outer loop */ 3689 concatenated_longidle = concatenated_longidle << 16; 3690 concatenated_longidle = concatenated_longidle ^ 100; 3691 /*longidle sample count */ 3692 concatenated_delays = concatenated_delays ^ 243; 3693 /* trfc, worst case of 933Mhz 4Gb */ 3694 concatenated_delays = concatenated_delays << 8; 3695 concatenated_delays = concatenated_delays ^ 14; 3696 /* trcd, worst case */ 3697 concatenated_delays = concatenated_delays << 8; 3698 concatenated_delays = concatenated_delays ^ 10; 3699 /* vfifo wait */ 3700 concatenated_delays = concatenated_delays << 8; 3701 concatenated_delays = concatenated_delays ^ 4; 3702 /* mux delay */ 3703 3704 concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_IDLE; 3705 concatenated_rw_addr = concatenated_rw_addr << 8; 3706 concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_ACTIVATE_1; 3707 concatenated_rw_addr = concatenated_rw_addr << 8; 3708 concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_SGLE_READ; 3709 concatenated_rw_addr = concatenated_rw_addr << 8; 3710 concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_PRECHARGE_ALL; 3711 3712 concatenated_refresh = concatenated_refresh ^ RW_MGR_REFRESH_ALL; 3713 concatenated_refresh = concatenated_refresh << 24; 3714 concatenated_refresh = concatenated_refresh ^ 1000; /* trefi */ 3715 3716 /* Initialize the register file with the correct data */ 3717 writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap); 3718 writel(trk_sample_count, &sdr_reg_file->trk_sample_count); 3719 writel(concatenated_longidle, &sdr_reg_file->trk_longidle); 3720 writel(concatenated_delays, &sdr_reg_file->delays); 3721 writel(concatenated_rw_addr, &sdr_reg_file->trk_rw_mgr_addr); 3722 writel(RW_MGR_MEM_IF_READ_DQS_WIDTH, &sdr_reg_file->trk_read_dqs_width); 3723 writel(concatenated_refresh, &sdr_reg_file->trk_rfsh); 3724 } 3725 3726 int sdram_calibration_full(void) 3727 { 3728 struct param_type my_param; 3729 struct gbl_type my_gbl; 3730 uint32_t pass; 3731 uint32_t i; 3732 3733 param = &my_param; 3734 gbl = &my_gbl; 3735 3736 /* 
Initialize the debug mode flags */ 3737 gbl->phy_debug_mode_flags = 0; 3738 /* Set the calibration enabled by default */ 3739 gbl->phy_debug_mode_flags |= PHY_DEBUG_ENABLE_CAL_RPT; 3740 /* 3741 * Only sweep all groups (regardless of fail state) by default 3742 * Set enabled read test by default. 3743 */ 3744 #if DISABLE_GUARANTEED_READ 3745 gbl->phy_debug_mode_flags |= PHY_DEBUG_DISABLE_GUARANTEED_READ; 3746 #endif 3747 /* Initialize the register file */ 3748 initialize_reg_file(); 3749 3750 /* Initialize any PHY CSR */ 3751 initialize_hps_phy(); 3752 3753 scc_mgr_initialize(); 3754 3755 initialize_tracking(); 3756 3757 /* USER Enable all ranks, groups */ 3758 for (i = 0; i < RW_MGR_MEM_NUMBER_OF_RANKS; i++) 3759 param->skip_ranks[i] = 0; 3760 for (i = 0; i < NUM_SHADOW_REGS; ++i) 3761 param->skip_shadow_regs[i] = 0; 3762 param->skip_groups = 0; 3763 3764 printf("%s: Preparing to start memory calibration\n", __FILE__); 3765 3766 debug("%s:%d\n", __func__, __LINE__); 3767 debug_cond(DLEVEL == 1, 3768 "DDR3 FULL_RATE ranks=%u cs/dimm=%u dq/dqs=%u,%u vg/dqs=%u,%u ", 3769 RW_MGR_MEM_NUMBER_OF_RANKS, RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM, 3770 RW_MGR_MEM_DQ_PER_READ_DQS, RW_MGR_MEM_DQ_PER_WRITE_DQS, 3771 RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS, 3772 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS); 3773 debug_cond(DLEVEL == 1, 3774 "dqs=%u,%u dq=%u dm=%u ptap_delay=%u dtap_delay=%u ", 3775 RW_MGR_MEM_IF_READ_DQS_WIDTH, RW_MGR_MEM_IF_WRITE_DQS_WIDTH, 3776 RW_MGR_MEM_DATA_WIDTH, RW_MGR_MEM_DATA_MASK_WIDTH, 3777 IO_DELAY_PER_OPA_TAP, IO_DELAY_PER_DCHAIN_TAP); 3778 debug_cond(DLEVEL == 1, "dtap_dqsen_delay=%u, dll=%u", 3779 IO_DELAY_PER_DQS_EN_DCHAIN_TAP, IO_DLL_CHAIN_LENGTH); 3780 debug_cond(DLEVEL == 1, "max values: en_p=%u dqdqs_p=%u en_d=%u dqs_in_d=%u ", 3781 IO_DQS_EN_PHASE_MAX, IO_DQDQS_OUT_PHASE_MAX, 3782 IO_DQS_EN_DELAY_MAX, IO_DQS_IN_DELAY_MAX); 3783 debug_cond(DLEVEL == 1, "io_in_d=%u io_out1_d=%u io_out2_d=%u ", 3784 IO_IO_IN_DELAY_MAX, IO_IO_OUT1_DELAY_MAX, 3785 IO_IO_OUT2_DELAY_MAX); 3786 debug_cond(DLEVEL == 1, "dqs_in_reserve=%u dqs_out_reserve=%u\n", 3787 IO_DQS_IN_RESERVE, IO_DQS_OUT_RESERVE); 3788 3789 hc_initialize_rom_data(); 3790 3791 /* update info for sims */ 3792 reg_file_set_stage(CAL_STAGE_NIL); 3793 reg_file_set_group(0); 3794 3795 /* 3796 * Load global needed for those actions that require 3797 * some dynamic calibration support. 3798 */ 3799 dyn_calib_steps = STATIC_CALIB_STEPS; 3800 /* 3801 * Load global to allow dynamic selection of delay loop settings 3802 * based on calibration mode. 3803 */ 3804 if (!(dyn_calib_steps & CALIB_SKIP_DELAY_LOOPS)) 3805 skip_delay_mask = 0xff; 3806 else 3807 skip_delay_mask = 0x0; 3808 3809 pass = run_mem_calibrate(); 3810 3811 printf("%s: Calibration complete\n", __FILE__); 3812 return pass; 3813 } 3814
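
/*
 * Example (hypothetical) caller - the real call site lives in the
 * board/SPL support code, not in this file:
 *
 *	if (!sdram_calibration_full())
 *		hang();
 */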