/*
 * Copyright Altera Corporation (C) 2012-2015
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <common.h>
#include <asm/io.h>
#include <asm/arch/sdram.h>
#include "sequencer.h"
#include "sequencer_auto.h"
#include "sequencer_auto_ac_init.h"
#include "sequencer_auto_inst_init.h"
#include "sequencer_defines.h"

static struct socfpga_sdr_rw_load_manager *sdr_rw_load_mgr_regs =
	(struct socfpga_sdr_rw_load_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0x800);

static struct socfpga_sdr_rw_load_jump_manager *sdr_rw_load_jump_mgr_regs =
	(struct socfpga_sdr_rw_load_jump_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0xC00);

static struct socfpga_sdr_reg_file *sdr_reg_file =
	(struct socfpga_sdr_reg_file *)SDR_PHYGRP_REGFILEGRP_ADDRESS;

static struct socfpga_sdr_scc_mgr *sdr_scc_mgr =
	(struct socfpga_sdr_scc_mgr *)(SDR_PHYGRP_SCCGRP_ADDRESS | 0xe00);

static struct socfpga_phy_mgr_cmd *phy_mgr_cmd =
	(struct socfpga_phy_mgr_cmd *)SDR_PHYGRP_PHYMGRGRP_ADDRESS;

static struct socfpga_phy_mgr_cfg *phy_mgr_cfg =
	(struct socfpga_phy_mgr_cfg *)(SDR_PHYGRP_PHYMGRGRP_ADDRESS | 0x40);

static struct socfpga_data_mgr *data_mgr =
	(struct socfpga_data_mgr *)SDR_PHYGRP_DATAMGRGRP_ADDRESS;

static struct socfpga_sdr_ctrl *sdr_ctrl =
	(struct socfpga_sdr_ctrl *)SDR_CTRLGRP_ADDRESS;

#define DELTA_D		1

/*
 * In order to reduce ROM size, most of the selectable calibration steps are
 * decided at compile time based on the user's calibration mode selection,
 * as captured by the STATIC_CALIB_STEPS selection below.
 *
 * However, to support simulation-time selection of fast simulation mode, where
 * we skip everything except the bare minimum, we need a few of the steps to
 * be dynamic.
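 * (CALIB_SKIP_DELAY_LOOPS, resolved at run time through skip_delay_mask
 * below, is one example of such a dynamic step.)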
In those cases, we either use the DYNAMIC_CALIB_STEPS for the 50 * check, which is based on the rtl-supplied value, or we dynamically compute 51 * the value to use based on the dynamically-chosen calibration mode 52 */ 53 54 #define DLEVEL 0 55 #define STATIC_IN_RTL_SIM 0 56 #define STATIC_SKIP_DELAY_LOOPS 0 57 58 #define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | \ 59 STATIC_SKIP_DELAY_LOOPS) 60 61 /* calibration steps requested by the rtl */ 62 uint16_t dyn_calib_steps; 63 64 /* 65 * To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option 66 * instead of static, we use boolean logic to select between 67 * non-skip and skip values 68 * 69 * The mask is set to include all bits when not-skipping, but is 70 * zero when skipping 71 */ 72 73 uint16_t skip_delay_mask; /* mask off bits when skipping/not-skipping */ 74 75 #define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \ 76 ((non_skip_value) & skip_delay_mask) 77 78 struct gbl_type *gbl; 79 struct param_type *param; 80 uint32_t curr_shadow_reg; 81 82 static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn, 83 uint32_t write_group, uint32_t use_dm, 84 uint32_t all_correct, uint32_t *bit_chk, uint32_t all_ranks); 85 86 static void set_failing_group_stage(uint32_t group, uint32_t stage, 87 uint32_t substage) 88 { 89 /* 90 * Only set the global stage if there was not been any other 91 * failing group 92 */ 93 if (gbl->error_stage == CAL_STAGE_NIL) { 94 gbl->error_substage = substage; 95 gbl->error_stage = stage; 96 gbl->error_group = group; 97 } 98 } 99 100 static void reg_file_set_group(u16 set_group) 101 { 102 clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff0000, set_group << 16); 103 } 104 105 static void reg_file_set_stage(u8 set_stage) 106 { 107 clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff, set_stage & 0xff); 108 } 109 110 static void reg_file_set_sub_stage(u8 set_sub_stage) 111 { 112 set_sub_stage &= 0xff; 113 clrsetbits_le32(&sdr_reg_file->cur_stage, 0xff00, set_sub_stage << 8); 114 } 115 116 static void initialize(void) 117 { 118 debug("%s:%d\n", __func__, __LINE__); 119 /* USER calibration has control over path to memory */ 120 /* 121 * In Hard PHY this is a 2-bit control: 122 * 0: AFI Mux Select 123 * 1: DDIO Mux Select 124 */ 125 writel(0x3, &phy_mgr_cfg->mux_sel); 126 127 /* USER memory clock is not stable we begin initialization */ 128 writel(0, &phy_mgr_cfg->reset_mem_stbl); 129 130 /* USER calibration status all set to zero */ 131 writel(0, &phy_mgr_cfg->cal_status); 132 133 writel(0, &phy_mgr_cfg->cal_debug_info); 134 135 if ((dyn_calib_steps & CALIB_SKIP_ALL) != CALIB_SKIP_ALL) { 136 param->read_correct_mask_vg = ((uint32_t)1 << 137 (RW_MGR_MEM_DQ_PER_READ_DQS / 138 RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS)) - 1; 139 param->write_correct_mask_vg = ((uint32_t)1 << 140 (RW_MGR_MEM_DQ_PER_READ_DQS / 141 RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS)) - 1; 142 param->read_correct_mask = ((uint32_t)1 << 143 RW_MGR_MEM_DQ_PER_READ_DQS) - 1; 144 param->write_correct_mask = ((uint32_t)1 << 145 RW_MGR_MEM_DQ_PER_WRITE_DQS) - 1; 146 param->dm_correct_mask = ((uint32_t)1 << 147 (RW_MGR_MEM_DATA_WIDTH / RW_MGR_MEM_DATA_MASK_WIDTH)) 148 - 1; 149 } 150 } 151 152 static void set_rank_and_odt_mask(uint32_t rank, uint32_t odt_mode) 153 { 154 uint32_t odt_mask_0 = 0; 155 uint32_t odt_mask_1 = 0; 156 uint32_t cs_and_odt_mask; 157 158 if (odt_mode == RW_MGR_ODT_MODE_READ_WRITE) { 159 if (RW_MGR_MEM_NUMBER_OF_RANKS == 1) { 160 /* 161 * 1 Rank 162 * Read: ODT = 0 163 * Write: ODT = 1 164 */ 165 odt_mask_0 = 0x0; 166 
odt_mask_1 = 0x1; 167 } else if (RW_MGR_MEM_NUMBER_OF_RANKS == 2) { 168 /* 2 Ranks */ 169 if (RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM == 1) { 170 /* - Dual-Slot , Single-Rank 171 * (1 chip-select per DIMM) 172 * OR 173 * - RDIMM, 4 total CS (2 CS per DIMM) 174 * means 2 DIMM 175 * Since MEM_NUMBER_OF_RANKS is 2 they are 176 * both single rank 177 * with 2 CS each (special for RDIMM) 178 * Read: Turn on ODT on the opposite rank 179 * Write: Turn on ODT on all ranks 180 */ 181 odt_mask_0 = 0x3 & ~(1 << rank); 182 odt_mask_1 = 0x3; 183 } else { 184 /* 185 * USER - Single-Slot , Dual-rank DIMMs 186 * (2 chip-selects per DIMM) 187 * USER Read: Turn on ODT off on all ranks 188 * USER Write: Turn on ODT on active rank 189 */ 190 odt_mask_0 = 0x0; 191 odt_mask_1 = 0x3 & (1 << rank); 192 } 193 } else { 194 /* 4 Ranks 195 * Read: 196 * ----------+-----------------------+ 197 * | | 198 * | ODT | 199 * Read From +-----------------------+ 200 * Rank | 3 | 2 | 1 | 0 | 201 * ----------+-----+-----+-----+-----+ 202 * 0 | 0 | 1 | 0 | 0 | 203 * 1 | 1 | 0 | 0 | 0 | 204 * 2 | 0 | 0 | 0 | 1 | 205 * 3 | 0 | 0 | 1 | 0 | 206 * ----------+-----+-----+-----+-----+ 207 * 208 * Write: 209 * ----------+-----------------------+ 210 * | | 211 * | ODT | 212 * Write To +-----------------------+ 213 * Rank | 3 | 2 | 1 | 0 | 214 * ----------+-----+-----+-----+-----+ 215 * 0 | 0 | 1 | 0 | 1 | 216 * 1 | 1 | 0 | 1 | 0 | 217 * 2 | 0 | 1 | 0 | 1 | 218 * 3 | 1 | 0 | 1 | 0 | 219 * ----------+-----+-----+-----+-----+ 220 */ 221 switch (rank) { 222 case 0: 223 odt_mask_0 = 0x4; 224 odt_mask_1 = 0x5; 225 break; 226 case 1: 227 odt_mask_0 = 0x8; 228 odt_mask_1 = 0xA; 229 break; 230 case 2: 231 odt_mask_0 = 0x1; 232 odt_mask_1 = 0x5; 233 break; 234 case 3: 235 odt_mask_0 = 0x2; 236 odt_mask_1 = 0xA; 237 break; 238 } 239 } 240 } else { 241 odt_mask_0 = 0x0; 242 odt_mask_1 = 0x0; 243 } 244 245 cs_and_odt_mask = 246 (0xFF & ~(1 << rank)) | 247 ((0xFF & odt_mask_0) << 8) | 248 ((0xFF & odt_mask_1) << 16); 249 writel(cs_and_odt_mask, SDR_PHYGRP_RWMGRGRP_ADDRESS | 250 RW_MGR_SET_CS_AND_ODT_MASK_OFFSET); 251 } 252 253 /** 254 * scc_mgr_set() - Set SCC Manager register 255 * @off: Base offset in SCC Manager space 256 * @grp: Read/Write group 257 * @val: Value to be set 258 * 259 * This function sets the SCC Manager (Scan Chain Control Manager) register. 260 */ 261 static void scc_mgr_set(u32 off, u32 grp, u32 val) 262 { 263 writel(val, SDR_PHYGRP_SCCGRP_ADDRESS | off | (grp << 2)); 264 } 265 266 /** 267 * scc_mgr_initialize() - Initialize SCC Manager registers 268 * 269 * Initialize SCC Manager registers. 270 */ 271 static void scc_mgr_initialize(void) 272 { 273 /* 274 * Clear register file for HPS. 
16 (2^4) is the size of the 275 * full register file in the scc mgr: 276 * RFILE_DEPTH = 1 + log2(MEM_DQ_PER_DQS + 1 + MEM_DM_PER_DQS + 277 * MEM_IF_READ_DQS_WIDTH - 1); 278 */ 279 int i; 280 281 for (i = 0; i < 16; i++) { 282 debug_cond(DLEVEL == 1, "%s:%d: Clearing SCC RFILE index %u\n", 283 __func__, __LINE__, i); 284 scc_mgr_set(SCC_MGR_HHP_RFILE_OFFSET, 0, i); 285 } 286 } 287 288 static void scc_mgr_set_dqdqs_output_phase(uint32_t write_group, uint32_t phase) 289 { 290 scc_mgr_set(SCC_MGR_DQDQS_OUT_PHASE_OFFSET, write_group, phase); 291 } 292 293 static void scc_mgr_set_dqs_bus_in_delay(uint32_t read_group, uint32_t delay) 294 { 295 scc_mgr_set(SCC_MGR_DQS_IN_DELAY_OFFSET, read_group, delay); 296 } 297 298 static void scc_mgr_set_dqs_en_phase(uint32_t read_group, uint32_t phase) 299 { 300 scc_mgr_set(SCC_MGR_DQS_EN_PHASE_OFFSET, read_group, phase); 301 } 302 303 static void scc_mgr_set_dqs_en_delay(uint32_t read_group, uint32_t delay) 304 { 305 scc_mgr_set(SCC_MGR_DQS_EN_DELAY_OFFSET, read_group, delay); 306 } 307 308 static void scc_mgr_set_dqs_io_in_delay(uint32_t write_group, uint32_t delay) 309 { 310 scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS, 311 delay); 312 } 313 314 static void scc_mgr_set_dq_in_delay(uint32_t dq_in_group, uint32_t delay) 315 { 316 scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, dq_in_group, delay); 317 } 318 319 static void scc_mgr_set_dq_out1_delay(uint32_t dq_in_group, uint32_t delay) 320 { 321 scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, dq_in_group, delay); 322 } 323 324 static void scc_mgr_set_dqs_out1_delay(uint32_t write_group, 325 uint32_t delay) 326 { 327 scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS, 328 delay); 329 } 330 331 static void scc_mgr_set_dm_out1_delay(uint32_t dm, uint32_t delay) 332 { 333 scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, 334 RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + dm, 335 delay); 336 } 337 338 /* load up dqs config settings */ 339 static void scc_mgr_load_dqs(uint32_t dqs) 340 { 341 writel(dqs, &sdr_scc_mgr->dqs_ena); 342 } 343 344 /* load up dqs io config settings */ 345 static void scc_mgr_load_dqs_io(void) 346 { 347 writel(0, &sdr_scc_mgr->dqs_io_ena); 348 } 349 350 /* load up dq config settings */ 351 static void scc_mgr_load_dq(uint32_t dq_in_group) 352 { 353 writel(dq_in_group, &sdr_scc_mgr->dq_ena); 354 } 355 356 /* load up dm config settings */ 357 static void scc_mgr_load_dm(uint32_t dm) 358 { 359 writel(dm, &sdr_scc_mgr->dm_ena); 360 } 361 362 /** 363 * scc_mgr_set_all_ranks() - Set SCC Manager register for all ranks 364 * @off: Base offset in SCC Manager space 365 * @grp: Read/Write group 366 * @val: Value to be set 367 * @update: If non-zero, trigger SCC Manager update for all ranks 368 * 369 * This function sets the SCC Manager (Scan Chain Control Manager) register 370 * and optionally triggers the SCC update for all ranks. 
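 * Note that even when @update is zero, one update is still issued for
 * shadow register 0 (the r == 0 case in the loop below).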
371 */ 372 static void scc_mgr_set_all_ranks(const u32 off, const u32 grp, const u32 val, 373 const int update) 374 { 375 u32 r; 376 377 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 378 r += NUM_RANKS_PER_SHADOW_REG) { 379 scc_mgr_set(off, grp, val); 380 381 if (update || (r == 0)) { 382 writel(grp, &sdr_scc_mgr->dqs_ena); 383 writel(0, &sdr_scc_mgr->update); 384 } 385 } 386 } 387 388 static void scc_mgr_set_dqs_en_phase_all_ranks(u32 read_group, u32 phase) 389 { 390 /* 391 * USER although the h/w doesn't support different phases per 392 * shadow register, for simplicity our scc manager modeling 393 * keeps different phase settings per shadow reg, and it's 394 * important for us to keep them in sync to match h/w. 395 * for efficiency, the scan chain update should occur only 396 * once to sr0. 397 */ 398 scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_PHASE_OFFSET, 399 read_group, phase, 0); 400 } 401 402 static void scc_mgr_set_dqdqs_output_phase_all_ranks(uint32_t write_group, 403 uint32_t phase) 404 { 405 /* 406 * USER although the h/w doesn't support different phases per 407 * shadow register, for simplicity our scc manager modeling 408 * keeps different phase settings per shadow reg, and it's 409 * important for us to keep them in sync to match h/w. 410 * for efficiency, the scan chain update should occur only 411 * once to sr0. 412 */ 413 scc_mgr_set_all_ranks(SCC_MGR_DQDQS_OUT_PHASE_OFFSET, 414 write_group, phase, 0); 415 } 416 417 static void scc_mgr_set_dqs_en_delay_all_ranks(uint32_t read_group, 418 uint32_t delay) 419 { 420 /* 421 * In shadow register mode, the T11 settings are stored in 422 * registers in the core, which are updated by the DQS_ENA 423 * signals. Not issuing the SCC_MGR_UPD command allows us to 424 * save lots of rank switching overhead, by calling 425 * select_shadow_regs_for_update with update_scan_chains 426 * set to 0. 427 */ 428 scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_DELAY_OFFSET, 429 read_group, delay, 1); 430 writel(0, &sdr_scc_mgr->update); 431 } 432 433 static void scc_mgr_set_oct_out1_delay(uint32_t write_group, uint32_t delay) 434 { 435 uint32_t read_group; 436 uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_OCT_OUT1_DELAY_OFFSET; 437 438 /* 439 * Load the setting in the SCC manager 440 * Although OCT affects only write data, the OCT delay is controlled 441 * by the DQS logic block which is instantiated once per read group. 442 * For protocols where a write group consists of multiple read groups, 443 * the setting must be set multiple times. 
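 * For example, if there are two read groups per write group, write group N
 * maps to read groups 2N and 2N+1, and the loop below writes the delay to
 * both.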
444 */ 445 for (read_group = write_group * RW_MGR_MEM_IF_READ_DQS_WIDTH / 446 RW_MGR_MEM_IF_WRITE_DQS_WIDTH; 447 read_group < (write_group + 1) * RW_MGR_MEM_IF_READ_DQS_WIDTH / 448 RW_MGR_MEM_IF_WRITE_DQS_WIDTH; ++read_group) 449 writel(delay, addr + (read_group << 2)); 450 } 451 452 static void scc_mgr_set_hhp_extras(void) 453 { 454 /* 455 * Load the fixed setting in the SCC manager 456 * bits: 0:0 = 1'b1 - dqs bypass 457 * bits: 1:1 = 1'b1 - dq bypass 458 * bits: 4:2 = 3'b001 - rfifo_mode 459 * bits: 6:5 = 2'b01 - rfifo clock_select 460 * bits: 7:7 = 1'b0 - separate gating from ungating setting 461 * bits: 8:8 = 1'b0 - separate OE from Output delay setting 462 */ 463 uint32_t value = (0<<8) | (0<<7) | (1<<5) | (1<<2) | (1<<1) | (1<<0); 464 uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_HHP_GLOBALS_OFFSET; 465 466 writel(value, addr + SCC_MGR_HHP_EXTRAS_OFFSET); 467 } 468 469 /* 470 * USER Zero all DQS config 471 * TODO: maybe rename to scc_mgr_zero_dqs_config (or something) 472 */ 473 static void scc_mgr_zero_all(void) 474 { 475 uint32_t i, r; 476 477 /* 478 * USER Zero all DQS config settings, across all groups and all 479 * shadow registers 480 */ 481 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r += 482 NUM_RANKS_PER_SHADOW_REG) { 483 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { 484 /* 485 * The phases actually don't exist on a per-rank basis, 486 * but there's no harm updating them several times, so 487 * let's keep the code simple. 488 */ 489 scc_mgr_set_dqs_bus_in_delay(i, IO_DQS_IN_RESERVE); 490 scc_mgr_set_dqs_en_phase(i, 0); 491 scc_mgr_set_dqs_en_delay(i, 0); 492 } 493 494 for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) { 495 scc_mgr_set_dqdqs_output_phase(i, 0); 496 /* av/cv don't have out2 */ 497 scc_mgr_set_oct_out1_delay(i, IO_DQS_OUT_RESERVE); 498 } 499 } 500 501 /* multicast to all DQS group enables */ 502 writel(0xff, &sdr_scc_mgr->dqs_ena); 503 writel(0, &sdr_scc_mgr->update); 504 } 505 506 static void scc_set_bypass_mode(uint32_t write_group, uint32_t mode) 507 { 508 /* mode = 0 : Do NOT bypass - Half Rate Mode */ 509 /* mode = 1 : Bypass - Full Rate Mode */ 510 511 /* only need to set once for all groups, pins, dq, dqs, dm */ 512 if (write_group == 0) { 513 debug_cond(DLEVEL == 1, "%s:%d Setting HHP Extras\n", __func__, 514 __LINE__); 515 scc_mgr_set_hhp_extras(); 516 debug_cond(DLEVEL == 1, "%s:%d Done Setting HHP Extras\n", 517 __func__, __LINE__); 518 } 519 /* multicast to all DQ enables */ 520 writel(0xff, &sdr_scc_mgr->dq_ena); 521 writel(0xff, &sdr_scc_mgr->dm_ena); 522 523 /* update current DQS IO enable */ 524 writel(0, &sdr_scc_mgr->dqs_io_ena); 525 526 /* update the DQS logic */ 527 writel(write_group, &sdr_scc_mgr->dqs_ena); 528 529 /* hit update */ 530 writel(0, &sdr_scc_mgr->update); 531 } 532 533 /** 534 * scc_mgr_load_dqs_for_write_group() - Load DQS settings for Write Group 535 * @write_group: Write group 536 * 537 * Load DQS settings for Write Group, do not trigger SCC update. 538 */ 539 static void scc_mgr_load_dqs_for_write_group(const u32 write_group) 540 { 541 const int ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH / 542 RW_MGR_MEM_IF_WRITE_DQS_WIDTH; 543 const int base = write_group * ratio; 544 int i; 545 /* 546 * Load the setting in the SCC manager 547 * Although OCT affects only write data, the OCT delay is controlled 548 * by the DQS logic block which is instantiated once per read group. 549 * For protocols where a write group consists of multiple read groups, 550 * the setting must be set multiple times. 
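 * Only the dqs_ena register is written here; the caller is expected to
 * issue the SCC update afterwards.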
551 */ 552 for (i = 0; i < ratio; i++) 553 writel(base + i, &sdr_scc_mgr->dqs_ena); 554 } 555 556 static void scc_mgr_zero_group(uint32_t write_group, uint32_t test_begin, 557 int32_t out_only) 558 { 559 uint32_t i, r; 560 561 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r += 562 NUM_RANKS_PER_SHADOW_REG) { 563 /* Zero all DQ config settings */ 564 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 565 scc_mgr_set_dq_out1_delay(i, 0); 566 if (!out_only) 567 scc_mgr_set_dq_in_delay(i, 0); 568 } 569 570 /* multicast to all DQ enables */ 571 writel(0xff, &sdr_scc_mgr->dq_ena); 572 573 /* Zero all DM config settings */ 574 for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) { 575 scc_mgr_set_dm_out1_delay(i, 0); 576 } 577 578 /* multicast to all DM enables */ 579 writel(0xff, &sdr_scc_mgr->dm_ena); 580 581 /* zero all DQS io settings */ 582 if (!out_only) 583 scc_mgr_set_dqs_io_in_delay(write_group, 0); 584 /* av/cv don't have out2 */ 585 scc_mgr_set_dqs_out1_delay(write_group, IO_DQS_OUT_RESERVE); 586 scc_mgr_set_oct_out1_delay(write_group, IO_DQS_OUT_RESERVE); 587 scc_mgr_load_dqs_for_write_group(write_group); 588 589 /* multicast to all DQS IO enables (only 1) */ 590 writel(0, &sdr_scc_mgr->dqs_io_ena); 591 592 /* hit update to zero everything */ 593 writel(0, &sdr_scc_mgr->update); 594 } 595 } 596 597 /* 598 * apply and load a particular input delay for the DQ pins in a group 599 * group_bgn is the index of the first dq pin (in the write group) 600 */ 601 static void scc_mgr_apply_group_dq_in_delay(uint32_t write_group, 602 uint32_t group_bgn, uint32_t delay) 603 { 604 uint32_t i, p; 605 606 for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) { 607 scc_mgr_set_dq_in_delay(p, delay); 608 scc_mgr_load_dq(p); 609 } 610 } 611 612 /* apply and load a particular output delay for the DQ pins in a group */ 613 static void scc_mgr_apply_group_dq_out1_delay(uint32_t write_group, 614 uint32_t group_bgn, 615 uint32_t delay1) 616 { 617 uint32_t i, p; 618 619 for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) { 620 scc_mgr_set_dq_out1_delay(i, delay1); 621 scc_mgr_load_dq(i); 622 } 623 } 624 625 /* apply and load a particular output delay for the DM pins in a group */ 626 static void scc_mgr_apply_group_dm_out1_delay(uint32_t write_group, 627 uint32_t delay1) 628 { 629 uint32_t i; 630 631 for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) { 632 scc_mgr_set_dm_out1_delay(i, delay1); 633 scc_mgr_load_dm(i); 634 } 635 } 636 637 638 /* apply and load delay on both DQS and OCT out1 */ 639 static void scc_mgr_apply_group_dqs_io_and_oct_out1(uint32_t write_group, 640 uint32_t delay) 641 { 642 scc_mgr_set_dqs_out1_delay(write_group, delay); 643 scc_mgr_load_dqs_io(); 644 645 scc_mgr_set_oct_out1_delay(write_group, delay); 646 scc_mgr_load_dqs_for_write_group(write_group); 647 } 648 649 /* apply a delay to the entire output side: DQ, DM, DQS, OCT */ 650 static void scc_mgr_apply_group_all_out_delay_add(uint32_t write_group, 651 uint32_t group_bgn, 652 uint32_t delay) 653 { 654 uint32_t i, p, new_delay; 655 656 /* dq shift */ 657 for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) { 658 new_delay = READ_SCC_DQ_OUT2_DELAY; 659 new_delay += delay; 660 661 if (new_delay > IO_IO_OUT2_DELAY_MAX) { 662 debug_cond(DLEVEL == 1, "%s:%d (%u, %u, %u) DQ[%u,%u]:\ 663 %u > %lu => %lu", __func__, __LINE__, 664 write_group, group_bgn, delay, i, p, new_delay, 665 (long unsigned int)IO_IO_OUT2_DELAY_MAX, 666 (long unsigned int)IO_IO_OUT2_DELAY_MAX); 667 new_delay = 
IO_IO_OUT2_DELAY_MAX; 668 } 669 670 scc_mgr_load_dq(i); 671 } 672 673 /* dm shift */ 674 for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) { 675 new_delay = READ_SCC_DM_IO_OUT2_DELAY; 676 new_delay += delay; 677 678 if (new_delay > IO_IO_OUT2_DELAY_MAX) { 679 debug_cond(DLEVEL == 1, "%s:%d (%u, %u, %u) DM[%u]:\ 680 %u > %lu => %lu\n", __func__, __LINE__, 681 write_group, group_bgn, delay, i, new_delay, 682 (long unsigned int)IO_IO_OUT2_DELAY_MAX, 683 (long unsigned int)IO_IO_OUT2_DELAY_MAX); 684 new_delay = IO_IO_OUT2_DELAY_MAX; 685 } 686 687 scc_mgr_load_dm(i); 688 } 689 690 /* dqs shift */ 691 new_delay = READ_SCC_DQS_IO_OUT2_DELAY; 692 new_delay += delay; 693 694 if (new_delay > IO_IO_OUT2_DELAY_MAX) { 695 debug_cond(DLEVEL == 1, "%s:%d (%u, %u, %u) DQS: %u > %d => %d;" 696 " adding %u to OUT1\n", __func__, __LINE__, 697 write_group, group_bgn, delay, new_delay, 698 IO_IO_OUT2_DELAY_MAX, IO_IO_OUT2_DELAY_MAX, 699 new_delay - IO_IO_OUT2_DELAY_MAX); 700 scc_mgr_set_dqs_out1_delay(write_group, new_delay - 701 IO_IO_OUT2_DELAY_MAX); 702 new_delay = IO_IO_OUT2_DELAY_MAX; 703 } 704 705 scc_mgr_load_dqs_io(); 706 707 /* oct shift */ 708 new_delay = READ_SCC_OCT_OUT2_DELAY; 709 new_delay += delay; 710 711 if (new_delay > IO_IO_OUT2_DELAY_MAX) { 712 debug_cond(DLEVEL == 1, "%s:%d (%u, %u, %u) DQS: %u > %d => %d;" 713 " adding %u to OUT1\n", __func__, __LINE__, 714 write_group, group_bgn, delay, new_delay, 715 IO_IO_OUT2_DELAY_MAX, IO_IO_OUT2_DELAY_MAX, 716 new_delay - IO_IO_OUT2_DELAY_MAX); 717 scc_mgr_set_oct_out1_delay(write_group, new_delay - 718 IO_IO_OUT2_DELAY_MAX); 719 new_delay = IO_IO_OUT2_DELAY_MAX; 720 } 721 722 scc_mgr_load_dqs_for_write_group(write_group); 723 } 724 725 /* 726 * USER apply a delay to the entire output side (DQ, DM, DQS, OCT) 727 * and to all ranks 728 */ 729 static void scc_mgr_apply_group_all_out_delay_add_all_ranks( 730 uint32_t write_group, uint32_t group_bgn, uint32_t delay) 731 { 732 uint32_t r; 733 734 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 735 r += NUM_RANKS_PER_SHADOW_REG) { 736 scc_mgr_apply_group_all_out_delay_add(write_group, 737 group_bgn, delay); 738 writel(0, &sdr_scc_mgr->update); 739 } 740 } 741 742 /* optimization used to recover some slots in ddr3 inst_rom */ 743 /* could be applied to other protocols if we wanted to */ 744 static void set_jump_as_return(void) 745 { 746 /* 747 * to save space, we replace return with jump to special shared 748 * RETURN instruction so we set the counter to large value so that 749 * we always jump 750 */ 751 writel(0xff, &sdr_rw_load_mgr_regs->load_cntr0); 752 writel(RW_MGR_RETURN, &sdr_rw_load_jump_mgr_regs->load_jump_add0); 753 } 754 755 /* 756 * should always use constants as argument to ensure all computations are 757 * performed at compile time 758 */ 759 static void delay_for_n_mem_clocks(const uint32_t clocks) 760 { 761 uint32_t afi_clocks; 762 uint8_t inner = 0; 763 uint8_t outer = 0; 764 uint16_t c_loop = 0; 765 766 debug("%s:%d: clocks=%u ... 
start\n", __func__, __LINE__, clocks); 767 768 769 afi_clocks = (clocks + AFI_RATE_RATIO-1) / AFI_RATE_RATIO; 770 /* scale (rounding up) to get afi clocks */ 771 772 /* 773 * Note, we don't bother accounting for being off a little bit 774 * because of a few extra instructions in outer loops 775 * Note, the loops have a test at the end, and do the test before 776 * the decrement, and so always perform the loop 777 * 1 time more than the counter value 778 */ 779 if (afi_clocks == 0) { 780 ; 781 } else if (afi_clocks <= 0x100) { 782 inner = afi_clocks-1; 783 outer = 0; 784 c_loop = 0; 785 } else if (afi_clocks <= 0x10000) { 786 inner = 0xff; 787 outer = (afi_clocks-1) >> 8; 788 c_loop = 0; 789 } else { 790 inner = 0xff; 791 outer = 0xff; 792 c_loop = (afi_clocks-1) >> 16; 793 } 794 795 /* 796 * rom instructions are structured as follows: 797 * 798 * IDLE_LOOP2: jnz cntr0, TARGET_A 799 * IDLE_LOOP1: jnz cntr1, TARGET_B 800 * return 801 * 802 * so, when doing nested loops, TARGET_A is set to IDLE_LOOP2, and 803 * TARGET_B is set to IDLE_LOOP2 as well 804 * 805 * if we have no outer loop, though, then we can use IDLE_LOOP1 only, 806 * and set TARGET_B to IDLE_LOOP1 and we skip IDLE_LOOP2 entirely 807 * 808 * a little confusing, but it helps save precious space in the inst_rom 809 * and sequencer rom and keeps the delays more accurate and reduces 810 * overhead 811 */ 812 if (afi_clocks <= 0x100) { 813 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner), 814 &sdr_rw_load_mgr_regs->load_cntr1); 815 816 writel(RW_MGR_IDLE_LOOP1, 817 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 818 819 writel(RW_MGR_IDLE_LOOP1, SDR_PHYGRP_RWMGRGRP_ADDRESS | 820 RW_MGR_RUN_SINGLE_GROUP_OFFSET); 821 } else { 822 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner), 823 &sdr_rw_load_mgr_regs->load_cntr0); 824 825 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer), 826 &sdr_rw_load_mgr_regs->load_cntr1); 827 828 writel(RW_MGR_IDLE_LOOP2, 829 &sdr_rw_load_jump_mgr_regs->load_jump_add0); 830 831 writel(RW_MGR_IDLE_LOOP2, 832 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 833 834 /* hack to get around compiler not being smart enough */ 835 if (afi_clocks <= 0x10000) { 836 /* only need to run once */ 837 writel(RW_MGR_IDLE_LOOP2, SDR_PHYGRP_RWMGRGRP_ADDRESS | 838 RW_MGR_RUN_SINGLE_GROUP_OFFSET); 839 } else { 840 do { 841 writel(RW_MGR_IDLE_LOOP2, 842 SDR_PHYGRP_RWMGRGRP_ADDRESS | 843 RW_MGR_RUN_SINGLE_GROUP_OFFSET); 844 } while (c_loop-- != 0); 845 } 846 } 847 debug("%s:%d clocks=%u ... 
end\n", __func__, __LINE__, clocks); 848 } 849 850 static void rw_mgr_mem_initialize(void) 851 { 852 uint32_t r; 853 uint32_t grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS | 854 RW_MGR_RUN_SINGLE_GROUP_OFFSET; 855 856 debug("%s:%d\n", __func__, __LINE__); 857 858 /* The reset / cke part of initialization is broadcasted to all ranks */ 859 writel(RW_MGR_RANK_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS | 860 RW_MGR_SET_CS_AND_ODT_MASK_OFFSET); 861 862 /* 863 * Here's how you load register for a loop 864 * Counters are located @ 0x800 865 * Jump address are located @ 0xC00 866 * For both, registers 0 to 3 are selected using bits 3 and 2, like 867 * in 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C 868 * I know this ain't pretty, but Avalon bus throws away the 2 least 869 * significant bits 870 */ 871 872 /* start with memory RESET activated */ 873 874 /* tINIT = 200us */ 875 876 /* 877 * 200us @ 266MHz (3.75 ns) ~ 54000 clock cycles 878 * If a and b are the number of iteration in 2 nested loops 879 * it takes the following number of cycles to complete the operation: 880 * number_of_cycles = ((2 + n) * a + 2) * b 881 * where n is the number of instruction in the inner loop 882 * One possible solution is n = 0 , a = 256 , b = 106 => a = FF, 883 * b = 6A 884 */ 885 886 /* Load counters */ 887 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR0_VAL), 888 &sdr_rw_load_mgr_regs->load_cntr0); 889 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR1_VAL), 890 &sdr_rw_load_mgr_regs->load_cntr1); 891 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR2_VAL), 892 &sdr_rw_load_mgr_regs->load_cntr2); 893 894 /* Load jump address */ 895 writel(RW_MGR_INIT_RESET_0_CKE_0, 896 &sdr_rw_load_jump_mgr_regs->load_jump_add0); 897 writel(RW_MGR_INIT_RESET_0_CKE_0, 898 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 899 writel(RW_MGR_INIT_RESET_0_CKE_0, 900 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 901 902 /* Execute count instruction */ 903 writel(RW_MGR_INIT_RESET_0_CKE_0, grpaddr); 904 905 /* indicate that memory is stable */ 906 writel(1, &phy_mgr_cfg->reset_mem_stbl); 907 908 /* 909 * transition the RESET to high 910 * Wait for 500us 911 */ 912 913 /* 914 * 500us @ 266MHz (3.75 ns) ~ 134000 clock cycles 915 * If a and b are the number of iteration in 2 nested loops 916 * it takes the following number of cycles to complete the operation 917 * number_of_cycles = ((2 + n) * a + 2) * b 918 * where n is the number of instruction in the inner loop 919 * One possible solution is n = 2 , a = 131 , b = 256 => a = 83, 920 * b = FF 921 */ 922 923 /* Load counters */ 924 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR0_VAL), 925 &sdr_rw_load_mgr_regs->load_cntr0); 926 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR1_VAL), 927 &sdr_rw_load_mgr_regs->load_cntr1); 928 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR2_VAL), 929 &sdr_rw_load_mgr_regs->load_cntr2); 930 931 /* Load jump address */ 932 writel(RW_MGR_INIT_RESET_1_CKE_0, 933 &sdr_rw_load_jump_mgr_regs->load_jump_add0); 934 writel(RW_MGR_INIT_RESET_1_CKE_0, 935 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 936 writel(RW_MGR_INIT_RESET_1_CKE_0, 937 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 938 939 writel(RW_MGR_INIT_RESET_1_CKE_0, grpaddr); 940 941 /* bring up clock enable */ 942 943 /* tXRP < 250 ck cycles */ 944 delay_for_n_mem_clocks(250); 945 946 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { 947 if (param->skip_ranks[r]) { 948 /* request to skip the rank */ 949 continue; 950 } 951 952 /* set rank */ 953 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); 
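		/*
		 * With ODT off for this rank, the writes below program MR2,
		 * MR3, MR1 and finally MR0 with DLL reset, then ZQCL runs ZQ
		 * calibration (tZQinit = tDLLK = 512 ck).
		 */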
954 955 /* 956 * USER Use Mirror-ed commands for odd ranks if address 957 * mirrorring is on 958 */ 959 if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) { 960 set_jump_as_return(); 961 writel(RW_MGR_MRS2_MIRR, grpaddr); 962 delay_for_n_mem_clocks(4); 963 set_jump_as_return(); 964 writel(RW_MGR_MRS3_MIRR, grpaddr); 965 delay_for_n_mem_clocks(4); 966 set_jump_as_return(); 967 writel(RW_MGR_MRS1_MIRR, grpaddr); 968 delay_for_n_mem_clocks(4); 969 set_jump_as_return(); 970 writel(RW_MGR_MRS0_DLL_RESET_MIRR, grpaddr); 971 } else { 972 set_jump_as_return(); 973 writel(RW_MGR_MRS2, grpaddr); 974 delay_for_n_mem_clocks(4); 975 set_jump_as_return(); 976 writel(RW_MGR_MRS3, grpaddr); 977 delay_for_n_mem_clocks(4); 978 set_jump_as_return(); 979 writel(RW_MGR_MRS1, grpaddr); 980 set_jump_as_return(); 981 writel(RW_MGR_MRS0_DLL_RESET, grpaddr); 982 } 983 set_jump_as_return(); 984 writel(RW_MGR_ZQCL, grpaddr); 985 986 /* tZQinit = tDLLK = 512 ck cycles */ 987 delay_for_n_mem_clocks(512); 988 } 989 } 990 991 /* 992 * At the end of calibration we have to program the user settings in, and 993 * USER hand off the memory to the user. 994 */ 995 static void rw_mgr_mem_handoff(void) 996 { 997 uint32_t r; 998 uint32_t grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS | 999 RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1000 1001 debug("%s:%d\n", __func__, __LINE__); 1002 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { 1003 if (param->skip_ranks[r]) 1004 /* request to skip the rank */ 1005 continue; 1006 /* set rank */ 1007 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); 1008 1009 /* precharge all banks ... */ 1010 writel(RW_MGR_PRECHARGE_ALL, grpaddr); 1011 1012 /* load up MR settings specified by user */ 1013 1014 /* 1015 * Use Mirror-ed commands for odd ranks if address 1016 * mirrorring is on 1017 */ 1018 if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) { 1019 set_jump_as_return(); 1020 writel(RW_MGR_MRS2_MIRR, grpaddr); 1021 delay_for_n_mem_clocks(4); 1022 set_jump_as_return(); 1023 writel(RW_MGR_MRS3_MIRR, grpaddr); 1024 delay_for_n_mem_clocks(4); 1025 set_jump_as_return(); 1026 writel(RW_MGR_MRS1_MIRR, grpaddr); 1027 delay_for_n_mem_clocks(4); 1028 set_jump_as_return(); 1029 writel(RW_MGR_MRS0_USER_MIRR, grpaddr); 1030 } else { 1031 set_jump_as_return(); 1032 writel(RW_MGR_MRS2, grpaddr); 1033 delay_for_n_mem_clocks(4); 1034 set_jump_as_return(); 1035 writel(RW_MGR_MRS3, grpaddr); 1036 delay_for_n_mem_clocks(4); 1037 set_jump_as_return(); 1038 writel(RW_MGR_MRS1, grpaddr); 1039 delay_for_n_mem_clocks(4); 1040 set_jump_as_return(); 1041 writel(RW_MGR_MRS0_USER, grpaddr); 1042 } 1043 /* 1044 * USER need to wait tMOD (12CK or 15ns) time before issuing 1045 * other commands, but we will have plenty of NIOS cycles before 1046 * actual handoff so its okay. 1047 */ 1048 } 1049 } 1050 1051 /* 1052 * performs a guaranteed read on the patterns we are going to use during a 1053 * read test to ensure memory works 1054 */ 1055 static uint32_t rw_mgr_mem_calibrate_read_test_patterns(uint32_t rank_bgn, 1056 uint32_t group, uint32_t num_tries, uint32_t *bit_chk, 1057 uint32_t all_ranks) 1058 { 1059 uint32_t r, vg; 1060 uint32_t correct_mask_vg; 1061 uint32_t tmp_bit_chk; 1062 uint32_t rank_end = all_ranks ? 
RW_MGR_MEM_NUMBER_OF_RANKS : 1063 (rank_bgn + NUM_RANKS_PER_SHADOW_REG); 1064 uint32_t addr; 1065 uint32_t base_rw_mgr; 1066 1067 *bit_chk = param->read_correct_mask; 1068 correct_mask_vg = param->read_correct_mask_vg; 1069 1070 for (r = rank_bgn; r < rank_end; r++) { 1071 if (param->skip_ranks[r]) 1072 /* request to skip the rank */ 1073 continue; 1074 1075 /* set rank */ 1076 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); 1077 1078 /* Load up a constant bursts of read commands */ 1079 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0); 1080 writel(RW_MGR_GUARANTEED_READ, 1081 &sdr_rw_load_jump_mgr_regs->load_jump_add0); 1082 1083 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1); 1084 writel(RW_MGR_GUARANTEED_READ_CONT, 1085 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 1086 1087 tmp_bit_chk = 0; 1088 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS-1; ; vg--) { 1089 /* reset the fifos to get pointers to known state */ 1090 1091 writel(0, &phy_mgr_cmd->fifo_reset); 1092 writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS | 1093 RW_MGR_RESET_READ_DATAPATH_OFFSET); 1094 1095 tmp_bit_chk = tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS 1096 / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS); 1097 1098 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1099 writel(RW_MGR_GUARANTEED_READ, addr + 1100 ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS + 1101 vg) << 2)); 1102 1103 base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS); 1104 tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & (~base_rw_mgr)); 1105 1106 if (vg == 0) 1107 break; 1108 } 1109 *bit_chk &= tmp_bit_chk; 1110 } 1111 1112 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1113 writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2)); 1114 1115 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 1116 debug_cond(DLEVEL == 1, "%s:%d test_load_patterns(%u,ALL) => (%u == %u) =>\ 1117 %lu\n", __func__, __LINE__, group, *bit_chk, param->read_correct_mask, 1118 (long unsigned int)(*bit_chk == param->read_correct_mask)); 1119 return *bit_chk == param->read_correct_mask; 1120 } 1121 1122 static uint32_t rw_mgr_mem_calibrate_read_test_patterns_all_ranks 1123 (uint32_t group, uint32_t num_tries, uint32_t *bit_chk) 1124 { 1125 return rw_mgr_mem_calibrate_read_test_patterns(0, group, 1126 num_tries, bit_chk, 1); 1127 } 1128 1129 /* load up the patterns we are going to use during a read test */ 1130 static void rw_mgr_mem_calibrate_read_load_patterns(uint32_t rank_bgn, 1131 uint32_t all_ranks) 1132 { 1133 uint32_t r; 1134 uint32_t rank_end = all_ranks ? 
RW_MGR_MEM_NUMBER_OF_RANKS : 1135 (rank_bgn + NUM_RANKS_PER_SHADOW_REG); 1136 1137 debug("%s:%d\n", __func__, __LINE__); 1138 for (r = rank_bgn; r < rank_end; r++) { 1139 if (param->skip_ranks[r]) 1140 /* request to skip the rank */ 1141 continue; 1142 1143 /* set rank */ 1144 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); 1145 1146 /* Load up a constant bursts */ 1147 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0); 1148 1149 writel(RW_MGR_GUARANTEED_WRITE_WAIT0, 1150 &sdr_rw_load_jump_mgr_regs->load_jump_add0); 1151 1152 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1); 1153 1154 writel(RW_MGR_GUARANTEED_WRITE_WAIT1, 1155 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 1156 1157 writel(0x04, &sdr_rw_load_mgr_regs->load_cntr2); 1158 1159 writel(RW_MGR_GUARANTEED_WRITE_WAIT2, 1160 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 1161 1162 writel(0x04, &sdr_rw_load_mgr_regs->load_cntr3); 1163 1164 writel(RW_MGR_GUARANTEED_WRITE_WAIT3, 1165 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 1166 1167 writel(RW_MGR_GUARANTEED_WRITE, SDR_PHYGRP_RWMGRGRP_ADDRESS | 1168 RW_MGR_RUN_SINGLE_GROUP_OFFSET); 1169 } 1170 1171 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 1172 } 1173 1174 /* 1175 * try a read and see if it returns correct data back. has dummy reads 1176 * inserted into the mix used to align dqs enable. has more thorough checks 1177 * than the regular read test. 1178 */ 1179 static uint32_t rw_mgr_mem_calibrate_read_test(uint32_t rank_bgn, uint32_t group, 1180 uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk, 1181 uint32_t all_groups, uint32_t all_ranks) 1182 { 1183 uint32_t r, vg; 1184 uint32_t correct_mask_vg; 1185 uint32_t tmp_bit_chk; 1186 uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS : 1187 (rank_bgn + NUM_RANKS_PER_SHADOW_REG); 1188 uint32_t addr; 1189 uint32_t base_rw_mgr; 1190 1191 *bit_chk = param->read_correct_mask; 1192 correct_mask_vg = param->read_correct_mask_vg; 1193 1194 uint32_t quick_read_mode = (((STATIC_CALIB_STEPS) & 1195 CALIB_SKIP_DELAY_SWEEPS) && ENABLE_SUPER_QUICK_CALIBRATION); 1196 1197 for (r = rank_bgn; r < rank_end; r++) { 1198 if (param->skip_ranks[r]) 1199 /* request to skip the rank */ 1200 continue; 1201 1202 /* set rank */ 1203 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); 1204 1205 writel(0x10, &sdr_rw_load_mgr_regs->load_cntr1); 1206 1207 writel(RW_MGR_READ_B2B_WAIT1, 1208 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 1209 1210 writel(0x10, &sdr_rw_load_mgr_regs->load_cntr2); 1211 writel(RW_MGR_READ_B2B_WAIT2, 1212 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 1213 1214 if (quick_read_mode) 1215 writel(0x1, &sdr_rw_load_mgr_regs->load_cntr0); 1216 /* need at least two (1+1) reads to capture failures */ 1217 else if (all_groups) 1218 writel(0x06, &sdr_rw_load_mgr_regs->load_cntr0); 1219 else 1220 writel(0x32, &sdr_rw_load_mgr_regs->load_cntr0); 1221 1222 writel(RW_MGR_READ_B2B, 1223 &sdr_rw_load_jump_mgr_regs->load_jump_add0); 1224 if (all_groups) 1225 writel(RW_MGR_MEM_IF_READ_DQS_WIDTH * 1226 RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1, 1227 &sdr_rw_load_mgr_regs->load_cntr3); 1228 else 1229 writel(0x0, &sdr_rw_load_mgr_regs->load_cntr3); 1230 1231 writel(RW_MGR_READ_B2B, 1232 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 1233 1234 tmp_bit_chk = 0; 1235 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS-1; ; vg--) { 1236 /* reset the fifos to get pointers to known state */ 1237 writel(0, &phy_mgr_cmd->fifo_reset); 1238 writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS | 1239 RW_MGR_RESET_READ_DATAPATH_OFFSET); 1240 1241 tmp_bit_chk = 
tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS 1242 / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS); 1243 1244 if (all_groups) 1245 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_ALL_GROUPS_OFFSET; 1246 else 1247 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1248 1249 writel(RW_MGR_READ_B2B, addr + 1250 ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS + 1251 vg) << 2)); 1252 1253 base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS); 1254 tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~(base_rw_mgr)); 1255 1256 if (vg == 0) 1257 break; 1258 } 1259 *bit_chk &= tmp_bit_chk; 1260 } 1261 1262 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1263 writel(RW_MGR_CLEAR_DQS_ENABLE, addr + (group << 2)); 1264 1265 if (all_correct) { 1266 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 1267 debug_cond(DLEVEL == 2, "%s:%d read_test(%u,ALL,%u) =>\ 1268 (%u == %u) => %lu", __func__, __LINE__, group, 1269 all_groups, *bit_chk, param->read_correct_mask, 1270 (long unsigned int)(*bit_chk == 1271 param->read_correct_mask)); 1272 return *bit_chk == param->read_correct_mask; 1273 } else { 1274 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 1275 debug_cond(DLEVEL == 2, "%s:%d read_test(%u,ONE,%u) =>\ 1276 (%u != %lu) => %lu\n", __func__, __LINE__, 1277 group, all_groups, *bit_chk, (long unsigned int)0, 1278 (long unsigned int)(*bit_chk != 0x00)); 1279 return *bit_chk != 0x00; 1280 } 1281 } 1282 1283 static uint32_t rw_mgr_mem_calibrate_read_test_all_ranks(uint32_t group, 1284 uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk, 1285 uint32_t all_groups) 1286 { 1287 return rw_mgr_mem_calibrate_read_test(0, group, num_tries, all_correct, 1288 bit_chk, all_groups, 1); 1289 } 1290 1291 static void rw_mgr_incr_vfifo(uint32_t grp, uint32_t *v) 1292 { 1293 writel(grp, &phy_mgr_cmd->inc_vfifo_hard_phy); 1294 (*v)++; 1295 } 1296 1297 static void rw_mgr_decr_vfifo(uint32_t grp, uint32_t *v) 1298 { 1299 uint32_t i; 1300 1301 for (i = 0; i < VFIFO_SIZE-1; i++) 1302 rw_mgr_incr_vfifo(grp, v); 1303 } 1304 1305 static int find_vfifo_read(uint32_t grp, uint32_t *bit_chk) 1306 { 1307 uint32_t v; 1308 uint32_t fail_cnt = 0; 1309 uint32_t test_status; 1310 1311 for (v = 0; v < VFIFO_SIZE; ) { 1312 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo %u\n", 1313 __func__, __LINE__, v); 1314 test_status = rw_mgr_mem_calibrate_read_test_all_ranks 1315 (grp, 1, PASS_ONE_BIT, bit_chk, 0); 1316 if (!test_status) { 1317 fail_cnt++; 1318 1319 if (fail_cnt == 2) 1320 break; 1321 } 1322 1323 /* fiddle with FIFO */ 1324 rw_mgr_incr_vfifo(grp, &v); 1325 } 1326 1327 if (v >= VFIFO_SIZE) { 1328 /* no failing read found!! 
Something must have gone wrong */ 1329 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo failed\n", 1330 __func__, __LINE__); 1331 return 0; 1332 } else { 1333 return v; 1334 } 1335 } 1336 1337 static int find_working_phase(uint32_t *grp, uint32_t *bit_chk, 1338 uint32_t dtaps_per_ptap, uint32_t *work_bgn, 1339 uint32_t *v, uint32_t *d, uint32_t *p, 1340 uint32_t *i, uint32_t *max_working_cnt) 1341 { 1342 uint32_t found_begin = 0; 1343 uint32_t tmp_delay = 0; 1344 uint32_t test_status; 1345 1346 for (*d = 0; *d <= dtaps_per_ptap; (*d)++, tmp_delay += 1347 IO_DELAY_PER_DQS_EN_DCHAIN_TAP) { 1348 *work_bgn = tmp_delay; 1349 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d); 1350 1351 for (*i = 0; *i < VFIFO_SIZE; (*i)++) { 1352 for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_bgn += 1353 IO_DELAY_PER_OPA_TAP) { 1354 scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p); 1355 1356 test_status = 1357 rw_mgr_mem_calibrate_read_test_all_ranks 1358 (*grp, 1, PASS_ONE_BIT, bit_chk, 0); 1359 1360 if (test_status) { 1361 *max_working_cnt = 1; 1362 found_begin = 1; 1363 break; 1364 } 1365 } 1366 1367 if (found_begin) 1368 break; 1369 1370 if (*p > IO_DQS_EN_PHASE_MAX) 1371 /* fiddle with FIFO */ 1372 rw_mgr_incr_vfifo(*grp, v); 1373 } 1374 1375 if (found_begin) 1376 break; 1377 } 1378 1379 if (*i >= VFIFO_SIZE) { 1380 /* cannot find working solution */ 1381 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: no vfifo/\ 1382 ptap/dtap\n", __func__, __LINE__); 1383 return 0; 1384 } else { 1385 return 1; 1386 } 1387 } 1388 1389 static void sdr_backup_phase(uint32_t *grp, uint32_t *bit_chk, 1390 uint32_t *work_bgn, uint32_t *v, uint32_t *d, 1391 uint32_t *p, uint32_t *max_working_cnt) 1392 { 1393 uint32_t found_begin = 0; 1394 uint32_t tmp_delay; 1395 1396 /* Special case code for backing up a phase */ 1397 if (*p == 0) { 1398 *p = IO_DQS_EN_PHASE_MAX; 1399 rw_mgr_decr_vfifo(*grp, v); 1400 } else { 1401 (*p)--; 1402 } 1403 tmp_delay = *work_bgn - IO_DELAY_PER_OPA_TAP; 1404 scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p); 1405 1406 for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_bgn; 1407 (*d)++, tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) { 1408 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d); 1409 1410 if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1, 1411 PASS_ONE_BIT, 1412 bit_chk, 0)) { 1413 found_begin = 1; 1414 *work_bgn = tmp_delay; 1415 break; 1416 } 1417 } 1418 1419 /* We have found a working dtap before the ptap found above */ 1420 if (found_begin == 1) 1421 (*max_working_cnt)++; 1422 1423 /* 1424 * Restore VFIFO to old state before we decremented it 1425 * (if needed). 
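 * Advancing p past IO_DQS_EN_PHASE_MAX below wraps it back to 0 and
 * steps the VFIFO forward by one instead.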
1426 */ 1427 (*p)++; 1428 if (*p > IO_DQS_EN_PHASE_MAX) { 1429 *p = 0; 1430 rw_mgr_incr_vfifo(*grp, v); 1431 } 1432 1433 scc_mgr_set_dqs_en_delay_all_ranks(*grp, 0); 1434 } 1435 1436 static int sdr_nonworking_phase(uint32_t *grp, uint32_t *bit_chk, 1437 uint32_t *work_bgn, uint32_t *v, uint32_t *d, 1438 uint32_t *p, uint32_t *i, uint32_t *max_working_cnt, 1439 uint32_t *work_end) 1440 { 1441 uint32_t found_end = 0; 1442 1443 (*p)++; 1444 *work_end += IO_DELAY_PER_OPA_TAP; 1445 if (*p > IO_DQS_EN_PHASE_MAX) { 1446 /* fiddle with FIFO */ 1447 *p = 0; 1448 rw_mgr_incr_vfifo(*grp, v); 1449 } 1450 1451 for (; *i < VFIFO_SIZE + 1; (*i)++) { 1452 for (; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_end 1453 += IO_DELAY_PER_OPA_TAP) { 1454 scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p); 1455 1456 if (!rw_mgr_mem_calibrate_read_test_all_ranks 1457 (*grp, 1, PASS_ONE_BIT, bit_chk, 0)) { 1458 found_end = 1; 1459 break; 1460 } else { 1461 (*max_working_cnt)++; 1462 } 1463 } 1464 1465 if (found_end) 1466 break; 1467 1468 if (*p > IO_DQS_EN_PHASE_MAX) { 1469 /* fiddle with FIFO */ 1470 rw_mgr_incr_vfifo(*grp, v); 1471 *p = 0; 1472 } 1473 } 1474 1475 if (*i >= VFIFO_SIZE + 1) { 1476 /* cannot see edge of failing read */ 1477 debug_cond(DLEVEL == 2, "%s:%d sdr_nonworking_phase: end:\ 1478 failed\n", __func__, __LINE__); 1479 return 0; 1480 } else { 1481 return 1; 1482 } 1483 } 1484 1485 static int sdr_find_window_centre(uint32_t *grp, uint32_t *bit_chk, 1486 uint32_t *work_bgn, uint32_t *v, uint32_t *d, 1487 uint32_t *p, uint32_t *work_mid, 1488 uint32_t *work_end) 1489 { 1490 int i; 1491 int tmp_delay = 0; 1492 1493 *work_mid = (*work_bgn + *work_end) / 2; 1494 1495 debug_cond(DLEVEL == 2, "work_bgn=%d work_end=%d work_mid=%d\n", 1496 *work_bgn, *work_end, *work_mid); 1497 /* Get the middle delay to be less than a VFIFO delay */ 1498 for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX; 1499 (*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP) 1500 ; 1501 debug_cond(DLEVEL == 2, "vfifo ptap delay %d\n", tmp_delay); 1502 while (*work_mid > tmp_delay) 1503 *work_mid -= tmp_delay; 1504 debug_cond(DLEVEL == 2, "new work_mid %d\n", *work_mid); 1505 1506 tmp_delay = 0; 1507 for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX && tmp_delay < *work_mid; 1508 (*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP) 1509 ; 1510 tmp_delay -= IO_DELAY_PER_OPA_TAP; 1511 debug_cond(DLEVEL == 2, "new p %d, tmp_delay=%d\n", (*p) - 1, tmp_delay); 1512 for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_mid; (*d)++, 1513 tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) 1514 ; 1515 debug_cond(DLEVEL == 2, "new d %d, tmp_delay=%d\n", *d, tmp_delay); 1516 1517 scc_mgr_set_dqs_en_phase_all_ranks(*grp, (*p) - 1); 1518 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d); 1519 1520 /* 1521 * push vfifo until we can successfully calibrate. We can do this 1522 * because the largest possible margin in 1 VFIFO cycle. 
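 * At most VFIFO_SIZE increments are therefore needed, which is the loop
 * bound used below.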
 */
	for (i = 0; i < VFIFO_SIZE; i++) {
		debug_cond(DLEVEL == 2, "find_dqs_en_phase: center: vfifo=%u\n",
			   *v);
		if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1,
							     PASS_ONE_BIT,
							     bit_chk, 0)) {
			break;
		}

		/* fiddle with FIFO */
		rw_mgr_incr_vfifo(*grp, v);
	}

	if (i >= VFIFO_SIZE) {
		debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: center: \
			   failed\n", __func__, __LINE__);
		return 0;
	} else {
		return 1;
	}
}

/* find a good dqs enable to use */
static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(uint32_t grp)
{
	uint32_t v, d, p, i;
	uint32_t max_working_cnt;
	uint32_t bit_chk;
	uint32_t dtaps_per_ptap;
	uint32_t work_bgn, work_mid, work_end;
	uint32_t found_passing_read, found_failing_read, initial_failing_dtap;

	debug("%s:%d %u\n", __func__, __LINE__, grp);

	reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);

	scc_mgr_set_dqs_en_delay_all_ranks(grp, 0);
	scc_mgr_set_dqs_en_phase_all_ranks(grp, 0);

	/* ************************************************************** */
	/* * Step 0 : Determine number of delay taps for each phase tap * */
	dtaps_per_ptap = IO_DELAY_PER_OPA_TAP/IO_DELAY_PER_DQS_EN_DCHAIN_TAP;

	/* ********************************************************* */
	/* * Step 1 : First push vfifo until we get a failing read * */
	v = find_vfifo_read(grp, &bit_chk);

	max_working_cnt = 0;

	/* ******************************************************** */
	/* * step 2: find first working phase, increment in ptaps * */
	work_bgn = 0;
	if (find_working_phase(&grp, &bit_chk, dtaps_per_ptap, &work_bgn, &v, &d,
			       &p, &i, &max_working_cnt) == 0)
		return 0;

	work_end = work_bgn;

	/*
	 * If d is 0, then the working window covers a phase tap and we can
	 * follow the old procedure; otherwise, we've found the beginning,
	 * and we need to increment the dtaps until we find the end.
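	 * (Here d is the dtap count left over from find_working_phase();
	 * d == 0 means the first working setting was found right on a
	 * phase tap.)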
1586 */ 1587 if (d == 0) { 1588 /* ********************************************************* */ 1589 /* * step 3a: if we have room, back off by one and 1590 increment in dtaps * */ 1591 1592 sdr_backup_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p, 1593 &max_working_cnt); 1594 1595 /* ********************************************************* */ 1596 /* * step 4a: go forward from working phase to non working 1597 phase, increment in ptaps * */ 1598 if (sdr_nonworking_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p, 1599 &i, &max_working_cnt, &work_end) == 0) 1600 return 0; 1601 1602 /* ********************************************************* */ 1603 /* * step 5a: back off one from last, increment in dtaps * */ 1604 1605 /* Special case code for backing up a phase */ 1606 if (p == 0) { 1607 p = IO_DQS_EN_PHASE_MAX; 1608 rw_mgr_decr_vfifo(grp, &v); 1609 } else { 1610 p = p - 1; 1611 } 1612 1613 work_end -= IO_DELAY_PER_OPA_TAP; 1614 scc_mgr_set_dqs_en_phase_all_ranks(grp, p); 1615 1616 /* * The actual increment of dtaps is done outside of 1617 the if/else loop to share code */ 1618 d = 0; 1619 1620 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: v/p: \ 1621 vfifo=%u ptap=%u\n", __func__, __LINE__, 1622 v, p); 1623 } else { 1624 /* ******************************************************* */ 1625 /* * step 3-5b: Find the right edge of the window using 1626 delay taps * */ 1627 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase:vfifo=%u \ 1628 ptap=%u dtap=%u bgn=%u\n", __func__, __LINE__, 1629 v, p, d, work_bgn); 1630 1631 work_end = work_bgn; 1632 1633 /* * The actual increment of dtaps is done outside of the 1634 if/else loop to share code */ 1635 1636 /* Only here to counterbalance a subtract later on which is 1637 not needed if this branch of the algorithm is taken */ 1638 max_working_cnt++; 1639 } 1640 1641 /* The dtap increment to find the failing edge is done here */ 1642 for (; d <= IO_DQS_EN_DELAY_MAX; d++, work_end += 1643 IO_DELAY_PER_DQS_EN_DCHAIN_TAP) { 1644 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: \ 1645 end-2: dtap=%u\n", __func__, __LINE__, d); 1646 scc_mgr_set_dqs_en_delay_all_ranks(grp, d); 1647 1648 if (!rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, 1649 PASS_ONE_BIT, 1650 &bit_chk, 0)) { 1651 break; 1652 } 1653 } 1654 1655 /* Go back to working dtap */ 1656 if (d != 0) 1657 work_end -= IO_DELAY_PER_DQS_EN_DCHAIN_TAP; 1658 1659 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: v/p/d: vfifo=%u \ 1660 ptap=%u dtap=%u end=%u\n", __func__, __LINE__, 1661 v, p, d-1, work_end); 1662 1663 if (work_end < work_bgn) { 1664 /* nil range */ 1665 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: end-2: \ 1666 failed\n", __func__, __LINE__); 1667 return 0; 1668 } 1669 1670 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: found range [%u,%u]\n", 1671 __func__, __LINE__, work_bgn, work_end); 1672 1673 /* *************************************************************** */ 1674 /* 1675 * * We need to calculate the number of dtaps that equal a ptap 1676 * * To do that we'll back up a ptap and re-find the edge of the 1677 * * window using dtaps 1678 */ 1679 1680 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: calculate dtaps_per_ptap \ 1681 for tracking\n", __func__, __LINE__); 1682 1683 /* Special case code for backing up a phase */ 1684 if (p == 0) { 1685 p = IO_DQS_EN_PHASE_MAX; 1686 rw_mgr_decr_vfifo(grp, &v); 1687 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: backedup \ 1688 cycle/phase: v=%u p=%u\n", __func__, __LINE__, 1689 v, p); 1690 } else { 1691 p = p - 1; 1692 
debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: backedup \ 1693 phase only: v=%u p=%u", __func__, __LINE__, 1694 v, p); 1695 } 1696 1697 scc_mgr_set_dqs_en_phase_all_ranks(grp, p); 1698 1699 /* 1700 * Increase dtap until we first see a passing read (in case the 1701 * window is smaller than a ptap), 1702 * and then a failing read to mark the edge of the window again 1703 */ 1704 1705 /* Find a passing read */ 1706 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find passing read\n", 1707 __func__, __LINE__); 1708 found_passing_read = 0; 1709 found_failing_read = 0; 1710 initial_failing_dtap = d; 1711 for (; d <= IO_DQS_EN_DELAY_MAX; d++) { 1712 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: testing \ 1713 read d=%u\n", __func__, __LINE__, d); 1714 scc_mgr_set_dqs_en_delay_all_ranks(grp, d); 1715 1716 if (rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, 1717 PASS_ONE_BIT, 1718 &bit_chk, 0)) { 1719 found_passing_read = 1; 1720 break; 1721 } 1722 } 1723 1724 if (found_passing_read) { 1725 /* Find a failing read */ 1726 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find failing \ 1727 read\n", __func__, __LINE__); 1728 for (d = d + 1; d <= IO_DQS_EN_DELAY_MAX; d++) { 1729 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: \ 1730 testing read d=%u\n", __func__, __LINE__, d); 1731 scc_mgr_set_dqs_en_delay_all_ranks(grp, d); 1732 1733 if (!rw_mgr_mem_calibrate_read_test_all_ranks 1734 (grp, 1, PASS_ONE_BIT, &bit_chk, 0)) { 1735 found_failing_read = 1; 1736 break; 1737 } 1738 } 1739 } else { 1740 debug_cond(DLEVEL == 1, "%s:%d find_dqs_en_phase: failed to \ 1741 calculate dtaps", __func__, __LINE__); 1742 debug_cond(DLEVEL == 1, "per ptap. Fall back on static value\n"); 1743 } 1744 1745 /* 1746 * The dynamically calculated dtaps_per_ptap is only valid if we 1747 * found a passing/failing read. If we didn't, it means d hit the max 1748 * (IO_DQS_EN_DELAY_MAX). Otherwise, dtaps_per_ptap retains its 1749 * statically calculated value. 
 */
	if (found_passing_read && found_failing_read)
		dtaps_per_ptap = d - initial_failing_dtap;

	writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap);
	debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: dtaps_per_ptap=%u \
		   - %u = %u", __func__, __LINE__, d,
		   initial_failing_dtap, dtaps_per_ptap);

	/* ******************************************** */
	/* * step 6: Find the centre of the window * */
	if (sdr_find_window_centre(&grp, &bit_chk, &work_bgn, &v, &d, &p,
				   &work_mid, &work_end) == 0)
		return 0;

	debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: center found: \
		   vfifo=%u ptap=%u dtap=%u\n", __func__, __LINE__,
		   v, p-1, d);
	return 1;
}

/*
 * Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different
 * dq_in_delay values
 */
static uint32_t
rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay
(uint32_t write_group, uint32_t read_group, uint32_t test_bgn)
{
	uint32_t found;
	uint32_t i;
	uint32_t p;
	uint32_t d;
	uint32_t r;

	const uint32_t delay_step = IO_IO_IN_DELAY_MAX /
		(RW_MGR_MEM_DQ_PER_READ_DQS-1);
	/* we start at zero, so have one less dq to divide among */

	debug("%s:%d (%u,%u,%u)", __func__, __LINE__, write_group, read_group,
	      test_bgn);

	/* try different dq_in_delays since the dq path is shorter than dqs */

	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		for (i = 0, p = test_bgn, d = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS;
		     i++, p++, d += delay_step) {
			debug_cond(DLEVEL == 1, "%s:%d rw_mgr_mem_calibrate_\
				   vfifo_find_dqs_", __func__, __LINE__);
			debug_cond(DLEVEL == 1, "en_phase_sweep_dq_in_delay: g=%u/%u ",
				   write_group, read_group);
			debug_cond(DLEVEL == 1, "r=%u, i=%u p=%u d=%u\n", r, i, p, d);
			scc_mgr_set_dq_in_delay(p, d);
			scc_mgr_load_dq(p);
		}
		writel(0, &sdr_scc_mgr->update);
	}

	found = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(read_group);

	debug_cond(DLEVEL == 1, "%s:%d rw_mgr_mem_calibrate_vfifo_find_dqs_\
		   en_phase_sweep_dq", __func__, __LINE__);
	debug_cond(DLEVEL == 1, "_in_delay: g=%u/%u found=%u; Resetting delay \
		   chain to zero\n", write_group, read_group, found);

	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS;
		     i++, p++) {
			scc_mgr_set_dq_in_delay(p, 0);
			scc_mgr_load_dq(p);
		}
		writel(0, &sdr_scc_mgr->update);
	}

	return found;
}

/* per-bit deskew DQ and center */
static uint32_t rw_mgr_mem_calibrate_vfifo_center(uint32_t rank_bgn,
	uint32_t write_group, uint32_t read_group, uint32_t test_bgn,
	uint32_t use_read_test, uint32_t update_fom)
{
	uint32_t i, p, d, min_index;
	/*
	 * Store these as signed since there are comparisons with
	 * signed numbers.
1838 */ 1839 uint32_t bit_chk; 1840 uint32_t sticky_bit_chk; 1841 int32_t left_edge[RW_MGR_MEM_DQ_PER_READ_DQS]; 1842 int32_t right_edge[RW_MGR_MEM_DQ_PER_READ_DQS]; 1843 int32_t final_dq[RW_MGR_MEM_DQ_PER_READ_DQS]; 1844 int32_t mid; 1845 int32_t orig_mid_min, mid_min; 1846 int32_t new_dqs, start_dqs, start_dqs_en, shift_dq, final_dqs, 1847 final_dqs_en; 1848 int32_t dq_margin, dqs_margin; 1849 uint32_t stop; 1850 uint32_t temp_dq_in_delay1, temp_dq_in_delay2; 1851 uint32_t addr; 1852 1853 debug("%s:%d: %u %u", __func__, __LINE__, read_group, test_bgn); 1854 1855 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_DQS_IN_DELAY_OFFSET; 1856 start_dqs = readl(addr + (read_group << 2)); 1857 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) 1858 start_dqs_en = readl(addr + ((read_group << 2) 1859 - IO_DQS_EN_DELAY_OFFSET)); 1860 1861 /* set the left and right edge of each bit to an illegal value */ 1862 /* use (IO_IO_IN_DELAY_MAX + 1) as an illegal value */ 1863 sticky_bit_chk = 0; 1864 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 1865 left_edge[i] = IO_IO_IN_DELAY_MAX + 1; 1866 right_edge[i] = IO_IO_IN_DELAY_MAX + 1; 1867 } 1868 1869 /* Search for the left edge of the window for each bit */ 1870 for (d = 0; d <= IO_IO_IN_DELAY_MAX; d++) { 1871 scc_mgr_apply_group_dq_in_delay(write_group, test_bgn, d); 1872 1873 writel(0, &sdr_scc_mgr->update); 1874 1875 /* 1876 * Stop searching when the read test doesn't pass AND when 1877 * we've seen a passing read on every bit. 1878 */ 1879 if (use_read_test) { 1880 stop = !rw_mgr_mem_calibrate_read_test(rank_bgn, 1881 read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT, 1882 &bit_chk, 0, 0); 1883 } else { 1884 rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1885 0, PASS_ONE_BIT, 1886 &bit_chk, 0); 1887 bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS * 1888 (read_group - (write_group * 1889 RW_MGR_MEM_IF_READ_DQS_WIDTH / 1890 RW_MGR_MEM_IF_WRITE_DQS_WIDTH))); 1891 stop = (bit_chk == 0); 1892 } 1893 sticky_bit_chk = sticky_bit_chk | bit_chk; 1894 stop = stop && (sticky_bit_chk == param->read_correct_mask); 1895 debug_cond(DLEVEL == 2, "%s:%d vfifo_center(left): dtap=%u => %u == %u \ 1896 && %u", __func__, __LINE__, d, 1897 sticky_bit_chk, 1898 param->read_correct_mask, stop); 1899 1900 if (stop == 1) { 1901 break; 1902 } else { 1903 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 1904 if (bit_chk & 1) { 1905 /* Remember a passing test as the 1906 left_edge */ 1907 left_edge[i] = d; 1908 } else { 1909 /* If a left edge has not been seen yet, 1910 then a future passing test will mark 1911 this edge as the right edge */ 1912 if (left_edge[i] == 1913 IO_IO_IN_DELAY_MAX + 1) { 1914 right_edge[i] = -(d + 1); 1915 } 1916 } 1917 bit_chk = bit_chk >> 1; 1918 } 1919 } 1920 } 1921 1922 /* Reset DQ delay chains to 0 */ 1923 scc_mgr_apply_group_dq_in_delay(write_group, test_bgn, 0); 1924 sticky_bit_chk = 0; 1925 for (i = RW_MGR_MEM_DQ_PER_READ_DQS - 1;; i--) { 1926 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: \ 1927 %d right_edge[%u]: %d\n", __func__, __LINE__, 1928 i, left_edge[i], i, right_edge[i]); 1929 1930 /* 1931 * Check for cases where we haven't found the left edge, 1932 * which makes our assignment of the the right edge invalid. 1933 * Reset it to the illegal value. 
1934 */ 1935 if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) && ( 1936 right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) { 1937 right_edge[i] = IO_IO_IN_DELAY_MAX + 1; 1938 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: reset \ 1939 right_edge[%u]: %d\n", __func__, __LINE__, 1940 i, right_edge[i]); 1941 } 1942 1943 /* 1944 * Reset sticky bit (except for bits where we have seen 1945 * both the left and right edge). 1946 */ 1947 sticky_bit_chk = sticky_bit_chk << 1; 1948 if ((left_edge[i] != IO_IO_IN_DELAY_MAX + 1) && 1949 (right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) { 1950 sticky_bit_chk = sticky_bit_chk | 1; 1951 } 1952 1953 if (i == 0) 1954 break; 1955 } 1956 1957 /* Search for the right edge of the window for each bit */ 1958 for (d = 0; d <= IO_DQS_IN_DELAY_MAX - start_dqs; d++) { 1959 scc_mgr_set_dqs_bus_in_delay(read_group, d + start_dqs); 1960 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { 1961 uint32_t delay = d + start_dqs_en; 1962 if (delay > IO_DQS_EN_DELAY_MAX) 1963 delay = IO_DQS_EN_DELAY_MAX; 1964 scc_mgr_set_dqs_en_delay(read_group, delay); 1965 } 1966 scc_mgr_load_dqs(read_group); 1967 1968 writel(0, &sdr_scc_mgr->update); 1969 1970 /* 1971 * Stop searching when the read test doesn't pass AND when 1972 * we've seen a passing read on every bit. 1973 */ 1974 if (use_read_test) { 1975 stop = !rw_mgr_mem_calibrate_read_test(rank_bgn, 1976 read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT, 1977 &bit_chk, 0, 0); 1978 } else { 1979 rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1980 0, PASS_ONE_BIT, 1981 &bit_chk, 0); 1982 bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS * 1983 (read_group - (write_group * 1984 RW_MGR_MEM_IF_READ_DQS_WIDTH / 1985 RW_MGR_MEM_IF_WRITE_DQS_WIDTH))); 1986 stop = (bit_chk == 0); 1987 } 1988 sticky_bit_chk = sticky_bit_chk | bit_chk; 1989 stop = stop && (sticky_bit_chk == param->read_correct_mask); 1990 1991 debug_cond(DLEVEL == 2, "%s:%d vfifo_center(right): dtap=%u => %u == \ 1992 %u && %u", __func__, __LINE__, d, 1993 sticky_bit_chk, param->read_correct_mask, stop); 1994 1995 if (stop == 1) { 1996 break; 1997 } else { 1998 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 1999 if (bit_chk & 1) { 2000 /* Remember a passing test as 2001 the right_edge */ 2002 right_edge[i] = d; 2003 } else { 2004 if (d != 0) { 2005 /* If a right edge has not been 2006 seen yet, then a future passing 2007 test will mark this edge as the 2008 left edge */ 2009 if (right_edge[i] == 2010 IO_IO_IN_DELAY_MAX + 1) { 2011 left_edge[i] = -(d + 1); 2012 } 2013 } else { 2014 /* d = 0 failed, but it passed 2015 when testing the left edge, 2016 so it must be marginal, 2017 set it to -1 */ 2018 if (right_edge[i] == 2019 IO_IO_IN_DELAY_MAX + 1 && 2020 left_edge[i] != 2021 IO_IO_IN_DELAY_MAX 2022 + 1) { 2023 right_edge[i] = -1; 2024 } 2025 /* If a right edge has not been 2026 seen yet, then a future passing 2027 test will mark this edge as the 2028 left edge */ 2029 else if (right_edge[i] == 2030 IO_IO_IN_DELAY_MAX + 2031 1) { 2032 left_edge[i] = -(d + 1); 2033 } 2034 } 2035 } 2036 2037 debug_cond(DLEVEL == 2, "%s:%d vfifo_center[r,\ 2038 d=%u]: ", __func__, __LINE__, d); 2039 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d ", 2040 (int)(bit_chk & 1), i, left_edge[i]); 2041 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i, 2042 right_edge[i]); 2043 bit_chk = bit_chk >> 1; 2044 } 2045 } 2046 } 2047 2048 /* Check that all bits have a window */ 2049 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 2050 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: \ 2051 %d right_edge[%u]: %d", 
__func__, __LINE__, 2052 i, left_edge[i], i, right_edge[i]); 2053 if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) || (right_edge[i] 2054 == IO_IO_IN_DELAY_MAX + 1)) { 2055 /* 2056 * Restore delay chain settings before letting the loop 2057 * in rw_mgr_mem_calibrate_vfifo to retry different 2058 * dqs/ck relationships. 2059 */ 2060 scc_mgr_set_dqs_bus_in_delay(read_group, start_dqs); 2061 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { 2062 scc_mgr_set_dqs_en_delay(read_group, 2063 start_dqs_en); 2064 } 2065 scc_mgr_load_dqs(read_group); 2066 writel(0, &sdr_scc_mgr->update); 2067 2068 debug_cond(DLEVEL == 1, "%s:%d vfifo_center: failed to \ 2069 find edge [%u]: %d %d", __func__, __LINE__, 2070 i, left_edge[i], right_edge[i]); 2071 if (use_read_test) { 2072 set_failing_group_stage(read_group * 2073 RW_MGR_MEM_DQ_PER_READ_DQS + i, 2074 CAL_STAGE_VFIFO, 2075 CAL_SUBSTAGE_VFIFO_CENTER); 2076 } else { 2077 set_failing_group_stage(read_group * 2078 RW_MGR_MEM_DQ_PER_READ_DQS + i, 2079 CAL_STAGE_VFIFO_AFTER_WRITES, 2080 CAL_SUBSTAGE_VFIFO_CENTER); 2081 } 2082 return 0; 2083 } 2084 } 2085 2086 /* Find middle of window for each DQ bit */ 2087 mid_min = left_edge[0] - right_edge[0]; 2088 min_index = 0; 2089 for (i = 1; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 2090 mid = left_edge[i] - right_edge[i]; 2091 if (mid < mid_min) { 2092 mid_min = mid; 2093 min_index = i; 2094 } 2095 } 2096 2097 /* 2098 * -mid_min/2 represents the amount that we need to move DQS. 2099 * If mid_min is odd and positive we'll need to add one to 2100 * make sure the rounding in further calculations is correct 2101 * (always bias to the right), so just add 1 for all positive values. 2102 */ 2103 if (mid_min > 0) 2104 mid_min++; 2105 2106 mid_min = mid_min / 2; 2107 2108 debug_cond(DLEVEL == 1, "%s:%d vfifo_center: mid_min=%d (index=%u)\n", 2109 __func__, __LINE__, mid_min, min_index); 2110 2111 /* Determine the amount we can change DQS (which is -mid_min) */ 2112 orig_mid_min = mid_min; 2113 new_dqs = start_dqs - mid_min; 2114 if (new_dqs > IO_DQS_IN_DELAY_MAX) 2115 new_dqs = IO_DQS_IN_DELAY_MAX; 2116 else if (new_dqs < 0) 2117 new_dqs = 0; 2118 2119 mid_min = start_dqs - new_dqs; 2120 debug_cond(DLEVEL == 1, "vfifo_center: new mid_min=%d new_dqs=%d\n", 2121 mid_min, new_dqs); 2122 2123 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { 2124 if (start_dqs_en - mid_min > IO_DQS_EN_DELAY_MAX) 2125 mid_min += start_dqs_en - mid_min - IO_DQS_EN_DELAY_MAX; 2126 else if (start_dqs_en - mid_min < 0) 2127 mid_min += start_dqs_en - mid_min; 2128 } 2129 new_dqs = start_dqs - mid_min; 2130 2131 debug_cond(DLEVEL == 1, "vfifo_center: start_dqs=%d start_dqs_en=%d \ 2132 new_dqs=%d mid_min=%d\n", start_dqs, 2133 IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS ? 
start_dqs_en : -1, 2134 new_dqs, mid_min); 2135 2136 /* Initialize data for export structures */ 2137 dqs_margin = IO_IO_IN_DELAY_MAX + 1; 2138 dq_margin = IO_IO_IN_DELAY_MAX + 1; 2139 2140 /* add delay to bring centre of all DQ windows to the same "level" */ 2141 for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) { 2142 /* Use values before divide by 2 to reduce round off error */ 2143 shift_dq = (left_edge[i] - right_edge[i] - 2144 (left_edge[min_index] - right_edge[min_index]))/2 + 2145 (orig_mid_min - mid_min); 2146 2147 debug_cond(DLEVEL == 2, "vfifo_center: before: \ 2148 shift_dq[%u]=%d\n", i, shift_dq); 2149 2150 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_IN_DELAY_OFFSET; 2151 temp_dq_in_delay1 = readl(addr + (p << 2)); 2152 temp_dq_in_delay2 = readl(addr + (i << 2)); 2153 2154 if (shift_dq + (int32_t)temp_dq_in_delay1 > 2155 (int32_t)IO_IO_IN_DELAY_MAX) { 2156 shift_dq = (int32_t)IO_IO_IN_DELAY_MAX - temp_dq_in_delay2; 2157 } else if (shift_dq + (int32_t)temp_dq_in_delay1 < 0) { 2158 shift_dq = -(int32_t)temp_dq_in_delay1; 2159 } 2160 debug_cond(DLEVEL == 2, "vfifo_center: after: \ 2161 shift_dq[%u]=%d\n", i, shift_dq); 2162 final_dq[i] = temp_dq_in_delay1 + shift_dq; 2163 scc_mgr_set_dq_in_delay(p, final_dq[i]); 2164 scc_mgr_load_dq(p); 2165 2166 debug_cond(DLEVEL == 2, "vfifo_center: margin[%u]=[%d,%d]\n", i, 2167 left_edge[i] - shift_dq + (-mid_min), 2168 right_edge[i] + shift_dq - (-mid_min)); 2169 /* To determine values for export structures */ 2170 if (left_edge[i] - shift_dq + (-mid_min) < dq_margin) 2171 dq_margin = left_edge[i] - shift_dq + (-mid_min); 2172 2173 if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin) 2174 dqs_margin = right_edge[i] + shift_dq - (-mid_min); 2175 } 2176 2177 final_dqs = new_dqs; 2178 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) 2179 final_dqs_en = start_dqs_en - mid_min; 2180 2181 /* Move DQS-en */ 2182 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { 2183 scc_mgr_set_dqs_en_delay(read_group, final_dqs_en); 2184 scc_mgr_load_dqs(read_group); 2185 } 2186 2187 /* Move DQS */ 2188 scc_mgr_set_dqs_bus_in_delay(read_group, final_dqs); 2189 scc_mgr_load_dqs(read_group); 2190 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: dq_margin=%d \ 2191 dqs_margin=%d", __func__, __LINE__, 2192 dq_margin, dqs_margin); 2193 2194 /* 2195 * Do not remove this line as it makes sure all of our decisions 2196 * have been applied. Apply the update bit. 2197 */ 2198 writel(0, &sdr_scc_mgr->update); 2199 2200 return (dq_margin >= 0) && (dqs_margin >= 0); 2201 } 2202 2203 /* 2204 * calibrate the read valid prediction FIFO. 2205 * 2206 * - read valid prediction will consist of finding a good DQS enable phase, 2207 * DQS enable delay, DQS input phase, and DQS input delay. 2208 * - we also do a per-bit deskew on the DQ lines. 
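 * - roughly, the code below sweeps extra output-side delay (d) two taps at a
 *   time and, for each d, every DQDQS output phase (p): it loads the read
 *   patterns, runs the guaranteed read test, searches for a working DQS
 *   enable phase/delay, and then centres each bit; the group only counts as
 *   calibrated once all of these pass.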
2209 */ 2210 static uint32_t rw_mgr_mem_calibrate_vfifo(uint32_t read_group, 2211 uint32_t test_bgn) 2212 { 2213 uint32_t p, d, rank_bgn, sr; 2214 uint32_t dtaps_per_ptap; 2215 uint32_t tmp_delay; 2216 uint32_t bit_chk; 2217 uint32_t grp_calibrated; 2218 uint32_t write_group, write_test_bgn; 2219 uint32_t failed_substage; 2220 2221 debug("%s:%d: %u %u\n", __func__, __LINE__, read_group, test_bgn); 2222 2223 /* update info for sims */ 2224 reg_file_set_stage(CAL_STAGE_VFIFO); 2225 2226 write_group = read_group; 2227 write_test_bgn = test_bgn; 2228 2229 /* USER Determine number of delay taps for each phase tap */ 2230 dtaps_per_ptap = 0; 2231 tmp_delay = 0; 2232 while (tmp_delay < IO_DELAY_PER_OPA_TAP) { 2233 dtaps_per_ptap++; 2234 tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP; 2235 } 2236 dtaps_per_ptap--; 2237 tmp_delay = 0; 2238 2239 /* update info for sims */ 2240 reg_file_set_group(read_group); 2241 2242 grp_calibrated = 0; 2243 2244 reg_file_set_sub_stage(CAL_SUBSTAGE_GUARANTEED_READ); 2245 failed_substage = CAL_SUBSTAGE_GUARANTEED_READ; 2246 2247 for (d = 0; d <= dtaps_per_ptap && grp_calibrated == 0; d += 2) { 2248 /* 2249 * In RLDRAMX we may be messing the delay of pins in 2250 * the same write group but outside of the current read 2251 * the group, but that's ok because we haven't 2252 * calibrated output side yet. 2253 */ 2254 if (d > 0) { 2255 scc_mgr_apply_group_all_out_delay_add_all_ranks 2256 (write_group, write_test_bgn, d); 2257 } 2258 2259 for (p = 0; p <= IO_DQDQS_OUT_PHASE_MAX && grp_calibrated == 0; 2260 p++) { 2261 /* set a particular dqdqs phase */ 2262 scc_mgr_set_dqdqs_output_phase_all_ranks(read_group, p); 2263 2264 debug_cond(DLEVEL == 1, "%s:%d calibrate_vfifo: g=%u \ 2265 p=%u d=%u\n", __func__, __LINE__, 2266 read_group, p, d); 2267 2268 /* 2269 * Load up the patterns used by read calibration 2270 * using current DQDQS phase. 2271 */ 2272 rw_mgr_mem_calibrate_read_load_patterns(0, 1); 2273 if (!(gbl->phy_debug_mode_flags & 2274 PHY_DEBUG_DISABLE_GUARANTEED_READ)) { 2275 if (!rw_mgr_mem_calibrate_read_test_patterns_all_ranks 2276 (read_group, 1, &bit_chk)) { 2277 debug_cond(DLEVEL == 1, "%s:%d Guaranteed read test failed:", 2278 __func__, __LINE__); 2279 debug_cond(DLEVEL == 1, " g=%u p=%u d=%u\n", 2280 read_group, p, d); 2281 break; 2282 } 2283 } 2284 2285 /* case:56390 */ 2286 grp_calibrated = 1; 2287 if (rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay 2288 (write_group, read_group, test_bgn)) { 2289 /* 2290 * USER Read per-bit deskew can be done on a 2291 * per shadow register basis. 2292 */ 2293 for (rank_bgn = 0, sr = 0; 2294 rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS; 2295 rank_bgn += NUM_RANKS_PER_SHADOW_REG, 2296 ++sr) { 2297 /* 2298 * Determine if this set of ranks 2299 * should be skipped entirely. 2300 */ 2301 if (!param->skip_shadow_regs[sr]) { 2302 /* 2303 * If doing read after write 2304 * calibration, do not update 2305 * FOM, now - do it then. 
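 * (The later VFIFO-after-writes pass calls rw_mgr_mem_calibrate_vfifo_center()
 * with update_fom set to 1, which is where the figure of merit is recorded.)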
2306 */ 2307 if (!rw_mgr_mem_calibrate_vfifo_center 2308 (rank_bgn, write_group, 2309 read_group, test_bgn, 1, 0)) { 2310 grp_calibrated = 0; 2311 failed_substage = 2312 CAL_SUBSTAGE_VFIFO_CENTER; 2313 } 2314 } 2315 } 2316 } else { 2317 grp_calibrated = 0; 2318 failed_substage = CAL_SUBSTAGE_DQS_EN_PHASE; 2319 } 2320 } 2321 } 2322 2323 if (grp_calibrated == 0) { 2324 set_failing_group_stage(write_group, CAL_STAGE_VFIFO, 2325 failed_substage); 2326 return 0; 2327 } 2328 2329 /* 2330 * Reset the delay chains back to zero if they have moved > 1 2331 * (check for > 1 because loop will increase d even when pass in 2332 * first case). 2333 */ 2334 if (d > 2) 2335 scc_mgr_zero_group(write_group, write_test_bgn, 1); 2336 2337 return 1; 2338 } 2339 2340 /* VFIFO Calibration -- Read Deskew Calibration after write deskew */ 2341 static uint32_t rw_mgr_mem_calibrate_vfifo_end(uint32_t read_group, 2342 uint32_t test_bgn) 2343 { 2344 uint32_t rank_bgn, sr; 2345 uint32_t grp_calibrated; 2346 uint32_t write_group; 2347 2348 debug("%s:%d %u %u", __func__, __LINE__, read_group, test_bgn); 2349 2350 /* update info for sims */ 2351 2352 reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES); 2353 reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER); 2354 2355 write_group = read_group; 2356 2357 /* update info for sims */ 2358 reg_file_set_group(read_group); 2359 2360 grp_calibrated = 1; 2361 /* Read per-bit deskew can be done on a per shadow register basis */ 2362 for (rank_bgn = 0, sr = 0; rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS; 2363 rank_bgn += NUM_RANKS_PER_SHADOW_REG, ++sr) { 2364 /* Determine if this set of ranks should be skipped entirely */ 2365 if (!param->skip_shadow_regs[sr]) { 2366 /* This is the last calibration round, update FOM here */ 2367 if (!rw_mgr_mem_calibrate_vfifo_center(rank_bgn, 2368 write_group, 2369 read_group, 2370 test_bgn, 0, 2371 1)) { 2372 grp_calibrated = 0; 2373 } 2374 } 2375 } 2376 2377 2378 if (grp_calibrated == 0) { 2379 set_failing_group_stage(write_group, 2380 CAL_STAGE_VFIFO_AFTER_WRITES, 2381 CAL_SUBSTAGE_VFIFO_CENTER); 2382 return 0; 2383 } 2384 2385 return 1; 2386 } 2387 2388 /* Calibrate LFIFO to find smallest read latency */ 2389 static uint32_t rw_mgr_mem_calibrate_lfifo(void) 2390 { 2391 uint32_t found_one; 2392 uint32_t bit_chk; 2393 2394 debug("%s:%d\n", __func__, __LINE__); 2395 2396 /* update info for sims */ 2397 reg_file_set_stage(CAL_STAGE_LFIFO); 2398 reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY); 2399 2400 /* Load up the patterns used by read calibration for all ranks */ 2401 rw_mgr_mem_calibrate_read_load_patterns(0, 1); 2402 found_one = 0; 2403 2404 do { 2405 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); 2406 debug_cond(DLEVEL == 2, "%s:%d lfifo: read_lat=%u", 2407 __func__, __LINE__, gbl->curr_read_lat); 2408 2409 if (!rw_mgr_mem_calibrate_read_test_all_ranks(0, 2410 NUM_READ_TESTS, 2411 PASS_ALL_BITS, 2412 &bit_chk, 1)) { 2413 break; 2414 } 2415 2416 found_one = 1; 2417 /* reduce read latency and see if things are working */ 2418 /* correctly */ 2419 gbl->curr_read_lat--; 2420 } while (gbl->curr_read_lat > 0); 2421 2422 /* reset the fifos to get pointers to known state */ 2423 2424 writel(0, &phy_mgr_cmd->fifo_reset); 2425 2426 if (found_one) { 2427 /* add a fudge factor to the read latency that was determined */ 2428 gbl->curr_read_lat += 2; 2429 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); 2430 debug_cond(DLEVEL == 2, "%s:%d lfifo: success: using \ 2431 read_lat=%u\n", __func__, __LINE__, 2432 gbl->curr_read_lat); 2433 return 1; 2434 } else { 
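/* Even the initial read latency failed; record the LFIFO stage failure. */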
2435 set_failing_group_stage(0xff, CAL_STAGE_LFIFO, 2436 CAL_SUBSTAGE_READ_LATENCY); 2437 2438 debug_cond(DLEVEL == 2, "%s:%d lfifo: failed at initial \ 2439 read_lat=%u\n", __func__, __LINE__, 2440 gbl->curr_read_lat); 2441 return 0; 2442 } 2443 } 2444 2445 /* 2446 * issue write test command. 2447 * two variants are provided. one that just tests a write pattern and 2448 * another that tests datamask functionality. 2449 */ 2450 static void rw_mgr_mem_calibrate_write_test_issue(uint32_t group, 2451 uint32_t test_dm) 2452 { 2453 uint32_t mcc_instruction; 2454 uint32_t quick_write_mode = (((STATIC_CALIB_STEPS) & CALIB_SKIP_WRITES) && 2455 ENABLE_SUPER_QUICK_CALIBRATION); 2456 uint32_t rw_wl_nop_cycles; 2457 uint32_t addr; 2458 2459 /* 2460 * Set counter and jump addresses for the right 2461 * number of NOP cycles. 2462 * The number of supported NOP cycles can range from -1 to infinity 2463 * Three different cases are handled: 2464 * 2465 * 1. For a number of NOP cycles greater than 0, the RW Mgr looping 2466 * mechanism will be used to insert the right number of NOPs 2467 * 2468 * 2. For a number of NOP cycles equals to 0, the micro-instruction 2469 * issuing the write command will jump straight to the 2470 * micro-instruction that turns on DQS (for DDRx), or outputs write 2471 * data (for RLD), skipping 2472 * the NOP micro-instruction all together 2473 * 2474 * 3. A number of NOP cycles equal to -1 indicates that DQS must be 2475 * turned on in the same micro-instruction that issues the write 2476 * command. Then we need 2477 * to directly jump to the micro-instruction that sends out the data 2478 * 2479 * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters 2480 * (2 and 3). One jump-counter (0) is used to perform multiple 2481 * write-read operations. 2482 * one counter left to issue this command in "multiple-group" mode 2483 */ 2484 2485 rw_wl_nop_cycles = gbl->rw_wl_nop_cycles; 2486 2487 if (rw_wl_nop_cycles == -1) { 2488 /* 2489 * CNTR 2 - We want to execute the special write operation that 2490 * turns on DQS right away and then skip directly to the 2491 * instruction that sends out the data. We set the counter to a 2492 * large number so that the jump is always taken. 2493 */ 2494 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2); 2495 2496 /* CNTR 3 - Not used */ 2497 if (test_dm) { 2498 mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0_WL_1; 2499 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DATA, 2500 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 2501 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP, 2502 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 2503 } else { 2504 mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0_WL_1; 2505 writel(RW_MGR_LFSR_WR_RD_BANK_0_DATA, 2506 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 2507 writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP, 2508 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 2509 } 2510 } else if (rw_wl_nop_cycles == 0) { 2511 /* 2512 * CNTR 2 - We want to skip the NOP operation and go straight 2513 * to the DQS enable instruction. We set the counter to a large 2514 * number so that the jump is always taken. 
2515 */ 2516 writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2); 2517 2518 /* CNTR 3 - Not used */ 2519 if (test_dm) { 2520 mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0; 2521 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DQS, 2522 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 2523 } else { 2524 mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0; 2525 writel(RW_MGR_LFSR_WR_RD_BANK_0_DQS, 2526 &sdr_rw_load_jump_mgr_regs->load_jump_add2); 2527 } 2528 } else { 2529 /* 2530 * CNTR 2 - In this case we want to execute the next instruction 2531 * and NOT take the jump. So we set the counter to 0. The jump 2532 * address doesn't count. 2533 */ 2534 writel(0x0, &sdr_rw_load_mgr_regs->load_cntr2); 2535 writel(0x0, &sdr_rw_load_jump_mgr_regs->load_jump_add2); 2536 2537 /* 2538 * CNTR 3 - Set the nop counter to the number of cycles we 2539 * need to loop for, minus 1. 2540 */ 2541 writel(rw_wl_nop_cycles - 1, &sdr_rw_load_mgr_regs->load_cntr3); 2542 if (test_dm) { 2543 mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0; 2544 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP, 2545 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 2546 } else { 2547 mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0; 2548 writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP, 2549 &sdr_rw_load_jump_mgr_regs->load_jump_add3); 2550 } 2551 } 2552 2553 writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS | 2554 RW_MGR_RESET_READ_DATAPATH_OFFSET); 2555 2556 if (quick_write_mode) 2557 writel(0x08, &sdr_rw_load_mgr_regs->load_cntr0); 2558 else 2559 writel(0x40, &sdr_rw_load_mgr_regs->load_cntr0); 2560 2561 writel(mcc_instruction, &sdr_rw_load_jump_mgr_regs->load_jump_add0); 2562 2563 /* 2564 * CNTR 1 - This is used to ensure enough time elapses 2565 * for read data to come back. 2566 */ 2567 writel(0x30, &sdr_rw_load_mgr_regs->load_cntr1); 2568 2569 if (test_dm) { 2570 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_WAIT, 2571 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 2572 } else { 2573 writel(RW_MGR_LFSR_WR_RD_BANK_0_WAIT, 2574 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 2575 } 2576 2577 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 2578 writel(mcc_instruction, addr + (group << 2)); 2579 } 2580 2581 /* Test writes, can check for a single bit pass or multiple bit pass */ 2582 static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn, 2583 uint32_t write_group, uint32_t use_dm, uint32_t all_correct, 2584 uint32_t *bit_chk, uint32_t all_ranks) 2585 { 2586 uint32_t r; 2587 uint32_t correct_mask_vg; 2588 uint32_t tmp_bit_chk; 2589 uint32_t vg; 2590 uint32_t rank_end = all_ranks ? 
RW_MGR_MEM_NUMBER_OF_RANKS : 2591 (rank_bgn + NUM_RANKS_PER_SHADOW_REG); 2592 uint32_t addr_rw_mgr; 2593 uint32_t base_rw_mgr; 2594 2595 *bit_chk = param->write_correct_mask; 2596 correct_mask_vg = param->write_correct_mask_vg; 2597 2598 for (r = rank_bgn; r < rank_end; r++) { 2599 if (param->skip_ranks[r]) { 2600 /* request to skip the rank */ 2601 continue; 2602 } 2603 2604 /* set rank */ 2605 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); 2606 2607 tmp_bit_chk = 0; 2608 addr_rw_mgr = SDR_PHYGRP_RWMGRGRP_ADDRESS; 2609 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS-1; ; vg--) { 2610 /* reset the fifos to get pointers to known state */ 2611 writel(0, &phy_mgr_cmd->fifo_reset); 2612 2613 tmp_bit_chk = tmp_bit_chk << 2614 (RW_MGR_MEM_DQ_PER_WRITE_DQS / 2615 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS); 2616 rw_mgr_mem_calibrate_write_test_issue(write_group * 2617 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS+vg, 2618 use_dm); 2619 2620 base_rw_mgr = readl(addr_rw_mgr); 2621 tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~(base_rw_mgr)); 2622 if (vg == 0) 2623 break; 2624 } 2625 *bit_chk &= tmp_bit_chk; 2626 } 2627 2628 if (all_correct) { 2629 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 2630 debug_cond(DLEVEL == 2, "write_test(%u,%u,ALL) : %u == \ 2631 %u => %lu", write_group, use_dm, 2632 *bit_chk, param->write_correct_mask, 2633 (long unsigned int)(*bit_chk == 2634 param->write_correct_mask)); 2635 return *bit_chk == param->write_correct_mask; 2636 } else { 2637 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 2638 debug_cond(DLEVEL == 2, "write_test(%u,%u,ONE) : %u != ", 2639 write_group, use_dm, *bit_chk); 2640 debug_cond(DLEVEL == 2, "%lu" " => %lu", (long unsigned int)0, 2641 (long unsigned int)(*bit_chk != 0)); 2642 return *bit_chk != 0x00; 2643 } 2644 } 2645 2646 /* 2647 * center all windows. do per-bit-deskew to possibly increase size of 2648 * certain windows. 2649 */ 2650 static uint32_t rw_mgr_mem_calibrate_writes_center(uint32_t rank_bgn, 2651 uint32_t write_group, uint32_t test_bgn) 2652 { 2653 uint32_t i, p, min_index; 2654 int32_t d; 2655 /* 2656 * Store these as signed since there are comparisons with 2657 * signed numbers. 2658 */ 2659 uint32_t bit_chk; 2660 uint32_t sticky_bit_chk; 2661 int32_t left_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS]; 2662 int32_t right_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS]; 2663 int32_t mid; 2664 int32_t mid_min, orig_mid_min; 2665 int32_t new_dqs, start_dqs, shift_dq; 2666 int32_t dq_margin, dqs_margin, dm_margin; 2667 uint32_t stop; 2668 uint32_t temp_dq_out1_delay; 2669 uint32_t addr; 2670 2671 debug("%s:%d %u %u", __func__, __LINE__, write_group, test_bgn); 2672 2673 dm_margin = 0; 2674 2675 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET; 2676 start_dqs = readl(addr + 2677 (RW_MGR_MEM_DQ_PER_WRITE_DQS << 2)); 2678 2679 /* per-bit deskew */ 2680 2681 /* 2682 * set the left and right edge of each bit to an illegal value 2683 * use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value. 2684 */ 2685 sticky_bit_chk = 0; 2686 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2687 left_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; 2688 right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; 2689 } 2690 2691 /* Search for the left edge of the window for each bit */ 2692 for (d = 0; d <= IO_IO_OUT1_DELAY_MAX; d++) { 2693 scc_mgr_apply_group_dq_out1_delay(write_group, test_bgn, d); 2694 2695 writel(0, &sdr_scc_mgr->update); 2696 2697 /* 2698 * Stop searching when the read test doesn't pass AND when 2699 * we've seen a passing read on every bit. 
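 * (sticky_bit_chk accumulates every bit that has passed at least once, so
 * the loop only stops once each DQ bit has shown a pass and the current
 * delay setting now fails.)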
2700 */ 2701 stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 2702 0, PASS_ONE_BIT, &bit_chk, 0); 2703 sticky_bit_chk = sticky_bit_chk | bit_chk; 2704 stop = stop && (sticky_bit_chk == param->write_correct_mask); 2705 debug_cond(DLEVEL == 2, "write_center(left): dtap=%d => %u \ 2706 == %u && %u [bit_chk= %u ]\n", 2707 d, sticky_bit_chk, param->write_correct_mask, 2708 stop, bit_chk); 2709 2710 if (stop == 1) { 2711 break; 2712 } else { 2713 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2714 if (bit_chk & 1) { 2715 /* 2716 * Remember a passing test as the 2717 * left_edge. 2718 */ 2719 left_edge[i] = d; 2720 } else { 2721 /* 2722 * If a left edge has not been seen 2723 * yet, then a future passing test will 2724 * mark this edge as the right edge. 2725 */ 2726 if (left_edge[i] == 2727 IO_IO_OUT1_DELAY_MAX + 1) { 2728 right_edge[i] = -(d + 1); 2729 } 2730 } 2731 debug_cond(DLEVEL == 2, "write_center[l,d=%d):", d); 2732 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d", 2733 (int)(bit_chk & 1), i, left_edge[i]); 2734 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i, 2735 right_edge[i]); 2736 bit_chk = bit_chk >> 1; 2737 } 2738 } 2739 } 2740 2741 /* Reset DQ delay chains to 0 */ 2742 scc_mgr_apply_group_dq_out1_delay(write_group, test_bgn, 0); 2743 sticky_bit_chk = 0; 2744 for (i = RW_MGR_MEM_DQ_PER_WRITE_DQS - 1;; i--) { 2745 debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: \ 2746 %d right_edge[%u]: %d\n", __func__, __LINE__, 2747 i, left_edge[i], i, right_edge[i]); 2748 2749 /* 2750 * Check for cases where we haven't found the left edge, 2751 * which makes our assignment of the the right edge invalid. 2752 * Reset it to the illegal value. 2753 */ 2754 if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) && 2755 (right_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) { 2756 right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; 2757 debug_cond(DLEVEL == 2, "%s:%d write_center: reset \ 2758 right_edge[%u]: %d\n", __func__, __LINE__, 2759 i, right_edge[i]); 2760 } 2761 2762 /* 2763 * Reset sticky bit (except for bits where we have 2764 * seen the left edge). 2765 */ 2766 sticky_bit_chk = sticky_bit_chk << 1; 2767 if ((left_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) 2768 sticky_bit_chk = sticky_bit_chk | 1; 2769 2770 if (i == 0) 2771 break; 2772 } 2773 2774 /* Search for the right edge of the window for each bit */ 2775 for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - start_dqs; d++) { 2776 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, 2777 d + start_dqs); 2778 2779 writel(0, &sdr_scc_mgr->update); 2780 2781 /* 2782 * Stop searching when the read test doesn't pass AND when 2783 * we've seen a passing read on every bit. 
2784 */ 2785 stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 2786 0, PASS_ONE_BIT, &bit_chk, 0); 2787 2788 sticky_bit_chk = sticky_bit_chk | bit_chk; 2789 stop = stop && (sticky_bit_chk == param->write_correct_mask); 2790 2791 debug_cond(DLEVEL == 2, "write_center (right): dtap=%u => %u == \ 2792 %u && %u\n", d, sticky_bit_chk, 2793 param->write_correct_mask, stop); 2794 2795 if (stop == 1) { 2796 if (d == 0) { 2797 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; 2798 i++) { 2799 /* d = 0 failed, but it passed when 2800 testing the left edge, so it must be 2801 marginal, set it to -1 */ 2802 if (right_edge[i] == 2803 IO_IO_OUT1_DELAY_MAX + 1 && 2804 left_edge[i] != 2805 IO_IO_OUT1_DELAY_MAX + 1) { 2806 right_edge[i] = -1; 2807 } 2808 } 2809 } 2810 break; 2811 } else { 2812 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2813 if (bit_chk & 1) { 2814 /* 2815 * Remember a passing test as 2816 * the right_edge. 2817 */ 2818 right_edge[i] = d; 2819 } else { 2820 if (d != 0) { 2821 /* 2822 * If a right edge has not 2823 * been seen yet, then a future 2824 * passing test will mark this 2825 * edge as the left edge. 2826 */ 2827 if (right_edge[i] == 2828 IO_IO_OUT1_DELAY_MAX + 1) 2829 left_edge[i] = -(d + 1); 2830 } else { 2831 /* 2832 * d = 0 failed, but it passed 2833 * when testing the left edge, 2834 * so it must be marginal, set 2835 * it to -1. 2836 */ 2837 if (right_edge[i] == 2838 IO_IO_OUT1_DELAY_MAX + 1 && 2839 left_edge[i] != 2840 IO_IO_OUT1_DELAY_MAX + 1) 2841 right_edge[i] = -1; 2842 /* 2843 * If a right edge has not been 2844 * seen yet, then a future 2845 * passing test will mark this 2846 * edge as the left edge. 2847 */ 2848 else if (right_edge[i] == 2849 IO_IO_OUT1_DELAY_MAX + 2850 1) 2851 left_edge[i] = -(d + 1); 2852 } 2853 } 2854 debug_cond(DLEVEL == 2, "write_center[r,d=%d):", d); 2855 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d", 2856 (int)(bit_chk & 1), i, left_edge[i]); 2857 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i, 2858 right_edge[i]); 2859 bit_chk = bit_chk >> 1; 2860 } 2861 } 2862 } 2863 2864 /* Check that all bits have a window */ 2865 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2866 debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: \ 2867 %d right_edge[%u]: %d", __func__, __LINE__, 2868 i, left_edge[i], i, right_edge[i]); 2869 if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) || 2870 (right_edge[i] == IO_IO_OUT1_DELAY_MAX + 1)) { 2871 set_failing_group_stage(test_bgn + i, 2872 CAL_STAGE_WRITES, 2873 CAL_SUBSTAGE_WRITES_CENTER); 2874 return 0; 2875 } 2876 } 2877 2878 /* Find middle of window for each DQ bit */ 2879 mid_min = left_edge[0] - right_edge[0]; 2880 min_index = 0; 2881 for (i = 1; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2882 mid = left_edge[i] - right_edge[i]; 2883 if (mid < mid_min) { 2884 mid_min = mid; 2885 min_index = i; 2886 } 2887 } 2888 2889 /* 2890 * -mid_min/2 represents the amount that we need to move DQS. 2891 * If mid_min is odd and positive we'll need to add one to 2892 * make sure the rounding in further calculations is correct 2893 * (always bias to the right), so just add 1 for all positive values. 
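 * For example, mid_min = 5 becomes (5 + 1) / 2 = 3 instead of 5 / 2 = 2,
 * while an even value such as 6 still gives (6 + 1) / 2 = 3.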
2894 */ 2895 if (mid_min > 0) 2896 mid_min++; 2897 mid_min = mid_min / 2; 2898 debug_cond(DLEVEL == 1, "%s:%d write_center: mid_min=%d\n", __func__, 2899 __LINE__, mid_min); 2900 2901 /* Determine the amount we can change DQS (which is -mid_min) */ 2902 orig_mid_min = mid_min; 2903 new_dqs = start_dqs; 2904 mid_min = 0; 2905 debug_cond(DLEVEL == 1, "%s:%d write_center: start_dqs=%d new_dqs=%d \ 2906 mid_min=%d\n", __func__, __LINE__, start_dqs, new_dqs, mid_min); 2907 /* Initialize data for export structures */ 2908 dqs_margin = IO_IO_OUT1_DELAY_MAX + 1; 2909 dq_margin = IO_IO_OUT1_DELAY_MAX + 1; 2910 2911 /* add delay to bring centre of all DQ windows to the same "level" */ 2912 for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) { 2913 /* Use values before divide by 2 to reduce round off error */ 2914 shift_dq = (left_edge[i] - right_edge[i] - 2915 (left_edge[min_index] - right_edge[min_index]))/2 + 2916 (orig_mid_min - mid_min); 2917 2918 debug_cond(DLEVEL == 2, "%s:%d write_center: before: shift_dq \ 2919 [%u]=%d\n", __func__, __LINE__, i, shift_dq); 2920 2921 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET; 2922 temp_dq_out1_delay = readl(addr + (i << 2)); 2923 if (shift_dq + (int32_t)temp_dq_out1_delay > 2924 (int32_t)IO_IO_OUT1_DELAY_MAX) { 2925 shift_dq = (int32_t)IO_IO_OUT1_DELAY_MAX - temp_dq_out1_delay; 2926 } else if (shift_dq + (int32_t)temp_dq_out1_delay < 0) { 2927 shift_dq = -(int32_t)temp_dq_out1_delay; 2928 } 2929 debug_cond(DLEVEL == 2, "write_center: after: shift_dq[%u]=%d\n", 2930 i, shift_dq); 2931 scc_mgr_set_dq_out1_delay(i, temp_dq_out1_delay + shift_dq); 2932 scc_mgr_load_dq(i); 2933 2934 debug_cond(DLEVEL == 2, "write_center: margin[%u]=[%d,%d]\n", i, 2935 left_edge[i] - shift_dq + (-mid_min), 2936 right_edge[i] + shift_dq - (-mid_min)); 2937 /* To determine values for export structures */ 2938 if (left_edge[i] - shift_dq + (-mid_min) < dq_margin) 2939 dq_margin = left_edge[i] - shift_dq + (-mid_min); 2940 2941 if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin) 2942 dqs_margin = right_edge[i] + shift_dq - (-mid_min); 2943 } 2944 2945 /* Move DQS */ 2946 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs); 2947 writel(0, &sdr_scc_mgr->update); 2948 2949 /* Centre DM */ 2950 debug_cond(DLEVEL == 2, "%s:%d write_center: DM\n", __func__, __LINE__); 2951 2952 /* 2953 * set the left and right edge of each bit to an illegal value, 2954 * use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value, 2955 */ 2956 left_edge[0] = IO_IO_OUT1_DELAY_MAX + 1; 2957 right_edge[0] = IO_IO_OUT1_DELAY_MAX + 1; 2958 int32_t bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; 2959 int32_t end_curr = IO_IO_OUT1_DELAY_MAX + 1; 2960 int32_t bgn_best = IO_IO_OUT1_DELAY_MAX + 1; 2961 int32_t end_best = IO_IO_OUT1_DELAY_MAX + 1; 2962 int32_t win_best = 0; 2963 2964 /* Search for the/part of the window with DM shift */ 2965 for (d = IO_IO_OUT1_DELAY_MAX; d >= 0; d -= DELTA_D) { 2966 scc_mgr_apply_group_dm_out1_delay(write_group, d); 2967 writel(0, &sdr_scc_mgr->update); 2968 2969 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1, 2970 PASS_ALL_BITS, &bit_chk, 2971 0)) { 2972 /* USE Set current end of the window */ 2973 end_curr = -d; 2974 /* 2975 * If a starting edge of our window has not been seen 2976 * this is our current start of the DM window. 2977 */ 2978 if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1) 2979 bgn_curr = -d; 2980 2981 /* 2982 * If current window is bigger than best seen. 2983 * Set best seen to be current window. 
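 * (In this first sweep the window edges are stored as negative delays, -d,
 * so that they share one axis with the DQS-shift sweep that follows.)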
2984 */ 2985 if ((end_curr-bgn_curr+1) > win_best) { 2986 win_best = end_curr-bgn_curr+1; 2987 bgn_best = bgn_curr; 2988 end_best = end_curr; 2989 } 2990 } else { 2991 /* We just saw a failing test. Reset temp edge */ 2992 bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; 2993 end_curr = IO_IO_OUT1_DELAY_MAX + 1; 2994 } 2995 } 2996 2997 2998 /* Reset DM delay chains to 0 */ 2999 scc_mgr_apply_group_dm_out1_delay(write_group, 0); 3000 3001 /* 3002 * Check to see if the current window nudges up against 0 delay. 3003 * If so, we need to continue the search by shifting DQS; otherwise the DQS 3004 * search begins as a new search. */ 3005 if (end_curr != 0) { 3006 bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; 3007 end_curr = IO_IO_OUT1_DELAY_MAX + 1; 3008 } 3009 3010 /* Search for the/part of the window with DQS shifts */ 3011 for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - new_dqs; d += DELTA_D) { 3012 /* 3013 * Note: This only shifts DQS, so we may be limiting ourselves to the 3014 * width of DQ unnecessarily. 3015 */ 3016 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, 3017 d + new_dqs); 3018 3019 writel(0, &sdr_scc_mgr->update); 3020 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1, 3021 PASS_ALL_BITS, &bit_chk, 3022 0)) { 3023 /* USE Set current end of the window */ 3024 end_curr = d; 3025 /* 3026 * If a beginning edge of our window has not been seen 3027 * this is our current begin of the DM window. 3028 */ 3029 if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1) 3030 bgn_curr = d; 3031 3032 /* 3033 * If current window is bigger than best seen. Set best 3034 * seen to be current window. 3035 */ 3036 if ((end_curr-bgn_curr+1) > win_best) { 3037 win_best = end_curr-bgn_curr+1; 3038 bgn_best = bgn_curr; 3039 end_best = end_curr; 3040 } 3041 } else { 3042 /* We just saw a failing test. Reset temp edge */ 3043 bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; 3044 end_curr = IO_IO_OUT1_DELAY_MAX + 1; 3045 3046 /* Early exit optimization: if the remaining delay 3047 chain space is less than the largest window 3048 already seen, we can exit */ 3049 if ((win_best-1) > 3050 (IO_IO_OUT1_DELAY_MAX - new_dqs - d)) { 3051 break; 3052 } 3053 } 3054 } 3055 3056 /* assign left and right edge for cal and reporting; */ 3057 left_edge[0] = -1*bgn_best; 3058 right_edge[0] = end_best; 3059 3060 debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d\n", __func__, 3061 __LINE__, left_edge[0], right_edge[0]); 3062 3063 /* Move DQS (back to orig) */ 3064 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs); 3065 3066 /* Move DM */ 3067 3068 /* Find middle of window for the DM bit */ 3069 mid = (left_edge[0] - right_edge[0]) / 2; 3070 3071 /* only move right, since we are not moving DQS/DQ */ 3072 if (mid < 0) 3073 mid = 0; 3074 3075 /* dm_margin should fail if we never find a window */ 3076 if (win_best == 0) 3077 dm_margin = -1; 3078 else 3079 dm_margin = left_edge[0] - mid; 3080 3081 scc_mgr_apply_group_dm_out1_delay(write_group, mid); 3082 writel(0, &sdr_scc_mgr->update); 3083 3084 debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d mid=%d \ 3085 dm_margin=%d\n", __func__, __LINE__, left_edge[0], 3086 right_edge[0], mid, dm_margin); 3087 /* Export values */ 3088 gbl->fom_out += dq_margin + dqs_margin; 3089 3090 debug_cond(DLEVEL == 2, "%s:%d write_center: dq_margin=%d \ 3091 dqs_margin=%d dm_margin=%d\n", __func__, __LINE__, 3092 dq_margin, dqs_margin, dm_margin); 3093 3094 /* 3095 * Do not remove this line as it makes sure all of our 3096 * decisions have been applied.
3097 */ 3098 writel(0, &sdr_scc_mgr->update); 3099 return (dq_margin >= 0) && (dqs_margin >= 0) && (dm_margin >= 0); 3100 } 3101 3102 /* calibrate the write operations */ 3103 static uint32_t rw_mgr_mem_calibrate_writes(uint32_t rank_bgn, uint32_t g, 3104 uint32_t test_bgn) 3105 { 3106 /* update info for sims */ 3107 debug("%s:%d %u %u\n", __func__, __LINE__, g, test_bgn); 3108 3109 reg_file_set_stage(CAL_STAGE_WRITES); 3110 reg_file_set_sub_stage(CAL_SUBSTAGE_WRITES_CENTER); 3111 3112 reg_file_set_group(g); 3113 3114 if (!rw_mgr_mem_calibrate_writes_center(rank_bgn, g, test_bgn)) { 3115 set_failing_group_stage(g, CAL_STAGE_WRITES, 3116 CAL_SUBSTAGE_WRITES_CENTER); 3117 return 0; 3118 } 3119 3120 return 1; 3121 } 3122 3123 /* precharge all banks and activate row 0 in bank "000..." and bank "111..." */ 3124 static void mem_precharge_and_activate(void) 3125 { 3126 uint32_t r; 3127 3128 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { 3129 if (param->skip_ranks[r]) { 3130 /* request to skip the rank */ 3131 continue; 3132 } 3133 3134 /* set rank */ 3135 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); 3136 3137 /* precharge all banks ... */ 3138 writel(RW_MGR_PRECHARGE_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS | 3139 RW_MGR_RUN_SINGLE_GROUP_OFFSET); 3140 3141 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr0); 3142 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT1, 3143 &sdr_rw_load_jump_mgr_regs->load_jump_add0); 3144 3145 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr1); 3146 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT2, 3147 &sdr_rw_load_jump_mgr_regs->load_jump_add1); 3148 3149 /* activate rows */ 3150 writel(RW_MGR_ACTIVATE_0_AND_1, SDR_PHYGRP_RWMGRGRP_ADDRESS | 3151 RW_MGR_RUN_SINGLE_GROUP_OFFSET); 3152 } 3153 } 3154 3155 /* Configure various memory related parameters. */ 3156 static void mem_config(void) 3157 { 3158 uint32_t rlat, wlat; 3159 uint32_t rw_wl_nop_cycles; 3160 uint32_t max_latency; 3161 3162 debug("%s:%d\n", __func__, __LINE__); 3163 /* read in write and read latency */ 3164 wlat = readl(&data_mgr->t_wl_add); 3165 wlat += readl(&data_mgr->mem_t_add); 3166 3167 /* WL for hard phy does not include additive latency */ 3168 3169 /* 3170 * add addtional write latency to offset the address/command extra 3171 * clock cycle. We change the AC mux setting causing AC to be delayed 3172 * by one mem clock cycle. Only do this for DDR3 3173 */ 3174 wlat = wlat + 1; 3175 3176 rlat = readl(&data_mgr->t_rl_add); 3177 3178 rw_wl_nop_cycles = wlat - 2; 3179 gbl->rw_wl_nop_cycles = rw_wl_nop_cycles; 3180 3181 /* 3182 * For AV/CV, lfifo is hardened and always runs at full rate so 3183 * max latency in AFI clocks, used here, is correspondingly smaller. 
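 * (With the hardened lfifo the rate divisor is simply 1, so the expression
 * below reduces to (1 << MAX_LATENCY_COUNT_WIDTH) - 1.)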
3184 */ 3185 max_latency = (1<<MAX_LATENCY_COUNT_WIDTH)/1 - 1; 3186 /* configure for a burst length of 8 */ 3187 3188 /* write latency */ 3189 /* Adjust Write Latency for Hard PHY */ 3190 wlat = wlat + 1; 3191 3192 /* set a pretty high read latency initially */ 3193 gbl->curr_read_lat = rlat + 16; 3194 3195 if (gbl->curr_read_lat > max_latency) 3196 gbl->curr_read_lat = max_latency; 3197 3198 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); 3199 3200 /* advertise write latency */ 3201 gbl->curr_write_lat = wlat; 3202 writel(wlat - 2, &phy_mgr_cfg->afi_wlat); 3203 3204 /* initialize bit slips */ 3205 mem_precharge_and_activate(); 3206 } 3207 3208 /* Set VFIFO and LFIFO to instant-on settings in skip calibration mode */ 3209 static void mem_skip_calibrate(void) 3210 { 3211 uint32_t vfifo_offset; 3212 uint32_t i, j, r; 3213 3214 debug("%s:%d\n", __func__, __LINE__); 3215 /* Need to update every shadow register set used by the interface */ 3216 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 3217 r += NUM_RANKS_PER_SHADOW_REG) { 3218 /* 3219 * Set output phase alignment settings appropriate for 3220 * skip calibration. 3221 */ 3222 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { 3223 scc_mgr_set_dqs_en_phase(i, 0); 3224 #if IO_DLL_CHAIN_LENGTH == 6 3225 scc_mgr_set_dqdqs_output_phase(i, 6); 3226 #else 3227 scc_mgr_set_dqdqs_output_phase(i, 7); 3228 #endif 3229 /* 3230 * Case:33398 3231 * 3232 * Write data arrives to the I/O two cycles before write 3233 * latency is reached (720 deg). 3234 * -> due to bit-slip in a/c bus 3235 * -> to allow board skew where dqs is longer than ck 3236 * -> how often can this happen!? 3237 * -> can claim back some ptaps for high freq 3238 * support if we can relax this, but i digress... 3239 * 3240 * The write_clk leads mem_ck by 90 deg 3241 * The minimum ptap of the OPA is 180 deg 3242 * Each ptap has (360 / IO_DLL_CHAIN_LENGH) deg of delay 3243 * The write_clk is always delayed by 2 ptaps 3244 * 3245 * Hence, to make DQS aligned to CK, we need to delay 3246 * DQS by: 3247 * (720 - 90 - 180 - 2 * (360 / IO_DLL_CHAIN_LENGTH)) 3248 * 3249 * Dividing the above by (360 / IO_DLL_CHAIN_LENGTH) 3250 * gives us the number of ptaps, which simplies to: 3251 * 3252 * (1.25 * IO_DLL_CHAIN_LENGTH - 2) 3253 */ 3254 scc_mgr_set_dqdqs_output_phase(i, (1.25 * 3255 IO_DLL_CHAIN_LENGTH - 2)); 3256 } 3257 writel(0xff, &sdr_scc_mgr->dqs_ena); 3258 writel(0xff, &sdr_scc_mgr->dqs_io_ena); 3259 3260 for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) { 3261 writel(i, SDR_PHYGRP_SCCGRP_ADDRESS | 3262 SCC_MGR_GROUP_COUNTER_OFFSET); 3263 } 3264 writel(0xff, &sdr_scc_mgr->dq_ena); 3265 writel(0xff, &sdr_scc_mgr->dm_ena); 3266 writel(0, &sdr_scc_mgr->update); 3267 } 3268 3269 /* Compensate for simulation model behaviour */ 3270 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { 3271 scc_mgr_set_dqs_bus_in_delay(i, 10); 3272 scc_mgr_load_dqs(i); 3273 } 3274 writel(0, &sdr_scc_mgr->update); 3275 3276 /* 3277 * ArriaV has hard FIFOs that can only be initialized by incrementing 3278 * in sequencer. 3279 */ 3280 vfifo_offset = CALIB_VFIFO_OFFSET; 3281 for (j = 0; j < vfifo_offset; j++) { 3282 writel(0xff, &phy_mgr_cmd->inc_vfifo_hard_phy); 3283 } 3284 writel(0, &phy_mgr_cmd->fifo_reset); 3285 3286 /* 3287 * For ACV with hard lfifo, we get the skip-cal setting from 3288 * generation-time constant. 
3289 */ 3290 gbl->curr_read_lat = CALIB_LFIFO_OFFSET; 3291 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); 3292 } 3293 3294 /* Memory calibration entry point */ 3295 static uint32_t mem_calibrate(void) 3296 { 3297 uint32_t i; 3298 uint32_t rank_bgn, sr; 3299 uint32_t write_group, write_test_bgn; 3300 uint32_t read_group, read_test_bgn; 3301 uint32_t run_groups, current_run; 3302 uint32_t failing_groups = 0; 3303 uint32_t group_failed = 0; 3304 uint32_t sr_failed = 0; 3305 3306 debug("%s:%d\n", __func__, __LINE__); 3307 /* Initialize the data settings */ 3308 3309 gbl->error_substage = CAL_SUBSTAGE_NIL; 3310 gbl->error_stage = CAL_STAGE_NIL; 3311 gbl->error_group = 0xff; 3312 gbl->fom_in = 0; 3313 gbl->fom_out = 0; 3314 3315 mem_config(); 3316 3317 uint32_t bypass_mode = 0x1; 3318 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { 3319 writel(i, SDR_PHYGRP_SCCGRP_ADDRESS | 3320 SCC_MGR_GROUP_COUNTER_OFFSET); 3321 scc_set_bypass_mode(i, bypass_mode); 3322 } 3323 3324 if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) { 3325 /* 3326 * Set VFIFO and LFIFO to instant-on settings in skip 3327 * calibration mode. 3328 */ 3329 mem_skip_calibrate(); 3330 } else { 3331 for (i = 0; i < NUM_CALIB_REPEAT; i++) { 3332 /* 3333 * Zero all delay chain/phase settings for all 3334 * groups and all shadow register sets. 3335 */ 3336 scc_mgr_zero_all(); 3337 3338 run_groups = ~param->skip_groups; 3339 3340 for (write_group = 0, write_test_bgn = 0; write_group 3341 < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; write_group++, 3342 write_test_bgn += RW_MGR_MEM_DQ_PER_WRITE_DQS) { 3343 /* Initialized the group failure */ 3344 group_failed = 0; 3345 3346 current_run = run_groups & ((1 << 3347 RW_MGR_NUM_DQS_PER_WRITE_GROUP) - 1); 3348 run_groups = run_groups >> 3349 RW_MGR_NUM_DQS_PER_WRITE_GROUP; 3350 3351 if (current_run == 0) 3352 continue; 3353 3354 writel(write_group, SDR_PHYGRP_SCCGRP_ADDRESS | 3355 SCC_MGR_GROUP_COUNTER_OFFSET); 3356 scc_mgr_zero_group(write_group, write_test_bgn, 3357 0); 3358 3359 for (read_group = write_group * 3360 RW_MGR_MEM_IF_READ_DQS_WIDTH / 3361 RW_MGR_MEM_IF_WRITE_DQS_WIDTH, 3362 read_test_bgn = 0; 3363 read_group < (write_group + 1) * 3364 RW_MGR_MEM_IF_READ_DQS_WIDTH / 3365 RW_MGR_MEM_IF_WRITE_DQS_WIDTH && 3366 group_failed == 0; 3367 read_group++, read_test_bgn += 3368 RW_MGR_MEM_DQ_PER_READ_DQS) { 3369 /* Calibrate the VFIFO */ 3370 if (!((STATIC_CALIB_STEPS) & 3371 CALIB_SKIP_VFIFO)) { 3372 if (!rw_mgr_mem_calibrate_vfifo 3373 (read_group, 3374 read_test_bgn)) { 3375 group_failed = 1; 3376 3377 if (!(gbl-> 3378 phy_debug_mode_flags & 3379 PHY_DEBUG_SWEEP_ALL_GROUPS)) { 3380 return 0; 3381 } 3382 } 3383 } 3384 } 3385 3386 /* Calibrate the output side */ 3387 if (group_failed == 0) { 3388 for (rank_bgn = 0, sr = 0; rank_bgn 3389 < RW_MGR_MEM_NUMBER_OF_RANKS; 3390 rank_bgn += 3391 NUM_RANKS_PER_SHADOW_REG, 3392 ++sr) { 3393 sr_failed = 0; 3394 if (!((STATIC_CALIB_STEPS) & 3395 CALIB_SKIP_WRITES)) { 3396 if ((STATIC_CALIB_STEPS) 3397 & CALIB_SKIP_DELAY_SWEEPS) { 3398 /* not needed in quick mode! */ 3399 } else { 3400 /* 3401 * Determine if this set of 3402 * ranks should be skipped 3403 * entirely. 
3404 */ 3405 if (!param->skip_shadow_regs[sr]) { 3406 if (!rw_mgr_mem_calibrate_writes 3407 (rank_bgn, write_group, 3408 write_test_bgn)) { 3409 sr_failed = 1; 3410 if (!(gbl-> 3411 phy_debug_mode_flags & 3412 PHY_DEBUG_SWEEP_ALL_GROUPS)) { 3413 return 0; 3414 } 3415 } 3416 } 3417 } 3418 } 3419 if (sr_failed != 0) 3420 group_failed = 1; 3421 } 3422 } 3423 3424 if (group_failed == 0) { 3425 for (read_group = write_group * 3426 RW_MGR_MEM_IF_READ_DQS_WIDTH / 3427 RW_MGR_MEM_IF_WRITE_DQS_WIDTH, 3428 read_test_bgn = 0; 3429 read_group < (write_group + 1) 3430 * RW_MGR_MEM_IF_READ_DQS_WIDTH 3431 / RW_MGR_MEM_IF_WRITE_DQS_WIDTH && 3432 group_failed == 0; 3433 read_group++, read_test_bgn += 3434 RW_MGR_MEM_DQ_PER_READ_DQS) { 3435 if (!((STATIC_CALIB_STEPS) & 3436 CALIB_SKIP_WRITES)) { 3437 if (!rw_mgr_mem_calibrate_vfifo_end 3438 (read_group, read_test_bgn)) { 3439 group_failed = 1; 3440 3441 if (!(gbl->phy_debug_mode_flags 3442 & PHY_DEBUG_SWEEP_ALL_GROUPS)) { 3443 return 0; 3444 } 3445 } 3446 } 3447 } 3448 } 3449 3450 if (group_failed != 0) 3451 failing_groups++; 3452 } 3453 3454 /* 3455 * USER If there are any failing groups then report 3456 * the failure. 3457 */ 3458 if (failing_groups != 0) 3459 return 0; 3460 3461 /* Calibrate the LFIFO */ 3462 if (!((STATIC_CALIB_STEPS) & CALIB_SKIP_LFIFO)) { 3463 /* 3464 * If we're skipping groups as part of debug, 3465 * don't calibrate LFIFO. 3466 */ 3467 if (param->skip_groups == 0) { 3468 if (!rw_mgr_mem_calibrate_lfifo()) 3469 return 0; 3470 } 3471 } 3472 } 3473 } 3474 3475 /* 3476 * Do not remove this line as it makes sure all of our decisions 3477 * have been applied. 3478 */ 3479 writel(0, &sdr_scc_mgr->update); 3480 return 1; 3481 } 3482 3483 static uint32_t run_mem_calibrate(void) 3484 { 3485 uint32_t pass; 3486 uint32_t debug_info; 3487 3488 debug("%s:%d\n", __func__, __LINE__); 3489 3490 /* Reset pass/fail status shown on afi_cal_success/fail */ 3491 writel(PHY_MGR_CAL_RESET, &phy_mgr_cfg->cal_status); 3492 3493 /* stop tracking manger */ 3494 uint32_t ctrlcfg = readl(&sdr_ctrl->ctrl_cfg); 3495 3496 writel(ctrlcfg & 0xFFBFFFFF, &sdr_ctrl->ctrl_cfg); 3497 3498 initialize(); 3499 rw_mgr_mem_initialize(); 3500 3501 pass = mem_calibrate(); 3502 3503 mem_precharge_and_activate(); 3504 writel(0, &phy_mgr_cmd->fifo_reset); 3505 3506 /* 3507 * Handoff: 3508 * Don't return control of the PHY back to AFI when in debug mode. 
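 * Otherwise the sequencer hands the interface back and the mux below is
 * switched from the calibration setting (0x3, programmed in initialize())
 * to 0x2.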
3509 */ 3510 if ((gbl->phy_debug_mode_flags & PHY_DEBUG_IN_DEBUG_MODE) == 0) { 3511 rw_mgr_mem_handoff(); 3512 /* 3513 * In Hard PHY this is a 2-bit control: 3514 * 0: AFI Mux Select 3515 * 1: DDIO Mux Select 3516 */ 3517 writel(0x2, &phy_mgr_cfg->mux_sel); 3518 } 3519 3520 writel(ctrlcfg, &sdr_ctrl->ctrl_cfg); 3521 3522 if (pass) { 3523 printf("%s: CALIBRATION PASSED\n", __FILE__); 3524 3525 gbl->fom_in /= 2; 3526 gbl->fom_out /= 2; 3527 3528 if (gbl->fom_in > 0xff) 3529 gbl->fom_in = 0xff; 3530 3531 if (gbl->fom_out > 0xff) 3532 gbl->fom_out = 0xff; 3533 3534 /* Update the FOM in the register file */ 3535 debug_info = gbl->fom_in; 3536 debug_info |= gbl->fom_out << 8; 3537 writel(debug_info, &sdr_reg_file->fom); 3538 3539 writel(debug_info, &phy_mgr_cfg->cal_debug_info); 3540 writel(PHY_MGR_CAL_SUCCESS, &phy_mgr_cfg->cal_status); 3541 } else { 3542 printf("%s: CALIBRATION FAILED\n", __FILE__); 3543 3544 debug_info = gbl->error_stage; 3545 debug_info |= gbl->error_substage << 8; 3546 debug_info |= gbl->error_group << 16; 3547 3548 writel(debug_info, &sdr_reg_file->failing_stage); 3549 writel(debug_info, &phy_mgr_cfg->cal_debug_info); 3550 writel(PHY_MGR_CAL_FAIL, &phy_mgr_cfg->cal_status); 3551 3552 /* Update the failing group/stage in the register file */ 3553 debug_info = gbl->error_stage; 3554 debug_info |= gbl->error_substage << 8; 3555 debug_info |= gbl->error_group << 16; 3556 writel(debug_info, &sdr_reg_file->failing_stage); 3557 } 3558 3559 return pass; 3560 } 3561 3562 /** 3563 * hc_initialize_rom_data() - Initialize ROM data 3564 * 3565 * Initialize ROM data. 3566 */ 3567 static void hc_initialize_rom_data(void) 3568 { 3569 u32 i, addr; 3570 3571 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_INST_ROM_WRITE_OFFSET; 3572 for (i = 0; i < ARRAY_SIZE(inst_rom_init); i++) 3573 writel(inst_rom_init[i], addr + (i << 2)); 3574 3575 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_AC_ROM_WRITE_OFFSET; 3576 for (i = 0; i < ARRAY_SIZE(ac_rom_init); i++) 3577 writel(ac_rom_init[i], addr + (i << 2)); 3578 } 3579 3580 /** 3581 * initialize_reg_file() - Initialize SDR register file 3582 * 3583 * Initialize SDR register file. 3584 */ 3585 static void initialize_reg_file(void) 3586 { 3587 /* Initialize the register file with the correct data */ 3588 writel(REG_FILE_INIT_SEQ_SIGNATURE, &sdr_reg_file->signature); 3589 writel(0, &sdr_reg_file->debug_data_addr); 3590 writel(0, &sdr_reg_file->cur_stage); 3591 writel(0, &sdr_reg_file->fom); 3592 writel(0, &sdr_reg_file->failing_stage); 3593 writel(0, &sdr_reg_file->debug1); 3594 writel(0, &sdr_reg_file->debug2); 3595 } 3596 3597 /** 3598 * initialize_hps_phy() - Initialize HPS PHY 3599 * 3600 * Initialize HPS PHY. 3601 */ 3602 static void initialize_hps_phy(void) 3603 { 3604 uint32_t reg; 3605 /* 3606 * Tracking also gets configured here because it's in the 3607 * same register. 3608 */ 3609 uint32_t trk_sample_count = 7500; 3610 uint32_t trk_long_idle_sample_count = (10 << 16) | 100; 3611 /* 3612 * Format is number of outer loops in the 16 MSB, sample 3613 * count in 16 LSB. 3614 */ 3615 3616 reg = 0; 3617 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ACDELAYEN_SET(2); 3618 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQDELAYEN_SET(1); 3619 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSDELAYEN_SET(1); 3620 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSLOGICDELAYEN_SET(1); 3621 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_RESETDELAYEN_SET(0); 3622 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_LPDDRDIS_SET(1); 3623 /* 3624 * This field selects the intrinsic latency to RDATA_EN/FULL path. 
3625 * 00-bypass, 01- add 5 cycles, 10- add 10 cycles, 11- add 15 cycles. 3626 */ 3627 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ADDLATSEL_SET(0); 3628 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_SET( 3629 trk_sample_count); 3630 writel(reg, &sdr_ctrl->phy_ctrl0); 3631 3632 reg = 0; 3633 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_SAMPLECOUNT_31_20_SET( 3634 trk_sample_count >> 3635 SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_WIDTH); 3636 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_SET( 3637 trk_long_idle_sample_count); 3638 writel(reg, &sdr_ctrl->phy_ctrl1); 3639 3640 reg = 0; 3641 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_LONGIDLESAMPLECOUNT_31_20_SET( 3642 trk_long_idle_sample_count >> 3643 SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_WIDTH); 3644 writel(reg, &sdr_ctrl->phy_ctrl2); 3645 } 3646 3647 static void initialize_tracking(void) 3648 { 3649 uint32_t concatenated_longidle = 0x0; 3650 uint32_t concatenated_delays = 0x0; 3651 uint32_t concatenated_rw_addr = 0x0; 3652 uint32_t concatenated_refresh = 0x0; 3653 uint32_t trk_sample_count = 7500; 3654 uint32_t dtaps_per_ptap; 3655 uint32_t tmp_delay; 3656 3657 /* 3658 * compute usable version of value in case we skip full 3659 * computation later 3660 */ 3661 dtaps_per_ptap = 0; 3662 tmp_delay = 0; 3663 while (tmp_delay < IO_DELAY_PER_OPA_TAP) { 3664 dtaps_per_ptap++; 3665 tmp_delay += IO_DELAY_PER_DCHAIN_TAP; 3666 } 3667 dtaps_per_ptap--; 3668 3669 concatenated_longidle = concatenated_longidle ^ 10; 3670 /*longidle outer loop */ 3671 concatenated_longidle = concatenated_longidle << 16; 3672 concatenated_longidle = concatenated_longidle ^ 100; 3673 /*longidle sample count */ 3674 concatenated_delays = concatenated_delays ^ 243; 3675 /* trfc, worst case of 933Mhz 4Gb */ 3676 concatenated_delays = concatenated_delays << 8; 3677 concatenated_delays = concatenated_delays ^ 14; 3678 /* trcd, worst case */ 3679 concatenated_delays = concatenated_delays << 8; 3680 concatenated_delays = concatenated_delays ^ 10; 3681 /* vfifo wait */ 3682 concatenated_delays = concatenated_delays << 8; 3683 concatenated_delays = concatenated_delays ^ 4; 3684 /* mux delay */ 3685 3686 concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_IDLE; 3687 concatenated_rw_addr = concatenated_rw_addr << 8; 3688 concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_ACTIVATE_1; 3689 concatenated_rw_addr = concatenated_rw_addr << 8; 3690 concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_SGLE_READ; 3691 concatenated_rw_addr = concatenated_rw_addr << 8; 3692 concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_PRECHARGE_ALL; 3693 3694 concatenated_refresh = concatenated_refresh ^ RW_MGR_REFRESH_ALL; 3695 concatenated_refresh = concatenated_refresh << 24; 3696 concatenated_refresh = concatenated_refresh ^ 1000; /* trefi */ 3697 3698 /* Initialize the register file with the correct data */ 3699 writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap); 3700 writel(trk_sample_count, &sdr_reg_file->trk_sample_count); 3701 writel(concatenated_longidle, &sdr_reg_file->trk_longidle); 3702 writel(concatenated_delays, &sdr_reg_file->delays); 3703 writel(concatenated_rw_addr, &sdr_reg_file->trk_rw_mgr_addr); 3704 writel(RW_MGR_MEM_IF_READ_DQS_WIDTH, &sdr_reg_file->trk_read_dqs_width); 3705 writel(concatenated_refresh, &sdr_reg_file->trk_rfsh); 3706 } 3707 3708 int sdram_calibration_full(void) 3709 { 3710 struct param_type my_param; 3711 struct gbl_type my_gbl; 3712 uint32_t pass; 3713 uint32_t i; 3714 3715 param = &my_param; 3716 gbl = &my_gbl; 3717 3718 /* 
Initialize the debug mode flags */ 3719 gbl->phy_debug_mode_flags = 0; 3720 /* Set the calibration enabled by default */ 3721 gbl->phy_debug_mode_flags |= PHY_DEBUG_ENABLE_CAL_RPT; 3722 /* 3723 * Only sweep all groups (regardless of fail state) by default 3724 * Set enabled read test by default. 3725 */ 3726 #if DISABLE_GUARANTEED_READ 3727 gbl->phy_debug_mode_flags |= PHY_DEBUG_DISABLE_GUARANTEED_READ; 3728 #endif 3729 /* Initialize the register file */ 3730 initialize_reg_file(); 3731 3732 /* Initialize any PHY CSR */ 3733 initialize_hps_phy(); 3734 3735 scc_mgr_initialize(); 3736 3737 initialize_tracking(); 3738 3739 /* USER Enable all ranks, groups */ 3740 for (i = 0; i < RW_MGR_MEM_NUMBER_OF_RANKS; i++) 3741 param->skip_ranks[i] = 0; 3742 for (i = 0; i < NUM_SHADOW_REGS; ++i) 3743 param->skip_shadow_regs[i] = 0; 3744 param->skip_groups = 0; 3745 3746 printf("%s: Preparing to start memory calibration\n", __FILE__); 3747 3748 debug("%s:%d\n", __func__, __LINE__); 3749 debug_cond(DLEVEL == 1, 3750 "DDR3 FULL_RATE ranks=%u cs/dimm=%u dq/dqs=%u,%u vg/dqs=%u,%u ", 3751 RW_MGR_MEM_NUMBER_OF_RANKS, RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM, 3752 RW_MGR_MEM_DQ_PER_READ_DQS, RW_MGR_MEM_DQ_PER_WRITE_DQS, 3753 RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS, 3754 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS); 3755 debug_cond(DLEVEL == 1, 3756 "dqs=%u,%u dq=%u dm=%u ptap_delay=%u dtap_delay=%u ", 3757 RW_MGR_MEM_IF_READ_DQS_WIDTH, RW_MGR_MEM_IF_WRITE_DQS_WIDTH, 3758 RW_MGR_MEM_DATA_WIDTH, RW_MGR_MEM_DATA_MASK_WIDTH, 3759 IO_DELAY_PER_OPA_TAP, IO_DELAY_PER_DCHAIN_TAP); 3760 debug_cond(DLEVEL == 1, "dtap_dqsen_delay=%u, dll=%u", 3761 IO_DELAY_PER_DQS_EN_DCHAIN_TAP, IO_DLL_CHAIN_LENGTH); 3762 debug_cond(DLEVEL == 1, "max values: en_p=%u dqdqs_p=%u en_d=%u dqs_in_d=%u ", 3763 IO_DQS_EN_PHASE_MAX, IO_DQDQS_OUT_PHASE_MAX, 3764 IO_DQS_EN_DELAY_MAX, IO_DQS_IN_DELAY_MAX); 3765 debug_cond(DLEVEL == 1, "io_in_d=%u io_out1_d=%u io_out2_d=%u ", 3766 IO_IO_IN_DELAY_MAX, IO_IO_OUT1_DELAY_MAX, 3767 IO_IO_OUT2_DELAY_MAX); 3768 debug_cond(DLEVEL == 1, "dqs_in_reserve=%u dqs_out_reserve=%u\n", 3769 IO_DQS_IN_RESERVE, IO_DQS_OUT_RESERVE); 3770 3771 hc_initialize_rom_data(); 3772 3773 /* update info for sims */ 3774 reg_file_set_stage(CAL_STAGE_NIL); 3775 reg_file_set_group(0); 3776 3777 /* 3778 * Load global needed for those actions that require 3779 * some dynamic calibration support. 3780 */ 3781 dyn_calib_steps = STATIC_CALIB_STEPS; 3782 /* 3783 * Load global to allow dynamic selection of delay loop settings 3784 * based on calibration mode. 3785 */ 3786 if (!(dyn_calib_steps & CALIB_SKIP_DELAY_LOOPS)) 3787 skip_delay_mask = 0xff; 3788 else 3789 skip_delay_mask = 0x0; 3790 3791 pass = run_mem_calibrate(); 3792 3793 printf("%s: Calibration complete\n", __FILE__); 3794 return pass; 3795 } 3796