/*
 * Copyright Altera Corporation (C) 2012-2015
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <common.h>
#include <asm/io.h>
#include <asm/arch/sdram.h>
#include <errno.h>
#include "sequencer.h"

static struct socfpga_sdr_rw_load_manager *sdr_rw_load_mgr_regs =
	(struct socfpga_sdr_rw_load_manager *)
		(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0x800);
static struct socfpga_sdr_rw_load_jump_manager *sdr_rw_load_jump_mgr_regs =
	(struct socfpga_sdr_rw_load_jump_manager *)
		(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0xC00);
static struct socfpga_sdr_reg_file *sdr_reg_file =
	(struct socfpga_sdr_reg_file *)SDR_PHYGRP_REGFILEGRP_ADDRESS;
static struct socfpga_sdr_scc_mgr *sdr_scc_mgr =
	(struct socfpga_sdr_scc_mgr *)
		(SDR_PHYGRP_SCCGRP_ADDRESS | 0xe00);
static struct socfpga_phy_mgr_cmd *phy_mgr_cmd =
	(struct socfpga_phy_mgr_cmd *)SDR_PHYGRP_PHYMGRGRP_ADDRESS;
static struct socfpga_phy_mgr_cfg *phy_mgr_cfg =
	(struct socfpga_phy_mgr_cfg *)
		(SDR_PHYGRP_PHYMGRGRP_ADDRESS | 0x40);
static struct socfpga_data_mgr *data_mgr =
	(struct socfpga_data_mgr *)SDR_PHYGRP_DATAMGRGRP_ADDRESS;
static struct socfpga_sdr_ctrl *sdr_ctrl =
	(struct socfpga_sdr_ctrl *)SDR_CTRLGRP_ADDRESS;

const struct socfpga_sdram_rw_mgr_config *rwcfg;
const struct socfpga_sdram_io_config *iocfg;
const struct socfpga_sdram_misc_config *misccfg;

#define DELTA_D		1

/*
 * In order to reduce ROM size, most of the selectable calibration steps are
 * decided at compile time based on the user's calibration mode selection,
 * as captured by the STATIC_CALIB_STEPS selection below.
 *
 * However, to support simulation-time selection of fast simulation mode, where
 * we skip everything except the bare minimum, we need a few of the steps to
 * be dynamic. In those cases, we either use the DYNAMIC_CALIB_STEPS for the
 * check, which is based on the rtl-supplied value, or we dynamically compute
 * the value to use based on the dynamically-chosen calibration mode
 */

#define DLEVEL 0
#define STATIC_IN_RTL_SIM 0
#define STATIC_SKIP_DELAY_LOOPS 0

#define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | \
	STATIC_SKIP_DELAY_LOOPS)

/* calibration steps requested by the rtl */
static u16 dyn_calib_steps;

/*
 * To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option
 * instead of static, we use boolean logic to select between
 * non-skip and skip values
 *
 * The mask is set to include all bits when not-skipping, but is
 * zero when skipping
 */

static u16 skip_delay_mask;	/* mask off bits when skipping/not-skipping */

#define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \
	((non_skip_value) & skip_delay_mask)
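/*
 * Illustrative note (added for clarity, not part of the original code):
 * skip_delay_mask is expected to hold all-ones when delay loops are enabled
 * and 0 when they are skipped. E.g. with skip_delay_mask = 0xffff,
 * SKIP_DELAY_LOOP_VALUE_OR_ZERO(0x30) yields 0x30; with skip_delay_mask = 0
 * it yields 0, so the RW manager counters are loaded with zero and the delay
 * loops collapse.
 */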
static struct gbl_type *gbl;
static struct param_type *param;

static void set_failing_group_stage(u32 group, u32 stage,
				    u32 substage)
{
	/*
	 * Only set the global stage if there has not been any other
	 * failing group
	 */
	if (gbl->error_stage == CAL_STAGE_NIL) {
		gbl->error_substage = substage;
		gbl->error_stage = stage;
		gbl->error_group = group;
	}
}

static void reg_file_set_group(u16 set_group)
{
	clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff0000, set_group << 16);
}

static void reg_file_set_stage(u8 set_stage)
{
	clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff, set_stage & 0xff);
}

static void reg_file_set_sub_stage(u8 set_sub_stage)
{
	set_sub_stage &= 0xff;
	clrsetbits_le32(&sdr_reg_file->cur_stage, 0xff00, set_sub_stage << 8);
}

/**
 * phy_mgr_initialize() - Initialize PHY Manager
 *
 * Initialize PHY Manager.
 */
static void phy_mgr_initialize(void)
{
	u32 ratio;

	debug("%s:%d\n", __func__, __LINE__);
	/* Calibration has control over path to memory */
	/*
	 * In Hard PHY this is a 2-bit control:
	 * 0: AFI Mux Select
	 * 1: DDIO Mux Select
	 */
	writel(0x3, &phy_mgr_cfg->mux_sel);

	/* USER memory clock is not stable, we begin initialization */
	writel(0, &phy_mgr_cfg->reset_mem_stbl);

	/* USER calibration status all set to zero */
	writel(0, &phy_mgr_cfg->cal_status);

	writel(0, &phy_mgr_cfg->cal_debug_info);

	/* Init params only if we do NOT skip calibration. */
	if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL)
		return;

	ratio = rwcfg->mem_dq_per_read_dqs /
		rwcfg->mem_virtual_groups_per_read_dqs;
	param->read_correct_mask_vg = (1 << ratio) - 1;
	param->write_correct_mask_vg = (1 << ratio) - 1;
	param->read_correct_mask = (1 << rwcfg->mem_dq_per_read_dqs) - 1;
	param->write_correct_mask = (1 << rwcfg->mem_dq_per_write_dqs) - 1;
}

/**
 * set_rank_and_odt_mask() - Set Rank and ODT mask
 * @rank:	Rank mask
 * @odt_mode:	ODT mode, OFF or READ_WRITE
 *
 * Set Rank and ODT mask (On-Die Termination).
 */
static void set_rank_and_odt_mask(const u32 rank, const u32 odt_mode)
{
	u32 odt_mask_0 = 0;
	u32 odt_mask_1 = 0;
	u32 cs_and_odt_mask;

	if (odt_mode == RW_MGR_ODT_MODE_OFF) {
		odt_mask_0 = 0x0;
		odt_mask_1 = 0x0;
	} else {	/* RW_MGR_ODT_MODE_READ_WRITE */
		switch (rwcfg->mem_number_of_ranks) {
		case 1:	/* 1 Rank */
			/* Read: ODT = 0 ; Write: ODT = 1 */
			odt_mask_0 = 0x0;
			odt_mask_1 = 0x1;
			break;
		case 2:	/* 2 Ranks */
			if (rwcfg->mem_number_of_cs_per_dimm == 1) {
				/*
				 * - Dual-Slot , Single-Rank (1 CS per DIMM)
				 *   OR
				 * - RDIMM, 4 total CS (2 CS per DIMM, 2 DIMM)
				 *
				 * Since MEM_NUMBER_OF_RANKS is 2, they
				 * are both single rank with 2 CS each
				 * (special for RDIMM).
				 *
				 * Read: Turn on ODT on the opposite rank
				 * Write: Turn on ODT on all ranks
				 */
				odt_mask_0 = 0x3 & ~(1 << rank);
				odt_mask_1 = 0x3;
			} else {
				/*
				 * - Single-Slot , Dual-Rank (2 CS per DIMM)
				 *
				 * Read: Turn off ODT on all ranks
				 * Write: Turn on ODT on active rank
				 */
				odt_mask_0 = 0x0;
				odt_mask_1 = 0x3 & (1 << rank);
			}
			break;
		case 4:	/* 4 Ranks */
			/*
			 * Read:
			 * ----------+-----------------------+
			 *           |         ODT           |
			 * Read From +-----------------------+
			 *   Rank    |  3  |  2  |  1  |  0  |
			 * ----------+-----+-----+-----+-----+
			 *     0     |  0  |  1  |  0  |  0  |
			 *     1     |  1  |  0  |  0  |  0  |
			 *     2     |  0  |  0  |  0  |  1  |
			 *     3     |  0  |  0  |  1  |  0  |
			 * ----------+-----+-----+-----+-----+
			 *
			 * Write:
			 * ----------+-----------------------+
			 *           |         ODT           |
			 * Write To  +-----------------------+
			 *   Rank    |  3  |  2  |  1  |  0  |
			 * ----------+-----+-----+-----+-----+
			 *     0     |  0  |  1  |  0  |  1  |
			 *     1     |  1  |  0  |  1  |  0  |
			 *     2     |  0  |  1  |  0  |  1  |
			 *     3     |  1  |  0  |  1  |  0  |
			 * ----------+-----+-----+-----+-----+
			 */
			switch (rank) {
			case 0:
				odt_mask_0 = 0x4;
				odt_mask_1 = 0x5;
				break;
			case 1:
				odt_mask_0 = 0x8;
				odt_mask_1 = 0xA;
				break;
			case 2:
				odt_mask_0 = 0x1;
				odt_mask_1 = 0x5;
				break;
			case 3:
				odt_mask_0 = 0x2;
				odt_mask_1 = 0xA;
				break;
			}
			break;
		}
	}

	cs_and_odt_mask = (0xFF & ~(1 << rank)) |
			  ((0xFF & odt_mask_0) << 8) |
			  ((0xFF & odt_mask_1) << 16);
	writel(cs_and_odt_mask, SDR_PHYGRP_RWMGRGRP_ADDRESS |
				RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);
}
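/*
 * Illustrative example (added note, not part of the original code): the
 * value written above packs the chip-select mask into bits [7:0], the read
 * ODT mask into bits [15:8] and the write ODT mask into bits [23:16].
 * E.g. for a 4-rank configuration, rank 0 in READ_WRITE mode gives
 * cs = 0xFE, odt_mask_0 = 0x4, odt_mask_1 = 0x5, so the register value is
 * 0xFE | (0x4 << 8) | (0x5 << 16) = 0x0504FE.
 */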
/**
 * scc_mgr_set() - Set SCC Manager register
 * @off:	Base offset in SCC Manager space
 * @grp:	Read/Write group
 * @val:	Value to be set
 *
 * This function sets the SCC Manager (Scan Chain Control Manager) register.
 */
static void scc_mgr_set(u32 off, u32 grp, u32 val)
{
	writel(val, SDR_PHYGRP_SCCGRP_ADDRESS | off | (grp << 2));
}

/**
 * scc_mgr_initialize() - Initialize SCC Manager registers
 *
 * Initialize SCC Manager registers.
 */
static void scc_mgr_initialize(void)
{
	/*
	 * Clear register file for HPS. 16 (2^4) is the size of the
	 * full register file in the scc mgr:
	 *	RFILE_DEPTH = 1 + log2(MEM_DQ_PER_DQS + 1 + MEM_DM_PER_DQS +
	 *			       MEM_IF_READ_DQS_WIDTH - 1);
	 */
	int i;

	for (i = 0; i < 16; i++) {
		debug_cond(DLEVEL >= 1, "%s:%d: Clearing SCC RFILE index %u\n",
			   __func__, __LINE__, i);
		scc_mgr_set(SCC_MGR_HHP_RFILE_OFFSET, i, 0);
	}
}

static void scc_mgr_set_dqdqs_output_phase(u32 write_group, u32 phase)
{
	scc_mgr_set(SCC_MGR_DQDQS_OUT_PHASE_OFFSET, write_group, phase);
}

static void scc_mgr_set_dqs_bus_in_delay(u32 read_group, u32 delay)
{
	scc_mgr_set(SCC_MGR_DQS_IN_DELAY_OFFSET, read_group, delay);
}

static void scc_mgr_set_dqs_en_phase(u32 read_group, u32 phase)
{
	scc_mgr_set(SCC_MGR_DQS_EN_PHASE_OFFSET, read_group, phase);
}

static void scc_mgr_set_dqs_en_delay(u32 read_group, u32 delay)
{
	scc_mgr_set(SCC_MGR_DQS_EN_DELAY_OFFSET, read_group, delay);
}

static void scc_mgr_set_dq_in_delay(u32 dq_in_group, u32 delay)
{
	scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, dq_in_group, delay);
}

static void scc_mgr_set_dqs_io_in_delay(u32 delay)
{
	scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, rwcfg->mem_dq_per_write_dqs,
		    delay);
}

static void scc_mgr_set_dm_in_delay(u32 dm, u32 delay)
{
	scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET,
		    rwcfg->mem_dq_per_write_dqs + 1 + dm,
		    delay);
}

static void scc_mgr_set_dq_out1_delay(u32 dq_in_group, u32 delay)
{
	scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, dq_in_group, delay);
}

static void scc_mgr_set_dqs_out1_delay(u32 delay)
{
	scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, rwcfg->mem_dq_per_write_dqs,
		    delay);
}

static void scc_mgr_set_dm_out1_delay(u32 dm, u32 delay)
{
	scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET,
		    rwcfg->mem_dq_per_write_dqs + 1 + dm,
		    delay);
}

/* load up dqs config settings */
static void scc_mgr_load_dqs(u32 dqs)
{
	writel(dqs, &sdr_scc_mgr->dqs_ena);
}

/* load up dqs io config settings */
static void scc_mgr_load_dqs_io(void)
{
	writel(0, &sdr_scc_mgr->dqs_io_ena);
}

/* load up dq config settings */
static void scc_mgr_load_dq(u32 dq_in_group)
{
	writel(dq_in_group, &sdr_scc_mgr->dq_ena);
}

/* load up dm config settings */
static void scc_mgr_load_dm(u32 dm)
{
	writel(dm, &sdr_scc_mgr->dm_ena);
}

/**
 * scc_mgr_set_all_ranks() - Set SCC Manager register for all ranks
 * @off:	Base offset in SCC Manager space
 * @grp:	Read/Write group
 * @val:	Value to be set
 * @update:	If non-zero, trigger SCC Manager update for all ranks
 *
 * This function sets the SCC Manager (Scan Chain Control Manager) register
 * and optionally triggers the SCC update for all ranks.
 */
static void scc_mgr_set_all_ranks(const u32 off, const u32 grp, const u32 val,
				  const int update)
{
	u32 r;

	for (r = 0; r < rwcfg->mem_number_of_ranks;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		scc_mgr_set(off, grp, val);

		if (update || (r == 0)) {
			writel(grp, &sdr_scc_mgr->dqs_ena);
			writel(0, &sdr_scc_mgr->update);
		}
	}
}

static void scc_mgr_set_dqs_en_phase_all_ranks(u32 read_group, u32 phase)
{
	/*
	 * USER although the h/w doesn't support different phases per
	 * shadow register, for simplicity our scc manager modeling
	 * keeps different phase settings per shadow reg, and it's
	 * important for us to keep them in sync to match h/w.
	 * for efficiency, the scan chain update should occur only
	 * once to sr0.
	 */
	scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_PHASE_OFFSET,
			      read_group, phase, 0);
}

static void scc_mgr_set_dqdqs_output_phase_all_ranks(u32 write_group,
						     u32 phase)
{
	/*
	 * USER although the h/w doesn't support different phases per
	 * shadow register, for simplicity our scc manager modeling
	 * keeps different phase settings per shadow reg, and it's
	 * important for us to keep them in sync to match h/w.
	 * for efficiency, the scan chain update should occur only
	 * once to sr0.
	 */
	scc_mgr_set_all_ranks(SCC_MGR_DQDQS_OUT_PHASE_OFFSET,
			      write_group, phase, 0);
}

static void scc_mgr_set_dqs_en_delay_all_ranks(u32 read_group,
					       u32 delay)
{
	/*
	 * In shadow register mode, the T11 settings are stored in
	 * registers in the core, which are updated by the DQS_ENA
	 * signals. Not issuing the SCC_MGR_UPD command allows us to
	 * save lots of rank switching overhead, by calling
	 * select_shadow_regs_for_update with update_scan_chains
	 * set to 0.
	 */
	scc_mgr_set_all_ranks(SCC_MGR_DQS_EN_DELAY_OFFSET,
			      read_group, delay, 1);
}

/**
 * scc_mgr_set_oct_out1_delay() - Set OCT output delay
 * @write_group:	Write group
 * @delay:		Delay value
 *
 * This function sets the OCT output delay in SCC manager.
 */
static void scc_mgr_set_oct_out1_delay(const u32 write_group, const u32 delay)
{
	const int ratio = rwcfg->mem_if_read_dqs_width /
			  rwcfg->mem_if_write_dqs_width;
	const int base = write_group * ratio;
	int i;
	/*
	 * Load the setting in the SCC manager
	 * Although OCT affects only write data, the OCT delay is controlled
	 * by the DQS logic block which is instantiated once per read group.
	 * For protocols where a write group consists of multiple read groups,
	 * the setting must be set multiple times.
	 */
	for (i = 0; i < ratio; i++)
		scc_mgr_set(SCC_MGR_OCT_OUT1_DELAY_OFFSET, base + i, delay);
}
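/*
 * Illustrative note (added for clarity, not part of the original code): the
 * ratio/base mapping above spreads one write group across its read groups.
 * E.g. with a hypothetical interface of 4 read DQS groups and 2 write DQS
 * groups, ratio = 2, so write_group 1 maps to read groups 2 and 3 and the
 * OCT delay is written to both of them.
 */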
/**
 * scc_mgr_set_hhp_extras() - Set HHP extras.
 *
 * Load the fixed setting in the SCC manager HHP extras.
 */
static void scc_mgr_set_hhp_extras(void)
{
	/*
	 * Load the fixed setting in the SCC manager
	 * bits: 0:0 = 1'b1	- DQS bypass
	 * bits: 1:1 = 1'b1	- DQ bypass
	 * bits: 4:2 = 3'b001	- rfifo_mode
	 * bits: 6:5 = 2'b01	- rfifo clock_select
	 * bits: 7:7 = 1'b0	- separate gating from ungating setting
	 * bits: 8:8 = 1'b0	- separate OE from Output delay setting
	 */
	const u32 value = (0 << 8) | (0 << 7) | (1 << 5) |
			  (1 << 2) | (1 << 1) | (1 << 0);
	const u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS |
			 SCC_MGR_HHP_GLOBALS_OFFSET |
			 SCC_MGR_HHP_EXTRAS_OFFSET;

	debug_cond(DLEVEL >= 1, "%s:%d Setting HHP Extras\n",
		   __func__, __LINE__);
	writel(value, addr);
	debug_cond(DLEVEL >= 1, "%s:%d Done Setting HHP Extras\n",
		   __func__, __LINE__);
}
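/*
 * For reference (added note, not in the original code): with the bit fields
 * listed above, the 'value' expression evaluates to
 * (1 << 5) | (1 << 2) | (1 << 1) | (1 << 0) = 0x27.
 */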
/**
 * scc_mgr_zero_all() - Zero all DQS config
 *
 * Zero all DQS config.
 */
static void scc_mgr_zero_all(void)
{
	int i, r;

	/*
	 * USER Zero all DQS config settings, across all groups and all
	 * shadow registers
	 */
	for (r = 0; r < rwcfg->mem_number_of_ranks;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		for (i = 0; i < rwcfg->mem_if_read_dqs_width; i++) {
			/*
			 * The phases actually don't exist on a per-rank basis,
			 * but there's no harm updating them several times, so
			 * let's keep the code simple.
			 */
			scc_mgr_set_dqs_bus_in_delay(i, iocfg->dqs_in_reserve);
			scc_mgr_set_dqs_en_phase(i, 0);
			scc_mgr_set_dqs_en_delay(i, 0);
		}

		for (i = 0; i < rwcfg->mem_if_write_dqs_width; i++) {
			scc_mgr_set_dqdqs_output_phase(i, 0);
			/* Arria V/Cyclone V don't have out2. */
			scc_mgr_set_oct_out1_delay(i, iocfg->dqs_out_reserve);
		}
	}

	/* Multicast to all DQS group enables. */
	writel(0xff, &sdr_scc_mgr->dqs_ena);
	writel(0, &sdr_scc_mgr->update);
}

/**
 * scc_set_bypass_mode() - Set bypass mode and trigger SCC update
 * @write_group:	Write group
 *
 * Set bypass mode and trigger SCC update.
 */
static void scc_set_bypass_mode(const u32 write_group)
{
	/* Multicast to all DQ enables. */
	writel(0xff, &sdr_scc_mgr->dq_ena);
	writel(0xff, &sdr_scc_mgr->dm_ena);

	/* Update current DQS IO enable. */
	writel(0, &sdr_scc_mgr->dqs_io_ena);

	/* Update the DQS logic. */
	writel(write_group, &sdr_scc_mgr->dqs_ena);

	/* Hit update. */
	writel(0, &sdr_scc_mgr->update);
}

/**
 * scc_mgr_load_dqs_for_write_group() - Load DQS settings for Write Group
 * @write_group:	Write group
 *
 * Load DQS settings for Write Group, do not trigger SCC update.
 */
static void scc_mgr_load_dqs_for_write_group(const u32 write_group)
{
	const int ratio = rwcfg->mem_if_read_dqs_width /
			  rwcfg->mem_if_write_dqs_width;
	const int base = write_group * ratio;
	int i;
	/*
	 * Load the setting in the SCC manager
	 * Although OCT affects only write data, the OCT delay is controlled
	 * by the DQS logic block which is instantiated once per read group.
	 * For protocols where a write group consists of multiple read groups,
	 * the setting must be set multiple times.
	 */
	for (i = 0; i < ratio; i++)
		writel(base + i, &sdr_scc_mgr->dqs_ena);
}

/**
 * scc_mgr_zero_group() - Zero all configs for a group
 *
 * Zero DQ, DM, DQS and OCT configs for a group.
 */
static void scc_mgr_zero_group(const u32 write_group, const int out_only)
{
	int i, r;

	for (r = 0; r < rwcfg->mem_number_of_ranks;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		/* Zero all DQ config settings. */
		for (i = 0; i < rwcfg->mem_dq_per_write_dqs; i++) {
			scc_mgr_set_dq_out1_delay(i, 0);
			if (!out_only)
				scc_mgr_set_dq_in_delay(i, 0);
		}

		/* Multicast to all DQ enables. */
		writel(0xff, &sdr_scc_mgr->dq_ena);

		/* Zero all DM config settings. */
		for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
			if (!out_only)
				scc_mgr_set_dm_in_delay(i, 0);
			scc_mgr_set_dm_out1_delay(i, 0);
		}

		/* Multicast to all DM enables. */
		writel(0xff, &sdr_scc_mgr->dm_ena);

		/* Zero all DQS IO settings. */
		if (!out_only)
			scc_mgr_set_dqs_io_in_delay(0);

		/* Arria V/Cyclone V don't have out2. */
		scc_mgr_set_dqs_out1_delay(iocfg->dqs_out_reserve);
		scc_mgr_set_oct_out1_delay(write_group, iocfg->dqs_out_reserve);
		scc_mgr_load_dqs_for_write_group(write_group);

		/* Multicast to all DQS IO enables (only 1 in total). */
		writel(0, &sdr_scc_mgr->dqs_io_ena);

		/* Hit update to zero everything. */
		writel(0, &sdr_scc_mgr->update);
	}
}

/*
 * apply and load a particular input delay for the DQ pins in a group
 * group_bgn is the index of the first dq pin (in the write group)
 */
static void scc_mgr_apply_group_dq_in_delay(u32 group_bgn, u32 delay)
{
	u32 i, p;

	for (i = 0, p = group_bgn; i < rwcfg->mem_dq_per_read_dqs; i++, p++) {
		scc_mgr_set_dq_in_delay(p, delay);
		scc_mgr_load_dq(p);
	}
}

/**
 * scc_mgr_apply_group_dq_out1_delay() - Apply and load an output delay for the DQ pins in a group
 * @delay:		Delay value
 *
 * Apply and load a particular output delay for the DQ pins in a group.
 */
static void scc_mgr_apply_group_dq_out1_delay(const u32 delay)
{
	int i;

	for (i = 0; i < rwcfg->mem_dq_per_write_dqs; i++) {
		scc_mgr_set_dq_out1_delay(i, delay);
		scc_mgr_load_dq(i);
	}
}

/* apply and load a particular output delay for the DM pins in a group */
static void scc_mgr_apply_group_dm_out1_delay(u32 delay1)
{
	u32 i;

	for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
		scc_mgr_set_dm_out1_delay(i, delay1);
		scc_mgr_load_dm(i);
	}
}

/* apply and load delay on both DQS and OCT out1 */
static void scc_mgr_apply_group_dqs_io_and_oct_out1(u32 write_group,
						    u32 delay)
{
	scc_mgr_set_dqs_out1_delay(delay);
	scc_mgr_load_dqs_io();

	scc_mgr_set_oct_out1_delay(write_group, delay);
	scc_mgr_load_dqs_for_write_group(write_group);
}

/**
 * scc_mgr_apply_group_all_out_delay_add() - Apply a delay to the entire output side: DQ, DM, DQS, OCT
 * @write_group:	Write group
 * @delay:		Delay value
 *
 * Apply a delay to the entire output side: DQ, DM, DQS, OCT.
 */
static void scc_mgr_apply_group_all_out_delay_add(const u32 write_group,
						  const u32 delay)
{
	u32 i, new_delay;

	/* DQ shift */
	for (i = 0; i < rwcfg->mem_dq_per_write_dqs; i++)
		scc_mgr_load_dq(i);

	/* DM shift */
	for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
		scc_mgr_load_dm(i);

	/* DQS shift */
	new_delay = READ_SCC_DQS_IO_OUT2_DELAY + delay;
	if (new_delay > iocfg->io_out2_delay_max) {
		debug_cond(DLEVEL >= 1,
			   "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n",
			   __func__, __LINE__, write_group, delay, new_delay,
			   iocfg->io_out2_delay_max,
			   new_delay - iocfg->io_out2_delay_max);
		new_delay -= iocfg->io_out2_delay_max;
		scc_mgr_set_dqs_out1_delay(new_delay);
	}

	scc_mgr_load_dqs_io();

	/* OCT shift */
	new_delay = READ_SCC_OCT_OUT2_DELAY + delay;
	if (new_delay > iocfg->io_out2_delay_max) {
		debug_cond(DLEVEL >= 1,
			   "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n",
			   __func__, __LINE__, write_group, delay,
			   new_delay, iocfg->io_out2_delay_max,
			   new_delay - iocfg->io_out2_delay_max);
		new_delay -= iocfg->io_out2_delay_max;
		scc_mgr_set_oct_out1_delay(write_group, new_delay);
	}

	scc_mgr_load_dqs_for_write_group(write_group);
}

/**
 * scc_mgr_apply_group_all_out_delay_add_all_ranks() - Apply a delay to the entire output side to all ranks
 * @write_group:	Write group
 * @delay:		Delay value
 *
 * Apply a delay to the entire output side (DQ, DM, DQS, OCT) to all ranks.
 */
static void
scc_mgr_apply_group_all_out_delay_add_all_ranks(const u32 write_group,
						const u32 delay)
{
	int r;

	for (r = 0; r < rwcfg->mem_number_of_ranks;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		scc_mgr_apply_group_all_out_delay_add(write_group, delay);
		writel(0, &sdr_scc_mgr->update);
	}
}

/**
 * set_jump_as_return() - Return instruction optimization
 *
 * Optimization used to recover some slots in ddr3 inst_rom; could be
 * applied to other protocols if we wanted to.
 */
static void set_jump_as_return(void)
{
	/*
	 * To save space, we replace return with jump to special shared
	 * RETURN instruction so we set the counter to a large value so that
	 * we always jump.
	 */
	writel(0xff, &sdr_rw_load_mgr_regs->load_cntr0);
	writel(rwcfg->rreturn, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
}

/**
 * delay_for_n_mem_clocks() - Delay for N memory clocks
 * @clocks:	Length of the delay
 *
 * Delay for N memory clocks.
 */
static void delay_for_n_mem_clocks(const u32 clocks)
{
	u32 afi_clocks;
	u16 c_loop;
	u8 inner;
	u8 outer;

	debug("%s:%d: clocks=%u ... start\n", __func__, __LINE__, clocks);

	/* Scale (rounding up) to get afi clocks. */
	afi_clocks = DIV_ROUND_UP(clocks, misccfg->afi_rate_ratio);
	if (afi_clocks)	/* Temporary underflow protection */
		afi_clocks--;

	/*
	 * Note, we don't bother accounting for being off a little
	 * bit because of a few extra instructions in outer loops.
	 * Note, the loops have a test at the end, and do the test
	 * before the decrement, and so always perform the loop
	 * 1 time more than the counter value
	 */
	c_loop = afi_clocks >> 16;
	outer = c_loop ? 0xff : (afi_clocks >> 8);
	inner = outer ? 0xff : afi_clocks;

	/*
	 * rom instructions are structured as follows:
	 *
	 * IDLE_LOOP2: jnz cntr0, TARGET_A
	 * IDLE_LOOP1: jnz cntr1, TARGET_B
	 *             return
	 *
	 * so, when doing nested loops, TARGET_A is set to IDLE_LOOP2, and
	 * TARGET_B is set to IDLE_LOOP2 as well
	 *
	 * if we have no outer loop, though, then we can use IDLE_LOOP1 only,
	 * and set TARGET_B to IDLE_LOOP1 and we skip IDLE_LOOP2 entirely
	 *
	 * a little confusing, but it helps save precious space in the inst_rom
	 * and sequencer rom and keeps the delays more accurate and reduces
	 * overhead
	 */
	if (afi_clocks < 0x100) {
		writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
		       &sdr_rw_load_mgr_regs->load_cntr1);

		writel(rwcfg->idle_loop1,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add1);

		writel(rwcfg->idle_loop1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
					  RW_MGR_RUN_SINGLE_GROUP_OFFSET);
	} else {
		writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
		       &sdr_rw_load_mgr_regs->load_cntr0);

		writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer),
		       &sdr_rw_load_mgr_regs->load_cntr1);

		writel(rwcfg->idle_loop2,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add0);

		writel(rwcfg->idle_loop2,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add1);

		do {
			writel(rwcfg->idle_loop2,
			       SDR_PHYGRP_RWMGRGRP_ADDRESS |
			       RW_MGR_RUN_SINGLE_GROUP_OFFSET);
		} while (c_loop-- != 0);
	}
	debug("%s:%d clocks=%u ... end\n", __func__, __LINE__, clocks);
}
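/*
 * Illustrative example (added note, not in the original code), assuming
 * afi_rate_ratio = 2: a request of clocks = 512 gives afi_clocks = 255
 * after the decrement, so the single IDLE_LOOP1 path is used with
 * cntr1 = 0xff. A request of clocks = 1024 gives afi_clocks = 511, so the
 * nested IDLE_LOOP2 path is used with inner = 0xff, outer = 1 and
 * c_loop = 0 (the do/while issues the run command once).
 */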
/**
 * rw_mgr_mem_init_load_regs() - Load instruction registers
 * @cntr0:	Counter 0 value
 * @cntr1:	Counter 1 value
 * @cntr2:	Counter 2 value
 * @jump:	Jump instruction value
 *
 * Load instruction registers.
 */
static void rw_mgr_mem_init_load_regs(u32 cntr0, u32 cntr1, u32 cntr2, u32 jump)
{
	u32 grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
		      RW_MGR_RUN_SINGLE_GROUP_OFFSET;

	/* Load counters */
	writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr0),
	       &sdr_rw_load_mgr_regs->load_cntr0);
	writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr1),
	       &sdr_rw_load_mgr_regs->load_cntr1);
	writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr2),
	       &sdr_rw_load_mgr_regs->load_cntr2);

	/* Load jump address */
	writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
	writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add1);
	writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add2);

	/* Execute count instruction */
	writel(jump, grpaddr);
}

/**
 * rw_mgr_mem_load_user() - Load user calibration values
 * @fin1:	Final instruction 1
 * @fin2:	Final instruction 2
 * @precharge:	If 1, precharge the banks at the end
 *
 * Load user calibration values and optionally precharge the banks.
 */
static void rw_mgr_mem_load_user(const u32 fin1, const u32 fin2,
				 const int precharge)
{
	u32 grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
		      RW_MGR_RUN_SINGLE_GROUP_OFFSET;
	u32 r;

	for (r = 0; r < rwcfg->mem_number_of_ranks; r++) {
		/* set rank */
		set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);

		/* precharge all banks ... */
		if (precharge)
			writel(rwcfg->precharge_all, grpaddr);

		/*
		 * USER Use Mirror-ed commands for odd ranks if address
		 * mirroring is on
		 */
		if ((rwcfg->mem_address_mirroring >> r) & 0x1) {
			set_jump_as_return();
			writel(rwcfg->mrs2_mirr, grpaddr);
			delay_for_n_mem_clocks(4);
			set_jump_as_return();
			writel(rwcfg->mrs3_mirr, grpaddr);
			delay_for_n_mem_clocks(4);
			set_jump_as_return();
			writel(rwcfg->mrs1_mirr, grpaddr);
			delay_for_n_mem_clocks(4);
			set_jump_as_return();
			writel(fin1, grpaddr);
		} else {
			set_jump_as_return();
			writel(rwcfg->mrs2, grpaddr);
			delay_for_n_mem_clocks(4);
			set_jump_as_return();
			writel(rwcfg->mrs3, grpaddr);
			delay_for_n_mem_clocks(4);
			set_jump_as_return();
			writel(rwcfg->mrs1, grpaddr);
			set_jump_as_return();
			writel(fin2, grpaddr);
		}

		if (precharge)
			continue;

		set_jump_as_return();
		writel(rwcfg->zqcl, grpaddr);

		/* tZQinit = tDLLK = 512 ck cycles */
		delay_for_n_mem_clocks(512);
	}
}

/**
 * rw_mgr_mem_initialize() - Initialize RW Manager
 *
 * Initialize RW Manager.
 */
static void rw_mgr_mem_initialize(void)
{
	debug("%s:%d\n", __func__, __LINE__);

	/* The reset / cke part of initialization is broadcasted to all ranks */
	writel(RW_MGR_RANK_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS |
				RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);

	/*
	 * Here's how you load register for a loop
	 * Counters are located @ 0x800
	 * Jump addresses are located @ 0xC00
	 * For both, registers 0 to 3 are selected using bits 3 and 2, like
	 * in 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C
	 * I know this ain't pretty, but Avalon bus throws away the 2 least
	 * significant bits
	 */

	/* Start with memory RESET activated */

	/* tINIT = 200us */

	/*
	 * 200us @ 266MHz (3.75 ns) ~ 54000 clock cycles
	 * If a and b are the number of iterations in 2 nested loops
	 * it takes the following number of cycles to complete the operation:
	 * number_of_cycles = ((2 + n) * a + 2) * b
	 * where n is the number of instructions in the inner loop
	 * One possible solution is n = 0 , a = 256 , b = 106 => a = FF,
	 * b = 6A
	 */
	rw_mgr_mem_init_load_regs(misccfg->tinit_cntr0_val,
				  misccfg->tinit_cntr1_val,
				  misccfg->tinit_cntr2_val,
				  rwcfg->init_reset_0_cke_0);

	/* Indicate that memory is stable. */
	writel(1, &phy_mgr_cfg->reset_mem_stbl);

	/*
	 * transition the RESET to high
	 * Wait for 500us
	 */

	/*
	 * 500us @ 266MHz (3.75 ns) ~ 134000 clock cycles
	 * If a and b are the number of iterations in 2 nested loops
	 * it takes the following number of cycles to complete the operation
	 * number_of_cycles = ((2 + n) * a + 2) * b
	 * where n is the number of instructions in the inner loop
	 * One possible solution is n = 2 , a = 131 , b = 256 => a = 83,
	 * b = FF
	 */
	rw_mgr_mem_init_load_regs(misccfg->treset_cntr0_val,
				  misccfg->treset_cntr1_val,
				  misccfg->treset_cntr2_val,
				  rwcfg->init_reset_1_cke_0);

	/* Bring up clock enable. */

	/* tXRP < 250 ck cycles */
	delay_for_n_mem_clocks(250);

	rw_mgr_mem_load_user(rwcfg->mrs0_dll_reset_mirr, rwcfg->mrs0_dll_reset,
			     0);
}
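/*
 * Quick sanity check of the cycle-count formula above (added note, not in
 * the original code): for tINIT, n = 0, a = 256, b = 106 gives
 * ((2 + 0) * 256 + 2) * 106 = 54484 cycles, just above the ~54000 needed
 * for 200us; for tRESET, n = 2, a = 131, b = 256 gives
 * ((2 + 2) * 131 + 2) * 256 = 134656 cycles, just above the ~134000 needed
 * for 500us.
 */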
/**
 * rw_mgr_mem_handoff() - Hand off the memory to user
 *
 * At the end of calibration we have to program the user settings in
 * and hand off the memory to the user.
 */
static void rw_mgr_mem_handoff(void)
{
	rw_mgr_mem_load_user(rwcfg->mrs0_user_mirr, rwcfg->mrs0_user, 1);
	/*
	 * Need to wait tMOD (12CK or 15ns) time before issuing other
	 * commands, but we will have plenty of NIOS cycles before actual
	 * handoff so it's okay.
	 */
}

/**
 * rw_mgr_mem_calibrate_write_test_issue() - Issue write test command
 * @group:	Write Group
 * @test_dm:	Use DM
 *
 * Issue write test command. Two variants are provided, one that just tests
 * a write pattern and another that tests datamask functionality.
 */
static void rw_mgr_mem_calibrate_write_test_issue(u32 group,
						  u32 test_dm)
{
	const u32 quick_write_mode =
		(STATIC_CALIB_STEPS & CALIB_SKIP_WRITES) &&
		misccfg->enable_super_quick_calibration;
	u32 mcc_instruction;
	u32 rw_wl_nop_cycles;

	/*
	 * Set counter and jump addresses for the right
	 * number of NOP cycles.
	 * The number of supported NOP cycles can range from -1 to infinity
	 * Three different cases are handled:
	 *
	 * 1. For a number of NOP cycles greater than 0, the RW Mgr looping
	 *    mechanism will be used to insert the right number of NOPs
	 *
	 * 2. For a number of NOP cycles equal to 0, the micro-instruction
	 *    issuing the write command will jump straight to the
	 *    micro-instruction that turns on DQS (for DDRx), or outputs write
	 *    data (for RLD), skipping the NOP micro-instruction all together
	 *
	 * 3. A number of NOP cycles equal to -1 indicates that DQS must be
	 *    turned on in the same micro-instruction that issues the write
	 *    command. Then we need to directly jump to the micro-instruction
	 *    that sends out the data
	 *
	 * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters
	 *       (2 and 3). One jump-counter (0) is used to perform multiple
	 *       write-read operations.
	 *       one counter left to issue this command in "multiple-group" mode
	 */

	rw_wl_nop_cycles = gbl->rw_wl_nop_cycles;

	if (rw_wl_nop_cycles == -1) {
		/*
		 * CNTR 2 - We want to execute the special write operation that
		 * turns on DQS right away and then skip directly to the
		 * instruction that sends out the data. We set the counter to a
		 * large number so that the jump is always taken.
		 */
		writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);

		/* CNTR 3 - Not used */
		if (test_dm) {
			mcc_instruction = rwcfg->lfsr_wr_rd_dm_bank_0_wl_1;
			writel(rwcfg->lfsr_wr_rd_dm_bank_0_data,
			       &sdr_rw_load_jump_mgr_regs->load_jump_add2);
			writel(rwcfg->lfsr_wr_rd_dm_bank_0_nop,
			       &sdr_rw_load_jump_mgr_regs->load_jump_add3);
		} else {
			mcc_instruction = rwcfg->lfsr_wr_rd_bank_0_wl_1;
			writel(rwcfg->lfsr_wr_rd_bank_0_data,
			       &sdr_rw_load_jump_mgr_regs->load_jump_add2);
			writel(rwcfg->lfsr_wr_rd_bank_0_nop,
			       &sdr_rw_load_jump_mgr_regs->load_jump_add3);
		}
	} else if (rw_wl_nop_cycles == 0) {
		/*
		 * CNTR 2 - We want to skip the NOP operation and go straight
		 * to the DQS enable instruction. We set the counter to a large
		 * number so that the jump is always taken.
		 */
		writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);

		/* CNTR 3 - Not used */
		if (test_dm) {
			mcc_instruction = rwcfg->lfsr_wr_rd_dm_bank_0;
			writel(rwcfg->lfsr_wr_rd_dm_bank_0_dqs,
			       &sdr_rw_load_jump_mgr_regs->load_jump_add2);
		} else {
			mcc_instruction = rwcfg->lfsr_wr_rd_bank_0;
			writel(rwcfg->lfsr_wr_rd_bank_0_dqs,
			       &sdr_rw_load_jump_mgr_regs->load_jump_add2);
		}
	} else {
		/*
		 * CNTR 2 - In this case we want to execute the next instruction
		 * and NOT take the jump. So we set the counter to 0. The jump
		 * address doesn't count.
		 */
		writel(0x0, &sdr_rw_load_mgr_regs->load_cntr2);
		writel(0x0, &sdr_rw_load_jump_mgr_regs->load_jump_add2);

		/*
		 * CNTR 3 - Set the nop counter to the number of cycles we
		 * need to loop for, minus 1.
		 */
		writel(rw_wl_nop_cycles - 1, &sdr_rw_load_mgr_regs->load_cntr3);
		if (test_dm) {
			mcc_instruction = rwcfg->lfsr_wr_rd_dm_bank_0;
			writel(rwcfg->lfsr_wr_rd_dm_bank_0_nop,
			       &sdr_rw_load_jump_mgr_regs->load_jump_add3);
		} else {
			mcc_instruction = rwcfg->lfsr_wr_rd_bank_0;
			writel(rwcfg->lfsr_wr_rd_bank_0_nop,
			       &sdr_rw_load_jump_mgr_regs->load_jump_add3);
		}
	}

	writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
		  RW_MGR_RESET_READ_DATAPATH_OFFSET);

	if (quick_write_mode)
		writel(0x08, &sdr_rw_load_mgr_regs->load_cntr0);
	else
		writel(0x40, &sdr_rw_load_mgr_regs->load_cntr0);

	writel(mcc_instruction, &sdr_rw_load_jump_mgr_regs->load_jump_add0);

	/*
	 * CNTR 1 - This is used to ensure enough time elapses
	 * for read data to come back.
	 */
	writel(0x30, &sdr_rw_load_mgr_regs->load_cntr1);

	if (test_dm) {
		writel(rwcfg->lfsr_wr_rd_dm_bank_0_wait,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add1);
	} else {
		writel(rwcfg->lfsr_wr_rd_bank_0_wait,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add1);
	}

	writel(mcc_instruction, (SDR_PHYGRP_RWMGRGRP_ADDRESS |
				 RW_MGR_RUN_SINGLE_GROUP_OFFSET) +
				(group << 2));
}

/**
 * rw_mgr_mem_calibrate_write_test() - Test writes, check for single/multiple pass
 * @rank_bgn:		Rank number
 * @write_group:	Write Group
 * @use_dm:		Use DM
 * @all_correct:	All bits must be correct in the mask
 * @bit_chk:		Resulting bit mask after the test
 * @all_ranks:		Test all ranks
 *
 * Test writes, can check for a single bit pass or multiple bit pass.
 */
static int
rw_mgr_mem_calibrate_write_test(const u32 rank_bgn, const u32 write_group,
				const u32 use_dm, const u32 all_correct,
				u32 *bit_chk, const u32 all_ranks)
{
	const u32 rank_end = all_ranks ?
				rwcfg->mem_number_of_ranks :
				(rank_bgn + NUM_RANKS_PER_SHADOW_REG);
	const u32 shift_ratio = rwcfg->mem_dq_per_write_dqs /
				rwcfg->mem_virtual_groups_per_write_dqs;
	const u32 correct_mask_vg = param->write_correct_mask_vg;

	u32 tmp_bit_chk, base_rw_mgr;
	int vg, r;

	*bit_chk = param->write_correct_mask;

	for (r = rank_bgn; r < rank_end; r++) {
		/* Set rank */
		set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

		tmp_bit_chk = 0;
		for (vg = rwcfg->mem_virtual_groups_per_write_dqs - 1;
		     vg >= 0; vg--) {
			/* Reset the FIFOs to get pointers to known state. */
			writel(0, &phy_mgr_cmd->fifo_reset);

			rw_mgr_mem_calibrate_write_test_issue(
				write_group *
				rwcfg->mem_virtual_groups_per_write_dqs + vg,
				use_dm);

			base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
			tmp_bit_chk <<= shift_ratio;
			tmp_bit_chk |= (correct_mask_vg & ~(base_rw_mgr));
		}

		*bit_chk &= tmp_bit_chk;
	}

	set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
	if (all_correct) {
		debug_cond(DLEVEL >= 2,
			   "write_test(%u,%u,ALL) : %u == %u => %i\n",
			   write_group, use_dm, *bit_chk,
			   param->write_correct_mask,
			   *bit_chk == param->write_correct_mask);
		return *bit_chk == param->write_correct_mask;
	} else {
		debug_cond(DLEVEL >= 2,
			   "write_test(%u,%u,ONE) : %u != %i => %i\n",
			   write_group, use_dm, *bit_chk, 0, *bit_chk != 0);
		return *bit_chk != 0x00;
	}
}
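/*
 * Illustrative example (added note, not in the original code): assuming a
 * hypothetical 8 DQ per write DQS, 2 virtual groups per write DQS and a
 * per-virtual-group mask of 0xf, shift_ratio = 4 and the vg loop packs the
 * per-virtual-group pass bits into one per-group mask. E.g. if virtual
 * group 1 passes on all 4 bits and virtual group 0 only on bit 0,
 * tmp_bit_chk = (0xf << 4) | 0x1 = 0xf1, and the per-rank results are
 * AND-ed together into *bit_chk.
 */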
/**
 * rw_mgr_mem_calibrate_read_test_patterns() - Read back test patterns
 * @rank_bgn:	Rank number
 * @group:	Read/Write Group
 * @all_ranks:	Test all ranks
 *
 * Performs a guaranteed read on the patterns we are going to use during a
 * read test to ensure memory works.
 */
static int
rw_mgr_mem_calibrate_read_test_patterns(const u32 rank_bgn, const u32 group,
					const u32 all_ranks)
{
	const u32 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
			 RW_MGR_RUN_SINGLE_GROUP_OFFSET;
	const u32 addr_offset =
			 (group * rwcfg->mem_virtual_groups_per_read_dqs) << 2;
	const u32 rank_end = all_ranks ?
				rwcfg->mem_number_of_ranks :
				(rank_bgn + NUM_RANKS_PER_SHADOW_REG);
	const u32 shift_ratio = rwcfg->mem_dq_per_read_dqs /
				rwcfg->mem_virtual_groups_per_read_dqs;
	const u32 correct_mask_vg = param->read_correct_mask_vg;

	u32 tmp_bit_chk, base_rw_mgr, bit_chk;
	int vg, r;
	int ret = 0;

	bit_chk = param->read_correct_mask;

	for (r = rank_bgn; r < rank_end; r++) {
		/* Set rank */
		set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

		/* Load up a constant burst of read commands */
		writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);
		writel(rwcfg->guaranteed_read,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add0);

		writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);
		writel(rwcfg->guaranteed_read_cont,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add1);

		tmp_bit_chk = 0;
		for (vg = rwcfg->mem_virtual_groups_per_read_dqs - 1;
		     vg >= 0; vg--) {
			/* Reset the FIFOs to get pointers to known state. */
			writel(0, &phy_mgr_cmd->fifo_reset);
			writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
				  RW_MGR_RESET_READ_DATAPATH_OFFSET);
			writel(rwcfg->guaranteed_read,
			       addr + addr_offset + (vg << 2));

			base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
			tmp_bit_chk <<= shift_ratio;
			tmp_bit_chk |= correct_mask_vg & ~base_rw_mgr;
		}

		bit_chk &= tmp_bit_chk;
	}

	writel(rwcfg->clear_dqs_enable, addr + (group << 2));

	set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);

	if (bit_chk != param->read_correct_mask)
		ret = -EIO;

	debug_cond(DLEVEL >= 1,
		   "%s:%d test_load_patterns(%u,ALL) => (%u == %u) => %i\n",
		   __func__, __LINE__, group, bit_chk,
		   param->read_correct_mask, ret);

	return ret;
}

/**
 * rw_mgr_mem_calibrate_read_load_patterns() - Load up the patterns for read test
 * @rank_bgn:	Rank number
 * @all_ranks:	Test all ranks
 *
 * Load up the patterns we are going to use during a read test.
 */
static void rw_mgr_mem_calibrate_read_load_patterns(const u32 rank_bgn,
						    const int all_ranks)
{
	const u32 rank_end = all_ranks ?
			rwcfg->mem_number_of_ranks :
			(rank_bgn + NUM_RANKS_PER_SHADOW_REG);
	u32 r;

	debug("%s:%d\n", __func__, __LINE__);

	for (r = rank_bgn; r < rank_end; r++) {
		/* set rank */
		set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

		/* Load up a constant burst */
		writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);

		writel(rwcfg->guaranteed_write_wait0,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add0);

		writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);

		writel(rwcfg->guaranteed_write_wait1,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add1);

		writel(0x04, &sdr_rw_load_mgr_regs->load_cntr2);

		writel(rwcfg->guaranteed_write_wait2,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add2);

		writel(0x04, &sdr_rw_load_mgr_regs->load_cntr3);

		writel(rwcfg->guaranteed_write_wait3,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add3);

		writel(rwcfg->guaranteed_write, SDR_PHYGRP_RWMGRGRP_ADDRESS |
						RW_MGR_RUN_SINGLE_GROUP_OFFSET);
	}

	set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
}

/**
 * rw_mgr_mem_calibrate_read_test() - Perform READ test on single rank
 * @rank_bgn:		Rank number
 * @group:		Read/Write group
 * @num_tries:		Number of retries of the test
 * @all_correct:	All bits must be correct in the mask
 * @bit_chk:		Resulting bit mask after the test
 * @all_groups:		Test all R/W groups
 * @all_ranks:		Test all ranks
 *
 * Try a read and see if it returns correct data back. Test has dummy reads
 * inserted into the mix used to align DQS enable. Test has more thorough
 * checks than the regular read test.
 */
static int
rw_mgr_mem_calibrate_read_test(const u32 rank_bgn, const u32 group,
			       const u32 num_tries, const u32 all_correct,
			       u32 *bit_chk,
			       const u32 all_groups, const u32 all_ranks)
{
	const u32 rank_end = all_ranks ?
			rwcfg->mem_number_of_ranks :
			(rank_bgn + NUM_RANKS_PER_SHADOW_REG);
	const u32 quick_read_mode =
			((STATIC_CALIB_STEPS & CALIB_SKIP_DELAY_SWEEPS) &&
			 misccfg->enable_super_quick_calibration);
	u32 correct_mask_vg = param->read_correct_mask_vg;
	u32 tmp_bit_chk;
	u32 base_rw_mgr;
	u32 addr;

	int r, vg, ret;

	*bit_chk = param->read_correct_mask;

	for (r = rank_bgn; r < rank_end; r++) {
		/* set rank */
		set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);

		writel(0x10, &sdr_rw_load_mgr_regs->load_cntr1);

		writel(rwcfg->read_b2b_wait1,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add1);

		writel(0x10, &sdr_rw_load_mgr_regs->load_cntr2);
		writel(rwcfg->read_b2b_wait2,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add2);

		if (quick_read_mode)
			writel(0x1, &sdr_rw_load_mgr_regs->load_cntr0);
			/* need at least two (1+1) reads to capture failures */
		else if (all_groups)
			writel(0x06, &sdr_rw_load_mgr_regs->load_cntr0);
		else
			writel(0x32, &sdr_rw_load_mgr_regs->load_cntr0);

		writel(rwcfg->read_b2b,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add0);
		if (all_groups)
			writel(rwcfg->mem_if_read_dqs_width *
			       rwcfg->mem_virtual_groups_per_read_dqs - 1,
			       &sdr_rw_load_mgr_regs->load_cntr3);
		else
			writel(0x0, &sdr_rw_load_mgr_regs->load_cntr3);

		writel(rwcfg->read_b2b,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add3);

		tmp_bit_chk = 0;
		for (vg = rwcfg->mem_virtual_groups_per_read_dqs - 1; vg >= 0;
		     vg--) {
			/* Reset the FIFOs to get pointers to known state. */
			writel(0, &phy_mgr_cmd->fifo_reset);
			writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
				  RW_MGR_RESET_READ_DATAPATH_OFFSET);

			if (all_groups) {
				addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
				       RW_MGR_RUN_ALL_GROUPS_OFFSET;
			} else {
				addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
				       RW_MGR_RUN_SINGLE_GROUP_OFFSET;
			}

			writel(rwcfg->read_b2b, addr +
			       ((group *
				 rwcfg->mem_virtual_groups_per_read_dqs +
				 vg) << 2));

			base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
			tmp_bit_chk <<= rwcfg->mem_dq_per_read_dqs /
					rwcfg->mem_virtual_groups_per_read_dqs;
			tmp_bit_chk |= correct_mask_vg & ~(base_rw_mgr);
		}

		*bit_chk &= tmp_bit_chk;
	}

	addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
	writel(rwcfg->clear_dqs_enable, addr + (group << 2));

	set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);

	if (all_correct) {
		ret = (*bit_chk == param->read_correct_mask);
		debug_cond(DLEVEL >= 2,
			   "%s:%d read_test(%u,ALL,%u) => (%u == %u) => %i\n",
			   __func__, __LINE__, group, all_groups, *bit_chk,
			   param->read_correct_mask, ret);
	} else {
		ret = (*bit_chk != 0x00);
		debug_cond(DLEVEL >= 2,
			   "%s:%d read_test(%u,ONE,%u) => (%u != %u) => %i\n",
			   __func__, __LINE__, group, all_groups, *bit_chk,
			   0, ret);
	}

	return ret;
}
/**
 * rw_mgr_mem_calibrate_read_test_all_ranks() - Perform READ test on all ranks
 * @grp:		Read/Write group
 * @num_tries:		Number of retries of the test
 * @all_correct:	All bits must be correct in the mask
 * @all_groups:		Test all R/W groups
 *
 * Perform a READ test across all memory ranks.
 */
static int
rw_mgr_mem_calibrate_read_test_all_ranks(const u32 grp, const u32 num_tries,
					 const u32 all_correct,
					 const u32 all_groups)
{
	u32 bit_chk;

	return rw_mgr_mem_calibrate_read_test(0, grp, num_tries, all_correct,
					      &bit_chk, all_groups, 1);
}

/**
 * rw_mgr_incr_vfifo() - Increase VFIFO value
 * @grp:	Read/Write group
 *
 * Increase VFIFO value.
 */
static void rw_mgr_incr_vfifo(const u32 grp)
{
	writel(grp, &phy_mgr_cmd->inc_vfifo_hard_phy);
}

/**
 * rw_mgr_decr_vfifo() - Decrease VFIFO value
 * @grp:	Read/Write group
 *
 * Decrease VFIFO value.
 */
static void rw_mgr_decr_vfifo(const u32 grp)
{
	u32 i;

	for (i = 0; i < misccfg->read_valid_fifo_size - 1; i++)
		rw_mgr_incr_vfifo(grp);
}

/**
 * find_vfifo_failing_read() - Push VFIFO to get a failing read
 * @grp:	Read/Write group
 *
 * Push VFIFO until a failing read happens.
 */
static int find_vfifo_failing_read(const u32 grp)
{
	u32 v, ret, fail_cnt = 0;

	for (v = 0; v < misccfg->read_valid_fifo_size; v++) {
		debug_cond(DLEVEL >= 2, "%s:%d: vfifo %u\n",
			   __func__, __LINE__, v);
		ret = rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
							       PASS_ONE_BIT, 0);
		if (!ret) {
			fail_cnt++;

			if (fail_cnt == 2)
				return v;
		}

		/* Fiddle with FIFO. */
		rw_mgr_incr_vfifo(grp);
	}

	/* No failing read found! Something must have gone wrong. */
	debug_cond(DLEVEL >= 2, "%s:%d: vfifo failed\n", __func__, __LINE__);
	return 0;
}

/**
 * sdr_find_phase_delay() - Find DQS enable phase or delay
 * @working:	If 1, look for working phase/delay, if 0, look for non-working
 * @delay:	If 1, look for delay, if 0, look for phase
 * @grp:	Read/Write group
 * @work:	Working window position
 * @work_inc:	Working window increment
 * @pd:		DQS Phase/Delay Iterator
 *
 * Find working or non-working DQS enable phase setting.
 */
static int sdr_find_phase_delay(int working, int delay, const u32 grp,
				u32 *work, const u32 work_inc, u32 *pd)
{
	const u32 max = delay ? iocfg->dqs_en_delay_max :
				iocfg->dqs_en_phase_max;
	u32 ret;

	for (; *pd <= max; (*pd)++) {
		if (delay)
			scc_mgr_set_dqs_en_delay_all_ranks(grp, *pd);
		else
			scc_mgr_set_dqs_en_phase_all_ranks(grp, *pd);

		ret = rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
							       PASS_ONE_BIT, 0);
		if (!working)
			ret = !ret;

		if (ret)
			return 0;

		if (work)
			*work += work_inc;
	}

	return -EINVAL;
}

/**
 * sdr_find_phase() - Find DQS enable phase
 * @working:	If 1, look for working phase, if 0, look for non-working phase
 * @grp:	Read/Write group
 * @work:	Working window position
 * @i:		Iterator
 * @p:		DQS Phase Iterator
 *
 * Find working or non-working DQS enable phase setting.
 */
static int sdr_find_phase(int working, const u32 grp, u32 *work,
			  u32 *i, u32 *p)
{
	const u32 end = misccfg->read_valid_fifo_size + (working ? 0 : 1);
	int ret;

	for (; *i < end; (*i)++) {
		if (working)
			*p = 0;

		ret = sdr_find_phase_delay(working, 0, grp, work,
					   iocfg->delay_per_opa_tap, p);
		if (!ret)
			return 0;

		if (*p > iocfg->dqs_en_phase_max) {
			/* Fiddle with FIFO. */
			rw_mgr_incr_vfifo(grp);
			if (!working)
				*p = 0;
		}
	}

	return -EINVAL;
}

/**
 * sdr_working_phase() - Find working DQS enable phase
 * @grp:	Read/Write group
 * @work_bgn:	Working window start position
 * @d:		dtaps output value
 * @p:		DQS Phase Iterator
 * @i:		Iterator
 *
 * Find working DQS enable phase setting.
 */
static int sdr_working_phase(const u32 grp, u32 *work_bgn, u32 *d,
			     u32 *p, u32 *i)
{
	const u32 dtaps_per_ptap = iocfg->delay_per_opa_tap /
				   iocfg->delay_per_dqs_en_dchain_tap;
	int ret;

	*work_bgn = 0;

	for (*d = 0; *d <= dtaps_per_ptap; (*d)++) {
		*i = 0;
		scc_mgr_set_dqs_en_delay_all_ranks(grp, *d);
		ret = sdr_find_phase(1, grp, work_bgn, i, p);
		if (!ret)
			return 0;
		*work_bgn += iocfg->delay_per_dqs_en_dchain_tap;
	}

	/* Cannot find working solution */
	debug_cond(DLEVEL >= 2, "%s:%d find_dqs_en_phase: no vfifo/ptap/dtap\n",
		   __func__, __LINE__);
	return -EINVAL;
}

/**
 * sdr_backup_phase() - Find DQS enable backup phase
 * @grp:	Read/Write group
 * @work_bgn:	Working window start position
 * @p:		DQS Phase Iterator
 *
 * Find DQS enable backup phase setting.
 */
static void sdr_backup_phase(const u32 grp, u32 *work_bgn, u32 *p)
{
	u32 tmp_delay, d;
	int ret;

	/* Special case code for backing up a phase */
	if (*p == 0) {
		*p = iocfg->dqs_en_phase_max;
		rw_mgr_decr_vfifo(grp);
	} else {
		(*p)--;
	}
	tmp_delay = *work_bgn - iocfg->delay_per_opa_tap;
	scc_mgr_set_dqs_en_phase_all_ranks(grp, *p);

	for (d = 0; d <= iocfg->dqs_en_delay_max && tmp_delay < *work_bgn;
	     d++) {
		scc_mgr_set_dqs_en_delay_all_ranks(grp, d);

		ret = rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
							       PASS_ONE_BIT, 0);
		if (ret) {
			*work_bgn = tmp_delay;
			break;
		}

		tmp_delay += iocfg->delay_per_dqs_en_dchain_tap;
	}

	/* Restore VFIFO to old state before we decremented it (if needed). */
	(*p)++;
	if (*p > iocfg->dqs_en_phase_max) {
		*p = 0;
		rw_mgr_incr_vfifo(grp);
	}

	scc_mgr_set_dqs_en_delay_all_ranks(grp, 0);
}

/**
 * sdr_nonworking_phase() - Find non-working DQS enable phase
 * @grp:	Read/Write group
 * @work_end:	Working window end position
 * @p:		DQS Phase Iterator
 * @i:		Iterator
 *
 * Find non-working DQS enable phase setting.
 */
static int sdr_nonworking_phase(const u32 grp, u32 *work_end, u32 *p, u32 *i)
{
	int ret;

	(*p)++;
	*work_end += iocfg->delay_per_opa_tap;
	if (*p > iocfg->dqs_en_phase_max) {
		/* Fiddle with FIFO. */
		*p = 0;
		rw_mgr_incr_vfifo(grp);
	}

	ret = sdr_find_phase(0, grp, work_end, i, p);
	if (ret) {
		/* Cannot see edge of failing read. */
		debug_cond(DLEVEL >= 2, "%s:%d: end: failed\n",
			   __func__, __LINE__);
	}

	return ret;
}

/**
 * sdr_find_window_center() - Find center of the working DQS window.
 * @grp:	Read/Write group
 * @work_bgn:	First working settings
 * @work_end:	Last working settings
 *
 * Find center of the working DQS enable window.
 */
static int sdr_find_window_center(const u32 grp, const u32 work_bgn,
				  const u32 work_end)
{
	u32 work_mid;
	int tmp_delay = 0;
	int i, p, d;

	work_mid = (work_bgn + work_end) / 2;

	debug_cond(DLEVEL >= 2, "work_bgn=%d work_end=%d work_mid=%d\n",
		   work_bgn, work_end, work_mid);
	/* Get the middle delay to be less than a VFIFO delay */
	tmp_delay = (iocfg->dqs_en_phase_max + 1) * iocfg->delay_per_opa_tap;

	debug_cond(DLEVEL >= 2, "vfifo ptap delay %d\n", tmp_delay);
	work_mid %= tmp_delay;
	debug_cond(DLEVEL >= 2, "new work_mid %d\n", work_mid);

	tmp_delay = rounddown(work_mid, iocfg->delay_per_opa_tap);
	if (tmp_delay > iocfg->dqs_en_phase_max * iocfg->delay_per_opa_tap)
		tmp_delay = iocfg->dqs_en_phase_max * iocfg->delay_per_opa_tap;
	p = tmp_delay / iocfg->delay_per_opa_tap;

	debug_cond(DLEVEL >= 2, "new p %d, tmp_delay=%d\n", p, tmp_delay);

	d = DIV_ROUND_UP(work_mid - tmp_delay,
			 iocfg->delay_per_dqs_en_dchain_tap);
	if (d > iocfg->dqs_en_delay_max)
		d = iocfg->dqs_en_delay_max;
	tmp_delay += d * iocfg->delay_per_dqs_en_dchain_tap;

	debug_cond(DLEVEL >= 2, "new d %d, tmp_delay=%d\n", d, tmp_delay);

	scc_mgr_set_dqs_en_phase_all_ranks(grp, p);
	scc_mgr_set_dqs_en_delay_all_ranks(grp, d);

	/*
	 * push vfifo until we can successfully calibrate. We can do this
	 * because the largest possible margin is 1 VFIFO cycle.
	 */
	for (i = 0; i < misccfg->read_valid_fifo_size; i++) {
		debug_cond(DLEVEL >= 2, "find_dqs_en_phase: center\n");
		if (rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1,
							     PASS_ONE_BIT,
							     0)) {
			debug_cond(DLEVEL >= 2,
				   "%s:%d center: found: ptap=%u dtap=%u\n",
				   __func__, __LINE__, p, d);
			return 0;
		}

		/* Fiddle with FIFO. */
		rw_mgr_incr_vfifo(grp);
	}

	debug_cond(DLEVEL >= 2, "%s:%d center: failed.\n",
		   __func__, __LINE__);
	return -EINVAL;
}
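/*
 * Illustrative walk-through (added note with hypothetical tap values, not
 * part of the original code): assume delay_per_opa_tap = 2500 ps,
 * delay_per_dqs_en_dchain_tap = 25 ps, dqs_en_phase_max = 7, and a window
 * of work_bgn = 1000, work_end = 19000. Then work_mid = 10000, the VFIFO
 * span is (7 + 1) * 2500 = 20000 so the modulo leaves 10000, the phase is
 * p = 10000 / 2500 = 4 and the remaining 0 ps needs d = 0 delay taps.
 */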
/**
 * rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase() - Find a good DQS enable to use
 * @grp:	Read/Write Group
 *
 * Find a good DQS enable to use.
 */
static int rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(const u32 grp)
{
	u32 d, p, i;
	u32 dtaps_per_ptap;
	u32 work_bgn, work_end;
	u32 found_passing_read, found_failing_read = 0, initial_failing_dtap;
	int ret;

	debug("%s:%d %u\n", __func__, __LINE__, grp);

	reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);

	scc_mgr_set_dqs_en_delay_all_ranks(grp, 0);
	scc_mgr_set_dqs_en_phase_all_ranks(grp, 0);

	/* Step 0: Determine number of delay taps for each phase tap. */
	dtaps_per_ptap = iocfg->delay_per_opa_tap /
			 iocfg->delay_per_dqs_en_dchain_tap;

	/* Step 1: First push vfifo until we get a failing read. */
	find_vfifo_failing_read(grp);

	/* Step 2: Find first working phase, increment in ptaps. */
	work_bgn = 0;
	ret = sdr_working_phase(grp, &work_bgn, &d, &p, &i);
	if (ret)
		return ret;

	work_end = work_bgn;

	/*
	 * If d is 0 then the working window covers a phase tap and we can
	 * follow the old procedure. Otherwise, we've found the beginning
	 * and we need to increment the dtaps until we find the end.
	 */
	if (d == 0) {
		/*
		 * Step 3a: If we have room, back off by one and
		 * increment in dtaps.
		 */
		sdr_backup_phase(grp, &work_bgn, &p);

		/*
		 * Step 4a: go forward from working phase to non working
		 * phase, increment in ptaps.
		 */
		ret = sdr_nonworking_phase(grp, &work_end, &p, &i);
		if (ret)
			return ret;

		/* Step 5a: Back off one from last, increment in dtaps. */

		/* Special case code for backing up a phase */
		if (p == 0) {
			p = iocfg->dqs_en_phase_max;
			rw_mgr_decr_vfifo(grp);
		} else {
			p = p - 1;
		}

		work_end -= iocfg->delay_per_opa_tap;
		scc_mgr_set_dqs_en_phase_all_ranks(grp, p);

		d = 0;

		debug_cond(DLEVEL >= 2, "%s:%d p: ptap=%u\n",
			   __func__, __LINE__, p);
	}

	/* The dtap increment to find the failing edge is done here. */
	sdr_find_phase_delay(0, 1, grp, &work_end,
			     iocfg->delay_per_dqs_en_dchain_tap, &d);

	/* Go back to working dtap */
	if (d != 0)
		work_end -= iocfg->delay_per_dqs_en_dchain_tap;

	debug_cond(DLEVEL >= 2,
		   "%s:%d p/d: ptap=%u dtap=%u end=%u\n",
		   __func__, __LINE__, p, d - 1, work_end);

	if (work_end < work_bgn) {
		/* nil range */
		debug_cond(DLEVEL >= 2, "%s:%d end-2: failed\n",
			   __func__, __LINE__);
		return -EINVAL;
	}

	debug_cond(DLEVEL >= 2, "%s:%d found range [%u,%u]\n",
		   __func__, __LINE__, work_bgn, work_end);

	/*
	 * We need to calculate the number of dtaps that equal a ptap.
	 * To do that we'll back up a ptap and re-find the edge of the
	 * window using dtaps
	 */
	debug_cond(DLEVEL >= 2, "%s:%d calculate dtaps_per_ptap for tracking\n",
		   __func__, __LINE__);

	/* Special case code for backing up a phase */
	if (p == 0) {
		p = iocfg->dqs_en_phase_max;
		rw_mgr_decr_vfifo(grp);
		debug_cond(DLEVEL >= 2, "%s:%d backedup cycle/phase: p=%u\n",
			   __func__, __LINE__, p);
	} else {
		p = p - 1;
		debug_cond(DLEVEL >= 2, "%s:%d backedup phase only: p=%u",
			   __func__, __LINE__, p);
	}

	scc_mgr_set_dqs_en_phase_all_ranks(grp, p);

	/*
	 * Increase dtap until we first see a passing read (in case the
	 * window is smaller than a ptap), and then a failing read to
	 * mark the edge of the window again.
	 */

	/* Find a passing read. */
	debug_cond(DLEVEL >= 2, "%s:%d find passing read\n",
		   __func__, __LINE__);

	initial_failing_dtap = d;

	found_passing_read = !sdr_find_phase_delay(1, 1, grp, NULL, 0, &d);
	if (found_passing_read) {
		/* Find a failing read. */
		debug_cond(DLEVEL >= 2, "%s:%d find failing read\n",
			   __func__, __LINE__);
		d++;
		found_failing_read = !sdr_find_phase_delay(0, 1, grp, NULL, 0,
							   &d);
	} else {
		debug_cond(DLEVEL >= 1,
			   "%s:%d failed to calculate dtaps per ptap. Fall back on static value\n",
			   __func__, __LINE__);
	}

	/*
	 * The dynamically calculated dtaps_per_ptap is only valid if we
	 * found a passing/failing read. If we didn't, it means d hit the max
	 * (iocfg->dqs_en_delay_max). Otherwise, dtaps_per_ptap retains its
	 * statically calculated value.
	 */
	if (found_passing_read && found_failing_read)
		dtaps_per_ptap = d - initial_failing_dtap;

	writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap);
	debug_cond(DLEVEL >= 2, "%s:%d dtaps_per_ptap=%u - %u = %u",
		   __func__, __LINE__, d, initial_failing_dtap, dtaps_per_ptap);

	/* Step 6: Find the centre of the window. */
*/ 1959 ret = sdr_find_window_center(grp, work_bgn, work_end); 1960 1961 return ret; 1962 } 1963 1964 /** 1965 * search_stop_check() - Check if the detected edge is valid 1966 * @write: Perform read (Stage 2) or write (Stage 3) calibration 1967 * @d: DQS delay 1968 * @rank_bgn: Rank number 1969 * @write_group: Write Group 1970 * @read_group: Read Group 1971 * @bit_chk: Resulting bit mask after the test 1972 * @sticky_bit_chk: Resulting sticky bit mask after the test 1973 * @use_read_test: Perform read test 1974 * 1975 * Test if the found edge is valid. 1976 */ 1977 static u32 search_stop_check(const int write, const int d, const int rank_bgn, 1978 const u32 write_group, const u32 read_group, 1979 u32 *bit_chk, u32 *sticky_bit_chk, 1980 const u32 use_read_test) 1981 { 1982 const u32 ratio = rwcfg->mem_if_read_dqs_width / 1983 rwcfg->mem_if_write_dqs_width; 1984 const u32 correct_mask = write ? param->write_correct_mask : 1985 param->read_correct_mask; 1986 const u32 per_dqs = write ? rwcfg->mem_dq_per_write_dqs : 1987 rwcfg->mem_dq_per_read_dqs; 1988 u32 ret; 1989 /* 1990 * Stop searching when the read test doesn't pass AND when 1991 * we've seen a passing read on every bit. 1992 */ 1993 if (write) { /* WRITE-ONLY */ 1994 ret = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1995 0, PASS_ONE_BIT, 1996 bit_chk, 0); 1997 } else if (use_read_test) { /* READ-ONLY */ 1998 ret = !rw_mgr_mem_calibrate_read_test(rank_bgn, read_group, 1999 NUM_READ_PB_TESTS, 2000 PASS_ONE_BIT, bit_chk, 2001 0, 0); 2002 } else { /* READ-ONLY */ 2003 rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 0, 2004 PASS_ONE_BIT, bit_chk, 0); 2005 *bit_chk = *bit_chk >> (per_dqs * 2006 (read_group - (write_group * ratio))); 2007 ret = (*bit_chk == 0); 2008 } 2009 *sticky_bit_chk = *sticky_bit_chk | *bit_chk; 2010 ret = ret && (*sticky_bit_chk == correct_mask); 2011 debug_cond(DLEVEL >= 2, 2012 "%s:%d center(left): dtap=%u => %u == %u && %u", 2013 __func__, __LINE__, d, 2014 *sticky_bit_chk, correct_mask, ret); 2015 return ret; 2016 } 2017 2018 /** 2019 * search_left_edge() - Find left edge of DQ/DQS working phase 2020 * @write: Perform read (Stage 2) or write (Stage 3) calibration 2021 * @rank_bgn: Rank number 2022 * @write_group: Write Group 2023 * @read_group: Read Group 2024 * @test_bgn: Rank number to begin the test 2025 * @sticky_bit_chk: Resulting sticky bit mask after the test 2026 * @left_edge: Left edge of the DQ/DQS phase 2027 * @right_edge: Right edge of the DQ/DQS phase 2028 * @use_read_test: Perform read test 2029 * 2030 * Find left edge of DQ/DQS working phase. 2031 */ 2032 static void search_left_edge(const int write, const int rank_bgn, 2033 const u32 write_group, const u32 read_group, const u32 test_bgn, 2034 u32 *sticky_bit_chk, 2035 int *left_edge, int *right_edge, const u32 use_read_test) 2036 { 2037 const u32 delay_max = write ? iocfg->io_out1_delay_max : 2038 iocfg->io_in_delay_max; 2039 const u32 dqs_max = write ? iocfg->io_out1_delay_max : 2040 iocfg->dqs_in_delay_max; 2041 const u32 per_dqs = write ? 
				  rwcfg->mem_dq_per_write_dqs :
				  rwcfg->mem_dq_per_read_dqs;
	u32 stop, bit_chk;
	int i, d;

	for (d = 0; d <= dqs_max; d++) {
		if (write)
			scc_mgr_apply_group_dq_out1_delay(d);
		else
			scc_mgr_apply_group_dq_in_delay(test_bgn, d);

		writel(0, &sdr_scc_mgr->update);

		stop = search_stop_check(write, d, rank_bgn, write_group,
					 read_group, &bit_chk, sticky_bit_chk,
					 use_read_test);
		if (stop == 1)
			break;

		/* stop != 1 */
		for (i = 0; i < per_dqs; i++) {
			if (bit_chk & 1) {
				/*
				 * Remember a passing test as
				 * the left_edge.
				 */
				left_edge[i] = d;
			} else {
				/*
				 * If a left edge has not been seen
				 * yet, then a future passing test
				 * will mark this edge as the right
				 * edge.
				 */
				if (left_edge[i] == delay_max + 1)
					right_edge[i] = -(d + 1);
			}
			bit_chk >>= 1;
		}
	}

	/* Reset DQ delay chains to 0 */
	if (write)
		scc_mgr_apply_group_dq_out1_delay(0);
	else
		scc_mgr_apply_group_dq_in_delay(test_bgn, 0);

	*sticky_bit_chk = 0;
	for (i = per_dqs - 1; i >= 0; i--) {
		debug_cond(DLEVEL >= 2,
			   "%s:%d vfifo_center: left_edge[%u]: %d right_edge[%u]: %d\n",
			   __func__, __LINE__, i, left_edge[i],
			   i, right_edge[i]);

		/*
		 * Check for cases where we haven't found the left edge,
		 * which makes our assignment of the right edge invalid.
		 * Reset it to the illegal value.
		 */
		if ((left_edge[i] == delay_max + 1) &&
		    (right_edge[i] != delay_max + 1)) {
			right_edge[i] = delay_max + 1;
			debug_cond(DLEVEL >= 2,
				   "%s:%d vfifo_center: reset right_edge[%u]: %d\n",
				   __func__, __LINE__, i, right_edge[i]);
		}

		/*
		 * Reset sticky bit
		 * READ: except for bits where we have seen both
		 *       the left and right edge.
		 * WRITE: except for bits where we have seen the
		 *        left edge.
		 */
		*sticky_bit_chk <<= 1;
		if (write) {
			if (left_edge[i] != delay_max + 1)
				*sticky_bit_chk |= 1;
		} else {
			if ((left_edge[i] != delay_max + 1) &&
			    (right_edge[i] != delay_max + 1))
				*sticky_bit_chk |= 1;
		}
	}
}

/**
 * search_right_edge() - Find right edge of DQ/DQS working phase
 * @write: Perform read (Stage 2) or write (Stage 3) calibration
 * @rank_bgn: Rank number
 * @write_group: Write Group
 * @read_group: Read Group
 * @start_dqs: DQS start phase
 * @start_dqs_en: DQS enable start phase
 * @sticky_bit_chk: Resulting sticky bit mask after the test
 * @left_edge: Left edge of the DQ/DQS phase
 * @right_edge: Right edge of the DQ/DQS phase
 * @use_read_test: Perform read test
 *
 * Find right edge of DQ/DQS working phase.
 */
static int search_right_edge(const int write, const int rank_bgn,
			     const u32 write_group, const u32 read_group,
			     const int start_dqs, const int start_dqs_en,
			     u32 *sticky_bit_chk,
			     int *left_edge, int *right_edge,
			     const u32 use_read_test)
{
	const u32 delay_max = write ? iocfg->io_out1_delay_max :
				      iocfg->io_in_delay_max;
	const u32 dqs_max = write ? iocfg->io_out1_delay_max :
				    iocfg->dqs_in_delay_max;
	const u32 per_dqs = write ?
rwcfg->mem_dq_per_write_dqs : 2153 rwcfg->mem_dq_per_read_dqs; 2154 u32 stop, bit_chk; 2155 int i, d; 2156 2157 for (d = 0; d <= dqs_max - start_dqs; d++) { 2158 if (write) { /* WRITE-ONLY */ 2159 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, 2160 d + start_dqs); 2161 } else { /* READ-ONLY */ 2162 scc_mgr_set_dqs_bus_in_delay(read_group, d + start_dqs); 2163 if (iocfg->shift_dqs_en_when_shift_dqs) { 2164 u32 delay = d + start_dqs_en; 2165 if (delay > iocfg->dqs_en_delay_max) 2166 delay = iocfg->dqs_en_delay_max; 2167 scc_mgr_set_dqs_en_delay(read_group, delay); 2168 } 2169 scc_mgr_load_dqs(read_group); 2170 } 2171 2172 writel(0, &sdr_scc_mgr->update); 2173 2174 stop = search_stop_check(write, d, rank_bgn, write_group, 2175 read_group, &bit_chk, sticky_bit_chk, 2176 use_read_test); 2177 if (stop == 1) { 2178 if (write && (d == 0)) { /* WRITE-ONLY */ 2179 for (i = 0; i < rwcfg->mem_dq_per_write_dqs; 2180 i++) { 2181 /* 2182 * d = 0 failed, but it passed when 2183 * testing the left edge, so it must be 2184 * marginal, set it to -1 2185 */ 2186 if (right_edge[i] == delay_max + 1 && 2187 left_edge[i] != delay_max + 1) 2188 right_edge[i] = -1; 2189 } 2190 } 2191 break; 2192 } 2193 2194 /* stop != 1 */ 2195 for (i = 0; i < per_dqs; i++) { 2196 if (bit_chk & 1) { 2197 /* 2198 * Remember a passing test as 2199 * the right_edge. 2200 */ 2201 right_edge[i] = d; 2202 } else { 2203 if (d != 0) { 2204 /* 2205 * If a right edge has not 2206 * been seen yet, then a future 2207 * passing test will mark this 2208 * edge as the left edge. 2209 */ 2210 if (right_edge[i] == delay_max + 1) 2211 left_edge[i] = -(d + 1); 2212 } else { 2213 /* 2214 * d = 0 failed, but it passed 2215 * when testing the left edge, 2216 * so it must be marginal, set 2217 * it to -1 2218 */ 2219 if (right_edge[i] == delay_max + 1 && 2220 left_edge[i] != delay_max + 1) 2221 right_edge[i] = -1; 2222 /* 2223 * If a right edge has not been 2224 * seen yet, then a future 2225 * passing test will mark this 2226 * edge as the left edge. 2227 */ 2228 else if (right_edge[i] == delay_max + 1) 2229 left_edge[i] = -(d + 1); 2230 } 2231 } 2232 2233 debug_cond(DLEVEL >= 2, "%s:%d center[r,d=%u]: ", 2234 __func__, __LINE__, d); 2235 debug_cond(DLEVEL >= 2, 2236 "bit_chk_test=%i left_edge[%u]: %d ", 2237 bit_chk & 1, i, left_edge[i]); 2238 debug_cond(DLEVEL >= 2, "right_edge[%u]: %d\n", i, 2239 right_edge[i]); 2240 bit_chk >>= 1; 2241 } 2242 } 2243 2244 /* Check that all bits have a window */ 2245 for (i = 0; i < per_dqs; i++) { 2246 debug_cond(DLEVEL >= 2, 2247 "%s:%d write_center: left_edge[%u]: %d right_edge[%u]: %d", 2248 __func__, __LINE__, i, left_edge[i], 2249 i, right_edge[i]); 2250 if ((left_edge[i] == dqs_max + 1) || 2251 (right_edge[i] == dqs_max + 1)) 2252 return i + 1; /* FIXME: If we fail, retval > 0 */ 2253 } 2254 2255 return 0; 2256 } 2257 2258 /** 2259 * get_window_mid_index() - Find the best middle setting of DQ/DQS phase 2260 * @write: Perform read (Stage 2) or write (Stage 3) calibration 2261 * @left_edge: Left edge of the DQ/DQS phase 2262 * @right_edge: Right edge of the DQ/DQS phase 2263 * @mid_min: Best DQ/DQS phase middle setting 2264 * 2265 * Find index and value of the middle of the DQ/DQS working phase. 2266 */ 2267 static int get_window_mid_index(const int write, int *left_edge, 2268 int *right_edge, int *mid_min) 2269 { 2270 const u32 per_dqs = write ? 
rwcfg->mem_dq_per_write_dqs : 2271 rwcfg->mem_dq_per_read_dqs; 2272 int i, mid, min_index; 2273 2274 /* Find middle of window for each DQ bit */ 2275 *mid_min = left_edge[0] - right_edge[0]; 2276 min_index = 0; 2277 for (i = 1; i < per_dqs; i++) { 2278 mid = left_edge[i] - right_edge[i]; 2279 if (mid < *mid_min) { 2280 *mid_min = mid; 2281 min_index = i; 2282 } 2283 } 2284 2285 /* 2286 * -mid_min/2 represents the amount that we need to move DQS. 2287 * If mid_min is odd and positive we'll need to add one to make 2288 * sure the rounding in further calculations is correct (always 2289 * bias to the right), so just add 1 for all positive values. 2290 */ 2291 if (*mid_min > 0) 2292 (*mid_min)++; 2293 *mid_min = *mid_min / 2; 2294 2295 debug_cond(DLEVEL >= 1, "%s:%d vfifo_center: *mid_min=%d (index=%u)\n", 2296 __func__, __LINE__, *mid_min, min_index); 2297 return min_index; 2298 } 2299 2300 /** 2301 * center_dq_windows() - Center the DQ/DQS windows 2302 * @write: Perform read (Stage 2) or write (Stage 3) calibration 2303 * @left_edge: Left edge of the DQ/DQS phase 2304 * @right_edge: Right edge of the DQ/DQS phase 2305 * @mid_min: Adjusted DQ/DQS phase middle setting 2306 * @orig_mid_min: Original DQ/DQS phase middle setting 2307 * @min_index: DQ/DQS phase middle setting index 2308 * @test_bgn: Rank number to begin the test 2309 * @dq_margin: Amount of shift for the DQ 2310 * @dqs_margin: Amount of shift for the DQS 2311 * 2312 * Align the DQ/DQS windows in each group. 2313 */ 2314 static void center_dq_windows(const int write, int *left_edge, int *right_edge, 2315 const int mid_min, const int orig_mid_min, 2316 const int min_index, const int test_bgn, 2317 int *dq_margin, int *dqs_margin) 2318 { 2319 const s32 delay_max = write ? iocfg->io_out1_delay_max : 2320 iocfg->io_in_delay_max; 2321 const s32 per_dqs = write ? rwcfg->mem_dq_per_write_dqs : 2322 rwcfg->mem_dq_per_read_dqs; 2323 const s32 delay_off = write ? 
SCC_MGR_IO_OUT1_DELAY_OFFSET : 2324 SCC_MGR_IO_IN_DELAY_OFFSET; 2325 const s32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | delay_off; 2326 2327 s32 temp_dq_io_delay1; 2328 int shift_dq, i, p; 2329 2330 /* Initialize data for export structures */ 2331 *dqs_margin = delay_max + 1; 2332 *dq_margin = delay_max + 1; 2333 2334 /* add delay to bring centre of all DQ windows to the same "level" */ 2335 for (i = 0, p = test_bgn; i < per_dqs; i++, p++) { 2336 /* Use values before divide by 2 to reduce round off error */ 2337 shift_dq = (left_edge[i] - right_edge[i] - 2338 (left_edge[min_index] - right_edge[min_index]))/2 + 2339 (orig_mid_min - mid_min); 2340 2341 debug_cond(DLEVEL >= 2, 2342 "vfifo_center: before: shift_dq[%u]=%d\n", 2343 i, shift_dq); 2344 2345 temp_dq_io_delay1 = readl(addr + (i << 2)); 2346 2347 if (shift_dq + temp_dq_io_delay1 > delay_max) 2348 shift_dq = delay_max - temp_dq_io_delay1; 2349 else if (shift_dq + temp_dq_io_delay1 < 0) 2350 shift_dq = -temp_dq_io_delay1; 2351 2352 debug_cond(DLEVEL >= 2, 2353 "vfifo_center: after: shift_dq[%u]=%d\n", 2354 i, shift_dq); 2355 2356 if (write) 2357 scc_mgr_set_dq_out1_delay(i, 2358 temp_dq_io_delay1 + shift_dq); 2359 else 2360 scc_mgr_set_dq_in_delay(p, 2361 temp_dq_io_delay1 + shift_dq); 2362 2363 scc_mgr_load_dq(p); 2364 2365 debug_cond(DLEVEL >= 2, 2366 "vfifo_center: margin[%u]=[%d,%d]\n", i, 2367 left_edge[i] - shift_dq + (-mid_min), 2368 right_edge[i] + shift_dq - (-mid_min)); 2369 2370 /* To determine values for export structures */ 2371 if (left_edge[i] - shift_dq + (-mid_min) < *dq_margin) 2372 *dq_margin = left_edge[i] - shift_dq + (-mid_min); 2373 2374 if (right_edge[i] + shift_dq - (-mid_min) < *dqs_margin) 2375 *dqs_margin = right_edge[i] + shift_dq - (-mid_min); 2376 } 2377 } 2378 2379 /** 2380 * rw_mgr_mem_calibrate_vfifo_center() - Per-bit deskew DQ and centering 2381 * @rank_bgn: Rank number 2382 * @rw_group: Read/Write Group 2383 * @test_bgn: Rank at which the test begins 2384 * @use_read_test: Perform a read test 2385 * @update_fom: Update FOM 2386 * 2387 * Per-bit deskew DQ and centering. 2388 */ 2389 static int rw_mgr_mem_calibrate_vfifo_center(const u32 rank_bgn, 2390 const u32 rw_group, const u32 test_bgn, 2391 const int use_read_test, const int update_fom) 2392 { 2393 const u32 addr = 2394 SDR_PHYGRP_SCCGRP_ADDRESS + SCC_MGR_DQS_IN_DELAY_OFFSET + 2395 (rw_group << 2); 2396 /* 2397 * Store these as signed since there are comparisons with 2398 * signed numbers. 
2399 */ 2400 u32 sticky_bit_chk; 2401 int32_t left_edge[rwcfg->mem_dq_per_read_dqs]; 2402 int32_t right_edge[rwcfg->mem_dq_per_read_dqs]; 2403 int32_t orig_mid_min, mid_min; 2404 int32_t new_dqs, start_dqs, start_dqs_en = 0, final_dqs_en; 2405 int32_t dq_margin, dqs_margin; 2406 int i, min_index; 2407 int ret; 2408 2409 debug("%s:%d: %u %u", __func__, __LINE__, rw_group, test_bgn); 2410 2411 start_dqs = readl(addr); 2412 if (iocfg->shift_dqs_en_when_shift_dqs) 2413 start_dqs_en = readl(addr - iocfg->dqs_en_delay_offset); 2414 2415 /* set the left and right edge of each bit to an illegal value */ 2416 /* use (iocfg->io_in_delay_max + 1) as an illegal value */ 2417 sticky_bit_chk = 0; 2418 for (i = 0; i < rwcfg->mem_dq_per_read_dqs; i++) { 2419 left_edge[i] = iocfg->io_in_delay_max + 1; 2420 right_edge[i] = iocfg->io_in_delay_max + 1; 2421 } 2422 2423 /* Search for the left edge of the window for each bit */ 2424 search_left_edge(0, rank_bgn, rw_group, rw_group, test_bgn, 2425 &sticky_bit_chk, 2426 left_edge, right_edge, use_read_test); 2427 2428 2429 /* Search for the right edge of the window for each bit */ 2430 ret = search_right_edge(0, rank_bgn, rw_group, rw_group, 2431 start_dqs, start_dqs_en, 2432 &sticky_bit_chk, 2433 left_edge, right_edge, use_read_test); 2434 if (ret) { 2435 /* 2436 * Restore delay chain settings before letting the loop 2437 * in rw_mgr_mem_calibrate_vfifo to retry different 2438 * dqs/ck relationships. 2439 */ 2440 scc_mgr_set_dqs_bus_in_delay(rw_group, start_dqs); 2441 if (iocfg->shift_dqs_en_when_shift_dqs) 2442 scc_mgr_set_dqs_en_delay(rw_group, start_dqs_en); 2443 2444 scc_mgr_load_dqs(rw_group); 2445 writel(0, &sdr_scc_mgr->update); 2446 2447 debug_cond(DLEVEL >= 1, 2448 "%s:%d vfifo_center: failed to find edge [%u]: %d %d", 2449 __func__, __LINE__, i, left_edge[i], right_edge[i]); 2450 if (use_read_test) { 2451 set_failing_group_stage(rw_group * 2452 rwcfg->mem_dq_per_read_dqs + i, 2453 CAL_STAGE_VFIFO, 2454 CAL_SUBSTAGE_VFIFO_CENTER); 2455 } else { 2456 set_failing_group_stage(rw_group * 2457 rwcfg->mem_dq_per_read_dqs + i, 2458 CAL_STAGE_VFIFO_AFTER_WRITES, 2459 CAL_SUBSTAGE_VFIFO_CENTER); 2460 } 2461 return -EIO; 2462 } 2463 2464 min_index = get_window_mid_index(0, left_edge, right_edge, &mid_min); 2465 2466 /* Determine the amount we can change DQS (which is -mid_min) */ 2467 orig_mid_min = mid_min; 2468 new_dqs = start_dqs - mid_min; 2469 if (new_dqs > iocfg->dqs_in_delay_max) 2470 new_dqs = iocfg->dqs_in_delay_max; 2471 else if (new_dqs < 0) 2472 new_dqs = 0; 2473 2474 mid_min = start_dqs - new_dqs; 2475 debug_cond(DLEVEL >= 1, "vfifo_center: new mid_min=%d new_dqs=%d\n", 2476 mid_min, new_dqs); 2477 2478 if (iocfg->shift_dqs_en_when_shift_dqs) { 2479 if (start_dqs_en - mid_min > iocfg->dqs_en_delay_max) 2480 mid_min += start_dqs_en - mid_min - 2481 iocfg->dqs_en_delay_max; 2482 else if (start_dqs_en - mid_min < 0) 2483 mid_min += start_dqs_en - mid_min; 2484 } 2485 new_dqs = start_dqs - mid_min; 2486 2487 debug_cond(DLEVEL >= 1, 2488 "vfifo_center: start_dqs=%d start_dqs_en=%d new_dqs=%d mid_min=%d\n", 2489 start_dqs, 2490 iocfg->shift_dqs_en_when_shift_dqs ? start_dqs_en : -1, 2491 new_dqs, mid_min); 2492 2493 /* Add delay to bring centre of all DQ windows to the same "level". 
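	 *
	 * As a purely illustrative example of the shift_dq computation in
	 * center_dq_windows() (numbers are made up): if DQ bit i measured
	 * left_edge[i] = 12 and right_edge[i] = 4 while the narrowest bit
	 * (min_index) measured left_edge = 8 and right_edge = 6, and DQS is
	 * not moved (orig_mid_min == mid_min), then
	 * shift_dq = (12 - 4 - (8 - 6)) / 2 = 3, i.e. three extra delay taps
	 * are added on that DQ line (subject to clamping against the
	 * delay-chain maximum) to line its window centre up with the others.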
 */
	center_dq_windows(0, left_edge, right_edge, mid_min, orig_mid_min,
			  min_index, test_bgn, &dq_margin, &dqs_margin);

	/* Move DQS-en */
	if (iocfg->shift_dqs_en_when_shift_dqs) {
		final_dqs_en = start_dqs_en - mid_min;
		scc_mgr_set_dqs_en_delay(rw_group, final_dqs_en);
		scc_mgr_load_dqs(rw_group);
	}

	/* Move DQS */
	scc_mgr_set_dqs_bus_in_delay(rw_group, new_dqs);
	scc_mgr_load_dqs(rw_group);
	debug_cond(DLEVEL >= 2,
		   "%s:%d vfifo_center: dq_margin=%d dqs_margin=%d",
		   __func__, __LINE__, dq_margin, dqs_margin);

	/*
	 * Do not remove this line as it makes sure all of our decisions
	 * have been applied. Apply the update bit.
	 */
	writel(0, &sdr_scc_mgr->update);

	if ((dq_margin < 0) || (dqs_margin < 0))
		return -EINVAL;

	return 0;
}

/**
 * rw_mgr_mem_calibrate_guaranteed_write() - Perform guaranteed write into the device
 * @rw_group: Read/Write Group
 * @phase: DQ/DQS phase
 *
 * Because initially no communication can be reliably performed with the memory
 * device, the sequencer uses a guaranteed write mechanism to write data into
 * the memory device.
 */
static int rw_mgr_mem_calibrate_guaranteed_write(const u32 rw_group,
						 const u32 phase)
{
	int ret;

	/* Set a particular DQ/DQS phase. */
	scc_mgr_set_dqdqs_output_phase_all_ranks(rw_group, phase);

	debug_cond(DLEVEL >= 1, "%s:%d guaranteed write: g=%u p=%u\n",
		   __func__, __LINE__, rw_group, phase);

	/*
	 * Altera EMI_RM 2015.05.04 :: Figure 1-25
	 * Load up the patterns used by read calibration using the
	 * current DQDQS phase.
	 */
	rw_mgr_mem_calibrate_read_load_patterns(0, 1);

	if (gbl->phy_debug_mode_flags & PHY_DEBUG_DISABLE_GUARANTEED_READ)
		return 0;

	/*
	 * Altera EMI_RM 2015.05.04 :: Figure 1-26
	 * Back-to-Back reads of the patterns used for calibration.
	 */
	ret = rw_mgr_mem_calibrate_read_test_patterns(0, rw_group, 1);
	if (ret)
		debug_cond(DLEVEL >= 1,
			   "%s:%d Guaranteed read test failed: g=%u p=%u\n",
			   __func__, __LINE__, rw_group, phase);
	return ret;
}

/**
 * rw_mgr_mem_calibrate_dqs_enable_calibration() - DQS Enable Calibration
 * @rw_group: Read/Write Group
 * @test_bgn: Rank at which the test begins
 *
 * DQS enable calibration ensures reliable capture of the DQ signal without
 * glitches on the DQS line.
 */
static int rw_mgr_mem_calibrate_dqs_enable_calibration(const u32 rw_group,
						       const u32 test_bgn)
{
	/*
	 * Altera EMI_RM 2015.05.04 :: Figure 1-27
	 * DQS and DQS Enable Signal Relationships.
	 */

	/* We start at zero, so have one less dq to divide among */
	const u32 delay_step = iocfg->io_in_delay_max /
			       (rwcfg->mem_dq_per_read_dqs - 1);
	int ret;
	u32 i, p, d, r;

	debug("%s:%d (%u,%u)\n", __func__, __LINE__, rw_group, test_bgn);

	/* Try different dq_in_delays since the DQ path is shorter than DQS.
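	 *
	 * For illustration only (assumed values, not from a real config):
	 * with io_in_delay_max = 31 and 8 DQ pins per read DQS,
	 * delay_step = 31 / 7 = 4, so the loop below spreads the input
	 * delays 0, 4, 8, ... 28 across the DQ pins of the group before
	 * the DQS enable sweep is run.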
 */
	for (r = 0; r < rwcfg->mem_number_of_ranks;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		for (i = 0, p = test_bgn, d = 0;
		     i < rwcfg->mem_dq_per_read_dqs;
		     i++, p++, d += delay_step) {
			debug_cond(DLEVEL >= 1,
				   "%s:%d: g=%u r=%u i=%u p=%u d=%u\n",
				   __func__, __LINE__, rw_group, r, i, p, d);

			scc_mgr_set_dq_in_delay(p, d);
			scc_mgr_load_dq(p);
		}

		writel(0, &sdr_scc_mgr->update);
	}

	/*
	 * Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different
	 * dq_in_delay values
	 */
	ret = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(rw_group);

	debug_cond(DLEVEL >= 1,
		   "%s:%d: g=%u found=%u; Resetting delay chain to zero\n",
		   __func__, __LINE__, rw_group, !ret);

	for (r = 0; r < rwcfg->mem_number_of_ranks;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		scc_mgr_apply_group_dq_in_delay(test_bgn, 0);
		writel(0, &sdr_scc_mgr->update);
	}

	return ret;
}

/**
 * rw_mgr_mem_calibrate_dq_dqs_centering() - Centering DQ/DQS
 * @rw_group: Read/Write Group
 * @test_bgn: Rank at which the test begins
 * @use_read_test: Perform a read test
 * @update_fom: Update FOM
 *
 * The centering DQ/DQS stage attempts to align DQ and DQS signals on reads
 * within a group.
 */
static int
rw_mgr_mem_calibrate_dq_dqs_centering(const u32 rw_group, const u32 test_bgn,
				      const int use_read_test,
				      const int update_fom)
{
	int ret, grp_calibrated;
	u32 rank_bgn, sr;

	/*
	 * Altera EMI_RM 2015.05.04 :: Figure 1-28
	 * Read per-bit deskew can be done on a per shadow register basis.
	 */
	grp_calibrated = 1;
	for (rank_bgn = 0, sr = 0;
	     rank_bgn < rwcfg->mem_number_of_ranks;
	     rank_bgn += NUM_RANKS_PER_SHADOW_REG, sr++) {
		ret = rw_mgr_mem_calibrate_vfifo_center(rank_bgn, rw_group,
							test_bgn,
							use_read_test,
							update_fom);
		if (!ret)
			continue;

		grp_calibrated = 0;
	}

	if (!grp_calibrated)
		return -EIO;

	return 0;
}

/**
 * rw_mgr_mem_calibrate_vfifo() - Calibrate the read valid prediction FIFO
 * @rw_group: Read/Write Group
 * @test_bgn: Rank at which the test begins
 *
 * Stage 1: Calibrate the read valid prediction FIFO.
 *
 * This function implements UniPHY calibration Stage 1, as explained in
 * detail in Altera EMI_RM 2015.05.04, "UniPHY Calibration Stages".
 *
 * - read valid prediction will consist of finding:
 *   - DQS enable phase and DQS enable delay (DQS Enable Calibration)
 *   - DQS input phase and DQS input delay (DQ/DQS Centering)
 * - we also do a per-bit deskew on the DQ lines.
 */
static int rw_mgr_mem_calibrate_vfifo(const u32 rw_group, const u32 test_bgn)
{
	u32 p, d;
	u32 dtaps_per_ptap;
	u32 failed_substage;

	int ret;

	debug("%s:%d: %u %u\n", __func__, __LINE__, rw_group, test_bgn);

	/* Update info for sims */
	reg_file_set_group(rw_group);
	reg_file_set_stage(CAL_STAGE_VFIFO);
	reg_file_set_sub_stage(CAL_SUBSTAGE_GUARANTEED_READ);

	failed_substage = CAL_SUBSTAGE_GUARANTEED_READ;

	/* USER Determine number of delay taps for each phase tap.
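	 *
	 * Purely illustrative (assumed tap delays): with
	 * delay_per_opa_tap = 416 ps and delay_per_dqs_en_dchain_tap = 25 ps,
	 * DIV_ROUND_UP(416, 25) - 1 = 16, so the outer loop below tries
	 * d = 0, 2, ... 16, and for each d sweeps every DQ/DQS output phase.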
*/ 2701 dtaps_per_ptap = DIV_ROUND_UP(iocfg->delay_per_opa_tap, 2702 iocfg->delay_per_dqs_en_dchain_tap) - 1; 2703 2704 for (d = 0; d <= dtaps_per_ptap; d += 2) { 2705 /* 2706 * In RLDRAMX we may be messing the delay of pins in 2707 * the same write rw_group but outside of the current read 2708 * the rw_group, but that's ok because we haven't calibrated 2709 * output side yet. 2710 */ 2711 if (d > 0) { 2712 scc_mgr_apply_group_all_out_delay_add_all_ranks( 2713 rw_group, d); 2714 } 2715 2716 for (p = 0; p <= iocfg->dqdqs_out_phase_max; p++) { 2717 /* 1) Guaranteed Write */ 2718 ret = rw_mgr_mem_calibrate_guaranteed_write(rw_group, p); 2719 if (ret) 2720 break; 2721 2722 /* 2) DQS Enable Calibration */ 2723 ret = rw_mgr_mem_calibrate_dqs_enable_calibration(rw_group, 2724 test_bgn); 2725 if (ret) { 2726 failed_substage = CAL_SUBSTAGE_DQS_EN_PHASE; 2727 continue; 2728 } 2729 2730 /* 3) Centering DQ/DQS */ 2731 /* 2732 * If doing read after write calibration, do not update 2733 * FOM now. Do it then. 2734 */ 2735 ret = rw_mgr_mem_calibrate_dq_dqs_centering(rw_group, 2736 test_bgn, 1, 0); 2737 if (ret) { 2738 failed_substage = CAL_SUBSTAGE_VFIFO_CENTER; 2739 continue; 2740 } 2741 2742 /* All done. */ 2743 goto cal_done_ok; 2744 } 2745 } 2746 2747 /* Calibration Stage 1 failed. */ 2748 set_failing_group_stage(rw_group, CAL_STAGE_VFIFO, failed_substage); 2749 return 0; 2750 2751 /* Calibration Stage 1 completed OK. */ 2752 cal_done_ok: 2753 /* 2754 * Reset the delay chains back to zero if they have moved > 1 2755 * (check for > 1 because loop will increase d even when pass in 2756 * first case). 2757 */ 2758 if (d > 2) 2759 scc_mgr_zero_group(rw_group, 1); 2760 2761 return 1; 2762 } 2763 2764 /** 2765 * rw_mgr_mem_calibrate_vfifo_end() - DQ/DQS Centering. 2766 * @rw_group: Read/Write Group 2767 * @test_bgn: Rank at which the test begins 2768 * 2769 * Stage 3: DQ/DQS Centering. 2770 * 2771 * This function implements UniPHY calibration Stage 3, as explained in 2772 * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages". 2773 */ 2774 static int rw_mgr_mem_calibrate_vfifo_end(const u32 rw_group, 2775 const u32 test_bgn) 2776 { 2777 int ret; 2778 2779 debug("%s:%d %u %u", __func__, __LINE__, rw_group, test_bgn); 2780 2781 /* Update info for sims. */ 2782 reg_file_set_group(rw_group); 2783 reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES); 2784 reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER); 2785 2786 ret = rw_mgr_mem_calibrate_dq_dqs_centering(rw_group, test_bgn, 0, 1); 2787 if (ret) 2788 set_failing_group_stage(rw_group, 2789 CAL_STAGE_VFIFO_AFTER_WRITES, 2790 CAL_SUBSTAGE_VFIFO_CENTER); 2791 return ret; 2792 } 2793 2794 /** 2795 * rw_mgr_mem_calibrate_lfifo() - Minimize latency 2796 * 2797 * Stage 4: Minimize latency. 2798 * 2799 * This function implements UniPHY calibration Stage 4, as explained in 2800 * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages". 2801 * Calibrate LFIFO to find smallest read latency. 2802 */ 2803 static u32 rw_mgr_mem_calibrate_lfifo(void) 2804 { 2805 int found_one = 0; 2806 2807 debug("%s:%d\n", __func__, __LINE__); 2808 2809 /* Update info for sims. 
*/ 2810 reg_file_set_stage(CAL_STAGE_LFIFO); 2811 reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY); 2812 2813 /* Load up the patterns used by read calibration for all ranks */ 2814 rw_mgr_mem_calibrate_read_load_patterns(0, 1); 2815 2816 do { 2817 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); 2818 debug_cond(DLEVEL >= 2, "%s:%d lfifo: read_lat=%u", 2819 __func__, __LINE__, gbl->curr_read_lat); 2820 2821 if (!rw_mgr_mem_calibrate_read_test_all_ranks(0, NUM_READ_TESTS, 2822 PASS_ALL_BITS, 1)) 2823 break; 2824 2825 found_one = 1; 2826 /* 2827 * Reduce read latency and see if things are 2828 * working correctly. 2829 */ 2830 gbl->curr_read_lat--; 2831 } while (gbl->curr_read_lat > 0); 2832 2833 /* Reset the fifos to get pointers to known state. */ 2834 writel(0, &phy_mgr_cmd->fifo_reset); 2835 2836 if (found_one) { 2837 /* Add a fudge factor to the read latency that was determined */ 2838 gbl->curr_read_lat += 2; 2839 writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat); 2840 debug_cond(DLEVEL >= 2, 2841 "%s:%d lfifo: success: using read_lat=%u\n", 2842 __func__, __LINE__, gbl->curr_read_lat); 2843 } else { 2844 set_failing_group_stage(0xff, CAL_STAGE_LFIFO, 2845 CAL_SUBSTAGE_READ_LATENCY); 2846 2847 debug_cond(DLEVEL >= 2, 2848 "%s:%d lfifo: failed at initial read_lat=%u\n", 2849 __func__, __LINE__, gbl->curr_read_lat); 2850 } 2851 2852 return found_one; 2853 } 2854 2855 /** 2856 * search_window() - Search for the/part of the window with DM/DQS shift 2857 * @search_dm: If 1, search for the DM shift, if 0, search for DQS shift 2858 * @rank_bgn: Rank number 2859 * @write_group: Write Group 2860 * @bgn_curr: Current window begin 2861 * @end_curr: Current window end 2862 * @bgn_best: Current best window begin 2863 * @end_best: Current best window end 2864 * @win_best: Size of the best window 2865 * @new_dqs: New DQS value (only applicable if search_dm = 0). 2866 * 2867 * Search for the/part of the window with DM/DQS shift. 2868 */ 2869 static void search_window(const int search_dm, 2870 const u32 rank_bgn, const u32 write_group, 2871 int *bgn_curr, int *end_curr, int *bgn_best, 2872 int *end_best, int *win_best, int new_dqs) 2873 { 2874 u32 bit_chk; 2875 const int max = iocfg->io_out1_delay_max - new_dqs; 2876 int d, di; 2877 2878 /* Search for the/part of the window with DM/DQS shift. */ 2879 for (di = max; di >= 0; di -= DELTA_D) { 2880 if (search_dm) { 2881 d = di; 2882 scc_mgr_apply_group_dm_out1_delay(d); 2883 } else { 2884 /* For DQS, we go from 0...max */ 2885 d = max - di; 2886 /* 2887 * Note: This only shifts DQS, so are we limiting 2888 * ourselves to width of DQ unnecessarily. 2889 */ 2890 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, 2891 d + new_dqs); 2892 } 2893 2894 writel(0, &sdr_scc_mgr->update); 2895 2896 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1, 2897 PASS_ALL_BITS, &bit_chk, 2898 0)) { 2899 /* Set current end of the window. */ 2900 *end_curr = search_dm ? -d : d; 2901 2902 /* 2903 * If a starting edge of our window has not been seen 2904 * this is our current start of the DM window. 2905 */ 2906 if (*bgn_curr == iocfg->io_out1_delay_max + 1) 2907 *bgn_curr = search_dm ? -d : d; 2908 2909 /* 2910 * If current window is bigger than best seen. 2911 * Set best seen to be current window. 2912 */ 2913 if ((*end_curr - *bgn_curr + 1) > *win_best) { 2914 *win_best = *end_curr - *bgn_curr + 1; 2915 *bgn_best = *bgn_curr; 2916 *end_best = *end_curr; 2917 } 2918 } else { 2919 /* We just saw a failing test. Reset temp edge. 
*/ 2920 *bgn_curr = iocfg->io_out1_delay_max + 1; 2921 *end_curr = iocfg->io_out1_delay_max + 1; 2922 2923 /* Early exit is only applicable to DQS. */ 2924 if (search_dm) 2925 continue; 2926 2927 /* 2928 * Early exit optimization: if the remaining delay 2929 * chain space is less than already seen largest 2930 * window we can exit. 2931 */ 2932 if (*win_best - 1 > iocfg->io_out1_delay_max - new_dqs - d) 2933 break; 2934 } 2935 } 2936 } 2937 2938 /* 2939 * rw_mgr_mem_calibrate_writes_center() - Center all windows 2940 * @rank_bgn: Rank number 2941 * @write_group: Write group 2942 * @test_bgn: Rank at which the test begins 2943 * 2944 * Center all windows. Do per-bit-deskew to possibly increase size of 2945 * certain windows. 2946 */ 2947 static int 2948 rw_mgr_mem_calibrate_writes_center(const u32 rank_bgn, const u32 write_group, 2949 const u32 test_bgn) 2950 { 2951 int i; 2952 u32 sticky_bit_chk; 2953 u32 min_index; 2954 int left_edge[rwcfg->mem_dq_per_write_dqs]; 2955 int right_edge[rwcfg->mem_dq_per_write_dqs]; 2956 int mid; 2957 int mid_min, orig_mid_min; 2958 int new_dqs, start_dqs; 2959 int dq_margin, dqs_margin, dm_margin; 2960 int bgn_curr = iocfg->io_out1_delay_max + 1; 2961 int end_curr = iocfg->io_out1_delay_max + 1; 2962 int bgn_best = iocfg->io_out1_delay_max + 1; 2963 int end_best = iocfg->io_out1_delay_max + 1; 2964 int win_best = 0; 2965 2966 int ret; 2967 2968 debug("%s:%d %u %u", __func__, __LINE__, write_group, test_bgn); 2969 2970 dm_margin = 0; 2971 2972 start_dqs = readl((SDR_PHYGRP_SCCGRP_ADDRESS | 2973 SCC_MGR_IO_OUT1_DELAY_OFFSET) + 2974 (rwcfg->mem_dq_per_write_dqs << 2)); 2975 2976 /* Per-bit deskew. */ 2977 2978 /* 2979 * Set the left and right edge of each bit to an illegal value. 2980 * Use (iocfg->io_out1_delay_max + 1) as an illegal value. 2981 */ 2982 sticky_bit_chk = 0; 2983 for (i = 0; i < rwcfg->mem_dq_per_write_dqs; i++) { 2984 left_edge[i] = iocfg->io_out1_delay_max + 1; 2985 right_edge[i] = iocfg->io_out1_delay_max + 1; 2986 } 2987 2988 /* Search for the left edge of the window for each bit. */ 2989 search_left_edge(1, rank_bgn, write_group, 0, test_bgn, 2990 &sticky_bit_chk, 2991 left_edge, right_edge, 0); 2992 2993 /* Search for the right edge of the window for each bit. */ 2994 ret = search_right_edge(1, rank_bgn, write_group, 0, 2995 start_dqs, 0, 2996 &sticky_bit_chk, 2997 left_edge, right_edge, 0); 2998 if (ret) { 2999 set_failing_group_stage(test_bgn + ret - 1, CAL_STAGE_WRITES, 3000 CAL_SUBSTAGE_WRITES_CENTER); 3001 return -EINVAL; 3002 } 3003 3004 min_index = get_window_mid_index(1, left_edge, right_edge, &mid_min); 3005 3006 /* Determine the amount we can change DQS (which is -mid_min). */ 3007 orig_mid_min = mid_min; 3008 new_dqs = start_dqs; 3009 mid_min = 0; 3010 debug_cond(DLEVEL >= 1, 3011 "%s:%d write_center: start_dqs=%d new_dqs=%d mid_min=%d\n", 3012 __func__, __LINE__, start_dqs, new_dqs, mid_min); 3013 3014 /* Add delay to bring centre of all DQ windows to the same "level". */ 3015 center_dq_windows(1, left_edge, right_edge, mid_min, orig_mid_min, 3016 min_index, 0, &dq_margin, &dqs_margin); 3017 3018 /* Move DQS */ 3019 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs); 3020 writel(0, &sdr_scc_mgr->update); 3021 3022 /* Centre DM */ 3023 debug_cond(DLEVEL >= 2, "%s:%d write_center: DM\n", __func__, __LINE__); 3024 3025 /* 3026 * Set the left and right edge of each bit to an illegal value. 3027 * Use (iocfg->io_out1_delay_max + 1) as an illegal value. 
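	 *
	 * To illustrate the DM centering below with made-up numbers: if the
	 * two window searches end with bgn_best = -10 and end_best = 5, then
	 * left_edge[0] = 10 and right_edge[0] = 5, the window middle is
	 * (10 - 5) / 2 = 2, the DM output delay is set to 2 and
	 * dm_margin = 10 - 2 = 8.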
 */
	left_edge[0] = iocfg->io_out1_delay_max + 1;
	right_edge[0] = iocfg->io_out1_delay_max + 1;

	/* Search for the/part of the window with DM shift. */
	search_window(1, rank_bgn, write_group, &bgn_curr, &end_curr,
		      &bgn_best, &end_best, &win_best, 0);

	/* Reset DM delay chains to 0. */
	scc_mgr_apply_group_dm_out1_delay(0);

	/*
	 * Check to see if the current window nudges up against 0 delay.
	 * If so we need to continue the search by shifting DQS otherwise DQS
	 * search begins as a new search.
	 */
	if (end_curr != 0) {
		bgn_curr = iocfg->io_out1_delay_max + 1;
		end_curr = iocfg->io_out1_delay_max + 1;
	}

	/* Search for the/part of the window with DQS shifts. */
	search_window(0, rank_bgn, write_group, &bgn_curr, &end_curr,
		      &bgn_best, &end_best, &win_best, new_dqs);

	/* Assign left and right edge for cal and reporting. */
	left_edge[0] = -1 * bgn_best;
	right_edge[0] = end_best;

	debug_cond(DLEVEL >= 2, "%s:%d dm_calib: left=%d right=%d\n",
		   __func__, __LINE__, left_edge[0], right_edge[0]);

	/* Move DQS (back to orig). */
	scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs);

	/* Move DM */

	/* Find middle of window for the DM bit. */
	mid = (left_edge[0] - right_edge[0]) / 2;

	/* Only move right, since we are not moving DQS/DQ. */
	if (mid < 0)
		mid = 0;

	/* dm_margin should fail if we never find a window. */
	if (win_best == 0)
		dm_margin = -1;
	else
		dm_margin = left_edge[0] - mid;

	scc_mgr_apply_group_dm_out1_delay(mid);
	writel(0, &sdr_scc_mgr->update);

	debug_cond(DLEVEL >= 2,
		   "%s:%d dm_calib: left=%d right=%d mid=%d dm_margin=%d\n",
		   __func__, __LINE__, left_edge[0], right_edge[0],
		   mid, dm_margin);
	/* Export values. */
	gbl->fom_out += dq_margin + dqs_margin;

	debug_cond(DLEVEL >= 2,
		   "%s:%d write_center: dq_margin=%d dqs_margin=%d dm_margin=%d\n",
		   __func__, __LINE__, dq_margin, dqs_margin, dm_margin);

	/*
	 * Do not remove this line as it makes sure all of our
	 * decisions have been applied.
	 */
	writel(0, &sdr_scc_mgr->update);

	if ((dq_margin < 0) || (dqs_margin < 0) || (dm_margin < 0))
		return -EINVAL;

	return 0;
}

/**
 * rw_mgr_mem_calibrate_writes() - Write Calibration Part One
 * @rank_bgn: Rank number
 * @group: Read/Write Group
 * @test_bgn: Rank at which the test begins
 *
 * Stage 2: Write Calibration Part One.
 *
 * This function implements UniPHY calibration Stage 2, as explained in
 * detail in Altera EMI_RM 2015.05.04, "UniPHY Calibration Stages".
 */
static int rw_mgr_mem_calibrate_writes(const u32 rank_bgn, const u32 group,
				       const u32 test_bgn)
{
	int ret;

	/* Update info for sims */
	debug("%s:%d %u %u\n", __func__, __LINE__, group, test_bgn);

	reg_file_set_group(group);
	reg_file_set_stage(CAL_STAGE_WRITES);
	reg_file_set_sub_stage(CAL_SUBSTAGE_WRITES_CENTER);

	ret = rw_mgr_mem_calibrate_writes_center(rank_bgn, group, test_bgn);
	if (ret)
		set_failing_group_stage(group, CAL_STAGE_WRITES,
					CAL_SUBSTAGE_WRITES_CENTER);

	return ret;
}

/**
 * mem_precharge_and_activate() - Precharge all banks and activate
 *
 * Precharge all banks and activate row 0 in bank "000..." and bank "111...".
 */
static void mem_precharge_and_activate(void)
{
	int r;

	for (r = 0; r < rwcfg->mem_number_of_ranks; r++) {
		/* Set rank. */
		set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);

		/* Precharge all banks. */
		writel(rwcfg->precharge_all, SDR_PHYGRP_RWMGRGRP_ADDRESS |
					     RW_MGR_RUN_SINGLE_GROUP_OFFSET);

		writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr0);
		writel(rwcfg->activate_0_and_1_wait1,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add0);

		writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr1);
		writel(rwcfg->activate_0_and_1_wait2,
		       &sdr_rw_load_jump_mgr_regs->load_jump_add1);

		/* Activate rows. */
		writel(rwcfg->activate_0_and_1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
						RW_MGR_RUN_SINGLE_GROUP_OFFSET);
	}
}

/**
 * mem_init_latency() - Configure memory RLAT and WLAT settings
 *
 * Configure memory RLAT and WLAT parameters.
 */
static void mem_init_latency(void)
{
	/*
	 * For AV/CV, LFIFO is hardened and always runs at full rate
	 * so max latency in AFI clocks, used here, is correspondingly
	 * smaller.
	 */
	const u32 max_latency = (1 << misccfg->max_latency_count_width) - 1;
	u32 rlat, wlat;

	debug("%s:%d\n", __func__, __LINE__);

	/*
	 * Read in write latency.
	 * WL for Hard PHY does not include additive latency.
	 */
	wlat = readl(&data_mgr->t_wl_add);
	wlat += readl(&data_mgr->mem_t_add);

	gbl->rw_wl_nop_cycles = wlat - 1;

	/* Read in read latency. */
	rlat = readl(&data_mgr->t_rl_add);

	/* Set a pretty high read latency initially. */
	gbl->curr_read_lat = rlat + 16;
	if (gbl->curr_read_lat > max_latency)
		gbl->curr_read_lat = max_latency;

	writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);

	/* Advertise write latency. */
	writel(wlat, &phy_mgr_cfg->afi_wlat);
}

/**
 * mem_skip_calibrate() - Set VFIFO and LFIFO to instant-on settings
 *
 * Set VFIFO and LFIFO to instant-on settings in skip calibration mode.
 */
static void mem_skip_calibrate(void)
{
	u32 vfifo_offset;
	u32 i, j, r;

	debug("%s:%d\n", __func__, __LINE__);
	/* Need to update every shadow register set used by the interface */
	for (r = 0; r < rwcfg->mem_number_of_ranks;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		/*
		 * Set output phase alignment settings appropriate for
		 * skip calibration.
		 */
		for (i = 0; i < rwcfg->mem_if_read_dqs_width; i++) {
			scc_mgr_set_dqs_en_phase(i, 0);
			if (iocfg->dll_chain_length == 6)
				scc_mgr_set_dqdqs_output_phase(i, 6);
			else
				scc_mgr_set_dqdqs_output_phase(i, 7);
			/*
			 * Case:33398
			 *
			 * Write data arrives to the I/O two cycles before
			 * write latency is reached (720 deg).
			 *   -> due to bit-slip in a/c bus
			 *   -> to allow board skew where dqs is longer than ck
			 *   -> how often can this happen!?
			 *   -> can claim back some ptaps for high freq
			 *      support if we can relax this, but I digress...
			 *
			 * The write_clk leads mem_ck by 90 deg
			 * The minimum ptap of the OPA is 180 deg
			 * Each ptap has (360 / iocfg->dll_chain_length) deg
			 * of delay
			 * The write_clk is always delayed by 2 ptaps
			 *
			 * Hence, to make DQS aligned to CK, we need to delay
			 * DQS by:
			 *	(720 - 90 - 180 - 2 * (360 / iocfg->dll_chain_length))
			 *
			 * Dividing the above by (360 / iocfg->dll_chain_length)
			 * gives us the number of ptaps, which simplifies to:
			 *
			 *	(1.25 * iocfg->dll_chain_length - 2)
			 */
			scc_mgr_set_dqdqs_output_phase(i,
				((125 * iocfg->dll_chain_length) / 100) - 2);
		}
		writel(0xff, &sdr_scc_mgr->dqs_ena);
		writel(0xff, &sdr_scc_mgr->dqs_io_ena);

		for (i = 0; i < rwcfg->mem_if_write_dqs_width; i++) {
			writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
				  SCC_MGR_GROUP_COUNTER_OFFSET);
		}
		writel(0xff, &sdr_scc_mgr->dq_ena);
		writel(0xff, &sdr_scc_mgr->dm_ena);
		writel(0, &sdr_scc_mgr->update);
	}

	/* Compensate for simulation model behaviour */
	for (i = 0; i < rwcfg->mem_if_read_dqs_width; i++) {
		scc_mgr_set_dqs_bus_in_delay(i, 10);
		scc_mgr_load_dqs(i);
	}
	writel(0, &sdr_scc_mgr->update);

	/*
	 * ArriaV has hard FIFOs that can only be initialized by incrementing
	 * in sequencer.
	 */
	vfifo_offset = misccfg->calib_vfifo_offset;
	for (j = 0; j < vfifo_offset; j++)
		writel(0xff, &phy_mgr_cmd->inc_vfifo_hard_phy);
	writel(0, &phy_mgr_cmd->fifo_reset);

	/*
	 * For Arria V and Cyclone V with hard LFIFO, we get the skip-cal
	 * setting from generation-time constant.
	 */
	gbl->curr_read_lat = misccfg->calib_lfifo_offset;
	writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
}

/**
 * mem_calibrate() - Memory calibration entry point.
 *
 * Perform memory calibration.
 */
static u32 mem_calibrate(void)
{
	u32 i;
	u32 rank_bgn, sr;
	u32 write_group, write_test_bgn;
	u32 read_group, read_test_bgn;
	u32 run_groups, current_run;
	u32 failing_groups = 0;
	u32 group_failed = 0;

	const u32 rwdqs_ratio = rwcfg->mem_if_read_dqs_width /
				rwcfg->mem_if_write_dqs_width;

	debug("%s:%d\n", __func__, __LINE__);

	/* Initialize the data settings */
	gbl->error_substage = CAL_SUBSTAGE_NIL;
	gbl->error_stage = CAL_STAGE_NIL;
	gbl->error_group = 0xff;
	gbl->fom_in = 0;
	gbl->fom_out = 0;

	/* Initialize WLAT and RLAT. */
	mem_init_latency();

	/* Initialize bit slips. */
	mem_precharge_and_activate();

	for (i = 0; i < rwcfg->mem_if_read_dqs_width; i++) {
		writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
			  SCC_MGR_GROUP_COUNTER_OFFSET);
		/* Only needed once to set all groups, pins, DQ, DQS, DM. */
		if (i == 0)
			scc_mgr_set_hhp_extras();

		scc_set_bypass_mode(i);
	}

	/* Calibration is skipped. */
	if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) {
		/*
		 * Set VFIFO and LFIFO to instant-on settings in skip
		 * calibration mode.
		 */
		mem_skip_calibrate();

		/*
		 * Do not remove this line as it makes sure all of our
		 * decisions have been applied.
		 */
		writel(0, &sdr_scc_mgr->update);
		return 1;
	}

	/* Calibration is not skipped. */
	for (i = 0; i < NUM_CALIB_REPEAT; i++) {
		/*
		 * Zero all delay chain/phase settings for all
		 * groups and all shadow register sets.
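		 *
		 * Each calibration pass below then walks every write group:
		 * Stage 1 (VFIFO) per read group, Stage 2 (writes) per
		 * shadow-register rank set, and Stage 3 (VFIFO after writes)
		 * per read group, before Stage 4 (LFIFO) runs once at the
		 * end of the pass.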
3359 */ 3360 scc_mgr_zero_all(); 3361 3362 run_groups = ~0; 3363 3364 for (write_group = 0, write_test_bgn = 0; write_group 3365 < rwcfg->mem_if_write_dqs_width; write_group++, 3366 write_test_bgn += rwcfg->mem_dq_per_write_dqs) { 3367 /* Initialize the group failure */ 3368 group_failed = 0; 3369 3370 current_run = run_groups & ((1 << 3371 RW_MGR_NUM_DQS_PER_WRITE_GROUP) - 1); 3372 run_groups = run_groups >> 3373 RW_MGR_NUM_DQS_PER_WRITE_GROUP; 3374 3375 if (current_run == 0) 3376 continue; 3377 3378 writel(write_group, SDR_PHYGRP_SCCGRP_ADDRESS | 3379 SCC_MGR_GROUP_COUNTER_OFFSET); 3380 scc_mgr_zero_group(write_group, 0); 3381 3382 for (read_group = write_group * rwdqs_ratio, 3383 read_test_bgn = 0; 3384 read_group < (write_group + 1) * rwdqs_ratio; 3385 read_group++, 3386 read_test_bgn += rwcfg->mem_dq_per_read_dqs) { 3387 if (STATIC_CALIB_STEPS & CALIB_SKIP_VFIFO) 3388 continue; 3389 3390 /* Calibrate the VFIFO */ 3391 if (rw_mgr_mem_calibrate_vfifo(read_group, 3392 read_test_bgn)) 3393 continue; 3394 3395 if (!(gbl->phy_debug_mode_flags & 3396 PHY_DEBUG_SWEEP_ALL_GROUPS)) 3397 return 0; 3398 3399 /* The group failed, we're done. */ 3400 goto grp_failed; 3401 } 3402 3403 /* Calibrate the output side */ 3404 for (rank_bgn = 0, sr = 0; 3405 rank_bgn < rwcfg->mem_number_of_ranks; 3406 rank_bgn += NUM_RANKS_PER_SHADOW_REG, sr++) { 3407 if (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES) 3408 continue; 3409 3410 /* Not needed in quick mode! */ 3411 if (STATIC_CALIB_STEPS & 3412 CALIB_SKIP_DELAY_SWEEPS) 3413 continue; 3414 3415 /* Calibrate WRITEs */ 3416 if (!rw_mgr_mem_calibrate_writes(rank_bgn, 3417 write_group, 3418 write_test_bgn)) 3419 continue; 3420 3421 group_failed = 1; 3422 if (!(gbl->phy_debug_mode_flags & 3423 PHY_DEBUG_SWEEP_ALL_GROUPS)) 3424 return 0; 3425 } 3426 3427 /* Some group failed, we're done. */ 3428 if (group_failed) 3429 goto grp_failed; 3430 3431 for (read_group = write_group * rwdqs_ratio, 3432 read_test_bgn = 0; 3433 read_group < (write_group + 1) * rwdqs_ratio; 3434 read_group++, 3435 read_test_bgn += rwcfg->mem_dq_per_read_dqs) { 3436 if (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES) 3437 continue; 3438 3439 if (!rw_mgr_mem_calibrate_vfifo_end(read_group, 3440 read_test_bgn)) 3441 continue; 3442 3443 if (!(gbl->phy_debug_mode_flags & 3444 PHY_DEBUG_SWEEP_ALL_GROUPS)) 3445 return 0; 3446 3447 /* The group failed, we're done. */ 3448 goto grp_failed; 3449 } 3450 3451 /* No group failed, continue as usual. */ 3452 continue; 3453 3454 grp_failed: /* A group failed, increment the counter. */ 3455 failing_groups++; 3456 } 3457 3458 /* 3459 * USER If there are any failing groups then report 3460 * the failure. 3461 */ 3462 if (failing_groups != 0) 3463 return 0; 3464 3465 if (STATIC_CALIB_STEPS & CALIB_SKIP_LFIFO) 3466 continue; 3467 3468 /* Calibrate the LFIFO */ 3469 if (!rw_mgr_mem_calibrate_lfifo()) 3470 return 0; 3471 } 3472 3473 /* 3474 * Do not remove this line as it makes sure all of our decisions 3475 * have been applied. 3476 */ 3477 writel(0, &sdr_scc_mgr->update); 3478 return 1; 3479 } 3480 3481 /** 3482 * run_mem_calibrate() - Perform memory calibration 3483 * 3484 * This function triggers the entire memory calibration procedure. 3485 */ 3486 static int run_mem_calibrate(void) 3487 { 3488 int pass; 3489 u32 ctrl_cfg; 3490 3491 debug("%s:%d\n", __func__, __LINE__); 3492 3493 /* Reset pass/fail status shown on afi_cal_success/fail */ 3494 writel(PHY_MGR_CAL_RESET, &phy_mgr_cfg->cal_status); 3495 3496 /* Stop tracking manager. 
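	 * The saved ctrl_cfg value is written back once calibration is
	 * complete (see the "Start tracking manager" write below), so DQS
	 * tracking resumes with its original configuration.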
*/ 3497 ctrl_cfg = readl(&sdr_ctrl->ctrl_cfg); 3498 writel(ctrl_cfg & ~SDR_CTRLGRP_CTRLCFG_DQSTRKEN_MASK, 3499 &sdr_ctrl->ctrl_cfg); 3500 3501 phy_mgr_initialize(); 3502 rw_mgr_mem_initialize(); 3503 3504 /* Perform the actual memory calibration. */ 3505 pass = mem_calibrate(); 3506 3507 mem_precharge_and_activate(); 3508 writel(0, &phy_mgr_cmd->fifo_reset); 3509 3510 /* Handoff. */ 3511 rw_mgr_mem_handoff(); 3512 /* 3513 * In Hard PHY this is a 2-bit control: 3514 * 0: AFI Mux Select 3515 * 1: DDIO Mux Select 3516 */ 3517 writel(0x2, &phy_mgr_cfg->mux_sel); 3518 3519 /* Start tracking manager. */ 3520 writel(ctrl_cfg, &sdr_ctrl->ctrl_cfg); 3521 3522 return pass; 3523 } 3524 3525 /** 3526 * debug_mem_calibrate() - Report result of memory calibration 3527 * @pass: Value indicating whether calibration passed or failed 3528 * 3529 * This function reports the results of the memory calibration 3530 * and writes debug information into the register file. 3531 */ 3532 static void debug_mem_calibrate(int pass) 3533 { 3534 u32 debug_info; 3535 3536 if (pass) { 3537 printf("%s: CALIBRATION PASSED\n", __FILE__); 3538 3539 gbl->fom_in /= 2; 3540 gbl->fom_out /= 2; 3541 3542 if (gbl->fom_in > 0xff) 3543 gbl->fom_in = 0xff; 3544 3545 if (gbl->fom_out > 0xff) 3546 gbl->fom_out = 0xff; 3547 3548 /* Update the FOM in the register file */ 3549 debug_info = gbl->fom_in; 3550 debug_info |= gbl->fom_out << 8; 3551 writel(debug_info, &sdr_reg_file->fom); 3552 3553 writel(debug_info, &phy_mgr_cfg->cal_debug_info); 3554 writel(PHY_MGR_CAL_SUCCESS, &phy_mgr_cfg->cal_status); 3555 } else { 3556 printf("%s: CALIBRATION FAILED\n", __FILE__); 3557 3558 debug_info = gbl->error_stage; 3559 debug_info |= gbl->error_substage << 8; 3560 debug_info |= gbl->error_group << 16; 3561 3562 writel(debug_info, &sdr_reg_file->failing_stage); 3563 writel(debug_info, &phy_mgr_cfg->cal_debug_info); 3564 writel(PHY_MGR_CAL_FAIL, &phy_mgr_cfg->cal_status); 3565 3566 /* Update the failing group/stage in the register file */ 3567 debug_info = gbl->error_stage; 3568 debug_info |= gbl->error_substage << 8; 3569 debug_info |= gbl->error_group << 16; 3570 writel(debug_info, &sdr_reg_file->failing_stage); 3571 } 3572 3573 printf("%s: Calibration complete\n", __FILE__); 3574 } 3575 3576 /** 3577 * hc_initialize_rom_data() - Initialize ROM data 3578 * 3579 * Initialize ROM data. 3580 */ 3581 static void hc_initialize_rom_data(void) 3582 { 3583 unsigned int nelem = 0; 3584 const u32 *rom_init; 3585 u32 i, addr; 3586 3587 socfpga_get_seq_inst_init(&rom_init, &nelem); 3588 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_INST_ROM_WRITE_OFFSET; 3589 for (i = 0; i < nelem; i++) 3590 writel(rom_init[i], addr + (i << 2)); 3591 3592 socfpga_get_seq_ac_init(&rom_init, &nelem); 3593 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_AC_ROM_WRITE_OFFSET; 3594 for (i = 0; i < nelem; i++) 3595 writel(rom_init[i], addr + (i << 2)); 3596 } 3597 3598 /** 3599 * initialize_reg_file() - Initialize SDR register file 3600 * 3601 * Initialize SDR register file. 
3602 */ 3603 static void initialize_reg_file(void) 3604 { 3605 /* Initialize the register file with the correct data */ 3606 writel(misccfg->reg_file_init_seq_signature, &sdr_reg_file->signature); 3607 writel(0, &sdr_reg_file->debug_data_addr); 3608 writel(0, &sdr_reg_file->cur_stage); 3609 writel(0, &sdr_reg_file->fom); 3610 writel(0, &sdr_reg_file->failing_stage); 3611 writel(0, &sdr_reg_file->debug1); 3612 writel(0, &sdr_reg_file->debug2); 3613 } 3614 3615 /** 3616 * initialize_hps_phy() - Initialize HPS PHY 3617 * 3618 * Initialize HPS PHY. 3619 */ 3620 static void initialize_hps_phy(void) 3621 { 3622 u32 reg; 3623 /* 3624 * Tracking also gets configured here because it's in the 3625 * same register. 3626 */ 3627 u32 trk_sample_count = 7500; 3628 u32 trk_long_idle_sample_count = (10 << 16) | 100; 3629 /* 3630 * Format is number of outer loops in the 16 MSB, sample 3631 * count in 16 LSB. 3632 */ 3633 3634 reg = 0; 3635 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ACDELAYEN_SET(2); 3636 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQDELAYEN_SET(1); 3637 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSDELAYEN_SET(1); 3638 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSLOGICDELAYEN_SET(1); 3639 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_RESETDELAYEN_SET(0); 3640 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_LPDDRDIS_SET(1); 3641 /* 3642 * This field selects the intrinsic latency to RDATA_EN/FULL path. 3643 * 00-bypass, 01- add 5 cycles, 10- add 10 cycles, 11- add 15 cycles. 3644 */ 3645 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ADDLATSEL_SET(0); 3646 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_SET( 3647 trk_sample_count); 3648 writel(reg, &sdr_ctrl->phy_ctrl0); 3649 3650 reg = 0; 3651 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_SAMPLECOUNT_31_20_SET( 3652 trk_sample_count >> 3653 SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_WIDTH); 3654 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_SET( 3655 trk_long_idle_sample_count); 3656 writel(reg, &sdr_ctrl->phy_ctrl1); 3657 3658 reg = 0; 3659 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_LONGIDLESAMPLECOUNT_31_20_SET( 3660 trk_long_idle_sample_count >> 3661 SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_WIDTH); 3662 writel(reg, &sdr_ctrl->phy_ctrl2); 3663 } 3664 3665 /** 3666 * initialize_tracking() - Initialize tracking 3667 * 3668 * Initialize the register file with usable initial data. 3669 */ 3670 static void initialize_tracking(void) 3671 { 3672 /* 3673 * Initialize the register file with the correct data. 3674 * Compute usable version of value in case we skip full 3675 * computation later. 
3676 */ 3677 writel(DIV_ROUND_UP(iocfg->delay_per_opa_tap, 3678 iocfg->delay_per_dchain_tap) - 1, 3679 &sdr_reg_file->dtaps_per_ptap); 3680 3681 /* trk_sample_count */ 3682 writel(7500, &sdr_reg_file->trk_sample_count); 3683 3684 /* longidle outer loop [15:0] */ 3685 writel((10 << 16) | (100 << 0), &sdr_reg_file->trk_longidle); 3686 3687 /* 3688 * longidle sample count [31:24] 3689 * trfc, worst case of 933Mhz 4Gb [23:16] 3690 * trcd, worst case [15:8] 3691 * vfifo wait [7:0] 3692 */ 3693 writel((243 << 24) | (14 << 16) | (10 << 8) | (4 << 0), 3694 &sdr_reg_file->delays); 3695 3696 /* mux delay */ 3697 writel((rwcfg->idle << 24) | (rwcfg->activate_1 << 16) | 3698 (rwcfg->sgle_read << 8) | (rwcfg->precharge_all << 0), 3699 &sdr_reg_file->trk_rw_mgr_addr); 3700 3701 writel(rwcfg->mem_if_read_dqs_width, 3702 &sdr_reg_file->trk_read_dqs_width); 3703 3704 /* trefi [7:0] */ 3705 writel((rwcfg->refresh_all << 24) | (1000 << 0), 3706 &sdr_reg_file->trk_rfsh); 3707 } 3708 3709 int sdram_calibration_full(void) 3710 { 3711 struct param_type my_param; 3712 struct gbl_type my_gbl; 3713 u32 pass; 3714 3715 memset(&my_param, 0, sizeof(my_param)); 3716 memset(&my_gbl, 0, sizeof(my_gbl)); 3717 3718 param = &my_param; 3719 gbl = &my_gbl; 3720 3721 rwcfg = socfpga_get_sdram_rwmgr_config(); 3722 iocfg = socfpga_get_sdram_io_config(); 3723 misccfg = socfpga_get_sdram_misc_config(); 3724 3725 /* Set the calibration enabled by default */ 3726 gbl->phy_debug_mode_flags |= PHY_DEBUG_ENABLE_CAL_RPT; 3727 /* 3728 * Only sweep all groups (regardless of fail state) by default 3729 * Set enabled read test by default. 3730 */ 3731 #if DISABLE_GUARANTEED_READ 3732 gbl->phy_debug_mode_flags |= PHY_DEBUG_DISABLE_GUARANTEED_READ; 3733 #endif 3734 /* Initialize the register file */ 3735 initialize_reg_file(); 3736 3737 /* Initialize any PHY CSR */ 3738 initialize_hps_phy(); 3739 3740 scc_mgr_initialize(); 3741 3742 initialize_tracking(); 3743 3744 printf("%s: Preparing to start memory calibration\n", __FILE__); 3745 3746 debug("%s:%d\n", __func__, __LINE__); 3747 debug_cond(DLEVEL >= 1, 3748 "DDR3 FULL_RATE ranks=%u cs/dimm=%u dq/dqs=%u,%u vg/dqs=%u,%u ", 3749 rwcfg->mem_number_of_ranks, rwcfg->mem_number_of_cs_per_dimm, 3750 rwcfg->mem_dq_per_read_dqs, rwcfg->mem_dq_per_write_dqs, 3751 rwcfg->mem_virtual_groups_per_read_dqs, 3752 rwcfg->mem_virtual_groups_per_write_dqs); 3753 debug_cond(DLEVEL >= 1, 3754 "dqs=%u,%u dq=%u dm=%u ptap_delay=%u dtap_delay=%u ", 3755 rwcfg->mem_if_read_dqs_width, rwcfg->mem_if_write_dqs_width, 3756 rwcfg->mem_data_width, rwcfg->mem_data_mask_width, 3757 iocfg->delay_per_opa_tap, iocfg->delay_per_dchain_tap); 3758 debug_cond(DLEVEL >= 1, "dtap_dqsen_delay=%u, dll=%u", 3759 iocfg->delay_per_dqs_en_dchain_tap, iocfg->dll_chain_length); 3760 debug_cond(DLEVEL >= 1, 3761 "max values: en_p=%u dqdqs_p=%u en_d=%u dqs_in_d=%u ", 3762 iocfg->dqs_en_phase_max, iocfg->dqdqs_out_phase_max, 3763 iocfg->dqs_en_delay_max, iocfg->dqs_in_delay_max); 3764 debug_cond(DLEVEL >= 1, "io_in_d=%u io_out1_d=%u io_out2_d=%u ", 3765 iocfg->io_in_delay_max, iocfg->io_out1_delay_max, 3766 iocfg->io_out2_delay_max); 3767 debug_cond(DLEVEL >= 1, "dqs_in_reserve=%u dqs_out_reserve=%u\n", 3768 iocfg->dqs_in_reserve, iocfg->dqs_out_reserve); 3769 3770 hc_initialize_rom_data(); 3771 3772 /* update info for sims */ 3773 reg_file_set_stage(CAL_STAGE_NIL); 3774 reg_file_set_group(0); 3775 3776 /* 3777 * Load global needed for those actions that require 3778 * some dynamic calibration support. 
3779 */ 3780 dyn_calib_steps = STATIC_CALIB_STEPS; 3781 /* 3782 * Load global to allow dynamic selection of delay loop settings 3783 * based on calibration mode. 3784 */ 3785 if (!(dyn_calib_steps & CALIB_SKIP_DELAY_LOOPS)) 3786 skip_delay_mask = 0xff; 3787 else 3788 skip_delay_mask = 0x0; 3789 3790 pass = run_mem_calibrate(); 3791 debug_mem_calibrate(pass); 3792 return pass; 3793 } 3794
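
/*
 * Minimal usage sketch (illustrative only; the real call site lives in the
 * platform SDRAM driver and the surrounding code here is hypothetical):
 *
 *	if (!sdram_calibration_full()) {
 *		puts("SDRAM calibration failed\n");
 *		hang();
 *	}
 *
 * A non-zero return value from sdram_calibration_full() means calibration
 * passed; zero means it failed and debug_mem_calibrate() has already logged
 * the failing stage/group.
 */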