1 /* 2 * Copyright Altera Corporation (C) 2012-2015 3 * 4 * SPDX-License-Identifier: BSD-3-Clause 5 */ 6 7 #include <common.h> 8 #include <asm/io.h> 9 #include <asm/arch/sdram.h> 10 #include "sequencer.h" 11 #include "sequencer_auto.h" 12 #include "sequencer_auto_ac_init.h" 13 #include "sequencer_auto_inst_init.h" 14 #include "sequencer_defines.h" 15 16 static void scc_mgr_load_dqs_for_write_group(uint32_t write_group); 17 18 static struct socfpga_sdr_rw_load_manager *sdr_rw_load_mgr_regs = 19 (struct socfpga_sdr_rw_load_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0x800); 20 21 static struct socfpga_sdr_rw_load_jump_manager *sdr_rw_load_jump_mgr_regs = 22 (struct socfpga_sdr_rw_load_jump_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0xC00); 23 24 static struct socfpga_sdr_reg_file *sdr_reg_file = 25 (struct socfpga_sdr_reg_file *)SDR_PHYGRP_REGFILEGRP_ADDRESS; 26 27 static struct socfpga_sdr_scc_mgr *sdr_scc_mgr = 28 (struct socfpga_sdr_scc_mgr *)(SDR_PHYGRP_SCCGRP_ADDRESS | 0xe00); 29 30 static struct socfpga_phy_mgr_cmd *phy_mgr_cmd = 31 (struct socfpga_phy_mgr_cmd *)SDR_PHYGRP_PHYMGRGRP_ADDRESS; 32 33 static struct socfpga_phy_mgr_cfg *phy_mgr_cfg = 34 (struct socfpga_phy_mgr_cfg *)(SDR_PHYGRP_PHYMGRGRP_ADDRESS | 0x40); 35 36 static struct socfpga_data_mgr *data_mgr = 37 (struct socfpga_data_mgr *)SDR_PHYGRP_DATAMGRGRP_ADDRESS; 38 39 #define DELTA_D 1 40 41 /* 42 * In order to reduce ROM size, most of the selectable calibration steps are 43 * decided at compile time based on the user's calibration mode selection, 44 * as captured by the STATIC_CALIB_STEPS selection below. 45 * 46 * However, to support simulation-time selection of fast simulation mode, where 47 * we skip everything except the bare minimum, we need a few of the steps to 48 * be dynamic. In those cases, we either use the DYNAMIC_CALIB_STEPS for the 49 * check, which is based on the rtl-supplied value, or we dynamically compute 50 * the value to use based on the dynamically-chosen calibration mode 51 */ 52 53 #define DLEVEL 0 54 #define STATIC_IN_RTL_SIM 0 55 #define STATIC_SKIP_DELAY_LOOPS 0 56 57 #define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | \ 58 STATIC_SKIP_DELAY_LOOPS) 59 60 /* calibration steps requested by the rtl */ 61 uint16_t dyn_calib_steps; 62 63 /* 64 * To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option 65 * instead of static, we use boolean logic to select between 66 * non-skip and skip values 67 * 68 * The mask is set to include all bits when not-skipping, but is 69 * zero when skipping 70 */ 71 72 uint16_t skip_delay_mask; /* mask off bits when skipping/not-skipping */ 73 74 #define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \ 75 ((non_skip_value) & skip_delay_mask) 76 77 struct gbl_type *gbl; 78 struct param_type *param; 79 uint32_t curr_shadow_reg; 80 81 static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn, 82 uint32_t write_group, uint32_t use_dm, 83 uint32_t all_correct, uint32_t *bit_chk, uint32_t all_ranks); 84 85 static void set_failing_group_stage(uint32_t group, uint32_t stage, 86 uint32_t substage) 87 { 88 /* 89 * Only set the global stage if there was not been any other 90 * failing group 91 */ 92 if (gbl->error_stage == CAL_STAGE_NIL) { 93 gbl->error_substage = substage; 94 gbl->error_stage = stage; 95 gbl->error_group = group; 96 } 97 } 98 99 static void reg_file_set_group(uint32_t set_group) 100 { 101 u32 addr = (u32)&sdr_reg_file->cur_stage; 102 103 /* Read the current group and stage */ 104 uint32_t cur_stage_group = readl(SOCFPGA_SDR_ADDRESS + addr); 105 106 /* Clear the group */ 107 cur_stage_group &= 0x0000FFFF; 108 109 /* Set the group */ 110 cur_stage_group |= (set_group << 16); 111 112 /* Write the data back */ 113 writel(cur_stage_group, SOCFPGA_SDR_ADDRESS + addr); 114 } 115 116 static void reg_file_set_stage(uint32_t set_stage) 117 { 118 u32 addr = (u32)&sdr_reg_file->cur_stage; 119 120 /* Read the current group and stage */ 121 uint32_t cur_stage_group = readl(SOCFPGA_SDR_ADDRESS + addr); 122 123 /* Clear the stage and substage */ 124 cur_stage_group &= 0xFFFF0000; 125 126 /* Set the stage */ 127 cur_stage_group |= (set_stage & 0x000000FF); 128 129 /* Write the data back */ 130 writel(cur_stage_group, SOCFPGA_SDR_ADDRESS + addr); 131 } 132 133 static void reg_file_set_sub_stage(uint32_t set_sub_stage) 134 { 135 u32 addr = (u32)&sdr_reg_file->cur_stage; 136 137 /* Read the current group and stage */ 138 uint32_t cur_stage_group = readl(SOCFPGA_SDR_ADDRESS + addr); 139 140 /* Clear the substage */ 141 cur_stage_group &= 0xFFFF00FF; 142 143 /* Set the sub stage */ 144 cur_stage_group |= ((set_sub_stage << 8) & 0x0000FF00); 145 146 /* Write the data back */ 147 writel(cur_stage_group, SOCFPGA_SDR_ADDRESS + addr); 148 } 149 150 static void initialize(void) 151 { 152 u32 addr = (u32)&phy_mgr_cfg->mux_sel; 153 154 debug("%s:%d\n", __func__, __LINE__); 155 /* USER calibration has control over path to memory */ 156 /* 157 * In Hard PHY this is a 2-bit control: 158 * 0: AFI Mux Select 159 * 1: DDIO Mux Select 160 */ 161 writel(0x3, SOCFPGA_SDR_ADDRESS + addr); 162 163 /* USER memory clock is not stable we begin initialization */ 164 addr = (u32)&phy_mgr_cfg->reset_mem_stbl; 165 writel(0, SOCFPGA_SDR_ADDRESS + addr); 166 167 /* USER calibration status all set to zero */ 168 addr = (u32)&phy_mgr_cfg->cal_status; 169 writel(0, SOCFPGA_SDR_ADDRESS + addr); 170 171 addr = (u32)&phy_mgr_cfg->cal_debug_info; 172 writel(0, SOCFPGA_SDR_ADDRESS + addr); 173 174 if ((dyn_calib_steps & CALIB_SKIP_ALL) != CALIB_SKIP_ALL) { 175 param->read_correct_mask_vg = ((uint32_t)1 << 176 (RW_MGR_MEM_DQ_PER_READ_DQS / 177 RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS)) - 1; 178 param->write_correct_mask_vg = ((uint32_t)1 << 179 (RW_MGR_MEM_DQ_PER_READ_DQS / 180 RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS)) - 1; 181 param->read_correct_mask = ((uint32_t)1 << 182 RW_MGR_MEM_DQ_PER_READ_DQS) - 1; 183 param->write_correct_mask = ((uint32_t)1 << 184 RW_MGR_MEM_DQ_PER_WRITE_DQS) - 1; 185 param->dm_correct_mask = ((uint32_t)1 << 186 (RW_MGR_MEM_DATA_WIDTH / RW_MGR_MEM_DATA_MASK_WIDTH)) 187 - 1; 188 } 189 } 190 191 static void set_rank_and_odt_mask(uint32_t rank, uint32_t odt_mode) 192 { 193 uint32_t odt_mask_0 = 0; 194 uint32_t odt_mask_1 = 0; 195 uint32_t cs_and_odt_mask; 196 uint32_t addr; 197 198 if (odt_mode == RW_MGR_ODT_MODE_READ_WRITE) { 199 if (RW_MGR_MEM_NUMBER_OF_RANKS == 1) { 200 /* 201 * 1 Rank 202 * Read: ODT = 0 203 * Write: ODT = 1 204 */ 205 odt_mask_0 = 0x0; 206 odt_mask_1 = 0x1; 207 } else if (RW_MGR_MEM_NUMBER_OF_RANKS == 2) { 208 /* 2 Ranks */ 209 if (RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM == 1) { 210 /* - Dual-Slot , Single-Rank 211 * (1 chip-select per DIMM) 212 * OR 213 * - RDIMM, 4 total CS (2 CS per DIMM) 214 * means 2 DIMM 215 * Since MEM_NUMBER_OF_RANKS is 2 they are 216 * both single rank 217 * with 2 CS each (special for RDIMM) 218 * Read: Turn on ODT on the opposite rank 219 * Write: Turn on ODT on all ranks 220 */ 221 odt_mask_0 = 0x3 & ~(1 << rank); 222 odt_mask_1 = 0x3; 223 } else { 224 /* 225 * USER - Single-Slot , Dual-rank DIMMs 226 * (2 chip-selects per DIMM) 227 * USER Read: Turn on ODT off on all ranks 228 * USER Write: Turn on ODT on active rank 229 */ 230 odt_mask_0 = 0x0; 231 odt_mask_1 = 0x3 & (1 << rank); 232 } 233 } else { 234 /* 4 Ranks 235 * Read: 236 * ----------+-----------------------+ 237 * | | 238 * | ODT | 239 * Read From +-----------------------+ 240 * Rank | 3 | 2 | 1 | 0 | 241 * ----------+-----+-----+-----+-----+ 242 * 0 | 0 | 1 | 0 | 0 | 243 * 1 | 1 | 0 | 0 | 0 | 244 * 2 | 0 | 0 | 0 | 1 | 245 * 3 | 0 | 0 | 1 | 0 | 246 * ----------+-----+-----+-----+-----+ 247 * 248 * Write: 249 * ----------+-----------------------+ 250 * | | 251 * | ODT | 252 * Write To +-----------------------+ 253 * Rank | 3 | 2 | 1 | 0 | 254 * ----------+-----+-----+-----+-----+ 255 * 0 | 0 | 1 | 0 | 1 | 256 * 1 | 1 | 0 | 1 | 0 | 257 * 2 | 0 | 1 | 0 | 1 | 258 * 3 | 1 | 0 | 1 | 0 | 259 * ----------+-----+-----+-----+-----+ 260 */ 261 switch (rank) { 262 case 0: 263 odt_mask_0 = 0x4; 264 odt_mask_1 = 0x5; 265 break; 266 case 1: 267 odt_mask_0 = 0x8; 268 odt_mask_1 = 0xA; 269 break; 270 case 2: 271 odt_mask_0 = 0x1; 272 odt_mask_1 = 0x5; 273 break; 274 case 3: 275 odt_mask_0 = 0x2; 276 odt_mask_1 = 0xA; 277 break; 278 } 279 } 280 } else { 281 odt_mask_0 = 0x0; 282 odt_mask_1 = 0x0; 283 } 284 285 cs_and_odt_mask = 286 (0xFF & ~(1 << rank)) | 287 ((0xFF & odt_mask_0) << 8) | 288 ((0xFF & odt_mask_1) << 16); 289 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_SET_CS_AND_ODT_MASK_OFFSET; 290 writel(cs_and_odt_mask, SOCFPGA_SDR_ADDRESS + addr); 291 } 292 293 static void scc_mgr_initialize(void) 294 { 295 u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_HHP_RFILE_OFFSET; 296 297 /* 298 * Clear register file for HPS 299 * 16 (2^4) is the size of the full register file in the scc mgr: 300 * RFILE_DEPTH = log2(MEM_DQ_PER_DQS + 1 + MEM_DM_PER_DQS + 301 * MEM_IF_READ_DQS_WIDTH - 1) + 1; 302 */ 303 uint32_t i; 304 for (i = 0; i < 16; i++) { 305 debug_cond(DLEVEL == 1, "%s:%d: Clearing SCC RFILE index %u\n", 306 __func__, __LINE__, i); 307 writel(0, SOCFPGA_SDR_ADDRESS + addr + (i << 2)); 308 } 309 } 310 311 static void scc_mgr_set_dqs_bus_in_delay(uint32_t read_group, 312 uint32_t delay) 313 { 314 u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_DQS_IN_DELAY_OFFSET; 315 316 /* Load the setting in the SCC manager */ 317 writel(delay, SOCFPGA_SDR_ADDRESS + addr + (read_group << 2)); 318 } 319 320 static void scc_mgr_set_dqs_io_in_delay(uint32_t write_group, 321 uint32_t delay) 322 { 323 u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_IN_DELAY_OFFSET; 324 325 writel(delay, SOCFPGA_SDR_ADDRESS + addr + (RW_MGR_MEM_DQ_PER_WRITE_DQS << 2)); 326 } 327 328 static void scc_mgr_set_dqs_en_phase(uint32_t read_group, uint32_t phase) 329 { 330 u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_DQS_EN_PHASE_OFFSET; 331 332 /* Load the setting in the SCC manager */ 333 writel(phase, SOCFPGA_SDR_ADDRESS + addr + (read_group << 2)); 334 } 335 336 static void scc_mgr_set_dqs_en_phase_all_ranks(uint32_t read_group, 337 uint32_t phase) 338 { 339 uint32_t r; 340 uint32_t update_scan_chains; 341 uint32_t addr; 342 343 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 344 r += NUM_RANKS_PER_SHADOW_REG) { 345 /* 346 * USER although the h/w doesn't support different phases per 347 * shadow register, for simplicity our scc manager modeling 348 * keeps different phase settings per shadow reg, and it's 349 * important for us to keep them in sync to match h/w. 350 * for efficiency, the scan chain update should occur only 351 * once to sr0. 352 */ 353 update_scan_chains = (r == 0) ? 1 : 0; 354 355 scc_mgr_set_dqs_en_phase(read_group, phase); 356 357 if (update_scan_chains) { 358 addr = (u32)&sdr_scc_mgr->dqs_ena; 359 writel(read_group, SOCFPGA_SDR_ADDRESS + addr); 360 361 addr = (u32)&sdr_scc_mgr->update; 362 writel(0, SOCFPGA_SDR_ADDRESS + addr); 363 } 364 } 365 } 366 367 static void scc_mgr_set_dqdqs_output_phase(uint32_t write_group, 368 uint32_t phase) 369 { 370 u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_DQDQS_OUT_PHASE_OFFSET; 371 372 /* Load the setting in the SCC manager */ 373 writel(phase, SOCFPGA_SDR_ADDRESS + addr + (write_group << 2)); 374 } 375 376 static void scc_mgr_set_dqdqs_output_phase_all_ranks(uint32_t write_group, 377 uint32_t phase) 378 { 379 uint32_t r; 380 uint32_t update_scan_chains; 381 uint32_t addr; 382 383 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 384 r += NUM_RANKS_PER_SHADOW_REG) { 385 /* 386 * USER although the h/w doesn't support different phases per 387 * shadow register, for simplicity our scc manager modeling 388 * keeps different phase settings per shadow reg, and it's 389 * important for us to keep them in sync to match h/w. 390 * for efficiency, the scan chain update should occur only 391 * once to sr0. 392 */ 393 update_scan_chains = (r == 0) ? 1 : 0; 394 395 scc_mgr_set_dqdqs_output_phase(write_group, phase); 396 397 if (update_scan_chains) { 398 addr = (u32)&sdr_scc_mgr->dqs_ena; 399 writel(write_group, SOCFPGA_SDR_ADDRESS + addr); 400 401 addr = (u32)&sdr_scc_mgr->update; 402 writel(0, SOCFPGA_SDR_ADDRESS + addr); 403 } 404 } 405 } 406 407 static void scc_mgr_set_dqs_en_delay(uint32_t read_group, uint32_t delay) 408 { 409 uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_DQS_EN_DELAY_OFFSET; 410 411 /* Load the setting in the SCC manager */ 412 writel(delay + IO_DQS_EN_DELAY_OFFSET, SOCFPGA_SDR_ADDRESS + addr + 413 (read_group << 2)); 414 } 415 416 static void scc_mgr_set_dqs_en_delay_all_ranks(uint32_t read_group, 417 uint32_t delay) 418 { 419 uint32_t r; 420 uint32_t addr; 421 422 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 423 r += NUM_RANKS_PER_SHADOW_REG) { 424 scc_mgr_set_dqs_en_delay(read_group, delay); 425 426 addr = (u32)&sdr_scc_mgr->dqs_ena; 427 writel(read_group, SOCFPGA_SDR_ADDRESS + addr); 428 /* 429 * In shadow register mode, the T11 settings are stored in 430 * registers in the core, which are updated by the DQS_ENA 431 * signals. Not issuing the SCC_MGR_UPD command allows us to 432 * save lots of rank switching overhead, by calling 433 * select_shadow_regs_for_update with update_scan_chains 434 * set to 0. 435 */ 436 addr = (u32)&sdr_scc_mgr->update; 437 writel(0, SOCFPGA_SDR_ADDRESS + addr); 438 } 439 /* 440 * In shadow register mode, the T11 settings are stored in 441 * registers in the core, which are updated by the DQS_ENA 442 * signals. Not issuing the SCC_MGR_UPD command allows us to 443 * save lots of rank switching overhead, by calling 444 * select_shadow_regs_for_update with update_scan_chains 445 * set to 0. 446 */ 447 addr = (u32)&sdr_scc_mgr->update; 448 writel(0, SOCFPGA_SDR_ADDRESS + addr); 449 } 450 451 static void scc_mgr_set_oct_out1_delay(uint32_t write_group, uint32_t delay) 452 { 453 uint32_t read_group; 454 uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_OCT_OUT1_DELAY_OFFSET; 455 456 /* 457 * Load the setting in the SCC manager 458 * Although OCT affects only write data, the OCT delay is controlled 459 * by the DQS logic block which is instantiated once per read group. 460 * For protocols where a write group consists of multiple read groups, 461 * the setting must be set multiple times. 462 */ 463 for (read_group = write_group * RW_MGR_MEM_IF_READ_DQS_WIDTH / 464 RW_MGR_MEM_IF_WRITE_DQS_WIDTH; 465 read_group < (write_group + 1) * RW_MGR_MEM_IF_READ_DQS_WIDTH / 466 RW_MGR_MEM_IF_WRITE_DQS_WIDTH; ++read_group) 467 writel(delay, SOCFPGA_SDR_ADDRESS + addr + (read_group << 2)); 468 } 469 470 static void scc_mgr_set_dq_out1_delay(uint32_t write_group, 471 uint32_t dq_in_group, uint32_t delay) 472 { 473 uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET; 474 475 /* Load the setting in the SCC manager */ 476 writel(delay, SOCFPGA_SDR_ADDRESS + addr + (dq_in_group << 2)); 477 } 478 479 static void scc_mgr_set_dq_in_delay(uint32_t write_group, 480 uint32_t dq_in_group, uint32_t delay) 481 { 482 uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_IN_DELAY_OFFSET; 483 484 /* Load the setting in the SCC manager */ 485 writel(delay, SOCFPGA_SDR_ADDRESS + addr + (dq_in_group << 2)); 486 } 487 488 static void scc_mgr_set_hhp_extras(void) 489 { 490 /* 491 * Load the fixed setting in the SCC manager 492 * bits: 0:0 = 1'b1 - dqs bypass 493 * bits: 1:1 = 1'b1 - dq bypass 494 * bits: 4:2 = 3'b001 - rfifo_mode 495 * bits: 6:5 = 2'b01 - rfifo clock_select 496 * bits: 7:7 = 1'b0 - separate gating from ungating setting 497 * bits: 8:8 = 1'b0 - separate OE from Output delay setting 498 */ 499 uint32_t value = (0<<8) | (0<<7) | (1<<5) | (1<<2) | (1<<1) | (1<<0); 500 uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_HHP_GLOBALS_OFFSET; 501 502 writel(value, SOCFPGA_SDR_ADDRESS + addr + SCC_MGR_HHP_EXTRAS_OFFSET); 503 } 504 505 static void scc_mgr_set_dqs_out1_delay(uint32_t write_group, 506 uint32_t delay) 507 { 508 uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET; 509 510 /* Load the setting in the SCC manager */ 511 writel(delay, SOCFPGA_SDR_ADDRESS + addr + (RW_MGR_MEM_DQ_PER_WRITE_DQS << 2)); 512 } 513 514 static void scc_mgr_set_dm_out1_delay(uint32_t write_group, 515 uint32_t dm, uint32_t delay) 516 { 517 uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET; 518 519 /* Load the setting in the SCC manager */ 520 writel(delay, SOCFPGA_SDR_ADDRESS + addr + 521 ((RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + dm) << 2)); 522 } 523 524 /* 525 * USER Zero all DQS config 526 * TODO: maybe rename to scc_mgr_zero_dqs_config (or something) 527 */ 528 static void scc_mgr_zero_all(void) 529 { 530 uint32_t i, r; 531 uint32_t addr; 532 533 /* 534 * USER Zero all DQS config settings, across all groups and all 535 * shadow registers 536 */ 537 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r += 538 NUM_RANKS_PER_SHADOW_REG) { 539 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { 540 /* 541 * The phases actually don't exist on a per-rank basis, 542 * but there's no harm updating them several times, so 543 * let's keep the code simple. 544 */ 545 scc_mgr_set_dqs_bus_in_delay(i, IO_DQS_IN_RESERVE); 546 scc_mgr_set_dqs_en_phase(i, 0); 547 scc_mgr_set_dqs_en_delay(i, 0); 548 } 549 550 for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) { 551 scc_mgr_set_dqdqs_output_phase(i, 0); 552 /* av/cv don't have out2 */ 553 scc_mgr_set_oct_out1_delay(i, IO_DQS_OUT_RESERVE); 554 } 555 } 556 557 /* multicast to all DQS group enables */ 558 addr = (u32)&sdr_scc_mgr->dqs_ena; 559 writel(0xff, SOCFPGA_SDR_ADDRESS + addr); 560 561 addr = (u32)&sdr_scc_mgr->update; 562 writel(0, SOCFPGA_SDR_ADDRESS + addr); 563 } 564 565 static void scc_set_bypass_mode(uint32_t write_group, uint32_t mode) 566 { 567 uint32_t addr; 568 /* mode = 0 : Do NOT bypass - Half Rate Mode */ 569 /* mode = 1 : Bypass - Full Rate Mode */ 570 571 /* only need to set once for all groups, pins, dq, dqs, dm */ 572 if (write_group == 0) { 573 debug_cond(DLEVEL == 1, "%s:%d Setting HHP Extras\n", __func__, 574 __LINE__); 575 scc_mgr_set_hhp_extras(); 576 debug_cond(DLEVEL == 1, "%s:%d Done Setting HHP Extras\n", 577 __func__, __LINE__); 578 } 579 /* multicast to all DQ enables */ 580 addr = (u32)&sdr_scc_mgr->dq_ena; 581 writel(0xff, SOCFPGA_SDR_ADDRESS + addr); 582 583 addr = (u32)&sdr_scc_mgr->dm_ena; 584 writel(0xff, SOCFPGA_SDR_ADDRESS + addr); 585 586 /* update current DQS IO enable */ 587 addr = (u32)&sdr_scc_mgr->dqs_io_ena; 588 writel(0, SOCFPGA_SDR_ADDRESS + addr); 589 590 /* update the DQS logic */ 591 addr = (u32)&sdr_scc_mgr->dqs_ena; 592 writel(write_group, SOCFPGA_SDR_ADDRESS + addr); 593 594 /* hit update */ 595 addr = (u32)&sdr_scc_mgr->update; 596 writel(0, SOCFPGA_SDR_ADDRESS + addr); 597 } 598 599 static void scc_mgr_zero_group(uint32_t write_group, uint32_t test_begin, 600 int32_t out_only) 601 { 602 uint32_t i, r; 603 uint32_t addr; 604 605 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r += 606 NUM_RANKS_PER_SHADOW_REG) { 607 /* Zero all DQ config settings */ 608 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 609 scc_mgr_set_dq_out1_delay(write_group, i, 0); 610 if (!out_only) 611 scc_mgr_set_dq_in_delay(write_group, i, 0); 612 } 613 614 /* multicast to all DQ enables */ 615 addr = (u32)&sdr_scc_mgr->dq_ena; 616 writel(0xff, SOCFPGA_SDR_ADDRESS + addr); 617 618 /* Zero all DM config settings */ 619 for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) { 620 scc_mgr_set_dm_out1_delay(write_group, i, 0); 621 } 622 623 /* multicast to all DM enables */ 624 addr = (u32)&sdr_scc_mgr->dm_ena; 625 writel(0xff, SOCFPGA_SDR_ADDRESS + addr); 626 627 /* zero all DQS io settings */ 628 if (!out_only) 629 scc_mgr_set_dqs_io_in_delay(write_group, 0); 630 /* av/cv don't have out2 */ 631 scc_mgr_set_dqs_out1_delay(write_group, IO_DQS_OUT_RESERVE); 632 scc_mgr_set_oct_out1_delay(write_group, IO_DQS_OUT_RESERVE); 633 scc_mgr_load_dqs_for_write_group(write_group); 634 635 /* multicast to all DQS IO enables (only 1) */ 636 addr = (u32)&sdr_scc_mgr->dqs_io_ena; 637 writel(0, SOCFPGA_SDR_ADDRESS + addr); 638 639 /* hit update to zero everything */ 640 addr = (u32)&sdr_scc_mgr->update; 641 writel(0, SOCFPGA_SDR_ADDRESS + addr); 642 } 643 } 644 645 /* load up dqs config settings */ 646 static void scc_mgr_load_dqs(uint32_t dqs) 647 { 648 uint32_t addr = (u32)&sdr_scc_mgr->dqs_ena; 649 650 writel(dqs, SOCFPGA_SDR_ADDRESS + addr); 651 } 652 653 static void scc_mgr_load_dqs_for_write_group(uint32_t write_group) 654 { 655 uint32_t read_group; 656 uint32_t addr = (u32)&sdr_scc_mgr->dqs_ena; 657 /* 658 * Although OCT affects only write data, the OCT delay is controlled 659 * by the DQS logic block which is instantiated once per read group. 660 * For protocols where a write group consists of multiple read groups, 661 * the setting must be scanned multiple times. 662 */ 663 for (read_group = write_group * RW_MGR_MEM_IF_READ_DQS_WIDTH / 664 RW_MGR_MEM_IF_WRITE_DQS_WIDTH; 665 read_group < (write_group + 1) * RW_MGR_MEM_IF_READ_DQS_WIDTH / 666 RW_MGR_MEM_IF_WRITE_DQS_WIDTH; ++read_group) 667 writel(read_group, SOCFPGA_SDR_ADDRESS + addr); 668 } 669 670 /* load up dqs io config settings */ 671 static void scc_mgr_load_dqs_io(void) 672 { 673 uint32_t addr = (u32)&sdr_scc_mgr->dqs_io_ena; 674 675 writel(0, SOCFPGA_SDR_ADDRESS + addr); 676 } 677 678 /* load up dq config settings */ 679 static void scc_mgr_load_dq(uint32_t dq_in_group) 680 { 681 uint32_t addr = (u32)&sdr_scc_mgr->dq_ena; 682 683 writel(dq_in_group, SOCFPGA_SDR_ADDRESS + addr); 684 } 685 686 /* load up dm config settings */ 687 static void scc_mgr_load_dm(uint32_t dm) 688 { 689 uint32_t addr = (u32)&sdr_scc_mgr->dm_ena; 690 691 writel(dm, SOCFPGA_SDR_ADDRESS + addr); 692 } 693 694 /* 695 * apply and load a particular input delay for the DQ pins in a group 696 * group_bgn is the index of the first dq pin (in the write group) 697 */ 698 static void scc_mgr_apply_group_dq_in_delay(uint32_t write_group, 699 uint32_t group_bgn, uint32_t delay) 700 { 701 uint32_t i, p; 702 703 for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) { 704 scc_mgr_set_dq_in_delay(write_group, p, delay); 705 scc_mgr_load_dq(p); 706 } 707 } 708 709 /* apply and load a particular output delay for the DQ pins in a group */ 710 static void scc_mgr_apply_group_dq_out1_delay(uint32_t write_group, 711 uint32_t group_bgn, 712 uint32_t delay1) 713 { 714 uint32_t i, p; 715 716 for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) { 717 scc_mgr_set_dq_out1_delay(write_group, i, delay1); 718 scc_mgr_load_dq(i); 719 } 720 } 721 722 /* apply and load a particular output delay for the DM pins in a group */ 723 static void scc_mgr_apply_group_dm_out1_delay(uint32_t write_group, 724 uint32_t delay1) 725 { 726 uint32_t i; 727 728 for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) { 729 scc_mgr_set_dm_out1_delay(write_group, i, delay1); 730 scc_mgr_load_dm(i); 731 } 732 } 733 734 735 /* apply and load delay on both DQS and OCT out1 */ 736 static void scc_mgr_apply_group_dqs_io_and_oct_out1(uint32_t write_group, 737 uint32_t delay) 738 { 739 scc_mgr_set_dqs_out1_delay(write_group, delay); 740 scc_mgr_load_dqs_io(); 741 742 scc_mgr_set_oct_out1_delay(write_group, delay); 743 scc_mgr_load_dqs_for_write_group(write_group); 744 } 745 746 /* apply a delay to the entire output side: DQ, DM, DQS, OCT */ 747 static void scc_mgr_apply_group_all_out_delay_add(uint32_t write_group, 748 uint32_t group_bgn, 749 uint32_t delay) 750 { 751 uint32_t i, p, new_delay; 752 753 /* dq shift */ 754 for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) { 755 new_delay = READ_SCC_DQ_OUT2_DELAY; 756 new_delay += delay; 757 758 if (new_delay > IO_IO_OUT2_DELAY_MAX) { 759 debug_cond(DLEVEL == 1, "%s:%d (%u, %u, %u) DQ[%u,%u]:\ 760 %u > %lu => %lu", __func__, __LINE__, 761 write_group, group_bgn, delay, i, p, new_delay, 762 (long unsigned int)IO_IO_OUT2_DELAY_MAX, 763 (long unsigned int)IO_IO_OUT2_DELAY_MAX); 764 new_delay = IO_IO_OUT2_DELAY_MAX; 765 } 766 767 scc_mgr_load_dq(i); 768 } 769 770 /* dm shift */ 771 for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) { 772 new_delay = READ_SCC_DM_IO_OUT2_DELAY; 773 new_delay += delay; 774 775 if (new_delay > IO_IO_OUT2_DELAY_MAX) { 776 debug_cond(DLEVEL == 1, "%s:%d (%u, %u, %u) DM[%u]:\ 777 %u > %lu => %lu\n", __func__, __LINE__, 778 write_group, group_bgn, delay, i, new_delay, 779 (long unsigned int)IO_IO_OUT2_DELAY_MAX, 780 (long unsigned int)IO_IO_OUT2_DELAY_MAX); 781 new_delay = IO_IO_OUT2_DELAY_MAX; 782 } 783 784 scc_mgr_load_dm(i); 785 } 786 787 /* dqs shift */ 788 new_delay = READ_SCC_DQS_IO_OUT2_DELAY; 789 new_delay += delay; 790 791 if (new_delay > IO_IO_OUT2_DELAY_MAX) { 792 debug_cond(DLEVEL == 1, "%s:%d (%u, %u, %u) DQS: %u > %d => %d;" 793 " adding %u to OUT1\n", __func__, __LINE__, 794 write_group, group_bgn, delay, new_delay, 795 IO_IO_OUT2_DELAY_MAX, IO_IO_OUT2_DELAY_MAX, 796 new_delay - IO_IO_OUT2_DELAY_MAX); 797 scc_mgr_set_dqs_out1_delay(write_group, new_delay - 798 IO_IO_OUT2_DELAY_MAX); 799 new_delay = IO_IO_OUT2_DELAY_MAX; 800 } 801 802 scc_mgr_load_dqs_io(); 803 804 /* oct shift */ 805 new_delay = READ_SCC_OCT_OUT2_DELAY; 806 new_delay += delay; 807 808 if (new_delay > IO_IO_OUT2_DELAY_MAX) { 809 debug_cond(DLEVEL == 1, "%s:%d (%u, %u, %u) DQS: %u > %d => %d;" 810 " adding %u to OUT1\n", __func__, __LINE__, 811 write_group, group_bgn, delay, new_delay, 812 IO_IO_OUT2_DELAY_MAX, IO_IO_OUT2_DELAY_MAX, 813 new_delay - IO_IO_OUT2_DELAY_MAX); 814 scc_mgr_set_oct_out1_delay(write_group, new_delay - 815 IO_IO_OUT2_DELAY_MAX); 816 new_delay = IO_IO_OUT2_DELAY_MAX; 817 } 818 819 scc_mgr_load_dqs_for_write_group(write_group); 820 } 821 822 /* 823 * USER apply a delay to the entire output side (DQ, DM, DQS, OCT) 824 * and to all ranks 825 */ 826 static void scc_mgr_apply_group_all_out_delay_add_all_ranks( 827 uint32_t write_group, uint32_t group_bgn, uint32_t delay) 828 { 829 uint32_t r; 830 uint32_t addr = (u32)&sdr_scc_mgr->update; 831 832 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 833 r += NUM_RANKS_PER_SHADOW_REG) { 834 scc_mgr_apply_group_all_out_delay_add(write_group, 835 group_bgn, delay); 836 writel(0, SOCFPGA_SDR_ADDRESS + addr); 837 } 838 } 839 840 /* optimization used to recover some slots in ddr3 inst_rom */ 841 /* could be applied to other protocols if we wanted to */ 842 static void set_jump_as_return(void) 843 { 844 uint32_t addr = (u32)&sdr_rw_load_mgr_regs->load_cntr0; 845 846 /* 847 * to save space, we replace return with jump to special shared 848 * RETURN instruction so we set the counter to large value so that 849 * we always jump 850 */ 851 writel(0xff, SOCFPGA_SDR_ADDRESS + addr); 852 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add0; 853 writel(RW_MGR_RETURN, SOCFPGA_SDR_ADDRESS + addr); 854 } 855 856 /* 857 * should always use constants as argument to ensure all computations are 858 * performed at compile time 859 */ 860 static void delay_for_n_mem_clocks(const uint32_t clocks) 861 { 862 uint32_t afi_clocks; 863 uint8_t inner = 0; 864 uint8_t outer = 0; 865 uint16_t c_loop = 0; 866 uint32_t addr; 867 868 debug("%s:%d: clocks=%u ... start\n", __func__, __LINE__, clocks); 869 870 871 afi_clocks = (clocks + AFI_RATE_RATIO-1) / AFI_RATE_RATIO; 872 /* scale (rounding up) to get afi clocks */ 873 874 /* 875 * Note, we don't bother accounting for being off a little bit 876 * because of a few extra instructions in outer loops 877 * Note, the loops have a test at the end, and do the test before 878 * the decrement, and so always perform the loop 879 * 1 time more than the counter value 880 */ 881 if (afi_clocks == 0) { 882 ; 883 } else if (afi_clocks <= 0x100) { 884 inner = afi_clocks-1; 885 outer = 0; 886 c_loop = 0; 887 } else if (afi_clocks <= 0x10000) { 888 inner = 0xff; 889 outer = (afi_clocks-1) >> 8; 890 c_loop = 0; 891 } else { 892 inner = 0xff; 893 outer = 0xff; 894 c_loop = (afi_clocks-1) >> 16; 895 } 896 897 /* 898 * rom instructions are structured as follows: 899 * 900 * IDLE_LOOP2: jnz cntr0, TARGET_A 901 * IDLE_LOOP1: jnz cntr1, TARGET_B 902 * return 903 * 904 * so, when doing nested loops, TARGET_A is set to IDLE_LOOP2, and 905 * TARGET_B is set to IDLE_LOOP2 as well 906 * 907 * if we have no outer loop, though, then we can use IDLE_LOOP1 only, 908 * and set TARGET_B to IDLE_LOOP1 and we skip IDLE_LOOP2 entirely 909 * 910 * a little confusing, but it helps save precious space in the inst_rom 911 * and sequencer rom and keeps the delays more accurate and reduces 912 * overhead 913 */ 914 if (afi_clocks <= 0x100) { 915 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr1; 916 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner), SOCFPGA_SDR_ADDRESS + addr); 917 918 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add1; 919 writel(RW_MGR_IDLE_LOOP1, SOCFPGA_SDR_ADDRESS + addr); 920 921 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 922 writel(RW_MGR_IDLE_LOOP1, SOCFPGA_SDR_ADDRESS + addr); 923 } else { 924 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr0; 925 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner), SOCFPGA_SDR_ADDRESS + addr); 926 927 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr1; 928 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer), SOCFPGA_SDR_ADDRESS + addr); 929 930 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add0; 931 writel(RW_MGR_IDLE_LOOP2, SOCFPGA_SDR_ADDRESS + addr); 932 933 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add1; 934 writel(RW_MGR_IDLE_LOOP2, SOCFPGA_SDR_ADDRESS + addr); 935 936 /* hack to get around compiler not being smart enough */ 937 if (afi_clocks <= 0x10000) { 938 /* only need to run once */ 939 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 940 writel(RW_MGR_IDLE_LOOP2, SOCFPGA_SDR_ADDRESS + addr); 941 } else { 942 do { 943 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 944 writel(RW_MGR_IDLE_LOOP2, SOCFPGA_SDR_ADDRESS + addr); 945 } while (c_loop-- != 0); 946 } 947 } 948 debug("%s:%d clocks=%u ... end\n", __func__, __LINE__, clocks); 949 } 950 951 static void rw_mgr_mem_initialize(void) 952 { 953 uint32_t r; 954 uint32_t addr; 955 956 debug("%s:%d\n", __func__, __LINE__); 957 958 /* The reset / cke part of initialization is broadcasted to all ranks */ 959 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_SET_CS_AND_ODT_MASK_OFFSET; 960 writel(RW_MGR_RANK_ALL, SOCFPGA_SDR_ADDRESS + addr); 961 962 /* 963 * Here's how you load register for a loop 964 * Counters are located @ 0x800 965 * Jump address are located @ 0xC00 966 * For both, registers 0 to 3 are selected using bits 3 and 2, like 967 * in 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C 968 * I know this ain't pretty, but Avalon bus throws away the 2 least 969 * significant bits 970 */ 971 972 /* start with memory RESET activated */ 973 974 /* tINIT = 200us */ 975 976 /* 977 * 200us @ 266MHz (3.75 ns) ~ 54000 clock cycles 978 * If a and b are the number of iteration in 2 nested loops 979 * it takes the following number of cycles to complete the operation: 980 * number_of_cycles = ((2 + n) * a + 2) * b 981 * where n is the number of instruction in the inner loop 982 * One possible solution is n = 0 , a = 256 , b = 106 => a = FF, 983 * b = 6A 984 */ 985 986 /* Load counters */ 987 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr0; 988 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR0_VAL), 989 SOCFPGA_SDR_ADDRESS + addr); 990 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr1; 991 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR1_VAL), 992 SOCFPGA_SDR_ADDRESS + addr); 993 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr2; 994 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR2_VAL), 995 SOCFPGA_SDR_ADDRESS + addr); 996 997 /* Load jump address */ 998 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add0; 999 writel(RW_MGR_INIT_RESET_0_CKE_0, SOCFPGA_SDR_ADDRESS + addr); 1000 1001 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add1; 1002 writel(RW_MGR_INIT_RESET_0_CKE_0, SOCFPGA_SDR_ADDRESS + addr); 1003 1004 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add2; 1005 writel(RW_MGR_INIT_RESET_0_CKE_0, SOCFPGA_SDR_ADDRESS + addr); 1006 1007 /* Execute count instruction */ 1008 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1009 writel(RW_MGR_INIT_RESET_0_CKE_0, SOCFPGA_SDR_ADDRESS + addr); 1010 1011 /* indicate that memory is stable */ 1012 addr = (u32)&phy_mgr_cfg->reset_mem_stbl; 1013 writel(1, SOCFPGA_SDR_ADDRESS + addr); 1014 1015 /* 1016 * transition the RESET to high 1017 * Wait for 500us 1018 */ 1019 1020 /* 1021 * 500us @ 266MHz (3.75 ns) ~ 134000 clock cycles 1022 * If a and b are the number of iteration in 2 nested loops 1023 * it takes the following number of cycles to complete the operation 1024 * number_of_cycles = ((2 + n) * a + 2) * b 1025 * where n is the number of instruction in the inner loop 1026 * One possible solution is n = 2 , a = 131 , b = 256 => a = 83, 1027 * b = FF 1028 */ 1029 1030 /* Load counters */ 1031 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr0; 1032 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR0_VAL), 1033 SOCFPGA_SDR_ADDRESS + addr); 1034 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr1; 1035 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR1_VAL), 1036 SOCFPGA_SDR_ADDRESS + addr); 1037 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr2; 1038 writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR2_VAL), 1039 SOCFPGA_SDR_ADDRESS + addr); 1040 1041 /* Load jump address */ 1042 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add0; 1043 writel(RW_MGR_INIT_RESET_1_CKE_0, SOCFPGA_SDR_ADDRESS + addr); 1044 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add1; 1045 writel(RW_MGR_INIT_RESET_1_CKE_0, SOCFPGA_SDR_ADDRESS + addr); 1046 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add2; 1047 writel(RW_MGR_INIT_RESET_1_CKE_0, SOCFPGA_SDR_ADDRESS + addr); 1048 1049 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1050 writel(RW_MGR_INIT_RESET_1_CKE_0, SOCFPGA_SDR_ADDRESS + addr); 1051 1052 /* bring up clock enable */ 1053 1054 /* tXRP < 250 ck cycles */ 1055 delay_for_n_mem_clocks(250); 1056 1057 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { 1058 if (param->skip_ranks[r]) { 1059 /* request to skip the rank */ 1060 continue; 1061 } 1062 1063 /* set rank */ 1064 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); 1065 1066 /* 1067 * USER Use Mirror-ed commands for odd ranks if address 1068 * mirrorring is on 1069 */ 1070 if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) { 1071 set_jump_as_return(); 1072 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1073 writel(RW_MGR_MRS2_MIRR, SOCFPGA_SDR_ADDRESS + addr); 1074 delay_for_n_mem_clocks(4); 1075 set_jump_as_return(); 1076 writel(RW_MGR_MRS3_MIRR, SOCFPGA_SDR_ADDRESS + addr); 1077 delay_for_n_mem_clocks(4); 1078 set_jump_as_return(); 1079 writel(RW_MGR_MRS1_MIRR, SOCFPGA_SDR_ADDRESS + addr); 1080 delay_for_n_mem_clocks(4); 1081 set_jump_as_return(); 1082 writel(RW_MGR_MRS0_DLL_RESET_MIRR, SOCFPGA_SDR_ADDRESS + addr); 1083 } else { 1084 set_jump_as_return(); 1085 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1086 writel(RW_MGR_MRS2, SOCFPGA_SDR_ADDRESS + addr); 1087 delay_for_n_mem_clocks(4); 1088 set_jump_as_return(); 1089 writel(RW_MGR_MRS3, SOCFPGA_SDR_ADDRESS + addr); 1090 delay_for_n_mem_clocks(4); 1091 set_jump_as_return(); 1092 writel(RW_MGR_MRS1, SOCFPGA_SDR_ADDRESS + addr); 1093 set_jump_as_return(); 1094 writel(RW_MGR_MRS0_DLL_RESET, SOCFPGA_SDR_ADDRESS + addr); 1095 } 1096 set_jump_as_return(); 1097 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1098 writel(RW_MGR_ZQCL, SOCFPGA_SDR_ADDRESS + addr); 1099 1100 /* tZQinit = tDLLK = 512 ck cycles */ 1101 delay_for_n_mem_clocks(512); 1102 } 1103 } 1104 1105 /* 1106 * At the end of calibration we have to program the user settings in, and 1107 * USER hand off the memory to the user. 1108 */ 1109 static void rw_mgr_mem_handoff(void) 1110 { 1111 uint32_t r; 1112 uint32_t addr; 1113 1114 debug("%s:%d\n", __func__, __LINE__); 1115 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { 1116 if (param->skip_ranks[r]) 1117 /* request to skip the rank */ 1118 continue; 1119 /* set rank */ 1120 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); 1121 1122 /* precharge all banks ... */ 1123 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1124 writel(RW_MGR_PRECHARGE_ALL, SOCFPGA_SDR_ADDRESS + addr); 1125 1126 /* load up MR settings specified by user */ 1127 1128 /* 1129 * Use Mirror-ed commands for odd ranks if address 1130 * mirrorring is on 1131 */ 1132 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1133 if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) { 1134 set_jump_as_return(); 1135 writel(RW_MGR_MRS2_MIRR, SOCFPGA_SDR_ADDRESS + addr); 1136 delay_for_n_mem_clocks(4); 1137 set_jump_as_return(); 1138 writel(RW_MGR_MRS3_MIRR, SOCFPGA_SDR_ADDRESS + addr); 1139 delay_for_n_mem_clocks(4); 1140 set_jump_as_return(); 1141 writel(RW_MGR_MRS1_MIRR, SOCFPGA_SDR_ADDRESS + addr); 1142 delay_for_n_mem_clocks(4); 1143 set_jump_as_return(); 1144 writel(RW_MGR_MRS0_USER_MIRR, SOCFPGA_SDR_ADDRESS + addr); 1145 } else { 1146 set_jump_as_return(); 1147 writel(RW_MGR_MRS2, SOCFPGA_SDR_ADDRESS + addr); 1148 delay_for_n_mem_clocks(4); 1149 set_jump_as_return(); 1150 writel(RW_MGR_MRS3, SOCFPGA_SDR_ADDRESS + addr); 1151 delay_for_n_mem_clocks(4); 1152 set_jump_as_return(); 1153 writel(RW_MGR_MRS1, SOCFPGA_SDR_ADDRESS + addr); 1154 delay_for_n_mem_clocks(4); 1155 set_jump_as_return(); 1156 writel(RW_MGR_MRS0_USER, SOCFPGA_SDR_ADDRESS + addr); 1157 } 1158 /* 1159 * USER need to wait tMOD (12CK or 15ns) time before issuing 1160 * other commands, but we will have plenty of NIOS cycles before 1161 * actual handoff so its okay. 1162 */ 1163 } 1164 } 1165 1166 /* 1167 * performs a guaranteed read on the patterns we are going to use during a 1168 * read test to ensure memory works 1169 */ 1170 static uint32_t rw_mgr_mem_calibrate_read_test_patterns(uint32_t rank_bgn, 1171 uint32_t group, uint32_t num_tries, uint32_t *bit_chk, 1172 uint32_t all_ranks) 1173 { 1174 uint32_t r, vg; 1175 uint32_t correct_mask_vg; 1176 uint32_t tmp_bit_chk; 1177 uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS : 1178 (rank_bgn + NUM_RANKS_PER_SHADOW_REG); 1179 uint32_t addr; 1180 uint32_t base_rw_mgr; 1181 1182 *bit_chk = param->read_correct_mask; 1183 correct_mask_vg = param->read_correct_mask_vg; 1184 1185 for (r = rank_bgn; r < rank_end; r++) { 1186 if (param->skip_ranks[r]) 1187 /* request to skip the rank */ 1188 continue; 1189 1190 /* set rank */ 1191 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); 1192 1193 /* Load up a constant bursts of read commands */ 1194 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr0; 1195 writel(0x20, SOCFPGA_SDR_ADDRESS + addr); 1196 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add0; 1197 writel(RW_MGR_GUARANTEED_READ, SOCFPGA_SDR_ADDRESS + addr); 1198 1199 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr1; 1200 writel(0x20, SOCFPGA_SDR_ADDRESS + addr); 1201 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add1; 1202 writel(RW_MGR_GUARANTEED_READ_CONT, SOCFPGA_SDR_ADDRESS + addr); 1203 1204 tmp_bit_chk = 0; 1205 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS-1; ; vg--) { 1206 /* reset the fifos to get pointers to known state */ 1207 1208 addr = (u32)&phy_mgr_cmd->fifo_reset; 1209 writel(0, SOCFPGA_SDR_ADDRESS + addr); 1210 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RESET_READ_DATAPATH_OFFSET; 1211 writel(0, SOCFPGA_SDR_ADDRESS + addr); 1212 1213 tmp_bit_chk = tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS 1214 / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS); 1215 1216 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1217 writel(RW_MGR_GUARANTEED_READ, SOCFPGA_SDR_ADDRESS + addr + 1218 ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS + 1219 vg) << 2)); 1220 1221 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS; 1222 base_rw_mgr = readl(SOCFPGA_SDR_ADDRESS + addr); 1223 tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & (~base_rw_mgr)); 1224 1225 if (vg == 0) 1226 break; 1227 } 1228 *bit_chk &= tmp_bit_chk; 1229 } 1230 1231 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1232 writel(RW_MGR_CLEAR_DQS_ENABLE, SOCFPGA_SDR_ADDRESS + addr + (group << 2)); 1233 1234 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 1235 debug_cond(DLEVEL == 1, "%s:%d test_load_patterns(%u,ALL) => (%u == %u) =>\ 1236 %lu\n", __func__, __LINE__, group, *bit_chk, param->read_correct_mask, 1237 (long unsigned int)(*bit_chk == param->read_correct_mask)); 1238 return *bit_chk == param->read_correct_mask; 1239 } 1240 1241 static uint32_t rw_mgr_mem_calibrate_read_test_patterns_all_ranks 1242 (uint32_t group, uint32_t num_tries, uint32_t *bit_chk) 1243 { 1244 return rw_mgr_mem_calibrate_read_test_patterns(0, group, 1245 num_tries, bit_chk, 1); 1246 } 1247 1248 /* load up the patterns we are going to use during a read test */ 1249 static void rw_mgr_mem_calibrate_read_load_patterns(uint32_t rank_bgn, 1250 uint32_t all_ranks) 1251 { 1252 uint32_t r; 1253 uint32_t addr; 1254 uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS : 1255 (rank_bgn + NUM_RANKS_PER_SHADOW_REG); 1256 1257 debug("%s:%d\n", __func__, __LINE__); 1258 for (r = rank_bgn; r < rank_end; r++) { 1259 if (param->skip_ranks[r]) 1260 /* request to skip the rank */ 1261 continue; 1262 1263 /* set rank */ 1264 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); 1265 1266 /* Load up a constant bursts */ 1267 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr0; 1268 writel(0x20, SOCFPGA_SDR_ADDRESS + addr); 1269 1270 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add0; 1271 writel(RW_MGR_GUARANTEED_WRITE_WAIT0, SOCFPGA_SDR_ADDRESS + addr); 1272 1273 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr1; 1274 writel(0x20, SOCFPGA_SDR_ADDRESS + addr); 1275 1276 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add1; 1277 writel(RW_MGR_GUARANTEED_WRITE_WAIT1, SOCFPGA_SDR_ADDRESS + addr); 1278 1279 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr2; 1280 writel(0x04, SOCFPGA_SDR_ADDRESS + addr); 1281 1282 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add2; 1283 writel(RW_MGR_GUARANTEED_WRITE_WAIT2, SOCFPGA_SDR_ADDRESS + addr); 1284 1285 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr3; 1286 writel(0x04, SOCFPGA_SDR_ADDRESS + addr); 1287 1288 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add3; 1289 writel(RW_MGR_GUARANTEED_WRITE_WAIT3, SOCFPGA_SDR_ADDRESS + addr); 1290 1291 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1292 writel(RW_MGR_GUARANTEED_WRITE, SOCFPGA_SDR_ADDRESS + addr); 1293 } 1294 1295 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 1296 } 1297 1298 /* 1299 * try a read and see if it returns correct data back. has dummy reads 1300 * inserted into the mix used to align dqs enable. has more thorough checks 1301 * than the regular read test. 1302 */ 1303 static uint32_t rw_mgr_mem_calibrate_read_test(uint32_t rank_bgn, uint32_t group, 1304 uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk, 1305 uint32_t all_groups, uint32_t all_ranks) 1306 { 1307 uint32_t r, vg; 1308 uint32_t correct_mask_vg; 1309 uint32_t tmp_bit_chk; 1310 uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS : 1311 (rank_bgn + NUM_RANKS_PER_SHADOW_REG); 1312 uint32_t addr; 1313 uint32_t base_rw_mgr; 1314 1315 *bit_chk = param->read_correct_mask; 1316 correct_mask_vg = param->read_correct_mask_vg; 1317 1318 uint32_t quick_read_mode = (((STATIC_CALIB_STEPS) & 1319 CALIB_SKIP_DELAY_SWEEPS) && ENABLE_SUPER_QUICK_CALIBRATION); 1320 1321 for (r = rank_bgn; r < rank_end; r++) { 1322 if (param->skip_ranks[r]) 1323 /* request to skip the rank */ 1324 continue; 1325 1326 /* set rank */ 1327 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); 1328 1329 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr1; 1330 writel(0x10, SOCFPGA_SDR_ADDRESS + addr); 1331 1332 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add1; 1333 writel(RW_MGR_READ_B2B_WAIT1, SOCFPGA_SDR_ADDRESS + addr); 1334 1335 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr2; 1336 writel(0x10, SOCFPGA_SDR_ADDRESS + addr); 1337 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add2; 1338 writel(RW_MGR_READ_B2B_WAIT2, SOCFPGA_SDR_ADDRESS + addr); 1339 1340 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr0; 1341 if (quick_read_mode) 1342 writel(0x1, SOCFPGA_SDR_ADDRESS + addr); 1343 /* need at least two (1+1) reads to capture failures */ 1344 else if (all_groups) 1345 writel(0x06, SOCFPGA_SDR_ADDRESS + addr); 1346 else 1347 writel(0x32, SOCFPGA_SDR_ADDRESS + addr); 1348 1349 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add0; 1350 writel(RW_MGR_READ_B2B, SOCFPGA_SDR_ADDRESS + addr); 1351 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr3; 1352 if (all_groups) 1353 writel(RW_MGR_MEM_IF_READ_DQS_WIDTH * 1354 RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1, 1355 SOCFPGA_SDR_ADDRESS + addr); 1356 else 1357 writel(0x0, SOCFPGA_SDR_ADDRESS + addr); 1358 1359 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add3; 1360 writel(RW_MGR_READ_B2B, SOCFPGA_SDR_ADDRESS + addr); 1361 1362 tmp_bit_chk = 0; 1363 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS-1; ; vg--) { 1364 /* reset the fifos to get pointers to known state */ 1365 addr = (u32)&phy_mgr_cmd->fifo_reset; 1366 writel(0, SOCFPGA_SDR_ADDRESS + addr); 1367 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RESET_READ_DATAPATH_OFFSET; 1368 writel(0, SOCFPGA_SDR_ADDRESS + addr); 1369 1370 tmp_bit_chk = tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS 1371 / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS); 1372 1373 if (all_groups) 1374 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_ALL_GROUPS_OFFSET; 1375 else 1376 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1377 1378 writel(RW_MGR_READ_B2B, SOCFPGA_SDR_ADDRESS + addr + 1379 ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS + 1380 vg) << 2)); 1381 1382 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS; 1383 base_rw_mgr = readl(SOCFPGA_SDR_ADDRESS + addr); 1384 tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~(base_rw_mgr)); 1385 1386 if (vg == 0) 1387 break; 1388 } 1389 *bit_chk &= tmp_bit_chk; 1390 } 1391 1392 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 1393 writel(RW_MGR_CLEAR_DQS_ENABLE, SOCFPGA_SDR_ADDRESS + addr + (group << 2)); 1394 1395 if (all_correct) { 1396 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 1397 debug_cond(DLEVEL == 2, "%s:%d read_test(%u,ALL,%u) =>\ 1398 (%u == %u) => %lu", __func__, __LINE__, group, 1399 all_groups, *bit_chk, param->read_correct_mask, 1400 (long unsigned int)(*bit_chk == 1401 param->read_correct_mask)); 1402 return *bit_chk == param->read_correct_mask; 1403 } else { 1404 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 1405 debug_cond(DLEVEL == 2, "%s:%d read_test(%u,ONE,%u) =>\ 1406 (%u != %lu) => %lu\n", __func__, __LINE__, 1407 group, all_groups, *bit_chk, (long unsigned int)0, 1408 (long unsigned int)(*bit_chk != 0x00)); 1409 return *bit_chk != 0x00; 1410 } 1411 } 1412 1413 static uint32_t rw_mgr_mem_calibrate_read_test_all_ranks(uint32_t group, 1414 uint32_t num_tries, uint32_t all_correct, uint32_t *bit_chk, 1415 uint32_t all_groups) 1416 { 1417 return rw_mgr_mem_calibrate_read_test(0, group, num_tries, all_correct, 1418 bit_chk, all_groups, 1); 1419 } 1420 1421 static void rw_mgr_incr_vfifo(uint32_t grp, uint32_t *v) 1422 { 1423 uint32_t addr = (u32)&phy_mgr_cmd->inc_vfifo_hard_phy; 1424 1425 writel(grp, SOCFPGA_SDR_ADDRESS + addr); 1426 (*v)++; 1427 } 1428 1429 static void rw_mgr_decr_vfifo(uint32_t grp, uint32_t *v) 1430 { 1431 uint32_t i; 1432 1433 for (i = 0; i < VFIFO_SIZE-1; i++) 1434 rw_mgr_incr_vfifo(grp, v); 1435 } 1436 1437 static int find_vfifo_read(uint32_t grp, uint32_t *bit_chk) 1438 { 1439 uint32_t v; 1440 uint32_t fail_cnt = 0; 1441 uint32_t test_status; 1442 1443 for (v = 0; v < VFIFO_SIZE; ) { 1444 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo %u\n", 1445 __func__, __LINE__, v); 1446 test_status = rw_mgr_mem_calibrate_read_test_all_ranks 1447 (grp, 1, PASS_ONE_BIT, bit_chk, 0); 1448 if (!test_status) { 1449 fail_cnt++; 1450 1451 if (fail_cnt == 2) 1452 break; 1453 } 1454 1455 /* fiddle with FIFO */ 1456 rw_mgr_incr_vfifo(grp, &v); 1457 } 1458 1459 if (v >= VFIFO_SIZE) { 1460 /* no failing read found!! Something must have gone wrong */ 1461 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: vfifo failed\n", 1462 __func__, __LINE__); 1463 return 0; 1464 } else { 1465 return v; 1466 } 1467 } 1468 1469 static int find_working_phase(uint32_t *grp, uint32_t *bit_chk, 1470 uint32_t dtaps_per_ptap, uint32_t *work_bgn, 1471 uint32_t *v, uint32_t *d, uint32_t *p, 1472 uint32_t *i, uint32_t *max_working_cnt) 1473 { 1474 uint32_t found_begin = 0; 1475 uint32_t tmp_delay = 0; 1476 uint32_t test_status; 1477 1478 for (*d = 0; *d <= dtaps_per_ptap; (*d)++, tmp_delay += 1479 IO_DELAY_PER_DQS_EN_DCHAIN_TAP) { 1480 *work_bgn = tmp_delay; 1481 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d); 1482 1483 for (*i = 0; *i < VFIFO_SIZE; (*i)++) { 1484 for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_bgn += 1485 IO_DELAY_PER_OPA_TAP) { 1486 scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p); 1487 1488 test_status = 1489 rw_mgr_mem_calibrate_read_test_all_ranks 1490 (*grp, 1, PASS_ONE_BIT, bit_chk, 0); 1491 1492 if (test_status) { 1493 *max_working_cnt = 1; 1494 found_begin = 1; 1495 break; 1496 } 1497 } 1498 1499 if (found_begin) 1500 break; 1501 1502 if (*p > IO_DQS_EN_PHASE_MAX) 1503 /* fiddle with FIFO */ 1504 rw_mgr_incr_vfifo(*grp, v); 1505 } 1506 1507 if (found_begin) 1508 break; 1509 } 1510 1511 if (*i >= VFIFO_SIZE) { 1512 /* cannot find working solution */ 1513 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: no vfifo/\ 1514 ptap/dtap\n", __func__, __LINE__); 1515 return 0; 1516 } else { 1517 return 1; 1518 } 1519 } 1520 1521 static void sdr_backup_phase(uint32_t *grp, uint32_t *bit_chk, 1522 uint32_t *work_bgn, uint32_t *v, uint32_t *d, 1523 uint32_t *p, uint32_t *max_working_cnt) 1524 { 1525 uint32_t found_begin = 0; 1526 uint32_t tmp_delay; 1527 1528 /* Special case code for backing up a phase */ 1529 if (*p == 0) { 1530 *p = IO_DQS_EN_PHASE_MAX; 1531 rw_mgr_decr_vfifo(*grp, v); 1532 } else { 1533 (*p)--; 1534 } 1535 tmp_delay = *work_bgn - IO_DELAY_PER_OPA_TAP; 1536 scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p); 1537 1538 for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_bgn; 1539 (*d)++, tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) { 1540 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d); 1541 1542 if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1, 1543 PASS_ONE_BIT, 1544 bit_chk, 0)) { 1545 found_begin = 1; 1546 *work_bgn = tmp_delay; 1547 break; 1548 } 1549 } 1550 1551 /* We have found a working dtap before the ptap found above */ 1552 if (found_begin == 1) 1553 (*max_working_cnt)++; 1554 1555 /* 1556 * Restore VFIFO to old state before we decremented it 1557 * (if needed). 1558 */ 1559 (*p)++; 1560 if (*p > IO_DQS_EN_PHASE_MAX) { 1561 *p = 0; 1562 rw_mgr_incr_vfifo(*grp, v); 1563 } 1564 1565 scc_mgr_set_dqs_en_delay_all_ranks(*grp, 0); 1566 } 1567 1568 static int sdr_nonworking_phase(uint32_t *grp, uint32_t *bit_chk, 1569 uint32_t *work_bgn, uint32_t *v, uint32_t *d, 1570 uint32_t *p, uint32_t *i, uint32_t *max_working_cnt, 1571 uint32_t *work_end) 1572 { 1573 uint32_t found_end = 0; 1574 1575 (*p)++; 1576 *work_end += IO_DELAY_PER_OPA_TAP; 1577 if (*p > IO_DQS_EN_PHASE_MAX) { 1578 /* fiddle with FIFO */ 1579 *p = 0; 1580 rw_mgr_incr_vfifo(*grp, v); 1581 } 1582 1583 for (; *i < VFIFO_SIZE + 1; (*i)++) { 1584 for (; *p <= IO_DQS_EN_PHASE_MAX; (*p)++, *work_end 1585 += IO_DELAY_PER_OPA_TAP) { 1586 scc_mgr_set_dqs_en_phase_all_ranks(*grp, *p); 1587 1588 if (!rw_mgr_mem_calibrate_read_test_all_ranks 1589 (*grp, 1, PASS_ONE_BIT, bit_chk, 0)) { 1590 found_end = 1; 1591 break; 1592 } else { 1593 (*max_working_cnt)++; 1594 } 1595 } 1596 1597 if (found_end) 1598 break; 1599 1600 if (*p > IO_DQS_EN_PHASE_MAX) { 1601 /* fiddle with FIFO */ 1602 rw_mgr_incr_vfifo(*grp, v); 1603 *p = 0; 1604 } 1605 } 1606 1607 if (*i >= VFIFO_SIZE + 1) { 1608 /* cannot see edge of failing read */ 1609 debug_cond(DLEVEL == 2, "%s:%d sdr_nonworking_phase: end:\ 1610 failed\n", __func__, __LINE__); 1611 return 0; 1612 } else { 1613 return 1; 1614 } 1615 } 1616 1617 static int sdr_find_window_centre(uint32_t *grp, uint32_t *bit_chk, 1618 uint32_t *work_bgn, uint32_t *v, uint32_t *d, 1619 uint32_t *p, uint32_t *work_mid, 1620 uint32_t *work_end) 1621 { 1622 int i; 1623 int tmp_delay = 0; 1624 1625 *work_mid = (*work_bgn + *work_end) / 2; 1626 1627 debug_cond(DLEVEL == 2, "work_bgn=%d work_end=%d work_mid=%d\n", 1628 *work_bgn, *work_end, *work_mid); 1629 /* Get the middle delay to be less than a VFIFO delay */ 1630 for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX; 1631 (*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP) 1632 ; 1633 debug_cond(DLEVEL == 2, "vfifo ptap delay %d\n", tmp_delay); 1634 while (*work_mid > tmp_delay) 1635 *work_mid -= tmp_delay; 1636 debug_cond(DLEVEL == 2, "new work_mid %d\n", *work_mid); 1637 1638 tmp_delay = 0; 1639 for (*p = 0; *p <= IO_DQS_EN_PHASE_MAX && tmp_delay < *work_mid; 1640 (*p)++, tmp_delay += IO_DELAY_PER_OPA_TAP) 1641 ; 1642 tmp_delay -= IO_DELAY_PER_OPA_TAP; 1643 debug_cond(DLEVEL == 2, "new p %d, tmp_delay=%d\n", (*p) - 1, tmp_delay); 1644 for (*d = 0; *d <= IO_DQS_EN_DELAY_MAX && tmp_delay < *work_mid; (*d)++, 1645 tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) 1646 ; 1647 debug_cond(DLEVEL == 2, "new d %d, tmp_delay=%d\n", *d, tmp_delay); 1648 1649 scc_mgr_set_dqs_en_phase_all_ranks(*grp, (*p) - 1); 1650 scc_mgr_set_dqs_en_delay_all_ranks(*grp, *d); 1651 1652 /* 1653 * push vfifo until we can successfully calibrate. We can do this 1654 * because the largest possible margin in 1 VFIFO cycle. 1655 */ 1656 for (i = 0; i < VFIFO_SIZE; i++) { 1657 debug_cond(DLEVEL == 2, "find_dqs_en_phase: center: vfifo=%u\n", 1658 *v); 1659 if (rw_mgr_mem_calibrate_read_test_all_ranks(*grp, 1, 1660 PASS_ONE_BIT, 1661 bit_chk, 0)) { 1662 break; 1663 } 1664 1665 /* fiddle with FIFO */ 1666 rw_mgr_incr_vfifo(*grp, v); 1667 } 1668 1669 if (i >= VFIFO_SIZE) { 1670 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: center: \ 1671 failed\n", __func__, __LINE__); 1672 return 0; 1673 } else { 1674 return 1; 1675 } 1676 } 1677 1678 /* find a good dqs enable to use */ 1679 static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(uint32_t grp) 1680 { 1681 uint32_t v, d, p, i; 1682 uint32_t max_working_cnt; 1683 uint32_t bit_chk; 1684 uint32_t dtaps_per_ptap; 1685 uint32_t work_bgn, work_mid, work_end; 1686 uint32_t found_passing_read, found_failing_read, initial_failing_dtap; 1687 uint32_t addr; 1688 1689 debug("%s:%d %u\n", __func__, __LINE__, grp); 1690 1691 reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER); 1692 1693 scc_mgr_set_dqs_en_delay_all_ranks(grp, 0); 1694 scc_mgr_set_dqs_en_phase_all_ranks(grp, 0); 1695 1696 /* ************************************************************** */ 1697 /* * Step 0 : Determine number of delay taps for each phase tap * */ 1698 dtaps_per_ptap = IO_DELAY_PER_OPA_TAP/IO_DELAY_PER_DQS_EN_DCHAIN_TAP; 1699 1700 /* ********************************************************* */ 1701 /* * Step 1 : First push vfifo until we get a failing read * */ 1702 v = find_vfifo_read(grp, &bit_chk); 1703 1704 max_working_cnt = 0; 1705 1706 /* ******************************************************** */ 1707 /* * step 2: find first working phase, increment in ptaps * */ 1708 work_bgn = 0; 1709 if (find_working_phase(&grp, &bit_chk, dtaps_per_ptap, &work_bgn, &v, &d, 1710 &p, &i, &max_working_cnt) == 0) 1711 return 0; 1712 1713 work_end = work_bgn; 1714 1715 /* 1716 * If d is 0 then the working window covers a phase tap and 1717 * we can follow the old procedure otherwise, we've found the beginning, 1718 * and we need to increment the dtaps until we find the end. 1719 */ 1720 if (d == 0) { 1721 /* ********************************************************* */ 1722 /* * step 3a: if we have room, back off by one and 1723 increment in dtaps * */ 1724 1725 sdr_backup_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p, 1726 &max_working_cnt); 1727 1728 /* ********************************************************* */ 1729 /* * step 4a: go forward from working phase to non working 1730 phase, increment in ptaps * */ 1731 if (sdr_nonworking_phase(&grp, &bit_chk, &work_bgn, &v, &d, &p, 1732 &i, &max_working_cnt, &work_end) == 0) 1733 return 0; 1734 1735 /* ********************************************************* */ 1736 /* * step 5a: back off one from last, increment in dtaps * */ 1737 1738 /* Special case code for backing up a phase */ 1739 if (p == 0) { 1740 p = IO_DQS_EN_PHASE_MAX; 1741 rw_mgr_decr_vfifo(grp, &v); 1742 } else { 1743 p = p - 1; 1744 } 1745 1746 work_end -= IO_DELAY_PER_OPA_TAP; 1747 scc_mgr_set_dqs_en_phase_all_ranks(grp, p); 1748 1749 /* * The actual increment of dtaps is done outside of 1750 the if/else loop to share code */ 1751 d = 0; 1752 1753 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: v/p: \ 1754 vfifo=%u ptap=%u\n", __func__, __LINE__, 1755 v, p); 1756 } else { 1757 /* ******************************************************* */ 1758 /* * step 3-5b: Find the right edge of the window using 1759 delay taps * */ 1760 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase:vfifo=%u \ 1761 ptap=%u dtap=%u bgn=%u\n", __func__, __LINE__, 1762 v, p, d, work_bgn); 1763 1764 work_end = work_bgn; 1765 1766 /* * The actual increment of dtaps is done outside of the 1767 if/else loop to share code */ 1768 1769 /* Only here to counterbalance a subtract later on which is 1770 not needed if this branch of the algorithm is taken */ 1771 max_working_cnt++; 1772 } 1773 1774 /* The dtap increment to find the failing edge is done here */ 1775 for (; d <= IO_DQS_EN_DELAY_MAX; d++, work_end += 1776 IO_DELAY_PER_DQS_EN_DCHAIN_TAP) { 1777 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: \ 1778 end-2: dtap=%u\n", __func__, __LINE__, d); 1779 scc_mgr_set_dqs_en_delay_all_ranks(grp, d); 1780 1781 if (!rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, 1782 PASS_ONE_BIT, 1783 &bit_chk, 0)) { 1784 break; 1785 } 1786 } 1787 1788 /* Go back to working dtap */ 1789 if (d != 0) 1790 work_end -= IO_DELAY_PER_DQS_EN_DCHAIN_TAP; 1791 1792 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: v/p/d: vfifo=%u \ 1793 ptap=%u dtap=%u end=%u\n", __func__, __LINE__, 1794 v, p, d-1, work_end); 1795 1796 if (work_end < work_bgn) { 1797 /* nil range */ 1798 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: end-2: \ 1799 failed\n", __func__, __LINE__); 1800 return 0; 1801 } 1802 1803 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: found range [%u,%u]\n", 1804 __func__, __LINE__, work_bgn, work_end); 1805 1806 /* *************************************************************** */ 1807 /* 1808 * * We need to calculate the number of dtaps that equal a ptap 1809 * * To do that we'll back up a ptap and re-find the edge of the 1810 * * window using dtaps 1811 */ 1812 1813 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: calculate dtaps_per_ptap \ 1814 for tracking\n", __func__, __LINE__); 1815 1816 /* Special case code for backing up a phase */ 1817 if (p == 0) { 1818 p = IO_DQS_EN_PHASE_MAX; 1819 rw_mgr_decr_vfifo(grp, &v); 1820 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: backedup \ 1821 cycle/phase: v=%u p=%u\n", __func__, __LINE__, 1822 v, p); 1823 } else { 1824 p = p - 1; 1825 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: backedup \ 1826 phase only: v=%u p=%u", __func__, __LINE__, 1827 v, p); 1828 } 1829 1830 scc_mgr_set_dqs_en_phase_all_ranks(grp, p); 1831 1832 /* 1833 * Increase dtap until we first see a passing read (in case the 1834 * window is smaller than a ptap), 1835 * and then a failing read to mark the edge of the window again 1836 */ 1837 1838 /* Find a passing read */ 1839 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find passing read\n", 1840 __func__, __LINE__); 1841 found_passing_read = 0; 1842 found_failing_read = 0; 1843 initial_failing_dtap = d; 1844 for (; d <= IO_DQS_EN_DELAY_MAX; d++) { 1845 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: testing \ 1846 read d=%u\n", __func__, __LINE__, d); 1847 scc_mgr_set_dqs_en_delay_all_ranks(grp, d); 1848 1849 if (rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, 1850 PASS_ONE_BIT, 1851 &bit_chk, 0)) { 1852 found_passing_read = 1; 1853 break; 1854 } 1855 } 1856 1857 if (found_passing_read) { 1858 /* Find a failing read */ 1859 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: find failing \ 1860 read\n", __func__, __LINE__); 1861 for (d = d + 1; d <= IO_DQS_EN_DELAY_MAX; d++) { 1862 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: \ 1863 testing read d=%u\n", __func__, __LINE__, d); 1864 scc_mgr_set_dqs_en_delay_all_ranks(grp, d); 1865 1866 if (!rw_mgr_mem_calibrate_read_test_all_ranks 1867 (grp, 1, PASS_ONE_BIT, &bit_chk, 0)) { 1868 found_failing_read = 1; 1869 break; 1870 } 1871 } 1872 } else { 1873 debug_cond(DLEVEL == 1, "%s:%d find_dqs_en_phase: failed to \ 1874 calculate dtaps", __func__, __LINE__); 1875 debug_cond(DLEVEL == 1, "per ptap. Fall back on static value\n"); 1876 } 1877 1878 /* 1879 * The dynamically calculated dtaps_per_ptap is only valid if we 1880 * found a passing/failing read. If we didn't, it means d hit the max 1881 * (IO_DQS_EN_DELAY_MAX). Otherwise, dtaps_per_ptap retains its 1882 * statically calculated value. 1883 */ 1884 if (found_passing_read && found_failing_read) 1885 dtaps_per_ptap = d - initial_failing_dtap; 1886 1887 addr = (u32)&sdr_reg_file->dtaps_per_ptap; 1888 writel(dtaps_per_ptap, SOCFPGA_SDR_ADDRESS + addr); 1889 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: dtaps_per_ptap=%u \ 1890 - %u = %u", __func__, __LINE__, d, 1891 initial_failing_dtap, dtaps_per_ptap); 1892 1893 /* ******************************************** */ 1894 /* * step 6: Find the centre of the window * */ 1895 if (sdr_find_window_centre(&grp, &bit_chk, &work_bgn, &v, &d, &p, 1896 &work_mid, &work_end) == 0) 1897 return 0; 1898 1899 debug_cond(DLEVEL == 2, "%s:%d find_dqs_en_phase: center found: \ 1900 vfifo=%u ptap=%u dtap=%u\n", __func__, __LINE__, 1901 v, p-1, d); 1902 return 1; 1903 } 1904 1905 /* 1906 * Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different 1907 * dq_in_delay values 1908 */ 1909 static uint32_t 1910 rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay 1911 (uint32_t write_group, uint32_t read_group, uint32_t test_bgn) 1912 { 1913 uint32_t found; 1914 uint32_t i; 1915 uint32_t p; 1916 uint32_t d; 1917 uint32_t r; 1918 uint32_t addr; 1919 1920 const uint32_t delay_step = IO_IO_IN_DELAY_MAX / 1921 (RW_MGR_MEM_DQ_PER_READ_DQS-1); 1922 /* we start at zero, so have one less dq to devide among */ 1923 1924 debug("%s:%d (%u,%u,%u)", __func__, __LINE__, write_group, read_group, 1925 test_bgn); 1926 1927 /* try different dq_in_delays since the dq path is shorter than dqs */ 1928 1929 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 1930 r += NUM_RANKS_PER_SHADOW_REG) { 1931 for (i = 0, p = test_bgn, d = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; 1932 i++, p++, d += delay_step) { 1933 debug_cond(DLEVEL == 1, "%s:%d rw_mgr_mem_calibrate_\ 1934 vfifo_find_dqs_", __func__, __LINE__); 1935 debug_cond(DLEVEL == 1, "en_phase_sweep_dq_in_delay: g=%u/%u ", 1936 write_group, read_group); 1937 debug_cond(DLEVEL == 1, "r=%u, i=%u p=%u d=%u\n", r, i , p, d); 1938 scc_mgr_set_dq_in_delay(write_group, p, d); 1939 scc_mgr_load_dq(p); 1940 } 1941 addr = (u32)&sdr_scc_mgr->update; 1942 writel(0, SOCFPGA_SDR_ADDRESS + addr); 1943 } 1944 1945 found = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(read_group); 1946 1947 debug_cond(DLEVEL == 1, "%s:%d rw_mgr_mem_calibrate_vfifo_find_dqs_\ 1948 en_phase_sweep_dq", __func__, __LINE__); 1949 debug_cond(DLEVEL == 1, "_in_delay: g=%u/%u found=%u; Reseting delay \ 1950 chain to zero\n", write_group, read_group, found); 1951 1952 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 1953 r += NUM_RANKS_PER_SHADOW_REG) { 1954 for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; 1955 i++, p++) { 1956 scc_mgr_set_dq_in_delay(write_group, p, 0); 1957 scc_mgr_load_dq(p); 1958 } 1959 addr = (u32)&sdr_scc_mgr->update; 1960 writel(0, SOCFPGA_SDR_ADDRESS + addr); 1961 } 1962 1963 return found; 1964 } 1965 1966 /* per-bit deskew DQ and center */ 1967 static uint32_t rw_mgr_mem_calibrate_vfifo_center(uint32_t rank_bgn, 1968 uint32_t write_group, uint32_t read_group, uint32_t test_bgn, 1969 uint32_t use_read_test, uint32_t update_fom) 1970 { 1971 uint32_t i, p, d, min_index; 1972 /* 1973 * Store these as signed since there are comparisons with 1974 * signed numbers. 1975 */ 1976 uint32_t bit_chk; 1977 uint32_t sticky_bit_chk; 1978 int32_t left_edge[RW_MGR_MEM_DQ_PER_READ_DQS]; 1979 int32_t right_edge[RW_MGR_MEM_DQ_PER_READ_DQS]; 1980 int32_t final_dq[RW_MGR_MEM_DQ_PER_READ_DQS]; 1981 int32_t mid; 1982 int32_t orig_mid_min, mid_min; 1983 int32_t new_dqs, start_dqs, start_dqs_en, shift_dq, final_dqs, 1984 final_dqs_en; 1985 int32_t dq_margin, dqs_margin; 1986 uint32_t stop; 1987 uint32_t temp_dq_in_delay1, temp_dq_in_delay2; 1988 uint32_t addr; 1989 1990 debug("%s:%d: %u %u", __func__, __LINE__, read_group, test_bgn); 1991 1992 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_DQS_IN_DELAY_OFFSET; 1993 start_dqs = readl(SOCFPGA_SDR_ADDRESS + addr + (read_group << 2)); 1994 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) 1995 start_dqs_en = readl(SOCFPGA_SDR_ADDRESS + addr + ((read_group << 2) 1996 - IO_DQS_EN_DELAY_OFFSET)); 1997 1998 /* set the left and right edge of each bit to an illegal value */ 1999 /* use (IO_IO_IN_DELAY_MAX + 1) as an illegal value */ 2000 sticky_bit_chk = 0; 2001 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 2002 left_edge[i] = IO_IO_IN_DELAY_MAX + 1; 2003 right_edge[i] = IO_IO_IN_DELAY_MAX + 1; 2004 } 2005 2006 addr = (u32)&sdr_scc_mgr->update; 2007 /* Search for the left edge of the window for each bit */ 2008 for (d = 0; d <= IO_IO_IN_DELAY_MAX; d++) { 2009 scc_mgr_apply_group_dq_in_delay(write_group, test_bgn, d); 2010 2011 writel(0, SOCFPGA_SDR_ADDRESS + addr); 2012 2013 /* 2014 * Stop searching when the read test doesn't pass AND when 2015 * we've seen a passing read on every bit. 2016 */ 2017 if (use_read_test) { 2018 stop = !rw_mgr_mem_calibrate_read_test(rank_bgn, 2019 read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT, 2020 &bit_chk, 0, 0); 2021 } else { 2022 rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 2023 0, PASS_ONE_BIT, 2024 &bit_chk, 0); 2025 bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS * 2026 (read_group - (write_group * 2027 RW_MGR_MEM_IF_READ_DQS_WIDTH / 2028 RW_MGR_MEM_IF_WRITE_DQS_WIDTH))); 2029 stop = (bit_chk == 0); 2030 } 2031 sticky_bit_chk = sticky_bit_chk | bit_chk; 2032 stop = stop && (sticky_bit_chk == param->read_correct_mask); 2033 debug_cond(DLEVEL == 2, "%s:%d vfifo_center(left): dtap=%u => %u == %u \ 2034 && %u", __func__, __LINE__, d, 2035 sticky_bit_chk, 2036 param->read_correct_mask, stop); 2037 2038 if (stop == 1) { 2039 break; 2040 } else { 2041 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 2042 if (bit_chk & 1) { 2043 /* Remember a passing test as the 2044 left_edge */ 2045 left_edge[i] = d; 2046 } else { 2047 /* If a left edge has not been seen yet, 2048 then a future passing test will mark 2049 this edge as the right edge */ 2050 if (left_edge[i] == 2051 IO_IO_IN_DELAY_MAX + 1) { 2052 right_edge[i] = -(d + 1); 2053 } 2054 } 2055 bit_chk = bit_chk >> 1; 2056 } 2057 } 2058 } 2059 2060 /* Reset DQ delay chains to 0 */ 2061 scc_mgr_apply_group_dq_in_delay(write_group, test_bgn, 0); 2062 sticky_bit_chk = 0; 2063 for (i = RW_MGR_MEM_DQ_PER_READ_DQS - 1;; i--) { 2064 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: \ 2065 %d right_edge[%u]: %d\n", __func__, __LINE__, 2066 i, left_edge[i], i, right_edge[i]); 2067 2068 /* 2069 * Check for cases where we haven't found the left edge, 2070 * which makes our assignment of the the right edge invalid. 2071 * Reset it to the illegal value. 2072 */ 2073 if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) && ( 2074 right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) { 2075 right_edge[i] = IO_IO_IN_DELAY_MAX + 1; 2076 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: reset \ 2077 right_edge[%u]: %d\n", __func__, __LINE__, 2078 i, right_edge[i]); 2079 } 2080 2081 /* 2082 * Reset sticky bit (except for bits where we have seen 2083 * both the left and right edge). 2084 */ 2085 sticky_bit_chk = sticky_bit_chk << 1; 2086 if ((left_edge[i] != IO_IO_IN_DELAY_MAX + 1) && 2087 (right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) { 2088 sticky_bit_chk = sticky_bit_chk | 1; 2089 } 2090 2091 if (i == 0) 2092 break; 2093 } 2094 2095 addr = (u32)&sdr_scc_mgr->update; 2096 /* Search for the right edge of the window for each bit */ 2097 for (d = 0; d <= IO_DQS_IN_DELAY_MAX - start_dqs; d++) { 2098 scc_mgr_set_dqs_bus_in_delay(read_group, d + start_dqs); 2099 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { 2100 uint32_t delay = d + start_dqs_en; 2101 if (delay > IO_DQS_EN_DELAY_MAX) 2102 delay = IO_DQS_EN_DELAY_MAX; 2103 scc_mgr_set_dqs_en_delay(read_group, delay); 2104 } 2105 scc_mgr_load_dqs(read_group); 2106 2107 writel(0, SOCFPGA_SDR_ADDRESS + addr); 2108 2109 /* 2110 * Stop searching when the read test doesn't pass AND when 2111 * we've seen a passing read on every bit. 2112 */ 2113 if (use_read_test) { 2114 stop = !rw_mgr_mem_calibrate_read_test(rank_bgn, 2115 read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT, 2116 &bit_chk, 0, 0); 2117 } else { 2118 rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 2119 0, PASS_ONE_BIT, 2120 &bit_chk, 0); 2121 bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS * 2122 (read_group - (write_group * 2123 RW_MGR_MEM_IF_READ_DQS_WIDTH / 2124 RW_MGR_MEM_IF_WRITE_DQS_WIDTH))); 2125 stop = (bit_chk == 0); 2126 } 2127 sticky_bit_chk = sticky_bit_chk | bit_chk; 2128 stop = stop && (sticky_bit_chk == param->read_correct_mask); 2129 2130 debug_cond(DLEVEL == 2, "%s:%d vfifo_center(right): dtap=%u => %u == \ 2131 %u && %u", __func__, __LINE__, d, 2132 sticky_bit_chk, param->read_correct_mask, stop); 2133 2134 if (stop == 1) { 2135 break; 2136 } else { 2137 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 2138 if (bit_chk & 1) { 2139 /* Remember a passing test as 2140 the right_edge */ 2141 right_edge[i] = d; 2142 } else { 2143 if (d != 0) { 2144 /* If a right edge has not been 2145 seen yet, then a future passing 2146 test will mark this edge as the 2147 left edge */ 2148 if (right_edge[i] == 2149 IO_IO_IN_DELAY_MAX + 1) { 2150 left_edge[i] = -(d + 1); 2151 } 2152 } else { 2153 /* d = 0 failed, but it passed 2154 when testing the left edge, 2155 so it must be marginal, 2156 set it to -1 */ 2157 if (right_edge[i] == 2158 IO_IO_IN_DELAY_MAX + 1 && 2159 left_edge[i] != 2160 IO_IO_IN_DELAY_MAX 2161 + 1) { 2162 right_edge[i] = -1; 2163 } 2164 /* If a right edge has not been 2165 seen yet, then a future passing 2166 test will mark this edge as the 2167 left edge */ 2168 else if (right_edge[i] == 2169 IO_IO_IN_DELAY_MAX + 2170 1) { 2171 left_edge[i] = -(d + 1); 2172 } 2173 } 2174 } 2175 2176 debug_cond(DLEVEL == 2, "%s:%d vfifo_center[r,\ 2177 d=%u]: ", __func__, __LINE__, d); 2178 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d ", 2179 (int)(bit_chk & 1), i, left_edge[i]); 2180 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i, 2181 right_edge[i]); 2182 bit_chk = bit_chk >> 1; 2183 } 2184 } 2185 } 2186 2187 /* Check that all bits have a window */ 2188 addr = (u32)&sdr_scc_mgr->update; 2189 for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 2190 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: \ 2191 %d right_edge[%u]: %d", __func__, __LINE__, 2192 i, left_edge[i], i, right_edge[i]); 2193 if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) || (right_edge[i] 2194 == IO_IO_IN_DELAY_MAX + 1)) { 2195 /* 2196 * Restore delay chain settings before letting the loop 2197 * in rw_mgr_mem_calibrate_vfifo to retry different 2198 * dqs/ck relationships. 2199 */ 2200 scc_mgr_set_dqs_bus_in_delay(read_group, start_dqs); 2201 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { 2202 scc_mgr_set_dqs_en_delay(read_group, 2203 start_dqs_en); 2204 } 2205 scc_mgr_load_dqs(read_group); 2206 writel(0, SOCFPGA_SDR_ADDRESS + addr); 2207 2208 debug_cond(DLEVEL == 1, "%s:%d vfifo_center: failed to \ 2209 find edge [%u]: %d %d", __func__, __LINE__, 2210 i, left_edge[i], right_edge[i]); 2211 if (use_read_test) { 2212 set_failing_group_stage(read_group * 2213 RW_MGR_MEM_DQ_PER_READ_DQS + i, 2214 CAL_STAGE_VFIFO, 2215 CAL_SUBSTAGE_VFIFO_CENTER); 2216 } else { 2217 set_failing_group_stage(read_group * 2218 RW_MGR_MEM_DQ_PER_READ_DQS + i, 2219 CAL_STAGE_VFIFO_AFTER_WRITES, 2220 CAL_SUBSTAGE_VFIFO_CENTER); 2221 } 2222 return 0; 2223 } 2224 } 2225 2226 /* Find middle of window for each DQ bit */ 2227 mid_min = left_edge[0] - right_edge[0]; 2228 min_index = 0; 2229 for (i = 1; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { 2230 mid = left_edge[i] - right_edge[i]; 2231 if (mid < mid_min) { 2232 mid_min = mid; 2233 min_index = i; 2234 } 2235 } 2236 2237 /* 2238 * -mid_min/2 represents the amount that we need to move DQS. 2239 * If mid_min is odd and positive we'll need to add one to 2240 * make sure the rounding in further calculations is correct 2241 * (always bias to the right), so just add 1 for all positive values. 2242 */ 2243 if (mid_min > 0) 2244 mid_min++; 2245 2246 mid_min = mid_min / 2; 2247 2248 debug_cond(DLEVEL == 1, "%s:%d vfifo_center: mid_min=%d (index=%u)\n", 2249 __func__, __LINE__, mid_min, min_index); 2250 2251 /* Determine the amount we can change DQS (which is -mid_min) */ 2252 orig_mid_min = mid_min; 2253 new_dqs = start_dqs - mid_min; 2254 if (new_dqs > IO_DQS_IN_DELAY_MAX) 2255 new_dqs = IO_DQS_IN_DELAY_MAX; 2256 else if (new_dqs < 0) 2257 new_dqs = 0; 2258 2259 mid_min = start_dqs - new_dqs; 2260 debug_cond(DLEVEL == 1, "vfifo_center: new mid_min=%d new_dqs=%d\n", 2261 mid_min, new_dqs); 2262 2263 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { 2264 if (start_dqs_en - mid_min > IO_DQS_EN_DELAY_MAX) 2265 mid_min += start_dqs_en - mid_min - IO_DQS_EN_DELAY_MAX; 2266 else if (start_dqs_en - mid_min < 0) 2267 mid_min += start_dqs_en - mid_min; 2268 } 2269 new_dqs = start_dqs - mid_min; 2270 2271 debug_cond(DLEVEL == 1, "vfifo_center: start_dqs=%d start_dqs_en=%d \ 2272 new_dqs=%d mid_min=%d\n", start_dqs, 2273 IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS ? start_dqs_en : -1, 2274 new_dqs, mid_min); 2275 2276 /* Initialize data for export structures */ 2277 dqs_margin = IO_IO_IN_DELAY_MAX + 1; 2278 dq_margin = IO_IO_IN_DELAY_MAX + 1; 2279 2280 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_IN_DELAY_OFFSET; 2281 /* add delay to bring centre of all DQ windows to the same "level" */ 2282 for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) { 2283 /* Use values before divide by 2 to reduce round off error */ 2284 shift_dq = (left_edge[i] - right_edge[i] - 2285 (left_edge[min_index] - right_edge[min_index]))/2 + 2286 (orig_mid_min - mid_min); 2287 2288 debug_cond(DLEVEL == 2, "vfifo_center: before: \ 2289 shift_dq[%u]=%d\n", i, shift_dq); 2290 2291 temp_dq_in_delay1 = readl(SOCFPGA_SDR_ADDRESS + addr + (p << 2)); 2292 temp_dq_in_delay2 = readl(SOCFPGA_SDR_ADDRESS + addr + (i << 2)); 2293 2294 if (shift_dq + (int32_t)temp_dq_in_delay1 > 2295 (int32_t)IO_IO_IN_DELAY_MAX) { 2296 shift_dq = (int32_t)IO_IO_IN_DELAY_MAX - temp_dq_in_delay2; 2297 } else if (shift_dq + (int32_t)temp_dq_in_delay1 < 0) { 2298 shift_dq = -(int32_t)temp_dq_in_delay1; 2299 } 2300 debug_cond(DLEVEL == 2, "vfifo_center: after: \ 2301 shift_dq[%u]=%d\n", i, shift_dq); 2302 final_dq[i] = temp_dq_in_delay1 + shift_dq; 2303 scc_mgr_set_dq_in_delay(write_group, p, final_dq[i]); 2304 scc_mgr_load_dq(p); 2305 2306 debug_cond(DLEVEL == 2, "vfifo_center: margin[%u]=[%d,%d]\n", i, 2307 left_edge[i] - shift_dq + (-mid_min), 2308 right_edge[i] + shift_dq - (-mid_min)); 2309 /* To determine values for export structures */ 2310 if (left_edge[i] - shift_dq + (-mid_min) < dq_margin) 2311 dq_margin = left_edge[i] - shift_dq + (-mid_min); 2312 2313 if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin) 2314 dqs_margin = right_edge[i] + shift_dq - (-mid_min); 2315 } 2316 2317 final_dqs = new_dqs; 2318 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) 2319 final_dqs_en = start_dqs_en - mid_min; 2320 2321 /* Move DQS-en */ 2322 if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { 2323 scc_mgr_set_dqs_en_delay(read_group, final_dqs_en); 2324 scc_mgr_load_dqs(read_group); 2325 } 2326 2327 /* Move DQS */ 2328 scc_mgr_set_dqs_bus_in_delay(read_group, final_dqs); 2329 scc_mgr_load_dqs(read_group); 2330 debug_cond(DLEVEL == 2, "%s:%d vfifo_center: dq_margin=%d \ 2331 dqs_margin=%d", __func__, __LINE__, 2332 dq_margin, dqs_margin); 2333 2334 /* 2335 * Do not remove this line as it makes sure all of our decisions 2336 * have been applied. Apply the update bit. 2337 */ 2338 addr = (u32)&sdr_scc_mgr->update; 2339 writel(0, SOCFPGA_SDR_ADDRESS + addr); 2340 2341 return (dq_margin >= 0) && (dqs_margin >= 0); 2342 } 2343 2344 /* 2345 * calibrate the read valid prediction FIFO. 2346 * 2347 * - read valid prediction will consist of finding a good DQS enable phase, 2348 * DQS enable delay, DQS input phase, and DQS input delay. 2349 * - we also do a per-bit deskew on the DQ lines. 2350 */ 2351 static uint32_t rw_mgr_mem_calibrate_vfifo(uint32_t read_group, 2352 uint32_t test_bgn) 2353 { 2354 uint32_t p, d, rank_bgn, sr; 2355 uint32_t dtaps_per_ptap; 2356 uint32_t tmp_delay; 2357 uint32_t bit_chk; 2358 uint32_t grp_calibrated; 2359 uint32_t write_group, write_test_bgn; 2360 uint32_t failed_substage; 2361 2362 debug("%s:%d: %u %u\n", __func__, __LINE__, read_group, test_bgn); 2363 2364 /* update info for sims */ 2365 reg_file_set_stage(CAL_STAGE_VFIFO); 2366 2367 write_group = read_group; 2368 write_test_bgn = test_bgn; 2369 2370 /* USER Determine number of delay taps for each phase tap */ 2371 dtaps_per_ptap = 0; 2372 tmp_delay = 0; 2373 while (tmp_delay < IO_DELAY_PER_OPA_TAP) { 2374 dtaps_per_ptap++; 2375 tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP; 2376 } 2377 dtaps_per_ptap--; 2378 tmp_delay = 0; 2379 2380 /* update info for sims */ 2381 reg_file_set_group(read_group); 2382 2383 grp_calibrated = 0; 2384 2385 reg_file_set_sub_stage(CAL_SUBSTAGE_GUARANTEED_READ); 2386 failed_substage = CAL_SUBSTAGE_GUARANTEED_READ; 2387 2388 for (d = 0; d <= dtaps_per_ptap && grp_calibrated == 0; d += 2) { 2389 /* 2390 * In RLDRAMX we may be messing the delay of pins in 2391 * the same write group but outside of the current read 2392 * the group, but that's ok because we haven't 2393 * calibrated output side yet. 2394 */ 2395 if (d > 0) { 2396 scc_mgr_apply_group_all_out_delay_add_all_ranks 2397 (write_group, write_test_bgn, d); 2398 } 2399 2400 for (p = 0; p <= IO_DQDQS_OUT_PHASE_MAX && grp_calibrated == 0; 2401 p++) { 2402 /* set a particular dqdqs phase */ 2403 scc_mgr_set_dqdqs_output_phase_all_ranks(read_group, p); 2404 2405 debug_cond(DLEVEL == 1, "%s:%d calibrate_vfifo: g=%u \ 2406 p=%u d=%u\n", __func__, __LINE__, 2407 read_group, p, d); 2408 2409 /* 2410 * Load up the patterns used by read calibration 2411 * using current DQDQS phase. 2412 */ 2413 rw_mgr_mem_calibrate_read_load_patterns(0, 1); 2414 if (!(gbl->phy_debug_mode_flags & 2415 PHY_DEBUG_DISABLE_GUARANTEED_READ)) { 2416 if (!rw_mgr_mem_calibrate_read_test_patterns_all_ranks 2417 (read_group, 1, &bit_chk)) { 2418 debug_cond(DLEVEL == 1, "%s:%d Guaranteed read test failed:", 2419 __func__, __LINE__); 2420 debug_cond(DLEVEL == 1, " g=%u p=%u d=%u\n", 2421 read_group, p, d); 2422 break; 2423 } 2424 } 2425 2426 /* case:56390 */ 2427 grp_calibrated = 1; 2428 if (rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay 2429 (write_group, read_group, test_bgn)) { 2430 /* 2431 * USER Read per-bit deskew can be done on a 2432 * per shadow register basis. 2433 */ 2434 for (rank_bgn = 0, sr = 0; 2435 rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS; 2436 rank_bgn += NUM_RANKS_PER_SHADOW_REG, 2437 ++sr) { 2438 /* 2439 * Determine if this set of ranks 2440 * should be skipped entirely. 2441 */ 2442 if (!param->skip_shadow_regs[sr]) { 2443 /* 2444 * If doing read after write 2445 * calibration, do not update 2446 * FOM, now - do it then. 2447 */ 2448 if (!rw_mgr_mem_calibrate_vfifo_center 2449 (rank_bgn, write_group, 2450 read_group, test_bgn, 1, 0)) { 2451 grp_calibrated = 0; 2452 failed_substage = 2453 CAL_SUBSTAGE_VFIFO_CENTER; 2454 } 2455 } 2456 } 2457 } else { 2458 grp_calibrated = 0; 2459 failed_substage = CAL_SUBSTAGE_DQS_EN_PHASE; 2460 } 2461 } 2462 } 2463 2464 if (grp_calibrated == 0) { 2465 set_failing_group_stage(write_group, CAL_STAGE_VFIFO, 2466 failed_substage); 2467 return 0; 2468 } 2469 2470 /* 2471 * Reset the delay chains back to zero if they have moved > 1 2472 * (check for > 1 because loop will increase d even when pass in 2473 * first case). 2474 */ 2475 if (d > 2) 2476 scc_mgr_zero_group(write_group, write_test_bgn, 1); 2477 2478 return 1; 2479 } 2480 2481 /* VFIFO Calibration -- Read Deskew Calibration after write deskew */ 2482 static uint32_t rw_mgr_mem_calibrate_vfifo_end(uint32_t read_group, 2483 uint32_t test_bgn) 2484 { 2485 uint32_t rank_bgn, sr; 2486 uint32_t grp_calibrated; 2487 uint32_t write_group; 2488 2489 debug("%s:%d %u %u", __func__, __LINE__, read_group, test_bgn); 2490 2491 /* update info for sims */ 2492 2493 reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES); 2494 reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER); 2495 2496 write_group = read_group; 2497 2498 /* update info for sims */ 2499 reg_file_set_group(read_group); 2500 2501 grp_calibrated = 1; 2502 /* Read per-bit deskew can be done on a per shadow register basis */ 2503 for (rank_bgn = 0, sr = 0; rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS; 2504 rank_bgn += NUM_RANKS_PER_SHADOW_REG, ++sr) { 2505 /* Determine if this set of ranks should be skipped entirely */ 2506 if (!param->skip_shadow_regs[sr]) { 2507 /* This is the last calibration round, update FOM here */ 2508 if (!rw_mgr_mem_calibrate_vfifo_center(rank_bgn, 2509 write_group, 2510 read_group, 2511 test_bgn, 0, 2512 1)) { 2513 grp_calibrated = 0; 2514 } 2515 } 2516 } 2517 2518 2519 if (grp_calibrated == 0) { 2520 set_failing_group_stage(write_group, 2521 CAL_STAGE_VFIFO_AFTER_WRITES, 2522 CAL_SUBSTAGE_VFIFO_CENTER); 2523 return 0; 2524 } 2525 2526 return 1; 2527 } 2528 2529 /* Calibrate LFIFO to find smallest read latency */ 2530 static uint32_t rw_mgr_mem_calibrate_lfifo(void) 2531 { 2532 uint32_t found_one; 2533 uint32_t bit_chk; 2534 uint32_t addr; 2535 2536 debug("%s:%d\n", __func__, __LINE__); 2537 2538 /* update info for sims */ 2539 reg_file_set_stage(CAL_STAGE_LFIFO); 2540 reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY); 2541 2542 /* Load up the patterns used by read calibration for all ranks */ 2543 rw_mgr_mem_calibrate_read_load_patterns(0, 1); 2544 found_one = 0; 2545 2546 addr = (u32)&phy_mgr_cfg->phy_rlat; 2547 do { 2548 writel(gbl->curr_read_lat, SOCFPGA_SDR_ADDRESS + addr); 2549 debug_cond(DLEVEL == 2, "%s:%d lfifo: read_lat=%u", 2550 __func__, __LINE__, gbl->curr_read_lat); 2551 2552 if (!rw_mgr_mem_calibrate_read_test_all_ranks(0, 2553 NUM_READ_TESTS, 2554 PASS_ALL_BITS, 2555 &bit_chk, 1)) { 2556 break; 2557 } 2558 2559 found_one = 1; 2560 /* reduce read latency and see if things are working */ 2561 /* correctly */ 2562 gbl->curr_read_lat--; 2563 } while (gbl->curr_read_lat > 0); 2564 2565 /* reset the fifos to get pointers to known state */ 2566 2567 addr = (u32)&phy_mgr_cmd->fifo_reset; 2568 writel(0, SOCFPGA_SDR_ADDRESS + addr); 2569 2570 if (found_one) { 2571 /* add a fudge factor to the read latency that was determined */ 2572 gbl->curr_read_lat += 2; 2573 addr = (u32)&phy_mgr_cfg->phy_rlat; 2574 writel(gbl->curr_read_lat, SOCFPGA_SDR_ADDRESS + addr); 2575 debug_cond(DLEVEL == 2, "%s:%d lfifo: success: using \ 2576 read_lat=%u\n", __func__, __LINE__, 2577 gbl->curr_read_lat); 2578 return 1; 2579 } else { 2580 set_failing_group_stage(0xff, CAL_STAGE_LFIFO, 2581 CAL_SUBSTAGE_READ_LATENCY); 2582 2583 debug_cond(DLEVEL == 2, "%s:%d lfifo: failed at initial \ 2584 read_lat=%u\n", __func__, __LINE__, 2585 gbl->curr_read_lat); 2586 return 0; 2587 } 2588 } 2589 2590 /* 2591 * issue write test command. 2592 * two variants are provided. one that just tests a write pattern and 2593 * another that tests datamask functionality. 2594 */ 2595 static void rw_mgr_mem_calibrate_write_test_issue(uint32_t group, 2596 uint32_t test_dm) 2597 { 2598 uint32_t mcc_instruction; 2599 uint32_t quick_write_mode = (((STATIC_CALIB_STEPS) & CALIB_SKIP_WRITES) && 2600 ENABLE_SUPER_QUICK_CALIBRATION); 2601 uint32_t rw_wl_nop_cycles; 2602 uint32_t addr; 2603 2604 /* 2605 * Set counter and jump addresses for the right 2606 * number of NOP cycles. 2607 * The number of supported NOP cycles can range from -1 to infinity 2608 * Three different cases are handled: 2609 * 2610 * 1. For a number of NOP cycles greater than 0, the RW Mgr looping 2611 * mechanism will be used to insert the right number of NOPs 2612 * 2613 * 2. For a number of NOP cycles equals to 0, the micro-instruction 2614 * issuing the write command will jump straight to the 2615 * micro-instruction that turns on DQS (for DDRx), or outputs write 2616 * data (for RLD), skipping 2617 * the NOP micro-instruction all together 2618 * 2619 * 3. A number of NOP cycles equal to -1 indicates that DQS must be 2620 * turned on in the same micro-instruction that issues the write 2621 * command. Then we need 2622 * to directly jump to the micro-instruction that sends out the data 2623 * 2624 * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters 2625 * (2 and 3). One jump-counter (0) is used to perform multiple 2626 * write-read operations. 2627 * one counter left to issue this command in "multiple-group" mode 2628 */ 2629 2630 rw_wl_nop_cycles = gbl->rw_wl_nop_cycles; 2631 2632 if (rw_wl_nop_cycles == -1) { 2633 /* 2634 * CNTR 2 - We want to execute the special write operation that 2635 * turns on DQS right away and then skip directly to the 2636 * instruction that sends out the data. We set the counter to a 2637 * large number so that the jump is always taken. 2638 */ 2639 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr2; 2640 writel(0xFF, SOCFPGA_SDR_ADDRESS + addr); 2641 2642 /* CNTR 3 - Not used */ 2643 if (test_dm) { 2644 mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0_WL_1; 2645 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add2; 2646 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DATA, 2647 SOCFPGA_SDR_ADDRESS + addr); 2648 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add3; 2649 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP, 2650 SOCFPGA_SDR_ADDRESS + addr); 2651 } else { 2652 mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0_WL_1; 2653 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add2; 2654 writel(RW_MGR_LFSR_WR_RD_BANK_0_DATA, SOCFPGA_SDR_ADDRESS + addr); 2655 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add3; 2656 writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP, SOCFPGA_SDR_ADDRESS + addr); 2657 } 2658 } else if (rw_wl_nop_cycles == 0) { 2659 /* 2660 * CNTR 2 - We want to skip the NOP operation and go straight 2661 * to the DQS enable instruction. We set the counter to a large 2662 * number so that the jump is always taken. 2663 */ 2664 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr2; 2665 writel(0xFF, SOCFPGA_SDR_ADDRESS + addr); 2666 2667 /* CNTR 3 - Not used */ 2668 if (test_dm) { 2669 mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0; 2670 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add2; 2671 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DQS, 2672 SOCFPGA_SDR_ADDRESS + addr); 2673 } else { 2674 mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0; 2675 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add2; 2676 writel(RW_MGR_LFSR_WR_RD_BANK_0_DQS, SOCFPGA_SDR_ADDRESS + addr); 2677 } 2678 } else { 2679 /* 2680 * CNTR 2 - In this case we want to execute the next instruction 2681 * and NOT take the jump. So we set the counter to 0. The jump 2682 * address doesn't count. 2683 */ 2684 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr2; 2685 writel(0x0, SOCFPGA_SDR_ADDRESS + addr); 2686 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add2; 2687 writel(0x0, SOCFPGA_SDR_ADDRESS + addr); 2688 2689 /* 2690 * CNTR 3 - Set the nop counter to the number of cycles we 2691 * need to loop for, minus 1. 2692 */ 2693 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr3; 2694 writel(rw_wl_nop_cycles - 1, SOCFPGA_SDR_ADDRESS + addr); 2695 if (test_dm) { 2696 mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0; 2697 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add3; 2698 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP, SOCFPGA_SDR_ADDRESS + addr); 2699 } else { 2700 mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0; 2701 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add3; 2702 writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP, SOCFPGA_SDR_ADDRESS + addr); 2703 } 2704 } 2705 2706 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RESET_READ_DATAPATH_OFFSET; 2707 writel(0, SOCFPGA_SDR_ADDRESS + addr); 2708 2709 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr0; 2710 if (quick_write_mode) 2711 writel(0x08, SOCFPGA_SDR_ADDRESS + addr); 2712 else 2713 writel(0x40, SOCFPGA_SDR_ADDRESS + addr); 2714 2715 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add0; 2716 writel(mcc_instruction, SOCFPGA_SDR_ADDRESS + addr); 2717 2718 /* 2719 * CNTR 1 - This is used to ensure enough time elapses 2720 * for read data to come back. 2721 */ 2722 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr1; 2723 writel(0x30, SOCFPGA_SDR_ADDRESS + addr); 2724 2725 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add1; 2726 if (test_dm) { 2727 writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_WAIT, SOCFPGA_SDR_ADDRESS + addr); 2728 } else { 2729 writel(RW_MGR_LFSR_WR_RD_BANK_0_WAIT, SOCFPGA_SDR_ADDRESS + addr); 2730 } 2731 2732 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 2733 writel(mcc_instruction, SOCFPGA_SDR_ADDRESS + addr + (group << 2)); 2734 } 2735 2736 /* Test writes, can check for a single bit pass or multiple bit pass */ 2737 static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn, 2738 uint32_t write_group, uint32_t use_dm, uint32_t all_correct, 2739 uint32_t *bit_chk, uint32_t all_ranks) 2740 { 2741 uint32_t addr; 2742 uint32_t r; 2743 uint32_t correct_mask_vg; 2744 uint32_t tmp_bit_chk; 2745 uint32_t vg; 2746 uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS : 2747 (rank_bgn + NUM_RANKS_PER_SHADOW_REG); 2748 uint32_t addr_rw_mgr; 2749 uint32_t base_rw_mgr; 2750 2751 *bit_chk = param->write_correct_mask; 2752 correct_mask_vg = param->write_correct_mask_vg; 2753 2754 for (r = rank_bgn; r < rank_end; r++) { 2755 if (param->skip_ranks[r]) { 2756 /* request to skip the rank */ 2757 continue; 2758 } 2759 2760 /* set rank */ 2761 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); 2762 2763 tmp_bit_chk = 0; 2764 addr = (u32)&phy_mgr_cmd->fifo_reset; 2765 addr_rw_mgr = SDR_PHYGRP_RWMGRGRP_ADDRESS; 2766 for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS-1; ; vg--) { 2767 /* reset the fifos to get pointers to known state */ 2768 writel(0, SOCFPGA_SDR_ADDRESS + addr); 2769 2770 tmp_bit_chk = tmp_bit_chk << 2771 (RW_MGR_MEM_DQ_PER_WRITE_DQS / 2772 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS); 2773 rw_mgr_mem_calibrate_write_test_issue(write_group * 2774 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS+vg, 2775 use_dm); 2776 2777 base_rw_mgr = readl(SOCFPGA_SDR_ADDRESS + addr_rw_mgr); 2778 tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~(base_rw_mgr)); 2779 if (vg == 0) 2780 break; 2781 } 2782 *bit_chk &= tmp_bit_chk; 2783 } 2784 2785 if (all_correct) { 2786 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 2787 debug_cond(DLEVEL == 2, "write_test(%u,%u,ALL) : %u == \ 2788 %u => %lu", write_group, use_dm, 2789 *bit_chk, param->write_correct_mask, 2790 (long unsigned int)(*bit_chk == 2791 param->write_correct_mask)); 2792 return *bit_chk == param->write_correct_mask; 2793 } else { 2794 set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); 2795 debug_cond(DLEVEL == 2, "write_test(%u,%u,ONE) : %u != ", 2796 write_group, use_dm, *bit_chk); 2797 debug_cond(DLEVEL == 2, "%lu" " => %lu", (long unsigned int)0, 2798 (long unsigned int)(*bit_chk != 0)); 2799 return *bit_chk != 0x00; 2800 } 2801 } 2802 2803 /* 2804 * center all windows. do per-bit-deskew to possibly increase size of 2805 * certain windows. 2806 */ 2807 static uint32_t rw_mgr_mem_calibrate_writes_center(uint32_t rank_bgn, 2808 uint32_t write_group, uint32_t test_bgn) 2809 { 2810 uint32_t i, p, min_index; 2811 int32_t d; 2812 /* 2813 * Store these as signed since there are comparisons with 2814 * signed numbers. 2815 */ 2816 uint32_t bit_chk; 2817 uint32_t sticky_bit_chk; 2818 int32_t left_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS]; 2819 int32_t right_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS]; 2820 int32_t mid; 2821 int32_t mid_min, orig_mid_min; 2822 int32_t new_dqs, start_dqs, shift_dq; 2823 int32_t dq_margin, dqs_margin, dm_margin; 2824 uint32_t stop; 2825 uint32_t temp_dq_out1_delay; 2826 uint32_t addr; 2827 2828 debug("%s:%d %u %u", __func__, __LINE__, write_group, test_bgn); 2829 2830 dm_margin = 0; 2831 2832 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET; 2833 start_dqs = readl(SOCFPGA_SDR_ADDRESS + addr + 2834 (RW_MGR_MEM_DQ_PER_WRITE_DQS << 2)); 2835 2836 /* per-bit deskew */ 2837 2838 /* 2839 * set the left and right edge of each bit to an illegal value 2840 * use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value. 2841 */ 2842 sticky_bit_chk = 0; 2843 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2844 left_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; 2845 right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; 2846 } 2847 2848 /* Search for the left edge of the window for each bit */ 2849 addr = (u32)&sdr_scc_mgr->update; 2850 for (d = 0; d <= IO_IO_OUT1_DELAY_MAX; d++) { 2851 scc_mgr_apply_group_dq_out1_delay(write_group, test_bgn, d); 2852 2853 writel(0, SOCFPGA_SDR_ADDRESS + addr); 2854 2855 /* 2856 * Stop searching when the read test doesn't pass AND when 2857 * we've seen a passing read on every bit. 2858 */ 2859 stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 2860 0, PASS_ONE_BIT, &bit_chk, 0); 2861 sticky_bit_chk = sticky_bit_chk | bit_chk; 2862 stop = stop && (sticky_bit_chk == param->write_correct_mask); 2863 debug_cond(DLEVEL == 2, "write_center(left): dtap=%d => %u \ 2864 == %u && %u [bit_chk= %u ]\n", 2865 d, sticky_bit_chk, param->write_correct_mask, 2866 stop, bit_chk); 2867 2868 if (stop == 1) { 2869 break; 2870 } else { 2871 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2872 if (bit_chk & 1) { 2873 /* 2874 * Remember a passing test as the 2875 * left_edge. 2876 */ 2877 left_edge[i] = d; 2878 } else { 2879 /* 2880 * If a left edge has not been seen 2881 * yet, then a future passing test will 2882 * mark this edge as the right edge. 2883 */ 2884 if (left_edge[i] == 2885 IO_IO_OUT1_DELAY_MAX + 1) { 2886 right_edge[i] = -(d + 1); 2887 } 2888 } 2889 debug_cond(DLEVEL == 2, "write_center[l,d=%d):", d); 2890 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d", 2891 (int)(bit_chk & 1), i, left_edge[i]); 2892 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i, 2893 right_edge[i]); 2894 bit_chk = bit_chk >> 1; 2895 } 2896 } 2897 } 2898 2899 /* Reset DQ delay chains to 0 */ 2900 scc_mgr_apply_group_dq_out1_delay(write_group, test_bgn, 0); 2901 sticky_bit_chk = 0; 2902 for (i = RW_MGR_MEM_DQ_PER_WRITE_DQS - 1;; i--) { 2903 debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: \ 2904 %d right_edge[%u]: %d\n", __func__, __LINE__, 2905 i, left_edge[i], i, right_edge[i]); 2906 2907 /* 2908 * Check for cases where we haven't found the left edge, 2909 * which makes our assignment of the the right edge invalid. 2910 * Reset it to the illegal value. 2911 */ 2912 if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) && 2913 (right_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) { 2914 right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; 2915 debug_cond(DLEVEL == 2, "%s:%d write_center: reset \ 2916 right_edge[%u]: %d\n", __func__, __LINE__, 2917 i, right_edge[i]); 2918 } 2919 2920 /* 2921 * Reset sticky bit (except for bits where we have 2922 * seen the left edge). 2923 */ 2924 sticky_bit_chk = sticky_bit_chk << 1; 2925 if ((left_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) 2926 sticky_bit_chk = sticky_bit_chk | 1; 2927 2928 if (i == 0) 2929 break; 2930 } 2931 2932 /* Search for the right edge of the window for each bit */ 2933 addr = (u32)&sdr_scc_mgr->update; 2934 for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - start_dqs; d++) { 2935 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, 2936 d + start_dqs); 2937 2938 writel(0, SOCFPGA_SDR_ADDRESS + addr); 2939 2940 /* 2941 * Stop searching when the read test doesn't pass AND when 2942 * we've seen a passing read on every bit. 2943 */ 2944 stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 2945 0, PASS_ONE_BIT, &bit_chk, 0); 2946 2947 sticky_bit_chk = sticky_bit_chk | bit_chk; 2948 stop = stop && (sticky_bit_chk == param->write_correct_mask); 2949 2950 debug_cond(DLEVEL == 2, "write_center (right): dtap=%u => %u == \ 2951 %u && %u\n", d, sticky_bit_chk, 2952 param->write_correct_mask, stop); 2953 2954 if (stop == 1) { 2955 if (d == 0) { 2956 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; 2957 i++) { 2958 /* d = 0 failed, but it passed when 2959 testing the left edge, so it must be 2960 marginal, set it to -1 */ 2961 if (right_edge[i] == 2962 IO_IO_OUT1_DELAY_MAX + 1 && 2963 left_edge[i] != 2964 IO_IO_OUT1_DELAY_MAX + 1) { 2965 right_edge[i] = -1; 2966 } 2967 } 2968 } 2969 break; 2970 } else { 2971 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 2972 if (bit_chk & 1) { 2973 /* 2974 * Remember a passing test as 2975 * the right_edge. 2976 */ 2977 right_edge[i] = d; 2978 } else { 2979 if (d != 0) { 2980 /* 2981 * If a right edge has not 2982 * been seen yet, then a future 2983 * passing test will mark this 2984 * edge as the left edge. 2985 */ 2986 if (right_edge[i] == 2987 IO_IO_OUT1_DELAY_MAX + 1) 2988 left_edge[i] = -(d + 1); 2989 } else { 2990 /* 2991 * d = 0 failed, but it passed 2992 * when testing the left edge, 2993 * so it must be marginal, set 2994 * it to -1. 2995 */ 2996 if (right_edge[i] == 2997 IO_IO_OUT1_DELAY_MAX + 1 && 2998 left_edge[i] != 2999 IO_IO_OUT1_DELAY_MAX + 1) 3000 right_edge[i] = -1; 3001 /* 3002 * If a right edge has not been 3003 * seen yet, then a future 3004 * passing test will mark this 3005 * edge as the left edge. 3006 */ 3007 else if (right_edge[i] == 3008 IO_IO_OUT1_DELAY_MAX + 3009 1) 3010 left_edge[i] = -(d + 1); 3011 } 3012 } 3013 debug_cond(DLEVEL == 2, "write_center[r,d=%d):", d); 3014 debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d", 3015 (int)(bit_chk & 1), i, left_edge[i]); 3016 debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i, 3017 right_edge[i]); 3018 bit_chk = bit_chk >> 1; 3019 } 3020 } 3021 } 3022 3023 /* Check that all bits have a window */ 3024 for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 3025 debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: \ 3026 %d right_edge[%u]: %d", __func__, __LINE__, 3027 i, left_edge[i], i, right_edge[i]); 3028 if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) || 3029 (right_edge[i] == IO_IO_OUT1_DELAY_MAX + 1)) { 3030 set_failing_group_stage(test_bgn + i, 3031 CAL_STAGE_WRITES, 3032 CAL_SUBSTAGE_WRITES_CENTER); 3033 return 0; 3034 } 3035 } 3036 3037 /* Find middle of window for each DQ bit */ 3038 mid_min = left_edge[0] - right_edge[0]; 3039 min_index = 0; 3040 for (i = 1; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { 3041 mid = left_edge[i] - right_edge[i]; 3042 if (mid < mid_min) { 3043 mid_min = mid; 3044 min_index = i; 3045 } 3046 } 3047 3048 /* 3049 * -mid_min/2 represents the amount that we need to move DQS. 3050 * If mid_min is odd and positive we'll need to add one to 3051 * make sure the rounding in further calculations is correct 3052 * (always bias to the right), so just add 1 for all positive values. 3053 */ 3054 if (mid_min > 0) 3055 mid_min++; 3056 mid_min = mid_min / 2; 3057 debug_cond(DLEVEL == 1, "%s:%d write_center: mid_min=%d\n", __func__, 3058 __LINE__, mid_min); 3059 3060 /* Determine the amount we can change DQS (which is -mid_min) */ 3061 orig_mid_min = mid_min; 3062 new_dqs = start_dqs; 3063 mid_min = 0; 3064 debug_cond(DLEVEL == 1, "%s:%d write_center: start_dqs=%d new_dqs=%d \ 3065 mid_min=%d\n", __func__, __LINE__, start_dqs, new_dqs, mid_min); 3066 /* Initialize data for export structures */ 3067 dqs_margin = IO_IO_OUT1_DELAY_MAX + 1; 3068 dq_margin = IO_IO_OUT1_DELAY_MAX + 1; 3069 3070 /* add delay to bring centre of all DQ windows to the same "level" */ 3071 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET; 3072 for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) { 3073 /* Use values before divide by 2 to reduce round off error */ 3074 shift_dq = (left_edge[i] - right_edge[i] - 3075 (left_edge[min_index] - right_edge[min_index]))/2 + 3076 (orig_mid_min - mid_min); 3077 3078 debug_cond(DLEVEL == 2, "%s:%d write_center: before: shift_dq \ 3079 [%u]=%d\n", __func__, __LINE__, i, shift_dq); 3080 3081 temp_dq_out1_delay = readl(SOCFPGA_SDR_ADDRESS + addr + (i << 2)); 3082 if (shift_dq + (int32_t)temp_dq_out1_delay > 3083 (int32_t)IO_IO_OUT1_DELAY_MAX) { 3084 shift_dq = (int32_t)IO_IO_OUT1_DELAY_MAX - temp_dq_out1_delay; 3085 } else if (shift_dq + (int32_t)temp_dq_out1_delay < 0) { 3086 shift_dq = -(int32_t)temp_dq_out1_delay; 3087 } 3088 debug_cond(DLEVEL == 2, "write_center: after: shift_dq[%u]=%d\n", 3089 i, shift_dq); 3090 scc_mgr_set_dq_out1_delay(write_group, i, temp_dq_out1_delay + 3091 shift_dq); 3092 scc_mgr_load_dq(i); 3093 3094 debug_cond(DLEVEL == 2, "write_center: margin[%u]=[%d,%d]\n", i, 3095 left_edge[i] - shift_dq + (-mid_min), 3096 right_edge[i] + shift_dq - (-mid_min)); 3097 /* To determine values for export structures */ 3098 if (left_edge[i] - shift_dq + (-mid_min) < dq_margin) 3099 dq_margin = left_edge[i] - shift_dq + (-mid_min); 3100 3101 if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin) 3102 dqs_margin = right_edge[i] + shift_dq - (-mid_min); 3103 } 3104 3105 /* Move DQS */ 3106 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs); 3107 addr = (u32)&sdr_scc_mgr->update; 3108 writel(0, SOCFPGA_SDR_ADDRESS + addr); 3109 3110 /* Centre DM */ 3111 debug_cond(DLEVEL == 2, "%s:%d write_center: DM\n", __func__, __LINE__); 3112 3113 /* 3114 * set the left and right edge of each bit to an illegal value, 3115 * use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value, 3116 */ 3117 left_edge[0] = IO_IO_OUT1_DELAY_MAX + 1; 3118 right_edge[0] = IO_IO_OUT1_DELAY_MAX + 1; 3119 int32_t bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; 3120 int32_t end_curr = IO_IO_OUT1_DELAY_MAX + 1; 3121 int32_t bgn_best = IO_IO_OUT1_DELAY_MAX + 1; 3122 int32_t end_best = IO_IO_OUT1_DELAY_MAX + 1; 3123 int32_t win_best = 0; 3124 3125 /* Search for the/part of the window with DM shift */ 3126 addr = (u32)&sdr_scc_mgr->update; 3127 for (d = IO_IO_OUT1_DELAY_MAX; d >= 0; d -= DELTA_D) { 3128 scc_mgr_apply_group_dm_out1_delay(write_group, d); 3129 writel(0, SOCFPGA_SDR_ADDRESS + addr); 3130 3131 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1, 3132 PASS_ALL_BITS, &bit_chk, 3133 0)) { 3134 /* USE Set current end of the window */ 3135 end_curr = -d; 3136 /* 3137 * If a starting edge of our window has not been seen 3138 * this is our current start of the DM window. 3139 */ 3140 if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1) 3141 bgn_curr = -d; 3142 3143 /* 3144 * If current window is bigger than best seen. 3145 * Set best seen to be current window. 3146 */ 3147 if ((end_curr-bgn_curr+1) > win_best) { 3148 win_best = end_curr-bgn_curr+1; 3149 bgn_best = bgn_curr; 3150 end_best = end_curr; 3151 } 3152 } else { 3153 /* We just saw a failing test. Reset temp edge */ 3154 bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; 3155 end_curr = IO_IO_OUT1_DELAY_MAX + 1; 3156 } 3157 } 3158 3159 3160 /* Reset DM delay chains to 0 */ 3161 scc_mgr_apply_group_dm_out1_delay(write_group, 0); 3162 3163 /* 3164 * Check to see if the current window nudges up aganist 0 delay. 3165 * If so we need to continue the search by shifting DQS otherwise DQS 3166 * search begins as a new search. */ 3167 if (end_curr != 0) { 3168 bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; 3169 end_curr = IO_IO_OUT1_DELAY_MAX + 1; 3170 } 3171 3172 /* Search for the/part of the window with DQS shifts */ 3173 addr = (u32)&sdr_scc_mgr->update; 3174 for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - new_dqs; d += DELTA_D) { 3175 /* 3176 * Note: This only shifts DQS, so are we limiting ourselve to 3177 * width of DQ unnecessarily. 3178 */ 3179 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, 3180 d + new_dqs); 3181 3182 writel(0, SOCFPGA_SDR_ADDRESS + addr); 3183 if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1, 3184 PASS_ALL_BITS, &bit_chk, 3185 0)) { 3186 /* USE Set current end of the window */ 3187 end_curr = d; 3188 /* 3189 * If a beginning edge of our window has not been seen 3190 * this is our current begin of the DM window. 3191 */ 3192 if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1) 3193 bgn_curr = d; 3194 3195 /* 3196 * If current window is bigger than best seen. Set best 3197 * seen to be current window. 3198 */ 3199 if ((end_curr-bgn_curr+1) > win_best) { 3200 win_best = end_curr-bgn_curr+1; 3201 bgn_best = bgn_curr; 3202 end_best = end_curr; 3203 } 3204 } else { 3205 /* We just saw a failing test. Reset temp edge */ 3206 bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; 3207 end_curr = IO_IO_OUT1_DELAY_MAX + 1; 3208 3209 /* Early exit optimization: if ther remaining delay 3210 chain space is less than already seen largest window 3211 we can exit */ 3212 if ((win_best-1) > 3213 (IO_IO_OUT1_DELAY_MAX - new_dqs - d)) { 3214 break; 3215 } 3216 } 3217 } 3218 3219 /* assign left and right edge for cal and reporting; */ 3220 left_edge[0] = -1*bgn_best; 3221 right_edge[0] = end_best; 3222 3223 debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d\n", __func__, 3224 __LINE__, left_edge[0], right_edge[0]); 3225 3226 /* Move DQS (back to orig) */ 3227 scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs); 3228 3229 /* Move DM */ 3230 3231 /* Find middle of window for the DM bit */ 3232 mid = (left_edge[0] - right_edge[0]) / 2; 3233 3234 /* only move right, since we are not moving DQS/DQ */ 3235 if (mid < 0) 3236 mid = 0; 3237 3238 /* dm_marign should fail if we never find a window */ 3239 if (win_best == 0) 3240 dm_margin = -1; 3241 else 3242 dm_margin = left_edge[0] - mid; 3243 3244 scc_mgr_apply_group_dm_out1_delay(write_group, mid); 3245 addr = (u32)&sdr_scc_mgr->update; 3246 writel(0, SOCFPGA_SDR_ADDRESS + addr); 3247 3248 debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d mid=%d \ 3249 dm_margin=%d\n", __func__, __LINE__, left_edge[0], 3250 right_edge[0], mid, dm_margin); 3251 /* Export values */ 3252 gbl->fom_out += dq_margin + dqs_margin; 3253 3254 debug_cond(DLEVEL == 2, "%s:%d write_center: dq_margin=%d \ 3255 dqs_margin=%d dm_margin=%d\n", __func__, __LINE__, 3256 dq_margin, dqs_margin, dm_margin); 3257 3258 /* 3259 * Do not remove this line as it makes sure all of our 3260 * decisions have been applied. 3261 */ 3262 addr = (u32)&sdr_scc_mgr->update; 3263 writel(0, SOCFPGA_SDR_ADDRESS + addr); 3264 return (dq_margin >= 0) && (dqs_margin >= 0) && (dm_margin >= 0); 3265 } 3266 3267 /* calibrate the write operations */ 3268 static uint32_t rw_mgr_mem_calibrate_writes(uint32_t rank_bgn, uint32_t g, 3269 uint32_t test_bgn) 3270 { 3271 /* update info for sims */ 3272 debug("%s:%d %u %u\n", __func__, __LINE__, g, test_bgn); 3273 3274 reg_file_set_stage(CAL_STAGE_WRITES); 3275 reg_file_set_sub_stage(CAL_SUBSTAGE_WRITES_CENTER); 3276 3277 reg_file_set_group(g); 3278 3279 if (!rw_mgr_mem_calibrate_writes_center(rank_bgn, g, test_bgn)) { 3280 set_failing_group_stage(g, CAL_STAGE_WRITES, 3281 CAL_SUBSTAGE_WRITES_CENTER); 3282 return 0; 3283 } 3284 3285 return 1; 3286 } 3287 3288 /* precharge all banks and activate row 0 in bank "000..." and bank "111..." */ 3289 static void mem_precharge_and_activate(void) 3290 { 3291 uint32_t r; 3292 uint32_t addr; 3293 3294 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { 3295 if (param->skip_ranks[r]) { 3296 /* request to skip the rank */ 3297 continue; 3298 } 3299 3300 /* set rank */ 3301 set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); 3302 3303 /* precharge all banks ... */ 3304 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 3305 writel(RW_MGR_PRECHARGE_ALL, SOCFPGA_SDR_ADDRESS + addr); 3306 3307 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr0; 3308 writel(0x0F, SOCFPGA_SDR_ADDRESS + addr); 3309 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add0; 3310 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT1, SOCFPGA_SDR_ADDRESS + addr); 3311 3312 addr = (u32)&sdr_rw_load_mgr_regs->load_cntr1; 3313 writel(0x0F, SOCFPGA_SDR_ADDRESS + addr); 3314 addr = (u32)&sdr_rw_load_jump_mgr_regs->load_jump_add1; 3315 writel(RW_MGR_ACTIVATE_0_AND_1_WAIT2, SOCFPGA_SDR_ADDRESS + addr); 3316 3317 /* activate rows */ 3318 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET; 3319 writel(RW_MGR_ACTIVATE_0_AND_1, SOCFPGA_SDR_ADDRESS + addr); 3320 } 3321 } 3322 3323 /* Configure various memory related parameters. */ 3324 static void mem_config(void) 3325 { 3326 uint32_t rlat, wlat; 3327 uint32_t rw_wl_nop_cycles; 3328 uint32_t max_latency; 3329 uint32_t addr; 3330 3331 debug("%s:%d\n", __func__, __LINE__); 3332 /* read in write and read latency */ 3333 addr = (u32)&data_mgr->t_wl_add; 3334 wlat = readl(SOCFPGA_SDR_ADDRESS + addr); 3335 3336 addr = (u32)&data_mgr->mem_t_add; 3337 wlat += readl(SOCFPGA_SDR_ADDRESS + addr); 3338 /* WL for hard phy does not include additive latency */ 3339 3340 /* 3341 * add addtional write latency to offset the address/command extra 3342 * clock cycle. We change the AC mux setting causing AC to be delayed 3343 * by one mem clock cycle. Only do this for DDR3 3344 */ 3345 wlat = wlat + 1; 3346 3347 addr = (u32)&data_mgr->t_rl_add; 3348 rlat = readl(SOCFPGA_SDR_ADDRESS + addr); 3349 3350 rw_wl_nop_cycles = wlat - 2; 3351 gbl->rw_wl_nop_cycles = rw_wl_nop_cycles; 3352 3353 /* 3354 * For AV/CV, lfifo is hardened and always runs at full rate so 3355 * max latency in AFI clocks, used here, is correspondingly smaller. 3356 */ 3357 max_latency = (1<<MAX_LATENCY_COUNT_WIDTH)/1 - 1; 3358 /* configure for a burst length of 8 */ 3359 3360 /* write latency */ 3361 /* Adjust Write Latency for Hard PHY */ 3362 wlat = wlat + 1; 3363 3364 /* set a pretty high read latency initially */ 3365 gbl->curr_read_lat = rlat + 16; 3366 3367 if (gbl->curr_read_lat > max_latency) 3368 gbl->curr_read_lat = max_latency; 3369 3370 addr = (u32)&phy_mgr_cfg->phy_rlat; 3371 writel(gbl->curr_read_lat, SOCFPGA_SDR_ADDRESS + addr); 3372 3373 /* advertise write latency */ 3374 gbl->curr_write_lat = wlat; 3375 addr = (u32)&phy_mgr_cfg->afi_wlat; 3376 writel(wlat - 2, SOCFPGA_SDR_ADDRESS + addr); 3377 3378 /* initialize bit slips */ 3379 mem_precharge_and_activate(); 3380 } 3381 3382 /* Set VFIFO and LFIFO to instant-on settings in skip calibration mode */ 3383 static void mem_skip_calibrate(void) 3384 { 3385 uint32_t vfifo_offset; 3386 uint32_t i, j, r; 3387 uint32_t addr; 3388 3389 debug("%s:%d\n", __func__, __LINE__); 3390 /* Need to update every shadow register set used by the interface */ 3391 for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; 3392 r += NUM_RANKS_PER_SHADOW_REG) { 3393 /* 3394 * Set output phase alignment settings appropriate for 3395 * skip calibration. 3396 */ 3397 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { 3398 scc_mgr_set_dqs_en_phase(i, 0); 3399 #if IO_DLL_CHAIN_LENGTH == 6 3400 scc_mgr_set_dqdqs_output_phase(i, 6); 3401 #else 3402 scc_mgr_set_dqdqs_output_phase(i, 7); 3403 #endif 3404 /* 3405 * Case:33398 3406 * 3407 * Write data arrives to the I/O two cycles before write 3408 * latency is reached (720 deg). 3409 * -> due to bit-slip in a/c bus 3410 * -> to allow board skew where dqs is longer than ck 3411 * -> how often can this happen!? 3412 * -> can claim back some ptaps for high freq 3413 * support if we can relax this, but i digress... 3414 * 3415 * The write_clk leads mem_ck by 90 deg 3416 * The minimum ptap of the OPA is 180 deg 3417 * Each ptap has (360 / IO_DLL_CHAIN_LENGH) deg of delay 3418 * The write_clk is always delayed by 2 ptaps 3419 * 3420 * Hence, to make DQS aligned to CK, we need to delay 3421 * DQS by: 3422 * (720 - 90 - 180 - 2 * (360 / IO_DLL_CHAIN_LENGTH)) 3423 * 3424 * Dividing the above by (360 / IO_DLL_CHAIN_LENGTH) 3425 * gives us the number of ptaps, which simplies to: 3426 * 3427 * (1.25 * IO_DLL_CHAIN_LENGTH - 2) 3428 */ 3429 scc_mgr_set_dqdqs_output_phase(i, (1.25 * 3430 IO_DLL_CHAIN_LENGTH - 2)); 3431 } 3432 addr = (u32)&sdr_scc_mgr->dqs_ena; 3433 writel(0xff, SOCFPGA_SDR_ADDRESS + addr); 3434 addr = (u32)&sdr_scc_mgr->dqs_io_ena; 3435 writel(0xff, SOCFPGA_SDR_ADDRESS + addr); 3436 3437 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_GROUP_COUNTER_OFFSET; 3438 for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) { 3439 writel(i, SOCFPGA_SDR_ADDRESS + addr); 3440 } 3441 addr = (u32)&sdr_scc_mgr->dq_ena; 3442 writel(0xff, SOCFPGA_SDR_ADDRESS + addr); 3443 addr = (u32)&sdr_scc_mgr->dm_ena; 3444 writel(0xff, SOCFPGA_SDR_ADDRESS + addr); 3445 addr = (u32)&sdr_scc_mgr->update; 3446 writel(0, SOCFPGA_SDR_ADDRESS + addr); 3447 } 3448 3449 /* Compensate for simulation model behaviour */ 3450 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { 3451 scc_mgr_set_dqs_bus_in_delay(i, 10); 3452 scc_mgr_load_dqs(i); 3453 } 3454 addr = (u32)&sdr_scc_mgr->update; 3455 writel(0, SOCFPGA_SDR_ADDRESS + addr); 3456 3457 /* 3458 * ArriaV has hard FIFOs that can only be initialized by incrementing 3459 * in sequencer. 3460 */ 3461 vfifo_offset = CALIB_VFIFO_OFFSET; 3462 addr = (u32)&phy_mgr_cmd->inc_vfifo_hard_phy; 3463 for (j = 0; j < vfifo_offset; j++) { 3464 writel(0xff, SOCFPGA_SDR_ADDRESS + addr); 3465 } 3466 addr = (u32)&phy_mgr_cmd->fifo_reset; 3467 writel(0, SOCFPGA_SDR_ADDRESS + addr); 3468 3469 /* 3470 * For ACV with hard lfifo, we get the skip-cal setting from 3471 * generation-time constant. 3472 */ 3473 gbl->curr_read_lat = CALIB_LFIFO_OFFSET; 3474 addr = (u32)&phy_mgr_cfg->phy_rlat; 3475 writel(gbl->curr_read_lat, SOCFPGA_SDR_ADDRESS + addr); 3476 } 3477 3478 /* Memory calibration entry point */ 3479 static uint32_t mem_calibrate(void) 3480 { 3481 uint32_t i; 3482 uint32_t rank_bgn, sr; 3483 uint32_t write_group, write_test_bgn; 3484 uint32_t read_group, read_test_bgn; 3485 uint32_t run_groups, current_run; 3486 uint32_t failing_groups = 0; 3487 uint32_t group_failed = 0; 3488 uint32_t sr_failed = 0; 3489 uint32_t addr; 3490 3491 debug("%s:%d\n", __func__, __LINE__); 3492 /* Initialize the data settings */ 3493 3494 gbl->error_substage = CAL_SUBSTAGE_NIL; 3495 gbl->error_stage = CAL_STAGE_NIL; 3496 gbl->error_group = 0xff; 3497 gbl->fom_in = 0; 3498 gbl->fom_out = 0; 3499 3500 mem_config(); 3501 3502 uint32_t bypass_mode = 0x1; 3503 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_GROUP_COUNTER_OFFSET; 3504 for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { 3505 writel(i, SOCFPGA_SDR_ADDRESS + addr); 3506 scc_set_bypass_mode(i, bypass_mode); 3507 } 3508 3509 if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) { 3510 /* 3511 * Set VFIFO and LFIFO to instant-on settings in skip 3512 * calibration mode. 3513 */ 3514 mem_skip_calibrate(); 3515 } else { 3516 for (i = 0; i < NUM_CALIB_REPEAT; i++) { 3517 /* 3518 * Zero all delay chain/phase settings for all 3519 * groups and all shadow register sets. 3520 */ 3521 scc_mgr_zero_all(); 3522 3523 run_groups = ~param->skip_groups; 3524 3525 for (write_group = 0, write_test_bgn = 0; write_group 3526 < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; write_group++, 3527 write_test_bgn += RW_MGR_MEM_DQ_PER_WRITE_DQS) { 3528 /* Initialized the group failure */ 3529 group_failed = 0; 3530 3531 current_run = run_groups & ((1 << 3532 RW_MGR_NUM_DQS_PER_WRITE_GROUP) - 1); 3533 run_groups = run_groups >> 3534 RW_MGR_NUM_DQS_PER_WRITE_GROUP; 3535 3536 if (current_run == 0) 3537 continue; 3538 3539 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_GROUP_COUNTER_OFFSET; 3540 writel(write_group, SOCFPGA_SDR_ADDRESS + addr); 3541 scc_mgr_zero_group(write_group, write_test_bgn, 3542 0); 3543 3544 for (read_group = write_group * 3545 RW_MGR_MEM_IF_READ_DQS_WIDTH / 3546 RW_MGR_MEM_IF_WRITE_DQS_WIDTH, 3547 read_test_bgn = 0; 3548 read_group < (write_group + 1) * 3549 RW_MGR_MEM_IF_READ_DQS_WIDTH / 3550 RW_MGR_MEM_IF_WRITE_DQS_WIDTH && 3551 group_failed == 0; 3552 read_group++, read_test_bgn += 3553 RW_MGR_MEM_DQ_PER_READ_DQS) { 3554 /* Calibrate the VFIFO */ 3555 if (!((STATIC_CALIB_STEPS) & 3556 CALIB_SKIP_VFIFO)) { 3557 if (!rw_mgr_mem_calibrate_vfifo 3558 (read_group, 3559 read_test_bgn)) { 3560 group_failed = 1; 3561 3562 if (!(gbl-> 3563 phy_debug_mode_flags & 3564 PHY_DEBUG_SWEEP_ALL_GROUPS)) { 3565 return 0; 3566 } 3567 } 3568 } 3569 } 3570 3571 /* Calibrate the output side */ 3572 if (group_failed == 0) { 3573 for (rank_bgn = 0, sr = 0; rank_bgn 3574 < RW_MGR_MEM_NUMBER_OF_RANKS; 3575 rank_bgn += 3576 NUM_RANKS_PER_SHADOW_REG, 3577 ++sr) { 3578 sr_failed = 0; 3579 if (!((STATIC_CALIB_STEPS) & 3580 CALIB_SKIP_WRITES)) { 3581 if ((STATIC_CALIB_STEPS) 3582 & CALIB_SKIP_DELAY_SWEEPS) { 3583 /* not needed in quick mode! */ 3584 } else { 3585 /* 3586 * Determine if this set of 3587 * ranks should be skipped 3588 * entirely. 3589 */ 3590 if (!param->skip_shadow_regs[sr]) { 3591 if (!rw_mgr_mem_calibrate_writes 3592 (rank_bgn, write_group, 3593 write_test_bgn)) { 3594 sr_failed = 1; 3595 if (!(gbl-> 3596 phy_debug_mode_flags & 3597 PHY_DEBUG_SWEEP_ALL_GROUPS)) { 3598 return 0; 3599 } 3600 } 3601 } 3602 } 3603 } 3604 if (sr_failed != 0) 3605 group_failed = 1; 3606 } 3607 } 3608 3609 if (group_failed == 0) { 3610 for (read_group = write_group * 3611 RW_MGR_MEM_IF_READ_DQS_WIDTH / 3612 RW_MGR_MEM_IF_WRITE_DQS_WIDTH, 3613 read_test_bgn = 0; 3614 read_group < (write_group + 1) 3615 * RW_MGR_MEM_IF_READ_DQS_WIDTH 3616 / RW_MGR_MEM_IF_WRITE_DQS_WIDTH && 3617 group_failed == 0; 3618 read_group++, read_test_bgn += 3619 RW_MGR_MEM_DQ_PER_READ_DQS) { 3620 if (!((STATIC_CALIB_STEPS) & 3621 CALIB_SKIP_WRITES)) { 3622 if (!rw_mgr_mem_calibrate_vfifo_end 3623 (read_group, read_test_bgn)) { 3624 group_failed = 1; 3625 3626 if (!(gbl->phy_debug_mode_flags 3627 & PHY_DEBUG_SWEEP_ALL_GROUPS)) { 3628 return 0; 3629 } 3630 } 3631 } 3632 } 3633 } 3634 3635 if (group_failed != 0) 3636 failing_groups++; 3637 } 3638 3639 /* 3640 * USER If there are any failing groups then report 3641 * the failure. 3642 */ 3643 if (failing_groups != 0) 3644 return 0; 3645 3646 /* Calibrate the LFIFO */ 3647 if (!((STATIC_CALIB_STEPS) & CALIB_SKIP_LFIFO)) { 3648 /* 3649 * If we're skipping groups as part of debug, 3650 * don't calibrate LFIFO. 3651 */ 3652 if (param->skip_groups == 0) { 3653 if (!rw_mgr_mem_calibrate_lfifo()) 3654 return 0; 3655 } 3656 } 3657 } 3658 } 3659 3660 /* 3661 * Do not remove this line as it makes sure all of our decisions 3662 * have been applied. 3663 */ 3664 addr = (u32)&sdr_scc_mgr->update; 3665 writel(0, SOCFPGA_SDR_ADDRESS + addr); 3666 return 1; 3667 } 3668 3669 static uint32_t run_mem_calibrate(void) 3670 { 3671 uint32_t pass; 3672 uint32_t debug_info; 3673 uint32_t addr; 3674 3675 debug("%s:%d\n", __func__, __LINE__); 3676 3677 /* Reset pass/fail status shown on afi_cal_success/fail */ 3678 addr = (u32)&phy_mgr_cfg->cal_status; 3679 writel(PHY_MGR_CAL_RESET, SOCFPGA_SDR_ADDRESS + addr); 3680 3681 addr = SDR_CTRLGRP_ADDRESS; 3682 /* stop tracking manger */ 3683 uint32_t ctrlcfg = readl(SOCFPGA_SDR_ADDRESS + addr); 3684 3685 addr = SDR_CTRLGRP_ADDRESS; 3686 writel(ctrlcfg & 0xFFBFFFFF, SOCFPGA_SDR_ADDRESS + addr); 3687 3688 initialize(); 3689 rw_mgr_mem_initialize(); 3690 3691 pass = mem_calibrate(); 3692 3693 mem_precharge_and_activate(); 3694 addr = (u32)&phy_mgr_cmd->fifo_reset; 3695 writel(0, SOCFPGA_SDR_ADDRESS + addr); 3696 3697 /* 3698 * Handoff: 3699 * Don't return control of the PHY back to AFI when in debug mode. 3700 */ 3701 if ((gbl->phy_debug_mode_flags & PHY_DEBUG_IN_DEBUG_MODE) == 0) { 3702 rw_mgr_mem_handoff(); 3703 /* 3704 * In Hard PHY this is a 2-bit control: 3705 * 0: AFI Mux Select 3706 * 1: DDIO Mux Select 3707 */ 3708 addr = (u32)&phy_mgr_cfg->mux_sel; 3709 writel(0x2, SOCFPGA_SDR_ADDRESS + addr); 3710 } 3711 3712 addr = SDR_CTRLGRP_ADDRESS; 3713 writel(ctrlcfg, SOCFPGA_SDR_ADDRESS + addr); 3714 3715 if (pass) { 3716 printf("%s: CALIBRATION PASSED\n", __FILE__); 3717 3718 gbl->fom_in /= 2; 3719 gbl->fom_out /= 2; 3720 3721 if (gbl->fom_in > 0xff) 3722 gbl->fom_in = 0xff; 3723 3724 if (gbl->fom_out > 0xff) 3725 gbl->fom_out = 0xff; 3726 3727 /* Update the FOM in the register file */ 3728 debug_info = gbl->fom_in; 3729 debug_info |= gbl->fom_out << 8; 3730 addr = (u32)&sdr_reg_file->fom; 3731 writel(debug_info, SOCFPGA_SDR_ADDRESS + addr); 3732 3733 addr = (u32)&phy_mgr_cfg->cal_debug_info; 3734 writel(debug_info, SOCFPGA_SDR_ADDRESS + addr); 3735 addr = (u32)&phy_mgr_cfg->cal_status; 3736 writel(PHY_MGR_CAL_SUCCESS, SOCFPGA_SDR_ADDRESS + addr); 3737 } else { 3738 printf("%s: CALIBRATION FAILED\n", __FILE__); 3739 3740 debug_info = gbl->error_stage; 3741 debug_info |= gbl->error_substage << 8; 3742 debug_info |= gbl->error_group << 16; 3743 3744 addr = (u32)&sdr_reg_file->failing_stage; 3745 writel(debug_info, SOCFPGA_SDR_ADDRESS + addr); 3746 addr = (u32)&phy_mgr_cfg->cal_debug_info; 3747 writel(debug_info, SOCFPGA_SDR_ADDRESS + addr); 3748 addr = (u32)&phy_mgr_cfg->cal_status; 3749 writel(PHY_MGR_CAL_FAIL, SOCFPGA_SDR_ADDRESS + addr); 3750 3751 /* Update the failing group/stage in the register file */ 3752 debug_info = gbl->error_stage; 3753 debug_info |= gbl->error_substage << 8; 3754 debug_info |= gbl->error_group << 16; 3755 addr = (u32)&sdr_reg_file->failing_stage; 3756 writel(debug_info, SOCFPGA_SDR_ADDRESS + addr); 3757 } 3758 3759 return pass; 3760 } 3761 3762 static void hc_initialize_rom_data(void) 3763 { 3764 uint32_t i; 3765 uint32_t addr; 3766 3767 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_INST_ROM_WRITE_OFFSET; 3768 for (i = 0; i < ARRAY_SIZE(inst_rom_init); i++) { 3769 uint32_t data = inst_rom_init[i]; 3770 writel(data, SOCFPGA_SDR_ADDRESS + addr + (i << 2)); 3771 } 3772 3773 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_AC_ROM_WRITE_OFFSET; 3774 for (i = 0; i < ARRAY_SIZE(ac_rom_init); i++) { 3775 uint32_t data = ac_rom_init[i]; 3776 writel(data, SOCFPGA_SDR_ADDRESS + addr + (i << 2)); 3777 } 3778 } 3779 3780 static void initialize_reg_file(void) 3781 { 3782 uint32_t addr; 3783 3784 /* Initialize the register file with the correct data */ 3785 addr = (u32)&sdr_reg_file->signature; 3786 writel(REG_FILE_INIT_SEQ_SIGNATURE, SOCFPGA_SDR_ADDRESS + addr); 3787 3788 addr = (u32)&sdr_reg_file->debug_data_addr; 3789 writel(0, SOCFPGA_SDR_ADDRESS + addr); 3790 3791 addr = (u32)&sdr_reg_file->cur_stage; 3792 writel(0, SOCFPGA_SDR_ADDRESS + addr); 3793 3794 addr = (u32)&sdr_reg_file->fom; 3795 writel(0, SOCFPGA_SDR_ADDRESS + addr); 3796 3797 addr = (u32)&sdr_reg_file->failing_stage; 3798 writel(0, SOCFPGA_SDR_ADDRESS + addr); 3799 3800 addr = (u32)&sdr_reg_file->debug1; 3801 writel(0, SOCFPGA_SDR_ADDRESS + addr); 3802 3803 addr = (u32)&sdr_reg_file->debug2; 3804 writel(0, SOCFPGA_SDR_ADDRESS + addr); 3805 } 3806 3807 static void initialize_hps_phy(void) 3808 { 3809 uint32_t reg; 3810 uint32_t addr; 3811 /* 3812 * Tracking also gets configured here because it's in the 3813 * same register. 3814 */ 3815 uint32_t trk_sample_count = 7500; 3816 uint32_t trk_long_idle_sample_count = (10 << 16) | 100; 3817 /* 3818 * Format is number of outer loops in the 16 MSB, sample 3819 * count in 16 LSB. 3820 */ 3821 3822 reg = 0; 3823 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ACDELAYEN_SET(2); 3824 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQDELAYEN_SET(1); 3825 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSDELAYEN_SET(1); 3826 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSLOGICDELAYEN_SET(1); 3827 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_RESETDELAYEN_SET(0); 3828 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_LPDDRDIS_SET(1); 3829 /* 3830 * This field selects the intrinsic latency to RDATA_EN/FULL path. 3831 * 00-bypass, 01- add 5 cycles, 10- add 10 cycles, 11- add 15 cycles. 3832 */ 3833 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ADDLATSEL_SET(0); 3834 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_SET( 3835 trk_sample_count); 3836 addr = SDR_CTRLGRP_ADDRESS; 3837 writel(reg, SOCFPGA_SDR_ADDRESS + addr + SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_OFFSET); 3838 3839 reg = 0; 3840 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_SAMPLECOUNT_31_20_SET( 3841 trk_sample_count >> 3842 SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_WIDTH); 3843 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_SET( 3844 trk_long_idle_sample_count); 3845 writel(reg, SOCFPGA_SDR_ADDRESS + addr + SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_OFFSET); 3846 3847 reg = 0; 3848 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_LONGIDLESAMPLECOUNT_31_20_SET( 3849 trk_long_idle_sample_count >> 3850 SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_WIDTH); 3851 writel(reg, SOCFPGA_SDR_ADDRESS + addr + SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_OFFSET); 3852 } 3853 3854 static void initialize_tracking(void) 3855 { 3856 uint32_t concatenated_longidle = 0x0; 3857 uint32_t concatenated_delays = 0x0; 3858 uint32_t concatenated_rw_addr = 0x0; 3859 uint32_t concatenated_refresh = 0x0; 3860 uint32_t trk_sample_count = 7500; 3861 uint32_t dtaps_per_ptap; 3862 uint32_t tmp_delay; 3863 uint32_t addr; 3864 3865 /* 3866 * compute usable version of value in case we skip full 3867 * computation later 3868 */ 3869 dtaps_per_ptap = 0; 3870 tmp_delay = 0; 3871 while (tmp_delay < IO_DELAY_PER_OPA_TAP) { 3872 dtaps_per_ptap++; 3873 tmp_delay += IO_DELAY_PER_DCHAIN_TAP; 3874 } 3875 dtaps_per_ptap--; 3876 3877 concatenated_longidle = concatenated_longidle ^ 10; 3878 /*longidle outer loop */ 3879 concatenated_longidle = concatenated_longidle << 16; 3880 concatenated_longidle = concatenated_longidle ^ 100; 3881 /*longidle sample count */ 3882 concatenated_delays = concatenated_delays ^ 243; 3883 /* trfc, worst case of 933Mhz 4Gb */ 3884 concatenated_delays = concatenated_delays << 8; 3885 concatenated_delays = concatenated_delays ^ 14; 3886 /* trcd, worst case */ 3887 concatenated_delays = concatenated_delays << 8; 3888 concatenated_delays = concatenated_delays ^ 10; 3889 /* vfifo wait */ 3890 concatenated_delays = concatenated_delays << 8; 3891 concatenated_delays = concatenated_delays ^ 4; 3892 /* mux delay */ 3893 3894 concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_IDLE; 3895 concatenated_rw_addr = concatenated_rw_addr << 8; 3896 concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_ACTIVATE_1; 3897 concatenated_rw_addr = concatenated_rw_addr << 8; 3898 concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_SGLE_READ; 3899 concatenated_rw_addr = concatenated_rw_addr << 8; 3900 concatenated_rw_addr = concatenated_rw_addr ^ RW_MGR_PRECHARGE_ALL; 3901 3902 concatenated_refresh = concatenated_refresh ^ RW_MGR_REFRESH_ALL; 3903 concatenated_refresh = concatenated_refresh << 24; 3904 concatenated_refresh = concatenated_refresh ^ 1000; /* trefi */ 3905 3906 /* Initialize the register file with the correct data */ 3907 addr = (u32)&sdr_reg_file->dtaps_per_ptap; 3908 writel(dtaps_per_ptap, SOCFPGA_SDR_ADDRESS + addr); 3909 3910 addr = (u32)&sdr_reg_file->trk_sample_count; 3911 writel(trk_sample_count, SOCFPGA_SDR_ADDRESS + addr); 3912 3913 addr = (u32)&sdr_reg_file->trk_longidle; 3914 writel(concatenated_longidle, SOCFPGA_SDR_ADDRESS + addr); 3915 3916 addr = (u32)&sdr_reg_file->delays; 3917 writel(concatenated_delays, SOCFPGA_SDR_ADDRESS + addr); 3918 3919 addr = (u32)&sdr_reg_file->trk_rw_mgr_addr; 3920 writel(concatenated_rw_addr, SOCFPGA_SDR_ADDRESS + addr); 3921 3922 addr = (u32)&sdr_reg_file->trk_read_dqs_width; 3923 writel(RW_MGR_MEM_IF_READ_DQS_WIDTH, SOCFPGA_SDR_ADDRESS + addr); 3924 3925 addr = (u32)&sdr_reg_file->trk_rfsh; 3926 writel(concatenated_refresh, SOCFPGA_SDR_ADDRESS + addr); 3927 } 3928 3929 int sdram_calibration_full(void) 3930 { 3931 struct param_type my_param; 3932 struct gbl_type my_gbl; 3933 uint32_t pass; 3934 uint32_t i; 3935 3936 param = &my_param; 3937 gbl = &my_gbl; 3938 3939 /* Initialize the debug mode flags */ 3940 gbl->phy_debug_mode_flags = 0; 3941 /* Set the calibration enabled by default */ 3942 gbl->phy_debug_mode_flags |= PHY_DEBUG_ENABLE_CAL_RPT; 3943 /* 3944 * Only sweep all groups (regardless of fail state) by default 3945 * Set enabled read test by default. 3946 */ 3947 #if DISABLE_GUARANTEED_READ 3948 gbl->phy_debug_mode_flags |= PHY_DEBUG_DISABLE_GUARANTEED_READ; 3949 #endif 3950 /* Initialize the register file */ 3951 initialize_reg_file(); 3952 3953 /* Initialize any PHY CSR */ 3954 initialize_hps_phy(); 3955 3956 scc_mgr_initialize(); 3957 3958 initialize_tracking(); 3959 3960 /* USER Enable all ranks, groups */ 3961 for (i = 0; i < RW_MGR_MEM_NUMBER_OF_RANKS; i++) 3962 param->skip_ranks[i] = 0; 3963 for (i = 0; i < NUM_SHADOW_REGS; ++i) 3964 param->skip_shadow_regs[i] = 0; 3965 param->skip_groups = 0; 3966 3967 printf("%s: Preparing to start memory calibration\n", __FILE__); 3968 3969 debug("%s:%d\n", __func__, __LINE__); 3970 debug_cond(DLEVEL == 1, 3971 "DDR3 FULL_RATE ranks=%u cs/dimm=%u dq/dqs=%u,%u vg/dqs=%u,%u ", 3972 RW_MGR_MEM_NUMBER_OF_RANKS, RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM, 3973 RW_MGR_MEM_DQ_PER_READ_DQS, RW_MGR_MEM_DQ_PER_WRITE_DQS, 3974 RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS, 3975 RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS); 3976 debug_cond(DLEVEL == 1, 3977 "dqs=%u,%u dq=%u dm=%u ptap_delay=%u dtap_delay=%u ", 3978 RW_MGR_MEM_IF_READ_DQS_WIDTH, RW_MGR_MEM_IF_WRITE_DQS_WIDTH, 3979 RW_MGR_MEM_DATA_WIDTH, RW_MGR_MEM_DATA_MASK_WIDTH, 3980 IO_DELAY_PER_OPA_TAP, IO_DELAY_PER_DCHAIN_TAP); 3981 debug_cond(DLEVEL == 1, "dtap_dqsen_delay=%u, dll=%u", 3982 IO_DELAY_PER_DQS_EN_DCHAIN_TAP, IO_DLL_CHAIN_LENGTH); 3983 debug_cond(DLEVEL == 1, "max values: en_p=%u dqdqs_p=%u en_d=%u dqs_in_d=%u ", 3984 IO_DQS_EN_PHASE_MAX, IO_DQDQS_OUT_PHASE_MAX, 3985 IO_DQS_EN_DELAY_MAX, IO_DQS_IN_DELAY_MAX); 3986 debug_cond(DLEVEL == 1, "io_in_d=%u io_out1_d=%u io_out2_d=%u ", 3987 IO_IO_IN_DELAY_MAX, IO_IO_OUT1_DELAY_MAX, 3988 IO_IO_OUT2_DELAY_MAX); 3989 debug_cond(DLEVEL == 1, "dqs_in_reserve=%u dqs_out_reserve=%u\n", 3990 IO_DQS_IN_RESERVE, IO_DQS_OUT_RESERVE); 3991 3992 hc_initialize_rom_data(); 3993 3994 /* update info for sims */ 3995 reg_file_set_stage(CAL_STAGE_NIL); 3996 reg_file_set_group(0); 3997 3998 /* 3999 * Load global needed for those actions that require 4000 * some dynamic calibration support. 4001 */ 4002 dyn_calib_steps = STATIC_CALIB_STEPS; 4003 /* 4004 * Load global to allow dynamic selection of delay loop settings 4005 * based on calibration mode. 4006 */ 4007 if (!(dyn_calib_steps & CALIB_SKIP_DELAY_LOOPS)) 4008 skip_delay_mask = 0xff; 4009 else 4010 skip_delay_mask = 0x0; 4011 4012 pass = run_mem_calibrate(); 4013 4014 printf("%s: Calibration complete\n", __FILE__); 4015 return pass; 4016 } 4017