1 /* 2 * Copyright 2008-2014 Freescale Semiconductor, Inc. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * Version 2 as published by the Free Software Foundation. 7 */ 8 9 #include <common.h> 10 #include <fsl_ddr_sdram.h> 11 12 #include <fsl_ddr.h> 13 14 #if defined(CONFIG_SYS_FSL_DDR3) || defined(CONFIG_SYS_FSL_DDR4) 15 static unsigned int 16 compute_cas_latency(const unsigned int ctrl_num, 17 const dimm_params_t *dimm_params, 18 common_timing_params_t *outpdimm, 19 unsigned int number_of_dimms) 20 { 21 unsigned int i; 22 unsigned int common_caslat; 23 unsigned int caslat_actual; 24 unsigned int retry = 16; 25 unsigned int tmp; 26 const unsigned int mclk_ps = get_memory_clk_period_ps(ctrl_num); 27 #ifdef CONFIG_SYS_FSL_DDR3 28 const unsigned int taamax = 20000; 29 #else 30 const unsigned int taamax = 18000; 31 #endif 32 33 /* compute the common CAS latency supported between slots */ 34 tmp = dimm_params[0].caslat_x; 35 for (i = 1; i < number_of_dimms; i++) { 36 if (dimm_params[i].n_ranks) 37 tmp &= dimm_params[i].caslat_x; 38 } 39 common_caslat = tmp; 40 41 /* validate if the memory clk is in the range of dimms */ 42 if (mclk_ps < outpdimm->tckmin_x_ps) { 43 printf("DDR clock (MCLK cycle %u ps) is faster than " 44 "the slowest DIMM(s) (tCKmin %u ps) can support.\n", 45 mclk_ps, outpdimm->tckmin_x_ps); 46 } 47 #ifdef CONFIG_SYS_FSL_DDR4 48 if (mclk_ps > outpdimm->tckmax_ps) { 49 printf("DDR clock (MCLK cycle %u ps) is slower than DIMM(s) (tCKmax %u ps) can support.\n", 50 mclk_ps, outpdimm->tckmax_ps); 51 } 52 #endif 53 /* determine the acutal cas latency */ 54 caslat_actual = (outpdimm->taamin_ps + mclk_ps - 1) / mclk_ps; 55 /* check if the dimms support the CAS latency */ 56 while (!(common_caslat & (1 << caslat_actual)) && retry > 0) { 57 caslat_actual++; 58 retry--; 59 } 60 /* once the caculation of caslat_actual is completed 61 * we must verify that this CAS latency value does not 62 * exceed tAAmax, which is 20 ns for all DDR3 speed grades, 63 * 18ns for all DDR4 speed grades. 64 */ 65 if (caslat_actual * mclk_ps > taamax) { 66 printf("The choosen cas latency %d is too large\n", 67 caslat_actual); 68 } 69 outpdimm->lowest_common_spd_caslat = caslat_actual; 70 debug("lowest_common_spd_caslat is 0x%x\n", caslat_actual); 71 72 return 0; 73 } 74 #else /* for DDR1 and DDR2 */ 75 static unsigned int 76 compute_cas_latency(const unsigned int ctrl_num, 77 const dimm_params_t *dimm_params, 78 common_timing_params_t *outpdimm, 79 unsigned int number_of_dimms) 80 { 81 int i; 82 const unsigned int mclk_ps = get_memory_clk_period_ps(ctrl_num); 83 unsigned int lowest_good_caslat; 84 unsigned int not_ok; 85 unsigned int temp1, temp2; 86 87 debug("using mclk_ps = %u\n", mclk_ps); 88 if (mclk_ps > outpdimm->tckmax_ps) { 89 printf("Warning: DDR clock (%u ps) is slower than DIMM(s) (tCKmax %u ps)\n", 90 mclk_ps, outpdimm->tckmax_ps); 91 } 92 93 /* 94 * Compute a CAS latency suitable for all DIMMs 95 * 96 * Strategy for SPD-defined latencies: compute only 97 * CAS latency defined by all DIMMs. 98 */ 99 100 /* 101 * Step 1: find CAS latency common to all DIMMs using bitwise 102 * operation. 103 */ 104 temp1 = 0xFF; 105 for (i = 0; i < number_of_dimms; i++) { 106 if (dimm_params[i].n_ranks) { 107 temp2 = 0; 108 temp2 |= 1 << dimm_params[i].caslat_x; 109 temp2 |= 1 << dimm_params[i].caslat_x_minus_1; 110 temp2 |= 1 << dimm_params[i].caslat_x_minus_2; 111 /* 112 * If there was no entry for X-2 (X-1) in 113 * the SPD, then caslat_x_minus_2 114 * (caslat_x_minus_1) contains either 255 or 115 * 0xFFFFFFFF because that's what the glorious 116 * __ilog2 function returns for an input of 0. 117 * On 32-bit PowerPC, left shift counts with bit 118 * 26 set (that the value of 255 or 0xFFFFFFFF 119 * will have), cause the destination register to 120 * be 0. That is why this works. 121 */ 122 temp1 &= temp2; 123 } 124 } 125 126 /* 127 * Step 2: check each common CAS latency against tCK of each 128 * DIMM's SPD. 129 */ 130 lowest_good_caslat = 0; 131 temp2 = 0; 132 while (temp1) { 133 not_ok = 0; 134 temp2 = __ilog2(temp1); 135 debug("checking common caslat = %u\n", temp2); 136 137 /* Check if this CAS latency will work on all DIMMs at tCK. */ 138 for (i = 0; i < number_of_dimms; i++) { 139 if (!dimm_params[i].n_ranks) 140 continue; 141 142 if (dimm_params[i].caslat_x == temp2) { 143 if (mclk_ps >= dimm_params[i].tckmin_x_ps) { 144 debug("CL = %u ok on DIMM %u at tCK=%u ps with tCKmin_X_ps of %u\n", 145 temp2, i, mclk_ps, 146 dimm_params[i].tckmin_x_ps); 147 continue; 148 } else { 149 not_ok++; 150 } 151 } 152 153 if (dimm_params[i].caslat_x_minus_1 == temp2) { 154 unsigned int tckmin_x_minus_1_ps 155 = dimm_params[i].tckmin_x_minus_1_ps; 156 if (mclk_ps >= tckmin_x_minus_1_ps) { 157 debug("CL = %u ok on DIMM %u at tCK=%u ps with tckmin_x_minus_1_ps of %u\n", 158 temp2, i, mclk_ps, 159 tckmin_x_minus_1_ps); 160 continue; 161 } else { 162 not_ok++; 163 } 164 } 165 166 if (dimm_params[i].caslat_x_minus_2 == temp2) { 167 unsigned int tckmin_x_minus_2_ps 168 = dimm_params[i].tckmin_x_minus_2_ps; 169 if (mclk_ps >= tckmin_x_minus_2_ps) { 170 debug("CL = %u ok on DIMM %u at tCK=%u ps with tckmin_x_minus_2_ps of %u\n", 171 temp2, i, mclk_ps, 172 tckmin_x_minus_2_ps); 173 continue; 174 } else { 175 not_ok++; 176 } 177 } 178 } 179 180 if (!not_ok) 181 lowest_good_caslat = temp2; 182 183 temp1 &= ~(1 << temp2); 184 } 185 186 debug("lowest common SPD-defined CAS latency = %u\n", 187 lowest_good_caslat); 188 outpdimm->lowest_common_spd_caslat = lowest_good_caslat; 189 190 191 /* 192 * Compute a common 'de-rated' CAS latency. 193 * 194 * The strategy here is to find the *highest* dereated cas latency 195 * with the assumption that all of the DIMMs will support a dereated 196 * CAS latency higher than or equal to their lowest dereated value. 197 */ 198 temp1 = 0; 199 for (i = 0; i < number_of_dimms; i++) 200 temp1 = max(temp1, dimm_params[i].caslat_lowest_derated); 201 202 outpdimm->highest_common_derated_caslat = temp1; 203 debug("highest common dereated CAS latency = %u\n", temp1); 204 205 return 0; 206 } 207 #endif 208 209 /* 210 * compute_lowest_common_dimm_parameters() 211 * 212 * Determine the worst-case DIMM timing parameters from the set of DIMMs 213 * whose parameters have been computed into the array pointed to 214 * by dimm_params. 215 */ 216 unsigned int 217 compute_lowest_common_dimm_parameters(const unsigned int ctrl_num, 218 const dimm_params_t *dimm_params, 219 common_timing_params_t *outpdimm, 220 const unsigned int number_of_dimms) 221 { 222 unsigned int i, j; 223 224 unsigned int tckmin_x_ps = 0; 225 unsigned int tckmax_ps = 0xFFFFFFFF; 226 unsigned int trcd_ps = 0; 227 unsigned int trp_ps = 0; 228 unsigned int tras_ps = 0; 229 #if defined(CONFIG_SYS_FSL_DDR3) || defined(CONFIG_SYS_FSL_DDR4) 230 unsigned int taamin_ps = 0; 231 #endif 232 #ifdef CONFIG_SYS_FSL_DDR4 233 unsigned int twr_ps = 15000; 234 unsigned int trfc1_ps = 0; 235 unsigned int trfc2_ps = 0; 236 unsigned int trfc4_ps = 0; 237 unsigned int trrds_ps = 0; 238 unsigned int trrdl_ps = 0; 239 unsigned int tccdl_ps = 0; 240 #else 241 unsigned int twr_ps = 0; 242 unsigned int twtr_ps = 0; 243 unsigned int trfc_ps = 0; 244 unsigned int trrd_ps = 0; 245 unsigned int trtp_ps = 0; 246 #endif 247 unsigned int trc_ps = 0; 248 unsigned int refresh_rate_ps = 0; 249 unsigned int extended_op_srt = 1; 250 #if defined(CONFIG_SYS_FSL_DDR1) || defined(CONFIG_SYS_FSL_DDR2) 251 unsigned int tis_ps = 0; 252 unsigned int tih_ps = 0; 253 unsigned int tds_ps = 0; 254 unsigned int tdh_ps = 0; 255 unsigned int tdqsq_max_ps = 0; 256 unsigned int tqhs_ps = 0; 257 #endif 258 unsigned int temp1, temp2; 259 unsigned int additive_latency = 0; 260 261 temp1 = 0; 262 for (i = 0; i < number_of_dimms; i++) { 263 /* 264 * If there are no ranks on this DIMM, 265 * it probably doesn't exist, so skip it. 266 */ 267 if (dimm_params[i].n_ranks == 0) { 268 temp1++; 269 continue; 270 } 271 if (dimm_params[i].n_ranks == 4 && i != 0) { 272 printf("Found Quad-rank DIMM in wrong bank, ignored." 273 " Software may not run as expected.\n"); 274 temp1++; 275 continue; 276 } 277 278 /* 279 * check if quad-rank DIMM is plugged if 280 * CONFIG_CHIP_SELECT_QUAD_CAPABLE is not defined 281 * Only the board with proper design is capable 282 */ 283 #ifndef CONFIG_FSL_DDR_FIRST_SLOT_QUAD_CAPABLE 284 if (dimm_params[i].n_ranks == 4 && \ 285 CONFIG_CHIP_SELECTS_PER_CTRL/CONFIG_DIMM_SLOTS_PER_CTLR < 4) { 286 printf("Found Quad-rank DIMM, not able to support."); 287 temp1++; 288 continue; 289 } 290 #endif 291 /* 292 * Find minimum tckmax_ps to find fastest slow speed, 293 * i.e., this is the slowest the whole system can go. 294 */ 295 tckmax_ps = min(tckmax_ps, 296 (unsigned int)dimm_params[i].tckmax_ps); 297 #if defined(CONFIG_SYS_FSL_DDR3) || defined(CONFIG_SYS_FSL_DDR4) 298 taamin_ps = max(taamin_ps, 299 (unsigned int)dimm_params[i].taa_ps); 300 #endif 301 tckmin_x_ps = max(tckmin_x_ps, 302 (unsigned int)dimm_params[i].tckmin_x_ps); 303 trcd_ps = max(trcd_ps, (unsigned int)dimm_params[i].trcd_ps); 304 trp_ps = max(trp_ps, (unsigned int)dimm_params[i].trp_ps); 305 tras_ps = max(tras_ps, (unsigned int)dimm_params[i].tras_ps); 306 #ifdef CONFIG_SYS_FSL_DDR4 307 trfc1_ps = max(trfc1_ps, 308 (unsigned int)dimm_params[i].trfc1_ps); 309 trfc2_ps = max(trfc2_ps, 310 (unsigned int)dimm_params[i].trfc2_ps); 311 trfc4_ps = max(trfc4_ps, 312 (unsigned int)dimm_params[i].trfc4_ps); 313 trrds_ps = max(trrds_ps, 314 (unsigned int)dimm_params[i].trrds_ps); 315 trrdl_ps = max(trrdl_ps, 316 (unsigned int)dimm_params[i].trrdl_ps); 317 tccdl_ps = max(tccdl_ps, 318 (unsigned int)dimm_params[i].tccdl_ps); 319 #else 320 twr_ps = max(twr_ps, (unsigned int)dimm_params[i].twr_ps); 321 twtr_ps = max(twtr_ps, (unsigned int)dimm_params[i].twtr_ps); 322 trfc_ps = max(trfc_ps, (unsigned int)dimm_params[i].trfc_ps); 323 trrd_ps = max(trrd_ps, (unsigned int)dimm_params[i].trrd_ps); 324 trtp_ps = max(trtp_ps, (unsigned int)dimm_params[i].trtp_ps); 325 #endif 326 trc_ps = max(trc_ps, (unsigned int)dimm_params[i].trc_ps); 327 #if defined(CONFIG_SYS_FSL_DDR1) || defined(CONFIG_SYS_FSL_DDR2) 328 tis_ps = max(tis_ps, (unsigned int)dimm_params[i].tis_ps); 329 tih_ps = max(tih_ps, (unsigned int)dimm_params[i].tih_ps); 330 tds_ps = max(tds_ps, (unsigned int)dimm_params[i].tds_ps); 331 tdh_ps = max(tdh_ps, (unsigned int)dimm_params[i].tdh_ps); 332 tqhs_ps = max(tqhs_ps, (unsigned int)dimm_params[i].tqhs_ps); 333 /* 334 * Find maximum tdqsq_max_ps to find slowest. 335 * 336 * FIXME: is finding the slowest value the correct 337 * strategy for this parameter? 338 */ 339 tdqsq_max_ps = max(tdqsq_max_ps, 340 (unsigned int)dimm_params[i].tdqsq_max_ps); 341 #endif 342 refresh_rate_ps = max(refresh_rate_ps, 343 (unsigned int)dimm_params[i].refresh_rate_ps); 344 /* extended_op_srt is either 0 or 1, 0 having priority */ 345 extended_op_srt = min(extended_op_srt, 346 (unsigned int)dimm_params[i].extended_op_srt); 347 } 348 349 outpdimm->ndimms_present = number_of_dimms - temp1; 350 351 if (temp1 == number_of_dimms) { 352 debug("no dimms this memory controller\n"); 353 return 0; 354 } 355 356 outpdimm->tckmin_x_ps = tckmin_x_ps; 357 outpdimm->tckmax_ps = tckmax_ps; 358 #if defined(CONFIG_SYS_FSL_DDR3) || defined(CONFIG_SYS_FSL_DDR4) 359 outpdimm->taamin_ps = taamin_ps; 360 #endif 361 outpdimm->trcd_ps = trcd_ps; 362 outpdimm->trp_ps = trp_ps; 363 outpdimm->tras_ps = tras_ps; 364 #ifdef CONFIG_SYS_FSL_DDR4 365 outpdimm->trfc1_ps = trfc1_ps; 366 outpdimm->trfc2_ps = trfc2_ps; 367 outpdimm->trfc4_ps = trfc4_ps; 368 outpdimm->trrds_ps = trrds_ps; 369 outpdimm->trrdl_ps = trrdl_ps; 370 outpdimm->tccdl_ps = tccdl_ps; 371 #else 372 outpdimm->twtr_ps = twtr_ps; 373 outpdimm->trfc_ps = trfc_ps; 374 outpdimm->trrd_ps = trrd_ps; 375 outpdimm->trtp_ps = trtp_ps; 376 #endif 377 outpdimm->twr_ps = twr_ps; 378 outpdimm->trc_ps = trc_ps; 379 outpdimm->refresh_rate_ps = refresh_rate_ps; 380 outpdimm->extended_op_srt = extended_op_srt; 381 #if defined(CONFIG_SYS_FSL_DDR1) || defined(CONFIG_SYS_FSL_DDR2) 382 outpdimm->tis_ps = tis_ps; 383 outpdimm->tih_ps = tih_ps; 384 outpdimm->tds_ps = tds_ps; 385 outpdimm->tdh_ps = tdh_ps; 386 outpdimm->tdqsq_max_ps = tdqsq_max_ps; 387 outpdimm->tqhs_ps = tqhs_ps; 388 #endif 389 390 /* Determine common burst length for all DIMMs. */ 391 temp1 = 0xff; 392 for (i = 0; i < number_of_dimms; i++) { 393 if (dimm_params[i].n_ranks) { 394 temp1 &= dimm_params[i].burst_lengths_bitmask; 395 } 396 } 397 outpdimm->all_dimms_burst_lengths_bitmask = temp1; 398 399 /* Determine if all DIMMs registered buffered. */ 400 temp1 = temp2 = 0; 401 for (i = 0; i < number_of_dimms; i++) { 402 if (dimm_params[i].n_ranks) { 403 if (dimm_params[i].registered_dimm) { 404 temp1 = 1; 405 #ifndef CONFIG_SPL_BUILD 406 printf("Detected RDIMM %s\n", 407 dimm_params[i].mpart); 408 #endif 409 } else { 410 temp2 = 1; 411 #ifndef CONFIG_SPL_BUILD 412 printf("Detected UDIMM %s\n", 413 dimm_params[i].mpart); 414 #endif 415 } 416 } 417 } 418 419 outpdimm->all_dimms_registered = 0; 420 outpdimm->all_dimms_unbuffered = 0; 421 if (temp1 && !temp2) { 422 outpdimm->all_dimms_registered = 1; 423 } else if (!temp1 && temp2) { 424 outpdimm->all_dimms_unbuffered = 1; 425 } else { 426 printf("ERROR: Mix of registered buffered and unbuffered " 427 "DIMMs detected!\n"); 428 } 429 430 temp1 = 0; 431 if (outpdimm->all_dimms_registered) 432 for (j = 0; j < 16; j++) { 433 outpdimm->rcw[j] = dimm_params[0].rcw[j]; 434 for (i = 1; i < number_of_dimms; i++) { 435 if (!dimm_params[i].n_ranks) 436 continue; 437 if (dimm_params[i].rcw[j] != dimm_params[0].rcw[j]) { 438 temp1 = 1; 439 break; 440 } 441 } 442 } 443 444 if (temp1 != 0) 445 printf("ERROR: Mix different RDIMM detected!\n"); 446 447 /* calculate cas latency for all DDR types */ 448 if (compute_cas_latency(ctrl_num, dimm_params, 449 outpdimm, number_of_dimms)) 450 return 1; 451 452 /* Determine if all DIMMs ECC capable. */ 453 temp1 = 1; 454 for (i = 0; i < number_of_dimms; i++) { 455 if (dimm_params[i].n_ranks && 456 !(dimm_params[i].edc_config & EDC_ECC)) { 457 temp1 = 0; 458 break; 459 } 460 } 461 if (temp1) { 462 debug("all DIMMs ECC capable\n"); 463 } else { 464 debug("Warning: not all DIMMs ECC capable, cant enable ECC\n"); 465 } 466 outpdimm->all_dimms_ecc_capable = temp1; 467 468 /* 469 * Compute additive latency. 470 * 471 * For DDR1, additive latency should be 0. 472 * 473 * For DDR2, with ODT enabled, use "a value" less than ACTTORW, 474 * which comes from Trcd, and also note that: 475 * add_lat + caslat must be >= 4 476 * 477 * For DDR3, we use the AL=0 478 * 479 * When to use additive latency for DDR2: 480 * 481 * I. Because you are using CL=3 and need to do ODT on writes and 482 * want functionality. 483 * 1. Are you going to use ODT? (Does your board not have 484 * additional termination circuitry for DQ, DQS, DQS_, 485 * DM, RDQS, RDQS_ for x4/x8 configs?) 486 * 2. If so, is your lowest supported CL going to be 3? 487 * 3. If so, then you must set AL=1 because 488 * 489 * WL >= 3 for ODT on writes 490 * RL = AL + CL 491 * WL = RL - 1 492 * -> 493 * WL = AL + CL - 1 494 * AL + CL - 1 >= 3 495 * AL + CL >= 4 496 * QED 497 * 498 * RL >= 3 for ODT on reads 499 * RL = AL + CL 500 * 501 * Since CL aren't usually less than 2, AL=0 is a minimum, 502 * so the WL-derived AL should be the -- FIXME? 503 * 504 * II. Because you are using auto-precharge globally and want to 505 * use additive latency (posted CAS) to get more bandwidth. 506 * 1. Are you going to use auto-precharge mode globally? 507 * 508 * Use addtivie latency and compute AL to be 1 cycle less than 509 * tRCD, i.e. the READ or WRITE command is in the cycle 510 * immediately following the ACTIVATE command.. 511 * 512 * III. Because you feel like it or want to do some sort of 513 * degraded-performance experiment. 514 * 1. Do you just want to use additive latency because you feel 515 * like it? 516 * 517 * Validation: AL is less than tRCD, and within the other 518 * read-to-precharge constraints. 519 */ 520 521 additive_latency = 0; 522 523 #if defined(CONFIG_SYS_FSL_DDR2) 524 if ((outpdimm->lowest_common_spd_caslat < 4) && 525 (picos_to_mclk(ctrl_num, trcd_ps) > 526 outpdimm->lowest_common_spd_caslat)) { 527 additive_latency = picos_to_mclk(ctrl_num, trcd_ps) - 528 outpdimm->lowest_common_spd_caslat; 529 if (mclk_to_picos(ctrl_num, additive_latency) > trcd_ps) { 530 additive_latency = picos_to_mclk(ctrl_num, trcd_ps); 531 debug("setting additive_latency to %u because it was " 532 " greater than tRCD_ps\n", additive_latency); 533 } 534 } 535 #endif 536 537 /* 538 * Validate additive latency 539 * 540 * AL <= tRCD(min) 541 */ 542 if (mclk_to_picos(ctrl_num, additive_latency) > trcd_ps) { 543 printf("Error: invalid additive latency exceeds tRCD(min).\n"); 544 return 1; 545 } 546 547 /* 548 * RL = CL + AL; RL >= 3 for ODT_RD_CFG to be enabled 549 * WL = RL - 1; WL >= 3 for ODT_WL_CFG to be enabled 550 * ADD_LAT (the register) must be set to a value less 551 * than ACTTORW if WL = 1, then AL must be set to 1 552 * RD_TO_PRE (the register) must be set to a minimum 553 * tRTP + AL if AL is nonzero 554 */ 555 556 /* 557 * Additive latency will be applied only if the memctl option to 558 * use it. 559 */ 560 outpdimm->additive_latency = additive_latency; 561 562 debug("tCKmin_ps = %u\n", outpdimm->tckmin_x_ps); 563 debug("trcd_ps = %u\n", outpdimm->trcd_ps); 564 debug("trp_ps = %u\n", outpdimm->trp_ps); 565 debug("tras_ps = %u\n", outpdimm->tras_ps); 566 #ifdef CONFIG_SYS_FSL_DDR4 567 debug("trfc1_ps = %u\n", trfc1_ps); 568 debug("trfc2_ps = %u\n", trfc2_ps); 569 debug("trfc4_ps = %u\n", trfc4_ps); 570 debug("trrds_ps = %u\n", trrds_ps); 571 debug("trrdl_ps = %u\n", trrdl_ps); 572 debug("tccdl_ps = %u\n", tccdl_ps); 573 #else 574 debug("twtr_ps = %u\n", outpdimm->twtr_ps); 575 debug("trfc_ps = %u\n", outpdimm->trfc_ps); 576 debug("trrd_ps = %u\n", outpdimm->trrd_ps); 577 #endif 578 debug("twr_ps = %u\n", outpdimm->twr_ps); 579 debug("trc_ps = %u\n", outpdimm->trc_ps); 580 581 return 0; 582 } 583