1 #include <assert.h> 2 3 #include <analyzer/analyzer_main.hpp> 4 #include <analyzer/ras-data/ras-data-parser.hpp> 5 #include <hei_main.hpp> 6 #include <hei_util.hpp> 7 #include <util/pdbg.hpp> 8 9 #include <algorithm> 10 #include <limits> 11 #include <string> 12 13 namespace analyzer 14 { 15 //------------------------------------------------------------------------------ 16 17 bool __findRcsOscError(const std::vector<libhei::Signature>& i_list, 18 libhei::Signature& o_rootCause) 19 { 20 // TODO: Consider returning all of them instead of one as root cause. 21 auto itr = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) { 22 return (libhei::hash<libhei::NodeId_t>("TP_LOCAL_FIR") == t.getId() && 23 (42 == t.getBit() || 43 == t.getBit())); 24 }); 25 26 if (i_list.end() != itr) 27 { 28 o_rootCause = *itr; 29 return true; 30 } 31 32 return false; 33 } 34 35 //------------------------------------------------------------------------------ 36 37 bool __findPllUnlock(const std::vector<libhei::Signature>& i_list, 38 libhei::Signature& o_rootCause) 39 { 40 // TODO: Consider returning all of them instead of one as root cause. 41 auto itr = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) { 42 return (libhei::hash<libhei::NodeId_t>("PLL_UNLOCK") == t.getId() && 43 (0 == t.getBit() || 1 == t.getBit())); 44 }); 45 46 if (i_list.end() != itr) 47 { 48 o_rootCause = *itr; 49 return true; 50 } 51 52 return false; 53 } 54 55 //------------------------------------------------------------------------------ 56 57 bool __findIueTh(const std::vector<libhei::Signature>& i_list, 58 libhei::Signature& o_rootCause) 59 { 60 auto itr = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) { 61 return (libhei::hash<libhei::NodeId_t>("RDFFIR") == t.getId() && 62 (17 == t.getBit() || 37 == t.getBit())); 63 }); 64 65 if (i_list.end() != itr) 66 { 67 o_rootCause = *itr; 68 return true; 69 } 70 71 return false; 72 } 73 74 //------------------------------------------------------------------------------ 75 76 bool __findMemoryChannelFailure(const std::vector<libhei::Signature>& i_list, 77 libhei::Signature& o_rootCause, 78 const RasDataParser& i_rasData) 79 { 80 using namespace util::pdbg; 81 82 using func = libhei::NodeId_t (*)(const std::string& i_str); 83 func __hash = libhei::hash<libhei::NodeId_t>; 84 85 static const auto mc_dstl_fir = __hash("MC_DSTL_FIR"); 86 static const auto mc_ustl_fir = __hash("MC_USTL_FIR"); 87 static const auto mc_omi_dl_err_rpt = __hash("MC_OMI_DL_ERR_RPT"); 88 static const auto srqfir = __hash("SRQFIR"); 89 90 for (const auto s : i_list) 91 { 92 // Version 1 of the RAS data files 93 if (1 == i_rasData.getVersion(s)) 94 { 95 const auto targetType = getTrgtType(getTrgt(s.getChip())); 96 const auto id = s.getId(); 97 const auto bit = s.getBit(); 98 const auto attnType = s.getAttnType(); 99 100 // Look for any unit checkstop attentions from OCMBs. 101 if (TYPE_OCMB == targetType) 102 { 103 // Any unit checkstop attentions will trigger a channel failure. 104 if (libhei::ATTN_TYPE_UNIT_CS == attnType) 105 { 106 // If the channel was specifically a firmware initiated 107 // channel fail (SRQFIR[25]) check for any IUE bits that are 108 // on that would have caused that (RDFFIR[17,37]). 109 if ((srqfir == id && 25 == bit) && 110 __findIueTh(i_list, o_rootCause)) 111 { 112 return true; 113 } 114 115 o_rootCause = s; 116 return true; 117 } 118 } 119 // Look for channel failure attentions on processors. 120 else if (TYPE_PROC == targetType) 121 { 122 // TODO: All of these channel failure bits are configurable. 123 // Eventually, we will need some mechanism to check that 124 // config registers for a more accurate analysis. For now, 125 // simply check for all bits that could potentially be 126 // configured to channel failure. 127 128 // Any unit checkstop bit in the MC_DSTL_FIR or MC_USTL_FIR 129 // could be a channel failure. 130 if (libhei::ATTN_TYPE_UNIT_CS == attnType) 131 { 132 // Ignore bits MC_DSTL_FIR[0:7] because they simply indicate 133 // attentions occurred on the attached OCMBs. 134 if ((mc_dstl_fir == id && 8 <= bit) || (mc_ustl_fir == id)) 135 { 136 o_rootCause = s; 137 return true; 138 } 139 } 140 141 // All bits in MC_OMI_DL_ERR_RPT eventually feed into 142 // MC_OMI_DL_FIR[0,20] which are configurable to channel 143 // failure. 144 if (mc_omi_dl_err_rpt == id) 145 { 146 o_rootCause = s; 147 return true; 148 } 149 } 150 } 151 // Version 2 and above of the RAS data files 152 else if (2 <= i_rasData.getVersion(s)) 153 { 154 if (libhei::ATTN_TYPE_UNIT_CS == s.getAttnType() && 155 i_rasData.isFlagSet(s, RasDataParser::RasDataFlags::SUE_SOURCE)) 156 { 157 // Special Cases: 158 // If the channel fail was specifically a firmware initiated 159 // channel fail (SRQFIR[25]) check for any IUE bits that are on 160 // that would have caused that (RDFFIR[17,37]). 161 if ((srqfir == s.getId() && 25 == s.getBit()) && 162 __findIueTh(i_list, o_rootCause)) 163 { 164 return true; 165 } 166 167 // TODO: The proc side channel failure bits are configurable. 168 // Eventually, we will need some mechanism to check the 169 // config registers for a more accurate analysis. For now, 170 // simply check for all bits that could potentially be 171 // configured to channel failure. 172 173 o_rootCause = s; 174 } 175 // The bits in the MC_OMI_DL_ERR_RPT register are a special case. 176 // They are possible channel fail bits but the MC_OMI_DL_FIR they 177 // feed into can't be set up to report UNIT_CS attentions, so they 178 // report as recoverable instead. 179 else if (mc_omi_dl_err_rpt == s.getId()) 180 { 181 o_rootCause = s; 182 return true; 183 } 184 } 185 } 186 187 return false; // default, nothing found 188 } 189 190 //------------------------------------------------------------------------------ 191 192 // Will query if a signature is a potential system checkstop root cause. 193 // attention. Note that this function excludes memory channel failure attentions 194 // which are checked in __findMemoryChannelFailure(). 195 bool __findCsRootCause(const libhei::Signature& i_signature, 196 const RasDataParser& i_rasData) 197 { 198 // Version 1 of the RAS data files. 199 if (1 == i_rasData.getVersion(i_signature)) 200 { 201 using namespace util::pdbg; 202 203 using func = libhei::NodeId_t (*)(const std::string& i_str); 204 func __hash = libhei::hash<libhei::NodeId_t>; 205 206 // PROC registers 207 static const auto eq_core_fir = __hash("EQ_CORE_FIR"); 208 static const auto eq_l2_fir = __hash("EQ_L2_FIR"); 209 static const auto eq_l3_fir = __hash("EQ_L3_FIR"); 210 static const auto eq_ncu_fir = __hash("EQ_NCU_FIR"); 211 static const auto iohs_dlp_fir_oc = __hash("IOHS_DLP_FIR_OC"); 212 static const auto iohs_dlp_fir_smp = __hash("IOHS_DLP_FIR_SMP"); 213 static const auto nx_cq_fir = __hash("NX_CQ_FIR"); 214 static const auto nx_dma_eng_fir = __hash("NX_DMA_ENG_FIR"); 215 static const auto pau_fir_0 = __hash("PAU_FIR_0"); 216 static const auto pau_fir_1 = __hash("PAU_FIR_1"); 217 static const auto pau_fir_2 = __hash("PAU_FIR_2"); 218 static const auto pau_ptl_fir = __hash("PAU_PTL_FIR"); 219 220 // OCMB registers 221 static const auto rdffir = __hash("RDFFIR"); 222 223 const auto targetType = getTrgtType(getTrgt(i_signature.getChip())); 224 const auto id = i_signature.getId(); 225 const auto bit = i_signature.getBit(); 226 227 if (TYPE_PROC == targetType) 228 { 229 if (eq_core_fir == id && 230 (0 == bit || 2 == bit || 3 == bit || 4 == bit || 5 == bit || 231 7 == bit || 8 == bit || 9 == bit || 11 == bit || 12 == bit || 232 13 == bit || 18 == bit || 21 == bit || 22 == bit || 233 24 == bit || 25 == bit || 29 == bit || 31 == bit || 234 32 == bit || 36 == bit || 37 == bit || 38 == bit || 235 43 == bit || 46 == bit || 47 == bit)) 236 { 237 return true; 238 } 239 240 if (eq_l2_fir == id && 241 (1 == bit || 12 == bit || 13 == bit || 17 == bit || 18 == bit || 242 20 == bit || 27 == bit)) 243 { 244 return true; 245 } 246 247 if (eq_l3_fir == id && 248 (2 == bit || 5 == bit || 8 == bit || 11 == bit || 17 == bit)) 249 { 250 return true; 251 } 252 253 if (eq_ncu_fir == id && 254 (3 == bit || 4 == bit || 5 == bit || 7 == bit || 8 == bit || 255 10 == bit || 17 == bit)) 256 { 257 return true; 258 } 259 260 if (iohs_dlp_fir_oc == id && (54 <= bit && bit <= 61)) 261 { 262 return true; 263 } 264 265 if (iohs_dlp_fir_smp == id && (54 <= bit && bit <= 61)) 266 { 267 return true; 268 } 269 270 if (nx_cq_fir == id && (7 == bit || 16 == bit || 21 == bit)) 271 { 272 return true; 273 } 274 275 if (nx_dma_eng_fir == id && (0 == bit)) 276 { 277 return true; 278 } 279 280 if (pau_fir_0 == id && 281 (15 == bit || 18 == bit || 19 == bit || 25 == bit || 282 26 == bit || 29 == bit || 33 == bit || 34 == bit || 283 35 == bit || 40 == bit || 42 == bit || 44 == bit || 45 == bit)) 284 { 285 return true; 286 } 287 288 if (pau_fir_1 == id && 289 (13 == bit || 14 == bit || 15 == bit || 37 == bit || 290 39 == bit || 40 == bit || 41 == bit || 42 == bit)) 291 { 292 return true; 293 } 294 295 if (pau_fir_2 == id && 296 ((4 <= bit && bit <= 18) || (20 <= bit && bit <= 31) || 297 (36 <= bit && bit <= 41) || 45 == bit || 47 == bit || 298 48 == bit || 50 == bit || 51 == bit || 52 == bit)) 299 { 300 return true; 301 } 302 303 if (pau_ptl_fir == id && (4 == bit || 8 == bit)) 304 { 305 return true; 306 } 307 } 308 else if (TYPE_OCMB == targetType) 309 { 310 if (rdffir == id && 311 (14 == bit || 15 == bit || 17 == bit || 37 == bit)) 312 { 313 return true; 314 } 315 } 316 } 317 // Version 2 of the RAS data files. Check if the input signature has the 318 // CS_POSSIBLE or SUE_SOURCE flag set. 319 else if (i_rasData.isFlagSet(i_signature, 320 RasDataParser::RasDataFlags::CS_POSSIBLE) || 321 i_rasData.isFlagSet(i_signature, 322 RasDataParser::RasDataFlags::SUE_SOURCE)) 323 { 324 return true; 325 } 326 327 return false; // default, nothing found 328 } 329 330 //------------------------------------------------------------------------------ 331 332 bool __findCsRootCause_RE(const std::vector<libhei::Signature>& i_list, 333 libhei::Signature& o_rootCause, 334 const RasDataParser& i_rasData) 335 { 336 for (const auto s : i_list) 337 { 338 // Only looking for recoverable attentions. 339 if (libhei::ATTN_TYPE_RECOVERABLE != s.getAttnType()) 340 { 341 continue; 342 } 343 344 if (__findCsRootCause(s, i_rasData)) 345 { 346 o_rootCause = s; 347 return true; 348 } 349 } 350 351 return false; // default, nothing found 352 } 353 354 //------------------------------------------------------------------------------ 355 356 bool __findCsRootCause_UCS(const std::vector<libhei::Signature>& i_list, 357 libhei::Signature& o_rootCause, 358 const RasDataParser& i_rasData) 359 { 360 for (const auto s : i_list) 361 { 362 // Only looking for unit checkstop attentions. 363 if (libhei::ATTN_TYPE_UNIT_CS != s.getAttnType()) 364 { 365 continue; 366 } 367 368 if (__findCsRootCause(s, i_rasData)) 369 { 370 o_rootCause = s; 371 return true; 372 } 373 } 374 375 return false; // default, nothing found 376 } 377 378 //------------------------------------------------------------------------------ 379 380 bool __findNonExternalCs(const std::vector<libhei::Signature>& i_list, 381 libhei::Signature& o_rootCause) 382 { 383 using namespace util::pdbg; 384 385 static const auto pb_ext_fir = libhei::hash<libhei::NodeId_t>("PB_EXT_FIR"); 386 387 for (const auto s : i_list) 388 { 389 const auto targetType = getTrgtType(getTrgt(s.getChip())); 390 const auto id = s.getId(); 391 const auto attnType = s.getAttnType(); 392 393 // Find any processor with system checkstop attention that did not 394 // originate from the PB_EXT_FIR. 395 if ((TYPE_PROC == targetType) && 396 (libhei::ATTN_TYPE_CHECKSTOP == attnType) && (pb_ext_fir != id)) 397 { 398 o_rootCause = s; 399 return true; 400 } 401 } 402 403 return false; // default, nothing found 404 } 405 406 //------------------------------------------------------------------------------ 407 408 bool __findTiRootCause(const std::vector<libhei::Signature>& i_list, 409 libhei::Signature& o_rootCause) 410 { 411 using namespace util::pdbg; 412 413 using func = libhei::NodeId_t (*)(const std::string& i_str); 414 func __hash = libhei::hash<libhei::NodeId_t>; 415 416 // PROC registers 417 static const auto tp_local_fir = __hash("TP_LOCAL_FIR"); 418 static const auto occ_fir = __hash("OCC_FIR"); 419 static const auto pbao_fir = __hash("PBAO_FIR"); 420 static const auto n0_local_fir = __hash("N0_LOCAL_FIR"); 421 static const auto int_cq_fir = __hash("INT_CQ_FIR"); 422 static const auto nx_cq_fir = __hash("NX_CQ_FIR"); 423 static const auto nx_dma_eng_fir = __hash("NX_DMA_ENG_FIR"); 424 static const auto vas_fir = __hash("VAS_FIR"); 425 static const auto n1_local_fir = __hash("N1_LOCAL_FIR"); 426 static const auto mcd_fir = __hash("MCD_FIR"); 427 static const auto pb_station_fir_en_1 = __hash("PB_STATION_FIR_EN_1"); 428 static const auto pb_station_fir_en_2 = __hash("PB_STATION_FIR_EN_2"); 429 static const auto pb_station_fir_en_3 = __hash("PB_STATION_FIR_EN_3"); 430 static const auto pb_station_fir_en_4 = __hash("PB_STATION_FIR_EN_4"); 431 static const auto pb_station_fir_es_1 = __hash("PB_STATION_FIR_ES_1"); 432 static const auto pb_station_fir_es_2 = __hash("PB_STATION_FIR_ES_2"); 433 static const auto pb_station_fir_es_3 = __hash("PB_STATION_FIR_ES_3"); 434 static const auto pb_station_fir_es_4 = __hash("PB_STATION_FIR_ES_4"); 435 static const auto pb_station_fir_eq = __hash("PB_STATION_FIR_EQ"); 436 static const auto psihb_fir = __hash("PSIHB_FIR"); 437 static const auto pbaf_fir = __hash("PBAF_FIR"); 438 static const auto lpc_fir = __hash("LPC_FIR"); 439 static const auto eq_core_fir = __hash("EQ_CORE_FIR"); 440 static const auto eq_l2_fir = __hash("EQ_L2_FIR"); 441 static const auto eq_l3_fir = __hash("EQ_L3_FIR"); 442 static const auto eq_ncu_fir = __hash("EQ_NCU_FIR"); 443 static const auto eq_local_fir = __hash("EQ_LOCAL_FIR"); 444 static const auto eq_qme_fir = __hash("EQ_QME_FIR"); 445 static const auto iohs_local_fir = __hash("IOHS_LOCAL_FIR"); 446 static const auto iohs_dlp_fir_oc = __hash("IOHS_DLP_FIR_OC"); 447 static const auto iohs_dlp_fir_smp = __hash("IOHS_DLP_FIR_SMP"); 448 static const auto mc_local_fir = __hash("MC_LOCAL_FIR"); 449 static const auto mc_fir = __hash("MC_FIR"); 450 static const auto mc_dstl_fir = __hash("MC_DSTL_FIR"); 451 static const auto mc_ustl_fir = __hash("MC_USTL_FIR"); 452 static const auto nmmu_cq_fir = __hash("NMMU_CQ_FIR"); 453 static const auto nmmu_fir = __hash("NMMU_FIR"); 454 static const auto mc_omi_dl = __hash("MC_OMI_DL"); 455 static const auto pau_local_fir = __hash("PAU_LOCAL_FIR"); 456 static const auto pau_ptl_fir = __hash("PAU_PTL_FIR"); 457 static const auto pau_phy_fir = __hash("PAU_PHY_FIR"); 458 static const auto pau_fir_0 = __hash("PAU_FIR_0"); 459 static const auto pau_fir_2 = __hash("PAU_FIR_2"); 460 static const auto pci_local_fir = __hash("PCI_LOCAL_FIR"); 461 static const auto pci_iop_fir = __hash("PCI_IOP_FIR"); 462 static const auto pci_nest_fir = __hash("PCI_NEST_FIR"); 463 464 // OCMB registers 465 static const auto ocmb_lfir = __hash("OCMB_LFIR"); 466 static const auto mmiofir = __hash("MMIOFIR"); 467 static const auto srqfir = __hash("SRQFIR"); 468 static const auto rdffir = __hash("RDFFIR"); 469 static const auto tlxfir = __hash("TLXFIR"); 470 static const auto omi_dl = __hash("OMI_DL"); 471 472 for (const auto& signature : i_list) 473 { 474 const auto targetType = getTrgtType(getTrgt(signature.getChip())); 475 const auto attnType = signature.getAttnType(); 476 const auto id = signature.getId(); 477 const auto bit = signature.getBit(); 478 479 // Only looking for recoverable or unit checkstop attentions. 480 if (libhei::ATTN_TYPE_RECOVERABLE != attnType && 481 libhei::ATTN_TYPE_UNIT_CS != attnType) 482 { 483 continue; 484 } 485 486 // Ignore attentions that should not be blamed as root cause of a TI. 487 // This would include informational only FIRs or correctable errors. 488 if (TYPE_PROC == targetType) 489 { 490 if (tp_local_fir == id && 491 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit || 492 5 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit || 493 11 == bit || 20 == bit || 22 == bit || 23 == bit || 494 24 == bit || 38 == bit || 40 == bit || 41 == bit || 495 46 == bit || 47 == bit || 48 == bit || 55 == bit || 496 56 == bit || 57 == bit || 58 == bit || 59 == bit)) 497 { 498 continue; 499 } 500 501 if (occ_fir == id && 502 (9 == bit || 10 == bit || 15 == bit || 20 == bit || 21 == bit || 503 22 == bit || 23 == bit || 32 == bit || 33 == bit || 504 34 == bit || 36 == bit || 42 == bit || 43 == bit || 505 46 == bit || 47 == bit || 48 == bit || 51 == bit || 506 52 == bit || 53 == bit || 54 == bit || 57 == bit)) 507 { 508 continue; 509 } 510 511 if (pbao_fir == id && 512 (0 == bit || 1 == bit || 2 == bit || 8 == bit || 11 == bit || 513 13 == bit || 15 == bit || 16 == bit || 17 == bit)) 514 { 515 continue; 516 } 517 518 if ((n0_local_fir == id || n1_local_fir == id || 519 iohs_local_fir == id || mc_local_fir == id || 520 pau_local_fir == id || pci_local_fir == id) && 521 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit || 522 5 == bit || 6 == bit || 7 == bit || 8 == bit || 9 == bit || 523 10 == bit || 11 == bit || 20 == bit || 21 == bit)) 524 { 525 continue; 526 } 527 528 if (int_cq_fir == id && 529 (0 == bit || 3 == bit || 5 == bit || 7 == bit || 36 == bit || 530 58 == bit || 59 == bit || 60 == bit)) 531 { 532 continue; 533 } 534 535 if (nx_cq_fir == id && 536 (1 == bit || 4 == bit || 18 == bit || 32 == bit || 33 == bit)) 537 { 538 continue; 539 } 540 541 if (nx_dma_eng_fir == id && 542 (4 == bit || 6 == bit || 9 == bit || 10 == bit || 11 == bit || 543 34 == bit || 35 == bit || 36 == bit || 37 == bit || 39 == bit)) 544 { 545 continue; 546 } 547 548 if (vas_fir == id && 549 (8 == bit || 9 == bit || 11 == bit || 12 == bit || 13 == bit)) 550 { 551 continue; 552 } 553 554 if (mcd_fir == id && (0 == bit)) 555 { 556 continue; 557 } 558 559 if ((pb_station_fir_en_1 == id || pb_station_fir_en_2 == id || 560 pb_station_fir_en_3 == id || pb_station_fir_en_4 == id || 561 pb_station_fir_es_1 == id || pb_station_fir_es_2 == id || 562 pb_station_fir_es_3 == id || pb_station_fir_es_4 == id || 563 pb_station_fir_eq == id) && 564 (9 == bit)) 565 { 566 continue; 567 } 568 569 if (psihb_fir == id && (0 == bit || 23 == bit)) 570 { 571 continue; 572 } 573 574 if (pbaf_fir == id && 575 (0 == bit || 1 == bit || 3 == bit || 4 == bit || 5 == bit || 576 6 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit || 577 11 == bit || 19 == bit || 20 == bit || 21 == bit || 578 28 == bit || 29 == bit || 30 == bit || 31 == bit || 579 32 == bit || 33 == bit || 34 == bit || 35 == bit || 36 == bit)) 580 { 581 continue; 582 } 583 584 if (lpc_fir == id && (5 == bit)) 585 { 586 continue; 587 } 588 589 if (eq_core_fir == id && 590 (0 == bit || 2 == bit || 4 == bit || 7 == bit || 9 == bit || 591 11 == bit || 13 == bit || 18 == bit || 21 == bit || 592 24 == bit || 29 == bit || 31 == bit || 37 == bit || 593 43 == bit || 56 == bit || 57 == bit)) 594 { 595 continue; 596 } 597 598 if (eq_l2_fir == id && 599 (0 == bit || 6 == bit || 11 == bit || 19 == bit || 36 == bit)) 600 { 601 continue; 602 } 603 604 if (eq_l3_fir == id && 605 (3 == bit || 4 == bit || 7 == bit || 10 == bit || 13 == bit)) 606 { 607 continue; 608 } 609 610 if (eq_ncu_fir == id && (9 == bit)) 611 { 612 continue; 613 } 614 615 if (eq_local_fir == id && 616 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 5 == bit || 617 6 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit || 618 11 == bit || 12 == bit || 13 == bit || 14 == bit || 619 15 == bit || 16 == bit || 20 == bit || 21 == bit || 620 22 == bit || 23 == bit || 24 == bit || 25 == bit || 621 26 == bit || 27 == bit || 28 == bit || 29 == bit || 622 30 == bit || 31 == bit || 32 == bit || 33 == bit || 623 34 == bit || 35 == bit || 36 == bit || 37 == bit || 624 38 == bit || 39 == bit)) 625 { 626 continue; 627 } 628 629 if (eq_qme_fir == id && (7 == bit || 25 == bit)) 630 { 631 continue; 632 } 633 634 if (iohs_dlp_fir_oc == id && 635 (6 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit || 636 48 == bit || 49 == bit || 52 == bit || 53 == bit)) 637 { 638 continue; 639 } 640 641 if (iohs_dlp_fir_smp == id && 642 (6 == bit || 7 == bit || 14 == bit || 15 == bit || 16 == bit || 643 17 == bit || 38 == bit || 39 == bit || 44 == bit || 644 45 == bit || 50 == bit || 51 == bit)) 645 { 646 continue; 647 } 648 649 if (mc_fir == id && 650 (5 == bit || 8 == bit || 15 == bit || 16 == bit)) 651 { 652 continue; 653 } 654 655 if (mc_dstl_fir == id && 656 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit || 657 5 == bit || 6 == bit || 7 == bit || 14 == bit || 15 == bit)) 658 { 659 continue; 660 } 661 662 if (mc_ustl_fir == id && 663 (6 == bit || 20 == bit || 33 == bit || 34 == bit)) 664 { 665 continue; 666 } 667 668 if (nmmu_cq_fir == id && (8 == bit || 11 == bit || 14 == bit)) 669 { 670 continue; 671 } 672 673 if (nmmu_fir == id && 674 (0 == bit || 3 == bit || 8 == bit || 9 == bit || 10 == bit || 675 11 == bit || 12 == bit || 13 == bit || 14 == bit || 676 15 == bit || 30 == bit || 31 == bit || 41 == bit)) 677 { 678 continue; 679 } 680 681 if (mc_omi_dl == id && (2 == bit || 3 == bit || 6 == bit || 682 7 == bit || 9 == bit || 10 == bit)) 683 { 684 continue; 685 } 686 687 if (pau_ptl_fir == id && (5 == bit || 9 == bit)) 688 { 689 continue; 690 } 691 692 if (pau_phy_fir == id && 693 (2 == bit || 3 == bit || 6 == bit || 7 == bit || 15 == bit)) 694 { 695 continue; 696 } 697 698 if (pau_fir_0 == id && (13 == bit || 30 == bit || 41 == bit)) 699 { 700 continue; 701 } 702 703 if (pau_fir_2 == id && (19 == bit || 46 == bit || 49 == bit)) 704 { 705 continue; 706 } 707 708 if (pci_iop_fir == id && 709 (0 == bit || 2 == bit || 4 == bit || 6 == bit || 7 == bit || 710 8 == bit || 10 == bit)) 711 { 712 continue; 713 } 714 715 if (pci_nest_fir == id && (2 == bit || 5 == bit)) 716 { 717 continue; 718 } 719 } 720 else if (TYPE_OCMB == targetType) 721 { 722 if (ocmb_lfir == id && 723 (0 == bit || 1 == bit || 2 == bit || 8 == bit || 23 == bit || 724 37 == bit || 63 == bit)) 725 { 726 continue; 727 } 728 729 if (mmiofir == id && (2 == bit)) 730 { 731 continue; 732 } 733 734 if (srqfir == id && 735 (2 == bit || 4 == bit || 14 == bit || 15 == bit || 23 == bit || 736 25 == bit || 28 == bit)) 737 { 738 continue; 739 } 740 741 if (rdffir == id && 742 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit || 743 5 == bit || 6 == bit || 7 == bit || 8 == bit || 9 == bit || 744 18 == bit || 38 == bit || 40 == bit || 41 == bit || 745 45 == bit || 46 == bit)) 746 { 747 continue; 748 } 749 750 if (tlxfir == id && (0 == bit || 9 == bit || 26 == bit)) 751 { 752 continue; 753 } 754 755 if (omi_dl == id && (2 == bit || 3 == bit || 6 == bit || 7 == bit || 756 9 == bit || 10 == bit)) 757 { 758 continue; 759 } 760 } 761 762 // At this point, the attention has not been explicitly ignored. So 763 // return this signature and exit. 764 o_rootCause = signature; 765 return true; 766 } 767 768 return false; // default, nothing found 769 } 770 771 //------------------------------------------------------------------------------ 772 773 bool filterRootCause(AnalysisType i_type, 774 const libhei::IsolationData& i_isoData, 775 libhei::Signature& o_rootCause, 776 const RasDataParser& i_rasData) 777 { 778 // We'll need to make a copy of the list so that the original list is 779 // maintained for the PEL. 780 std::vector<libhei::Signature> list{i_isoData.getSignatureList()}; 781 782 // START WORKAROUND 783 // TODO: Filtering should be data driven. Until that support is available, 784 // use the following isolation rules. 785 786 // Ensure the list is not empty before continuing. 787 if (list.empty()) 788 { 789 return false; // nothing more to do 790 } 791 792 // First, look for any RCS OSC errors. This must always be first because 793 // they can cause downstream PLL unlock attentions. 794 if (__findRcsOscError(list, o_rootCause)) 795 { 796 return true; 797 } 798 799 // Second, look for any PLL unlock attentions. This must always be second 800 // because PLL unlock attentions can cause any number of downstream 801 // attentions, including a system checkstop. 802 if (__findPllUnlock(list, o_rootCause)) 803 { 804 return true; 805 } 806 807 // Regardless of the analysis type, always look for anything that could be 808 // blamed as the root cause of a system checkstop. 809 810 // Memory channel failure attentions will produce SUEs and likely cause 811 // downstream attentions, including a system checkstop. 812 if (__findMemoryChannelFailure(list, o_rootCause, i_rasData)) 813 { 814 return true; 815 } 816 817 // Look for any recoverable attentions that have been identified as a 818 // potential root cause of a system checkstop attention. These would include 819 // any attention that would generate an SUE. Note that is it possible for 820 // recoverables to generate unit checkstop attentions so we must check them 821 // first. 822 if (__findCsRootCause_RE(list, o_rootCause, i_rasData)) 823 { 824 return true; 825 } 826 827 // Look for any unit checkstop attentions (other than memory channel 828 // failures) that have been identified as a potential root cause of a 829 // system checkstop attention. These would include any attention that would 830 // generate an SUE. 831 if (__findCsRootCause_UCS(list, o_rootCause, i_rasData)) 832 { 833 return true; 834 } 835 836 // Look for any system checkstop attentions that originated from within the 837 // chip that reported the attention. In other words, no external checkstop 838 // attentions. 839 if (__findNonExternalCs(list, o_rootCause)) 840 { 841 return true; 842 } 843 844 if (AnalysisType::SYSTEM_CHECKSTOP != i_type) 845 { 846 // No system checkstop root cause attentions were found. Next, look for 847 // any recoverable or unit checkstop attentions that could be associated 848 // with a TI. 849 if (__findTiRootCause(list, o_rootCause)) 850 { 851 return true; 852 } 853 854 if (AnalysisType::TERMINATE_IMMEDIATE != i_type) 855 { 856 // No attentions associated with a system checkstop or TI were 857 // found. Simply, return the first entry in the list. 858 o_rootCause = list.front(); 859 return true; 860 } 861 } 862 863 // END WORKAROUND 864 865 return false; // default, no active attentions found. 866 } 867 868 //------------------------------------------------------------------------------ 869 870 } // namespace analyzer 871