1 #include <assert.h> 2 3 #include <analyzer/analyzer_main.hpp> 4 #include <analyzer/ras-data/ras-data-parser.hpp> 5 #include <hei_main.hpp> 6 #include <hei_util.hpp> 7 #include <util/pdbg.hpp> 8 9 #include <algorithm> 10 #include <limits> 11 #include <string> 12 13 namespace analyzer 14 { 15 //------------------------------------------------------------------------------ 16 17 bool __findRcsOscError(const std::vector<libhei::Signature>& i_list, 18 libhei::Signature& o_rootCause) 19 { 20 // TODO: Consider returning all of them instead of one as root cause. 21 auto itr = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) { 22 return (libhei::hash<libhei::NodeId_t>("TP_LOCAL_FIR") == t.getId() && 23 (42 == t.getBit() || 43 == t.getBit())); 24 }); 25 26 if (i_list.end() != itr) 27 { 28 o_rootCause = *itr; 29 return true; 30 } 31 32 return false; 33 } 34 35 //------------------------------------------------------------------------------ 36 37 bool __findPllUnlock(const std::vector<libhei::Signature>& i_list, 38 libhei::Signature& o_rootCause) 39 { 40 // TODO: Consider returning all of them instead of one as root cause. 41 auto itr = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) { 42 return (libhei::hash<libhei::NodeId_t>("PLL_UNLOCK") == t.getId() && 43 (0 == t.getBit() || 1 == t.getBit())); 44 }); 45 46 if (i_list.end() != itr) 47 { 48 o_rootCause = *itr; 49 return true; 50 } 51 52 return false; 53 } 54 55 //------------------------------------------------------------------------------ 56 57 bool __findIueTh(const std::vector<libhei::Signature>& i_list, 58 libhei::Signature& o_rootCause) 59 { 60 auto itr = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) { 61 return (libhei::hash<libhei::NodeId_t>("RDFFIR") == t.getId() && 62 (17 == t.getBit() || 37 == t.getBit())); 63 }); 64 65 if (i_list.end() != itr) 66 { 67 o_rootCause = *itr; 68 return true; 69 } 70 71 return false; 72 } 73 74 //------------------------------------------------------------------------------ 75 76 bool __findMemoryChannelFailure(const std::vector<libhei::Signature>& i_list, 77 libhei::Signature& o_rootCause, 78 const RasDataParser& i_rasData) 79 { 80 using namespace util::pdbg; 81 82 using func = libhei::NodeId_t (*)(const std::string& i_str); 83 func __hash = libhei::hash<libhei::NodeId_t>; 84 85 static const auto mc_dstl_fir = __hash("MC_DSTL_FIR"); 86 static const auto mc_ustl_fir = __hash("MC_USTL_FIR"); 87 static const auto mc_omi_dl_err_rpt = __hash("MC_OMI_DL_ERR_RPT"); 88 static const auto srqfir = __hash("SRQFIR"); 89 90 for (const auto s : i_list) 91 { 92 // Version 1 of the RAS data files 93 if (1 == i_rasData.getVersion(s)) 94 { 95 const auto targetType = getTrgtType(getTrgt(s.getChip())); 96 const auto id = s.getId(); 97 const auto bit = s.getBit(); 98 const auto attnType = s.getAttnType(); 99 100 // Look for any unit checkstop attentions from OCMBs. 101 if (TYPE_OCMB == targetType) 102 { 103 // Any unit checkstop attentions will trigger a channel failure. 104 if (libhei::ATTN_TYPE_UNIT_CS == attnType) 105 { 106 // If the channel was specifically a firmware initiated 107 // channel fail (SRQFIR[25]) check for any IUE bits that are 108 // on that would have caused that (RDFFIR[17,37]). 109 if ((srqfir == id && 25 == bit) && 110 __findIueTh(i_list, o_rootCause)) 111 { 112 return true; 113 } 114 115 o_rootCause = s; 116 return true; 117 } 118 } 119 // Look for channel failure attentions on processors. 120 else if (TYPE_PROC == targetType) 121 { 122 // TODO: All of these channel failure bits are configurable. 123 // Eventually, we will need some mechanism to check that 124 // config registers for a more accurate analysis. For now, 125 // simply check for all bits that could potentially be 126 // configured to channel failure. 127 128 // Any unit checkstop bit in the MC_DSTL_FIR or MC_USTL_FIR 129 // could be a channel failure. 130 if (libhei::ATTN_TYPE_UNIT_CS == attnType) 131 { 132 // Ignore bits MC_DSTL_FIR[0:7] because they simply indicate 133 // attentions occurred on the attached OCMBs. 134 if ((mc_dstl_fir == id && 8 <= bit) || (mc_ustl_fir == id)) 135 { 136 o_rootCause = s; 137 return true; 138 } 139 } 140 141 // All bits in MC_OMI_DL_ERR_RPT eventually feed into 142 // MC_OMI_DL_FIR[0,20] which are configurable to channel 143 // failure. 144 if (mc_omi_dl_err_rpt == id) 145 { 146 o_rootCause = s; 147 return true; 148 } 149 } 150 } 151 // Version 2 and above of the RAS data files 152 else if (2 <= i_rasData.getVersion(s)) 153 { 154 if (libhei::ATTN_TYPE_UNIT_CS == s.getAttnType() && 155 i_rasData.isFlagSet(s, RasDataParser::RasDataFlags::SUE_SOURCE)) 156 { 157 // Special Cases: 158 // If the channel fail was specifically a firmware initiated 159 // channel fail (SRQFIR[25]) check for any IUE bits that are on 160 // that would have caused that (RDFFIR[17,37]). 161 if ((srqfir == s.getId() && 25 == s.getBit()) && 162 __findIueTh(i_list, o_rootCause)) 163 { 164 return true; 165 } 166 167 // TODO: The proc side channel failure bits are configurable. 168 // Eventually, we will need some mechanism to check the 169 // config registers for a more accurate analysis. For now, 170 // simply check for all bits that could potentially be 171 // configured to channel failure. 172 173 o_rootCause = s; 174 } 175 // The bits in the MC_OMI_DL_ERR_RPT register are a special case. 176 // They are possible channel fail bits but the MC_OMI_DL_FIR they 177 // feed into can't be set up to report UNIT_CS attentions, so they 178 // report as recoverable instead. 179 else if (mc_omi_dl_err_rpt == s.getId()) 180 { 181 o_rootCause = s; 182 return true; 183 } 184 } 185 } 186 187 return false; // default, nothing found 188 } 189 190 //------------------------------------------------------------------------------ 191 192 // Will query if a signature is a potential system checkstop root cause. 193 // attention. Note that this function excludes memory channel failure attentions 194 // which are checked in __findMemoryChannelFailure(). 195 bool __findCsRootCause(const libhei::Signature& i_signature, 196 const RasDataParser& i_rasData) 197 { 198 // Version 1 of the RAS data files. 199 if (1 == i_rasData.getVersion(i_signature)) 200 { 201 using namespace util::pdbg; 202 203 using func = libhei::NodeId_t (*)(const std::string& i_str); 204 func __hash = libhei::hash<libhei::NodeId_t>; 205 206 // PROC registers 207 static const auto eq_core_fir = __hash("EQ_CORE_FIR"); 208 static const auto eq_l2_fir = __hash("EQ_L2_FIR"); 209 static const auto eq_l3_fir = __hash("EQ_L3_FIR"); 210 static const auto eq_ncu_fir = __hash("EQ_NCU_FIR"); 211 static const auto iohs_dlp_fir_oc = __hash("IOHS_DLP_FIR_OC"); 212 static const auto iohs_dlp_fir_smp = __hash("IOHS_DLP_FIR_SMP"); 213 static const auto nx_cq_fir = __hash("NX_CQ_FIR"); 214 static const auto nx_dma_eng_fir = __hash("NX_DMA_ENG_FIR"); 215 static const auto pau_fir_0 = __hash("PAU_FIR_0"); 216 static const auto pau_fir_1 = __hash("PAU_FIR_1"); 217 static const auto pau_fir_2 = __hash("PAU_FIR_2"); 218 static const auto pau_ptl_fir = __hash("PAU_PTL_FIR"); 219 220 // OCMB registers 221 static const auto rdffir = __hash("RDFFIR"); 222 223 const auto targetType = getTrgtType(getTrgt(i_signature.getChip())); 224 const auto id = i_signature.getId(); 225 const auto bit = i_signature.getBit(); 226 227 if (TYPE_PROC == targetType) 228 { 229 if (eq_core_fir == id && 230 (0 == bit || 2 == bit || 3 == bit || 4 == bit || 5 == bit || 231 7 == bit || 8 == bit || 9 == bit || 11 == bit || 12 == bit || 232 13 == bit || 18 == bit || 21 == bit || 22 == bit || 233 24 == bit || 25 == bit || 29 == bit || 31 == bit || 234 32 == bit || 36 == bit || 37 == bit || 38 == bit || 235 43 == bit || 46 == bit || 47 == bit)) 236 { 237 return true; 238 } 239 240 if (eq_l2_fir == id && 241 (1 == bit || 12 == bit || 13 == bit || 17 == bit || 18 == bit || 242 20 == bit || 27 == bit)) 243 { 244 return true; 245 } 246 247 if (eq_l3_fir == id && 248 (2 == bit || 5 == bit || 8 == bit || 11 == bit || 17 == bit)) 249 { 250 return true; 251 } 252 253 if (eq_ncu_fir == id && 254 (3 == bit || 4 == bit || 5 == bit || 7 == bit || 8 == bit || 255 10 == bit || 17 == bit)) 256 { 257 return true; 258 } 259 260 if (iohs_dlp_fir_oc == id && (54 <= bit && bit <= 61)) 261 { 262 return true; 263 } 264 265 if (iohs_dlp_fir_smp == id && (54 <= bit && bit <= 61)) 266 { 267 return true; 268 } 269 270 if (nx_cq_fir == id && (7 == bit || 16 == bit || 21 == bit)) 271 { 272 return true; 273 } 274 275 if (nx_dma_eng_fir == id && (0 == bit)) 276 { 277 return true; 278 } 279 280 if (pau_fir_0 == id && 281 (15 == bit || 18 == bit || 19 == bit || 25 == bit || 282 26 == bit || 29 == bit || 33 == bit || 34 == bit || 283 35 == bit || 40 == bit || 42 == bit || 44 == bit || 45 == bit)) 284 { 285 return true; 286 } 287 288 if (pau_fir_1 == id && 289 (13 == bit || 14 == bit || 15 == bit || 37 == bit || 290 39 == bit || 40 == bit || 41 == bit || 42 == bit)) 291 { 292 return true; 293 } 294 295 if (pau_fir_2 == id && 296 ((4 <= bit && bit <= 18) || (20 <= bit && bit <= 31) || 297 (36 <= bit && bit <= 41) || 45 == bit || 47 == bit || 298 48 == bit || 50 == bit || 51 == bit || 52 == bit)) 299 { 300 return true; 301 } 302 303 if (pau_ptl_fir == id && (4 == bit || 8 == bit)) 304 { 305 return true; 306 } 307 } 308 else if (TYPE_OCMB == targetType) 309 { 310 if (rdffir == id && 311 (14 == bit || 15 == bit || 17 == bit || 37 == bit)) 312 { 313 return true; 314 } 315 } 316 } 317 // Version 2 of the RAS data files. Check if the input signature has the 318 // CS_POSSIBLE or SUE_SOURCE flag set. 319 else if (i_rasData.isFlagSet(i_signature, 320 RasDataParser::RasDataFlags::CS_POSSIBLE) || 321 i_rasData.isFlagSet(i_signature, 322 RasDataParser::RasDataFlags::SUE_SOURCE)) 323 { 324 return true; 325 } 326 327 return false; // default, nothing found 328 } 329 330 //------------------------------------------------------------------------------ 331 332 bool __findCsRootCause_RE(const std::vector<libhei::Signature>& i_list, 333 libhei::Signature& o_rootCause, 334 const RasDataParser& i_rasData) 335 { 336 for (const auto s : i_list) 337 { 338 // Only looking for recoverable attentions. 339 if (libhei::ATTN_TYPE_RECOVERABLE != s.getAttnType()) 340 { 341 continue; 342 } 343 344 if (__findCsRootCause(s, i_rasData)) 345 { 346 o_rootCause = s; 347 return true; 348 } 349 } 350 351 return false; // default, nothing found 352 } 353 354 //------------------------------------------------------------------------------ 355 356 bool __findCsRootCause_UCS(const std::vector<libhei::Signature>& i_list, 357 libhei::Signature& o_rootCause, 358 const RasDataParser& i_rasData) 359 { 360 for (const auto s : i_list) 361 { 362 // Only looking for unit checkstop attentions. 363 if (libhei::ATTN_TYPE_UNIT_CS != s.getAttnType()) 364 { 365 continue; 366 } 367 368 if (__findCsRootCause(s, i_rasData)) 369 { 370 o_rootCause = s; 371 return true; 372 } 373 } 374 375 return false; // default, nothing found 376 } 377 378 //------------------------------------------------------------------------------ 379 380 bool __findNonExternalCs(const std::vector<libhei::Signature>& i_list, 381 libhei::Signature& o_rootCause) 382 { 383 using namespace util::pdbg; 384 385 static const auto pb_ext_fir = libhei::hash<libhei::NodeId_t>("PB_EXT_FIR"); 386 387 for (const auto s : i_list) 388 { 389 const auto targetType = getTrgtType(getTrgt(s.getChip())); 390 const auto id = s.getId(); 391 const auto attnType = s.getAttnType(); 392 393 // Find any processor with system checkstop attention that did not 394 // originate from the PB_EXT_FIR. 395 if ((TYPE_PROC == targetType) && 396 (libhei::ATTN_TYPE_CHECKSTOP == attnType) && (pb_ext_fir != id)) 397 { 398 o_rootCause = s; 399 return true; 400 } 401 } 402 403 return false; // default, nothing found 404 } 405 406 //------------------------------------------------------------------------------ 407 408 bool __findTiRootCause(const std::vector<libhei::Signature>& i_list, 409 libhei::Signature& o_rootCause) 410 { 411 using namespace util::pdbg; 412 413 using func = libhei::NodeId_t (*)(const std::string& i_str); 414 func __hash = libhei::hash<libhei::NodeId_t>; 415 416 // PROC registers 417 static const auto tp_local_fir = __hash("TP_LOCAL_FIR"); 418 static const auto occ_fir = __hash("OCC_FIR"); 419 static const auto pbao_fir = __hash("PBAO_FIR"); 420 static const auto n0_local_fir = __hash("N0_LOCAL_FIR"); 421 static const auto int_cq_fir = __hash("INT_CQ_FIR"); 422 static const auto nx_cq_fir = __hash("NX_CQ_FIR"); 423 static const auto nx_dma_eng_fir = __hash("NX_DMA_ENG_FIR"); 424 static const auto vas_fir = __hash("VAS_FIR"); 425 static const auto n1_local_fir = __hash("N1_LOCAL_FIR"); 426 static const auto mcd_fir = __hash("MCD_FIR"); 427 static const auto pb_station_fir_en_1 = __hash("PB_STATION_FIR_EN_1"); 428 static const auto pb_station_fir_en_2 = __hash("PB_STATION_FIR_EN_2"); 429 static const auto pb_station_fir_en_3 = __hash("PB_STATION_FIR_EN_3"); 430 static const auto pb_station_fir_en_4 = __hash("PB_STATION_FIR_EN_4"); 431 static const auto pb_station_fir_es_1 = __hash("PB_STATION_FIR_ES_1"); 432 static const auto pb_station_fir_es_2 = __hash("PB_STATION_FIR_ES_2"); 433 static const auto pb_station_fir_es_3 = __hash("PB_STATION_FIR_ES_3"); 434 static const auto pb_station_fir_es_4 = __hash("PB_STATION_FIR_ES_4"); 435 static const auto pb_station_fir_eq = __hash("PB_STATION_FIR_EQ"); 436 static const auto psihb_fir = __hash("PSIHB_FIR"); 437 static const auto pbaf_fir = __hash("PBAF_FIR"); 438 static const auto lpc_fir = __hash("LPC_FIR"); 439 static const auto eq_core_fir = __hash("EQ_CORE_FIR"); 440 static const auto eq_l2_fir = __hash("EQ_L2_FIR"); 441 static const auto eq_l3_fir = __hash("EQ_L3_FIR"); 442 static const auto eq_ncu_fir = __hash("EQ_NCU_FIR"); 443 static const auto eq_local_fir = __hash("EQ_LOCAL_FIR"); 444 static const auto eq_qme_fir = __hash("EQ_QME_FIR"); 445 static const auto iohs_local_fir = __hash("IOHS_LOCAL_FIR"); 446 static const auto iohs_dlp_fir_oc = __hash("IOHS_DLP_FIR_OC"); 447 static const auto iohs_dlp_fir_smp = __hash("IOHS_DLP_FIR_SMP"); 448 static const auto mc_local_fir = __hash("MC_LOCAL_FIR"); 449 static const auto mc_fir = __hash("MC_FIR"); 450 static const auto mc_dstl_fir = __hash("MC_DSTL_FIR"); 451 static const auto mc_ustl_fir = __hash("MC_USTL_FIR"); 452 static const auto nmmu_cq_fir = __hash("NMMU_CQ_FIR"); 453 static const auto nmmu_fir = __hash("NMMU_FIR"); 454 static const auto mc_omi_dl = __hash("MC_OMI_DL"); 455 static const auto pau_local_fir = __hash("PAU_LOCAL_FIR"); 456 static const auto pau_ptl_fir = __hash("PAU_PTL_FIR"); 457 static const auto pau_phy_fir = __hash("PAU_PHY_FIR"); 458 static const auto pau_fir_0 = __hash("PAU_FIR_0"); 459 static const auto pau_fir_2 = __hash("PAU_FIR_2"); 460 static const auto pci_local_fir = __hash("PCI_LOCAL_FIR"); 461 static const auto pci_iop_fir = __hash("PCI_IOP_FIR"); 462 static const auto pci_nest_fir = __hash("PCI_NEST_FIR"); 463 464 // OCMB registers 465 static const auto ocmb_lfir = __hash("OCMB_LFIR"); 466 static const auto mmiofir = __hash("MMIOFIR"); 467 static const auto srqfir = __hash("SRQFIR"); 468 static const auto rdffir = __hash("RDFFIR"); 469 static const auto tlxfir = __hash("TLXFIR"); 470 static const auto omi_dl = __hash("OMI_DL"); 471 472 for (const auto& signature : i_list) 473 { 474 const auto targetType = getTrgtType(getTrgt(signature.getChip())); 475 const auto attnType = signature.getAttnType(); 476 const auto id = signature.getId(); 477 const auto bit = signature.getBit(); 478 479 // Only looking for recoverable or unit checkstop attentions. 480 if (libhei::ATTN_TYPE_RECOVERABLE != attnType && 481 libhei::ATTN_TYPE_UNIT_CS != attnType) 482 { 483 continue; 484 } 485 486 // Ignore attentions that should not be blamed as root cause of a TI. 487 // This would include informational only FIRs or correctable errors. 488 if (TYPE_PROC == targetType) 489 { 490 if (tp_local_fir == id && 491 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit || 492 5 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit || 493 11 == bit || 20 == bit || 22 == bit || 23 == bit || 494 24 == bit || 38 == bit || 40 == bit || 41 == bit || 495 46 == bit || 47 == bit || 48 == bit || 55 == bit || 496 56 == bit || 57 == bit || 58 == bit || 59 == bit)) 497 { 498 continue; 499 } 500 501 if (occ_fir == id && 502 (9 == bit || 10 == bit || 15 == bit || 20 == bit || 21 == bit || 503 22 == bit || 23 == bit || 32 == bit || 33 == bit || 504 34 == bit || 36 == bit || 42 == bit || 43 == bit || 505 46 == bit || 47 == bit || 48 == bit || 51 == bit || 506 52 == bit || 53 == bit || 54 == bit || 57 == bit)) 507 { 508 continue; 509 } 510 511 if (pbao_fir == id && 512 (0 == bit || 1 == bit || 2 == bit || 8 == bit || 11 == bit || 513 13 == bit || 15 == bit || 16 == bit || 17 == bit)) 514 { 515 continue; 516 } 517 518 if ((n0_local_fir == id || n1_local_fir == id || 519 iohs_local_fir == id || mc_local_fir == id || 520 pau_local_fir == id || pci_local_fir == id) && 521 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit || 522 5 == bit || 6 == bit || 7 == bit || 8 == bit || 9 == bit || 523 10 == bit || 11 == bit || 20 == bit || 21 == bit)) 524 { 525 continue; 526 } 527 528 if (int_cq_fir == id && 529 (0 == bit || 3 == bit || 5 == bit || 7 == bit || 36 == bit || 530 47 == bit || 48 == bit || 49 == bit || 50 == bit || 531 58 == bit || 59 == bit || 60 == bit)) 532 { 533 continue; 534 } 535 536 if (nx_cq_fir == id && 537 (1 == bit || 4 == bit || 18 == bit || 32 == bit || 33 == bit)) 538 { 539 continue; 540 } 541 542 if (nx_dma_eng_fir == id && 543 (4 == bit || 6 == bit || 9 == bit || 10 == bit || 11 == bit || 544 34 == bit || 35 == bit || 36 == bit || 37 == bit || 39 == bit)) 545 { 546 continue; 547 } 548 549 if (vas_fir == id && 550 (8 == bit || 9 == bit || 11 == bit || 12 == bit || 13 == bit)) 551 { 552 continue; 553 } 554 555 if (mcd_fir == id && (0 == bit)) 556 { 557 continue; 558 } 559 560 if ((pb_station_fir_en_1 == id || pb_station_fir_en_2 == id || 561 pb_station_fir_en_3 == id || pb_station_fir_en_4 == id || 562 pb_station_fir_es_1 == id || pb_station_fir_es_2 == id || 563 pb_station_fir_es_3 == id || pb_station_fir_es_4 == id || 564 pb_station_fir_eq == id) && 565 (9 == bit)) 566 { 567 continue; 568 } 569 570 if (psihb_fir == id && (0 == bit || 23 == bit)) 571 { 572 continue; 573 } 574 575 if (pbaf_fir == id && 576 (0 == bit || 1 == bit || 3 == bit || 4 == bit || 5 == bit || 577 6 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit || 578 11 == bit || 19 == bit || 20 == bit || 21 == bit || 579 28 == bit || 29 == bit || 30 == bit || 31 == bit || 580 32 == bit || 33 == bit || 34 == bit || 35 == bit || 36 == bit)) 581 { 582 continue; 583 } 584 585 if (lpc_fir == id && (5 == bit)) 586 { 587 continue; 588 } 589 590 if (eq_core_fir == id && 591 (0 == bit || 2 == bit || 4 == bit || 7 == bit || 9 == bit || 592 11 == bit || 13 == bit || 18 == bit || 21 == bit || 593 24 == bit || 29 == bit || 31 == bit || 37 == bit || 594 43 == bit || 56 == bit || 57 == bit)) 595 { 596 continue; 597 } 598 599 if (eq_l2_fir == id && 600 (0 == bit || 6 == bit || 11 == bit || 19 == bit || 36 == bit)) 601 { 602 continue; 603 } 604 605 if (eq_l3_fir == id && 606 (3 == bit || 4 == bit || 7 == bit || 10 == bit || 13 == bit)) 607 { 608 continue; 609 } 610 611 if (eq_ncu_fir == id && (9 == bit)) 612 { 613 continue; 614 } 615 616 if (eq_local_fir == id && 617 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 5 == bit || 618 6 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit || 619 11 == bit || 12 == bit || 13 == bit || 14 == bit || 620 15 == bit || 16 == bit || 20 == bit || 21 == bit || 621 22 == bit || 23 == bit || 24 == bit || 25 == bit || 622 26 == bit || 27 == bit || 28 == bit || 29 == bit || 623 30 == bit || 31 == bit || 32 == bit || 33 == bit || 624 34 == bit || 35 == bit || 36 == bit || 37 == bit || 625 38 == bit || 39 == bit)) 626 { 627 continue; 628 } 629 630 if (eq_qme_fir == id && (7 == bit || 25 == bit)) 631 { 632 continue; 633 } 634 635 if (iohs_dlp_fir_oc == id && 636 (6 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit || 637 48 == bit || 49 == bit || 52 == bit || 53 == bit)) 638 { 639 continue; 640 } 641 642 if (iohs_dlp_fir_smp == id && 643 (6 == bit || 7 == bit || 14 == bit || 15 == bit || 16 == bit || 644 17 == bit || 38 == bit || 39 == bit || 44 == bit || 645 45 == bit || 50 == bit || 51 == bit)) 646 { 647 continue; 648 } 649 650 if (mc_fir == id && 651 (5 == bit || 8 == bit || 15 == bit || 16 == bit)) 652 { 653 continue; 654 } 655 656 if (mc_dstl_fir == id && 657 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit || 658 5 == bit || 6 == bit || 7 == bit || 14 == bit || 15 == bit)) 659 { 660 continue; 661 } 662 663 if (mc_ustl_fir == id && 664 (6 == bit || 20 == bit || 33 == bit || 34 == bit)) 665 { 666 continue; 667 } 668 669 if (nmmu_cq_fir == id && (8 == bit || 11 == bit || 14 == bit)) 670 { 671 continue; 672 } 673 674 if (nmmu_fir == id && 675 (0 == bit || 3 == bit || 8 == bit || 9 == bit || 10 == bit || 676 11 == bit || 12 == bit || 13 == bit || 14 == bit || 677 15 == bit || 30 == bit || 31 == bit || 41 == bit)) 678 { 679 continue; 680 } 681 682 if (mc_omi_dl == id && (2 == bit || 3 == bit || 6 == bit || 683 7 == bit || 9 == bit || 10 == bit)) 684 { 685 continue; 686 } 687 688 if (pau_ptl_fir == id && (5 == bit || 9 == bit)) 689 { 690 continue; 691 } 692 693 if (pau_phy_fir == id && 694 (2 == bit || 3 == bit || 6 == bit || 7 == bit || 15 == bit)) 695 { 696 continue; 697 } 698 699 if (pau_fir_0 == id && (13 == bit || 30 == bit || 41 == bit)) 700 { 701 continue; 702 } 703 704 if (pau_fir_2 == id && (19 == bit || 46 == bit || 49 == bit)) 705 { 706 continue; 707 } 708 709 if (pci_iop_fir == id && 710 (0 == bit || 2 == bit || 4 == bit || 6 == bit || 7 == bit || 711 8 == bit || 10 == bit)) 712 { 713 continue; 714 } 715 716 if (pci_nest_fir == id && (2 == bit || 5 == bit)) 717 { 718 continue; 719 } 720 } 721 else if (TYPE_OCMB == targetType) 722 { 723 if (ocmb_lfir == id && 724 (0 == bit || 1 == bit || 2 == bit || 8 == bit || 23 == bit || 725 37 == bit || 63 == bit)) 726 { 727 continue; 728 } 729 730 if (mmiofir == id && (2 == bit)) 731 { 732 continue; 733 } 734 735 if (srqfir == id && 736 (2 == bit || 4 == bit || 14 == bit || 15 == bit || 23 == bit || 737 25 == bit || 28 == bit)) 738 { 739 continue; 740 } 741 742 if (rdffir == id && 743 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit || 744 5 == bit || 6 == bit || 7 == bit || 8 == bit || 9 == bit || 745 18 == bit || 38 == bit || 40 == bit || 41 == bit || 746 45 == bit || 46 == bit)) 747 { 748 continue; 749 } 750 751 if (tlxfir == id && (0 == bit || 9 == bit || 26 == bit)) 752 { 753 continue; 754 } 755 756 if (omi_dl == id && (2 == bit || 3 == bit || 6 == bit || 7 == bit || 757 9 == bit || 10 == bit)) 758 { 759 continue; 760 } 761 } 762 763 // At this point, the attention has not been explicitly ignored. So 764 // return this signature and exit. 765 o_rootCause = signature; 766 return true; 767 } 768 769 return false; // default, nothing found 770 } 771 772 //------------------------------------------------------------------------------ 773 774 bool filterRootCause(AnalysisType i_type, 775 const libhei::IsolationData& i_isoData, 776 libhei::Signature& o_rootCause, 777 const RasDataParser& i_rasData) 778 { 779 // We'll need to make a copy of the list so that the original list is 780 // maintained for the PEL. 781 std::vector<libhei::Signature> list{i_isoData.getSignatureList()}; 782 783 // START WORKAROUND 784 // TODO: Filtering should be data driven. Until that support is available, 785 // use the following isolation rules. 786 787 // Ensure the list is not empty before continuing. 788 if (list.empty()) 789 { 790 return false; // nothing more to do 791 } 792 793 // First, look for any RCS OSC errors. This must always be first because 794 // they can cause downstream PLL unlock attentions. 795 if (__findRcsOscError(list, o_rootCause)) 796 { 797 return true; 798 } 799 800 // Second, look for any PLL unlock attentions. This must always be second 801 // because PLL unlock attentions can cause any number of downstream 802 // attentions, including a system checkstop. 803 if (__findPllUnlock(list, o_rootCause)) 804 { 805 return true; 806 } 807 808 // Regardless of the analysis type, always look for anything that could be 809 // blamed as the root cause of a system checkstop. 810 811 // Memory channel failure attentions will produce SUEs and likely cause 812 // downstream attentions, including a system checkstop. 813 if (__findMemoryChannelFailure(list, o_rootCause, i_rasData)) 814 { 815 return true; 816 } 817 818 // Look for any recoverable attentions that have been identified as a 819 // potential root cause of a system checkstop attention. These would include 820 // any attention that would generate an SUE. Note that is it possible for 821 // recoverables to generate unit checkstop attentions so we must check them 822 // first. 823 if (__findCsRootCause_RE(list, o_rootCause, i_rasData)) 824 { 825 return true; 826 } 827 828 // Look for any unit checkstop attentions (other than memory channel 829 // failures) that have been identified as a potential root cause of a 830 // system checkstop attention. These would include any attention that would 831 // generate an SUE. 832 if (__findCsRootCause_UCS(list, o_rootCause, i_rasData)) 833 { 834 return true; 835 } 836 837 // Look for any system checkstop attentions that originated from within the 838 // chip that reported the attention. In other words, no external checkstop 839 // attentions. 840 if (__findNonExternalCs(list, o_rootCause)) 841 { 842 return true; 843 } 844 845 if (AnalysisType::SYSTEM_CHECKSTOP != i_type) 846 { 847 // No system checkstop root cause attentions were found. Next, look for 848 // any recoverable or unit checkstop attentions that could be associated 849 // with a TI. 850 if (__findTiRootCause(list, o_rootCause)) 851 { 852 return true; 853 } 854 855 if (AnalysisType::TERMINATE_IMMEDIATE != i_type) 856 { 857 // No attentions associated with a system checkstop or TI were 858 // found. Simply, return the first entry in the list. 859 o_rootCause = list.front(); 860 return true; 861 } 862 } 863 864 // END WORKAROUND 865 866 return false; // default, no active attentions found. 867 } 868 869 //------------------------------------------------------------------------------ 870 871 } // namespace analyzer 872