1 #include <assert.h> 2 3 #include <analyzer_main.hpp> 4 #include <hei_main.hpp> 5 #include <hei_util.hpp> 6 #include <util/pdbg.hpp> 7 8 #include <algorithm> 9 #include <limits> 10 #include <string> 11 12 namespace analyzer 13 { 14 //------------------------------------------------------------------------------ 15 16 bool __findRcsOscError(const std::vector<libhei::Signature>& i_list, 17 libhei::Signature& o_rootCause) 18 { 19 // TODO: Consider returning all of them instead of one as root cause. 20 auto itr = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) { 21 return (libhei::hash<libhei::NodeId_t>("TP_LOCAL_FIR") == t.getId() && 22 (42 == t.getBit() || 43 == t.getBit())); 23 }); 24 25 if (i_list.end() != itr) 26 { 27 o_rootCause = *itr; 28 return true; 29 } 30 31 return false; 32 } 33 34 //------------------------------------------------------------------------------ 35 36 bool __findPllUnlock(const std::vector<libhei::Signature>& i_list, 37 libhei::Signature& o_rootCause) 38 { 39 // TODO: Consider returning all of them instead of one as root cause. 40 auto itr = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) { 41 return (libhei::hash<libhei::NodeId_t>("PLL_UNLOCK") == t.getId() && 42 (0 == t.getBit() || 1 == t.getBit())); 43 }); 44 45 if (i_list.end() != itr) 46 { 47 o_rootCause = *itr; 48 return true; 49 } 50 51 return false; 52 } 53 54 //------------------------------------------------------------------------------ 55 56 bool __findIueTh(const std::vector<libhei::Signature>& i_list, 57 libhei::Signature& o_rootCause) 58 { 59 auto itr = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) { 60 return (libhei::hash<libhei::NodeId_t>("RDFFIR") == t.getId() && 61 (17 == t.getBit() || 37 == t.getBit())); 62 }); 63 64 if (i_list.end() != itr) 65 { 66 o_rootCause = *itr; 67 return true; 68 } 69 70 return false; 71 } 72 73 //------------------------------------------------------------------------------ 74 75 bool __findMemoryChannelFailure(const std::vector<libhei::Signature>& i_list, 76 libhei::Signature& o_rootCause) 77 { 78 using namespace util::pdbg; 79 80 using func = libhei::NodeId_t (*)(const std::string& i_str); 81 func __hash = libhei::hash<libhei::NodeId_t>; 82 83 static const auto mc_dstl_fir = __hash("MC_DSTL_FIR"); 84 static const auto mc_ustl_fir = __hash("MC_USTL_FIR"); 85 static const auto mc_omi_dl_err_rpt = __hash("MC_OMI_DL_ERR_RPT"); 86 87 for (const auto s : i_list) 88 { 89 const auto targetType = getTrgtType(getTrgt(s.getChip())); 90 const auto id = s.getId(); 91 const auto bit = s.getBit(); 92 const auto attnType = s.getAttnType(); 93 94 // Look for any unit checkstop attentions from OCMBs. 95 if (TYPE_OCMB == targetType) 96 { 97 // Any unit checkstop attentions will trigger a channel failure. 98 if (libhei::ATTN_TYPE_UNIT_CS == attnType) 99 { 100 static const auto srqfir = __hash("SRQFIR"); 101 102 // If the channel was specifically a firmware initiated channel 103 // fail (SRQFIR[25]) check for any IUE bits that are on that 104 // would have caused that (RDFFIR[17,37]). 105 if ((srqfir == id && 25 == bit) && 106 __findIueTh(i_list, o_rootCause)) 107 { 108 return true; 109 } 110 111 o_rootCause = s; 112 return true; 113 } 114 } 115 // Look for channel failure attentions on processors. 116 else if (TYPE_PROC == targetType) 117 { 118 // TODO: All of these channel failure bits are configurable. 119 // Eventually, we will need some mechanism to check that 120 // config registers for a more accurate analysis. For now, 121 // simply check for all bits that could potentially be 122 // configured to channel failure. 123 124 // Any unit checkstop bit in the MC_DSTL_FIR or MC_USTL_FIR could 125 // be a channel failure. 126 if (libhei::ATTN_TYPE_UNIT_CS == attnType) 127 { 128 // Ignore bits MC_DSTL_FIR[0:7] because they simply indicate 129 // attentions occurred on the attached OCMBs. 130 if ((mc_dstl_fir == id && 8 <= bit) || (mc_ustl_fir == id)) 131 { 132 o_rootCause = s; 133 return true; 134 } 135 } 136 137 // All bits in MC_OMI_DL_ERR_RPT eventually feed into 138 // MC_OMI_DL_FIR[0,20] which are configurable to channel failure. 139 if (mc_omi_dl_err_rpt == id) 140 { 141 o_rootCause = s; 142 return true; 143 } 144 } 145 } 146 147 return false; // default, nothing found 148 } 149 150 //------------------------------------------------------------------------------ 151 152 // Will query if a signature is a potential system checkstop root cause. 153 // attention. Note that this function excludes memory channel failure attentions 154 // which are checked in __findMemoryChannelFailure(). 155 bool __findCsRootCause(const libhei::Signature& i_signature) 156 { 157 using namespace util::pdbg; 158 159 using func = libhei::NodeId_t (*)(const std::string& i_str); 160 func __hash = libhei::hash<libhei::NodeId_t>; 161 162 // PROC registers 163 static const auto eq_core_fir = __hash("EQ_CORE_FIR"); 164 static const auto eq_l2_fir = __hash("EQ_L2_FIR"); 165 static const auto eq_l3_fir = __hash("EQ_L3_FIR"); 166 static const auto eq_ncu_fir = __hash("EQ_NCU_FIR"); 167 static const auto iohs_dlp_fir_oc = __hash("IOHS_DLP_FIR_OC"); 168 static const auto iohs_dlp_fir_smp = __hash("IOHS_DLP_FIR_SMP"); 169 static const auto nx_cq_fir = __hash("NX_CQ_FIR"); 170 static const auto nx_dma_eng_fir = __hash("NX_DMA_ENG_FIR"); 171 static const auto pau_fir_0 = __hash("PAU_FIR_0"); 172 static const auto pau_fir_1 = __hash("PAU_FIR_1"); 173 static const auto pau_fir_2 = __hash("PAU_FIR_2"); 174 static const auto pau_ptl_fir = __hash("PAU_PTL_FIR"); 175 176 // OCMB registers 177 static const auto rdffir = __hash("RDFFIR"); 178 179 const auto targetType = getTrgtType(getTrgt(i_signature.getChip())); 180 const auto id = i_signature.getId(); 181 const auto bit = i_signature.getBit(); 182 183 if (TYPE_PROC == targetType) 184 { 185 if (eq_core_fir == id && 186 (0 == bit || 2 == bit || 3 == bit || 4 == bit || 5 == bit || 187 7 == bit || 8 == bit || 9 == bit || 11 == bit || 12 == bit || 188 13 == bit || 18 == bit || 21 == bit || 22 == bit || 24 == bit || 189 25 == bit || 29 == bit || 31 == bit || 32 == bit || 36 == bit || 190 37 == bit || 38 == bit || 43 == bit || 46 == bit || 47 == bit)) 191 { 192 return true; 193 } 194 195 if (eq_l2_fir == id && 196 (1 == bit || 12 == bit || 13 == bit || 17 == bit || 18 == bit || 197 20 == bit || 27 == bit)) 198 { 199 return true; 200 } 201 202 if (eq_l3_fir == id && 203 (2 == bit || 5 == bit || 8 == bit || 11 == bit || 17 == bit)) 204 { 205 return true; 206 } 207 208 if (eq_ncu_fir == id && (3 == bit || 4 == bit || 5 == bit || 7 == bit || 209 8 == bit || 10 == bit || 17 == bit)) 210 { 211 return true; 212 } 213 214 if (iohs_dlp_fir_oc == id && (54 <= bit && bit <= 61)) 215 { 216 return true; 217 } 218 219 if (iohs_dlp_fir_smp == id && (54 <= bit && bit <= 61)) 220 { 221 return true; 222 } 223 224 if (nx_cq_fir == id && (7 == bit || 16 == bit || 21 == bit)) 225 { 226 return true; 227 } 228 229 if (nx_dma_eng_fir == id && (0 == bit)) 230 { 231 return true; 232 } 233 234 if (pau_fir_0 == id && 235 (15 == bit || 18 == bit || 19 == bit || 25 == bit || 26 == bit || 236 29 == bit || 33 == bit || 34 == bit || 35 == bit || 40 == bit || 237 42 == bit || 44 == bit || 45 == bit)) 238 { 239 return true; 240 } 241 242 if (pau_fir_1 == id && 243 (13 == bit || 14 == bit || 15 == bit || 37 == bit || 39 == bit || 244 40 == bit || 41 == bit || 42 == bit)) 245 { 246 return true; 247 } 248 249 if (pau_fir_2 == id && 250 ((4 <= bit && bit <= 18) || (20 <= bit && bit <= 31) || 251 (36 <= bit && bit <= 41) || 45 == bit || 47 == bit || 48 == bit || 252 50 == bit || 51 == bit || 52 == bit)) 253 { 254 return true; 255 } 256 257 if (pau_ptl_fir == id && (4 == bit || 8 == bit)) 258 { 259 return true; 260 } 261 } 262 else if (TYPE_OCMB == targetType) 263 { 264 if (rdffir == id && (14 == bit || 15 == bit || 17 == bit || 37 == bit)) 265 { 266 return true; 267 } 268 } 269 270 return false; // default, nothing found 271 } 272 273 //------------------------------------------------------------------------------ 274 275 bool __findCsRootCause_RE(const std::vector<libhei::Signature>& i_list, 276 libhei::Signature& o_rootCause) 277 { 278 for (const auto s : i_list) 279 { 280 // Only looking for recoverable attentions. 281 if (libhei::ATTN_TYPE_RECOVERABLE != s.getAttnType()) 282 { 283 continue; 284 } 285 286 if (__findCsRootCause(s)) 287 { 288 o_rootCause = s; 289 return true; 290 } 291 } 292 293 return false; // default, nothing found 294 } 295 296 //------------------------------------------------------------------------------ 297 298 bool __findCsRootCause_UCS(const std::vector<libhei::Signature>& i_list, 299 libhei::Signature& o_rootCause) 300 { 301 for (const auto s : i_list) 302 { 303 // Only looking for unit checkstop attentions. 304 if (libhei::ATTN_TYPE_UNIT_CS != s.getAttnType()) 305 { 306 continue; 307 } 308 309 if (__findCsRootCause(s)) 310 { 311 o_rootCause = s; 312 return true; 313 } 314 } 315 316 return false; // default, nothing found 317 } 318 319 //------------------------------------------------------------------------------ 320 321 bool __findNonExternalCs(const std::vector<libhei::Signature>& i_list, 322 libhei::Signature& o_rootCause) 323 { 324 using namespace util::pdbg; 325 326 static const auto pb_ext_fir = libhei::hash<libhei::NodeId_t>("PB_EXT_FIR"); 327 328 for (const auto s : i_list) 329 { 330 const auto targetType = getTrgtType(getTrgt(s.getChip())); 331 const auto id = s.getId(); 332 const auto attnType = s.getAttnType(); 333 334 // Find any processor with system checkstop attention that did not 335 // originate from the PB_EXT_FIR. 336 if ((TYPE_PROC == targetType) && 337 (libhei::ATTN_TYPE_CHECKSTOP == attnType) && (pb_ext_fir != id)) 338 { 339 o_rootCause = s; 340 return true; 341 } 342 } 343 344 return false; // default, nothing found 345 } 346 347 //------------------------------------------------------------------------------ 348 349 bool __findTiRootCause(const std::vector<libhei::Signature>& i_list, 350 libhei::Signature& o_rootCause) 351 { 352 using namespace util::pdbg; 353 354 using func = libhei::NodeId_t (*)(const std::string& i_str); 355 func __hash = libhei::hash<libhei::NodeId_t>; 356 357 // PROC registers 358 static const auto tp_local_fir = __hash("TP_LOCAL_FIR"); 359 static const auto occ_fir = __hash("OCC_FIR"); 360 static const auto pbao_fir = __hash("PBAO_FIR"); 361 static const auto n0_local_fir = __hash("N0_LOCAL_FIR"); 362 static const auto int_cq_fir = __hash("INT_CQ_FIR"); 363 static const auto nx_cq_fir = __hash("NX_CQ_FIR"); 364 static const auto nx_dma_eng_fir = __hash("NX_DMA_ENG_FIR"); 365 static const auto vas_fir = __hash("VAS_FIR"); 366 static const auto n1_local_fir = __hash("N1_LOCAL_FIR"); 367 static const auto mcd_fir = __hash("MCD_FIR"); 368 static const auto pb_station_fir_en_1 = __hash("PB_STATION_FIR_EN_1"); 369 static const auto pb_station_fir_en_2 = __hash("PB_STATION_FIR_EN_2"); 370 static const auto pb_station_fir_en_3 = __hash("PB_STATION_FIR_EN_3"); 371 static const auto pb_station_fir_en_4 = __hash("PB_STATION_FIR_EN_4"); 372 static const auto pb_station_fir_es_1 = __hash("PB_STATION_FIR_ES_1"); 373 static const auto pb_station_fir_es_2 = __hash("PB_STATION_FIR_ES_2"); 374 static const auto pb_station_fir_es_3 = __hash("PB_STATION_FIR_ES_3"); 375 static const auto pb_station_fir_es_4 = __hash("PB_STATION_FIR_ES_4"); 376 static const auto pb_station_fir_eq = __hash("PB_STATION_FIR_EQ"); 377 static const auto psihb_fir = __hash("PSIHB_FIR"); 378 static const auto pbaf_fir = __hash("PBAF_FIR"); 379 static const auto lpc_fir = __hash("LPC_FIR"); 380 static const auto eq_core_fir = __hash("EQ_CORE_FIR"); 381 static const auto eq_l2_fir = __hash("EQ_L2_FIR"); 382 static const auto eq_l3_fir = __hash("EQ_L3_FIR"); 383 static const auto eq_ncu_fir = __hash("EQ_NCU_FIR"); 384 static const auto eq_local_fir = __hash("EQ_LOCAL_FIR"); 385 static const auto eq_qme_fir = __hash("EQ_QME_FIR"); 386 static const auto iohs_local_fir = __hash("IOHS_LOCAL_FIR"); 387 static const auto iohs_dlp_fir_oc = __hash("IOHS_DLP_FIR_OC"); 388 static const auto iohs_dlp_fir_smp = __hash("IOHS_DLP_FIR_SMP"); 389 static const auto mc_local_fir = __hash("MC_LOCAL_FIR"); 390 static const auto mc_fir = __hash("MC_FIR"); 391 static const auto mc_dstl_fir = __hash("MC_DSTL_FIR"); 392 static const auto mc_ustl_fir = __hash("MC_USTL_FIR"); 393 static const auto nmmu_cq_fir = __hash("NMMU_CQ_FIR"); 394 static const auto nmmu_fir = __hash("NMMU_FIR"); 395 static const auto mc_omi_dl = __hash("MC_OMI_DL"); 396 static const auto pau_local_fir = __hash("PAU_LOCAL_FIR"); 397 static const auto pau_ptl_fir = __hash("PAU_PTL_FIR"); 398 static const auto pau_phy_fir = __hash("PAU_PHY_FIR"); 399 static const auto pau_fir_0 = __hash("PAU_FIR_0"); 400 static const auto pau_fir_2 = __hash("PAU_FIR_2"); 401 static const auto pci_local_fir = __hash("PCI_LOCAL_FIR"); 402 static const auto pci_iop_fir = __hash("PCI_IOP_FIR"); 403 static const auto pci_nest_fir = __hash("PCI_NEST_FIR"); 404 405 // OCMB registers 406 static const auto ocmb_lfir = __hash("OCMB_LFIR"); 407 static const auto mmiofir = __hash("MMIOFIR"); 408 static const auto srqfir = __hash("SRQFIR"); 409 static const auto rdffir = __hash("RDFFIR"); 410 static const auto tlxfir = __hash("TLXFIR"); 411 static const auto omi_dl = __hash("OMI_DL"); 412 413 for (const auto& signature : i_list) 414 { 415 const auto targetType = getTrgtType(getTrgt(signature.getChip())); 416 const auto attnType = signature.getAttnType(); 417 const auto id = signature.getId(); 418 const auto bit = signature.getBit(); 419 420 // Only looking for recoverable or unit checkstop attentions. 421 if (libhei::ATTN_TYPE_RECOVERABLE != attnType && 422 libhei::ATTN_TYPE_UNIT_CS != attnType) 423 { 424 continue; 425 } 426 427 // Ignore attentions that should not be blamed as root cause of a TI. 428 // This would include informational only FIRs or correctable errors. 429 if (TYPE_PROC == targetType) 430 { 431 if (tp_local_fir == id && 432 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit || 433 5 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit || 434 11 == bit || 20 == bit || 22 == bit || 23 == bit || 435 24 == bit || 38 == bit || 40 == bit || 41 == bit || 436 46 == bit || 47 == bit || 48 == bit || 55 == bit || 437 56 == bit || 57 == bit || 58 == bit || 59 == bit)) 438 { 439 continue; 440 } 441 442 if (occ_fir == id && 443 (9 == bit || 10 == bit || 15 == bit || 20 == bit || 21 == bit || 444 22 == bit || 23 == bit || 32 == bit || 33 == bit || 445 34 == bit || 36 == bit || 42 == bit || 43 == bit || 446 46 == bit || 47 == bit || 48 == bit || 51 == bit || 447 52 == bit || 53 == bit || 54 == bit || 57 == bit)) 448 { 449 continue; 450 } 451 452 if (pbao_fir == id && 453 (0 == bit || 1 == bit || 2 == bit || 8 == bit || 11 == bit || 454 13 == bit || 15 == bit || 16 == bit || 17 == bit)) 455 { 456 continue; 457 } 458 459 if ((n0_local_fir == id || n1_local_fir == id || 460 iohs_local_fir == id || mc_local_fir == id || 461 pau_local_fir == id || pci_local_fir == id) && 462 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit || 463 5 == bit || 6 == bit || 7 == bit || 8 == bit || 9 == bit || 464 10 == bit || 11 == bit || 20 == bit || 21 == bit)) 465 { 466 continue; 467 } 468 469 if (int_cq_fir == id && 470 (0 == bit || 3 == bit || 5 == bit || 7 == bit || 36 == bit || 471 58 == bit || 59 == bit || 60 == bit)) 472 { 473 continue; 474 } 475 476 if (nx_cq_fir == id && 477 (1 == bit || 4 == bit || 18 == bit || 32 == bit || 33 == bit)) 478 { 479 continue; 480 } 481 482 if (nx_dma_eng_fir == id && 483 (4 == bit || 6 == bit || 9 == bit || 10 == bit || 11 == bit || 484 34 == bit || 35 == bit || 36 == bit || 37 == bit || 39 == bit)) 485 { 486 continue; 487 } 488 489 if (vas_fir == id && 490 (8 == bit || 9 == bit || 11 == bit || 12 == bit || 13 == bit)) 491 { 492 continue; 493 } 494 495 if (mcd_fir == id && (0 == bit)) 496 { 497 continue; 498 } 499 500 if ((pb_station_fir_en_1 == id || pb_station_fir_en_2 == id || 501 pb_station_fir_en_3 == id || pb_station_fir_en_4 == id || 502 pb_station_fir_es_1 == id || pb_station_fir_es_2 == id || 503 pb_station_fir_es_3 == id || pb_station_fir_es_4 == id || 504 pb_station_fir_eq == id) && 505 (9 == bit)) 506 { 507 continue; 508 } 509 510 if (psihb_fir == id && (0 == bit || 23 == bit)) 511 { 512 continue; 513 } 514 515 if (pbaf_fir == id && 516 (0 == bit || 1 == bit || 3 == bit || 4 == bit || 5 == bit || 517 6 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit || 518 11 == bit || 19 == bit || 20 == bit || 21 == bit || 519 28 == bit || 29 == bit || 30 == bit || 31 == bit || 520 32 == bit || 33 == bit || 34 == bit || 35 == bit || 36 == bit)) 521 { 522 continue; 523 } 524 525 if (lpc_fir == id && (5 == bit)) 526 { 527 continue; 528 } 529 530 if (eq_core_fir == id && 531 (0 == bit || 2 == bit || 4 == bit || 7 == bit || 9 == bit || 532 11 == bit || 13 == bit || 18 == bit || 21 == bit || 533 24 == bit || 29 == bit || 31 == bit || 37 == bit || 534 43 == bit || 56 == bit || 57 == bit)) 535 { 536 continue; 537 } 538 539 if (eq_l2_fir == id && 540 (0 == bit || 6 == bit || 11 == bit || 19 == bit || 36 == bit)) 541 { 542 continue; 543 } 544 545 if (eq_l3_fir == id && 546 (3 == bit || 4 == bit || 7 == bit || 10 == bit || 13 == bit)) 547 { 548 continue; 549 } 550 551 if (eq_ncu_fir == id && (9 == bit)) 552 { 553 continue; 554 } 555 556 if (eq_local_fir == id && 557 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 5 == bit || 558 6 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit || 559 11 == bit || 12 == bit || 13 == bit || 14 == bit || 560 15 == bit || 16 == bit || 20 == bit || 21 == bit || 561 22 == bit || 23 == bit || 24 == bit || 25 == bit || 562 26 == bit || 27 == bit || 28 == bit || 29 == bit || 563 30 == bit || 31 == bit || 32 == bit || 33 == bit || 564 34 == bit || 35 == bit || 36 == bit || 37 == bit || 565 38 == bit || 39 == bit)) 566 { 567 continue; 568 } 569 570 if (eq_qme_fir == id && (7 == bit || 25 == bit)) 571 { 572 continue; 573 } 574 575 if (iohs_dlp_fir_oc == id && 576 (6 == bit || 7 == bit || 8 == bit || 9 == bit || 10 == bit || 577 48 == bit || 49 == bit || 52 == bit || 53 == bit)) 578 { 579 continue; 580 } 581 582 if (iohs_dlp_fir_smp == id && 583 (6 == bit || 7 == bit || 14 == bit || 15 == bit || 16 == bit || 584 17 == bit || 38 == bit || 39 == bit || 44 == bit || 585 45 == bit || 50 == bit || 51 == bit)) 586 { 587 continue; 588 } 589 590 if (mc_fir == id && 591 (5 == bit || 8 == bit || 15 == bit || 16 == bit)) 592 { 593 continue; 594 } 595 596 if (mc_dstl_fir == id && 597 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit || 598 5 == bit || 6 == bit || 7 == bit || 14 == bit || 15 == bit)) 599 { 600 continue; 601 } 602 603 if (mc_ustl_fir == id && 604 (6 == bit || 20 == bit || 33 == bit || 34 == bit)) 605 { 606 continue; 607 } 608 609 if (nmmu_cq_fir == id && (8 == bit || 11 == bit || 14 == bit)) 610 { 611 continue; 612 } 613 614 if (nmmu_fir == id && 615 (0 == bit || 3 == bit || 8 == bit || 9 == bit || 10 == bit || 616 11 == bit || 12 == bit || 13 == bit || 14 == bit || 617 15 == bit || 30 == bit || 31 == bit || 41 == bit)) 618 { 619 continue; 620 } 621 622 if (mc_omi_dl == id && (2 == bit || 3 == bit || 6 == bit || 623 7 == bit || 9 == bit || 10 == bit)) 624 { 625 continue; 626 } 627 628 if (pau_ptl_fir == id && (5 == bit || 9 == bit)) 629 { 630 continue; 631 } 632 633 if (pau_phy_fir == id && 634 (2 == bit || 3 == bit || 6 == bit || 7 == bit || 15 == bit)) 635 { 636 continue; 637 } 638 639 if (pau_fir_0 == id && (13 == bit || 30 == bit || 41 == bit)) 640 { 641 continue; 642 } 643 644 if (pau_fir_2 == id && (19 == bit || 46 == bit || 49 == bit)) 645 { 646 continue; 647 } 648 649 if (pci_iop_fir == id && 650 (0 == bit || 2 == bit || 4 == bit || 6 == bit || 7 == bit || 651 8 == bit || 10 == bit)) 652 { 653 continue; 654 } 655 656 if (pci_nest_fir == id && (2 == bit || 5 == bit)) 657 { 658 continue; 659 } 660 } 661 else if (TYPE_OCMB == targetType) 662 { 663 if (ocmb_lfir == id && 664 (0 == bit || 1 == bit || 2 == bit || 8 == bit || 23 == bit || 665 37 == bit || 63 == bit)) 666 { 667 continue; 668 } 669 670 if (mmiofir == id && (2 == bit)) 671 { 672 continue; 673 } 674 675 if (srqfir == id && 676 (2 == bit || 4 == bit || 14 == bit || 15 == bit || 23 == bit || 677 25 == bit || 28 == bit)) 678 { 679 continue; 680 } 681 682 if (rdffir == id && 683 (0 == bit || 1 == bit || 2 == bit || 3 == bit || 4 == bit || 684 5 == bit || 6 == bit || 7 == bit || 8 == bit || 9 == bit || 685 18 == bit || 38 == bit || 40 == bit || 41 == bit || 686 45 == bit || 46 == bit)) 687 { 688 continue; 689 } 690 691 if (tlxfir == id && (0 == bit || 9 == bit || 26 == bit)) 692 { 693 continue; 694 } 695 696 if (omi_dl == id && (2 == bit || 3 == bit || 6 == bit || 7 == bit || 697 9 == bit || 10 == bit)) 698 { 699 continue; 700 } 701 } 702 703 // At this point, the attention has not been explicitly ignored. So 704 // return this signature and exit. 705 o_rootCause = signature; 706 return true; 707 } 708 709 return false; // default, nothing found 710 } 711 712 //------------------------------------------------------------------------------ 713 714 bool filterRootCause(AnalysisType i_type, 715 const libhei::IsolationData& i_isoData, 716 libhei::Signature& o_rootCause) 717 { 718 // We'll need to make a copy of the list so that the original list is 719 // maintained for the PEL. 720 std::vector<libhei::Signature> list{i_isoData.getSignatureList()}; 721 722 // START WORKAROUND 723 // TODO: Filtering should be data driven. Until that support is available, 724 // use the following isolation rules. 725 726 // Ensure the list is not empty before continuing. 727 if (list.empty()) 728 { 729 return false; // nothing more to do 730 } 731 732 // First, look for any RCS OSC errors. This must always be first because 733 // they can cause downstream PLL unlock attentions. 734 if (__findRcsOscError(list, o_rootCause)) 735 { 736 return true; 737 } 738 739 // Second, look for any PLL unlock attentions. This must always be second 740 // because PLL unlock attentions can cause any number of downstream 741 // attentions, including a system checkstop. 742 if (__findPllUnlock(list, o_rootCause)) 743 { 744 return true; 745 } 746 747 // Regardless of the analysis type, always look for anything that could be 748 // blamed as the root cause of a system checkstop. 749 750 // Memory channel failure attentions will produce SUEs and likely cause 751 // downstream attentions, including a system checkstop. 752 if (__findMemoryChannelFailure(list, o_rootCause)) 753 { 754 return true; 755 } 756 757 // Look for any recoverable attentions that have been identified as a 758 // potential root cause of a system checkstop attention. These would include 759 // any attention that would generate an SUE. Note that is it possible for 760 // recoverables to generate unit checkstop attentions so we must check them 761 // first. 762 if (__findCsRootCause_RE(list, o_rootCause)) 763 { 764 return true; 765 } 766 767 // Look for any unit checkstop attentions (other than memory channel 768 // failures) that have been identified as a potential root cause of a 769 // system checkstop attention. These would include any attention that would 770 // generate an SUE. 771 if (__findCsRootCause_UCS(list, o_rootCause)) 772 { 773 return true; 774 } 775 776 // Look for any system checkstop attentions that originated from within the 777 // chip that reported the attention. In other words, no external checkstop 778 // attentions. 779 if (__findNonExternalCs(list, o_rootCause)) 780 { 781 return true; 782 } 783 784 if (AnalysisType::SYSTEM_CHECKSTOP != i_type) 785 { 786 // No system checkstop root cause attentions were found. Next, look for 787 // any recoverable or unit checkstop attentions that could be associated 788 // with a TI. 789 if (__findTiRootCause(list, o_rootCause)) 790 { 791 return true; 792 } 793 794 if (AnalysisType::TERMINATE_IMMEDIATE != i_type) 795 { 796 // No attentions associated with a system checkstop or TI were 797 // found. Simply, return the first entry in the list. 798 o_rootCause = list.front(); 799 return true; 800 } 801 } 802 803 // END WORKAROUND 804 805 return false; // default, no active attentions found. 806 } 807 808 //------------------------------------------------------------------------------ 809 810 } // namespace analyzer 811